import numpy as np
import pandas as pd
import tensorflow as tf
import transformers

from utils import run_benchmark, make_spider_plot
1111
1212# Part 1
1313
1414# TEXT: overview of LLM lab
1515# Load pretrained LLM (medium size model)
1616
# Pretrained medium-size model used throughout Part 1.
model_name = "facebook/opt-1.3b"
# NOTE: the PyTorch (non-TF) class is loaded because the benchmarking code
# requires it; device_map="auto" lets accelerate place weights for us.
model = transformers.AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
2021
2122# TEXT: explain tokenizer
2223# Include cell for tokenizer inspection
2324
2425# TEXT: explain how LLMs are trained for next token prediction
2526# Write a function to predict next token
26-
def predict_next_token(probs, tokenizer):
    """Sample the next token id from ``probs`` and stream its text to stdout.

    Args:
        probs: 1-D probability distribution over the vocabulary (sums to 1).
            Accepts a framework tensor exposing ``.numpy()`` (TF/torch) or,
            generalized, any array-like (list, numpy array).
        tokenizer: tokenizer whose ``decode`` maps a token id to text.

    Returns:
        int: the sampled token id.
    """
    # Generalization: fall back to np.asarray for inputs without .numpy().
    p = probs.numpy() if hasattr(probs, "numpy") else np.asarray(probs)
    new_token = np.random.choice(len(p), p=p)
    # Print incrementally with no newline so generation streams token-by-token.
    print(tokenizer.decode(new_token), end='', flush=True)
    return new_token
3131
3232# TEXT: explain that next token prediction must be called multiple times for inference
3333# Call in loop for autoregressive inference
34-
35- def generate (start_text , num_steps = 20 , temp = 1. ):
34+ def generate (start_text , model , tokenizer , num_steps = 20 , temp = 1. ):
3635 print (start_text , end = "" )
3736 x = tokenizer .encode (start_text )
3837 num_start = len (x )
@@ -42,46 +41,43 @@ def generate(start_text, num_steps=20, temp=1.):
4241 logits = model (input_tensor ).logits
4342 probs = tf .nn .softmax (logits / temp )[0 , - 1 , :]
4443
45- new_token = predict_next_token (probs )
44+ new_token = predict_next_token (probs , tokenizer )
4645 x .append (new_token )
4746
4847 output = tokenizer .decode (x [num_start :])
4948 return output
5049
# Test autoregressive generation
# while True:
#     print("\n\n\n\n\n")
#     input_text = input("Prompt: ")
#     output = generate(input_text, model, tokenizer)
5755
# TEXT: some background on LLM benchmarking
# Load the multiple-choice benchmark, then score the 1.3B model on it.
dataset = pd.read_csv("benchmark.csv")
category_accs_1300m, avg_acc_1300m = run_benchmark(model, tokenizer, dataset)
6360
6461# TEXT: ask them to make a prediction on how accuracy will be affected by different model sizes
6562
# Benchmark smaller model

model_name_350m = "facebook/opt-350m"
model_350m = transformers.AutoModelForCausalLM.from_pretrained(model_name_350m, device_map="auto")
# FIX: load the tokenizer from the model *name* (str), not the model object —
# from_pretrained expects a repo id or local path.
tokenizer_350m = transformers.AutoTokenizer.from_pretrained(model_name_350m)

category_accs_350m, avg_acc_350m = run_benchmark(model_350m, tokenizer_350m, dataset)
7369
# Benchmark larger model

model_name_2700m = "facebook/opt-2.7b"
model_2700m = transformers.AutoModelForCausalLM.from_pretrained(model_name_2700m, device_map="auto")
# FIX: load the tokenizer from the model *name* (str), not the model object —
# from_pretrained expects a repo id or local path.
tokenizer_2700m = transformers.AutoTokenizer.from_pretrained(model_name_2700m)

category_accs_2700m, avg_acc_2700m = run_benchmark(model_2700m, tokenizer_2700m, dataset)
8176
# Spider plot: compare per-category benchmark accuracy across the three
# model sizes on a single radar chart.
benchmark_data = {
    "350M-Model": category_accs_350m,
    "1300M-Model": category_accs_1300m,
    "2700M-Model": category_accs_2700m,
}
make_spider_plot(benchmark_data)
8581
# Part 2