66import numpy as np
77import pandas as pd
88import random
9- import tensorflow as tf
109import torch
1110import torch .nn as nn
1211import torch .nn .functional as F
1312from torch .nn import CrossEntropyLoss
1413from torch .optim import Adam
1514import transformers
16- from trl import SFTTrainer
1715from tqdm import tqdm
1816
1917from utils import run_benchmark , make_spider_plot
2523
2624# model_name = "facebook/opt-1.3b"
2725model_name = "facebook/opt-125m"
28- # had to load non TF version to run benchmarking code
2926model = transformers .AutoModelForCausalLM .from_pretrained (model_name , device_map = "auto" )
3027tokenizer = transformers .AutoTokenizer .from_pretrained (model_name )
3128
@@ -66,7 +63,7 @@ def generate(start_text, model, tokenizer, num_steps=20, temp=1.):
6663# TEXT: some background on LLM benchmarking
6764# Load benchmark dataset and evaluate model
6865benchmark_dataset = pd .read_csv ("benchmark.csv" )
69- category_accs_1300m , avg_acc_1300m = run_benchmark (model , tokenizer , benchmark_dataset )
66+ # category_accs_1300m, avg_acc_1300m = run_benchmark(model, tokenizer, benchmark_dataset)
7067
7168# TEXT: ask them to make a prediction on how accuracy will be affected by different model sizes
7269
@@ -94,7 +91,9 @@ def generate(start_text, model, tokenizer, num_steps=20, temp=1.):
9491
9592# inspect current model
9693# print(model)
97- print (sum (p .numel () for p in model .parameters () if p .requires_grad ))
94+ layer = model .lm_head
95+ print (layer .weight .shape )
96+ print (sum (p .numel () for p in layer .parameters () if p .requires_grad ))
9897
9998# # freeze all parameter gradients
10099for param in model .parameters ():
@@ -150,7 +149,8 @@ def replace_linear_with_lora(module):
150149
151150replace_linear_with_lora (model )
152151
153- print (sum (p .numel () for p in model .parameters () if p .requires_grad ))
152+ layer = model .lm_head
153+ print (sum (p .numel () for p in layer .parameters () if p .requires_grad ))
154154
155155# inspect new model
156156# print(model)
@@ -169,6 +169,7 @@ def replace_linear_with_lora(module):
169169
170170model = model .to ("cuda" )
171171
172+
172173for epoch in range (num_epochs ):
173174 total_loss = 0
174175 num_batches = 0
@@ -212,5 +213,5 @@ def replace_linear_with_lora(module):
212213
213214# add to spider plot
214215# benchmark_data = {"350M-Model": category_accs_350m, "1300M-Model": category_accs_1300m, "1300M-Model-Finetuned": category_accs_1300m_ft, "2700M-Model": category_accs_2700m}
215- benchmark_data = {"350M-Model" : category_accs_1300m , "350M-Model-Finetuned" : category_accs_1300m_ft }
216- make_spider_plot (benchmark_data )
216+ # benchmark_data = {"350M-Model": category_accs_1300m, "350M-Model-Finetuned": category_accs_1300m_ft}
217+ # make_spider_plot(benchmark_data)
0 commit comments