我对机器学习真的很陌生,一般来说我不太精通编码。但是,需要查看我们商店的客户反馈,平均每年很多,但我们无法确定正面、负面和中立的百分比。
目前我正在尝试训练一个 Bert 模型来进行简单的多标签情感分析。输入是我们商店的客户反馈。客户的反馈并不总是那么明确,因为客户确实倾向于长篇大论地讲述他们的经历,而他们的情绪并不总是那么清楚。但是我们设法得到了正、负和中性,每组 2247 个样本。
但是当我尝试训练它时,训练准确率约为 0.4%,这是非常低的。验证分数约为 60%。每个标签的 F1 分数约为 60%。我想知道可以做些什么来提高这种训练的准确性。我被困了一段时间。请查看我的代码并帮助我解决这个问题。
我尝试过改变学习率(尝试了所有 Bert 建议的学习率和 1e-5)、改变 Max LEN、改变 EPOCH 的数量、改变退出率(0.1、0.2、0.3、0.4、0.5),但到目前为止没有任何结果。
#read dataset
df = pd.read_csv("data.csv",header=None, names=['content', 'sentiment'], sep='\;', lineterminator='\r',encoding = "ISO-8859-1",engine="python")
from sklearn.utils import shuffle
df = shuffle(df)
df['sentiment'] = df['sentiment'].replace(to_replace = [-1, 0, 1], value = [0, 1, 2])
df.head()
#Load pretrained FinBert model and get bert tokenizer from it
PRE_TRAINED_MODEL_NAME = 'TurkuNLP/bert-base-finnish-cased-v1'
tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)
#Choose sequence Length
token_lens = []
for txt in df.content:
tokens = tokenizer.encode(txt, max_length=512)
token_lens.append(len(tokens))
sns.distplot(token_lens)
plt.xlim([0, 256]);
plt.xlabel('Token count');
MAX_LEN = 260
#Make a PyTorch dataset
class FIDataset(Dataset):
def __init__(self, texts, targets, tokenizer, max_len):
self.texts = texts
self.targets = targets
self.tokenizer = tokenizer
self.max_len = max_len
def __len__(self):
return len(self.texts)
def __getitem__(self, item):
text = str(self.texts[item])
target = self.targets[item]
encoding = self.tokenizer.encode_plus(
text,
add_special_tokens=True,
max_length=self.max_len,
return_token_type_ids=False,
pad_to_max_length=True,
return_attention_mask=True,
return_tensors='pt',
)
return {
'text': text,
'input_ids': encoding['input_ids'].flatten(),
'attention_mask': encoding['attention_mask'].flatten(),
'targets': torch.tensor(target, dtype=torch.long)
}
#split test and train
df_train, df_test = train_test_split(
df,
test_size=0.1,
random_state=RANDOM_SEED
)
df_val, df_test = train_test_split(
df_test,
test_size=0.5,
random_state=RANDOM_SEED
)
df_train.shape, df_val.shape, df_test.shape
#data loader function
def create_data_loader(df, tokenizer, max_len, batch_size):
ds = FIDataset(
texts=df.content.to_numpy(),
targets=df.sentiment.to_numpy(),
tokenizer=tokenizer,
max_len=max_len
)
return DataLoader(
ds,
batch_size=batch_size,
num_workers=4
)
#Load data into train, test, val
BATCH_SIZE = 16
train_data_loader = create_data_loader(df_train, tokenizer, MAX_LEN, BATCH_SIZE)
val_data_loader = create_data_loader(df_val, tokenizer, MAX_LEN, BATCH_SIZE)
test_data_loader = create_data_loader(df_test, tokenizer, MAX_LEN, BATCH_SIZE)
# Sentiment Classifier based on Bert model just loaded
class SentimentClassifier(nn.Module):
def __init__(self, n_classes):
super(SentimentClassifier, self).__init__()
self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
self.drop = nn.Dropout(p=0.1)
self.out = nn.Linear(self.bert.config.hidden_size, n_classes)
def forward(self, input_ids, attention_mask):
returned = self.bert(
input_ids=input_ids,
attention_mask=attention_mask
)
pooled_output = returned["pooler_output"]
output = self.drop(pooled_output)
return self.out(output)
#Create a Classifier instance and move to GPU
model = SentimentClassifier(3)
model = model.to(device)
#Optimize with AdamW
EPOCHS = 5
optimizer = AdamW(model.parameters(), lr= 2e-5, correct_bias=False)
total_steps = len(train_data_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(
optimizer,
num_warmup_steps=0,
num_training_steps=total_steps
)
loss_fn = nn.CrossEntropyLoss().to(device)
#Train each Epoch function
def train_epoch(
model,
data_loader,
loss_fn,
optimizer,
device,
scheduler,
n_examples
):
model = model.train()
losses = []
correct_predictions = 0
for d in data_loader:
input_ids = d["input_ids"].to(device)
attention_mask = d["attention_mask"].to(device)
targets = d["targets"].to(device)
outputs = model(
input_ids=input_ids,
attention_mask=attention_mask
)
_, preds = torch.max(outputs, dim=1)
loss = loss_fn(outputs, targets)
correct_predictions += torch.sum(preds == targets)
losses.append(loss.item())
loss.backward()
nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
optimizer.step()
scheduler.step()
optimizer.zero_grad()
return correct_predictions.double() / n_examples, np.mean(losses)
#Eval model function
def eval_model(model, data_loader, loss_fn, device, n_examples):
model = model.eval()
losses = []
correct_predictions = 0
with torch.no_grad():
torch.cuda.empty_cache()
for d in data_loader:
input_ids = d["input_ids"].to(device)
attention_mask = d["attention_mask"].to(device)
targets = d["targets"].to(device)
outputs = model(
input_ids=input_ids,
attention_mask=attention_mask
)
_, preds = torch.max(outputs, dim=1)
loss = loss_fn(outputs, targets)
correct_predictions += torch.sum(preds == targets)
losses.append(loss.item())
return correct_predictions.double() / n_examples, np.mean(losses)
#training loop through each epochs
import torch
torch.cuda.empty_cache()
history = defaultdict(list)
best_accuracy = 0
if __name__ == '__main__':
for epoch in range(EPOCHS):
print(f'Epoch {epoch + 1}/{EPOCHS}')
print('-' * 10)
train_acc, train_loss = train_epoch(
model,
train_data_loader,
loss_fn,
optimizer,
device,
scheduler,
len(df_train)
)
print(f'Train loss {train_loss} accuracy {train_acc}')
val_acc, val_loss = eval_model(
model,
val_data_loader,
loss_fn,
device,
len(df_val)
)
print(f'Val loss {val_loss} accuracy {val_acc}')
print()
history['train_acc'].append(train_acc)
history['train_loss'].append(train_loss)
history['val_acc'].append(val_acc)
history['val_loss'].append(val_loss)
if val_acc > best_accuracy:
torch.save(model.state_dict(), 'best_model_state.bin')
best_accuracy = val_acc
-- 编辑:我已经打印出预测和目标以及训练和验证准确率
答案 0 :(得分:1)
这里_, preds = torch.max(outputs, dim=1)
,您可能想要argmax
,而不是max
?
打印出 preds
和 targets
以更好地了解发生了什么。
在打印出 preds 和目标后编辑。 对于 epochs 4 和 5,preds
与 targets
完全匹配,因此训练准确率应该为 1。我认为问题在于准确率除以n_examples
,即整个训练数据集中的示例数,而应该除以epoch中的示例数。