Commit 90c0fdfe authored by Park, Se's avatar Park, Se

Delete main.py

parent 2b770646
import random
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from dataloader import Data, LoadData
from model import QualityEstimation
def set_seed(seed=123):
    """Seed all random number generators used in training for reproducibility.

    Seeds Python's ``random`` module, NumPy, and PyTorch (CPU and every
    visible CUDA device).  The original version skipped ``random`` and
    seeded only the current GPU.

    Args:
        seed: Integer seed applied to every RNG (default 123).
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all covers multi-GPU setups; it is a no-op on CPU-only machines.
    torch.cuda.manual_seed_all(seed)
def evaluate(model, loss_fn, dataloader, device):
    """Evaluate ``model`` on ``dataloader`` without updating weights.

    Args:
        model: Module called as ``model(token_ids, segment_ids, attn_masks)``
            returning one score per example.
        loss_fn: Loss taking ``(predictions, targets)`` as flat 1-D tensors.
        dataloader: Iterable yielding ``(token_ids, segment_ids, attn_masks,
            labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(eval_loss, pearson)`` where ``eval_loss`` is the mean of the
        per-batch losses and ``pearson`` is the Pearson correlation between
        all predictions and all labels.

    Raises:
        ValueError: If ``dataloader`` yields no batches (the original code
            raised ``ZeroDivisionError`` here).
    """
    model.eval()
    total_loss = 0.0
    batch_count = 0
    # Collect per-batch arrays and concatenate once at the end; the original
    # np.concatenate inside the loop was O(n^2) in the number of examples.
    all_preds, all_refs = [], []
    with torch.no_grad():
        for token_ids, segment_ids, attn_masks, labels in dataloader:
            token_ids = token_ids.to(device)
            segment_ids = segment_ids.to(device)
            attn_masks = attn_masks.to(device)
            labels = labels.to(device)
            qe_scores = model(token_ids, segment_ids, attn_masks)
            loss = loss_fn(qe_scores.view(-1), labels.view(-1))
            total_loss += loss.item()
            batch_count += 1
            # reshape(-1) flattens both (batch,) and (batch, 1) shapes.
            all_preds.append(qe_scores.detach().cpu().numpy().reshape(-1))
            all_refs.append(labels.cpu().numpy().reshape(-1))
    if batch_count == 0:
        raise ValueError("evaluate() received an empty dataloader")
    pred = np.concatenate(all_preds)
    ref = np.concatenate(all_refs)
    eval_loss = total_loss / batch_count
    pearson = np.corrcoef(pred, ref)[0, 1]
    return eval_loss, pearson
def train(model, loss_fn, optimizer, train_loader, val_loader, num_epoch, device,
          save_path='/vol/bitbucket/shp2918/nlp/modelNLP.pt'):
    """Train ``model`` for ``num_epoch`` epochs, checkpointing on best Pearson.

    After each epoch the model is evaluated on ``val_loader``; whenever the
    Pearson correlation improves, the state dict is saved to ``save_path``.

    Args:
        model: Module called as ``model(token_ids, segment_ids, attn_masks)``.
        loss_fn: Loss taking flat ``(predictions, targets)`` tensors.
        optimizer: Optimizer over ``model.parameters()``.
        train_loader: Iterable of ``(token_ids, segment_ids, attn_masks,
            labels)`` training batches.
        val_loader: Validation batches in the same format.
        num_epoch: Number of full passes over ``train_loader``.
        device: Device the batches are moved to.
        save_path: Checkpoint destination (default keeps the original
            hard-coded path, so existing callers are unaffected).
    """
    best_pearson = -float('inf')
    for ep in range(num_epoch):
        print('======= Epoch {:} ======='.format(ep))
        for it, (token_ids, segment_ids, attn_masks, labels) in enumerate(train_loader):
            model.train()
            # Clear gradients from the previous step.
            optimizer.zero_grad()
            token_ids = token_ids.to(device)
            segment_ids = segment_ids.to(device)
            attn_masks = attn_masks.to(device)
            labels = labels.to(device)
            qe_scores = model(token_ids, segment_ids, attn_masks)
            loss = loss_fn(qe_scores.view(-1), labels.view(-1))
            loss.backward()
            # Clip gradients to stabilise fine-tuning.
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            if it % 100 == 0 and it != 0:
                print("Iteration {} of epoch {} complete".format(it, ep))
        # Evaluate once per epoch.  Note: evaluate() returns the mean MSELoss
        # per batch, not an RMSE -- the old log message mislabelled it.
        val_loss, pearson = evaluate(model, loss_fn, val_loader, device)
        print("Epoch {} complete! Val loss: {}, Pearson: {}".format(ep, val_loss, pearson))
        if pearson > best_pearson:
            print("Best Pearson improved from {} to {}, saving model...".format(best_pearson, pearson))
            best_pearson = pearson
            torch.save(model.state_dict(), save_path)
if __name__ == "__main__":
    PATH = Path("/vol/bitbucket/shp2918/nlp")
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    print("Using GPU: {}".format(use_cuda))
    set_seed()
    model = QualityEstimation(hidden_dim=128)
    # Bug fix: the original called model.cuda() unconditionally, which crashes
    # on CPU-only machines even though `device` was computed above.
    model.to(device)
    loss_fn = nn.MSELoss()
    optimizer = optim.AdamW(model.parameters(), lr=2e-5)
    MAX_LEN = 64
    train_set = LoadData(src_file=PATH/'data/train.enzh.src', mt_file=PATH/'data/train.enzh.mt', score_file=PATH/'data/train.enzh.scores', maxlen=MAX_LEN)
    val_set = LoadData(src_file=PATH/'data/dev.enzh.src', mt_file=PATH/'data/dev.enzh.mt', score_file=PATH/'data/dev.enzh.scores', maxlen=MAX_LEN)
    # NOTE(review): training batches are not shuffled; consider shuffle=True
    # for the train loader -- left unchanged to preserve behaviour.
    train_loader = DataLoader(train_set, batch_size=32)
    val_loader = DataLoader(val_set, batch_size=32)
    num_epoch = 4
    train(model, loss_fn, optimizer, train_loader, val_loader, num_epoch, device)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.