Commit 65fcce63 authored by Se Park

Initial commit

# CO490 - NLP Course Labs (Spring 2020)
## Lab Notebooks
- **(16/01/2020) Lab 1:** Pre-processing and word representations [(Open in Colab)](https://colab.research.google.com/github/ImperialNLP/NLPLabs/blob/master/lab01/preprocessing_and_embeddings.ipynb)
- **(23/01/2020) Lab 2:** Text Classification: Sentiment Analysis [(Open in Colab)](https://colab.research.google.com/github/ImperialNLP/NLPLabs/blob/master/lab02/sentiment_classification.ipynb)
- **(30/01/2020) Lab 3:** Language Modelling
  - Part I: N-gram modelling [(Open in Colab)](https://colab.research.google.com/github/ImperialNLP/NLPLabs/blob/master/lab03/ngram_lm.ipynb)
  - Part II: Neural language models [(Open in Colab)](https://colab.research.google.com/github/ImperialNLP/NLPLabs/blob/master/lab03/neural_lm.ipynb)
- **(06/02/2020) Lab 4:** Part of Speech Tagging [(Open in Colab)](https://colab.research.google.com/github/ImperialNLP/NLPLabs/blob/master/lab04/POStagging.ipynb)
## Coursework
- **(05/02/2020)** A baseline model for the coursework has been [added](/coursework/baseline.ipynb) [(Open in Colab)](https://colab.research.google.com/github/ImperialNLP/NLPLabs/blob/master/coursework/baseline.ipynb)
import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from pathlib import Path

from model import QualityEstimation
from dataloader import LoadData
def set_seed(seed=123):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
def evaluate(model, loss_fn, dataloader, device):
    model.eval()
    eval_loss = 0
    pred, ref = np.array([]), np.array([])
    count = 0
    with torch.no_grad():
        for seq, attn_masks, labels in dataloader:
            seq, attn_masks, labels = seq.to(device), attn_masks.to(device), labels.to(device)
            qe_scores = model(seq, attn_masks)
            # Squeeze the trailing dimension so predictions and labels have matching shapes
            loss = loss_fn(qe_scores.squeeze(-1), labels)
            qe_scores = qe_scores.detach().cpu().numpy()
            qe_scores = qe_scores.reshape((qe_scores.shape[0],))
            labels = labels.to('cpu').numpy()
            pred = np.concatenate((pred, qe_scores))
            ref = np.concatenate((ref, labels))
            eval_loss += loss.item()
            count += 1
    eval_loss = eval_loss / count
    # Pearson correlation between predicted and reference quality scores
    pearson = np.corrcoef(pred, ref)[0, 1]
    return eval_loss, pearson
def train(model, loss_fn, optimizer, train_loader, val_loader, num_epoch, device):
    best_pearson = 0
    for ep in range(num_epoch):
        print('======= Epoch {:} ======='.format(ep))
        model.train()
        for it, (seq, attn_masks, labels) in enumerate(train_loader):
            # Clear gradients
            optimizer.zero_grad()
            # Move the batch to the target device
            seq, attn_masks, labels = seq.to(device), attn_masks.to(device), labels.to(device)
            # Obtain quality scores from the model
            qe_scores = model(seq, attn_masks)
            # Compute the regression loss
            loss = loss_fn(qe_scores.squeeze(-1), labels)
            # Backpropagate the gradients and clip them to stabilise training
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            # Optimization step
            optimizer.step()
            if it % 100 == 0 and it != 0:
                print("Iteration {} of epoch {} complete. Loss: {}".format(it, ep, loss.item()))
        val_loss, val_pearson = evaluate(model, loss_fn, val_loader, device)
        print("Epoch {} complete! Validation loss: {} Pearson: {}".format(ep, val_loss, val_pearson))
        if val_pearson > best_pearson:
            print("Best validation Pearson improved from {} to {}, saving model...".format(best_pearson, val_pearson))
            best_pearson = val_pearson
            torch.save(model.state_dict(), '/vol/bitbucket/shp2918/modelNLP.pt')
if __name__ == "__main__":
    PATH = Path("/vol/bitbucket/shp2918/nlp")
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    print("Using GPU: {}".format(use_cuda))

    set_seed()
    model = QualityEstimation(hidden_dim=128)
    model.to(device)
    loss_fn = nn.MSELoss()
    optimizer = optim.AdamW(model.parameters(), lr=2e-5)

    MAX_LEN = 64
    st = time.time()
    train_set = LoadData(filename=PATH/'data/train.csv', maxlen=MAX_LEN)
    val_set = LoadData(filename=PATH/'data/valid.csv', maxlen=MAX_LEN)
    train_loader = DataLoader(train_set, batch_size=32, num_workers=5)
    val_loader = DataLoader(val_set, batch_size=32, num_workers=5)

    num_epoch = 4
    train(model, loss_fn, optimizer, train_loader, val_loader, num_epoch, device)
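The training script above imports LoadData from dataloader.py, which is not shown in this excerpt. As a rough guide, a minimal sketch of such a dataset follows, assuming each CSV row holds a source sentence, its machine translation, and a numeric quality score; the column names, tokeniser checkpoint, and all other details are assumptions, not the actual dataloader.py:

import pandas as pd
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer


class LoadData(Dataset):
    """Hypothetical dataset sketch: column names and tokeniser are assumptions."""

    def __init__(self, filename, maxlen):
        self.df = pd.read_csv(filename)
        self.maxlen = maxlen
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        # Encode the source/translation pair as a single BERT input, padded to maxlen
        encoded = self.tokenizer.encode_plus(
            str(row['source']), str(row['translation']),
            max_length=self.maxlen, pad_to_max_length=True)
        seq = torch.tensor(encoded['input_ids'], dtype=torch.long)
        attn_mask = torch.tensor(encoded['attention_mask'], dtype=torch.long)
        label = torch.tensor(row['score'], dtype=torch.float)
        return seq, attn_mask, label

The model definition used by the script follows.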
import torch
import torch.nn as nn
from transformers import BertModel, BertConfig
class QualityEstimation(nn.Module):
    def __init__(self, hidden_dim):
        super(QualityEstimation, self).__init__()
        self.hidden_dim = hidden_dim

        # Instantiating the BERT encoder from a default config
        # (note: this gives randomly initialised weights, not a pretrained checkpoint)
        config = BertConfig()
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(0.25)

        # LSTM and regression layers
        self.lstm = nn.LSTM(input_size=config.hidden_size, hidden_size=self.hidden_dim,
                            num_layers=1, batch_first=True,
                            dropout=0, bidirectional=False)
        self.fc1 = nn.Linear(self.hidden_dim, self.hidden_dim)
        self.fc2 = nn.Linear(self.hidden_dim, 1)
        self.loss = nn.MSELoss()
    def forward(self, input_ids, attention_mask=None, token_type_ids=None, labels=None):
        # Feed the input to BERT to obtain contextualised representations
        flat_input_ids = input_ids.view(-1, input_ids.size(-1))
        flat_attention_mask = attention_mask.view(-1, attention_mask.size(-1)) if attention_mask is not None else None
        flat_token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1)) if token_type_ids is not None else None
        encoded_layers = self.bert(flat_input_ids,
                                   attention_mask=flat_attention_mask,
                                   token_type_ids=flat_token_type_ids)[0]
        encoded_layers = self.dropout(encoded_layers)

        # Run the LSTM over BERT's outputs, take the last time step, and regress a score in [0, 1]
        output, _ = self.lstm(encoded_layers)
        output = torch.tanh(self.fc1(output[:, -1, :]))
        qe_scores = torch.sigmoid(self.fc2(output))
        return qe_scores
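One thing to note about the constructor above: BertModel(BertConfig()) builds a randomly initialised encoder. If pretrained weights are intended, the usual transformers call is from_pretrained; a minimal sketch, where the checkpoint name is an assumption:

from transformers import BertModel

# Drop-in alternative for `self.bert = BertModel(config)` above:
# loads pretrained BERT weights ('bert-base-uncased' is an assumed checkpoint name).
bert = BertModel.from_pretrained('bert-base-uncased')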