Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
MetaRL
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Wang, Mia
MetaRL
Commits
9895ec58
Commit
9895ec58
authored
3 years ago
by
John Carter
Browse files
Options
Downloads
Patches
Plain Diff
UCB1 tidy up code
parent
daf6d1ce
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
MetaAugment/UCB1_JC.ipynb
+52
-52
52 additions, 52 deletions
MetaAugment/UCB1_JC.ipynb
with
52 additions
and
52 deletions
MetaAugment/UCB1_JC.ipynb
+
52
−
52
View file @
9895ec58
...
...
@@ -32,12 +32,13 @@
"metadata": {
"id": "U_ZJ2LqDiu_v"
},
"execution_count":
null
,
"execution_count":
1
,
"outputs": []
},
{
"cell_type": "code",
"source": [
"\"\"\"Define internal NN module that trains on the dataset\"\"\"\n",
"class LeNet(nn.Module):\n",
" def __init__(self):\n",
" super().__init__()\n",
...
...
@@ -73,7 +74,7 @@
"metadata": {
"id": "4ksS_duLFADW"
},
"execution_count":
null
,
"execution_count":
2
,
"outputs": []
},
{
...
...
@@ -92,6 +93,7 @@
" indices_test = torch.arange(int(n_samples*len(test_dataset)))\n",
" reduced_test_dataset = data_utils.Subset(shuffled_test_dataset, indices_test)\n",
"\n",
" # push into DataLoader\n",
" train_loader = torch.utils.data.DataLoader(reduced_train_dataset, batch_size=batch_size)\n",
" test_loader = torch.utils.data.DataLoader(reduced_test_dataset, batch_size=batch_size)\n",
"\n",
...
...
@@ -100,7 +102,7 @@
"metadata": {
"id": "xujQtvVWBgMH"
},
"execution_count":
null
,
"execution_count":
3
,
"outputs": []
},
{
...
...
@@ -139,7 +141,7 @@
"metadata": {
"id": "Iql-c88jGGWy"
},
"execution_count":
null
,
"execution_count":
4
,
"outputs": []
},
{
...
...
@@ -155,6 +157,7 @@
" shear = 0\n",
" scale = 1\n",
"\n",
" # check for rotations\n",
" if policies[policy, sub_policy][0] == 0:\n",
" if np.random.uniform() < policies[policy, sub_policy][2]:\n",
" degrees = policies[policy, sub_policy][4]\n",
...
...
@@ -162,6 +165,7 @@
" if np.random.uniform() < policies[policy, sub_policy][3]:\n",
" degrees = policies[policy, sub_policy][5]\n",
"\n",
" # check for shears\n",
" if policies[policy, sub_policy][0] == 1:\n",
" if np.random.uniform() < policies[policy, sub_policy][2]:\n",
" shear = policies[policy, sub_policy][4]\n",
...
...
@@ -169,6 +173,7 @@
" if np.random.uniform() < policies[policy, sub_policy][3]:\n",
" shear = policies[policy, sub_policy][5]\n",
"\n",
" # check for scales\n",
" if policies[policy, sub_policy][0] == 2:\n",
" if np.random.uniform() < policies[policy, sub_policy][2]:\n",
" scale = policies[policy, sub_policy][4]\n",
...
...
@@ -181,34 +186,42 @@
"metadata": {
"id": "QE2VWI8o731X"
},
"execution_count":
null
,
"execution_count":
5
,
"outputs": []
},
{
"cell_type": "code",
"execution_count":
null
,
"execution_count":
6
,
"metadata": {
"id": "vu_4I4qkbx73"
},
"outputs": [],
"source": [
"\"\"\"Sample policy, open and apply above transformations\"\"\"\n",
"def run_UCB1(
q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, initial_iteration, batch_size, toy_size
, iterations):\n",
"def run_UCB1(
policies, batch_size, toy_size, max_epochs, early_stop_num
, iterations):\n",
"\n",
" #Pull each bandit arm just once\n",
" if initial_iteration:\n",
" iterations = num_policies\n",
" # get number of policies and sub-policies\n",
" num_policies = len(policies)\n",
" num_sub_policies = len(policies[0])\n",
"\n",
" #Initialize vector weights, counts and regret\n",
" q_values = [0]*num_policies\n",
" cnts = [0]*num_policies\n",
" q_plus_cnt = [0]*num_policies\n",
" total_count = 0\n",
"\n",
" for policy in range(iterations):\n",
" # sample policy and get transformations\n",
" if not initial_iteration:\n",
"\n",
" # get the action to try (either initially in order or using best q_plus_cnt value)\n",
" if policy >= num_policies:\n",
" this_policy = np.argmax(q_plus_cnt)\n",
" else:\n",
" this_policy = policy\n",
"\n",
" # get info of transformation for this sub-policy\n",
" degrees, shear, scale = sample_sub_policy(policies, this_policy, num_sub_policies)\n",
"\n",
" # create transformations\n",
" # create transformations
using above info
\n",
" transform = torchvision.transforms.Compose(\n",
" [torchvision.transforms.RandomAffine(degrees=(degrees,degrees), shear=(shear,shear), scale=(scale,scale)),\n",
" torchvision.transforms.ToTensor()])\n",
...
...
@@ -217,26 +230,22 @@
" train_dataset = datasets.MNIST(root='./MetaAugment/train', train=True, download=True, transform=transform)\n",
" test_dataset = datasets.MNIST(root='./MetaAugment/test', train=False, download=True, transform=transform)\n",
"\n",
"\n",
" \"\"\"Make toy dataset\"\"\"\n",
" # create toy dataset from above uploaded data\n",
" train_loader, test_loader = create_toy(train_dataset, test_dataset, batch_size, toy_size)\n",
"\n",
"\n",
" \"\"\" Run model\"\"\"\n",
" # create model\n",
" model = LeNet()\n",
" sgd = optim.SGD(model.parameters(), lr=1e-1)\n",
" cost = nn.CrossEntropyLoss()\n",
"\n",
" # set variables for best validation accuracy and early stop count\n",
" best_acc = 0\n",
" early_stop_cnt = 0\n",
"\n",
" #
choose how many past best
validation accuracy
we go
\n",
"
early_stop_num = 10
\n",
" #
train model and check
validation accuracy
each epoch
\n",
"
for _epoch in range(max_epochs):
\n",
"\n",
" # choose max number of epochs\n",
" epoch = 100\n",
"\n",
" for _epoch in range(epoch):\n",
" # train model\n",
" model.train()\n",
" for idx, (train_x, train_label) in enumerate(train_loader):\n",
" label_np = np.zeros((train_label.shape[0], 10))\n",
...
...
@@ -246,6 +255,7 @@
" loss.backward()\n",
" sgd.step()\n",
"\n",
" # check validation accuracy on validation set\n",
" correct = 0\n",
" _sum = 0\n",
" model.eval()\n",
...
...
@@ -257,6 +267,7 @@
" correct += np.sum(_.numpy(), axis=-1)\n",
" _sum += _.shape[0]\n",
" \n",
" # update best validation accuracy if it was higher, otherwise increase early stop count\n",
" acc = correct / _sum\n",
" if acc > best_acc :\n",
" best_acc = acc\n",
...
...
@@ -264,32 +275,28 @@
" else:\n",
" early_stop_cnt += 1\n",
"\n",
" # exit if validation gets worse
for
\n",
" # exit if validation gets worse
over 10 runs
\n",
" if early_stop_cnt >= early_stop_num:\n",
" break\n",
"\n",
" # update q_values\n",
" if
initial_iteration
:\n",
" if
policy < num_policies
:\n",
" q_values[this_policy] += best_acc\n",
" else:\n",
" q_values[this_policy] = (q_values[this_policy]*cnts[this_policy] + best_acc) / (cnts[this_policy] + 1)\n",
"\n",
" print(q_values)\n",
"\n",
" # update counts\n",
" cnts[this_policy] += 1\n",
" total_count += 1\n",
"\n",
" # update q_plus_cnt values\n",
" if
not initial_iteration
:\n",
" # update q_plus_cnt values
every turn after the initial sweep through
\n",
" if
policy >= num_policies - 1
:\n",
" for i in range(num_policies):\n",
" q_plus_cnt[i] = q_values[i] + np.sqrt(2*np.log(total_count)/cnts[i])\n",
"\n",
" #print(q_values)\n",
"\n",
" if initial_iteration:\n",
" for i in range(num_policies):\n",
" q_plus_cnt[i] = q_values[i] + np.sqrt(2*np.log(total_count)/cnts[i])\n",
"\n",
" return q_values, cnts, total_count, q_plus_cnt"
" return q_values"
]
},
{
...
...
@@ -297,31 +304,26 @@
"source": [
"%%time\n",
"\n",
"batch_size = 32\n",
"toy_size = 0.02\n",
"total_iterations = 50\n",
"batch_size = 32 # size of batch inner NN is trained with\n",
"toy_size = 0.02 # total propeortion of training and test set we use\n",
"max_epochs = 100 # max number of epochs that is run if early stopping is not hit\n",
"early_stop_num = 10 # max number of worse validation scores before early stopping\n",
"iterations = 20 # total iterations, should be more than the number of policies\n",
"\n",
"# generate policies and sub-policies\n",
"num_policies = 10\n",
"num_sub_policies = 5\n",
"policies = generate_policies(num_policies, num_sub_policies)\n",
"\n",
"#Initialize vector weights, counts and regret\n",
"q_values = [0]*num_policies\n",
"cnts = [0]*num_policies\n",
"q_plus_cnt = [0]*num_policies\n",
"total_count = 0\n",
"\n",
"q_values, cnts, total_count, q_plus_cnt = run_UCB1(q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, True, batch_size, toy_size, 0)\n",
"print(q_values)\n",
"q_values, cnts, total_count, q_plus_cnt = run_UCB1(q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, False, batch_size, toy_size , total_iterations)\n",
"print(q_values)"
"q_values = run_UCB1(policies, batch_size, toy_size, max_epochs, early_stop_num, iterations)\n",
"#print(q_values)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "doHUtJ_tEiA6",
"outputId": "
0f25a17b-aab2-4d59-ecea-2e36c7bd5592
"
"outputId": "
6735e812-f7be-4f8b-cec2-52a069f7731b
"
},
"execution_count": null,
"outputs": [
...
...
@@ -329,10 +331,8 @@
"output_type": "stream",
"name": "stdout",
"text": [
"[0.81, 0.94, 0.835, 0.94, 0.775, 0.78, 0.96, 0.935, 0.97, 0.76]\n",
"[0.722, 0.8578571428571429, 0.7966666666666665, 0.8950000000000001, 0.7766666666666667, 0.8558333333333333, 0.8383333333333334, 0.688, 0.8041666666666666, 0.8766666666666668]\n",
"CPU times: user 14min 46s, sys: 10.9 s, total: 14min 57s\n",
"Wall time: 14min 58s\n"
"10\n",
"5\n"
]
}
]
...
...
%% Cell type:code id: tags:
```
import numpy as np
import torch
torch.manual_seed(0)
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data_utils
import torchvision
import torchvision.datasets as datasets
```
%% Cell type:code id: tags:
```
"""Define internal NN module that trains on the dataset"""
class LeNet(nn.Module):
    """LeNet-style convolutional classifier used as the inner network.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max-pooling, feed a
    three-layer fully connected head that emits 10 scores per sample.
    ``fc1`` expects 256 = 16 * 4 * 4 flattened features, which is what the
    convolutional stack produces for single-channel 28x28 inputs.
    """

    def __init__(self):
        super().__init__()
        # Parameterised layers are created in a fixed order (conv1, conv2,
        # fc1, fc2, fc3) so that seeded initialisation stays reproducible.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(256, 120)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(120, 84)
        self.relu4 = nn.ReLU()
        self.fc3 = nn.Linear(84, 10)
        self.relu5 = nn.ReLU()

    def forward(self, x):
        """Return a ``(batch, 10)`` tensor of non-negative class scores."""
        # Convolutional feature extractor.
        y = self.pool1(self.relu1(self.conv1(x)))
        y = self.pool2(self.relu2(self.conv2(y)))
        # Flatten everything except the batch dimension for the linear head.
        y = y.view(y.shape[0], -1)
        y = self.relu3(self.fc1(y))
        y = self.relu4(self.fc2(y))
        # NOTE(review): the final ReLU clamps negative scores to zero. It is
        # kept to preserve existing results, but a classifier normally feeds
        # raw scores to a cross-entropy loss -- confirm this is intended.
        return self.relu5(self.fc3(y))
```
%% Cell type:code id: tags:
```
"""Make toy dataset"""
def _take_random_fraction(dataset, fraction):
    """Return a Subset holding a random ``fraction`` of ``dataset``."""
    # Draw a full permutation (one RNG call per dataset) and keep only the
    # first int(fraction * len) indices; the count is truncated, not rounded.
    keep = int(fraction * len(dataset))
    shuffled_indices = torch.randperm(len(dataset))[:keep].tolist()
    return data_utils.Subset(dataset, shuffled_indices)


def create_toy(train_dataset, test_dataset, batch_size, n_samples):
    """Build DataLoaders over random subsets of the train and test datasets.

    Args:
        train_dataset: indexable dataset supporting ``len``.
        test_dataset: indexable dataset supporting ``len``.
        batch_size: batch size used for both loaders.
        n_samples: fraction (e.g. 0.02, not a percentage) of each dataset
            to keep.

    Returns:
        ``(train_loader, test_loader)``. The loaders iterate their subset in
        the order fixed by the random permutation; no reshuffling per epoch.
    """
    # The train permutation is drawn before the test permutation so that a
    # seeded run selects the same samples every time.
    reduced_train_dataset = _take_random_fraction(train_dataset, n_samples)
    reduced_test_dataset = _take_random_fraction(test_dataset, n_samples)

    # push into DataLoader
    train_loader = data_utils.DataLoader(reduced_train_dataset, batch_size=batch_size)
    test_loader = data_utils.DataLoader(reduced_test_dataset, batch_size=batch_size)
    return train_loader, test_loader
```
%% Cell type:code id: tags:
```
"""Randomly generate 10 policies"""
"""Each policy has 5 sub-policies"""
"""For each sub-policy, pick 2 transformations, 2 probabilities and 2 magnitudes"""
def generate_policies(num_policies, num_sub_policies):
    """Randomly generate augmentation policies.

    Each sub-policy is a row of six numbers:
        [0], [1]  two distinct transformation ids (0=rotate, 1=shear, 2=scale)
        [2], [3]  the probability of applying each, drawn from {0.0, 0.1, ..., 1.0}
        [4], [5]  the magnitude of each: a multiple of 5 in [-20, 20] for
                  rotate/shear, or a value in {0.5, 0.6, ..., 1.4} for scale

    Args:
        num_policies: number of policies to generate.
        num_sub_policies: number of sub-policies per policy.

    Returns:
        A float array of shape ``(num_policies, num_sub_policies, 6)``.
    """
    policies = np.zeros([num_policies, num_sub_policies, 6])

    for policy in range(num_policies):
        for sub_policy in range(num_sub_policies):
            # Basic integer indexing returns a view, so writes to ``entry``
            # land directly in ``policies``.
            entry = policies[policy, sub_policy]

            # pick two sub_policy transformations (0=rotate, 1=shear, 2=scale)
            entry[0] = np.random.randint(0, 3)
            entry[1] = np.random.randint(0, 3)
            # Redraw the second one until it differs from the first.
            while entry[0] == entry[1]:
                entry[1] = np.random.randint(0, 3)

            # pick probabilities
            entry[2] = np.random.randint(0, 11) / 10
            entry[3] = np.random.randint(0, 11) / 10

            # pick magnitudes
            for slot in range(2):
                if entry[slot] <= 1:
                    # rotate / shear: degrees in steps of 5
                    entry[slot + 4] = np.random.randint(-4, 5) * 5
                elif entry[slot] == 2:
                    # scale: multiplicative factor
                    entry[slot + 4] = np.random.randint(5, 15) / 10

    return policies
```
%% Cell type:code id: tags:
```
"""Pick policy and sub-policy"""
"""Each row of data should have a different sub-policy but for now, this will do"""
def sample_sub_policy(policies, policy, num_sub_policies):
    """Pick a random sub-policy of ``policy`` and sample its transformations.

    Each sub-policy row is read as
    ``[transform_a, transform_b, prob_a, prob_b, magnitude_a, magnitude_b]``
    with transformation ids 0=rotate, 1=shear, 2=scale.

    Args:
        policies: NumPy array indexed as ``policies[policy, sub_policy]``.
        policy: index of the policy to sample from.
        num_sub_policies: number of sub-policies to choose among.

    Returns:
        ``(degrees, shear, scale)``. A transformation that is absent from the
        chosen sub-policy, or whose probability roll fails, keeps its neutral
        value (0 degrees, 0 shear, scale 1).
    """
    sub_policy = np.random.randint(0, num_sub_policies)
    entry = policies[policy, sub_policy]

    # Neutral values, indexed by transformation id: rotate, shear, scale.
    magnitudes = [0, 0, 1]

    # Transformations are visited in id order (not slot order) so the order
    # of random draws is fixed regardless of how the sub-policy is laid out.
    for transformation in range(3):
        if entry[0] == transformation:
            slot = 0
        elif entry[1] == transformation:
            slot = 1
        else:
            continue
        # Apply with the slot's probability; otherwise keep the neutral value.
        if np.random.uniform() < entry[slot + 2]:
            magnitudes[transformation] = entry[slot + 4]

    degrees, shear, scale = magnitudes
    return degrees, shear, scale
```
%% Cell type:code id: tags:
```
"""Sample policy, open and apply above transformations"""
def run_UCB1(
q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, initial_iteration, batch_size, toy_size
, iterations):
def run_UCB1(
policies, batch_size, toy_size, max_epochs, early_stop_num
, iterations):
#Pull each bandit arm just once
if initial_iteration:
iterations = num_policies
# get number of policies and sub-policies
num_policies = len(policies)
num_sub_policies = len(policies[0])
#Initialize vector weights, counts and regret
q_values = [0]*num_policies
cnts = [0]*num_policies
q_plus_cnt = [0]*num_policies
total_count = 0
for policy in range(iterations):
# sample policy and get transformations
if not initial_iteration:
# get the action to try (either initially in order or using best q_plus_cnt value)
if policy >= num_policies:
this_policy = np.argmax(q_plus_cnt)
else:
this_policy = policy
# get info of transformation for this sub-policy
degrees, shear, scale = sample_sub_policy(policies, this_policy, num_sub_policies)
# create transformations
# create transformations
using above info
transform = torchvision.transforms.Compose(
[torchvision.transforms.RandomAffine(degrees=(degrees,degrees), shear=(shear,shear), scale=(scale,scale)),
torchvision.transforms.ToTensor()])
# open data and apply these transformations
train_dataset = datasets.MNIST(root='./MetaAugment/train', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./MetaAugment/test', train=False, download=True, transform=transform)
"""Make toy dataset"""
# create toy dataset from above uploaded data
train_loader, test_loader = create_toy(train_dataset, test_dataset, batch_size, toy_size)
""" Run model"""
# create model
model = LeNet()
sgd = optim.SGD(model.parameters(), lr=1e-1)
cost = nn.CrossEntropyLoss()
# set variables for best validation accuracy and early stop count
best_acc = 0
early_stop_cnt = 0
# choose how many past best validation accuracy we go
early_stop_num = 10
# choose max number of epochs
epoch = 100
# train model and check validation accuracy each epoch
for _epoch in range(max_epochs):
for _epoch in range(epoch):
# train model
model.train()
for idx, (train_x, train_label) in enumerate(train_loader):
label_np = np.zeros((train_label.shape[0], 10))
sgd.zero_grad()
predict_y = model(train_x.float())
loss = cost(predict_y, train_label.long())
loss.backward()
sgd.step()
# check validation accuracy on validation set
correct = 0
_sum = 0
model.eval()
for idx, (test_x, test_label) in enumerate(test_loader):
predict_y = model(test_x.float()).detach()
predict_ys = np.argmax(predict_y, axis=-1)
label_np = test_label.numpy()
_ = predict_ys == test_label
correct += np.sum(_.numpy(), axis=-1)
_sum += _.shape[0]
# update best validation accuracy if it was higher, otherwise increase early stop count
acc = correct / _sum
if acc > best_acc :
best_acc = acc
early_stop_cnt = 0
else:
early_stop_cnt += 1
# exit if validation gets worse
for
# exit if validation gets worse
over 10 runs
if early_stop_cnt >= early_stop_num:
break
# update q_values
if
initial_iteration
:
if
policy < num_policies
:
q_values[this_policy] += best_acc
else:
q_values[this_policy] = (q_values[this_policy]*cnts[this_policy] + best_acc) / (cnts[this_policy] + 1)
print(q_values)
# update counts
cnts[this_policy] += 1
total_count += 1
# update q_plus_cnt values
if
not initial_iteration
:
# update q_plus_cnt values
every turn after the initial sweep through
if
policy >= num_policies - 1
:
for i in range(num_policies):
q_plus_cnt[i] = q_values[i] + np.sqrt(2*np.log(total_count)/cnts[i])
#print(q_values)
if initial_iteration:
for i in range(num_policies):
q_plus_cnt[i] = q_values[i] + np.sqrt(2*np.log(total_count)/cnts[i])
return q_values, cnts, total_count, q_plus_cnt
return q_values
```
%% Cell type:code id: tags:
```
%%time
batch_size = 32
toy_size = 0.02
total_iterations = 50
batch_size = 32 # size of batch inner NN is trained with
toy_size = 0.02 # total proportion of training and test set we use
max_epochs = 100 # max number of epochs that is run if early stopping is not hit
early_stop_num = 10 # max number of worse validation scores before early stopping
iterations = 20 # total iterations, should be more than the number of policies
# generate policies and sub-policies
num_policies = 10
num_sub_policies = 5
policies = generate_policies(num_policies, num_sub_policies)
#Initialize vector weights, counts and regret
q_values = [0]*num_policies
cnts = [0]*num_policies
q_plus_cnt = [0]*num_policies
total_count = 0
q_values, cnts, total_count, q_plus_cnt = run_UCB1(q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, True, batch_size, toy_size, 0)
print(q_values)
q_values, cnts, total_count, q_plus_cnt = run_UCB1(q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, False, batch_size, toy_size , total_iterations)
print(q_values)
q_values = run_UCB1(policies, batch_size, toy_size, max_epochs, early_stop_num, iterations)
#print(q_values)
```
%% Output
[0.81, 0.94, 0.835, 0.94, 0.775, 0.78, 0.96, 0.935, 0.97, 0.76]
[0.722, 0.8578571428571429, 0.7966666666666665, 0.8950000000000001, 0.7766666666666667, 0.8558333333333333, 0.8383333333333334, 0.688, 0.8041666666666666, 0.8766666666666668]
CPU times: user 14min 46s, sys: 10.9 s, total: 14min 57s
Wall time: 14min 58s
10
5
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment