Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
MetaRL
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Wang, Mia
MetaRL
Commits
daf6d1ce
Commit
daf6d1ce
authored
3 years ago
by
John Carter
Browse files
Options
Downloads
Patches
Plain Diff
UCB1 RL algorithm added (JC)
parent
6adbdc53
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
MetaAugment/UCB1_JC.ipynb
+341
-0
341 additions, 0 deletions
MetaAugment/UCB1_JC.ipynb
with
341 additions
and
0 deletions
MetaAugment/UCB1_JC.ipynb
0 → 100644
+
341
−
0
View file @
daf6d1ce
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "UCB1.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"source": [
"import numpy as np\n",
"import torch\n",
"torch.manual_seed(0)\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.optim as optim\n",
"import torch.utils.data as data_utils\n",
"import torchvision\n",
"import torchvision.datasets as datasets"
],
"metadata": {
"id": "U_ZJ2LqDiu_v"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"class LeNet(nn.Module):\n",
" def __init__(self):\n",
" super().__init__()\n",
" self.conv1 = nn.Conv2d(1, 6, 5)\n",
" self.relu1 = nn.ReLU()\n",
" self.pool1 = nn.MaxPool2d(2)\n",
" self.conv2 = nn.Conv2d(6, 16, 5)\n",
" self.relu2 = nn.ReLU()\n",
" self.pool2 = nn.MaxPool2d(2)\n",
" self.fc1 = nn.Linear(256, 120)\n",
" self.relu3 = nn.ReLU()\n",
" self.fc2 = nn.Linear(120, 84)\n",
" self.relu4 = nn.ReLU()\n",
" self.fc3 = nn.Linear(84, 10)\n",
" self.relu5 = nn.ReLU()\n",
"\n",
" def forward(self, x):\n",
" y = self.conv1(x)\n",
" y = self.relu1(y)\n",
" y = self.pool1(y)\n",
" y = self.conv2(y)\n",
" y = self.relu2(y)\n",
" y = self.pool2(y)\n",
" y = y.view(y.shape[0], -1)\n",
" y = self.fc1(y)\n",
" y = self.relu3(y)\n",
" y = self.fc2(y)\n",
" y = self.relu4(y)\n",
" y = self.fc3(y)\n",
" y = self.relu5(y)\n",
" return y"
],
"metadata": {
"id": "4ksS_duLFADW"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"\"\"\"Make toy dataset\"\"\"\n",
"\n",
"def create_toy(train_dataset, test_dataset, batch_size, n_samples):\n",
" # shuffle and take first n_samples %age of training dataset\n",
" shuffled_train_dataset = torch.utils.data.Subset(train_dataset, torch.randperm(len(train_dataset)).tolist())\n",
" indices_train = torch.arange(int(n_samples*len(train_dataset)))\n",
" reduced_train_dataset = data_utils.Subset(shuffled_train_dataset, indices_train)\n",
"\n",
" # shuffle and take first n_samples %age of test dataset\n",
" shuffled_test_dataset = torch.utils.data.Subset(test_dataset, torch.randperm(len(test_dataset)).tolist())\n",
" indices_test = torch.arange(int(n_samples*len(test_dataset)))\n",
" reduced_test_dataset = data_utils.Subset(shuffled_test_dataset, indices_test)\n",
"\n",
" train_loader = torch.utils.data.DataLoader(reduced_train_dataset, batch_size=batch_size)\n",
" test_loader = torch.utils.data.DataLoader(reduced_test_dataset, batch_size=batch_size)\n",
"\n",
" return train_loader, test_loader"
],
"metadata": {
"id": "xujQtvVWBgMH"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"\"\"\"Randomly generate 10 policies\"\"\"\n",
"\"\"\"Each policy has 5 sub-policies\"\"\"\n",
"\"\"\"For each sub-policy, pick 2 transformations, 2 probabilities and 2 magnitudes\"\"\"\n",
"\n",
"def generate_policies(num_policies, num_sub_policies):\n",
" \n",
" policies = np.zeros([num_policies,num_sub_policies,6])\n",
"\n",
" # Policies array will be 10x5x6\n",
" for policy in range(num_policies):\n",
" for sub_policy in range(num_sub_policies):\n",
" # pick two sub_policy transformations (0=rotate, 1=shear, 2=scale)\n",
" policies[policy, sub_policy, 0] = np.random.randint(0,3)\n",
" policies[policy, sub_policy, 1] = np.random.randint(0,3)\n",
" while policies[policy, sub_policy, 0] == policies[policy, sub_policy, 1]:\n",
" policies[policy, sub_policy, 1] = np.random.randint(0,3)\n",
"\n",
" # pick probabilities\n",
" policies[policy, sub_policy, 2] = np.random.randint(0,11) / 10\n",
" policies[policy, sub_policy, 3] = np.random.randint(0,11) / 10\n",
"\n",
" # pick magnitudes\n",
" for transformation in range(2):\n",
" if policies[policy, sub_policy, transformation] <= 1:\n",
" policies[policy, sub_policy, transformation + 4] = np.random.randint(-4,5)*5\n",
" elif policies[policy, sub_policy, transformation] == 2:\n",
" policies[policy, sub_policy, transformation + 4] = np.random.randint(5,15)/10\n",
"\n",
" return policies"
],
"metadata": {
"id": "Iql-c88jGGWy"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"\"\"\"Pick policy and sub-policy\"\"\"\n",
"\"\"\"Each row of data should have a different sub-policy but for now, this will do\"\"\"\n",
"\n",
"def sample_sub_policy(policies, policy, num_sub_policies):\n",
" sub_policy = np.random.randint(0,num_sub_policies)\n",
"\n",
" degrees = 0\n",
" shear = 0\n",
" scale = 1\n",
"\n",
" if policies[policy, sub_policy][0] == 0:\n",
" if np.random.uniform() < policies[policy, sub_policy][2]:\n",
" degrees = policies[policy, sub_policy][4]\n",
" elif policies[policy, sub_policy][1] == 0:\n",
" if np.random.uniform() < policies[policy, sub_policy][3]:\n",
" degrees = policies[policy, sub_policy][5]\n",
"\n",
" if policies[policy, sub_policy][0] == 1:\n",
" if np.random.uniform() < policies[policy, sub_policy][2]:\n",
" shear = policies[policy, sub_policy][4]\n",
" elif policies[policy, sub_policy][1] == 1:\n",
" if np.random.uniform() < policies[policy, sub_policy][3]:\n",
" shear = policies[policy, sub_policy][5]\n",
"\n",
" if policies[policy, sub_policy][0] == 2:\n",
" if np.random.uniform() < policies[policy, sub_policy][2]:\n",
" scale = policies[policy, sub_policy][4]\n",
" elif policies[policy, sub_policy][1] == 2:\n",
" if np.random.uniform() < policies[policy, sub_policy][3]:\n",
" scale = policies[policy, sub_policy][5]\n",
"\n",
" return degrees, shear, scale"
],
"metadata": {
"id": "QE2VWI8o731X"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "vu_4I4qkbx73"
},
"outputs": [],
"source": [
"\"\"\"Sample policy, open and apply above transformations\"\"\"\n",
"def run_UCB1(q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, initial_iteration, batch_size, toy_size, iterations):\n",
"\n",
" #Pull each bandit arm just once\n",
" if initial_iteration:\n",
" iterations = num_policies\n",
"\n",
" for policy in range(iterations):\n",
" # sample policy and get transformations\n",
" if not initial_iteration:\n",
" this_policy = np.argmax(q_plus_cnt)\n",
" else:\n",
" this_policy = policy\n",
"\n",
" degrees, shear, scale = sample_sub_policy(policies, this_policy, num_sub_policies)\n",
"\n",
" # create transformations\n",
" transform = torchvision.transforms.Compose(\n",
" [torchvision.transforms.RandomAffine(degrees=(degrees,degrees), shear=(shear,shear), scale=(scale,scale)),\n",
" torchvision.transforms.ToTensor()])\n",
"\n",
" # open data and apply these transformations\n",
" train_dataset = datasets.MNIST(root='./MetaAugment/train', train=True, download=True, transform=transform)\n",
" test_dataset = datasets.MNIST(root='./MetaAugment/test', train=False, download=True, transform=transform)\n",
"\n",
"\n",
" \"\"\"Make toy dataset\"\"\"\n",
" train_loader, test_loader = create_toy(train_dataset, test_dataset, batch_size, toy_size)\n",
"\n",
"\n",
" \"\"\" Run model\"\"\"\n",
" model = LeNet()\n",
" sgd = optim.SGD(model.parameters(), lr=1e-1)\n",
" cost = nn.CrossEntropyLoss()\n",
"\n",
" best_acc = 0\n",
" early_stop_cnt = 0\n",
"\n",
" # choose how many past best validation accuracy we go\n",
" early_stop_num = 10\n",
"\n",
" # choose max number of epochs\n",
" epoch = 100\n",
"\n",
" for _epoch in range(epoch):\n",
" model.train()\n",
" for idx, (train_x, train_label) in enumerate(train_loader):\n",
" label_np = np.zeros((train_label.shape[0], 10))\n",
" sgd.zero_grad()\n",
" predict_y = model(train_x.float())\n",
" loss = cost(predict_y, train_label.long())\n",
" loss.backward()\n",
" sgd.step()\n",
"\n",
" correct = 0\n",
" _sum = 0\n",
" model.eval()\n",
" for idx, (test_x, test_label) in enumerate(test_loader):\n",
" predict_y = model(test_x.float()).detach()\n",
" predict_ys = np.argmax(predict_y, axis=-1)\n",
" label_np = test_label.numpy()\n",
" _ = predict_ys == test_label\n",
" correct += np.sum(_.numpy(), axis=-1)\n",
" _sum += _.shape[0]\n",
" \n",
" acc = correct / _sum\n",
" if acc > best_acc :\n",
" best_acc = acc\n",
" early_stop_cnt = 0\n",
" else:\n",
" early_stop_cnt += 1\n",
"\n",
" # exit if validation gets worse for \n",
" if early_stop_cnt >= early_stop_num:\n",
" break\n",
"\n",
" # update q_values\n",
" if initial_iteration:\n",
" q_values[this_policy] += best_acc\n",
" else:\n",
" q_values[this_policy] = (q_values[this_policy]*cnts[this_policy] + best_acc) / (cnts[this_policy] + 1)\n",
"\n",
" # update counts\n",
" cnts[this_policy] += 1\n",
" total_count += 1\n",
"\n",
" # update q_plus_cnt values\n",
" if not initial_iteration:\n",
" for i in range(num_policies):\n",
" q_plus_cnt[i] = q_values[i] + np.sqrt(2*np.log(total_count)/cnts[i])\n",
"\n",
" #print(q_values)\n",
"\n",
" if initial_iteration:\n",
" for i in range(num_policies):\n",
" q_plus_cnt[i] = q_values[i] + np.sqrt(2*np.log(total_count)/cnts[i])\n",
"\n",
" return q_values, cnts, total_count, q_plus_cnt"
]
},
{
"cell_type": "code",
"source": [
"%%time\n",
"\n",
"batch_size = 32\n",
"toy_size = 0.02\n",
"total_iterations = 50\n",
"\n",
"num_policies = 10\n",
"num_sub_policies = 5\n",
"policies = generate_policies(num_policies, num_sub_policies)\n",
"\n",
"#Initialize vector weights, counts and regret\n",
"q_values = [0]*num_policies\n",
"cnts = [0]*num_policies\n",
"q_plus_cnt = [0]*num_policies\n",
"total_count = 0\n",
"\n",
"q_values, cnts, total_count, q_plus_cnt = run_UCB1(q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, True, batch_size, toy_size, 0)\n",
"print(q_values)\n",
"q_values, cnts, total_count, q_plus_cnt = run_UCB1(q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, False, batch_size, toy_size , total_iterations)\n",
"print(q_values)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "doHUtJ_tEiA6",
"outputId": "0f25a17b-aab2-4d59-ecea-2e36c7bd5592"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[0.81, 0.94, 0.835, 0.94, 0.775, 0.78, 0.96, 0.935, 0.97, 0.76]\n",
"[0.722, 0.8578571428571429, 0.7966666666666665, 0.8950000000000001, 0.7766666666666667, 0.8558333333333333, 0.8383333333333334, 0.688, 0.8041666666666666, 0.8766666666666668]\n",
"CPU times: user 14min 46s, sys: 10.9 s, total: 14min 57s\n",
"Wall time: 14min 58s\n"
]
}
]
}
]
}
\ No newline at end of file
%% Cell type:code id: tags:
```
import numpy as np
import torch
torch.manual_seed(0)
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data_utils
import torchvision
import torchvision.datasets as datasets
```
%% Cell type:code id: tags:
```
class LeNet(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 6, 5)
self.relu1 = nn.ReLU()
self.pool1 = nn.MaxPool2d(2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.relu2 = nn.ReLU()
self.pool2 = nn.MaxPool2d(2)
self.fc1 = nn.Linear(256, 120)
self.relu3 = nn.ReLU()
self.fc2 = nn.Linear(120, 84)
self.relu4 = nn.ReLU()
self.fc3 = nn.Linear(84, 10)
self.relu5 = nn.ReLU()
def forward(self, x):
y = self.conv1(x)
y = self.relu1(y)
y = self.pool1(y)
y = self.conv2(y)
y = self.relu2(y)
y = self.pool2(y)
y = y.view(y.shape[0], -1)
y = self.fc1(y)
y = self.relu3(y)
y = self.fc2(y)
y = self.relu4(y)
y = self.fc3(y)
y = self.relu5(y)
return y
```
%% Cell type:code id: tags:
```
"""Make toy dataset"""
def create_toy(train_dataset, test_dataset, batch_size, n_samples):
# shuffle and take first n_samples %age of training dataset
shuffled_train_dataset = torch.utils.data.Subset(train_dataset, torch.randperm(len(train_dataset)).tolist())
indices_train = torch.arange(int(n_samples*len(train_dataset)))
reduced_train_dataset = data_utils.Subset(shuffled_train_dataset, indices_train)
# shuffle and take first n_samples %age of test dataset
shuffled_test_dataset = torch.utils.data.Subset(test_dataset, torch.randperm(len(test_dataset)).tolist())
indices_test = torch.arange(int(n_samples*len(test_dataset)))
reduced_test_dataset = data_utils.Subset(shuffled_test_dataset, indices_test)
train_loader = torch.utils.data.DataLoader(reduced_train_dataset, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(reduced_test_dataset, batch_size=batch_size)
return train_loader, test_loader
```
%% Cell type:code id: tags:
```
"""Randomly generate 10 policies"""
"""Each policy has 5 sub-policies"""
"""For each sub-policy, pick 2 transformations, 2 probabilities and 2 magnitudes"""
def generate_policies(num_policies, num_sub_policies):
policies = np.zeros([num_policies,num_sub_policies,6])
# Policies array will be 10x5x6
for policy in range(num_policies):
for sub_policy in range(num_sub_policies):
# pick two sub_policy transformations (0=rotate, 1=shear, 2=scale)
policies[policy, sub_policy, 0] = np.random.randint(0,3)
policies[policy, sub_policy, 1] = np.random.randint(0,3)
while policies[policy, sub_policy, 0] == policies[policy, sub_policy, 1]:
policies[policy, sub_policy, 1] = np.random.randint(0,3)
# pick probabilities
policies[policy, sub_policy, 2] = np.random.randint(0,11) / 10
policies[policy, sub_policy, 3] = np.random.randint(0,11) / 10
# pick magnitudes
for transformation in range(2):
if policies[policy, sub_policy, transformation] <= 1:
policies[policy, sub_policy, transformation + 4] = np.random.randint(-4,5)*5
elif policies[policy, sub_policy, transformation] == 2:
policies[policy, sub_policy, transformation + 4] = np.random.randint(5,15)/10
return policies
```
%% Cell type:code id: tags:
```
"""Pick policy and sub-policy"""
"""Each row of data should have a different sub-policy but for now, this will do"""
def sample_sub_policy(policies, policy, num_sub_policies):
sub_policy = np.random.randint(0,num_sub_policies)
degrees = 0
shear = 0
scale = 1
if policies[policy, sub_policy][0] == 0:
if np.random.uniform() < policies[policy, sub_policy][2]:
degrees = policies[policy, sub_policy][4]
elif policies[policy, sub_policy][1] == 0:
if np.random.uniform() < policies[policy, sub_policy][3]:
degrees = policies[policy, sub_policy][5]
if policies[policy, sub_policy][0] == 1:
if np.random.uniform() < policies[policy, sub_policy][2]:
shear = policies[policy, sub_policy][4]
elif policies[policy, sub_policy][1] == 1:
if np.random.uniform() < policies[policy, sub_policy][3]:
shear = policies[policy, sub_policy][5]
if policies[policy, sub_policy][0] == 2:
if np.random.uniform() < policies[policy, sub_policy][2]:
scale = policies[policy, sub_policy][4]
elif policies[policy, sub_policy][1] == 2:
if np.random.uniform() < policies[policy, sub_policy][3]:
scale = policies[policy, sub_policy][5]
return degrees, shear, scale
```
%% Cell type:code id: tags:
```
"""Sample policy, open and apply above transformations"""
def run_UCB1(q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, initial_iteration, batch_size, toy_size, iterations):
#Pull each bandit arm just once
if initial_iteration:
iterations = num_policies
for policy in range(iterations):
# sample policy and get transformations
if not initial_iteration:
this_policy = np.argmax(q_plus_cnt)
else:
this_policy = policy
degrees, shear, scale = sample_sub_policy(policies, this_policy, num_sub_policies)
# create transformations
transform = torchvision.transforms.Compose(
[torchvision.transforms.RandomAffine(degrees=(degrees,degrees), shear=(shear,shear), scale=(scale,scale)),
torchvision.transforms.ToTensor()])
# open data and apply these transformations
train_dataset = datasets.MNIST(root='./MetaAugment/train', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./MetaAugment/test', train=False, download=True, transform=transform)
"""Make toy dataset"""
train_loader, test_loader = create_toy(train_dataset, test_dataset, batch_size, toy_size)
""" Run model"""
model = LeNet()
sgd = optim.SGD(model.parameters(), lr=1e-1)
cost = nn.CrossEntropyLoss()
best_acc = 0
early_stop_cnt = 0
# choose how many past best validation accuracy we go
early_stop_num = 10
# choose max number of epochs
epoch = 100
for _epoch in range(epoch):
model.train()
for idx, (train_x, train_label) in enumerate(train_loader):
label_np = np.zeros((train_label.shape[0], 10))
sgd.zero_grad()
predict_y = model(train_x.float())
loss = cost(predict_y, train_label.long())
loss.backward()
sgd.step()
correct = 0
_sum = 0
model.eval()
for idx, (test_x, test_label) in enumerate(test_loader):
predict_y = model(test_x.float()).detach()
predict_ys = np.argmax(predict_y, axis=-1)
label_np = test_label.numpy()
_ = predict_ys == test_label
correct += np.sum(_.numpy(), axis=-1)
_sum += _.shape[0]
acc = correct / _sum
if acc > best_acc :
best_acc = acc
early_stop_cnt = 0
else:
early_stop_cnt += 1
# exit if validation gets worse for
if early_stop_cnt >= early_stop_num:
break
# update q_values
if initial_iteration:
q_values[this_policy] += best_acc
else:
q_values[this_policy] = (q_values[this_policy]*cnts[this_policy] + best_acc) / (cnts[this_policy] + 1)
# update counts
cnts[this_policy] += 1
total_count += 1
# update q_plus_cnt values
if not initial_iteration:
for i in range(num_policies):
q_plus_cnt[i] = q_values[i] + np.sqrt(2*np.log(total_count)/cnts[i])
#print(q_values)
if initial_iteration:
for i in range(num_policies):
q_plus_cnt[i] = q_values[i] + np.sqrt(2*np.log(total_count)/cnts[i])
return q_values, cnts, total_count, q_plus_cnt
```
%% Cell type:code id: tags:
```
%%time
batch_size = 32
toy_size = 0.02
total_iterations = 50
num_policies = 10
num_sub_policies = 5
policies = generate_policies(num_policies, num_sub_policies)
#Initialize vector weights, counts and regret
q_values = [0]*num_policies
cnts = [0]*num_policies
q_plus_cnt = [0]*num_policies
total_count = 0
q_values, cnts, total_count, q_plus_cnt = run_UCB1(q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, True, batch_size, toy_size, 0)
print(q_values)
q_values, cnts, total_count, q_plus_cnt = run_UCB1(q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, False, batch_size, toy_size , total_iterations)
print(q_values)
```
%% Output
[0.81, 0.94, 0.835, 0.94, 0.775, 0.78, 0.96, 0.935, 0.97, 0.76]
[0.722, 0.8578571428571429, 0.7966666666666665, 0.8950000000000001, 0.7766666666666667, 0.8558333333333333, 0.8383333333333334, 0.688, 0.8041666666666666, 0.8766666666666668]
CPU times: user 14min 46s, sys: 10.9 s, total: 14min 57s
Wall time: 14min 58s
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment