UCB1 tidy up code

9895ec58 · John Carter · daf6d1ce · 9895ec58
Commit 9895ec58 authored 3 years ago by John Carter
--- a/MetaAugment/UCB1_JC.ipynb
+++ b/MetaAugment/UCB1_JC.ipynb
@@ -32,12 +32,13 @@
      "metadata": {
        "id": "U_ZJ2LqDiu_v"
      },
-      "execution_count": null,
+      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
+        "\"\"\"Define internal NN module that trains on the dataset\"\"\"\n",
        "class LeNet(nn.Module):\n",
        "    def __init__(self):\n",
        "        super().__init__()\n",
@@ -73,7 +74,7 @@
      "metadata": {
        "id": "4ksS_duLFADW"
      },
-      "execution_count": null,
+      "execution_count": 2,
      "outputs": []
    },
    {
@@ -92,6 +93,7 @@
        "    indices_test = torch.arange(int(n_samples*len(test_dataset)))\n",
        "    reduced_test_dataset = data_utils.Subset(shuffled_test_dataset, indices_test)\n",
        "\n",
+        "    # push into DataLoader\n",
        "    train_loader = torch.utils.data.DataLoader(reduced_train_dataset, batch_size=batch_size)\n",
        "    test_loader = torch.utils.data.DataLoader(reduced_test_dataset, batch_size=batch_size)\n",
        "\n",
@@ -100,7 +102,7 @@
      "metadata": {
        "id": "xujQtvVWBgMH"
      },
-      "execution_count": null,
+      "execution_count": 3,
      "outputs": []
    },
    {
@@ -139,7 +141,7 @@
      "metadata": {
        "id": "Iql-c88jGGWy"
      },
-      "execution_count": null,
+      "execution_count": 4,
      "outputs": []
    },
    {
@@ -155,6 +157,7 @@
        "    shear = 0\n",
        "    scale = 1\n",
        "\n",
+        "    # check for rotations\n",
        "    if policies[policy, sub_policy][0] == 0:\n",
        "        if np.random.uniform() < policies[policy, sub_policy][2]:\n",
        "            degrees = policies[policy, sub_policy][4]\n",
@@ -162,6 +165,7 @@
        "        if np.random.uniform() < policies[policy, sub_policy][3]:\n",
        "            degrees = policies[policy, sub_policy][5]\n",
        "\n",
+        "    # check for shears\n",
        "    if policies[policy, sub_policy][0] == 1:\n",
        "        if np.random.uniform() < policies[policy, sub_policy][2]:\n",
        "            shear = policies[policy, sub_policy][4]\n",
@@ -169,6 +173,7 @@
        "        if np.random.uniform() < policies[policy, sub_policy][3]:\n",
        "            shear = policies[policy, sub_policy][5]\n",
        "\n",
+        "    # check for scales\n",
        "    if policies[policy, sub_policy][0] == 2:\n",
        "        if np.random.uniform() < policies[policy, sub_policy][2]:\n",
        "            scale = policies[policy, sub_policy][4]\n",
@@ -181,34 +186,42 @@
      "metadata": {
        "id": "QE2VWI8o731X"
      },
-      "execution_count": null,
+      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 6,
      "metadata": {
        "id": "vu_4I4qkbx73"
      },
      "outputs": [],
      "source": [
        "\"\"\"Sample policy, open and apply above transformations\"\"\"\n",
-        "def run_UCB1(q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, initial_iteration, batch_size, toy_size, iterations):\n",
+        "def run_UCB1(policies, batch_size, toy_size, max_epochs, early_stop_num, iterations):\n",
        "\n",
-        "    #Pull each bandit arm just once\n",
-        "    if initial_iteration:\n",
-        "        iterations = num_policies\n",
+        "    # get number of policies and sub-policies\n",
+        "    num_policies = len(policies)\n",
+        "    num_sub_policies = len(policies[0])\n",
+        "\n",
+        "    #Initialize vector weights, counts and regret\n",
+        "    q_values = [0]*num_policies\n",
+        "    cnts = [0]*num_policies\n",
+        "    q_plus_cnt = [0]*num_policies\n",
+        "    total_count = 0\n",
        "\n",
        "    for policy in range(iterations):\n",
-        "        # sample policy and get transformations\n",
-        "        if not initial_iteration:\n",
+        "\n",
+        "        # get the action to try (either initially in order or using best q_plus_cnt value)\n",
+        "        if policy >= num_policies:\n",
        "            this_policy = np.argmax(q_plus_cnt)\n",
        "        else:\n",
        "            this_policy = policy\n",
        "\n",
+        "        # get info of transformation for this sub-policy\n",
        "        degrees, shear, scale = sample_sub_policy(policies, this_policy, num_sub_policies)\n",
        "\n",
-        "        # create transformations\n",
+        "        # create transformations using above info\n",
        "        transform = torchvision.transforms.Compose(\n",
        "            [torchvision.transforms.RandomAffine(degrees=(degrees,degrees), shear=(shear,shear), scale=(scale,scale)),\n",
        "            torchvision.transforms.ToTensor()])\n",
@@ -217,26 +230,22 @@
        "        train_dataset = datasets.MNIST(root='./MetaAugment/train', train=True, download=True, transform=transform)\n",
        "        test_dataset = datasets.MNIST(root='./MetaAugment/test', train=False, download=True, transform=transform)\n",
        "\n",
-        "\n",
-        "        \"\"\"Make toy dataset\"\"\"\n",
+        "        # create toy dataset from above uploaded data\n",
        "        train_loader, test_loader = create_toy(train_dataset, test_dataset, batch_size, toy_size)\n",
        "\n",
-        "\n",
-        "        \"\"\" Run model\"\"\"\n",
+        "        # create model\n",
        "        model = LeNet()\n",
        "        sgd = optim.SGD(model.parameters(), lr=1e-1)\n",
        "        cost = nn.CrossEntropyLoss()\n",
        "\n",
+        "        # set variables for best validation accuracy and early stop count\n",
        "        best_acc = 0\n",
        "        early_stop_cnt = 0\n",
        "\n",
-        "        # choose how many past best validation accuracy we go\n",
-        "        early_stop_num = 10\n",
+        "        # train model and check validation accuracy each epoch\n",
+        "        for _epoch in range(max_epochs):\n",
        "\n",
-        "        # choose max number of epochs\n",
-        "        epoch = 100\n",
-        "\n",
-        "        for _epoch in range(epoch):\n",
+        "            # train model\n",
        "            model.train()\n",
        "            for idx, (train_x, train_label) in enumerate(train_loader):\n",
        "                label_np = np.zeros((train_label.shape[0], 10))\n",
@@ -246,6 +255,7 @@
        "                loss.backward()\n",
        "                sgd.step()\n",
        "\n",
+        "            # check validation accuracy on validation set\n",
        "            correct = 0\n",
        "            _sum = 0\n",
        "            model.eval()\n",
@@ -257,6 +267,7 @@
        "                correct += np.sum(_.numpy(), axis=-1)\n",
        "                _sum += _.shape[0]\n",
        "            \n",
+        "            # update best validation accuracy if it was higher, otherwise increase early stop count\n",
        "            acc = correct / _sum\n",
        "            if acc > best_acc :\n",
        "                best_acc = acc\n",
@@ -264,32 +275,28 @@
        "            else:\n",
        "                early_stop_cnt += 1\n",
        "\n",
-        "            # exit if validation gets worse for \n",
+        "            # exit once validation accuracy has failed to beat the best for early_stop_num consecutive epochs\n",
        "            if early_stop_cnt >= early_stop_num:\n",
        "                break\n",
        "\n",
        "        # update q_values\n",
-        "        if initial_iteration:\n",
+        "        if policy < num_policies:\n",
        "            q_values[this_policy] += best_acc\n",
        "        else:\n",
        "            q_values[this_policy] = (q_values[this_policy]*cnts[this_policy] + best_acc) / (cnts[this_policy] + 1)\n",
        "\n",
+        "        print(q_values)\n",
+        "\n",
        "        # update counts\n",
        "        cnts[this_policy] += 1\n",
        "        total_count += 1\n",
        "\n",
-        "        # update q_plus_cnt values\n",
-        "        if not initial_iteration:\n",
+        "        # update q_plus_cnt values every turn after the initial sweep through\n",
+        "        if policy >= num_policies - 1:\n",
        "            for i in range(num_policies):\n",
        "                q_plus_cnt[i] = q_values[i] + np.sqrt(2*np.log(total_count)/cnts[i])\n",
        "\n",
-        "        #print(q_values)\n",
-        "\n",
-        "    if initial_iteration:\n",
-        "        for i in range(num_policies):\n",
-        "            q_plus_cnt[i] = q_values[i] + np.sqrt(2*np.log(total_count)/cnts[i])\n",
-        "\n",
-        "    return q_values, cnts, total_count, q_plus_cnt"
+        "    return q_values"
      ]
    },
    {
@@ -297,31 +304,26 @@
      "source": [
        "%%time\n",
        "\n",
-        "batch_size = 32\n",
-        "toy_size = 0.02\n",
-        "total_iterations = 50\n",
+        "batch_size = 32       # size of batch inner NN is trained with\n",
+        "toy_size = 0.02       # total proportion of training and test set we use\n",
+        "max_epochs = 100      # max number of epochs that is run if early stopping is not hit\n",
+        "early_stop_num = 10   # max number of worse validation scores before early stopping\n",
+        "iterations = 20       # total iterations, should be more than the number of policies\n",
        "\n",
+        "# generate policies and sub-policies\n",
        "num_policies = 10\n",
        "num_sub_policies = 5\n",
        "policies = generate_policies(num_policies, num_sub_policies)\n",
        "\n",
-        "#Initialize vector weights, counts and regret\n",
-        "q_values = [0]*num_policies\n",
-        "cnts = [0]*num_policies\n",
-        "q_plus_cnt = [0]*num_policies\n",
-        "total_count = 0\n",
-        "\n",
-        "q_values, cnts, total_count, q_plus_cnt = run_UCB1(q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, True, batch_size, toy_size, 0)\n",
-        "print(q_values)\n",
-        "q_values, cnts, total_count, q_plus_cnt = run_UCB1(q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, False, batch_size, toy_size , total_iterations)\n",
-        "print(q_values)"
+        "q_values = run_UCB1(policies, batch_size, toy_size, max_epochs, early_stop_num, iterations)\n",
+        "#print(q_values)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "doHUtJ_tEiA6",
-        "outputId": "0f25a17b-aab2-4d59-ecea-2e36c7bd5592"
+        "outputId": "6735e812-f7be-4f8b-cec2-52a069f7731b"
      },
      "execution_count": null,
      "outputs": [
@@ -329,10 +331,8 @@
          "output_type": "stream",
          "name": "stdout",
          "text": [
-            "[0.81, 0.94, 0.835, 0.94, 0.775, 0.78, 0.96, 0.935, 0.97, 0.76]\n",
-            "[0.722, 0.8578571428571429, 0.7966666666666665, 0.8950000000000001, 0.7766666666666667, 0.8558333333333333, 0.8383333333333334, 0.688, 0.8041666666666666, 0.8766666666666668]\n",
-            "CPU times: user 14min 46s, sys: 10.9 s, total: 14min 57s\n",
-            "Wall time: 14min 58s\n"
+            "10\n",
+            "5\n"
          ]
        }
      ]

 %% Cell type:code id: tags:

 ``` 
 import numpy as np
 import torch
 torch.manual_seed(0)
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
 import torch.utils.data as data_utils
 import torchvision
 import torchvision.datasets as datasets
 ```

 %% Cell type:code id: tags:

 ``` 
+"""Define internal NN module that trains on the dataset"""
 class LeNet(nn.Module):
    """LeNet-style CNN used as the inner model trained for each sampled policy.

    Two conv -> ReLU -> max-pool stages are followed by three fully connected
    layers producing 10 outputs. ``fc1`` takes 256 flattened features, which
    is what the conv stack yields for single-channel 28x28 inputs
    (16 channels * 4 * 4).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (1 -> 6 -> 16 channels, 5x5 kernels).
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        # Classifier head (256 -> 120 -> 84 -> 10).
        self.fc1 = nn.Linear(in_features=256, out_features=120)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.relu4 = nn.ReLU()
        self.fc3 = nn.Linear(in_features=84, out_features=10)
        # NOTE(review): this ReLU is applied to the final 10 outputs, which are
        # later fed to nn.CrossEntropyLoss; that loss normally takes raw
        # logits - confirm the clipping is intended.
        self.relu5 = nn.ReLU()

    def forward(self, x):
        """Return the (batch, 10) non-negative output scores for input batch ``x``."""
        features = self.pool1(self.relu1(self.conv1(x)))
        features = self.pool2(self.relu2(self.conv2(features)))

        # Collapse everything except the batch dimension before the dense layers.
        flat = features.view(features.shape[0], -1)

        hidden = self.relu3(self.fc1(flat))
        hidden = self.relu4(self.fc2(hidden))
        return self.relu5(self.fc3(hidden))
 ```

 %% Cell type:code id: tags:

 ``` 
 """Make toy dataset"""

 def create_toy(train_dataset, test_dataset, batch_size, n_samples):
    # shuffle and take first n_samples %age of training dataset
    shuffled_train_dataset = torch.utils.data.Subset(train_dataset, torch.randperm(len(train_dataset)).tolist())
    indices_train = torch.arange(int(n_samples*len(train_dataset)))
    reduced_train_dataset = data_utils.Subset(shuffled_train_dataset, indices_train)

    # shuffle and take first n_samples %age of test dataset
    shuffled_test_dataset = torch.utils.data.Subset(test_dataset, torch.randperm(len(test_dataset)).tolist())
    indices_test = torch.arange(int(n_samples*len(test_dataset)))
    reduced_test_dataset = data_utils.Subset(shuffled_test_dataset, indices_test)

+    # push into DataLoader
    train_loader = torch.utils.data.DataLoader(reduced_train_dataset, batch_size=batch_size)
    test_loader = torch.utils.data.DataLoader(reduced_test_dataset, batch_size=batch_size)

    return train_loader, test_loader
 ```

 %% Cell type:code id: tags:

 ``` 
 """Randomly generate 10 policies"""
 """Each policy has 5 sub-policies"""
 """For each sub-policy, pick 2 transformations, 2 probabilities and 2 magnitudes"""

 def generate_policies(num_policies, num_sub_policies):
    """Randomly build a (num_policies, num_sub_policies, 6) array of sub-policies.

    Each sub-policy row is laid out as
    ``[transform_a, transform_b, prob_a, prob_b, magnitude_a, magnitude_b]``:

    * transforms are two distinct ids from {0=rotate, 1=shear, 2=scale};
    * probabilities are multiples of 0.1 in [0.0, 1.0];
    * rotate/shear magnitudes are multiples of 5 in [-20, 20];
    * scale magnitudes are multiples of 0.1 in [0.5, 1.4].

    Values are drawn from NumPy's global random state.
    """
    policies = np.zeros([num_policies, num_sub_policies, 6])

    for policy_idx in range(num_policies):
        for sub_idx in range(num_sub_policies):
            # Basic indexing returns a view, so writes land in ``policies``.
            row = policies[policy_idx, sub_idx]

            # Draw two transformation ids, re-drawing the second until it
            # differs from the first.
            first = np.random.randint(0, 3)
            second = np.random.randint(0, 3)
            while second == first:
                second = np.random.randint(0, 3)
            row[0] = first
            row[1] = second

            # One application probability per transformation.
            row[2] = np.random.randint(0, 11) / 10
            row[3] = np.random.randint(0, 11) / 10

            # Magnitudes: the range depends on the kind of transformation.
            for slot, kind in enumerate((first, second)):
                if kind == 2:
                    row[slot + 4] = np.random.randint(5, 15) / 10
                else:
                    row[slot + 4] = np.random.randint(-4, 5) * 5

    return policies
 ```

 %% Cell type:code id: tags:

 ``` 
 """Pick policy and sub-policy"""
 """Each row of data should have a different sub-policy but for now, this will do"""

 def sample_sub_policy(policies, policy, num_sub_policies):
    sub_policy = np.random.randint(0,num_sub_policies)

    degrees = 0
    shear = 0
    scale = 1

+    # check for rotations
    if policies[policy, sub_policy][0] == 0:
        if np.random.uniform() < policies[policy, sub_policy][2]:
            degrees = policies[policy, sub_policy][4]
    elif policies[policy, sub_policy][1] == 0:
        if np.random.uniform() < policies[policy, sub_policy][3]:
            degrees = policies[policy, sub_policy][5]

+    # check for shears
    if policies[policy, sub_policy][0] == 1:
        if np.random.uniform() < policies[policy, sub_policy][2]:
            shear = policies[policy, sub_policy][4]
    elif policies[policy, sub_policy][1] == 1:
        if np.random.uniform() < policies[policy, sub_policy][3]:
            shear = policies[policy, sub_policy][5]

+    # check for scales
    if policies[policy, sub_policy][0] == 2:
        if np.random.uniform() < policies[policy, sub_policy][2]:
            scale = policies[policy, sub_policy][4]
    elif policies[policy, sub_policy][1] == 2:
        if np.random.uniform() < policies[policy, sub_policy][3]:
            scale = policies[policy, sub_policy][5]

    return degrees, shear, scale
 ```

 %% Cell type:code id: tags:

 ``` 
 """Sample policy, open and apply above transformations"""
-def run_UCB1(q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, initial_iteration, batch_size, toy_size, iterations):
+def run_UCB1(policies, batch_size, toy_size, max_epochs, early_stop_num, iterations):

-    #Pull each bandit arm just once
-    if initial_iteration:
-        iterations = num_policies
+    # get number of policies and sub-policies
+    num_policies = len(policies)
+    num_sub_policies = len(policies[0])
+
+    #Initialize vector weights, counts and regret
+    q_values = [0]*num_policies
+    cnts = [0]*num_policies
+    q_plus_cnt = [0]*num_policies
+    total_count = 0

    for policy in range(iterations):
-        # sample policy and get transformations
-        if not initial_iteration:
+
+        # get the action to try (either initially in order or using best q_plus_cnt value)
+        if policy >= num_policies:
            this_policy = np.argmax(q_plus_cnt)
        else:
            this_policy = policy

+        # get info of transformation for this sub-policy
        degrees, shear, scale = sample_sub_policy(policies, this_policy, num_sub_policies)

-        # create transformations
+        # create transformations using above info
        transform = torchvision.transforms.Compose(
            [torchvision.transforms.RandomAffine(degrees=(degrees,degrees), shear=(shear,shear), scale=(scale,scale)),
            torchvision.transforms.ToTensor()])

        # open data and apply these transformations
        train_dataset = datasets.MNIST(root='./MetaAugment/train', train=True, download=True, transform=transform)
        test_dataset = datasets.MNIST(root='./MetaAugment/test', train=False, download=True, transform=transform)

-
-        """Make toy dataset"""
+        # create toy dataset from above uploaded data
        train_loader, test_loader = create_toy(train_dataset, test_dataset, batch_size, toy_size)

-
-        """ Run model"""
+        # create model
        model = LeNet()
        sgd = optim.SGD(model.parameters(), lr=1e-1)
        cost = nn.CrossEntropyLoss()

+        # set variables for best validation accuracy and early stop count
        best_acc = 0
        early_stop_cnt = 0

-        # choose how many past best validation accuracy we go
-        early_stop_num = 10
-
-        # choose max number of epochs
-        epoch = 100
+        # train model and check validation accuracy each epoch
+        for _epoch in range(max_epochs):

-        for _epoch in range(epoch):
+            # train model
            model.train()
            for idx, (train_x, train_label) in enumerate(train_loader):
                label_np = np.zeros((train_label.shape[0], 10))
                sgd.zero_grad()
                predict_y = model(train_x.float())
                loss = cost(predict_y, train_label.long())
                loss.backward()
                sgd.step()

+            # check validation accuracy on validation set
            correct = 0
            _sum = 0
            model.eval()
            for idx, (test_x, test_label) in enumerate(test_loader):
                predict_y = model(test_x.float()).detach()
                predict_ys = np.argmax(predict_y, axis=-1)
                label_np = test_label.numpy()
                _ = predict_ys == test_label
                correct += np.sum(_.numpy(), axis=-1)
                _sum += _.shape[0]

+            # update best validation accuracy if it was higher, otherwise increase early stop count
            acc = correct / _sum
            if acc > best_acc :
                best_acc = acc
                early_stop_cnt = 0
            else:
                early_stop_cnt += 1

-            # exit if validation gets worse for
+            # exit once validation accuracy has failed to beat the best for early_stop_num consecutive epochs
            if early_stop_cnt >= early_stop_num:
                break

        # update q_values
-        if initial_iteration:
+        if policy < num_policies:
            q_values[this_policy] += best_acc
        else:
            q_values[this_policy] = (q_values[this_policy]*cnts[this_policy] + best_acc) / (cnts[this_policy] + 1)

+        print(q_values)
+
        # update counts
        cnts[this_policy] += 1
        total_count += 1

-        # update q_plus_cnt values
-        if not initial_iteration:
+        # update q_plus_cnt values every turn after the initial sweep through
+        if policy >= num_policies - 1:
            for i in range(num_policies):
                q_plus_cnt[i] = q_values[i] + np.sqrt(2*np.log(total_count)/cnts[i])

-        #print(q_values)
-
-    if initial_iteration:
-        for i in range(num_policies):
-            q_plus_cnt[i] = q_values[i] + np.sqrt(2*np.log(total_count)/cnts[i])
-
-    return q_values, cnts, total_count, q_plus_cnt
+    return q_values
 ```

 %% Cell type:code id: tags:

 ``` 
 %%time

-batch_size = 32
-toy_size = 0.02
-total_iterations = 50
+batch_size = 32       # size of batch inner NN is trained with
+toy_size = 0.02       # total proportion of training and test set we use
+max_epochs = 100      # max number of epochs that is run if early stopping is not hit
+early_stop_num = 10   # max number of worse validation scores before early stopping
+iterations = 20       # total iterations, should be more than the number of policies

+# generate policies and sub-policies
 num_policies = 10
 num_sub_policies = 5
 policies = generate_policies(num_policies, num_sub_policies)

-#Initialize vector weights, counts and regret
-q_values = [0]*num_policies
-cnts = [0]*num_policies
-q_plus_cnt = [0]*num_policies
-total_count = 0
-
-q_values, cnts, total_count, q_plus_cnt = run_UCB1(q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, True, batch_size, toy_size, 0)
-print(q_values)
-q_values, cnts, total_count, q_plus_cnt = run_UCB1(q_values, cnts, total_count, q_plus_cnt, policies, num_policies, num_sub_policies, False, batch_size, toy_size , total_iterations)
-print(q_values)
+q_values = run_UCB1(policies, batch_size, toy_size, max_epochs, early_stop_num, iterations)
+#print(q_values)
 ```

 %% Output

-    [0.81, 0.94, 0.835, 0.94, 0.775, 0.78, 0.96, 0.935, 0.97, 0.76]
-    [0.722, 0.8578571428571429, 0.7966666666666665, 0.8950000000000001, 0.7766666666666667, 0.8558333333333333, 0.8383333333333334, 0.688, 0.8041666666666666, 0.8766666666666668]
-    CPU times: user 14min 46s, sys: 10.9 s, total: 14min 57s
-    Wall time: 14min 58s
+    10
+    5