Commit 2d0583db authored by Sun Jin Kim

Update docstrings in /autoaugment_learners/

parent 63a7084c
@@ -35,7 +35,7 @@ augmentation_space = [
class aa_learner:
def __init__(self, sp_num=5, fun_num=14, p_bins=11, m_bins=10, discrete_p_m=False):
'''
"""
Args:
sp_num (int): number of subpolicies per policy
fun_num (int): number of image functions in our search space
@@ -46,7 +46,8 @@ class aa_learner:
magnitude as discrete variables as the output of the
controller (A controller can be a neural network, genetic
algorithm, etc.)
'''
"""
self.sp_num = sp_num
self.fun_num = fun_num
self.p_bins = p_bins
@@ -62,7 +63,7 @@
def translate_operation_tensor(self, operation_tensor, return_log_prob=False, argmax=False):
'''
"""
takes in a tensor representing an operation and returns an actual operation which
is in the form of:
("Invert", 0.8, None)
@@ -93,9 +94,16 @@ class aa_learner:
operation (list of tuples):
An operation in the format that can be directly put into an
AutoAugment object.
log_prob
'''
log_prob (float):
Used in reinforcement learning updates, such as the proximal
policy update in the gru_learner.
Can only be used when self.discrete_p_m is True.
We add the log probabilities of the chosen image_function,
probability, and magnitude indices.
This corresponds to multiplying the non-logged probabilities,
then taking the log.
"""
if (not self.discrete_p_m) and return_log_prob:
raise ValueError("You are not supposed to use return_log_prob=True when the agent's \
self.discrete_p_m is False!")
@@ -174,27 +182,55 @@ class aa_learner:
def generate_new_policy(self):
'''
Generate a new random policy in the form of
[
(("Invert", 0.8, None), ("Contrast", 0.2, 6)),
(("Rotate", 0.7, 2), ("Invert", 0.8, None)),
(("Sharpness", 0.8, 1), ("Sharpness", 0.9, 3)),
(("ShearY", 0.5, 8), ("Invert", 0.7, None)),
]
'''
"""
Generate a new policy which can be fed into an AutoAugment object
by calling:
AutoAugment.subpolicies = policy
Args:
none
Returns:
new_policy (list[tuple]):
A new policy generated by the controller. It
has the form of:
[
(("Invert", 0.8, None), ("Contrast", 0.2, 6)),
(("Rotate", 0.7, 2), ("Invert", 0.8, None)),
(("Sharpness", 0.8, 1), ("Sharpness", 0.9, 3)),
(("ShearY", 0.5, 8), ("Invert", 0.7, None)),
]
This object can be fed into an AutoAugment object
by calling: AutoAugment.subpolicies = policy
"""
raise NotImplementedError('generate_new_policy not implemented in aa_learner')
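A usage sketch, assuming a concrete subclass such as randomsearch_learner and an AutoAugment-style transform object with a subpolicies attribute as the docstring describes (all names here are illustrative):

learner = randomsearch_learner()           # any concrete subclass of aa_learner
policy = learner.generate_new_policy()     # e.g. [(("Invert", 0.8, None), ("Contrast", 0.2, 6)), ...]

aa_transform = AutoAugment()               # the AutoAugment object referred to above
aa_transform.subpolicies = policy          # plug the generated policy into the transform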
def learn(self, train_dataset, test_dataset, child_network_architecture, toy_flag):
'''
Does the loop which is seen in Figure 1 in the AutoAugment paper.
In other words, repeat:
"""
Runs the main loop of finding a good policy for the given child network,
training dataset, and test (validation) dataset.
This is the loop seen in Figure 1 of the AutoAugment paper,
which is:
1. <generate a random policy>
2. <see how good that policy is>
3. <save how good the policy is in a list/dictionary>
until a certain condition (either specified by the user or pre-specified) is met
'''
Args:
train_dataset (torchvision.datasets.vision.VisionDataset)
test_dataset (torchvision.datasets.vision.VisionDataset)
child_network_architecture (type): NOTE THAT THIS VARIABLE IS NOT
an nn.Module object. Therefore, this needs
to be, say, `models.LeNet` instead of
`models.LeNet()`.
toy_flag (boolean): whether we want to obtain a toy version of
train_dataset and test_dataset and use those.
Returns:
none
"""
# This is dummy code
# test out 15 random policies
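A sketch of what the loop described above might look like in terms of the methods documented in this class; the body below is an assumption, not the actual implementation:

for _ in range(15):                                  # test out 15 random policies
    policy = self.generate_new_policy()              # 1. generate a random policy
    child_network = child_network_architecture()     # instantiate (a class, not an nn.Module, was passed in)
    reward = self.test_autoaugment_policy(policy, child_network,
                                          train_dataset, test_dataset, toy_flag)  # 2. see how good it is
    self.history.append((policy, reward))            # 3. save how good the policy is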
@@ -211,11 +247,24 @@ class aa_learner:
def test_autoaugment_policy(self, policy, child_network, train_dataset, test_dataset,
toy_flag, logging=False):
'''
"""
Given a policy (using AutoAugment paper terminology), we train a child network
using the policy and return the accuracy (how good the policy is for the dataset and
child network).
'''
Args:
policy (list[tuple]): A list of tuples representing a policy.
child_network (nn.Module)
train_dataset (torchvision.datasets.vision.VisionDataset)
test_dataset (torchvision.datasets.vision.VisionDataset)
toy_flag (boolean): Whether we want to obtain a toy version of
train_dataset and test_dataset and use those.
logging (boolean): Whether we want to save logs
Returns:
accuracy (float): best accuracy reached in any epoch while training the child network
"""
# We need to define an object aa_transform which takes in the image and
# transforms it with the policy (specified in its .policies attribute)
# in its forward pass
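A rough sketch of the idea in the comment above; the AutoAugment object and its attributes are assumptions based on the surrounding docstrings:

aa_transform = AutoAugment()
aa_transform.subpolicies = policy                    # the policy under evaluation
train_transform = torchvision.transforms.Compose([
    aa_transform,
    torchvision.transforms.ToTensor(),
])
train_dataset.transform = train_transform            # images are now augmented with this policy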
@@ -235,7 +284,7 @@ class aa_learner:
batch_size=32,
n_samples=0.5,
seed=100)
# train the child network with the dataloaders equipped with our specific policy
accuracy = train_child_network(child_network,
train_loader,
@@ -249,4 +298,39 @@ class aa_learner:
print_every_epoch=True)
# if logging is true, 'accuracy' is actually a tuple: (accuracy, accuracy_log)
return accuracy
\ No newline at end of file
return accuracy
def demo_plot(self, train_dataset, test_dataset, child_network_architecture, toy_flag, n=5):
"""
I made this to plot a couple of accuracy graphs to help manually tune my gradient
optimizer hyperparameters.
Saves a plot of `n` training accuracy graphs overlapped.
"""
acc_lists = []
# This is dummy code
# test out `n` random policies
for _ in range(n):
policy = self.generate_new_policy()
pprint(policy)
child_network = child_network_architecture()
reward, acc_list = self.test_autoaugment_policy(policy, child_network, train_dataset,
test_dataset, toy_flag, logging=True)
self.history.append((policy, reward))
acc_lists.append(acc_list)
for acc_list in acc_lists:
plt.plot(acc_list)
plt.title('I ran 5 random policies to see if there is any sign of \
catastrophic failure during training. If any lines reach \
significantly lower (>10%) accuracies, you might want \
to tune the hyperparameters')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()
plt.savefig('training_graphs_without_policies')
\ No newline at end of file
@@ -31,22 +31,28 @@ augmentation_space = [
class gru_learner(aa_learner):
# Uses a GRU controller which is updated via Proximal Policy Optimization
# It is the same model used in
# http://arxiv.org/abs/1805.09501
# and
# http://arxiv.org/abs/1611.01578
"""
An AutoAugment learner with a GRU controller
The original AutoAugment paper (http://arxiv.org/abs/1805.09501)
uses an LSTM controller updated via Proximal Policy Optimization.
(See Section 3 of AutoAugment paper)
The GRU has been shown to be as powerful a sequential neural
network as the LSTM whilst training and testing much faster
(https://arxiv.org/abs/1412.3555), which is why we use a GRU
in place of the LSTM.
"""
def __init__(self, sp_num=5, fun_num=14, p_bins=11, m_bins=10, discrete_p_m=True, alpha=0.2):
'''
"""
Args:
sp_num: number of subpolicies per policy
fun_num: number of image functions in our search space
p_bins: number of bins we divide the interval [0,1] for probabilities
m_bins: number of bins we divide the magnitude space
alpha: Exploration parameter. The lower this value, the more exploration.
'''
alpha (float): Exploration parameter. It is multiplied with the
operation tensors before they are softmaxed.
The lower this value, the smoother the softmax
output will be, and hence the more exploration.
"""
super().__init__(sp_num, fun_num, p_bins, m_bins, discrete_p_m=True)
self.alpha = alpha
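A standalone illustration of the smoothing effect of alpha, using made-up logits rather than controller output:

import torch

logits = torch.tensor([2.0, 1.0, 0.1])
peaked = torch.softmax(1.0 * logits, dim=0)    # alpha = 1.0 -> sharper distribution
smooth = torch.softmax(0.2 * logits, dim=0)    # alpha = 0.2 -> flatter distribution, more exploration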
@@ -57,19 +63,41 @@ class gru_learner(aa_learner):
def generate_new_policy(self):
'''
We run the GRU for 10 timesteps to obtain 10 operations.
At each time step, it outputs a (fun_num + p_bins + m_bins) dimensional vector
"""
The GRU controller pops out a new policy.
At each time step, the GRU outputs a
(fun_num + p_bins + m_bins, ) dimensional tensor which
contains information regarding which 'image function' to use, and
which values of 'probability (prob)' and 'magnitude (mag)' to use.
We run the GRU for 10 timesteps to obtain 10 such tensors.
And then for each operation, we put it through self.translate_operation_tensor.
Generate a new policy in the form of
We then softmax the parts of the tensor which represent the
choice of function, prob, and mag separately, so that the
resulting tensor's values sum up to 3.
Then we input each tensor into self.translate_operation_tensor
with parameter (return_log_prob=True), which outputs a tuple
in the form of ('img_function_name', prob, mag) and a float
representing the log probability of choosing that particular
func, prob and mag.
We add up the log probabilities of each operation.
We turn the operations into a list of 5 tuples such as:
[
(("Invert", 0.8, None), ("Contrast", 0.2, 6)),
(("Rotate", 0.7, 2), ("Invert", 0.8, None)),
(("Sharpness", 0.8, 1), ("Sharpness", 0.9, 3)),
(("ShearY", 0.5, 8), ("Invert", 0.7, None)),
]
'''
This list can then be input into an AutoAugment object
as is done in self.learn().
We return the list and the sum of the log probabilities.
"""
log_prob = 0
# we need a random input to put in
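A standalone sketch of the separate softmaxing described in the docstring, using the default dimensions from __init__ (variable names are illustrative):

import torch

fun_num, p_bins, m_bins, alpha = 14, 11, 10, 0.2
raw = torch.randn(fun_num + p_bins + m_bins)               # stand-in for one GRU output at a single timestep
fun_part = torch.softmax(alpha * raw[:fun_num], dim=0)
p_part = torch.softmax(alpha * raw[fun_num:fun_num + p_bins], dim=0)
m_part = torch.softmax(alpha * raw[fun_num + p_bins:], dim=0)
operation_tensor = torch.cat([fun_part, p_part, m_part])   # each block sums to 1, so the values sum to 3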
@@ -108,13 +136,6 @@
def learn(self, train_dataset, test_dataset, child_network_architecture, toy_flag, m=8):
'''
Does the loop which is seen in Figure 1 in the AutoAugment paper.
In other words, repeat:
1. <generate a random policy>
2. <see how good that policy is>
3. <save how good the policy is in a list/dictionary>
'''
# optimizer for training the GRU controller
cont_optim = torch.optim.SGD(self.controller.parameters(), lr=1e-2)
@@ -179,8 +200,7 @@ if __name__=='__main__':
transform=torchvision.transforms.ToTensor())
child_network = cn.lenet
learner = gru_learner(discrete_p_m=False)
newpol = learner.generate_new_policy()
learner.learn(train_dataset, test_dataset, child_network, toy_flag=True)
pprint(learner.history)
@@ -32,27 +32,23 @@ augmentation_space = [
class randomsearch_learner(aa_learner):
def __init__(self, sp_num=5, fun_num=14, p_bins=11, m_bins=10, discrete_p_m=False):
'''
Args:
sp_num: number of subpolicies per policy
fun_num: number of image functions in our search space
p_bins: number of bins we divide the interval [0,1] for probabilities
m_bins: number of bins we divide the magnitude space
'''
super().__init__(sp_num, fun_num, p_bins, m_bins, discrete_p_m)
def generate_new_discrete_operation(self):
'''
"""
generate a new random operation in the form of a tensor of dimension:
(fun_num + 11 + 10)
Used only when self.discrete_p_m=True
The first fun_num dimensions are a one-hot encoding to specify which function to use.
The next 11 dimensions specify which 'probability' to choose.
(0.0, 0.1, ..., 1.0)
The next 10 dimensions specify which 'magnitude' to choose.
(0, 1, ..., 9)
'''
"""
random_fun = np.random.randint(0, self.fun_num)
random_prob = np.random.randint(0, self.p_bins)
random_mag = np.random.randint(0, self.m_bins)
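One way the one-hot operation tensor described above could be assembled from such random indices; this is a sketch, not necessarily the method's actual continuation:

import torch
import torch.nn.functional as F

fun_num, p_bins, m_bins = 14, 11, 10
random_fun, random_prob, random_mag = 2, 7, 4          # e.g. sampled as in the lines above
fun_t = F.one_hot(torch.tensor(random_fun), num_classes=fun_num).float()
prob_t = F.one_hot(torch.tensor(random_prob), num_classes=p_bins).float()
mag_t = F.one_hot(torch.tensor(random_mag), num_classes=m_bins).float()
operation_tensor = torch.cat([fun_t, prob_t, mag_t])   # shape: (fun_num + p_bins + m_bins,) == (35,)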
@@ -68,17 +64,20 @@ class randomsearch_learner(aa_learner):
def generate_new_continuous_operation(self):
'''
"""
Returns operation_tensor, which is a tensor representation of a random operation with
dimension:
(fun_num + 1 + 1)
Used only when self.discrete_p_m=False.
The first fun_num dimensions are a one-hot encoding to specify which function to use.
The next dimension specifies which 'probability' to choose.
0 < x < 1
The next dimension specifies which 'magnitude' to choose.
0 < x < 9
'''
"""
fun_p_m = torch.zeros(self.fun_num + 2)
# pick a random image function
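A possible continuation of this method, following the ranges given in the docstring (illustrative only, not the repository's actual code):

random_fun = np.random.randint(0, self.fun_num)
fun_p_m[random_fun] = 1.0                              # one-hot choice of image function
fun_p_m[-2] = np.random.uniform()                      # probability, 0 < x < 1
fun_p_m[-1] = np.random.uniform() * (self.m_bins - 1)  # magnitude, 0 < x < 9
return fun_p_m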
@@ -92,15 +91,11 @@ class randomsearch_learner(aa_learner):
def generate_new_policy(self):
'''
Generate a new random policy in the form of
[
(("Invert", 0.8, None), ("Contrast", 0.2, 6)),
(("Rotate", 0.7, 2), ("Invert", 0.8, None)),
(("Sharpness", 0.8, 1), ("Sharpness", 0.9, 3)),
(("ShearY", 0.5, 8), ("Invert", 0.7, None)),
]
'''
"""
Generates a new policy, with the elements chosen at random
(uniform random distribution).
"""
new_policy = []
for _ in range(self.sp_num): # generate sp_num subpolicies for each policy
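One possible body for this loop, built from the operation helpers documented above (a sketch, not necessarily the actual implementation):

ops = []
for _ in range(2):                                     # each subpolicy is a pair of operations
    if self.discrete_p_m:
        op_tensor = self.generate_new_discrete_operation()
    else:
        op_tensor = self.generate_new_continuous_operation()
    ops.append(self.translate_operation_tensor(op_tensor))
new_policy.append(tuple(ops))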
@@ -123,13 +118,6 @@ class randomsearch_learner(aa_learner):
def learn(self, train_dataset, test_dataset, child_network_architecture, toy_flag):
'''
Does the loop which is seen in Figure 1 in the AutoAugment paper.
In other words, repeat:
1. <generate a random policy>
2. <see how good that policy is>
3. <save how good the policy is in a list/dictionary>
'''
# test out 1500 random policies
for _ in range(1500):
policy = self.generate_new_policy()
@@ -147,32 +135,6 @@ class randomsearch_learner(aa_learner):
pickle.dump(self.history, file)
def demo_plot(self, train_dataset, test_dataset, child_network_architecture, toy_flag, n=50):
'''
I made this to plot a couple of accuracy graphs to help manually tune my gradient
optimizer hyperparameters.
'''
acc_lists = []
# This is dummy code
# test out 15 random policies
for _ in range(n):
policy = self.generate_new_policy()
pprint(policy)
child_network = child_network_architecture()
reward, acc_list = self.test_autoaugment_policy(policy, child_network, train_dataset,
test_dataset, toy_flag, logging=True)
self.history.append((policy, reward))
acc_lists.append(acc_list)
for acc_list in acc_lists:
plt.plot(acc_list)
plt.title('I ran 50 random policies to see if there is any sign of \
catastrophic failure during training')
plt.show()
plt.savefig('random_policies')
if __name__=='__main__':
@@ -190,5 +152,4 @@ if __name__=='__main__':
rs_learner = randomsearch_learner(discrete_p_m=True)
rs_learner.learn(train_dataset, test_dataset, child_network, toy_flag=True)
# rs_learner.demo_plot(train_dataset, test_dataset, child_network, toy_flag=True)
pprint(rs_learner.history)
\ No newline at end of file