diff --git a/autoaug/autoaugment_learners/AaLearner.py b/autoaug/autoaugment_learners/AaLearner.py
index f14a107ebc5ac73c6c6fe1e9202936c652ab50d8..c0566d4edba29079f70e1d3e0334393894796a03 100644
--- a/autoaug/autoaugment_learners/AaLearner.py
+++ b/autoaug/autoaugment_learners/AaLearner.py
@@ -13,32 +13,51 @@ import types
 
 
 class AaLearner:
-    """
-    The parent class for all AaLearner's
-    
-    Attributes:
-        op_tensor_length (int): what is the dimension of the tensor that represents
-                            each 'operation' (which is made up of fun_name, prob,
-                            and mag).
-    
-    See Also
-    --------
+    """The parent class for all AaLearner's
 
+    Contains utility methods that child AaLearners use.
 
-    Notes
-    -----
+    Args:
+        sp_num (int, optional): number of subpolicies per policy. Defaults to 5.
 
+        p_bins (int, optional): number of bins we divide the interval [0,1] into
+                        for probabilities, e.g. (0.0, 0.1, ..., 1.0). Defaults to 11.
 
-    References
-    ----------
-    
+        m_bins (int, optional): number of bins we divide the magnitude space into.
+                        Defaults to 10.
+
+        discrete_p_m (bool, optional):
+                        Whether or not the agent should represent probability and
+                        magnitude as discrete variables at the output of the
+                        controller (a controller can be a neural network, genetic
+                        algorithm, etc.). Defaults to False.
 
-    Examples
-    --------
+        batch_size (int, optional): child_network training parameter. Defaults to 32.
 
+        toy_size (float, optional): child_network training parameter. Ratio of the
+                            original dataset used in the toy dataset. Defaults to 0.1.
 
+        learning_rate (float, optional): child_network training parameter. Defaults to 1e-2.
+
+        max_epochs (Union[int, float], optional): child_network training parameter. 
+                            Defaults to float('inf').
+
+        early_stop_num (int, optional): child_network training parameter. Defaults to 20.
+
+        exclude_method (list, optional): list of names (str) of image operations
+                        the user wants to exclude from the search space. Defaults to [].
+
+    
+    Attributes:
+        history (list): list of policies that have been input into
+                        ``self._test_autoaugment_policy``, along with their respective
+                        obtained accuracies.
+                        
+        augmentation_space (list): list of image functions that the user has chosen to 
+                        include in the search space.
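+
+    Example code (a construction sketch for illustration; ``RsLearner`` is one of
+    the concrete child learners in this package, and the keyword arguments shown
+    are the ones documented above)::
+
+        learner = RsLearner(sp_num=5,
+                            p_bins=11,
+                            m_bins=10,
+                            batch_size=32,
+                            toy_size=0.1,
+                            learning_rate=1e-2,
+                            max_epochs=float('inf'),
+                            early_stop_num=20,
+                            exclude_method=[])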
 
     """
+
     def __init__(self, 
                 # parameters that define the search space
                 sp_num=5,
@@ -53,29 +72,7 @@ class AaLearner:
                 early_stop_num=20,
                 exclude_method = [],
                 ):
-        """
-        Args:
-            sp_num (int, optional): number of subpolicies per policy. Defaults to 5.
-            fun_num (int, optional): number of image functions in our search space.
-                            Defaults to 14.
-            p_bins (int, optional): number of bins we divide the interval [0,1] for 
-                            probabilities. Defaults to 11.
-            m_bins (int, optional): number of bins we divide the magnitude space.
-                            Defaults to 10.
-            discrete_p_m (bool, optional):
-                            Whether or not the agent should represent probability and 
-                            magnitude as discrete variables as the out put of the 
-                            controller (A controller can be a neural network, genetic
-                            algorithm, etc.). Defaults to False
-            
-            batch_size (int, optional): child_network training parameter. Defaults to 32.
-            toy_size (int, optional): child_network training parameter. ratio of original
-                                dataset used in toy dataset. Defaults to 0.1.
-            learning_rate (float, optional): child_network training parameter. Defaults to 1e-2.
-            max_epochs (Union[int, float], optional): child_network training parameter. 
-                                Defaults to float('inf').
-            early_stop_num (int, optional): child_network training parameter. Defaults to 20.
-        """
+        
         # related to defining the search space
         self.sp_num = sp_num
         self.p_bins = p_bins
@@ -298,31 +295,36 @@ class AaLearner:
             2. <see how good that policy is>
             3. <save how good the policy is in a list/dictionary and 
                 (if applicable,) update the controller (e.g. RL agent)>
-        
-        Args:
-            train_dataset (torchvision.dataset.vision.VisionDataset)
-            test_dataset (torchvision.dataset.vision.VisionDataset)
-            child_network_architecture (Union[function, nn.Module]):
-                                NOTE This can be both, for example,
-                                    MyNetworkArchitecture
-                                    and
-                                    MyNetworkArchitecture()
-            iterations (int): how many different policies do you want to test
-        Returns:
-            none
-        
-        
-        If child_network_architecture is a <function>, then we make an 
-        instance of it. If this is a <nn.Module>, we make a copy.deepcopy
+
+        If ``child_network_architecture`` is a ``<function>``, then we make an 
+        instance of it. If this is a ``<nn.Module>``, we make a ``copy.deepcopy``
         of it. We make a copy of it because we want to keep an untrained
         (initialized but not trained) version of the child network
         architecture, because we need to train it multiple times
-        for each policy. Keeping child_network_architecture as a `function` is
-        potentially better than keeping it as a nn.Module because every
+        for each policy. Keeping ``child_network_architecture`` as a ``<function>`` is
+        potentially better than keeping it as a ``<nn.Module>`` because every
         time we make a new instance, the weights are differently initialized
         which means that our results will be less biased
         (https://en.wikipedia.org/wiki/Bias_(statistics)).
-        
+
+
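+        A rough sketch of the behaviour described above (illustrative only, not
+        the literal implementation)::
+
+            import copy
+
+            if isinstance(child_network_architecture, nn.Module):
+                # an nn.Module instance: copy it so the untrained original is kept
+                child_network = copy.deepcopy(child_network_architecture)
+            else:
+                # a function/class: calling it returns a freshly initialized network
+                child_network = child_network_architecture()
+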
+        Args:
+            train_dataset (torchvision.datasets.vision.VisionDataset):
+            test_dataset (torchvision.datasets.vision.VisionDataset):
+            child_network_architecture (Union[function, nn.Module]):
+                                    This can be both, for example,
+
+                                    ``LeNet``
+
+                                    and
+
+                                    ``LeNet()``
+
+            iterations (int): how many different policies you want to test
+
+        Returns:
+            None
+
 
         Example code:
 
@@ -332,7 +334,7 @@ class AaLearner:
             for _ in range(15):
                 policy = self._generate_new_policy()
 
-                pprint(policy)
+                print(policy)
                 reward = self._test_autoaugment_policy(policy,
                                         child_network_architecture,
                                         train_dataset,
@@ -449,11 +451,11 @@ class AaLearner:
 
         
         Args: 
-            number_policies -> int: Number of (sub)policies to be included in the mega
+            number_policies (int): Number of (sub)policies to be included in the mega
             policy
 
         Returns:
-            megapolicy -> [subpolicy, subpolicy, ...]
+            megapolicy ([subpolicy, subpolicy, ...])
         """
 
         number_policies = max(number_policies, len(self.history))
diff --git a/autoaug/autoaugment_learners/GruLearner.py b/autoaug/autoaugment_learners/GruLearner.py
index 6db3ba5cc5f75b81947ebe52b9df32bcd67194c2..c2ef521e58c4bb3f473fe4ca33a76830deaebeb5 100644
--- a/autoaug/autoaugment_learners/GruLearner.py
+++ b/autoaug/autoaugment_learners/GruLearner.py
@@ -24,21 +24,64 @@ class GruLearner(AaLearner):
     (https://arxiv.org/abs/1412.3555), which is why we substituted
     the LSTM for the GRU.
 
-        
-    See Also
-    --------
+    Args:
+        sp_num (int, optional): number of subpolicies per policy. Defaults to 5.
 
+        p_bins (int, optional): number of bins we divide the interval [0,1] into
+                        for probabilities, e.g. (0.0, 0.1, ..., 1.0). Defaults to 11.
 
-    Notes
-    -----
+        m_bins (int, optional): number of bins we divide the magnitude space into.
+                        Defaults to 10.
 
+        discrete_p_m (bool, optional):
+                        Whether or not the agent should represent probability and
+                        magnitude as discrete variables at the output of the
+                        controller (a controller can be a neural network, genetic
+                        algorithm, etc.). Defaults to False.
 
-    References
-    ----------
+        batch_size (int, optional): child_network training parameter. Defaults to 32.
+
+        toy_size (float, optional): child_network training parameter. Ratio of the
+                            original dataset used in the toy dataset. Defaults to 0.1.
+
+        learning_rate (float, optional): child_network training parameter. Defaults to 1e-2.
+
+        max_epochs (Union[int, float], optional): child_network training parameter. 
+                            Defaults to float('inf').
+
+        early_stop_num (int, optional): child_network training parameter. Defaults to 20.
+
+        exclude_method (list, optional): list of names (str) of image operations
+                        the user wants to exclude from the search space. Defaults to [].
+
+        alpha (float, optional): Exploration parameter. It is multiplied with
+                                operation tensors before they are softmaxed.
+                                The lower this value, the smoother the softmax
+                                output will be, hence more exploration (see the
+                                sketch after this list). Defaults to 0.2.
+
+        cont_mb_size (int, optional): Controller Minibatch Size. How many
+                            policies we test in order to calculate the
+                            PPO (Proximal Policy Optimization) gradient used to
+                            update the controller. Defaults to 4.
     
+    Attributes:
+        history (list): list of policies that have been input into
+                        ``self._test_autoaugment_policy``, along with their respective
+                        obtained accuracies.
+                        
+        augmentation_space (list): list of image functions that the user has chosen to 
+                        include in the search space.
 
-    Examples
-    --------
+    References
+    ----------
+    Ekin D. Cubuk, et al. 
+        "AutoAugment: Learning Augmentation Policies from Data"
+        arXiv:1805.09501
+    Junyoung Chung, et al.
+        "Empirical Evaluation of Gated Recurrent Neural 
+        Networks on Sequence Modeling"
+        https://arxiv.org/abs/1412.3555
 
 
 
@@ -61,18 +104,7 @@ class GruLearner(AaLearner):
                 alpha=0.2,
                 cont_mb_size=4,
                 cont_lr=0.03):
-        """
-        Args:
-            alpha (float, optional): Exploration parameter. It is multiplied to 
-                    operation tensors before they're softmaxed. 
-                    The lower this value, the more smoothed the output
-                    of the softmaxed will be, hence more exploration.
-                    Defaults to 0.2.
-            cont_mb_size (int, optional): Controller Minibatch Size. How many
-                    policies do we test in order to calculate the 
-                    PPO(proximal policy update) gradient to update
-                    the controller. Defaults to 
-        """
+
         if discrete_p_m==True:
             print('Warning: Incompatible discrete_p_m=True input into GruLearner. \
                 discrete_p_m=False will be used')
diff --git a/autoaug/autoaugment_learners/RsLearner.py b/autoaug/autoaugment_learners/RsLearner.py
index da0f7695ec94f7387c4d2dcdb42b7c5b0951b957..e1562a431099b6166fb9ef3b63416df6106530de 100644
--- a/autoaug/autoaugment_learners/RsLearner.py
+++ b/autoaug/autoaugment_learners/RsLearner.py
@@ -13,26 +13,60 @@ import pickle
 
 
 class RsLearner(AaLearner):
-    """
+    """This agent tests out randomly generated augmentation policies.
+
     Tests randomly sampled policies from the search space specified by the AutoAugment
-    paper. Acts as a baseline for other AaLearner's.
+    paper. Random search has been shown to be a hard baseline to beat for many other
+    hyper-parameter optimization tasks (see References below). 
+    Hence, this learner acts as a difficult baseline for other AaLearners.
 
-        
-    See Also
-    --------
+    Args:
+        sp_num (int, optional): number of subpolicies per policy. Defaults to 5.
 
+        p_bins (int, optional): number of bins we divide the interval [0,1] into
+                        for probabilities, e.g. (0.0, 0.1, ..., 1.0). Defaults to 11.
 
-    Notes
-    -----
+        m_bins (int, optional): number of bins we divide the magnitude space into.
+                        Defaults to 10.
 
+        discrete_p_m (bool, optional):
+                        Whether or not the agent should represent probability and
+                        magnitude as discrete variables at the output of the
+                        controller (a controller can be a neural network, genetic
+                        algorithm, etc.). Defaults to False.
 
-    References
-    ----------
+        batch_size (int, optional): child_network training parameter. Defaults to 32.
+
+        toy_size (float, optional): child_network training parameter. Ratio of the
+                            original dataset used in the toy dataset. Defaults to 0.1.
+
+        learning_rate (float, optional): child_network training parameter. Defaults to 1e-2.
+
+        max_epochs (Union[int, float], optional): child_network training parameter. 
+                            Defaults to float('inf').
+
+        early_stop_num (int, optional): child_network training parameter. Defaults to 20.
+
+        exclude_method (list, optional): list of names (str) of image operations
+                        the user wants to exclude from the search space. Defaults to [].
     
+    Attributes:
+        history (list): list of policies that have been input into
+                        ``self._test_autoaugment_policy``, along with their respective
+                        obtained accuracies.
 
-    Examples
-    --------
+        augmentation_space (list): list of image functions that the user has chosen to 
+                        include in the search space.
 
+    References
+    ----------
+    Ekin D. Cubuk, et al. 
+        "AutoAugment: Learning Augmentation Policies from Data"
+        arXiv:1805.09501
+    
+    James Bergstra, Yoshua Bengio
+        "Random Search for Hyper-Parameter Optimization"
+        https://www.jmlr.org/papers/volume13/bergstra12a/bergstra12a.pdf
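+
+    Examples
+    --------
+    A minimal usage sketch. The import path, the placeholder dataset/network
+    variables and the ``learn`` call follow the conventions documented in
+    ``AaLearner.learn`` and are assumptions for illustration::
+
+        from autoaug.autoaugment_learners.RsLearner import RsLearner
+
+        learner = RsLearner(sp_num=5, toy_size=0.1, max_epochs=20)
+        # train_dataset, test_dataset and child_network_architecture are placeholders
+        learner.learn(train_dataset,
+                      test_dataset,
+                      child_network_architecture,
+                      iterations=10)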
 
     """
     def __init__(self,
diff --git a/autoaug/autoaugment_learners/UcbLearner.py b/autoaug/autoaugment_learners/UcbLearner.py
index ded7d67a87dbb9582e3753a520b9d15d6de39ff1..7e3e707780bec802c254fe08aab563faacfb612c 100644
--- a/autoaug/autoaugment_learners/UcbLearner.py
+++ b/autoaug/autoaugment_learners/UcbLearner.py
@@ -15,24 +15,71 @@ class UcbLearner(RsLearner):
 
     - Using a toy dataset or a toy CNN
 
+    Args:
+        sp_num (int, optional): number of subpolicies per policy. Defaults to 5.
+
+        p_bins (int, optional): number of bins we divide the interval [0,1] into
+                        for probabilities, e.g. (0.0, 0.1, ..., 1.0). Defaults to 11.
+
+        m_bins (int, optional): number of bins we divide the magnitude space into.
+                        Defaults to 10.
+
+        discrete_p_m (bool, optional):
+                        Whether or not the agent should represent probability and
+                        magnitude as discrete variables at the output of the
+                        controller (a controller can be a neural network, genetic
+                        algorithm, etc.). Defaults to False.
+
+        batch_size (int, optional): child_network training parameter. Defaults to 32.
+
+        toy_size (float, optional): child_network training parameter. Ratio of the
+                            original dataset used in the toy dataset. Defaults to 0.1.
+
+        learning_rate (float, optional): child_network training parameter. Defaults to 1e-2.
+
+        max_epochs (Union[int, float], optional): child_network training parameter. 
+                            Defaults to float('inf').
+
+        early_stop_num (int, optional): child_network training parameter. Defaults to 20.
+
+        exclude_method (list, optional): list of names (str) of image operations
+                        the user wants to exclude from the search space. Defaults to [].
     
+        num_policies (int, optional): Number of policies we want to search over.
+                            Defaults to 100.
         
-    See Also
-    --------
+    Attributes:
+        history (list): list of policies that have been input into
+                        ``self._test_autoaugment_policy``, along with their respective
+                        obtained accuracies.
 
+        augmentation_space (list): list of image functions that the user has chosen to 
+                        include in the search space.
 
-    Notes
-    -----
+        policies (list): A list of policies which we are currently searching over.
 
+        avg_accs (list): A list where the nth element indicates the average accuracy 
+                        obtained by the nth policy.
 
-    References
-    ----------
-    
 
-    Examples
-    --------
 
+    Notes
+    -----
+    As opposed to the other learners, this one searches over a subset of the entire
+    search space (specified in the AutoAugment paper). The size of the subset is
+    initialized to be ``self.num_policies``, but we can increase it by running
+    ``self.make_more_policies()``. For example, we initialize the learner with
+    ``self.num_policies=7``, run ``self.learn(iterations=20)`` to learn about the
+    seven policies we have in our ``self.policies``. Then run 
+    ``self.make_more_policies(n=5)`` to add 5 more policies to ``self.policies``.
+    Then we can run ``self.learn(iterations=20)`` to continue the UCB1 algorithm
+    with the extended search space.
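+
+    A sketch of that workflow (the dataset and child-network variables are
+    placeholders, and the full ``learn`` argument list shown is the one documented
+    in ``AaLearner.learn``)::
+
+        learner = UcbLearner(num_policies=7)
+        learner.learn(train_dataset, test_dataset, child_network_architecture,
+                      iterations=20)
+        # extend the search space by 5 randomly generated policies
+        learner.make_more_policies(n=5)
+        # continue UCB1 over the now 12 policies
+        learner.learn(train_dataset, test_dataset, child_network_architecture,
+                      iterations=20)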
 
+    References
+    ----------
+    Peter Auer, et al.
+        "Finite-time Analysis of the Multiarmed Bandit Problem"
+        https://homes.di.unimi.it/~cesabian/Pubblicazioni/ml-02.pdf
     
     """
     def __init__(self,