diff --git a/.gitignore b/.gitignore
index e792a3f6e509811af884773c49c715bf525e7580..e541790a38735883601fc9b3b302cf2e9d940bae 100644
--- a/.gitignore
+++ b/.gitignore
@@ -200,8 +200,11 @@ celerybeat.pid
 # SageMath parsed files
 *.sage.py
 
+# we don't want dataset directories
 **/test
 **/train
+# but there is a unit test folder at the repo root that we DO want to track
+!/test
 
-# user uplaod
+# user upload
 /react_backend/child_networks
diff --git a/MetaAugment/autoaugment_learners/aa_learner.py b/MetaAugment/autoaugment_learners/aa_learner.py
index 561222a5fac35d5348f99da6a9fba31657afe133..48c05b95d5bef8de9c0405e949f1e6663e66e8ae 100644
--- a/MetaAugment/autoaugment_learners/aa_learner.py
+++ b/MetaAugment/autoaugment_learners/aa_learner.py
@@ -101,7 +101,7 @@ class aa_learner:
         self.op_tensor_length = self.fun_num + p_bins + m_bins if discrete_p_m else self.fun_num +2
 
 
-    def translate_operation_tensor(self, operation_tensor, return_log_prob=False, argmax=False):
+    def _translate_operation_tensor(self, operation_tensor, return_log_prob=False, argmax=False):
         """
         takes in a tensor representing an operation and returns an actual operation which
         is in the form of:
@@ -220,7 +220,7 @@ class aa_learner:
             return operation
         
 
-    def generate_new_policy(self):
+    def _generate_new_policy(self):
         """
         Generate a new policy which can be fed into an AutoAugment object 
         by calling:
@@ -243,7 +243,7 @@ class aa_learner:
                         by calling: AutoAugment.subpolicies = policy
         """
 
-        raise NotImplementedError('generate_new_policy not implemented in aa_learner')
+        raise NotImplementedError('_generate_new_policy not implemented in aa_learner')
 
 
     def learn(self, train_dataset, test_dataset, child_network_architecture, iterations=15):
@@ -290,10 +290,10 @@ class aa_learner:
                       different policies
             
             for _ in range(15):
-                policy = self.generate_new_policy()
+                policy = self._generate_new_policy()
 
                 pprint(policy)
-                reward = self.test_autoaugment_policy(policy,
+                reward = self._test_autoaugment_policy(policy,
                                         child_network_architecture,
                                         train_dataset,
                                         test_dataset)
@@ -302,7 +302,7 @@ class aa_learner:
         """
     
 
-    def test_autoaugment_policy(self,
+    def _test_autoaugment_policy(self,
                                 policy,
                                 child_network_architecture,
                                 train_dataset,
@@ -329,7 +329,9 @@ class aa_learner:
-            accuracy (float): best accuracy reached in any
+            accuracy (float): best accuracy reached in any epoch
         """
 
-        
+        # we create an instance of the child network that we're going
+        # to train. The method of creation depends on the type of 
+        # input we got for child_network_architecture
         if isinstance(child_network_architecture, types.FunctionType):
             child_network = child_network_architecture()
         elif isinstance(child_network_architecture, type):
@@ -394,10 +396,10 @@ class aa_learner:
     #     # This is dummy code
     #     # test out `n` random policies
     #     for _ in range(n):
-    #         policy = self.generate_new_policy()
+    #         policy = self._generate_new_policy()
 
     #         pprint(policy)
-    #         reward, acc_list = self.test_autoaugment_policy(policy,
+    #         reward, acc_list = self._test_autoaugment_policy(policy,
     #                                             child_network_architecture,
     #                                             train_dataset,
     #                                             test_dataset,
diff --git a/MetaAugment/autoaugment_learners/autoaugment.py b/MetaAugment/autoaugment_learners/autoaugment.py
index 5a8ecbcf6f0b8c6212a8c034a70d61476f4870f6..6baa28cd51c6a9c2a584aea0f3c772a42e55f92c 100644
--- a/MetaAugment/autoaugment_learners/autoaugment.py
+++ b/MetaAugment/autoaugment_learners/autoaugment.py
@@ -446,7 +446,9 @@ if __name__=='__main__':
 
 
 
-    def test_autoaugment_policy(subpolicies, train_dataset, test_dataset):
+    def _test_autoaugment_policy(subpolicies, train_dataset, test_dataset):
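+        # trains a child network on data augmented with the given
+        # subpolicies, returning the best accuracy and the accuracy log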
 
         aa_transform = AutoAugment()
         aa_transform.subpolicies = subpolicies
@@ -470,8 +472,8 @@
         return best_acc, acc_log
 
 
-    _, acc_log1 = test_autoaugment_policy(subpolicies1, train_dataset, test_dataset)
-    _, acc_log2 = test_autoaugment_policy(subpolicies2, train_dataset, test_dataset)
+    _, acc_log1 = _test_autoaugment_policy(subpolicies1, train_dataset, test_dataset)
+    _, acc_log2 = _test_autoaugment_policy(subpolicies2, train_dataset, test_dataset)
 
     plt.plot(acc_log1, label='subpolicies1')
     plt.plot(acc_log2, label='subpolicies2')
diff --git a/MetaAugment/autoaugment_learners/evo_learner.py b/MetaAugment/autoaugment_learners/evo_learner.py
index 34cc2d44555423475914a1ba2528cfddb71aad57..6bf682c1595b0b731c4a68bfc8f619f953b31d29 100644
--- a/MetaAugment/autoaugment_learners/evo_learner.py
+++ b/MetaAugment/autoaugment_learners/evo_learner.py
@@ -1,12 +1,12 @@
 import torch
-torch.manual_seed(0)
 import torch.nn as nn
 import pygad
 import pygad.torchga as torchga
-import copy
+import torchvision
 import torch
 
 from MetaAugment.autoaugment_learners.aa_learner import aa_learner
+import MetaAugment.controller_networks as cont_n
 
 
 class evo_learner(aa_learner):
@@ -14,7 +14,7 @@ class evo_learner(aa_learner):
     def __init__(self, 
                 # search space settings
                 sp_num=5,
-                p_bins=10, 
+                p_bins=11, 
                 m_bins=10, 
                 discrete_p_m=False,
                 exclude_method=[],
@@ -27,7 +27,7 @@ class evo_learner(aa_learner):
                 # evolutionary learner specific settings
                 num_solutions=5,
                 num_parents_mating=3,
-                controller=None
+                controller=cont_n.evo_controller
                 ):
 
         super().__init__(
@@ -43,14 +43,19 @@ class evo_learner(aa_learner):
                     exclude_method=exclude_method
                     )
 
+        # evolutionary algorithm settings
+        self.controller = controller(
+                        fun_num=self.fun_num, 
+                        p_bins=self.p_bins, 
+                        m_bins=self.m_bins, 
+                        sub_num_pol=self.sp_num
+                        )
         self.num_solutions = num_solutions
-        self.controller = controller
         self.torch_ga = torchga.TorchGA(model=self.controller, num_solutions=num_solutions)
         self.num_parents_mating = num_parents_mating
         self.initial_population = self.torch_ga.population_weights
-        self.p_bins = p_bins 
-        self.sub_num_pol = sp_num
-        self.m_bins = m_bins
+
+        # store our logs
         self.policy_dict = {}
         self.policy_result = []
 
@@ -58,6 +63,7 @@ class evo_learner(aa_learner):
         assert num_solutions > num_parents_mating, 'Number of solutions must be larger than the number of parents mating!'
 
 
+
     def get_full_policy(self, x):
         """
-        Generates the full policy (self.num_sub_pol subpolicies). Network architecture requires
+        Generates the full policy (self.sp_num subpolicies). Network architecture requires
@@ -77,7 +83,7 @@ class evo_learner(aa_learner):
         section = self.fun_num + self.p_bins + self.m_bins
         y = self.controller.forward(x)
         full_policy = []
-        for pol in range(self.sub_num_pol):
+        for pol in range(self.sp_num):
             int_pol = []
             for _ in range(2):
                 idx_ret = torch.argmax(y[:, (pol * section):(pol*section) + self.fun_num].mean(dim = 0))
@@ -167,10 +173,10 @@ class evo_learner(aa_learner):
                 prob2 += torch.sigmoid(y[idx, section+self.fun_num]).item()
                 if mag1 is not None:
                     # mag1 += min(max(0, (y[idx, self.auto_aug_agent.fun_num+1]).item()), 8)
-                    mag1 += 10 * torch.sigmoid(y[idx, self.fun_num+1]).item()
+                    mag1 += min(9, 10 * torch.sigmoid(y[idx, self.fun_num+1]).item())
                 if mag2 is not None:
                     # mag2 += min(max(0, y[idx, section+self.auto_aug_agent.fun_num+1].item()), 8)
-                    mag2 += 10 * torch.sigmoid(y[idx, self.fun_num+1]).item()
+                    mag2 += min(9, 10 * torch.sigmoid(y[idx, section+self.fun_num+1]).item())
 
                 counter += 1
 
@@ -240,7 +246,10 @@
                 self.policy_dict[trans1][trans2].append(new_set)
                 return False 
             else:
                 self.policy_dict[trans1][trans2] = [new_set]
+        else:
+            # trans1 has no entry yet; create one without clobbering
+            self.policy_dict[trans1] = {trans2: [new_set]}
         if trans2 in self.policy_dict:
             if trans1 in self.policy_dict[trans2]:
                 for test_pol in self.policy_dict[trans2][trans1]:
@@ -249,7 +258,10 @@
                 self.policy_dict[trans2][trans1].append(new_set)
                 return False 
             else:
                 self.policy_dict[trans2][trans1] = [new_set]
+        else:
+            # likewise for the reversed (trans2, trans1) ordering
+            self.policy_dict[trans2] = {trans1: [new_set]}
 
 
     def set_up_instance(self, train_dataset, test_dataset, child_network_architecture):
@@ -277,6 +289,9 @@
                                                             weights_vector=solution)
 
             self.controller.load_state_dict(model_weights_dict)
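+            # the dataset may arrive without a transform, so tensorise it
+            # before building the DataLoader the controller reads from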
+            train_dataset.transform = torchvision.transforms.ToTensor()
             self.train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size)
 
             for idx, (test_x, label_x) in enumerate(self.train_loader):
@@ -298,11 +313,11 @@
             if len(self.policy_result) > self.sp_num:
                 self.policy_result = sorted(self.policy_result, key=lambda x: x[1], reverse=True)
                 self.policy_result = self.policy_result[:self.sp_num]
-                print("Appended policy: ", self.policy_result)
+                print("appended policy: ", self.policy_result)
 
 
             if fit_val > self.history_best[self.gen_count]:
-                print("Best policy: ", full_policy)
+                print("best policy: ", full_policy)
                 self.history_best[self.gen_count] = fit_val 
                 self.best_model = model_weights_dict
             
@@ -335,4 +350,3 @@
             mutation_percent_genes = 0.1,
             fitness_func=fitness_func,
             on_generation = on_generation)
-
diff --git a/MetaAugment/autoaugment_learners/gru_learner.py b/MetaAugment/autoaugment_learners/gru_learner.py
index 5c15a4a41b086982aa543bda451c89bfa7eecba9..7ca8088cfd6291f068d1b0eb16c1effa89cfa427 100644
--- a/MetaAugment/autoaugment_learners/gru_learner.py
+++ b/MetaAugment/autoaugment_learners/gru_learner.py
@@ -85,7 +85,7 @@ class gru_learner(aa_learner):
         self.softmax = torch.nn.Softmax(dim=0)
 
 
-    def generate_new_policy(self):
+    def _generate_new_policy(self):
         """
         The GRU controller pops out a new policy.
 
@@ -101,7 +101,7 @@ class gru_learner(aa_learner):
-        choice of function, prob, and mag seperately, so that the
-        resulting tensor's values sums up to 3.
+        choice of function, prob, and mag separately, so that the
+        resulting tensor's values sum up to 3.
 
-        Then we input each tensor into self.translate_operation_tensor
+        Then we input each tensor into self._translate_operation_tensor
         with parameter (return_log_prob=True), which outputs a tuple
         in the form of ('img_function_name', prob, mag) and a float
         representing the log probability that we chose the chosen 
@@ -150,8 +150,8 @@ class gru_learner(aa_learner):
             op2 = softmaxed_vectors[2*subpolicy_idx+1]
 
             # translate both vectors
-            op1, log_prob1 = self.translate_operation_tensor(op1, return_log_prob=True)
-            op2, log_prob2 = self.translate_operation_tensor(op2, return_log_prob=True)
+            op1, log_prob1 = self._translate_operation_tensor(op1, return_log_prob=True)
+            op2, log_prob2 = self._translate_operation_tensor(op2, return_log_prob=True)
             
             new_policy.append((op1,op2))
             log_prob += (log_prob1+log_prob2)
@@ -177,10 +177,10 @@ class gru_learner(aa_learner):
 
             for k in range(self.cont_mb_size):
                 # log_prob is $\sum_{t=1}^T log(P(a_t|a_{(t-1):1};\theta_c))$, used in PPO
-                policy, log_prob = self.generate_new_policy()
+                policy, log_prob = self._generate_new_policy()
 
                 pprint(policy)
-                reward = self.test_autoaugment_policy(policy,
+                reward = self._test_autoaugment_policy(policy,
                                                     child_network_architecture, 
                                                     train_dataset,
                                                     test_dataset)
diff --git a/MetaAugment/autoaugment_learners/randomsearch_learner.py b/MetaAugment/autoaugment_learners/randomsearch_learner.py
index 2c35fb80ab15f7b2c51dfdcfbfcff942a6a70032..25fbd1125e5aaf5c88cb3fc2931f7612dc4d831b 100644
--- a/MetaAugment/autoaugment_learners/randomsearch_learner.py
+++ b/MetaAugment/autoaugment_learners/randomsearch_learner.py
@@ -46,7 +46,7 @@ class randomsearch_learner(aa_learner):
                     )
         
 
-    def generate_new_discrete_operation(self):
+    def _generate_new_discrete_operation(self):
         """
         generate a new random operation in the form of a tensor of dimension:
             (fun_num + 11 + 10)
@@ -74,7 +74,7 @@ class randomsearch_learner(aa_learner):
         return torch.cat([fun_t, prob_t, mag_t])
 
 
-    def generate_new_continuous_operation(self):
+    def _generate_new_continuous_operation(self):
         """
         Returns operation_tensor, which is a tensor representation of a random operation with
         dimension:
@@ -101,7 +101,7 @@ class randomsearch_learner(aa_learner):
         return fun_p_m
 
 
-    def generate_new_policy(self):
+    def _generate_new_policy(self):
         """
         Generates a new policy, with the elements chosen at random
-        (unifom random distribution).
+        (uniform random distribution).
@@ -115,10 +115,10 @@ class randomsearch_learner(aa_learner):
             for i in range(2):
                 # if our agent uses discrete representations of probability and magnitude
                 if self.discrete_p_m:
-                    new_op = self.generate_new_discrete_operation()
+                    new_op = self._generate_new_discrete_operation()
                 else:
-                    new_op = self.generate_new_continuous_operation()
-                new_op = self.translate_operation_tensor(new_op)
+                    new_op = self._generate_new_continuous_operation()
+                new_op = self._translate_operation_tensor(new_op)
                 ops.append(new_op)
 
             new_subpolicy = tuple(ops)
@@ -135,10 +135,10 @@ class randomsearch_learner(aa_learner):
             iterations=15):
         # test out `iterations` number of  random policies
         for _ in range(iterations):
-            policy = self.generate_new_policy()
+            policy = self._generate_new_policy()
 
             pprint(policy)
-            reward = self.test_autoaugment_policy(policy,
+            reward = self._test_autoaugment_policy(policy,
                                                 child_network_architecture,
                                                 train_dataset,
                                                 test_dataset)
diff --git a/MetaAugment/autoaugment_learners/ucb_learner.py b/MetaAugment/autoaugment_learners/ucb_learner.py
index fdf735bea1916897ee5e28a27bd67c10b5751581..6ed010fc6668ba2577815b6e65382ceef7241a74 100644
--- a/MetaAugment/autoaugment_learners/ucb_learner.py
+++ b/MetaAugment/autoaugment_learners/ucb_learner.py
@@ -47,7 +47,7 @@ class ucb_learner(randomsearch_learner):
         # attributes used in the UCB1 algorithm
         self.num_policies = num_policies
 
-        self.policies = [self.generate_new_policy() for _ in range(num_policies)]
+        self.policies = [self._generate_new_policy() for _ in range(num_policies)]
 
         self.avg_accs = [None]*self.num_policies
         self.best_avg_accs = []
@@ -67,7 +67,7 @@ class ucb_learner(randomsearch_learner):
                     and add to our list of policies
         """
 
-        self.policies += [self.generate_new_policy() for _ in range(n)]
+        self.policies += [self._generate_new_policy() for _ in range(n)]
 
         # all the below need to be lengthened to store information for the 
         # new policies
@@ -96,7 +96,7 @@ class ucb_learner(randomsearch_learner):
                 # test that one
                 this_policy_idx = self.avg_accs.index(None)
                 this_policy = self.policies[this_policy_idx]
-                acc = self.test_autoaugment_policy(
+                acc = self._test_autoaugment_policy(
                                 this_policy,
                                 child_network_architecture,
                                 train_dataset,
@@ -111,7 +111,7 @@ class ucb_learner(randomsearch_learner):
                 # one with the best q_plus_cnt value
                 this_policy_idx = np.argmax(self.q_plus_cnt)
                 this_policy = self.policies[this_policy_idx]
-                acc = self.test_autoaugment_policy(
+                acc = self._test_autoaugment_policy(
                                 this_policy,
                                 child_network_architecture,
                                 train_dataset,
diff --git a/benchmark/scripts/util_04_22.py b/benchmark/scripts/util_04_22.py
index 62c0456af78549bfaa0599a1a61c40a7eb78e806..8d7aa6b18f30b4a07e2531b04ba13c6ace9e0dbf 100644
--- a/benchmark/scripts/util_04_22.py
+++ b/benchmark/scripts/util_04_22.py
@@ -111,7 +111,7 @@ def rerun_best_policy(
         print(f'{_}/{repeat_num}')
         temp_agent = aal.aa_learner(**config)
         accs.append(
-                temp_agent.test_autoaugment_policy(megapol,
+                temp_agent._test_autoaugment_policy(megapol,
                                     child_network_architecture,
                                     train_dataset,
                                     test_dataset,
diff --git a/docs/source/MetaAugment_library/autoaugment_learners/aa_learners.rst b/docs/source/MetaAugment_library/autoaugment_learners/aa_learners.rst
index 4fc99146806c3b71f88e9173a9751d7edf354c27..db9d7b9fb0a1cade9b28ac93e8e8b6c6dab8493b 100644
--- a/docs/source/MetaAugment_library/autoaugment_learners/aa_learners.rst
+++ b/docs/source/MetaAugment_library/autoaugment_learners/aa_learners.rst
@@ -6,5 +6,7 @@ AutoAugment learners
    :toctree: generated
 
    MetaAugment.autoaugment_learners.aa_learner
+   MetaAugment.autoaugment_learners.evo_learner
    MetaAugment.autoaugment_learners.gru_learner
-   MetaAugment.autoaugment_learners.randomsearch_learner
\ No newline at end of file
+   MetaAugment.autoaugment_learners.randomsearch_learner
+   MetaAugment.autoaugment_learners.ucb_learner
\ No newline at end of file
diff --git a/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.aa_learner.rst b/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.aa_learner.rst
index 010241cbfcfc58968726e3f4c177ff7e4f70ad25..85be0d0150aa95092c10990d6d17c147c799b3af 100644
--- a/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.aa_learner.rst
+++ b/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.aa_learner.rst
@@ -14,11 +14,7 @@
    .. autosummary::
    
       ~aa_learner.__init__
-      ~aa_learner.demo_plot
-      ~aa_learner.generate_new_policy
       ~aa_learner.learn
-      ~aa_learner.test_autoaugment_policy
-      ~aa_learner.translate_operation_tensor
    
    
 
diff --git a/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.evo_learner.rst b/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.evo_learner.rst
new file mode 100644
index 0000000000000000000000000000000000000000..37f06b00f70a6e120fdd1f2dd30612da5bf7b7e6
--- /dev/null
+++ b/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.evo_learner.rst
@@ -0,0 +1,27 @@
+MetaAugment.autoaugment\_learners.evo\_learner
+==============================================
+
+.. currentmodule:: MetaAugment.autoaugment_learners
+
+.. autoclass:: evo_learner
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~evo_learner.__init__
+      ~evo_learner.get_full_policy
+      ~evo_learner.get_single_policy_cov
+      ~evo_learner.in_pol_dict
+      ~evo_learner.learn
+      ~evo_learner.set_up_instance
+   
+   
+
+   
+   
+   
\ No newline at end of file
diff --git a/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.gru_learner.rst b/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.gru_learner.rst
index f5cbe4429c79106ddd6e14c8f54a37226f196a3a..23eb306c38dbc495a4a6c775cf850fe913172876 100644
--- a/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.gru_learner.rst
+++ b/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.gru_learner.rst
@@ -14,11 +14,7 @@
    .. autosummary::
    
       ~gru_learner.__init__
-      ~gru_learner.demo_plot
-      ~gru_learner.generate_new_policy
       ~gru_learner.learn
-      ~gru_learner.test_autoaugment_policy
-      ~gru_learner.translate_operation_tensor
    
    
 
diff --git a/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.randomsearch_learner.rst b/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.randomsearch_learner.rst
index 5bfb303d8fc1fcb8f8638c186b2e4ecc69be8861..72903e47cdaf1a51c7922127343848ada0a12675 100644
--- a/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.randomsearch_learner.rst
+++ b/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.randomsearch_learner.rst
@@ -14,13 +14,7 @@
    .. autosummary::
    
       ~randomsearch_learner.__init__
-      ~randomsearch_learner.demo_plot
-      ~randomsearch_learner.generate_new_continuous_operation
-      ~randomsearch_learner.generate_new_discrete_operation
-      ~randomsearch_learner.generate_new_policy
       ~randomsearch_learner.learn
-      ~randomsearch_learner.test_autoaugment_policy
-      ~randomsearch_learner.translate_operation_tensor
    
    
 
diff --git a/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.ucb_learner.rst b/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.ucb_learner.rst
new file mode 100644
index 0000000000000000000000000000000000000000..83f80f4893a8376490d67aeeb1943866cef3ffa3
--- /dev/null
+++ b/docs/source/MetaAugment_library/autoaugment_learners/generated/MetaAugment.autoaugment_learners.ucb_learner.rst
@@ -0,0 +1,24 @@
+MetaAugment.autoaugment\_learners.ucb\_learner
+==============================================
+
+.. currentmodule:: MetaAugment.autoaugment_learners
+
+.. autoclass:: ucb_learner
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~ucb_learner.__init__
+      ~ucb_learner.learn
+      ~ucb_learner.make_more_policies
+   
+   
+
+   
+   
+   
\ No newline at end of file
diff --git a/docs/source/usage/autoaugment_helperclass.rst b/docs/source/usage/autoaugment_helperclass.rst
index 75f080fb94093d67b94c2fb27a19778453a6505f..cc361da4f66c41c937803225498bc20a7fdf5b0d 100644
--- a/docs/source/usage/autoaugment_helperclass.rst
+++ b/docs/source/usage/autoaugment_helperclass.rst
@@ -11,7 +11,7 @@ we use as a helper class to help us apply AutoAugment policies to datasets.
-This is a tutorial (in the sense describe in https://documentation.divio.com/structure/).
+This is a tutorial (in the sense described in https://documentation.divio.com/structure/).
 
 For an example of how the material is used in our library, see the source code of
-:meth:`aa_learner.test_autoaugment_policy <MetaAugment.autoaugment_learners.aa_learner>`.
+:meth:`aa_learner._test_autoaugment_policy <MetaAugment.autoaugment_learners.aa_learner>`.
 
 Let's say we have a policy within the search space specified by the original 
 AutoAugment paper:
diff --git a/temp_util/wapp_util.py b/temp_util/wapp_util.py
index cde572f7f1e0ba1590fb5685b7212f4d0b3b173a..bb10113311f77d32f3f0b996e76d71f16fa0d184 100644
--- a/temp_util/wapp_util.py
+++ b/temp_util/wapp_util.py
@@ -54,6 +54,7 @@ def parse_users_learner_spec(
                         p_bins=11,
                         m_bins=10,
                         discrete_p_m=True,
+                        exclude_method=exclude_method,
                         # hyperparameters for when training the child_network
                         batch_size=batch_size,
                         toy_size=toy_size,
@@ -63,52 +64,56 @@ def parse_users_learner_spec(
                         # ucb_learner specific hyperparameter
                         num_policies=num_policies
                         )
-        pprint(learner.policies)
-        
-        learner.learn(
-            train_dataset=train_dataset,
-            test_dataset=test_dataset,
-            child_network_architecture=child_archi,
-            iterations=5
-            )
     elif auto_aug_learner == 'Evolutionary Learner':
-        network = cont_n.evo_controller(fun_num=num_funcs, p_bins=1, m_bins=1, sub_num_pol=1)
-        child_network = cn.LeNet()
         learner = aal.evo_learner(
-                                network=network, 
-                                fun_num=num_funcs, 
-                                p_bins=1, 
-                                mag_bins=1, 
-                                sub_num_pol=1, 
-                                ds = ds, 
-                                ds_name=ds_name, 
-                                exclude_method=exclude_method, 
-                                child_network=child_network
-                                )
+                        # parameters that define the search space
+                        sp_num=num_sub_policies,
+                        p_bins=11,
+                        m_bins=10,
+                        discrete_p_m=True,
+                        exclude_method=exclude_method,
+                        # hyperparameters for when training the child_network
+                        batch_size=batch_size,
+                        toy_size=toy_size,
+                        learning_rate=learning_rate,
+                        max_epochs=max_epochs,
+                        early_stop_num=early_stop_num,
+                        )
-        learner.run_instance()
     elif auto_aug_learner == 'Random Searcher':
-        agent = aal.randomsearch_learner(
+        learner = aal.randomsearch_learner(
-                                        sp_num=num_sub_policies,
-                                        batch_size=batch_size,
-                                        learning_rate=learning_rate,
-                                        toy_size=toy_size,
-                                        max_epochs=max_epochs,
-                                        early_stop_num=early_stop_num,
-                                        )
-        agent.learn(train_dataset,
-                    test_dataset,
-                    child_network_architecture=child_archi,
-                    iterations=iterations)
+                        # parameters that define the search space
+                        sp_num=num_sub_policies,
+                        p_bins=11,
+                        m_bins=10,
+                        discrete_p_m=True,
+                        exclude_method=exclude_method,
+                        # hyperparameters for when training the child_network
+                        batch_size=batch_size,
+                        toy_size=toy_size,
+                        learning_rate=learning_rate,
+                        max_epochs=max_epochs,
+                        early_stop_num=early_stop_num,
+                        )
     elif auto_aug_learner == 'GRU Learner':
-        agent = aal.gru_learner(
+        learner = aal.gru_learner(
-                                sp_num=num_sub_policies,
-                                batch_size=batch_size,
-                                learning_rate=learning_rate,
-                                toy_size=toy_size,
-                                max_epochs=max_epochs,
-                                early_stop_num=early_stop_num,
-                                )
-        agent.learn(train_dataset,
-                    test_dataset,
-                    child_network_architecture=child_archi,
-                    iterations=iterations)
\ No newline at end of file
+                        # parameters that define the search space
+                        sp_num=num_sub_policies,
+                        p_bins=11,
+                        m_bins=10,
+                        discrete_p_m=True,
+                        exclude_method=exclude_method,
+                        # hyperparameters for when training the child_network
+                        batch_size=batch_size,
+                        toy_size=toy_size,
+                        learning_rate=learning_rate,
+                        max_epochs=max_epochs,
+                        early_stop_num=early_stop_num,
+                        )
+
+
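+    # every learner above subclasses aa_learner, so they share the same
+    # `learn` interface and can be trained uniformly here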
+    learner.learn(train_dataset,
+                  test_dataset,
+                  child_network_architecture=child_archi,
+                  iterations=iterations)
\ No newline at end of file
diff --git a/test/MetaAugment/test_aa_learner.py b/test/MetaAugment/test_aa_learner.py
index b1524988939e9adac0b45285921b8d058f087887..64517cd090360a4f150d1d1e794df6eb08cd6c90 100644
--- a/test/MetaAugment/test_aa_learner.py
+++ b/test/MetaAugment/test_aa_learner.py
@@ -7,13 +7,13 @@ import torchvision.datasets as datasets
 import random
 
 
-def test_translate_operation_tensor():
+def test__translate_operation_tensor():
     """
-    See if aa_learner class's translate_operation_tensor works
+    See if aa_learner class's _translate_operation_tensor works
-    by feeding many (valid) inputs in it.
+    by feeding many (valid) inputs into it.
 
-    We make a lot of (fun_num+p_bins_m_bins,) size tensors, softmax 
+    We make a lot of (fun_num+p_bins+m_bins,) size tensors, softmax
-    them, and feed them through the translate_operation_tensor method
+    them, and feed them through the _translate_operation_tensor method
     to see if it doesn't break
     """
 
@@ -44,7 +44,7 @@ def test_translate_operation_tensor():
         mag_t = softmax(mag_t * alpha)
         softmaxed_vector = torch.cat((fun_t, prob_t, mag_t))
 
-        agent.translate_operation_tensor(softmaxed_vector)
+        agent._translate_operation_tensor(softmaxed_vector)
     
 
     # discrete_p_m=False
@@ -73,10 +73,10 @@ def test_translate_operation_tensor():
 
         softmaxed_vector = torch.cat((fun_t, prob_t, mag_t))
 
-        agent.translate_operation_tensor(softmaxed_vector)
+        agent._translate_operation_tensor(softmaxed_vector)
 
 
-def test_test_autoaugment_policy():
+def test__test_autoaugment_policy():
     agent = aal.aa_learner(
                 sp_num=5,
                 p_bins=11,
@@ -107,7 +107,7 @@ def test_test_autoaugment_policy():
                             train=False, download=True,
                             transform=torchvision.transforms.ToTensor())
 
-    acc = agent.test_autoaugment_policy(
+    acc = agent._test_autoaugment_policy(
                                         policy,
                                         child_network_architecture,
                                         train_dataset,
@@ -116,4 +116,42 @@
                                         )
     
     assert isinstance(acc, float)
+
+
+def test_exclude_method():
+    """
+    We want to see if the exclude_method
+    parameter is working properly in aa_learners
+    """
+    
+    exclude_method = [
+                    'ShearX', 
+                    'Color', 
+                    'Brightness', 
+                    'Contrast'
+                    ]
+    agent = aal.gru_learner(
+        exclude_method=exclude_method
+    )
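+    # sample many policies; an excluded operation should never appear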
+    for _ in range(200):
+        new_pol, _ = agent._generate_new_policy()
+        print(new_pol)
+        for (op1, op2) in new_pol:
+            image_function_1 = op1[0]
+            image_function_2 = op2[0]
+            assert image_function_1 not in exclude_method
+            assert image_function_2 not in exclude_method
+    
+    agent = aal.randomsearch_learner(
+        exclude_method=exclude_method
+    )
+    for _ in range(200):
+        new_pol = agent._generate_new_policy()
+        print(new_pol)
+        for (op1, op2) in new_pol:
+            image_function_1 = op1[0]
+            image_function_2 = op2[0]
+            assert image_function_1 not in exclude_method
+            assert image_function_2 not in exclude_method
     
\ No newline at end of file
diff --git a/test/MetaAugment/test_evo_learner.py b/test/MetaAugment/test_evo_learner.py
new file mode 100644
index 0000000000000000000000000000000000000000..b917fb3934555585c2b95a412580ab27f517c081
--- /dev/null
+++ b/test/MetaAugment/test_evo_learner.py
@@ -0,0 +1,45 @@
+import MetaAugment.autoaugment_learners as aal
+import MetaAugment.child_networks as cn
+import torchvision
+import torchvision.datasets as datasets
+
+def test_evo_learner():
+    child_network_architecture = cn.SimpleNet
+    train_dataset = datasets.FashionMNIST(root='./datasets/fashionmnist/train',
+                            train=True, download=True, transform=None)
+    test_dataset = datasets.FashionMNIST(root='./datasets/fashionmnist/test', 
+                            train=False, download=True,
+                            transform=torchvision.transforms.ToTensor())
+
+
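+    # the train set's transform is left as None above because
+    # evo_learner.set_up_instance tensorises the training data itself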
+    learner = aal.evo_learner(
+        # parameters that define the search space
+                sp_num=5,
+                p_bins=11,
+                m_bins=10,
+                discrete_p_m=True,
+                exclude_method=['ShearX'],
+                # hyperparameters for when training the child_network
+                batch_size=8,
+                toy_size=0.0001,
+                learning_rate=1e-1,
+                max_epochs=float('inf'),
+                early_stop_num=30,
+                # evolutionary learner specific settings
+                num_solutions=3,
+                num_parents_mating=2,
+    )
+
+    # run the evolutionary search (3 GA solutions, 2 iterations)
+    learner.learn(
+        train_dataset=train_dataset,
+        test_dataset=test_dataset,
+        child_network_architecture=child_network_architecture,
+        iterations=2
+        )
+
+
+if __name__=="__main__":
+    test_evo_learner()
diff --git a/test/MetaAugment/test_gru_learner.py b/test/MetaAugment/test_gru_learner.py
index cd52b0e95f710c8cddf8a9afdbe67a86acb8fb07..b2ea8930df73959c63933804e72d28ce9bdeca84 100644
--- a/test/MetaAugment/test_gru_learner.py
+++ b/test/MetaAugment/test_gru_learner.py
@@ -6,9 +6,9 @@ import torchvision.datasets as datasets
 
 import random
 
-def test_generate_new_policy():
+def test__generate_new_policy():
     """
-    make sure gru_learner.generate_new_policy() is robust
+    make sure gru_learner._generate_new_policy() is robust
     with respect to different values of sp_num, fun_num, 
     p_bins, and m_bins
     """
@@ -24,7 +24,8 @@
             cont_mb_size=2
             )
         for _ in range(4):
-            new_policy = agent.generate_new_policy()
+            new_policy = agent._generate_new_policy()
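+            # gru_learner returns a (policy, log_prob) tuple, hence the [0]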
             assert isinstance(new_policy[0], list), new_policy
 
 
diff --git a/test/MetaAugment/test_randomsearch_learner.py b/test/MetaAugment/test_randomsearch_learner.py
index 61e9f9cd8c86be854dcd8d42cc9ceddde8ada3bc..6c5a935071bfacc8c2e1c8d276732aa7585afa60 100644
--- a/test/MetaAugment/test_randomsearch_learner.py
+++ b/test/MetaAugment/test_randomsearch_learner.py
@@ -6,9 +6,9 @@ import torchvision.datasets as datasets
 
 import random
 
-def test_generate_new_policy():
+def test__generate_new_policy():
     """
-    make sure randomsearch_learner.generate_new_policy() is robust
+    make sure randomsearch_learner._generate_new_policy() is robust
     with respect to different values of sp_num, fun_num, 
     p_bins, and m_bins
     """
@@ -27,7 +27,7 @@ def test_generate_new_policy():
                 discrete_p_m=discrete_p_m
                 )
             for _ in range(4):
-                new_policy = agent.generate_new_policy()
+                new_policy = agent._generate_new_policy()
                 assert isinstance(new_policy, list), new_policy
     
     discrete_p_m = True