diff --git a/04_22_evo.py b/04_22_evo.py
index 53f765d61eb4a46a9c364f63fbd42dd23ec32a7e..cc935e8c68a1eae5ac37922d1a30966aeffc0b1d 100644
--- a/04_22_evo.py
+++ b/04_22_evo.py
@@ -167,17 +167,17 @@ child_network_architecture = cn.LeNet(
                                     img_channels=3
                                     )
 
-# save_dir='./benchmark/pickles/04_22_cf_ln_rssad'
+save_dir='./benchmark/pickles/04_22_cf_ln_rssad'
 
 # # evo
-# run_benchmark(
-#     save_file=save_dir+'.pkl',
-#     train_dataset=train_dataset,
-#     test_dataset=test_dataset,
-#     child_network_architecture=child_network_architecture,
-#     agent_arch=aal.EvoLearner,
-#     config=config,
-#     )
+run_benchmark(
+    save_file=save_dir+'.pkl',
+    train_dataset=train_dataset,
+    test_dataset=test_dataset,
+    child_network_architecture=child_network_architecture,
+    agent_arch=aal.EvoLearner,
+    config=config,
+    )
 
 # # rerun_best_policy(
 # #     agent_pickle=save_dir+'.pkl',
@@ -194,16 +194,16 @@ child_network_architecture = cn.LeNet(
 megapol = [(('ShearY', 0.5, 5), ('Posterize', 0.6, 5)), (('Color', 1.0, 9), ('Contrast', 1.0, 9)), (('TranslateX', 0.5, 5), ('Posterize', 0.5, 5)), (('TranslateX', 0.5, 5), ('Posterize', 0.5, 5)), (('Color', 0.5, 5), ('Posterize', 0.5, 5))]
 
 
-accs=[]
-for _ in range(10):
-    print(f'{_}/{10}')
-    temp_agent = aal.evo_learner(**config)
-    accs.append(
-            temp_agent.test_autoaugment_policy(megapol,
-                                child_network_architecture,
-                                train_dataset,
-                                test_dataset,
-                                logging=False)
-                )
+# accs=[]
+# for _ in range(10):
+#     print(f'{_}/{10}')
+#     temp_agent = aal.EvoLearner(**config)
+#     accs.append(
+#             temp_agent._test_autoaugment_policy(megapol,
+#                                 child_network_architecture,
+#                                 train_dataset,
+#                                 test_dataset,
+#                                 logging=False)
+#                 )
 
-print("CIPHAR10 accs: ", accs)
+# print("CIPHAR10 accs: ", accs)
diff --git a/autoaug/autoaugment_learners/EvoLearner.py b/autoaug/autoaugment_learners/EvoLearner.py
index c01aafa857a654072d129b8a724552e1adc1a7bc..9601f2373cd566d88fc24d9250c3e14f2fcc1417 100644
--- a/autoaug/autoaugment_learners/EvoLearner.py
+++ b/autoaug/autoaugment_learners/EvoLearner.py
@@ -95,8 +95,8 @@ class EvoLearner(AaLearner):
     def __init__(self, 
                 # search space settings
                 sp_num=5,
-                p_bins=11, 
-                m_bins=10, 
+                p_bins=1, 
+                m_bins=1, 
                 discrete_p_m=False,
                 exclude_method=[],
                 # child network settings
@@ -131,6 +131,7 @@ class EvoLearner(AaLearner):
         #                 sub_num_pol=self.sp_num
         #                 )
         self.controller = controller
+
         self.num_solutions = num_solutions
         self.torch_ga = torchga.TorchGA(model=self.controller, num_solutions=num_solutions)
         self.num_parents_mating = num_parents_mating
@@ -144,6 +145,64 @@ class EvoLearner(AaLearner):
 
         assert num_solutions > num_parents_mating, 'Number of solutions must be larger than the number of parents mating!'
 
+
+    def get_full_policy(self, x):
+        """
+        Generates the full policy (self.sp_num subpolicies). Network architecture requires
+        output size 5 * 2 * (self.fun_num + self.p_bins + self.m_bins)
+
+        Parameters 
+        -----------
+        x -> PyTorch tensor
+            Input data for the network
+
+        Returns
+        ----------
+        full_policy -> [((str, float, int), (str, float, int)), ...]
+            Full policy consisting of a list of subpolicies. Each subpolicy consists of
+            two transformations, each with a probability and a magnitude
+        """
+        section = self.fun_num + self.p_bins + self.m_bins
+
+        y = self.controller.forward(x)
+        full_policy = []
+        for pol in range(self.sp_num):
+            int_pol = []
+            for _ in range(2):
+                idx_ret = torch.argmax(y[:, (pol * section):(pol*section) + self.fun_num].mean(dim = 0))
+
+                trans, need_mag = self.augmentation_space[idx_ret]
+
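+                # probability: a single continuous output squashed through a sigmoid when
+                # p_bins == 1, otherwise an argmax over the p_bins discrete buckets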
+                if self.p_bins == 1:
+                    # p_ret = min(1, max(0, (y[:, (pol * section)+self.fun_num:(pol*section)+self.fun_num+self.p_bins].mean(dim = 0).item())))
+                    p_ret = torch.sigmoid(y[:, (pol * section)+self.fun_num:(pol*section)+self.fun_num+self.p_bins].mean(dim = 0)).item()
+                else:
+                    p_ret = torch.argmax(y[:, (pol * section)+self.fun_num:(pol*section)+self.fun_num+self.p_bins].mean(dim = 0)).item() * 0.1
+
+                p_ret = round(p_ret, 1)
+
+
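+                # magnitude: only decoded for transformations that require one; continuous
+                # (sigmoid) when m_bins == 1, otherwise an argmax over the m_bins buckets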
+                if need_mag:
+                    # print("original mag", y[:, (pol * section)+self.fun_num+self.p_bins:((pol+1)*section)].mean(dim = 0))
+                    if self.m_bins == 1:
+                        # mag = min(9, max(0, (y[:, (pol * section)+self.fun_num+self.p_bins:((pol+1)*section)].mean(dim = 0).item())))
+                        mag = torch.sigmoid(y[:, (pol * section)+self.fun_num+self.p_bins:((pol+1)*section)].mean(dim = 0)).item()
+
+                    else:
+                        print("bit: ", y[:, (pol * section)+self.fun_num+self.p_bins:((pol+1)*section)].mean(dim = 0))
+                        print("full: ", y[:, (pol * section)+self.fun_num+self.p_bins:((pol+1)*section)].shape)
+                        print("mean: ", torch.argmax(y[:, (pol * section)+self.fun_num+self.p_bins:((pol+1)*section)].mean(dim = 0)))
+                        mag = torch.argmax(y[:, (pol * section)+self.fun_num+self.p_bins:((pol+1)*section)].mean(dim = 0)).item()
+                    mag = int(mag)
+                else:
+                    mag = None
+                int_pol.append((trans, p_ret, mag))
+
+            full_policy.append(tuple(int_pol))
+
+        return full_policy
+
+
     
     def _get_single_policy_cov(self, x, alpha = 0.5):
         """
@@ -155,7 +214,7 @@ class EvoLearner(AaLearner):
         x -> PyTorch Tensor
             Input data for the AutoAugment network 
 
-        alpha -> Float
+        alpha -> float
             Proportion for covariance and population matrices 
 
         Returns
@@ -227,20 +286,20 @@ class EvoLearner(AaLearner):
 
         Parameters
         ------------
-        return_weights -> Bool
+        return_weights -> bool
             Determines if the weight of the GA network should be returned 
         
         Returns
         ------------
         If return_weights:
-            Network weights -> Dictionary
+            Network weights -> dict
         
         Else:
             Solution -> Best GA instance solution
 
-            Solution fitness -> Float
+            Solution fitness -> float
 
-            Solution_idx -> Int
+            Solution_idx -> int
         """
         print("learn0")
         self.num_generations = iterations
@@ -260,6 +319,22 @@ class EvoLearner(AaLearner):
 
 
     def _in_pol_dict(self, new_policy):
+        """
+        Checks if a potential subpolicy has already been testing by the agent
+
+        Parameters
+        ------------
+        new_policy -> subpolicy
+
+        Returns
+        ------------
+        if subpolicy has been tested:
+            -> True 
+        else: 
+            -> False
+
+        
+        """
         new_policy = new_policy[0]
         trans1, trans2 = new_policy[0][0], new_policy[1][0]
         new_set = {new_policy[0][1], new_policy[0][2], new_policy[1][1], new_policy[1][2]}
@@ -276,7 +351,7 @@ class EvoLearner(AaLearner):
 
     def _set_up_instance(self, train_dataset, test_dataset, child_network_architecture):
         """
-        Initialises GA instance, as well as fitness and _on_generation functions
+        Initialises the GA instance, as well as the fitness and 'on generation' functions
         
         """
 
@@ -302,20 +377,27 @@ class EvoLearner(AaLearner):
             train_dataset.transform = torchvision.transforms.ToTensor()
             self.train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=100)
             count = 0
+
+            new_pol = True
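+            # the duplicate-subpolicy check below is currently disabled, so new_pol stays
+            # True and a fitness value is always computed for the generated policy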
             for idx, (test_x, label_x) in enumerate(self.train_loader):
-                print("here idx: ", idx)
                 count += 1
-                sub_pol = self._get_single_policy_cov(test_x)
+                # sub_pol = self._get_single_policy_cov(test_x)
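+                # fitness is now measured on the full policy produced by the controller,
+                # rather than on a single covariance-sampled subpolicy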
+                sub_pol = self.get_full_policy(test_x)
+                print("subpol: ", sub_pol)
 
 
-                while self._in_pol_dict(sub_pol):
-                    sub_pol = self._get_single_policy_cov(test_x)[0]
+                # if self._in_pol_dict(sub_pol):
+                #     sub_pol = self._get_single_policy_cov(test_x)[0]
+                #     new_pol = False 
+                #     fit_val = 0
 
                 if idx == 0:
                     break
 
             print("start test")
-            fit_val = self._test_autoaugment_policy(sub_pol,child_network_architecture,train_dataset,test_dataset)
+            if new_pol:
+                fit_val = self._test_autoaugment_policy(sub_pol,child_network_architecture,train_dataset,test_dataset)
+            print("fit_val: ", fit_val)
             print("end test")
 
 
@@ -342,7 +424,7 @@ class EvoLearner(AaLearner):
 
         def _on_generation(ga_instance):
             """
-            Prints information of generational fitness
+            Prints fitness information for the current generation
 
             Parameters 
             -------------
diff --git a/autoaug/autoaugment_learners/GenLearner.py b/autoaug/autoaugment_learners/GenLearner.py
index 3f4a40234285369f775331eb5ae5e913845a7bf5..22630e48087986d9881d533e9c4c756081932705 100644
--- a/autoaug/autoaugment_learners/GenLearner.py
+++ b/autoaug/autoaugment_learners/GenLearner.py
@@ -83,7 +83,7 @@ class Genetic_learner(AaLearner):
         self.num_offspring = num_offspring
 
 
-    def gen_random_subpol(self):
+    def _gen_random_subpol(self):
         """
         Generates a random subpolicy using the reduced augmentation_space
 
@@ -111,7 +111,7 @@ class Genetic_learner(AaLearner):
         return subpol
 
 
-    def gen_random_policy(self):
+    def _gen_random_policy(self):
         """
         Generates a random policy, consisting of sp_num subpolicies
 
@@ -125,7 +125,7 @@ class Genetic_learner(AaLearner):
         return pol
 
     
-    def bin_to_subpol(self, subpol_bin):
+    def _bin_to_subpol(self, subpol_bin):
         """
         Converts a binary string representation of a subpolicy to a subpolicy
 
@@ -166,7 +166,7 @@ class Genetic_learner(AaLearner):
         return pol   
 
 
-    def subpol_to_bin(self, subpol):
+    def _subpol_to_bin(self, subpol):
         """
         Converts a subpolicy to its binary representation 
 
@@ -200,7 +200,7 @@ class Genetic_learner(AaLearner):
         return bin_pol
 
 
-    def choose_parents(self, parents, parents_weights):
+    def _choose_parents(self, parents, parents_weights):
         """
         Chooses parents from which the next policy will be generated from
 
@@ -224,7 +224,7 @@ class Genetic_learner(AaLearner):
         return (parent1, parent2)
 
     
-    def generate_children(self):
+    def _generate_children(self):
         """
         Generates children via the random crossover method
 
@@ -265,18 +265,15 @@ class Genetic_learner(AaLearner):
         """
 
         for idx in range(iterations):
-            print("ITERATION: ", idx)
             if len(self.history) < self.num_offspring:
-                policy = [self.gen_random_subpol()]
+                policy = [self._gen_random_subpol()]
             else:
-                policy = self.bin_to_subpol(random.choice(self.generate_children()))
+                policy = self._bin_to_subpol(random.choice(self._generate_children()))
-            print("Policy: ", policy)
             
             reward = self._test_autoaugment_policy(policy,
                                                 child_network_architecture,
                                                 train_dataset,
                                                 test_dataset)  
-            print("reward: ", reward)
 
 
 
diff --git a/autoaug/controller_networks/EvoController.py b/autoaug/controller_networks/EvoController.py
index 33a9a606922483f539f783a33dbf18b1bc16d9cb..db0800f6b52815dc2519402338e6e1ebac939e58 100644
--- a/autoaug/controller_networks/EvoController.py
+++ b/autoaug/controller_networks/EvoController.py
@@ -3,7 +3,7 @@ import torch.nn as nn
 import math
 
 class EvoController(nn.Module):
-    def __init__(self, fun_num=14, p_bins=11, m_bins=10, sub_num_pol=5):
+    def __init__(self, fun_num=14, p_bins=1, m_bins=1, sub_num_pol=5):
         self.fun_num = fun_num
         self.p_bins = p_bins 
         self.m_bins = m_bins