Commit e1724cea authored by Sun Jin Kim

Merge branch 'master' of gitlab.doc.ic.ac.uk:yw21218/metarl

Parents: cda58986, 7d36b320
Pipeline #273174 failed
@@ -167,17 +167,17 @@ child_network_architecture = cn.LeNet(
     img_channels=3
     )
-# save_dir='./benchmark/pickles/04_22_cf_ln_rssad'
+save_dir='./benchmark/pickles/04_22_cf_ln_rssad'
 # # evo
-# run_benchmark(
-#     save_file=save_dir+'.pkl',
-#     train_dataset=train_dataset,
-#     test_dataset=test_dataset,
-#     child_network_architecture=child_network_architecture,
-#     agent_arch=aal.EvoLearner,
-#     config=config,
-#     )
+run_benchmark(
+    save_file=save_dir+'.pkl',
+    train_dataset=train_dataset,
+    test_dataset=test_dataset,
+    child_network_architecture=child_network_architecture,
+    agent_arch=aal.EvoLearner,
+    config=config,
+    )
 # # rerun_best_policy(
 # #     agent_pickle=save_dir+'.pkl',
@@ -194,16 +194,16 @@ child_network_architecture = cn.LeNet(
 megapol = [(('ShearY', 0.5, 5), ('Posterize', 0.6, 5)), (('Color', 1.0, 9), ('Contrast', 1.0, 9)), (('TranslateX', 0.5, 5), ('Posterize', 0.5, 5)), (('TranslateX', 0.5, 5), ('Posterize', 0.5, 5)), (('Color', 0.5, 5), ('Posterize', 0.5, 5))]
-accs=[]
-for _ in range(10):
-    print(f'{_}/{10}')
-    temp_agent = aal.evo_learner(**config)
-    accs.append(
-        temp_agent.test_autoaugment_policy(megapol,
-                                           child_network_architecture,
-                                           train_dataset,
-                                           test_dataset,
-                                           logging=False)
-        )
-print("CIPHAR10 accs: ", accs)
+# accs=[]
+# for _ in range(10):
+#     print(f'{_}/{10}')
+#     temp_agent = aal.EvoLearner(**config)
+#     accs.append(
+#         temp_agent._test_autoaugment_policy(megapol,
+#                                             child_network_architecture,
+#                                             train_dataset,
+#                                             test_dataset,
+#                                             logging=False)
+#         )
+# print("CIPHAR10 accs: ", accs)
@@ -95,8 +95,8 @@ class EvoLearner(AaLearner):
     def __init__(self,
                  # search space settings
                  sp_num=5,
-                 p_bins=11,
-                 m_bins=10,
+                 p_bins=1,
+                 m_bins=1,
                  discrete_p_m=False,
                  exclude_method=[],
                  # child network settings
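This change collapses the probability and magnitude heads from 11 and 10 bins to a single continuous output each. A rough check of what that means for the controller's output width, using the 5 * 2 * (fun_num + p_bins + m_bins) sizing quoted in the get_full_policy docstring added below (fun_num=14 is the default set in this commit's EvoController change):

fun_num, p_bins, m_bins, sp_num = 14, 1, 1, 5   # defaults after this commit
section = fun_num + p_bins + m_bins             # slice per transformation: 16
print(sp_num * 2 * section)                     # 160 outputs, down from 5 * 2 * (14 + 11 + 10) = 350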
@@ -131,6 +131,7 @@ class EvoLearner(AaLearner):
         #                         sub_num_pol=self.sp_num
         #                         )
         self.controller = controller
         self.num_solutions = num_solutions
         self.torch_ga = torchga.TorchGA(model=self.controller, num_solutions=num_solutions)
         self.num_parents_mating = num_parents_mating
@@ -144,6 +145,64 @@ class EvoLearner(AaLearner):
         assert num_solutions > num_parents_mating, 'Number of solutions must be larger than the number of parents mating!'
 
+    def get_full_policy(self, x):
+        """
+        Generates the full policy (self.num_sub_pol subpolicies). Network architecture requires
+        output size 5 * 2 * (self.fun_num + self.p_bins + self.m_bins)
+
+        Parameters
+        -----------
+        x -> PyTorch tensor
+            Input data for network
+
+        Returns
+        ----------
+        full_policy -> [((String, float, float), (String, float, float)), ...)
+            Full policy consisting of tuples of subpolicies. Each subpolicy consisting of
+            two transformations, with a probability and magnitude float for each
+        """
+        section = self.fun_num + self.p_bins + self.m_bins
+        y = self.controller.forward(x)
+        full_policy = []
+        for pol in range(self.sp_num):
+            int_pol = []
+            for _ in range(2):
+                idx_ret = torch.argmax(y[:, (pol * section):(pol*section) + self.fun_num].mean(dim = 0))
+                trans, need_mag = self.augmentation_space[idx_ret]
+
+                if self.p_bins == 1:
+                    # p_ret = min(1, max(0, (y[:, (pol * section)+self.fun_num:(pol*section)+self.fun_num+self.p_bins].mean(dim = 0).item())))
+                    p_ret = torch.sigmoid(y[:, (pol * section)+self.fun_num:(pol*section)+self.fun_num+self.p_bins].mean(dim = 0)).item()
+                else:
+                    p_ret = torch.argmax(y[:, (pol * section)+self.fun_num:(pol*section)+self.fun_num+self.p_bins].mean(dim = 0)).item() * 0.1
+                    p_ret = round(p_ret, 1)
+
+                if need_mag:
+                    # print("original mag", y[:, (pol * section)+self.fun_num+self.p_bins:((pol+1)*section)].mean(dim = 0))
+                    if self.m_bins == 1:
+                        # mag = min(9, max(0, (y[:, (pol * section)+self.fun_num+self.p_bins:((pol+1)*section)].mean(dim = 0).item())))
+                        mag = torch.sigmoid(y[:, (pol * section)+self.fun_num+self.p_bins:((pol+1)*section)].mean(dim = 0)).item()
+                    else:
+                        print("bit: ", y[:, (pol * section)+self.fun_num+self.p_bins:((pol+1)*section)].mean(dim = 0))
+                        print("full: ", y[:, (pol * section)+self.fun_num+self.p_bins:((pol+1)*section)].shape)
+                        print("mean: ", torch.argmax(y[:, (pol * section)+self.fun_num+self.p_bins:((pol+1)*section)].mean(dim = 0)))
+                        mag = torch.argmax(y[:, (pol * section)+self.fun_num+self.p_bins:((pol+1)*section)].mean(dim = 0)).item()
+                    mag = int(mag)
+                else:
+                    mag = None
+
+                int_pol.append((trans, p_ret, mag))
+            full_policy.append(tuple(int_pol))
+
+        return full_policy
+
     def _get_single_policy_cov(self, x, alpha = 0.5):
         """
@@ -155,7 +214,7 @@ class EvoLearner(AaLearner):
         x -> PyTorch Tensor
             Input data for the AutoAugment network
-        alpha -> Float
+        alpha -> float
             Proportion for covariance and population matrices
 
         Returns
@@ -227,20 +286,20 @@ class EvoLearner(AaLearner):
         Parameters
         ------------
-        return_weights -> Bool
+        return_weights -> bool
            Determines if the weight of the GA network should be returned
 
        Returns
        ------------
        If return_weights:
-            Network weights -> Dictionary
+            Network weights -> dict
        Else:
            Solution -> Best GA instance solution
-            Solution fitness -> Float
-            Solution_idx -> Int
+            Solution fitness -> float
+            Solution_idx -> int
        """
 
        print("learn0")
        self.num_generations = iterations
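The docstring above describes two return modes. A hedged sketch of how a caller might use them; the method name learn and its positional arguments are assumptions inferred from the print("learn0") line and from _set_up_instance, not confirmed by this diff:

# hypothetical calls; names and argument order are assumptions
weights = agent.learn(train_dataset, test_dataset, child_network_architecture,
                      iterations=15, return_weights=True)    # dict of network weights
solution, solution_fitness, solution_idx = agent.learn(
    train_dataset, test_dataset, child_network_architecture,
    iterations=15, return_weights=False)                     # best GA solution, its fitness, its index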
@@ -260,6 +319,22 @@ class EvoLearner(AaLearner):
     def _in_pol_dict(self, new_policy):
+        """
+        Checks if a potential subpolicy has already been testing by the agent
+
+        Parameters
+        ------------
+        new_policy -> subpolicy
+
+        Returns
+        ------------
+        if subpolicy has been tested:
+            -> True
+        else:
+            -> False
+        """
         new_policy = new_policy[0]
         trans1, trans2 = new_policy[0][0], new_policy[1][0]
         new_set = {new_policy[0][1], new_policy[0][2], new_policy[1][1], new_policy[1][2]}
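A small illustration of the input shape this helper expects; the wrapping list matches the new_policy[0] unpacking above, and learner and the values are hypothetical:

candidate = [(('Color', 0.5, 5), ('Posterize', 0.5, 5))]   # one subpolicy wrapped in a list
if learner._in_pol_dict(candidate):
    print("already evaluated, skipping")                   # True once the same transforms and values were tested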
@@ -276,7 +351,7 @@ class EvoLearner(AaLearner):
     def _set_up_instance(self, train_dataset, test_dataset, child_network_architecture):
         """
-        Initialises GA instance, as well as fitness and _on_generation functions
+        Initialises GA instance, as well as the fitness and 'on generation' functions
         """
@@ -302,20 +377,27 @@ class EvoLearner(AaLearner):
             train_dataset.transform = torchvision.transforms.ToTensor()
             self.train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=100)
             count = 0
+            new_pol = True
             for idx, (test_x, label_x) in enumerate(self.train_loader):
+                print("here idx: ", idx)
                 count += 1
-                sub_pol = self._get_single_policy_cov(test_x)
+                # sub_pol = self._get_single_policy_cov(test_x)
+                sub_pol = self.get_full_policy(test_x)
+                print("subpol: ", sub_pol)
-                while self._in_pol_dict(sub_pol):
-                    sub_pol = self._get_single_policy_cov(test_x)[0]
+                # if self._in_pol_dict(sub_pol):
+                #     sub_pol = self._get_single_policy_cov(test_x)[0]
+                #     new_pol = False
+                #     fit_val = 0
                 if idx == 0:
                     break
 
             print("start test")
-            fit_val = self._test_autoaugment_policy(sub_pol,child_network_architecture,train_dataset,test_dataset)
+            if new_pol:
+                fit_val = self._test_autoaugment_policy(sub_pol,child_network_architecture,train_dataset,test_dataset)
+            print("fit_val: ", fit_val)
             print("end test")
@@ -342,7 +424,7 @@ class EvoLearner(AaLearner):
         def _on_generation(ga_instance):
             """
-            Prints information of generational fitness
+            Prints information of generation's fitness
 
             Parameters
             -------------
...
@@ -83,7 +83,7 @@ class Genetic_learner(AaLearner):
         self.num_offspring = num_offspring
 
-    def gen_random_subpol(self):
+    def _gen_random_subpol(self):
         """
         Generates a random subpolicy using the reduced augmentation_space
@@ -111,7 +111,7 @@ class Genetic_learner(AaLearner):
         return subpol
 
-    def gen_random_policy(self):
+    def _gen_random_policy(self):
         """
         Generates a random policy, consisting of sp_num subpolicies
@@ -125,7 +125,7 @@ class Genetic_learner(AaLearner):
         return pol
 
-    def bin_to_subpol(self, subpol_bin):
+    def _bin_to_subpol(self, subpol_bin):
         """
         Converts a binary string representation of a subpolicy to a subpolicy
@@ -166,7 +166,7 @@ class Genetic_learner(AaLearner):
         return pol
 
-    def subpol_to_bin(self, subpol):
+    def _subpol_to_bin(self, subpol):
         """
         Converts a subpolicy to its binary representation
@@ -200,7 +200,7 @@ class Genetic_learner(AaLearner):
         return bin_pol
 
-    def choose_parents(self, parents, parents_weights):
+    def _choose_parents(self, parents, parents_weights):
         """
         Chooses parents from which the next policy will be generated from
@@ -224,7 +224,7 @@ class Genetic_learner(AaLearner):
         return (parent1, parent2)
 
-    def generate_children(self):
+    def _generate_children(self):
         """
         Generates children via the random crossover method
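The random crossover mentioned here is not shown in this hunk. Purely as a generic illustration of the idea (not the repository's exact encoding or crossover scheme), single-point crossover on two binary strings looks like:

import random

def single_point_crossover(parent1_bin, parent2_bin):
    # cut both bit-strings at a random point and swap the tails
    cut = random.randint(1, len(parent1_bin) - 1)
    return parent1_bin[:cut] + parent2_bin[cut:], parent2_bin[:cut] + parent1_bin[cut:]

child_a, child_b = single_point_crossover("1010011010", "0101100101")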
@@ -265,18 +265,15 @@ class Genetic_learner(AaLearner):
         """
 
         for idx in range(iterations):
-            print("ITERATION: ", idx)
             if len(self.history) < self.num_offspring:
                 policy = [self.gen_random_subpol()]
             else:
                 policy = self.bin_to_subpol(random.choice(self.generate_children()))
-            print("Policy: ", policy)
             reward = self._test_autoaugment_policy(policy,
                                                    child_network_architecture,
                                                    train_dataset,
                                                    test_dataset)
-            print("reward: ", reward)
...
@@ -3,7 +3,7 @@ import torch.nn as nn
 import math
 
 class EvoController(nn.Module):
-    def __init__(self, fun_num=14, p_bins=11, m_bins=10, sub_num_pol=5):
+    def __init__(self, fun_num=14, p_bins=1, m_bins=1, sub_num_pol=5):
         self.fun_num = fun_num
         self.p_bins = p_bins
         self.m_bins = m_bins
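Since the EvoController defaults now mirror the EvoLearner defaults changed earlier in this commit, a hedged sketch of constructing the two consistently; passing the instance through EvoLearner's controller argument is an assumption about how that parameter is meant to be used:

controller = EvoController(fun_num=14, p_bins=1, m_bins=1, sub_num_pol=5)    # new defaults
agent = aal.EvoLearner(sp_num=5, p_bins=1, m_bins=1, controller=controller)  # keep bin settings in sync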
...