Commit 2d0583db authored by Sun Jin Kim

Update docstrings in /autoaugment_learners/

parent 63a7084c
@@ -35,7 +35,7 @@ augmentation_space = [
class aa_learner:
def __init__(self, sp_num=5, fun_num=14, p_bins=11, m_bins=10, discrete_p_m=False):
'''
"""
Args:
sp_num (int): number of subpolicies per policy
fun_num (int): number of image functions in our search space
@@ -46,7 +46,8 @@ class aa_learner:
magnitude as discrete variables as the output of the
controller (A controller can be a neural network, genetic
algorithm, etc.)
'''
"""
self.sp_num = sp_num
self.fun_num = fun_num
self.p_bins = p_bins
@@ -62,7 +63,7 @@
def translate_operation_tensor(self, operation_tensor, return_log_prob=False, argmax=False):
'''
"""
takes in a tensor representing an operation and returns an actual operation which
is in the form of:
("Invert", 0.8, None)
@@ -93,9 +94,16 @@ class aa_learner:
operation (list of tuples):
An operation in the format that can be directly put into an
AutoAugment object.
log_prob
'''
log_prob (float):
Used in reinforcement learning updates, such as the proximal
policy update in the gru_learner.
Can only be used when self.discrete_p_m is True.
We add the log probabilities of the chosen image_function,
probability, and magnitude indices.
This corresponds to multiplying the non-logged probabilities,
then taking the log.
"""
if (not self.discrete_p_m) and return_log_prob:
raise ValueError("You are not supposed to use return_log_prob=True when the agent's \
self.discrete_p_m is False!")
@@ -174,27 +182,55 @@ class aa_learner:
def generate_new_policy(self):
'''
Generate a new random policy in the form of
[
(("Invert", 0.8, None), ("Contrast", 0.2, 6)),
(("Rotate", 0.7, 2), ("Invert", 0.8, None)),
(("Sharpness", 0.8, 1), ("Sharpness", 0.9, 3)),
(("ShearY", 0.5, 8), ("Invert", 0.7, None)),
]
'''
"""
Generate a new policy which can be fed into an AutoAugment object
by calling:
AutoAugment.subpolicies = policy
Args:
none
Returns:
new_policy (list[tuple]):
A new policy generated by the controller. It
has the form of:
[
(("Invert", 0.8, None), ("Contrast", 0.2, 6)),
(("Rotate", 0.7, 2), ("Invert", 0.8, None)),
(("Sharpness", 0.8, 1), ("Sharpness", 0.9, 3)),
(("ShearY", 0.5, 8), ("Invert", 0.7, None)),
]
This object can be fed into an AutoAugment object
by calling: AutoAugment.subpolicies = policy
"""
raise NotImplementedError('generate_new_policy not implemented in aa_learner')
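A usage sketch, assuming a concrete subclass such as randomsearch_learner and an AutoAugment-style transform object with a subpolicies attribute as the docstring describes (all names here are illustrative):

learner = randomsearch_learner()           # any concrete subclass of aa_learner
policy = learner.generate_new_policy()     # e.g. [(("Invert", 0.8, None), ("Contrast", 0.2, 6)), ...]

aa_transform = AutoAugment()               # the AutoAugment object referred to above
aa_transform.subpolicies = policy          # plug the generated policy into the transform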
def learn(self, train_dataset, test_dataset, child_network_architecture, toy_flag):
'''
Does the loop which is seen in Figure 1 in the AutoAugment paper.
In other words, repeat:
"""
Runs the main loop of finding a good policy for the given child network,
training dataset, and test (validation) dataset.
This is the loop seen in Figure 1 of the AutoAugment paper,
which is:
1. <generate a random policy>
2. <see how good that policy is>
3. <save how good the policy is in a list/dictionary>
until a certain condition (either specified by the user or pre-specified) is met
'''
Args:
train_dataset (torchvision.datasets.vision.VisionDataset)
test_dataset (torchvision.datasets.vision.VisionDataset)
child_network_architecture (type): NOTE THAT THIS VARIABLE IS NOT
an nn.Module object. Therefore, this needs
to be, say, `models.LeNet` instead of
`models.LeNet()`.
toy_flag (boolean): whether we want to obtain a toy version of
train_dataset and test_dataset and use those.
Returns:
none
"""
# This is dummy code
# test out 15 random policies
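A sketch of what the loop described above might look like in terms of the methods documented in this class; the body below is an assumption, not the actual implementation:

for _ in range(15):                                  # test out 15 random policies
    policy = self.generate_new_policy()              # 1. generate a random policy
    child_network = child_network_architecture()     # instantiate (a class, not an nn.Module, was passed in)
    reward = self.test_autoaugment_policy(policy, child_network,
                                          train_dataset, test_dataset, toy_flag)  # 2. see how good it is
    self.history.append((policy, reward))            # 3. save how good the policy is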
@@ -211,11 +247,24 @@ class aa_learner:
def test_autoaugment_policy(self, policy, child_network, train_dataset, test_dataset,
toy_flag, logging=False):
'''
"""
Given a policy (using AutoAugment paper terminology), we train a child network
using the policy and return the accuracy (how good the policy is for the dataset and
child network).
'''
Args:
policy (list[tuple]): A list of tuples representing a policy.
child_network (nn.Module)
train_dataset (torchvision.datasets.vision.VisionDataset)
test_dataset (torchvision.datasets.vision.VisionDataset)
toy_flag (boolean): Whether we want to obtain a toy version of
train_dataset and test_dataset and use those.
logging (boolean): Whether we want to save logs
Returns:
accuracy (float): best accuracy reached in any epoch while training the child network
"""
# We need to define an object aa_transform which takes in the image and
# transforms it with the policy (specified in its .policies attribute)
# in its forward pass
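A rough sketch of the idea in the comment above; the AutoAugment object and its attributes are assumptions based on the surrounding docstrings:

aa_transform = AutoAugment()
aa_transform.subpolicies = policy                    # the policy under evaluation
train_transform = torchvision.transforms.Compose([
    aa_transform,
    torchvision.transforms.ToTensor(),
])
train_dataset.transform = train_transform            # images are now augmented with this policy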
@@ -235,7 +284,7 @@ class aa_learner:
batch_size=32,
n_samples=0.5,
seed=100)
# train the child network with the dataloaders equipped with our specific policy
accuracy = train_child_network(child_network,
train_loader,
@@ -249,4 +298,39 @@ class aa_learner:
print_every_epoch=True)
# if logging is true, 'accuracy' is actually a tuple: (accuracy, accuracy_log)
return accuracy
\ No newline at end of file
return accuracy
def demo_plot(self, train_dataset, test_dataset, child_network_architecture, toy_flag, n=5):
"""
I made this to plot a couple of accuracy graphs to help manually tune my gradient
optimizer hyperparameters.
Saves a plot of `n` training accuracy graphs overlapped.
"""
acc_lists = []
# This is dummy code
# test out `n` random policies
for _ in range(n):
policy = self.generate_new_policy()
pprint(policy)
child_network = child_network_architecture()
reward, acc_list = self.test_autoaugment_policy(policy, child_network, train_dataset,
test_dataset, toy_flag, logging=True)
self.history.append((policy, reward))
acc_lists.append(acc_list)
for acc_list in acc_lists:
plt.plot(acc_list)
plt.title('I ran 5 random policies to see if there is any sign of \
catastrophic failure during training. If any lines reach \
significantly lower (>10%) accuracies, you might want \
to tune the hyperparameters')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()
plt.savefig('training_graphs_without_policies')
\ No newline at end of file
@@ -31,22 +31,28 @@ augmentation_space = [
class gru_learner(aa_learner):
# Uses a GRU controller which is updated via Proximal Policy Optimization
# It is the same model used in
# http://arxiv.org/abs/1805.09501
# and
# http://arxiv.org/abs/1611.01578
"""
An AutoAugment learner with a GRU controller
The original AutoAugment paper (http://arxiv.org/abs/1805.09501)
uses an LSTM controller updated via Proximal Policy Optimization.
(See Section 3 of AutoAugment paper)
The GRU has been shown to be as powerful a sequential neural
network as the LSTM whilst training and testing much faster
(https://arxiv.org/abs/1412.3555), which is why we use a GRU
in place of the LSTM.
"""
def __init__(self, sp_num=5, fun_num=14, p_bins=11, m_bins=10, discrete_p_m=True, alpha=0.2):
'''
"""
Args:
sp_num: number of subpolicies per policy
fun_num: number of image functions in our search space
p_bins: number of bins we divide the interval [0,1] for probabilities
m_bins: number of bins we divide the magnitude space
alpha: Exploration parameter. The lower this value, the more exploration.
'''
alpha (float): Exploration parameter. It is multiplied with the
operation tensors before they are softmaxed.
The lower this value, the smoother the softmax
output will be, and hence the more exploration.
"""
super().__init__(sp_num, fun_num, p_bins, m_bins, discrete_p_m=True)
self.alpha = alpha
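A standalone illustration of the smoothing effect of alpha, using made-up logits rather than controller output:

import torch

logits = torch.tensor([2.0, 1.0, 0.1])
peaked = torch.softmax(1.0 * logits, dim=0)    # alpha = 1.0 -> sharper distribution
smooth = torch.softmax(0.2 * logits, dim=0)    # alpha = 0.2 -> flatter distribution, more exploration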
@@ -57,19 +63,41 @@ class gru_learner(aa_learner):
def generate_new_policy(self):
'''
We run the GRU for 10 timesteps to obtain 10 operations.
At each time step, it outputs a (fun_num + p_bins + m_bins) dimensional vector
"""
The GRU controller pops out a new policy.
At each time step, the GRU outputs a
(fun_num + p_bins + m_bins, ) dimensional tensor which
contains information regarding which 'image function' to use, and
which values of 'probability (prob)' and 'magnitude (mag)' to use.
We run the GRU for 10 timesteps to obtain 10 such tensors.
And then for each operation, we put it through self.translate_operation_tensor.
Generate a new policy in the form of
We then softmax the parts of the tensor which represent the
choice of function, prob, and mag separately, so that the
resulting tensor's values sum up to 3.
Then we input each tensor into self.translate_operation_tensor
with parameter (return_log_prob=True), which outputs a tuple
in the form of ('img_function_name', prob, mag) and a float
representing the log probability of choosing that particular
func, prob and mag.
We add up the log probabilities of each operation.
We turn the operations into a list of 5 tuples such as:
[
(("Invert", 0.8, None), ("Contrast", 0.2, 6)),
(("Rotate", 0.7, 2), ("Invert", 0.8, None)),
(("Sharpness", 0.8, 1), ("Sharpness", 0.9, 3)),
(("ShearY", 0.5, 8), ("Invert", 0.7, None)),
]
'''
This list can then be input into an AutoAugment object
as is done in self.learn().
We return the list and the sum of the log probabilities.
"""
log_prob = 0
# we need a random input to put in
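A standalone sketch of the separate softmaxing described in the docstring, using the default dimensions from __init__ (variable names are illustrative):

import torch

fun_num, p_bins, m_bins, alpha = 14, 11, 10, 0.2
raw = torch.randn(fun_num + p_bins + m_bins)               # stand-in for one GRU output at a single timestep
fun_part = torch.softmax(alpha * raw[:fun_num], dim=0)
p_part = torch.softmax(alpha * raw[fun_num:fun_num + p_bins], dim=0)
m_part = torch.softmax(alpha * raw[fun_num + p_bins:], dim=0)
operation_tensor = torch.cat([fun_part, p_part, m_part])   # each block sums to 1, so the values sum to 3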
@@ -108,13 +136,6 @@
def learn(self, train_dataset, test_dataset, child_network_architecture, toy_flag, m=8):
'''
Does the loop which is seen in Figure 1 in the AutoAugment paper.
In other words, repeat:
1. <generate a random policy>
2. <see how good that policy is>
3. <save how good the policy is in a list/dictionary>
'''
# optimizer for training the GRU controller
cont_optim = torch.optim.SGD(self.controller.parameters(), lr=1e-2)
@@ -179,8 +200,7 @@ if __name__=='__main__':
transform=torchvision.transforms.ToTensor())
child_network = cn.lenet
learner = gru_learner(discrete_p_m=False)
newpol = learner.generate_new_policy()
learner.learn(train_dataset, test_dataset, child_network, toy_flag=True)
pprint(learner.history)
@@ -32,27 +32,23 @@ augmentation_space = [
class randomsearch_learner(aa_learner):
def __init__(self, sp_num=5, fun_num=14, p_bins=11, m_bins=10, discrete_p_m=False):
'''
Args:
sp_num: number of subpolicies per policy
fun_num: number of image functions in our search space
p_bins: number of bins we divide the interval [0,1] for probabilities
m_bins: number of bins we divide the magnitude space
'''
super().__init__(sp_num, fun_num, p_bins, m_bins, discrete_p_m)
def generate_new_discrete_operation(self):
'''
"""
generate a new random operation in the form of a tensor of dimension:
(fun_num + 11 + 10)
Used only when self.discrete_p_m=True
The first fun_num dimensions are a one-hot encoding to specify which function to use.
The next 11 dimensions specify which 'probability' to choose.
(0.0, 0.1, ..., 1.0)
The next 10 dimensions specify which 'magnitude' to choose.
(0, 1, ..., 9)
'''
"""
random_fun = np.random.randint(0, self.fun_num)
random_prob = np.random.randint(0, self.p_bins)
random_mag = np.random.randint(0, self.m_bins)
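One way the one-hot operation tensor described above could be assembled from such random indices; this is a sketch, not necessarily the method's actual continuation:

import torch
import torch.nn.functional as F

fun_num, p_bins, m_bins = 14, 11, 10
random_fun, random_prob, random_mag = 2, 7, 4          # e.g. sampled as in the lines above
fun_t = F.one_hot(torch.tensor(random_fun), num_classes=fun_num).float()
prob_t = F.one_hot(torch.tensor(random_prob), num_classes=p_bins).float()
mag_t = F.one_hot(torch.tensor(random_mag), num_classes=m_bins).float()
operation_tensor = torch.cat([fun_t, prob_t, mag_t])   # shape: (fun_num + p_bins + m_bins,) == (35,)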
@@ -68,17 +64,20 @@ class randomsearch_learner(aa_learner):
def generate_new_continuous_operation(self):
'''
"""
Returns operation_tensor, which is a tensor representation of a random operation with
dimension:
(fun_num + 1 + 1)
Used only when self.discrete_p_m=False.
The first fun_num dimensions are a one-hot encoding to specify which function to use.
The next dimension specifies which 'probability' to choose.
0 < x < 1
The next dimension specifies which 'magnitude' to choose.
0 < x < 9
'''
"""
fun_p_m = torch.zeros(self.fun_num + 2)
# pick a random image function
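A possible continuation of this method, following the ranges given in the docstring (illustrative only, not the repository's actual code):

random_fun = np.random.randint(0, self.fun_num)
fun_p_m[random_fun] = 1.0                              # one-hot choice of image function
fun_p_m[-2] = np.random.uniform()                      # probability, 0 < x < 1
fun_p_m[-1] = np.random.uniform() * (self.m_bins - 1)  # magnitude, 0 < x < 9
return fun_p_m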
@@ -92,15 +91,11 @@ class randomsearch_learner(aa_learner):
def generate_new_policy(self):
'''
Generate a new random policy in the form of
[
(("Invert", 0.8, None), ("Contrast", 0.2, 6)),
(("Rotate", 0.7, 2), ("Invert", 0.8, None)),
(("Sharpness", 0.8, 1), ("Sharpness", 0.9, 3)),
(("ShearY", 0.5, 8), ("Invert", 0.7, None)),
]
'''
"""
Generates a new policy, with the elements chosen at random
(uniform random distribution).
"""
new_policy = []
for _ in range(self.sp_num): # generate sp_num subpolicies for each policy
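One possible body for this loop, built from the operation helpers documented above (a sketch, not necessarily the actual implementation):

ops = []
for _ in range(2):                                     # each subpolicy is a pair of operations
    if self.discrete_p_m:
        op_tensor = self.generate_new_discrete_operation()
    else:
        op_tensor = self.generate_new_continuous_operation()
    ops.append(self.translate_operation_tensor(op_tensor))
new_policy.append(tuple(ops))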
@@ -123,13 +118,6 @@ class randomsearch_learner(aa_learner):
def learn(self, train_dataset, test_dataset, child_network_architecture, toy_flag):
'''
Does the loop which is seen in Figure 1 in the AutoAugment paper.
In other words, repeat:
1. <generate a random policy>
2. <see how good that policy is>
3. <save how good the policy is in a list/dictionary>
'''
# test out 1500 random policies
for _ in range(1500):
policy = self.generate_new_policy()
@@ -147,32 +135,6 @@ class randomsearch_learner(aa_learner):
pickle.dump(self.history, file)
def demo_plot(self, train_dataset, test_dataset, child_network_architecture, toy_flag, n=50):
'''
I made this to plot a couple of accuracy graphs to help manually tune my gradient
optimizer hyperparameters.
'''
acc_lists = []
# This is dummy code
# test out 15 random policies
for _ in range(n):
policy = self.generate_new_policy()
pprint(policy)
child_network = child_network_architecture()
reward, acc_list = self.test_autoaugment_policy(policy, child_network, train_dataset,
test_dataset, toy_flag, logging=True)
self.history.append((policy, reward))
acc_lists.append(acc_list)
for acc_list in acc_lists:
plt.plot(acc_list)
plt.title('I ran 50 random policies to see if there is any sign of \
catastrophic failure during training')
plt.show()
plt.savefig('random_policies')
if __name__=='__main__':
@@ -190,5 +152,4 @@ if __name__=='__main__':
rs_learner = randomsearch_learner(discrete_p_m=True)
rs_learner.learn(train_dataset, test_dataset, child_network, toy_flag=True)
# rs_learner.demo_plot(train_dataset, test_dataset, child_network, toy_flag=True)
pprint(rs_learner.history)
\ No newline at end of file