"""
Example of a simple convolutional neural network (CNN) adapted from the example
on https://tensorflow.googlesource.com/tensorflow/+/master/tensorflow/models/image/mnist/convolutional.py
This file demonstrates how to build a model using tensor flow. The model can then be trained using
the file convnet_train.py.
It also contains functions for preprocessing and loading the iFind image dataset
Author: Christian Baumgartner (18. Jan 2016)
"""
import tensorflow as tf
import os
import numpy as np
from PIL import Image
IMAGE_SIZE = 64 # i.e. IMAGE_SIZExIMAGE_SIZE pixels
NUM_CHANNELS = 1 # i.e. number of colour channels (gray images have 1)
NUM_LABELS = 19 # Number of labels in the data
SEED = 66478 # Set to None for random seed.
IFIND_ROOT = '/vol/medic01/users/cbaumgar/data/iFind1/iFind1_300/iFind1_simple/stillframes_testtrain'
### DEFINE THE FREE PARAMETER VARIABLES OF THE MODEL
# In particular this includes the weights (W) and biases (b) of the convolutional and fully connected layers
# The initial values will be assigned when we call:
# {tf.initialize_all_variables().run()} in convnet_train.py
conv1_weights = tf.Variable(
    tf.truncated_normal([5, 5, NUM_CHANNELS, 64],  # 5x5 filter, depth 64.
                        stddev=0.1,
                        seed=SEED))
conv1_biases = tf.Variable(tf.zeros([64]))
conv2_weights = tf.Variable(
    tf.truncated_normal([5, 5, 64, 64],
                        stddev=0.1,
                        seed=SEED))
conv2_biases = tf.Variable(tf.constant(0.1, shape=[64]))
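# The fully connected input size below follows from the architecture: the
# 64x64 input passes through two 4x4 max-pooling stages with stride 4
# (64 / 4 / 4 = 4), leaving feature maps of size 4x4 with depth 64,
# i.e. 4*4*64 values per image once flattened.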
fc1_weights = tf.Variable(  # fully connected, depth 512.
    tf.truncated_normal(
        [4*4*64, 512],
        stddev=0.1,
        seed=SEED))
fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))
fc2_weights = tf.Variable(
    tf.truncated_normal([512, NUM_LABELS],
                        stddev=0.1,
                        seed=SEED))
fc2_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))
### Define the model itself
# In particular, create the layers and connect them to each other
def model(data, train=False):
"""
This function builds the convolution neural network model.
That means all the layers from input to output are defined
It returns the prediction of the model given some data
"""
# 2D convolution, with 'SAME' padding (i.e. the output feature map has
# the same size as the input). Note that {strides} is a 4D array whose
# shape matches the data layout: [image index, y, x, depth].
conv = tf.nn.conv2d(data,
conv1_weights,
strides=[1, 1, 1, 1],
padding='SAME')
# Bias and rectified linear non-linearity.
relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
# Max pooling. The kernel size spec {ksize} also follows the layout of
# the data. Here we have a pooling window of 2, and a stride of 2.
pool = tf.nn.max_pool(relu,
ksize=[1, 4, 4, 1],
strides=[1, 4, 4, 1],
padding='SAME')
    conv = tf.nn.conv2d(pool,
                        conv2_weights,
                        strides=[1, 1, 1, 1],
                        padding='SAME')
    relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases))
    pool = tf.nn.max_pool(relu,
                          ksize=[1, 4, 4, 1],
                          strides=[1, 4, 4, 1],
                          padding='SAME')
    # Reshape the feature maps into a 2D matrix to feed them to the
    # fully connected layers.
    pool_shape = pool.get_shape().as_list()
    reshape = tf.reshape(
        pool,
        [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])
    # Fully connected layer. Note that the '+' operation automatically
    # broadcasts the biases.
    hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
    # Add a 50% dropout during training only. Dropout also scales
    # activations such that no rescaling is needed at evaluation time.
    if train:
        hidden = tf.nn.dropout(hidden, 0.5, seed=SEED)
    return tf.matmul(hidden, fc2_weights) + fc2_biases
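# A minimal sketch of how the model might be wired up for inference. The
# placeholder name and batch size are illustrative assumptions; the actual
# training graph is built in convnet_train.py:
#
#   batch_size = 16
#   images = tf.placeholder(
#       tf.float32, shape=(batch_size, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
#   logits = model(images)              # shape (batch_size, NUM_LABELS)
#   prediction = tf.nn.softmax(logits)  # per-class probabilities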
### Load the data
def load_ifind_data(path):
"""
This function tries to load the preprocessed training and testing data
located in `path`
If it cannot find the data it preprocesses the data if finds in
the folder IFIND_ROOT
It returns training and testing data plus two dictionaries for going from label numbers to label names
and back.
"""
expected_label_path = os.path.join(path, 'ifind1_scanplane_labels.npy')
expected_data_path = os.path.join(path, 'ifind1_scanplane_data.npy')
if os.path.exists(expected_data_path) and os.path.exists(expected_label_path):
y_train, y_test, label_names, label_numbers = np.load(expected_label_path)
X_train, X_test = np.load(expected_data_path)
else:
print "Dataset is being created from %s" % IFIND_ROOT
X_train, y_train, X_test, y_test, label_names, label_numbers = _preprocess_dataset(path)
return X_train, y_train, X_test, y_test, label_names, label_numbers
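# Typical usage (a sketch; assumes `path` holds, or should receive, the
# preprocessed .npy files, and that IFIND_ROOT points at the raw stillframes):
#
#   X_train, y_train, X_test, y_test, label_names, label_numbers = \
#       load_ifind_data('.')
#   print "train: %s, test: %s" % (X_train.shape, X_test.shape)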
### Helper functions for the data loader
def _preprocess_dataset(path):
"""
Starts the proprocessing
"""
test_folder = os.path.join(IFIND_ROOT, 'test')
train_folder = os.path.join(IFIND_ROOT, 'train')
print "Doing test folder: "
X_test, y_test, label_numbers = _process_folder(test_folder) # don't augment test dataset (we want to test on original data)
print "Doing train folder: "
X_train, y_train, dmy = _process_folder(train_folder, True, label_numbers, augment_dataset=True)
label_names = _invert_dictionary(label_numbers)
# Convert into a format suitable for tensorflow
X_train = _whiten_images(X_train)
X_test = _whiten_images(X_test)
y_train = _dense_to_one_hot(y_train, len(label_names))
y_test = _dense_to_one_hot(y_test, len(label_names))
np.save(os.path.join(path, 'ifind1_scanplane_labels.npy'),(y_train, y_test, label_names, label_numbers))
np.save(os.path.join(path, 'ifind1_scanplane_data.npy'),(X_train, X_test))
return X_train, y_train, X_test, y_test, label_names, label_numbers
def _process_folder(folder, use_existing_label_numbers=False, label_numbers=None, augment_dataset=False):
"""
Goes into a specific folder and preprocess the data it finds there
:param folder: The folder
:param use_existing_label_numbers: Use a label dictionary found in a previous run of this function?
:param label_numbers: If use_existing_label_numbers, give it the label numbers
:param augment_dataset: Augment the dataset? I.e., from each image create multiple versions
:return: the preprocessed data, the labels, and the label dictionary
"""
if not use_existing_label_numbers:
label_numbers = {}
label_counter = -1
label_number = None
labels = []
data = []
done_labels = []
for root, directories, files in os.walk(folder):
for fn in files:
path = os.path.join(root, fn)
label_name = root.split('/')[-1]
if label_name not in done_labels:
label_counter += 1
if not use_existing_label_numbers:
label_number = label_counter
label_numbers[label_name] = label_number
print "added label %d which is %s" % (label_number, label_name)
else:
label_number = label_numbers[label_name]
print "Doing label %s which has label %d" % (label_name, label_number)
done_labels.append(label_name)
# image processing
im = Image.open(path)
im = im.crop((0, 106, im.size[0], 713))
# the crop ranges are square regions from [center, left, right]
crop_ranges = ((176, 0, 783, im.size[1]), (106, 0, 713, im.size[1]), (246, 0, 853, im.size[1]))
if not augment_dataset:
im_array = _crop_image(im, crop_ranges[0])
data.append(np.reshape(im_array, (IMAGE_SIZE, IMAGE_SIZE, 1)))
labels.append(label_number)
else:
for crop_range in crop_ranges:
im_array = _crop_image(im, crop_range)
data.append(np.reshape(im_array, (IMAGE_SIZE, IMAGE_SIZE, 1)))
labels.append(label_number)
data = np.asarray(data)
labels = np.asarray(labels)
return data, labels, label_numbers
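# _process_folder assumes one subdirectory per label, with the directory name
# doubling as the label name, e.g. (illustrative names only):
#
#   <folder>/abdominal/frame_0001.png
#   <folder>/head/frame_0001.png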
def _crop_image(im, crop_range):
"""
Helper for cropping images
"""
im_cropped = im.crop(crop_range)
im_cropped.thumbnail((IMAGE_SIZE, IMAGE_SIZE))
im_gray = im_cropped.convert('L')
im_array = np.asarray(im_gray, dtype=np.float32)
# add a dummy dimensions where usually the color channels would be
im_array = np.reshape(im_array, (IMAGE_SIZE, IMAGE_SIZE, 1))
return im_array
def _invert_dictionary(dictionary):
"""
Helper for inverting a dictionary
"""
return {v: k for k, v in dictionary.items()}
def _whiten_images(X):
"""
Helper for making the images zero mean and unit standard deviation i.e. `white`
"""
X_white = np.zeros(X.shape, dtype=np.float32)
for ii in xrange(X.shape[0]):
Xc = X[ii,:,:,:]
mc = Xc.mean()
sc = Xc.std()
Xc_white = np.divide((Xc - mc), sc)
X_white[ii,:,:,:] = Xc_white
return X_white
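# Sketch of the effect (names are illustrative): for a batch X of shape
# (N, IMAGE_SIZE, IMAGE_SIZE, 1),
#
#   Xw = _whiten_images(X)
#   # each Xw[ii] now has mean ~0.0 and std ~1.0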
def _dense_to_one_hot(labels_dense, num_classes):
"""
Convert class labels from scalars to one-hot vectors.
This means if there are 10 possible labels
1 -> [1,0,0,0,0,0,0,0,0,0]
4 -> [0,0,0,1,0,0,0,0,0,0]
etc...
"""
num_labels = labels_dense.shape[0]
index_offset = np.arange(num_labels) * num_classes
labels_one_hot = np.zeros((num_labels, num_classes))
labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
return labels_one_hot
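# Worked example (a sketch): with num_classes=3,
#
#   _dense_to_one_hot(np.array([0, 2, 1]), 3)
#   # -> [[1, 0, 0],
#   #     [0, 0, 1],
#   #     [0, 1, 0]]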