"""
Example of a simple convolutional neural network (CNN) adapted from the example
on https://tensorflow.googlesource.com/tensorflow/+/master/tensorflow/models/image/mnist/convolutional.py

This file demonstrates how to build a model using TensorFlow. The model can then be trained
using the file convnet_train.py.
It also contains functions for preprocessing and loading the iFind image dataset.

Author: Christian Baumgartner (18. Jan 2016)
"""

import tensorflow as tf
import os
import numpy as np
from PIL import Image

IMAGE_SIZE = 64   # i.e. IMAGE_SIZExIMAGE_SIZE pixels
NUM_CHANNELS = 1  # i.e. number of colour channels (gray images have 1)
NUM_LABELS = 19   # Number of labels in the data
SEED = 66478      # Set to None for random seed.
IFIND_ROOT = '/vol/medic01/users/cbaumgar/data/iFind1/iFind1_300/iFind1_simple/stillframes_testtrain'

### DEFINE THE FREE PARAMETER VARIABLES OF THE MODEL
# In particular this includes the weights (W) and biases (b) of the convolutional and fully connected layers
# The initial values will be assigned when we call:
# {tf.initialize_all_variables().run()} in convnet_train.py

conv1_weights = tf.Variable(
        tf.truncated_normal([5, 5, NUM_CHANNELS, 64],  # 5x5 filter, depth 64.
                            stddev=0.1,
                            seed=SEED))
conv1_biases = tf.Variable(tf.zeros([64]))

conv2_weights = tf.Variable(
        tf.truncated_normal([5, 5, 64, 64],
                            stddev=0.1,
                            seed=SEED))
conv2_biases = tf.Variable(tf.constant(0.1, shape=[64]))

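# The first fully connected layer expects 4*4*64 inputs: the two max-pooling
# stages in model() below use 4x4 windows with stride 4, shrinking the
# 64x64 input to 16x16 and then to 4x4, with 64 feature maps from conv2.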
fc1_weights = tf.Variable(  # fully connected, depth 512.
        tf.truncated_normal(
                [4*4*64, 512],
                stddev=0.1,
                seed=SEED))
fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))

fc2_weights = tf.Variable(
        tf.truncated_normal([512, NUM_LABELS],
                            stddev=0.1,
                            seed=SEED))
fc2_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))

### Define the model itself
# In particular make layers and connect them to each other

def model(data, train=False):
    """
    This function builds the convolution neural network model.
    That means all the layers from input to output are defined

    It returns the prediction of the model given some data
    """
    # 2D convolution, with 'SAME' padding (i.e. the output feature map has
    # the same size as the input). Note that {strides} is a 4D array whose
    # shape matches the data layout: [image index, y, x, depth].
    conv = tf.nn.conv2d(data,
                        conv1_weights,
                        strides=[1, 1, 1, 1],
                        padding='SAME')
    # Bias and rectified linear non-linearity.
    relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
    # Max pooling. The kernel size spec {ksize} also follows the layout of
    # the data. Here we have a pooling window of 4, and a stride of 4.
    pool = tf.nn.max_pool(relu,
                          ksize=[1, 4, 4, 1],
                          strides=[1, 4, 4, 1],
                          padding='SAME')
    conv = tf.nn.conv2d(pool,
                        conv2_weights,
                        strides=[1, 1, 1, 1],
                        padding='SAME')

    relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases))
    pool = tf.nn.max_pool(relu,
                          ksize=[1, 4, 4, 1],
                          strides=[1, 4, 4, 1],
                          padding='SAME')

    # Reshape the feature map cuboid into a 2D matrix to feed it to the
    # fully connected layers.
    pool_shape = pool.get_shape().as_list()
    reshape = tf.reshape(
            pool,
            [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])
    # Fully connected layer. Note that the '+' operation automatically
    # broadcasts the biases.
    hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
    # Add a 50% dropout during training only. Dropout also scales
    # activations such that no rescaling is needed at evaluation time.
    if train:
        hidden = tf.nn.dropout(hidden, 0.5, seed=SEED)

    return tf.matmul(hidden, fc2_weights) + fc2_biases
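
# A minimal usage sketch of model() (the batch size of 16 is a hypothetical
# value; the real training pipeline lives in convnet_train.py):
#
#     images = tf.placeholder(tf.float32,
#                             shape=(16, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
#     logits = model(images, train=True)  # shape: (16, NUM_LABELS)
#     # `logits` can then be fed into e.g. tf.nn.softmax_cross_entropy_with_logits.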

### Load the data

def load_ifind_data(path):
    """
    This function tries to load the preprocessed training and testing data
    located in `path`
    If it cannot find the data it preprocesses the data if finds in
    the folder IFIND_ROOT
    It returns training and testing data plus two dictionaries for going from label numbers to label names
    and back.
    """
    expected_label_path = os.path.join(path, 'ifind1_scanplane_labels.npy')
    expected_data_path = os.path.join(path, 'ifind1_scanplane_data.npy')

    if os.path.exists(expected_data_path) and os.path.exists(expected_label_path):

        y_train, y_test, label_names, label_numbers = np.load(expected_label_path)
        X_train, X_test = np.load(expected_data_path)

    else:
        print "Dataset is being created from %s" % IFIND_ROOT
        X_train, y_train, X_test, y_test, label_names, label_numbers = _preprocess_dataset(path)

    return X_train, y_train, X_test, y_test, label_names, label_numbers
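
# Example call (hypothetical path; the .npy files are created on first run):
#
#     X_train, y_train, X_test, y_test, label_names, label_numbers = \
#         load_ifind_data('/tmp/ifind_cache')
#     # X_train: (N, 64, 64, 1) float32, y_train: (N, NUM_LABELS) one-hot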

### Helper functions for the data loader

def _preprocess_dataset(path):
    """
    Starts the proprocessing
    """
    test_folder = os.path.join(IFIND_ROOT, 'test')
    train_folder = os.path.join(IFIND_ROOT, 'train')

    print "Doing test folder: "
    X_test, y_test, label_numbers = _process_folder(test_folder) # don't augment test dataset (we want to test on original data)
    print "Doing train folder: "
    X_train, y_train, dmy = _process_folder(train_folder, True, label_numbers, augment_dataset=True)

    label_names = _invert_dictionary(label_numbers)

    # Convert into a format suitable for tensorflow
    X_train = _whiten_images(X_train)
    X_test = _whiten_images(X_test)
    y_train = _dense_to_one_hot(y_train, len(label_names))
    y_test = _dense_to_one_hot(y_test, len(label_names))

    np.save(os.path.join(path, 'ifind1_scanplane_labels.npy'),(y_train, y_test, label_names, label_numbers))
    np.save(os.path.join(path, 'ifind1_scanplane_data.npy'),(X_train, X_test))

    return X_train, y_train, X_test, y_test, label_names, label_numbers

def _process_folder(folder, use_existing_label_numbers=False, label_numbers=None, augment_dataset=False):
    """
    Goes into a specific folder and preprocess the data it finds there
    :param folder: The folder
    :param use_existing_label_numbers: Use a label dictionary found in a previous run of this function?
    :param label_numbers: If use_existing_label_numbers, give it the label numbers
    :param augment_dataset: Augment the dataset? I.e., from each image create multiple versions
    :return: the preprocessed data, the labels, and the label dictionary
    """

    if not use_existing_label_numbers:
        label_numbers = {}

    label_counter = -1
    label_number = None
    labels = []
    data = []
    done_labels = []

    for root, directories, files in os.walk(folder):
        for fn in files:

            path = os.path.join(root, fn)
            label_name = os.path.basename(root)

            if label_name not in done_labels:
                label_counter += 1
                if not use_existing_label_numbers:
                    label_number = label_counter
                    label_numbers[label_name] = label_number
                    print "added label %d which is %s" % (label_number, label_name)
                else:
                    label_number = label_numbers[label_name]
                    print "Doing label %s which has label %d" % (label_name, label_number)

                done_labels.append(label_name)

            # image processing
            im = Image.open(path)
            im = im.crop((0, 106, im.size[0], 713))
            # the crop ranges are square regions from [center, left, right]
            crop_ranges = ((176, 0, 783, im.size[1]), (106, 0, 713, im.size[1]), (246, 0, 853, im.size[1]))

            if not augment_dataset:
                im_array = _crop_image(im, crop_ranges[0])
                data.append(np.reshape(im_array, (IMAGE_SIZE, IMAGE_SIZE, 1)))
                labels.append(label_number)
            else:
                for crop_range in crop_ranges:
                    im_array = _crop_image(im, crop_range)
                    data.append(np.reshape(im_array, (IMAGE_SIZE, IMAGE_SIZE, 1)))
                    labels.append(label_number)

    data = np.asarray(data)
    labels = np.asarray(labels)

    return data, labels, label_numbers
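
# The directory walk above assumes a layout where each class has its own
# subfolder and the label name is the folder name, e.g. (folder and file
# names are illustrative):
#
#     stillframes_testtrain/
#         train/Head/im_001.png
#         test/Head/im_042.png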

def _crop_image(im, crop_range):
    """
    Helper for cropping images
    """
    im_cropped = im.crop(crop_range)
    im_cropped.thumbnail((IMAGE_SIZE, IMAGE_SIZE))
    im_gray = im_cropped.convert('L')
    im_array = np.asarray(im_gray, dtype=np.float32)
    # add a dummy dimension where the colour channels would usually be
    im_array = np.reshape(im_array, (IMAGE_SIZE, IMAGE_SIZE, 1))
    return im_array

def _invert_dictionary(dictionary):
    """
    Helper for inverting a dictionary
    """
    return {v: k for k, v in dictionary.items()}
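
# For example, inverting the label dictionary turns {'Head': 0, 'Spine': 1}
# into {0: 'Head', 1: 'Spine'} (label names here are illustrative).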

def _whiten_images(X):
    """
    Helper for making the images zero mean and unit standard deviation i.e. `white`
    """

    X_white = np.zeros(X.shape, dtype=np.float32)

    for ii in xrange(X.shape[0]):

        Xc = X[ii,:,:,:]
        mc = Xc.mean()
        sc = Xc.std()

        Xc_white = np.divide((Xc - mc), sc)

        X_white[ii,:,:,:] = Xc_white

    return X_white
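
# For reference, the per-image loop above is equivalent to the vectorised
# form below (a sketch; tuple axes and keepdims need numpy >= 1.7):
#
#     means = X.mean(axis=(1, 2, 3), keepdims=True)
#     stds = X.std(axis=(1, 2, 3), keepdims=True)
#     X_white = ((X - means) / stds).astype(np.float32)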

def _dense_to_one_hot(labels_dense, num_classes):
    """
    Convert class labels from scalars to one-hot vectors.
    This means if there are 10 possible labels
    1 -> [0,1,0,0,0,0,0,0,0,0]
    4 -> [0,0,0,0,1,0,0,0,0,0]
    etc...
    """
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot
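
if __name__ == '__main__':
    # Quick sanity check of the one-hot conversion (illustrative labels only).
    example_labels = np.array([0, 2, 1])
    print _dense_to_one_hot(example_labels, 3)
    # Expected output:
    # [[ 1.  0.  0.]
    #  [ 0.  0.  1.]
    #  [ 0.  1.  0.]]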