Commit 485c40e7 authored by James Hay's avatar James Hay
Browse files

Final Commit. Results are now saved on USB

parent 50cd72b5
Title: Multiple Epidemic (Synthedemic) Fitting Implementation, earlier version for synthetic data.
Author: James Hay
Data: 05/09/14
Institution: Imperial College Department of Computing
Note it is advised that you 'make clean' and 'make' the program before
use. There should be no unusual library requirements (other than
perhaps boost). Gnuplot-iostream should be provided in this folder to
allow printing of graphs.
The Multiple Epidemic fitting framework offers the following features:
1. Iterative fitting of a .csv file of epidemic data. The file must be
set to comma separated values only, and contain two columns. The first
column must contain a time series from 0 to N, and the second column
contains the data to be fit.
2. Upon execution, the user is prompted to specify default or custom
options. Default options are to use an SSE based fitting procedure,
save graphs, and to not include IO in the fitting. The user may choose
to include I0, not save graphs, and to use the MLE based fitting
procedure instead (not currently functional).
3. The user must specify a valid .csv file and valid save directory at
run time. Providing invalid file locations will prompt the user to
correct these.
4. The program outputs a series of Gnuplot created graphs and a .csv
file of the model fitting results at each time point to the provided directory.
5. Note that to edit the list of considered candidate models, the code in datahandler.cpp must be changed, at the top of the "realtime_fit_multi" function.
6. The fitting framework may not be able to characterise some datasets. In such cases, it might be necessary/possible to alter the seed parameter range (in rand params) and optimisation bounds for the transition parameters, S0 and T0 (at the top of datahandler.cpp and the epidemic .hpp files respectively).
Title: Multiple Epidemic (Synthedemic) Fitting Implementation, earlier version for synthetic data.
Author: James Hay
Data: 05/09/14
Institution: Imperial College Department of Computing
Note it is advised that you 'make clean' and 'make' the program before
use. There should be no unusual library requirements (other than
perhaps boost). Gnuplot-iostream should be provided in this folder to
allow printing of graphs.
The Multiple Epidemic fitting framework offers the following features:
1. Iterative fitting of a .csv file of epidemic data. The file must be
set to comma separated values only, and contain two columns. The first
column must contain a time series from 0 to N, and the second column
contains the data to be fit.
2. Upon execution, the user is prompted to specify default or custom
options. Default options are to use an SSE based fitting procedure,
save graphs, and to not include IO in the fitting. The user may choose
to include I0, not save graphs, and to use the MLE based fitting
procedure instead (not currently functional).
3. The user must specify a valid .csv file and valid save directory at
run time. Providing invalid file locations will prompt the user to
correct these.
4. The program outputs a series of Gnuplot created graphs and a .csv
file of the model fitting results at each time point to the provided directory.
5. Note that to edit the list of considered candidate models, the code in datahandler.cpp must be changed, at the top of the "realtime_fit_multi" function.
6. The fitting framework may not be able to characterise some datasets. In such cases, it might be necessary/possible to alter the seed parameter range (in rand params) and optimisation bounds for S0 and T0 (at the top of datahandler.cpp and the epidemic .hpp files respectively).
This diff is collapsed.
#ifndef DATAHANDLER_HPP
#define DATAHANDLER_HPP
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <sstream>
#include <iterator>
#include "../gnuplot-iostream/gnuplot-iostream.h"
#include "epidemic.hpp"
using namespace std;
template<class Con>
void printcon(const Con& c){
std::cout.precision(12);
copy( c.begin(),
c.end(),
ostream_iterator<typename Con::value_type>(cout, " ") );
cout<<endl;
}
class Handler{
private:
vector<double> tempParams;
vector<int> epidemicSizes, tempSizes;
vector<vector<double> > baseModel, current_data, current_model, temp_model, empty_model, temp_data, future_data;
vector<Epidemic*> epidemics, tempEpidemics;
bool useMLE, optimT0, optimI0, singleEpi, plot, save,useLogistic;
bool activeBool = false;
string saveLocation = "graphs/";
double betaupper, betalower, gammalower, gammaupper;
double betaSeirUp,betaSeirDown,gammaSeirDown,gammaSeirUp,alphaSeirUp,alphaSeirDown;
double betaIrsirUp,betaIrsirDown,gammaIrsirDown,gammaIrsirUp;
double betaSerirUp,betaSerirDown,gammaSerirDown,gammaSerirUp,alphaSerirUp,alphaSerirDown,muSerirUp,muSerirDown;
double gammaExpUp,gammaExpDown;
public:
Handler();
~Handler();
// Optimisation Functions
vector<vector<double> > combine_vectors(vector<vector<double> > model, vector<vector<double> > data);
void likelihood_test(vector<double> &params);
void realtime_fit_multi(double targetRSp);
EpiType check_epidemic(vector<vector<double> > residuals);
double optimiseEpidemics(vector<double> &parameters, vector<vector<double> > &results, vector<vector<vector<double> > > &allResults, int& itr);
double fitEpidemics(vector<double> params);
vector<vector<double> > ode_solve(vector<double> params);
vector<vector<vector<double> > > ode_solve_separate(vector<double> params);
double dpois(vector<vector<double> > model, vector<vector<double> > data);
double fitEpidemicsMLE(vector<double> params);
// Data handling functions
vector<vector<double> > return_data(){
return(current_data);
}
double import_data(const char* file);
void update_options(bool mle, bool useT0, bool useI0, bool _singleEpi, bool savePlot, bool saveResults,string location,bool useLogist);
void save_all_results(vector<vector<double> > saveResults, vector<EpiType> selectedTypes);
vector<vector<double> > create_empty_data_vector(int _rows);
// Display functions
void print_epidemic_type(EpiType epi);
void print_vector(vector< vector<double> > my_data);
void print_vector(vector< vector<int> > my_data);
// Maths functions
double calculate_median(vector<vector<double> > data);
double calculate_MAD(vector<vector<double> > data, double median);
vector<vector<double> > base_model(vector<vector<double> > data);
double SStot(vector<vector<double> > data, int column);
double calculate_mean(vector<vector<double> > data, int column);
double calculate_sd(vector<vector<double> > data, int column);
vector<vector<double> > get_residuals(vector<vector<double> > data1, vector<vector<double> > data2, int index);
double aicc(double sse, int n, int k);
double poisson_pmf(const double k, const double lambda);
// Parameter and Epidemic handling functions
vector<double> convert_all_params_back(vector<double> pars);
void sense_check(vector<vector<double> > &model);
double logistic(double x, double xmin, double xmax);
double logit(double y, double xmin, double xmax);
void remove_epidemic(Epidemic* remove);
vector<Epidemic*> fewer_epidemics(int j);
Epidemic* new_epidemic(EpiType _newEpidemic, int time);
vector<double> concatenate_vectors(vector<double> a, vector<double> b);
vector<double> generate_seed_parameters();
void deactivate_epidemics();
void activate_last_epidemic();
vector<double> rand_params(EpiType _type);
void update_latest_epidemic(vector<double> finalParams);
void initial_param_bound();
vector<int> fewer_sizes(int j);
vector<double> convert_parameters_back(vector<double> pars, int number, double detection, double lowerTime, double upperTime, EpiType _type);
vector<double> convert_parameters_forward(vector<double> pars, int number, double time,double lowerTime, double upperTime, EpiType _type);
// Graph plotting functions
void plotGraph(vector<vector<double> > finalResults, vector<vector<double> > data, int index);
void plotGraphMulti(vector<vector<vector<double> > > finalResults, vector<vector<double> > totalResults, vector<vector<double> > data, int index, vector<double> parameters, double _RSquare, int column);
string create_label(Epidemic* epi, vector<double> parameters, int& i);
// Old functions
void testAddition(vector<vector<double> > data1, vector<vector<double> > data2, double offset);
void test_detect(vector<vector<double> > &results, vector<double> &params);
void realtime_fit(vector<vector<double> > &results, vector<double> &params, int version);
void reset_epidemics();
bool check_already_tested(vector<EpiType> tested, EpiType toCheck);
vector<Epidemic*> copy_epidemics(vector<Epidemic*> epi);
double calculate_SSE(vector<vector<double> > data1, vector<vector<double> > data2);
};
#endif
using namespace std;
#include "epidemic.hpp"
Epidemic::~Epidemic(){
}
// Constructor
Epidemic::Epidemic(double _tmax, vector<vector<double> > x, EpiType _type, int detection){
// Determine the type of epidemic and adjust population size stores accordingly
if(_type==sir || _type==irsir){
noPops=3;
parSize= 4;
}
else if(_type==spike){
noPops=1;
parSize= 3;
}
else if (_type==seir){
noPops = 4;
parSize= 5;
}
else if (_type == serir){
noPops = 5;
parSize= 6;
}
else{
noPops=3;
parSize= 4;
}
dPop.resize(noPops);
dPop1.resize(noPops);
dPop2.resize(noPops);
dPop3.resize(noPops);
dPop4.resize(noPops);
tmpPop.resize(noPops);
initialPop.resize(noPops);
populations.resize(noPops);
diffIndex = 0;
// Step size for Runge Kutta. Decrease to increase accuracy, increase to improve speed.
step = 0.5;
// Desired size of total solved model
tmax = _tmax;
// Set current data
current_data = x;
temp_model.resize(x.size()/step);
// Create a vector with the appropriate first column (ie. time values)
for(unsigned int i=0;i<(x.size()/step);++i){
temp_model[i].resize(2);
fill(temp_model[i].begin(),temp_model[i].end(),0.0);
temp_model[i][0] = i;
}
total_model = temp_model;
type = _type;
detectionTime = detection;
detectionTimeTemp = detection;
active = true;
optimTime = detection;
//if(minTime <=0) minTime = 0;
// if(seedTime <=0) seedTime = 0;
}
/* ============================= RUNGE KUTTA INTEGRATION ALGORITHM =========================*/
void Epidemic::Runge_Kutta(){
/* Integrates the equations one step, using Runge-Kutta 4
Note: we work with arrays rather than variables to make the
coding easier */
diffIndex = 0;
for(diffIndex=0;diffIndex<noPops;++diffIndex){
initialPop[diffIndex] = populations[diffIndex];
}
Diff(initialPop);
for(diffIndex=0;diffIndex<noPops;++diffIndex){
dPop1[diffIndex]=dPop[diffIndex];
tmpPop[diffIndex]=initialPop[diffIndex]+step*dPop1[diffIndex]/2;
}
Diff(tmpPop);
for(diffIndex=0;diffIndex<noPops;++diffIndex){
dPop2[diffIndex]=dPop[diffIndex];
tmpPop[diffIndex]=initialPop[diffIndex]+step*dPop2[diffIndex]/2;
}
Diff(tmpPop);
for(diffIndex=0;diffIndex<noPops;++diffIndex){
dPop3[diffIndex]=dPop[diffIndex];
tmpPop[diffIndex]=initialPop[diffIndex]+step*dPop3[diffIndex];
}
Diff(tmpPop);
for(diffIndex=0;diffIndex<noPops;++diffIndex){
dPop4[diffIndex]=dPop[diffIndex];
tmpPop[diffIndex]=initialPop[diffIndex]+(dPop1[diffIndex]/6 + dPop2[diffIndex]/3 + dPop3[diffIndex]/3 + dPop4[diffIndex]/6)*step;
}
for(diffIndex=0;diffIndex<noPops;++diffIndex){
populations[diffIndex] = tmpPop[diffIndex];
}
}
/* Solves the set of differential equations with the current SIR parameters and
saves these to the passed _results vector. Note that this only takes place up to
the current data size. */
void Epidemic::Solve_Eq_t0(vector<vector<double> >& _results, int index){
t=0;
int i=0;
_results[i][0] = t + t0;
_results[i][1] = populations[index];
t+=step;
i++;
do{
Runge_Kutta();
_results[i][0] = t + t0;
_results[i][1] = populations[index];
t+=step; // Step size
i++;
}
while((current_data.size()-t) > step);
}
/* Solves the ODEs for the current SIR parameters and saves these to _results. Difference
to above is that this is carried out for the entire data range (tmax) */
void Epidemic::Solve_Eq_total(vector<vector<double> >& _results, int index){
t=0.0;
int i=0;
_results[i][0] = t + t0;
_results[i][1] = populations[index];
t+=step;
i++;
do{
Runge_Kutta();
_results[i][0] = t + t0;
_results[i][1] = populations[index];
t+=step; // Step size
i++;
}
while((tmax - t) > step);
}
/* ============================== HOUSEKEEPING FUNCTIONS ================================== */
vector<double> Epidemic::return_parameters(){
return(pars);
}
void Epidemic::reset_models(int size){
temp_model.clear();
total_model.clear();
temp_model.resize(size);
// Create a vector with the appropriate first column (ie. time values)
for(int i=0;i<size;++i){
temp_model[i].resize(2);
fill(temp_model[i].begin(),temp_model[i].end(),0.0);
temp_model[i][0] = i*step;
}
total_model= temp_model;
}
// Updates SIR data
void Epidemic::update_data(vector<vector<double> > x){
current_data.clear();
current_data = x;
}
#ifndef EPIDEMIC_HPP
#define EPIDEMIC_HPP
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <cmath>
#include <cstdlib>
#include <iomanip>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_rng.h>
using namespace std;
enum EpiType{none, sir, irsir, seir, serir, spike};
class Epidemic{
protected:
double t, step, t0, tmax;
vector<double> dPop, dPop1, dPop2, dPop3, dPop4, tmpPop, initialPop, populations;
vector<double> pars;
vector<vector<double> > current_data, temp_model, total_model;
int noPops, diffIndex, detectionTime,detectionTimeTemp, seedTime, minTime, optimTime,parSize; //resize the pop vectors to fit this
EpiType type;
bool active;
public:
int infectedIndex;
Epidemic();
Epidemic(double _tmax, vector<vector<double> > x, EpiType _type, int detection);
virtual ~Epidemic() = 0;
vector<double> return_parameters();
int return_param_size(){return(parSize);}
EpiType return_type() { return type;};
int return_detection_time(){return detectionTime;};
int return_seed_time(){return seedTime;};
int return_min_time(){return minTime;};
void update_data(vector<vector<double> > x);
void reset_models(int size);
void deactive_epidemic(){active=false; detectionTime = optimTime;};
void active_epidemic(){ active = true; detectionTime = detectionTimeTemp;}
bool is_active(){return(active);}
void update_time(double optTime) { cout << "Best time is: " << optTime << endl; optimTime=optTime;}
/* NORMAL ODE SOLVER */
virtual void Diff(vector<double> Pop) = 0;
void Runge_Kutta();
void Solve_Eq_t0(vector<vector<double> >& _results, int index);
void Solve_Eq_total(vector<vector<double> >& _results, int index);
/* SSE SOLVER */
virtual vector<vector<double> > ode_solve(vector<double> parameters) = 0;
virtual vector<vector<double> > ode_solve_combined(vector<double> parameters) = 0;
};
#endif
using namespace std;
#include "irsir.hpp"
IRSIR::~IRSIR(){
cout << "deleting" << endl;
}
/* ============================== MODEL EQUATIONS ============================== */
void IRSIR::Diff(vector<double> Pop) {
// The differential equations
dPop[0] = - beta*Pop[0]*Pop[1]; // dS/dt
dPop[1] = beta*Pop[0]*Pop[1] - gamma*Pop[1]*Pop[2]; // dI/dt
dPop[2] = gamma*Pop[1]*Pop[2]; // dR/dt
}
/* ================================== SSE FITTING PROCEDURE ================================= */
/* Calculates a set of ODEs for each set of 4 parameters passed and returns the combined model */
vector<vector<double> > IRSIR::ode_solve_combined(vector<double> parameters){
pars = parameters;
reset_models((tmax/step));
beta = parameters[0];
gamma = parameters[1];
populations[0] = parameters[2];
if(active) t0 = parameters[3];
else t0 = detectionTime;
populations[1] = 1.0;
populations[2] = 1.0;
Solve_Eq_total(temp_model, 1);
return(temp_model);
}
vector<vector<double> > IRSIR::ode_solve(vector<double> parameters){
pars = parameters;
reset_models((current_data.size()/step));
beta = parameters[0];
gamma = parameters[1];
populations[0] = parameters[2];
if(active) t0 = parameters[3];
else t0 = detectionTime;
populations[1] = 1.0;
populations[2] = 1.0;
Solve_Eq_t0(temp_model, 1);
return(temp_model);
}
#ifndef IRSIR_HPP
#define IRSIR_HPP
#include "epidemic.hpp"
using namespace std;
class IRSIR: public Epidemic{
protected:
double beta, gamma;
public:
// Housekeeping functions
IRSIR(double _tmax, vector<vector<double> > x, EpiType _type, int detection) : Epidemic(_tmax, x, _type, detection){
// Stores beta and gamma
beta = 0.001;
gamma = 0.1;
infectedIndex = 2;
pars.clear();
pars.push_back(beta);
pars.push_back(gamma);
// Initial population sizes
populations[0] = 500.0;
pars.push_back(populations[0]);
populations[1] = 1.0;
populations[2] = 0.0;
pars.push_back(1);
minTime = detection - 20;
seedTime = detection - 5;
};
~IRSIR();
/* NORMAL ODE SOLVER */
virtual void Diff(vector<double> Pop);
/* SSE SOLVER */
//double overall_sse(vector<double> parameters);
virtual vector<vector<double> > ode_solve(vector<double> parameters);
virtual vector<vector<double> > ode_solve_combined(vector<double> parameters);
};
#endif
#include <iostream>
#include <string>
#include <stdlib.h>
#include <vector>
#include <cmath>
#include <boost/tuple/tuple.hpp>
#include <boost/filesystem.hpp>
#include <time.h>
#include <algorithm>
#include <limits>
#include <iterator>
//#include "simplex.h"
using namespace std;
#include "datahandler.hpp"
#include "epidemic.hpp"
#include "sir.hpp"
#include "seir.hpp"
const int STEP_SIZE = 0.1;
bool check_existence(char place[80]){
if(boost::filesystem::exists(place) && boost::filesystem::is_directory(place)){
return true;
}
return false;
}
EpiType convert_to_epi_type(int epi){
switch(epi){
case 0: return none;
case 1: return sir;
case 2