Commit 06329788 authored by szymon

Resolving merge conflicts

parents 32699a5a e90c2db9
cmake_minimum_required(VERSION 3.14)
project(ppo_cpp)
set(CMAKE_CXX_STANDARD 14)
include_directories("/usr/local/include/eigen3")
include_directories("/usr/local/include/eigen3/unsuppored")
include_directories("/usr/local/include/tensorflow")
include_directories("/usr/local/include/protobuf")
include_directories("/ppo2")
include_directories("/env")
include_directories("/common")
include_directories("/usr/local/include") #dart
include_directories("/home/szymon/robot_dart/src") #robot_dart
add_executable(test test/test_adventages.cpp test/network_loader.hpp test/main_tests.cpp)
add_executable(ppo_cpp ppo2.cpp env/hexapod_env.hpp env/env_normalize.hpp common/matrix_clamp.hpp args.hxx json.hpp common/serializable.hpp env/hexapod_closed_loop_env.hpp ppo2/runner.hpp)
add_executable(test2 test/main2.cpp test/main_tests.cpp)
add_executable(test3 test/main3.cpp test/main_tests.cpp)
add_executable(test4 test/main4.cpp test/main_tests.cpp)
add_executable(stat_test test/stat_test.cpp common/running_statistics.hpp test/main_tests.cpp common/median.hpp)
target_link_libraries(ppo_cpp "/usr/local/lib/libprotobuf.so")
target_link_libraries(ppo_cpp "/usr/local/lib/libprotoc.so")
target_link_libraries(ppo_cpp "/usr/local/lib/libtensorflow_cc.so")
target_link_libraries(ppo_cpp "/usr/local/lib/libtensorflow_framework.so")
@@ -17,8 +17,8 @@ typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> Ma
class EnvNormalize : public Env {
public:
explicit EnvNormalize(
Env &env,
EnvNormalize(
std::unique_ptr<Env> env,
bool training,
bool norm_obs = true,
bool norm_reward = true,
@@ -27,42 +27,42 @@ public:
float gamma = 0.99,
float epsilon = 1e-8)
: Env(),
env{env},
env{std::move(env)},
norm_obs{norm_obs},
norm_reward{norm_reward},
ret{Mat::Zero(env.get_num_envs(),1)},
ret{Mat::Zero(this->env->get_num_envs(),1)},
gamma{gamma},
epsilon{epsilon},
obs_rms{RunningStatistics(env.get_observation_space_size())},
obs_rms{RunningStatistics(this->env->get_observation_space_size())},
ret_rms{},
clamp_obs{env.get_num_envs(),env.get_observation_space_size(),clip_obs},
clamp_obs{this->env->get_num_envs(),this->env->get_observation_space_size(),clip_obs},
clamp_rewards{ret,clip_reward},
ret_like_ones{Mat::Ones(ret.rows(),ret.cols())},
training{training}
{}
std::string get_action_space() override {
return env.get_action_space();
return env->get_action_space();
}
std::string get_observation_space() override {
return env.get_observation_space();
return env->get_observation_space();
}
int get_action_space_size() override {
return env.get_action_space_size();
return env->get_action_space_size();
}
int get_observation_space_size() override {
return env.get_observation_space_size();
return env->get_observation_space_size();
}
int get_num_envs() override {
return env.get_num_envs();
return env->get_num_envs();
}
std::vector<Mat> step(const Mat &actions) override {
const std::vector<Mat>& results = env.step(actions);
const std::vector<Mat>& results = env->step(actions);
Mat rews {results[1]};
@@ -109,31 +109,32 @@ public:
}
Mat reset() override {
const Mat& obs = env.reset();
const Mat& obs = env->reset();
ret = Mat::Zero(get_num_envs(),1);
return _normalize_observation(obs);
}
void render() override {
env.render();
env->render();
}
float get_time() override {
return env.get_time();
return env->get_time();
}
Mat get_original_obs() override{
return env.get_original_obs();
return env->get_original_obs();
}
Mat get_original_rew() override{
return env.get_original_rew();
return env->get_original_rew();
}
void serialize(nlohmann::json& json) override {
obs_rms.serialize(json["obs_rms"]);
ret_rms.serialize(json["ret_rms"]);
env->serialize(json);
}
void deserialize(nlohmann::json& json) override {
@@ -141,10 +142,11 @@ public:
//std::cout << "json" <<json << std::endl;
obs_rms.deserialize(json["obs_rms"]);
ret_rms.deserialize(json["ret_rms"]);
env->deserialize(json);
}
private:
Env& env;
std::unique_ptr<Env> env;
bool norm_obs;
bool norm_reward;
Mat ret;
......
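A minimal construction sketch (not part of this commit) illustrating the ownership change above: EnvNormalize now takes the wrapped environment by std::unique_ptr instead of holding an Env&, so callers move the pointer in and access the environment only through the wrapper. The boolean argument of HexapodEnv is assumed from its use later in ppo2.cpp.

#include <memory>
#include <utility>

// build the inner environment first, then hand ownership to the wrapper
std::unique_ptr<Env> inner = std::make_unique<HexapodEnv>(true /* assumed: single-env flag, as in ppo2.cpp */);
EnvNormalize normalized{std::move(inner), /*training=*/true};
// 'inner' is now empty; all further calls go through the wrapper
Mat obs = normalized.reset();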
@@ -32,21 +32,29 @@ public:
Mat reset() override {
HexapodEnv::reset();
return std::move(apply_noise());
}
Eigen::VectorXd qpos{local_robot->skeleton()->getPositions()};
Eigen::VectorXd qvel{local_robot->skeleton()->getVelocities()};
void serialize(nlohmann::json& json) override {
HexapodEnv::serialize(json);
json["reset_noise_scale"] = _reset_noise_scale;
}
qpos.tail(18) += _reset_noise_scale * Eigen::VectorXd::Random(18);
qvel.tail(18) += _reset_noise_scale * Eigen::VectorXd::Random(18);
void deserialize(nlohmann::json& json) override {
HexapodEnv::deserialize(json);
local_robot->skeleton()->setPositions(qpos);
local_robot->skeleton()->setVelocities(qvel);
Mat obs{get_obs()};
old_obs = obs;
if (json.count("reset_noise_scale") != 0)
{
_reset_noise_scale = json["reset_noise_scale"].get<double>();
}
else
{
std::cout << "no reset noise found: defaulting to 0" << std::endl;
_reset_noise_scale = 0.;
}
apply_noise();
return std::move(obs);
}
protected:
@@ -67,6 +75,23 @@ private:
double _reset_noise_scale;
bool observe_velocities;
int observation_space_size;
Mat apply_noise() {
Eigen::VectorXd qpos{local_robot->skeleton()->getPositions()};
Eigen::VectorXd qvel{local_robot->skeleton()->getVelocities()};
qpos.tail(18) += _reset_noise_scale * Eigen::VectorXd::Random(18);
qvel.tail(18) += _reset_noise_scale * Eigen::VectorXd::Random(18);
local_robot->skeleton()->setPositions(qpos);
local_robot->skeleton()->setVelocities(qvel);
Mat obs{get_obs()};
old_obs = obs;
return std::move(obs);
}
};
#endif //PPO_CPP_HEXAPOD_CLOSED_LOOP_ENV_HPP
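As a hedged aside (not repository code), the new serialize/deserialize pair above keeps older checkpoints loadable: files written before this commit simply lack the key and fall back to zero noise. A minimal round trip with the vendored nlohmann::json header:

#include "json.hpp"  // nlohmann::json, already listed in CMakeLists.txt

nlohmann::json checkpoint;
checkpoint["reset_noise_scale"] = 0.05;                    // what serialize() writes
double noise = checkpoint.count("reset_noise_scale") != 0
        ? checkpoint["reset_noise_scale"].get<double>()    // normal path
        : 0.;                                              // legacy checkpoint: default to 0, as deserialize() does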
@@ -102,11 +102,15 @@ int main(int argc, char **argv)
args::Flag closed_loop(parser,"closed loop environment", "If set, closed-loop hexapod environment will be used, open-loop by default",{"closed_loop","closed-loop","cl"});
args::Flag verbose(parser,"verbose", "output additional logs to the console",{'v',"verbose"});
args::Flag resume(parser,"resume", "If set, training resumes from the loaded checkpoint",{'r',"resume"});
args::ValueFlag<double> duration(parser, "duration", "The total duration of played animation [seconds]", {"duration","du"},5.);
args::ValueFlag<int> threads(parser, "num threads", "Number of threads used in training", {'j',"jobs","threads","n_threads","num_threads","nt"},1);
//seeding needs fixing
// args::ValueFlag<int> seed(parser, "seed", "Seed. Time-based if not specified.", {"seed"});
try
{
parser.ParseCLI(argc, argv);
@@ -129,43 +133,53 @@ int main(int argc, char **argv)
return 1;
}
auto now = std::chrono::high_resolution_clock::now();
auto nanos = std::chrono::duration_cast<std::chrono::nanoseconds>(now.time_since_epoch()).count();
int seed = static_cast<int>(nanos%std::numeric_limits<int>::max());
srand(seed);
std::cout << "seed: " << seed << std::endl;
//still not deterministic - perhaps TF needs a global seed setter on the graph
// if (seed){
// srand(seed.Get());
// } else {
auto now = std::chrono::high_resolution_clock::now();
auto nanos = std::chrono::duration_cast<std::chrono::nanoseconds>(now.time_since_epoch()).count();
int seed_val = static_cast<int>(nanos % std::numeric_limits<int>::max());
srand(seed_val);
// }
std::cout << "seed: " << seed << std::endl;
auto seconds = time (nullptr);
std::string run_id {id?id.Get():("ppo_"+std::to_string(seconds))};
std::string tb_path {save_path.Get()+"/tensorboard/"+run_id+"/"};
bool training = !load_path;
bool training = !load_path || resume;
// std::cout << "load_path: " << load_path.Get() << std::endl;
// std::cout << "training: " << training << std::endl;
load_and_init_robot2();
std::shared_ptr<Env> wrapped_env;
std::unique_ptr<Env> wrapped_env;
std::vector<std::shared_ptr<Env>> envs;
bool multi_env = threads.Get()>1;
for (int i =0; i<threads.Get(); ++i){
if(multi_env){
for (int i =0; i<threads.Get(); ++i){
//TODO: environment selection should be recoverable from serialization as well
if(closed_loop){
envs.push_back(std::make_shared<HexapodClosedLoopEnv>(reset_noise_scale.Get(),!multi_env));
} else {
envs.push_back(std::make_shared<HexapodEnv>(!multi_env));
}
}
wrapped_env = std::make_unique<VecEnv>(envs);
} else {
//TODO: environment selection should be recoverable from serialization as well
if(closed_loop){
envs.push_back(std::make_shared<HexapodClosedLoopEnv>(reset_noise_scale.Get(),!multi_env));
wrapped_env = std::make_unique<HexapodClosedLoopEnv>(reset_noise_scale.Get(),!multi_env);
} else {
envs.push_back(std::make_shared<HexapodEnv>(!multi_env));
wrapped_env = std::make_unique<HexapodEnv>(!multi_env);
}
}
if(multi_env){
wrapped_env = std::make_shared<VecEnv>(envs);
} else {
wrapped_env = envs[0];
}
EnvNormalize env{*wrapped_env, training};
EnvNormalize env{std::move(wrapped_env),training};
const std::string final_graph_path{graph_path.Get()};
@@ -177,6 +191,11 @@ int main(int argc, char **argv)
.99,num_batch_steps.Get(),entropy.Get(),learning_rate.Get(),.5,.5,.95,32,num_epochs.Get(),clip_range.Get(),-1,tb_path
};
if(load_path){
algorithm.load(load_path.Get());
}
if(training) {
//shell-dependent timestamped directory creation
@@ -206,7 +225,6 @@ int main(int argc, char **argv)
algorithm.learn(int_steps,total_saves,checkpoint_path);
} else {
algorithm.load(load_path.Get());
const int playback_steps = static_cast<int>(duration.Get()/0.015);
......
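To condense the new resume semantics (an illustrative summary, not verbatim repository code): --load without --resume plays back a stored policy, while --load together with --resume restores the checkpoint and continues training.

bool training = !load_path || resume;        // --resume keeps training even when a checkpoint is loaded
if (load_path) {
    algorithm.load(load_path.Get());         // restore weights and normalization statistics
}
if (training) {
    algorithm.learn(int_steps, total_saves, checkpoint_path);
} else {
    // playback path: requested duration [s] divided by the 0.015 s step assumed above
    const int playback_steps = static_cast<int>(duration.Get() / 0.015);
}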
@@ -190,7 +190,11 @@ public:
observation_space = json["observation_space"].get<std::string>();
action_space = json["action_space"].get<std::string>();
n_envs = json["n_envs"].get<int>();
model_filename = json["model_filename"].get<std::string>();
if (model_filename.empty()) {
model_filename = json["model_filename"].get<std::string>();
} else {
std::cout << "filename passed through CLI overrides deserialized one" << std::endl;
}
reset();
......
//
// Created by szymon on 01/08/19.
//
// tests-main.cpp
// CATCH setup, do not change
#define CATCH_CONFIG_MAIN
#include "catch.hpp"
#!/usr/bin/env bash
#little util to help collect data from a large number of experiment results
#meant to be run after the open_tensor_board.sh step (when data is ready to query)
#same idea: point it at a dir; subdirectories are queried in batches of 5 (hit ENTER between batches)
echo "input path: $1"
cd "$1" || exit 1
dirs=(*)
length=${#dirs[@]}
counter=1
for (( i=0; i<$length; i++ ))
do
echo
port=$((6010+$i))
name=${dirs[$i]}
dir=$name/tensorboard
if [ -d "$dir" ]; then
echo $name
echo $port
cd $dir
for run in *; do xdg-open "http://szymon-tws:"${port}"/data/plugin/scalars/scalars?tag=episode_reward&run="${run}"&experiment=&format=csv"; done # 'run' avoids clobbering the outer loop index i
cd ..
mkdir -p csv
sleep 3s
mv ~/Downloads/scalars* ./csv
cd ..
if [ $(($counter % 5)) -eq 0 ]; then
echo "when ready to continue hit [ENTER]:"
read
fi
counter=$(($counter+1))
fi
done