diff --git a/.teamcity/Dockerfile b/.teamcity/Dockerfile index c3d1c209eb04bf7379969a28d0be4ce1bfe10c0d..e16d09d6f56adfc6ba9bd9a3305e3d8210117146 100644 --- a/.teamcity/Dockerfile +++ b/.teamcity/Dockerfile @@ -18,3 +18,11 @@ FROM parl/parl-test:cuda9.0-cudnn7-v2 COPY ./requirements.txt /root/ + +RUN apt-get install -y libgflags-dev libgoogle-glog-dev libomp-dev unzip +RUN apt-get install -y libgtest-dev && cd /usr/src/gtest && mkdir build \ + && cd build && cmake .. && make && cp libgtest*.a /usr/local/lib + +RUN wget https://github.com/google/protobuf/releases/download/v2.4.1/protobuf-2.4.1.tar.gz \ + && tar -zxvf protobuf-2.4.1.tar.gz \ + && cd protobuf-2.4.1 && ./configure && make && make install diff --git a/.teamcity/build.sh b/.teamcity/build.sh index 6a33424797690bcd088381bd8173ae7d881c2dbc..2f6cec0c6976797572bb9f139e0cd42cde3e5d5e 100755 --- a/.teamcity/build.sh +++ b/.teamcity/build.sh @@ -134,6 +134,19 @@ EOF rm -rf ${REPO_ROOT}/build } +function run_deepes_test { + cd ${REPO_ROOT}/deepes + + cat < noisy_rewards(ITER, 0.0f); noisy_info.resize(ITER); - for (int epoch = 0; epoch < 1000; ++epoch) { + for (int epoch = 0; epoch < 100; ++epoch) { #pragma omp parallel for schedule(dynamic, 1) for (int i = 0; i < ITER; ++i) { auto sampling_agent = sampling_agents[i]; diff --git a/deepes/include/utils.h b/deepes/include/utils.h index c5a2e226d5b038e3eda012ae7a081ebccb842e7b..5835a43defd6a4abfeae7a68a5671f3c3239dcfc 100644 --- a/deepes/include/utils.h +++ b/deepes/include/utils.h @@ -27,7 +27,7 @@ namespace DeepES{ Args: reward: an array of rewards */ -void compute_centered_ranks(std::vector &reward) ; +bool compute_centered_ranks(std::vector &reward); /* Load a protobuf-based configuration from the file. 
* Args: diff --git a/deepes/scripts/build.sh b/deepes/scripts/build.sh index c40052b8de1f4a85935faefbcccf97b62d6d8687..2b5f6dae884b7714cada14485a89f908e56a7b29 100644 --- a/deepes/scripts/build.sh +++ b/deepes/scripts/build.sh @@ -23,8 +23,11 @@ elif [ $1 = "torch" ]; then #---------------libtorch-------------# if [ ! -d "./libtorch" ];then echo "Cannot find the torch library: ./libtorch" - echo "Please put the torch libraray to current folder according the instruction in README" - exit 1 + echo "Downloading Torch library" + wget -q https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.4.0%2Bcpu.zip + unzip -q libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip + rm -rf libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip + echo "Torch library Downloaded" fi FLAGS=" -DWITH_TORCH=ON" else diff --git a/deepes/src/optimizer_factory.cc b/deepes/src/optimizer_factory.cc index 55dff6bc76ad7fbcac2f2d22ac4adb7ef90a4dad..08419045267cfbcdbad4a8d083f529af2e047db7 100644 --- a/deepes/src/optimizer_factory.cc +++ b/deepes/src/optimizer_factory.cc @@ -16,6 +16,7 @@ namespace DeepES{ + std::shared_ptr create_optimizer(const OptimizerConfig& optimizer_config) { std::shared_ptr optimizer; std::string opt_type = optimizer_config.type(); diff --git a/deepes/src/utils.cc b/deepes/src/utils.cc index 153f5a10dc44857819d7f172c91fce90cfbfc5ab..cd5b055405ceefc41d7f8be007b52e9e4ddd7221 100644 --- a/deepes/src/utils.cc +++ b/deepes/src/utils.cc @@ -17,7 +17,7 @@ namespace DeepES { -void compute_centered_ranks(std::vector &reward) { +bool compute_centered_ranks(std::vector &reward) { std::vector> reward_index; float gap = 1.0 / (reward.size() - 1); float normlized_rank = -0.5; @@ -32,6 +32,7 @@ void compute_centered_ranks(std::vector &reward) { reward[id] = normlized_rank; normlized_rank += gap; } + return true; } std::vector list_all_model_dirs(std::string path) { diff --git a/deepes/test/CMakeLists.txt b/deepes/test/CMakeLists.txt new file mode 100644 index 
0000000000000000000000000000000000000000..defcb66f8129bb2f11945fccf143040914e938b6 --- /dev/null +++ b/deepes/test/CMakeLists.txt @@ -0,0 +1,34 @@ +cmake_minimum_required (VERSION 2.6) +project (DeepES) +set(TARGET unit_test_main) + + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +find_package(GTest REQUIRED) +find_package(OpenMP) +if (OPENMP_FOUND) + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") +endif() + +# Torch lib +list(APPEND CMAKE_PREFIX_PATH "../libtorch") +find_package(Torch REQUIRED ON) + +# include and source +file(GLOB test_src "../test/src/*.cc") +file(GLOB core_src "../src/*.cc") +file(GLOB agent_src "../src/torch/*.cc") + +include_directories("../include/torch") +include_directories("../include") +include_directories("../benchmark") +include_directories("../test/include") + + +add_executable(${TARGET} "unit_test.cc" ${core_src} ${agent_src} ${test_src}) +target_link_libraries(${TARGET} gflags protobuf pthread glog gtest "${TORCH_LIBRARIES}") diff --git a/deepes/test/include/torch_demo_model.h b/deepes/test/include/torch_demo_model.h new file mode 100644 index 0000000000000000000000000000000000000000..709e28dba6908d23f044abd88ff9c7baabbc230e --- /dev/null +++ b/deepes/test/include/torch_demo_model.h @@ -0,0 +1,64 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef _TORCH_DEMO_MODEL_H +#define _TORCH_DEMO_MODEL_H + +#include + +struct Model : public torch::nn::Module{ + + Model() = delete; + + Model(const int obs_dim, const int act_dim) { + + _obs_dim = obs_dim; + _act_dim = act_dim; + int hid1_size = 30; + int hid2_size = 15; + fc1 = register_module("fc1", torch::nn::Linear(obs_dim, hid1_size)); + fc2 = register_module("fc2", torch::nn::Linear(hid1_size, hid2_size)); + fc3 = register_module("fc3", torch::nn::Linear(hid2_size, act_dim)); + } + + torch::Tensor forward(torch::Tensor x) { + x = x.reshape({-1, _obs_dim}); + x = torch::tanh(fc1->forward(x)); + x = torch::tanh(fc2->forward(x)); + x = torch::tanh(fc3->forward(x)); + return x; + } + + std::shared_ptr clone() { + std::shared_ptr model = std::make_shared(_obs_dim, _act_dim); + std::vector parameters1 = parameters(); + std::vector parameters2 = model->parameters(); + for (int i = 0; i < parameters1.size(); ++i) { + torch::Tensor src = parameters1[i].view({-1}); + torch::Tensor des = parameters2[i].view({-1}); + auto src_a = src.accessor(); + auto des_a = des.accessor(); + for (int j = 0; j < src.size(0); ++j) { + des_a[j] = src_a[j]; + } + } + return model; + } + + int _act_dim; + int _obs_dim; + torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr}; +}; + +#endif diff --git a/deepes/test/run_test.sh b/deepes/test/run_test.sh new file mode 100644 index 0000000000000000000000000000000000000000..eb85170468f7a56a5e930dc54c75f1fc596f91ab --- /dev/null +++ b/deepes/test/run_test.sh @@ -0,0 +1,29 @@ +#!/bin/bash +export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH + +#---------------libtorch-------------# +if [ ! 
-d "./libtorch" ];then +echo "Cannot find the torch library: ../libtorch" + echo "Downloading Torch library" + wget -q https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.4.0%2Bcpu.zip + unzip -q libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip + rm -rf libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip + echo "Torch library Downloaded" +fi + +#----------------protobuf-------------# +cp ./src/proto/deepes.proto ./ +protoc deepes.proto --cpp_out ./ +mv deepes.pb.h ./include +mv deepes.pb.cc ./src + + +#----------------build---------------# +rm -rf build +mkdir build +cd build +cmake ../test +make -j10 + +#-----------------run----------------# +./unit_test_main diff --git a/deepes/test/src/optimizers_test.cc b/deepes/test/src/optimizers_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..e589f477ac1bf8810e693b57c43814cdf38f161a --- /dev/null +++ b/deepes/test/src/optimizers_test.cc @@ -0,0 +1,60 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "gtest/gtest.h" +#include +#include "optimizer_factory.h" +#include + +namespace DeepES { + + +TEST(SGDOptimizersTest, Method_update) { + std::shared_ptr config = std::make_shared(); + auto optimizer_config = config->mutable_optimizer(); + optimizer_config->set_base_lr(1.0); + optimizer_config->set_type("sgd"); + std::shared_ptr optimizer = create_optimizer(config->optimizer()); + float sgd_wei[10] = { 0.0 , 0.0 , 0.04216444, 0.0511456 , 0.04231584, 0.01089015, 0.06569759, 0.00127421,-0.00092832, 0.01128081}; + float sgd_grad[10] = {-0.11992419,-0.0 , 0.07681337,-0.06616384, 0.00249889, 0.01158612,-0.3067452 , 0.36048946,-0.15820622,-0.20014143}; + float sgd_new[10] = { 0.01199242, 0.0 , 0.0344831 , 0.05776198, 0.04206595, 0.00973154, 0.09637211,-0.03477474, 0.014892306, 0.03129495}; + + EXPECT_TRUE(optimizer->update(sgd_wei, sgd_grad, 10, "test")); + for (int i = 0; i < 10; ++i) { + EXPECT_FLOAT_EQ(sgd_new[i], sgd_wei[i]) << " i: " << i ; + } + EXPECT_TRUE(optimizer->update(sgd_wei, sgd_grad, 10, "test")); + EXPECT_FALSE(optimizer->update(sgd_wei, sgd_grad, 9, "test")); +} + +TEST(AdamOptimizersTest, Method_update) { + std::shared_ptr config = std::make_shared(); + auto optimizer_config = config->mutable_optimizer(); + optimizer_config->set_base_lr(1.0); + optimizer_config->set_type("adam"); + std::shared_ptr optimizer = create_optimizer(config->optimizer()); + float adam_wei[10] = { 0.0 , 0.0 , 0.04216444, 0.0511456 , 0.04231584, 0.01089015, 0.06569759, 0.00127421,-0.00092832, 0.01128081}; + float adam_grad[10] = {-0.11992419,-0.0 , 0.07681337,-0.06616384, 0.00249889, 0.01158612,-0.3067452 , 0.36048946,-0.15820622,-0.20014143}; + float adam_new[10] = { 0.99999736, 0. 
,-0.95783144, 1.05114082,-0.95755763,-0.98908256, 1.06569656,-0.99872491, 0.99906968, 1.01127923}; + + EXPECT_TRUE(optimizer->update(adam_wei, adam_grad, 10, "test")); + for (int i = 0; i < 10; ++i) { + EXPECT_FLOAT_EQ(adam_new[i], adam_wei[i]) << " i: " << i ; + } + EXPECT_TRUE(optimizer->update(adam_wei, adam_grad, 10, "test")); + EXPECT_FALSE(optimizer->update(adam_wei, adam_grad, 9, "test")); +} + +} // namespace + diff --git a/deepes/test/src/torch_agent_test.cc b/deepes/test/src/torch_agent_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..cf0b322380677ea0e17d27a0a0c143a88c9a53eb --- /dev/null +++ b/deepes/test/src/torch_agent_test.cc @@ -0,0 +1,136 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include +#include +#include + +#include "gaussian_sampling.h" +#include "torch_demo_model.h" +#include "es_agent.h" + +#include +#include +#include +#include + +namespace DeepES { + + +// Test fixture that trains an ES agent on a noisy sin(x) regression task. 
+class TorchDemoTest : public ::testing::Test { +protected: + float evaluate(std::vector& x_list, std::vector& y_list, int size, std::shared_ptr> agent) { + float total_loss = 0.0; + for (int i = 0; i < size; ++i) { + torch::Tensor x_input = torch::tensor(x_list[i], torch::dtype(torch::kFloat32)); + torch::Tensor predict_y = agent->predict(x_input); + auto pred_y = predict_y.accessor(); + float loss = pow((pred_y[0][0] - y_list[i]), 2); + total_loss += loss; + } + return -total_loss / float(size); + } + + float train_loss() { + return -1.0 * evaluate(x_list, y_list, train_data_size, agent); + } + + float test_loss() { + return -1.0 * evaluate(test_x_list, test_y_list, test_data_size, agent); + } + + float train_test_gap() { + float train_lo = train_loss(); + float test_lo = test_loss(); + if ( train_lo > test_lo) { + return train_lo - test_lo; + } + else { + return test_lo - train_lo; + } + } + + void SetUp() override { + std::default_random_engine generator(0); // fix seed + std::uniform_real_distribution uniform(-3.0, 9.0); + std::normal_distribution norm; + for (int i = 0; i < train_data_size; ++i) { + float x_i = uniform(generator); // generate data between [-3, 9] + float y_i = sin(x_i) + norm(generator)*0.05; // noise std 0.05 + x_list.push_back(x_i); + y_list.push_back(y_i); + } + for (int i= 0; i < test_data_size; ++i) { + float x_i = uniform(generator); + float y_i = sin(x_i); + test_x_list.push_back(x_i); + test_y_list.push_back(y_i); + } + + std::shared_ptr model = std::make_shared(1, 1); + agent = std::make_shared>(model, "../test/torch_sin_config.prototxt"); + + // Clone agents to sample (explore). 
+ std::vector>> sampling_agents; + for (int i = 0; i < iter; ++i) { + sampling_agents.push_back(agent->clone()); + } + + std::vector noisy_keys; + std::vector noisy_rewards(iter, 0.0f); + noisy_keys.resize(iter); + + LOG(INFO) << "start training..."; + for (int epoch = 0; epoch < 1001; ++epoch) { +#pragma omp parallel for schedule(dynamic, 1) + for (int i = 0; i < iter; ++i) { + auto sampling_agent = sampling_agents[i]; + SamplingKey key; + bool success = sampling_agent->add_noise(key); + float reward = evaluate(x_list, y_list, train_data_size, sampling_agent); + noisy_keys[i] = key; + noisy_rewards[i] = reward; + } + bool success = agent->update(noisy_keys, noisy_rewards); + + if (epoch % 100 == 0) { + float reward = evaluate(test_x_list, test_y_list, test_data_size, agent); + float train_reward = evaluate(x_list, y_list, train_data_size, agent); + LOG(INFO) << "Epoch:" << epoch << " Loss: " << -reward << ", Train loss" << -train_reward; + } + } + } + + // Class members declared here can be used by all tests in the test suite + int train_data_size = 300; + int test_data_size = 100; + int iter = 10; + std::vector x_list; + std::vector y_list; + std::vector test_x_list; + std::vector test_y_list; + std::shared_ptr> agent; +}; + + +TEST_F(TorchDemoTest, TrainingEffectTest) { + EXPECT_LT(train_loss(), 0.05); + EXPECT_LT(test_loss(), 0.05); + EXPECT_LT(train_test_gap(), 0.03); +} + + +} // namespace diff --git a/deepes/test/src/utils_test.cc b/deepes/test/src/utils_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..e6455dcb818b739372dfacfaae5541826ed99dd3 --- /dev/null +++ b/deepes/test/src/utils_test.cc @@ -0,0 +1,30 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include +#include "utils.h" + +namespace DeepES { + +// Tests the DeepES::compute_centered_ranks() function. +TEST(UtilsTest, Method_compute_centered_ranks) { + float a[5] = {9.0, 8.0, 7.0, 6.0, 5.0}; + std::vector reward_vec(a, a+5); + EXPECT_EQ(compute_centered_ranks(reward_vec), true); +} + + +} // namespace + diff --git a/deepes/test/torch_sin_config.prototxt b/deepes/test/torch_sin_config.prototxt new file mode 100644 index 0000000000000000000000000000000000000000..6f4776bc4b8afae7042762e22ebfc913485a238f --- /dev/null +++ b/deepes/test/torch_sin_config.prototxt @@ -0,0 +1,14 @@ +seed : 1024 + +gaussian_sampling { + std: 0.005 +} + +optimizer { + type: "Adam", + base_lr: 0.005, + momentum: 0.9, + beta1: 0.9, + beta2: 0.999, + epsilon: 1e-8, +} diff --git a/deepes/test/unit_test.cc b/deepes/test/unit_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..3bbc21f4cdfb8e7709173a258f66560a7f7e27a1 --- /dev/null +++ b/deepes/test/unit_test.cc @@ -0,0 +1,20 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/examples/LiftSim_baseline/A2C/README.md b/examples/LiftSim_baseline/A2C/README.md index 50ec00ed1d37ba7e53ff259240e0c2b6365f10b2..235e1aff2c956fd1ec60d999e0c34328f949d80c 100644 --- a/examples/LiftSim_baseline/A2C/README.md +++ b/examples/LiftSim_baseline/A2C/README.md @@ -7,7 +7,7 @@ ## 依赖库 + [paddlepaddle>=1.6.1](https://github.com/PaddlePaddle/Paddle) -+ [parl>=1.2.2](https://github.com/PaddlePaddle/PARL) ++ [parl>=1.2.3](https://github.com/PaddlePaddle/PARL) + [rlschool>=0.1.1][rlschool] diff --git a/parl/__init__.py b/parl/__init__.py index 14a7e16f751bd0f0f1ab0ee4aca927ad97b24f2a..02e7a80ab01591c81862624a8ccd9997e4f429ea 100644 --- a/parl/__init__.py +++ b/parl/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "1.2.2" +__version__ = "1.2.3" """ generates new PARL python API """