Merge branch 'develop' into zhoubo01

4b6fc21c · Bo Zhou · GitHub · 2f6d1e10 · a1ac2da3 · 4b6fc21c
17 changed files
--- a/.teamcity/Dockerfile
+++ b/.teamcity/Dockerfile
@@ -18,3 +18,11 @@
 FROM parl/parl-test:cuda9.0-cudnn7-v2
 COPY ./requirements.txt /root/
+# Native dependencies for the DeepES C++ unit tests. The package index is
+# refreshed in the same layer as each install so a stale cached index cannot
+# make the install fail.
+RUN apt-get update && apt-get install -y libgflags-dev libgoogle-glog-dev libomp-dev unzip
+# Build the gtest static libraries from /usr/src/gtest and copy them to /usr/local/lib.
+RUN apt-get update && apt-get install -y libgtest-dev && cd /usr/src/gtest && mkdir build \
+    && cd build && cmake .. && make && cp libgtest*.a /usr/local/lib
+# Build and install protobuf 2.4.1 from source, then remove the sources and the
+# tarball so they do not stay in the image layer.
+RUN wget https://github.com/google/protobuf/releases/download/v2.4.1/protobuf-2.4.1.tar.gz \
+    && tar -zxvf protobuf-2.4.1.tar.gz \
+    && cd protobuf-2.4.1 && ./configure && make && make install \
+    && cd .. && rm -rf protobuf-2.4.1 protobuf-2.4.1.tar.gz
--- a/.teamcity/build.sh
+++ b/.teamcity/build.sh
@@ -134,6 +134,19 @@ EOF
    rm -rf ${REPO_ROOT}/build
 }
+# Runs the DeepES C++ unit tests via deepes/test/run_test.sh, then removes the
+# build tree and the libtorch directory under deepes/.
+# Paths are quoted so a REPO_ROOT containing whitespace is not word-split.
+function run_deepes_test {
+    cd "${REPO_ROOT}/deepes"
+    cat <<EOF
+    ========================================
+    Running DeepES test...
+    ========================================
+EOF
+    sh test/run_test.sh
+    rm -rf "${REPO_ROOT}/deepes/build"
+    rm -rf "${REPO_ROOT}/deepes/libtorch"
+}
 function main() {
    set -e
    local CMD=$1
@@ -158,7 +171,7 @@ function main() {
              echo ========================================
              pip install .
              if [ \( $env == "py27" -o $env == "py36" -o $env == "py37" \) ]
-              then  
+              then
                pip install -r .teamcity/requirements.txt
                run_test_with_cpu $env
                run_test_with_cpu $env "DIS_TESTING_SERIALLY"
@@ -176,6 +189,7 @@ function main() {
          /root/miniconda3/envs/empty_env/bin/pip install .
          run_import_test
          run_docs_test
+          run_deepes_test
          ;;
        *)
          print_usage

--- a/deepes/demo/torch/cartpole_solver_parallel.cc
+++ b/deepes/demo/torch/cartpole_solver_parallel.cc
@@ -63,7 +63,7 @@ int main(int argc, char* argv[]) {
  std::vector<float> noisy_rewards(ITER, 0.0f);
  noisy_info.resize(ITER);
-  for (int epoch = 0; epoch < 1000; ++epoch) {
+  for (int epoch = 0; epoch < 100; ++epoch) {
 #pragma omp parallel for schedule(dynamic, 1)
    for (int i = 0; i < ITER; ++i) {
      auto sampling_agent = sampling_agents[i];

--- a/deepes/include/utils.h
+++ b/deepes/include/utils.h
@@ -27,7 +27,7 @@ namespace DeepES{
  Args:
    reward: an array of rewards
 */
-void compute_centered_ranks(std::vector<float> &reward) ;
+bool compute_centered_ranks(std::vector<float> &reward);
 /* Load a protobuf-based configuration from the file.
 * Args:

--- a/deepes/scripts/build.sh
+++ b/deepes/scripts/build.sh
@@ -23,8 +23,11 @@ elif [ $1 = "torch" ]; then
  #---------------libtorch-------------#
  if [ ! -d "./libtorch" ];then
    echo "Cannot find the torch library: ./libtorch"
-    echo "Please put the torch libraray to current folder according the instruction in README"
+      echo "Downloading Torch library"
-    exit 1
+      wget -q https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.4.0%2Bcpu.zip
+      unzip -q libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip
+      rm -rf libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip
+      echo "Torch library Downloaded"
  fi
  FLAGS=" -DWITH_TORCH=ON"
 else

--- a/deepes/src/optimizer_factory.cc
+++ b/deepes/src/optimizer_factory.cc
@@ -16,6 +16,7 @@
 namespace DeepES{
 std::shared_ptr<Optimizer> create_optimizer(const OptimizerConfig& optimizer_config) {
  std::shared_ptr<Optimizer> optimizer;
  std::string opt_type = optimizer_config.type();

--- a/deepes/src/utils.cc
+++ b/deepes/src/utils.cc
@@ -17,7 +17,7 @@
 namespace DeepES {
-void compute_centered_ranks(std::vector<float> &reward) {
+bool compute_centered_ranks(std::vector<float> &reward) {
  std::vector<std::pair<float, int>> reward_index;
  float gap = 1.0 / (reward.size() - 1);
  float normlized_rank = -0.5;
@@ -32,6 +32,7 @@ void compute_centered_ranks(std::vector<float> &reward) {
    reward[id] = normlized_rank;
    normlized_rank += gap;
  }
+  return true;
 }
 std::vector<std::string> list_all_model_dirs(std::string path) {

--- a/deepes/test/CMakeLists.txt
+++ b/deepes/test/CMakeLists.txt
+# Builds the DeepES unit-test executable (unit_test_main) against libtorch.
+# CMAKE_CXX_STANDARD is only honoured from CMake 3.1 onwards, so require it.
+cmake_minimum_required (VERSION 3.1)
+project (DeepES)
+set(TARGET unit_test_main)
+# Root of the deepes tree; every path below is anchored here instead of relying
+# on how CMake resolves a bare "../" path.
+set(DEEPES_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/..")
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+find_package(GTest REQUIRED)
+find_package(OpenMP)
+if (OPENMP_FOUND)
+    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+endif()
+# Torch lib
+list(APPEND CMAKE_PREFIX_PATH "${DEEPES_ROOT}/libtorch")
+# No trailing "ON": extra words after REQUIRED are parsed as component names.
+find_package(Torch REQUIRED)
+# include and source
+file(GLOB test_src "${DEEPES_ROOT}/test/src/*.cc")
+file(GLOB core_src "${DEEPES_ROOT}/src/*.cc")
+file(GLOB agent_src "${DEEPES_ROOT}/src/torch/*.cc")
+include_directories("${DEEPES_ROOT}/include/torch")
+include_directories("${DEEPES_ROOT}/include")
+include_directories("${DEEPES_ROOT}/benchmark")
+include_directories("${DEEPES_ROOT}/test/include")
+add_executable(${TARGET} "unit_test.cc" ${core_src} ${agent_src} ${test_src})
+target_link_libraries(${TARGET} gflags protobuf pthread glog gtest "${TORCH_LIBRARIES}")
--- a/deepes/test/include/torch_demo_model.h
+++ b/deepes/test/include/torch_demo_model.h
+//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef _TORCH_DEMO_MODEL_H
+#define _TORCH_DEMO_MODEL_H
+#include <torch/torch.h>
+// Small fully connected network used by the torch unit tests:
+// obs_dim -> 30 -> 15 -> act_dim, with tanh applied after every layer.
+struct Model : public torch::nn::Module{
+  Model() = delete;
+
+  // Registers the three linear layers under the names "fc1", "fc2" and "fc3".
+  Model(const int obs_dim, const int act_dim)
+      : _act_dim(act_dim), _obs_dim(obs_dim) {
+    const int hid1_size = 30;
+    const int hid2_size = 15;
+    fc1 = register_module("fc1", torch::nn::Linear(obs_dim, hid1_size));
+    fc2 = register_module("fc2", torch::nn::Linear(hid1_size, hid2_size));
+    fc3 = register_module("fc3", torch::nn::Linear(hid2_size, act_dim));
+  }
+
+  // Reshapes the input to (-1, obs_dim) and runs it through the three layers.
+  torch::Tensor forward(torch::Tensor x) {
+    x = x.reshape({-1, _obs_dim});
+    x = torch::tanh(fc1->forward(x));
+    x = torch::tanh(fc2->forward(x));
+    x = torch::tanh(fc3->forward(x));
+    return x;
+  }
+
+  // Returns a new Model with the same dimensions whose parameters are
+  // element-wise copies of this model's parameters.
+  std::shared_ptr<Model> clone() {
+    std::shared_ptr<Model> model = std::make_shared<Model>(_obs_dim, _act_dim);
+    std::vector<torch::Tensor> parameters1 = parameters();
+    std::vector<torch::Tensor> parameters2 = model->parameters();
+    // size() is unsigned; index with std::size_t to avoid a signed/unsigned compare.
+    for (std::size_t i = 0; i < parameters1.size(); ++i) {
+      torch::Tensor src = parameters1[i].view({-1});
+      torch::Tensor des = parameters2[i].view({-1});
+      auto src_a = src.accessor<float, 1>();
+      auto des_a = des.accessor<float, 1>();
+      // Tensor::size() returns int64_t, so the element index uses the same type.
+      for (int64_t j = 0; j < src.size(0); ++j) {
+        des_a[j] = src_a[j];
+      }
+    }
+    return model;
+  }
+
+  int _act_dim;
+  int _obs_dim;
+  torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr};
+};
+#endif
--- a/deepes/test/run_test.sh
+++ b/deepes/test/run_test.sh
+#!/bin/bash
+# Builds and runs the DeepES unit tests. All paths are relative, so the script
+# must be invoked from the directory that contains src/, include/ and test/.
+# Stop at the first failing step so a broken download, protoc or build step is
+# reported where it happens rather than later as a missing test binary.
+set -e
+export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
+#---------------libtorch-------------#
+if [ ! -d "./libtorch" ];then
+    echo "Cannot find the torch library: ./libtorch"
+    echo "Downloading Torch library"
+    wget -q https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.4.0%2Bcpu.zip
+    unzip -q libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip
+    rm -rf libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip
+    echo "Torch library Downloaded"
+fi
+#----------------protobuf-------------#
+# Generate the protobuf sources and move them next to the hand-written code.
+cp ./src/proto/deepes.proto ./
+protoc deepes.proto --cpp_out ./
+mv deepes.pb.h ./include
+mv deepes.pb.cc ./src
+#----------------build---------------#
+rm -rf build
+mkdir build
+cd build
+cmake ../test
+make -j10
+#-----------------run----------------#
+./unit_test_main
--- a/deepes/test/src/optimizers_test.cc
+++ b/deepes/test/src/optimizers_test.cc
+//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "gtest/gtest.h"
+#include <vector>
+#include "optimizer_factory.h"
+#include <memory>
+namespace DeepES {
+// Applies one "sgd" update (base_lr 1.0) to ten weights and compares the result
+// with precomputed values. A second update with the same size must succeed,
+// while an update with a different size (9) must be rejected.
+TEST(SGDOptimizersTest, Method_update) {
+    const int kParamSize = 10;
+    auto config = std::make_shared<DeepESConfig>();
+    auto* settings = config->mutable_optimizer();
+    settings->set_base_lr(1.0);
+    settings->set_type("sgd");
+    std::shared_ptr<Optimizer> optimizer = create_optimizer(config->optimizer());
+
+    float weights[kParamSize] = {
+        0.0, 0.0, 0.04216444, 0.0511456, 0.04231584,
+        0.01089015, 0.06569759, 0.00127421, -0.00092832, 0.01128081};
+    float gradients[kParamSize] = {
+        -0.11992419, -0.0, 0.07681337, -0.06616384, 0.00249889,
+        0.01158612, -0.3067452, 0.36048946, -0.15820622, -0.20014143};
+    float expected[kParamSize] = {
+        0.01199242, 0.0, 0.0344831, 0.05776198, 0.04206595,
+        0.00973154, 0.09637211, -0.03477474, 0.014892306, 0.03129495};
+
+    EXPECT_TRUE(optimizer->update(weights, gradients, kParamSize, "test"));
+    for (int idx = 0; idx < kParamSize; ++idx) {
+        EXPECT_FLOAT_EQ(expected[idx], weights[idx]) << " i: " << idx;
+    }
+    EXPECT_TRUE(optimizer->update(weights, gradients, kParamSize, "test"));
+    EXPECT_FALSE(optimizer->update(weights, gradients, kParamSize - 1, "test"));
+}
+// Applies one "adam" update (base_lr 1.0) to ten weights and compares the result
+// with precomputed values. A second update with the same size must succeed,
+// while an update with a different size (9) must be rejected.
+TEST(AdamOptimizersTest, Method_update) {
+    const int kParamSize = 10;
+    auto config = std::make_shared<DeepESConfig>();
+    auto* settings = config->mutable_optimizer();
+    settings->set_base_lr(1.0);
+    settings->set_type("adam");
+    std::shared_ptr<Optimizer> optimizer = create_optimizer(config->optimizer());
+
+    float weights[kParamSize] = {
+        0.0, 0.0, 0.04216444, 0.0511456, 0.04231584,
+        0.01089015, 0.06569759, 0.00127421, -0.00092832, 0.01128081};
+    float gradients[kParamSize] = {
+        -0.11992419, -0.0, 0.07681337, -0.06616384, 0.00249889,
+        0.01158612, -0.3067452, 0.36048946, -0.15820622, -0.20014143};
+    float expected[kParamSize] = {
+        0.99999736, 0., -0.95783144, 1.05114082, -0.95755763,
+        -0.98908256, 1.06569656, -0.99872491, 0.99906968, 1.01127923};
+
+    EXPECT_TRUE(optimizer->update(weights, gradients, kParamSize, "test"));
+    for (int idx = 0; idx < kParamSize; ++idx) {
+        EXPECT_FLOAT_EQ(expected[idx], weights[idx]) << " i: " << idx;
+    }
+    EXPECT_TRUE(optimizer->update(weights, gradients, kParamSize, "test"));
+    EXPECT_FALSE(optimizer->update(weights, gradients, kParamSize - 1, "test"));
+}
+} // namespace
--- a/deepes/test/src/torch_agent_test.cc
+++ b/deepes/test/src/torch_agent_test.cc
+//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "gtest/gtest.h"
+#include <torch/torch.h>
+#include <glog/logging.h>
+#include <omp.h>
+#include "gaussian_sampling.h"
+#include "torch_demo_model.h"
+#include "es_agent.h"
+#include <memory>
+#include <vector>
+#include <random>
+#include <math.h>
+namespace DeepES {
+// Fixture that trains an ESAgent<Model> on noisy samples of sin(x) in SetUp()
+// and exposes the resulting train/test losses to the test bodies.
+class TorchDemoTest : public ::testing::Test {
+protected:
+    // Returns the negated mean squared error of `agent` over the first `size`
+    // (x, y) pairs, so a larger value means a better fit.
+    float evaluate(const std::vector<float>& x_list, const std::vector<float>& y_list, int size,
+                   const std::shared_ptr<ESAgent<Model>>& agent) {
+        float total_loss = 0.0;
+        for (int i = 0; i < size; ++i) {
+            torch::Tensor x_input = torch::tensor(x_list[i], torch::dtype(torch::kFloat32));
+            torch::Tensor predict_y = agent->predict(x_input);
+            auto pred_y = predict_y.accessor<float,2>();
+            const float diff = pred_y[0][0] - y_list[i];
+            total_loss += diff * diff;
+        }
+        return -total_loss / static_cast<float>(size);
+    }
+
+    // Mean squared error of the trained agent on the training data.
+    float train_loss() {
+        return -1.0 * evaluate(x_list, y_list, train_data_size, agent);
+    }
+
+    // Mean squared error of the trained agent on the test data.
+    float test_loss() {
+        return -1.0 * evaluate(test_x_list, test_y_list, test_data_size, agent);
+    }
+
+    // Absolute difference between the training loss and the test loss.
+    float train_test_gap() {
+        const float train_lo = train_loss();
+        const float test_lo = test_loss();
+        return train_lo > test_lo ? train_lo - test_lo : test_lo - train_lo;
+    }
+
+    // Generates the data sets, builds the agent and runs the ES training loop.
+    void SetUp() override {
+        std::default_random_engine generator(0); // fix seed
+        std::uniform_real_distribution<float> uniform(-3.0, 9.0);
+        std::normal_distribution<float> norm;
+        for (int i = 0; i < train_data_size; ++i) {
+            float x_i = uniform(generator); // generate data between [-3, 9]
+            float y_i = sin(x_i) + norm(generator)*0.05; // noise std 0.05
+            x_list.push_back(x_i);
+            y_list.push_back(y_i);
+        }
+        for (int i= 0; i < test_data_size; ++i) {
+            float x_i = uniform(generator);
+            float y_i = sin(x_i);
+            test_x_list.push_back(x_i);
+            test_y_list.push_back(y_i);
+        }
+
+        std::shared_ptr<Model>  model = std::make_shared<Model>(1, 1);
+        agent = std::make_shared<ESAgent<Model>>(model, "../test/torch_sin_config.prototxt");
+
+        // Clone agents to sample (explore).
+        std::vector<std::shared_ptr<ESAgent<Model>>> sampling_agents;
+        for (int i = 0; i < iter; ++i) {
+            sampling_agents.push_back(agent->clone());
+        }
+
+        std::vector<SamplingKey> noisy_keys;
+        std::vector<float> noisy_rewards(iter, 0.0f);
+        noisy_keys.resize(iter);
+        // One flag per sampling agent. The parallel loop only records the result of
+        // add_noise(); it is asserted after the loop because a fatal gtest assertion
+        // cannot return out of an OpenMP worksharing loop. char (not bool) keeps
+        // each element a separate memory location.
+        std::vector<char> noise_added(iter, 1);
+
+        LOG(INFO) << "start training...";
+        for (int epoch = 0; epoch < 1001; ++epoch) {
+#pragma omp parallel for schedule(dynamic, 1)
+            for (int i = 0; i < iter; ++i) {
+                auto sampling_agent = sampling_agents[i];
+                SamplingKey key;
+                noise_added[i] = sampling_agent->add_noise(key) ? 1 : 0;
+                float reward = evaluate(x_list, y_list, train_data_size, sampling_agent);
+                noisy_keys[i] = key;
+                noisy_rewards[i] = reward;
+            }
+            for (int i = 0; i < iter; ++i) {
+                ASSERT_TRUE(noise_added[i] != 0) << "add_noise failed, epoch: " << epoch << " i: " << i;
+            }
+            ASSERT_TRUE(agent->update(noisy_keys, noisy_rewards)) << "update failed, epoch: " << epoch;
+
+            if (epoch % 100 == 0) {
+                float reward = evaluate(test_x_list, test_y_list, test_data_size, agent);
+                float train_reward = evaluate(x_list, y_list, train_data_size, agent);
+                LOG(INFO) << "Epoch:" << epoch << " Loss: " << -reward << ", Train loss" << -train_reward;
+            }
+        }
+    }
+
+    // Class members declared here can be used by all tests in the test suite
+    int train_data_size = 300;
+    int test_data_size = 100;
+    int iter = 10;  // number of sampling agents per epoch
+    std::vector<float> x_list;
+    std::vector<float> y_list;
+    std::vector<float> test_x_list;
+    std::vector<float> test_y_list;
+    std::shared_ptr<ESAgent<Model>> agent;
+};
+// After the training performed in SetUp(), both losses must be below 0.05 and
+// the train/test losses must differ by less than 0.03.
+TEST_F(TorchDemoTest, TrainingEffectTest) {
+    EXPECT_LT(train_loss(), 0.05);
+    EXPECT_LT(test_loss(), 0.05);
+    EXPECT_LT(train_test_gap(), 0.03);
+}
+} // namespace
--- a/deepes/test/src/utils_test.cc
+++ b/deepes/test/src/utils_test.cc
+//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "gtest/gtest.h"
+#include <vector>
+#include "utils.h"
+namespace DeepES {
+// Checks that compute_centered_ranks() reports success for a five-element
+// reward vector.
+TEST(UtilsTest, Method_compute_centered_ranks) {
+    std::vector<float> reward_vec = {9.0f, 8.0f, 7.0f, 6.0f, 5.0f};
+    EXPECT_TRUE(compute_centered_ranks(reward_vec));
+}
+} // namespace
--- a/deepes/test/torch_sin_config.prototxt
+++ b/deepes/test/torch_sin_config.prototxt
+seed : 1024
+gaussian_sampling {
+  std: 0.005
+}
+optimizer {
+  type: "Adam",
+  base_lr: 0.005,
+  momentum: 0.9,
+  beta1: 0.9,
+  beta2: 0.999,
+  epsilon: 1e-8,
+}
--- a/deepes/test/unit_test.cc
+++ b/deepes/test/unit_test.cc
+//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "gtest/gtest.h"
+// Entry point of the unit-test binary: lets GoogleTest consume its own
+// command-line flags, then runs every registered test and returns the result
+// of RUN_ALL_TESTS() as the exit status.
+int main(int argc, char **argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  const int exit_status = RUN_ALL_TESTS();
+  return exit_status;
+}
--- a/examples/LiftSim_baseline/A2C/README.md
+++ b/examples/LiftSim_baseline/A2C/README.md
@@ -7,7 +7,7 @@
 ## 依赖库
 + [paddlepaddle>=1.6.1](https://github.com/PaddlePaddle/Paddle)
-+ [parl>=1.2.2](https://github.com/PaddlePaddle/PARL)
++ [parl>=1.2.3](https://github.com/PaddlePaddle/PARL)
 + [rlschool>=0.1.1][rlschool]

--- a/parl/__init__.py
+++ b/parl/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.2.2"
+__version__ = "1.2.3"
 """
 generates new PARL python API
 """