Unverified commit e9c8d930, authored by yuyang18

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/combine_open_files_and_double_buffer
......@@ -282,7 +282,3 @@ if(WITH_DOC)
find_python_module(recommonmark REQUIRED)
add_subdirectory(doc)
endif()
if (WITH_CONTRIB)
add_subdirectory(paddle/contrib)
endif()
......@@ -138,25 +138,24 @@ copy(memory_lib
set(inference_deps paddle_fluid_shared paddle_fluid)
if(WITH_CONTRIB)
message(STATUS "installing contrib")
set(contrib_dst_dir "${FLUID_INSTALL_DIR}/contrib/inference")
if (WITH_ANAKIN AND WITH_GPU)
copy(contrib_anakin_inference_lib DEPS paddle_inference_api inference_anakin_api
set(module "inference/api")
if (WITH_ANAKIN AND WITH_GPU)
copy(anakin_inference_lib DEPS paddle_inference_api inference_anakin_api
SRCS
${PADDLE_BINARY_DIR}/paddle/contrib/inference/libinference_anakin_api* # compiled anakin api
${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/libinference_anakin_api* # compiled anakin api
${PADDLE_BINARY_DIR}/third_party/install/anakin/*.tar.gz # anakin release
DSTS ${contrib_dst_dir}/anakin ${contrib_dst_dir}/anakin)
list(APPEND inference_deps contrib_anakin_inference_lib)
endif()
copy(contrib_inference_lib DEPS paddle_inference_api paddle_inference_api_shared
SRCS ${PADDLE_SOURCE_DIR}/paddle/contrib/inference/paddle_inference_api.h
${PADDLE_BINARY_DIR}/paddle/contrib/inference/libpaddle_inference_api*
DSTS ${contrib_dst_dir} ${contrib_dst_dir})
list(APPEND inference_deps contrib_inference_lib)
DSTS ${dst_dir}/inference/anakin ${dst_dir}/inference/anakin)
list(APPEND inference_deps anakin_inference_lib)
endif()
copy(inference_api_lib DEPS paddle_inference_api paddle_inference_api_shared
SRCS ${src_dir}/${module}/paddle_inference_api.h
${src_dir}/${module}/demo_ci
${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/libpaddle_inference_api*
DSTS ${dst_dir}/inference ${dst_dir}/inference ${dst_dir}/inference
)
list(APPEND inference_deps inference_api_lib)
set(module "inference")
copy(inference_lib DEPS ${inference_deps}
SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.*
......
......@@ -4,7 +4,6 @@ API
.. toctree::
:maxdepth: 1
overview.rst
model_configs.rst
data.rst
run_logic.rst
This diff is collapsed.
......@@ -5,7 +5,7 @@ if (TENSORRT_FOUND)
add_subdirectory(tensorrt)
endif()
set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor )
set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor)
# TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal?
cc_library(paddle_fluid_api
......@@ -38,3 +38,4 @@ if(WITH_TESTING)
# both tests/book and analysis depend on the models generated by python/paddle/fluid/tests/book
add_subdirectory(tests/book)
endif()
add_subdirectory(api)
......@@ -43,21 +43,21 @@ function(inference_api_test TARGET_NAME)
endfunction(inference_api_test)
cc_library(paddle_inference_api
SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
SRCS api.cc api_impl.cc
DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
if(NOT APPLE)
set(LINK_FLAGS "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference_api.sym")
set(LINK_FLAGS "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/api.sym")
set_target_properties(paddle_inference_api PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
endif()
# Here the shared library doesn't depend on other fluid libraries; otherwise a double free will occur.
cc_library(paddle_inference_api_shared SHARED
SRCS paddle_inference_api.cc paddle_inference_api_impl.cc)
SRCS api.cc api_impl.cc)
add_dependencies(paddle_inference_api_shared ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
set_target_properties(paddle_inference_api_shared PROPERTIES OUTPUT_NAME paddle_inference_api)
if(NOT APPLE)
set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference_api.map")
set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/api.map")
set_target_properties(paddle_inference_api_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/check_symbol.cmake
"execute_process(COMMAND bash -c \"${CMAKE_CURRENT_SOURCE_DIR}/check_symbol.sh"
......@@ -73,32 +73,32 @@ if(NOT APPLE)
endif()
cc_test(test_paddle_inference_api
SRCS test_paddle_inference_api.cc
SRCS test_api.cc
DEPS paddle_inference_api)
inference_api_test(test_paddle_inference_api_impl
inference_api_test(test_api_impl
ARGS test_word2vec test_image_classification)
if(WITH_GPU AND TENSORRT_FOUND)
cc_library(paddle_inference_tensorrt_subgraph_engine
SRCS paddle_inference_api_tensorrt_subgraph_engine.cc
DEPS paddle_inference_api analysis tensorrt_engine paddle_inference_api paddle_fluid_api)
SRCS api_tensorrt_subgraph_engine.cc
DEPS paddle_inference_api analysis tensorrt_engine paddle_fluid_api)
inference_api_test(test_paddle_inference_api_tensorrt_subgraph_engine ARGS test_word2vec)
inference_api_test(test_api_tensorrt_subgraph_engine ARGS test_word2vec)
endif()
if (WITH_ANAKIN) # only needed in CI
# Because Anakin has no official library releases and its protobuf and CUDA versions do not match Paddle's,
# the anakin library will not be merged into our official inference library. To use the anakin prediction API,
# one needs to compile libinference_anakin_api.a and link against anakin.so.
nv_library(inference_anakin_api SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc)
nv_library(inference_anakin_api_shared SHARED SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc)
nv_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc)
nv_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc)
target_compile_options(inference_anakin_api BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
target_compile_options(inference_anakin_api_shared BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
target_link_libraries(inference_anakin_api anakin anakin_saber_common)
target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common)
if (WITH_TESTING)
cc_test(inference_anakin_test SRCS paddle_inference_api_anakin_engine_tester.cc
cc_test(inference_anakin_test SRCS api_anakin_engine_tester.cc
ARGS --model=${ANAKIN_INSTALL_DIR}/mobilenet_v2.anakin.bin
DEPS inference_anakin_api)
target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/contrib/inference/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace paddle {
......
......@@ -12,8 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/contrib/inference/paddle_inference_api_anakin_engine.h"
#include "paddle/fluid/inference/api/api_anakin_engine.h"
#include <cuda.h>
#include <vector>
namespace paddle {
......@@ -47,8 +48,7 @@ bool PaddleInferenceAnakinPredictor::Run(
}
auto d_tensor_in_p = executor_.get_in(input.name);
float *d_data_p = d_tensor_in_p->mutable_data();
if (cudaMemcpy(d_data_p,
static_cast<float *>(input.data.data()),
if (cudaMemcpy(d_data_p, static_cast<float *>(input.data.data()),
d_tensor_in_p->valid_size() * sizeof(float),
cudaMemcpyHostToDevice) != 0) {
LOG(ERROR) << "copy data from CPU to GPU error";
......@@ -70,8 +70,7 @@ bool PaddleInferenceAnakinPredictor::Run(
output.data.Resize(tensor->valid_size() * sizeof(float));
}
// Copy data from GPU -> CPU
if (cudaMemcpy(output.data.data(),
tensor->mutable_data(),
if (cudaMemcpy(output.data.data(), tensor->mutable_data(),
tensor->valid_size() * sizeof(float),
cudaMemcpyDeviceToHost) != 0) {
LOG(ERROR) << "copy data from GPU to CPU error";
......@@ -106,13 +105,12 @@ std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() {
// A factory to help create different predictors.
template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(
const AnakinConfig &config) {
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
AnakinConfig, PaddleEngineKind::kAnakin>(const AnakinConfig &config) {
VLOG(3) << "Anakin Predictor create.";
std::unique_ptr<PaddlePredictor> x(
new PaddleInferenceAnakinPredictor(config));
return x;
};
}
} // namespace paddle
......@@ -19,7 +19,8 @@ limitations under the License. */
#pragma once
#include "paddle/contrib/inference/paddle_inference_api.h"
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
// from anakin
#include "framework/core/net/net.h"
......@@ -31,7 +32,7 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
public:
PaddleInferenceAnakinPredictor() {}
PaddleInferenceAnakinPredictor(const AnakinConfig& config);
explicit PaddleInferenceAnakinPredictor(const AnakinConfig& config);
// NOTE Unlike the native engine, the buffers of anakin engine's output_data
// should be allocated first.
......@@ -48,8 +49,7 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
private:
bool Init(const AnakinConfig& config);
anakin::graph::Graph<anakin::NV,
anakin::saber::AK_FLOAT,
anakin::graph::Graph<anakin::NV, anakin::saber::AK_FLOAT,
anakin::Precision::FP32>
graph_;
anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
......
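The NOTE in the header hunk above says that, unlike the native engine, the Anakin engine expects the caller to allocate the output buffers before calling Run. Below is a minimal, hedged C++ sketch of that calling pattern; the AnakinConfig fields, tensor names, shapes, and buffer sizes are illustrative assumptions, not values taken from this diff.

#include <cstring>
#include <vector>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnakinConfig config;
  config.model_file = "./mobilenet_v2.anakin.bin";  // assumed field and path
  config.device = 0;                                // assumed field

  auto predictor =
      paddle::CreatePaddlePredictor<paddle::AnakinConfig,
                                    paddle::PaddleEngineKind::kAnakin>(config);

  // One float input; the tensor name must match the model's input.
  std::vector<float> host_input(1 * 3 * 224 * 224, 0.f);  // assumed shape
  std::vector<paddle::PaddleTensor> inputs(1), outputs(1);
  inputs[0].name = "input_0";          // placeholder name
  inputs[0].shape = {1, 3, 224, 224};  // assumed input shape
  inputs[0].dtype = paddle::PaddleDType::FLOAT32;
  inputs[0].data.Resize(host_input.size() * sizeof(float));
  std::memcpy(inputs[0].data.data(), host_input.data(), inputs[0].data.length());

  // Per the NOTE above: allocate the Anakin engine's output buffer up front.
  outputs[0].name = "prob_out";                  // placeholder name
  outputs[0].data.Resize(1000 * sizeof(float));  // assumed output size

  predictor->Run(inputs, &outputs);
  return 0;
}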
......@@ -16,7 +16,7 @@ limitations under the License. */
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "paddle/contrib/inference/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
DEFINE_string(model, "", "Directory of the inference model.");
......
......@@ -21,7 +21,7 @@ limitations under the License. */
#include <utility>
#include <vector>
#include "paddle/contrib/inference/paddle_inference_api_impl.h"
#include "paddle/fluid/inference/api/api_impl.h"
namespace paddle {
namespace {
......@@ -77,8 +77,8 @@ bool NativePaddlePredictor::Init(
if (!config_.model_dir.empty()) {
// Parameters are saved in separate files located in
// the specified `dirname`.
inference_program_ = paddle::inference::Load(
executor_.get(), scope_.get(), config_.model_dir);
inference_program_ = paddle::inference::Load(executor_.get(), scope_.get(),
config_.model_dir);
} else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
// All parameters are saved in a single file.
// The file names should be consistent with that used
......@@ -91,8 +91,8 @@ bool NativePaddlePredictor::Init(
}
ctx_ = executor_->Prepare(*inference_program_, 0);
executor_->CreateVariables(
*inference_program_, sub_scope_ ? sub_scope_ : scope_.get(), 0);
executor_->CreateVariables(*inference_program_,
sub_scope_ ? sub_scope_ : scope_.get(), 0);
// Get the feed_target_names and fetch_target_names
feed_target_names_ = inference_program_->GetFeedTargetNames();
......@@ -105,7 +105,7 @@ NativePaddlePredictor::~NativePaddlePredictor() {
PADDLE_ENFORCE_NOT_NULL(scope_, "Should have parent scope!");
scope_->DeleteScope(sub_scope_);
}
};
}
bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data) {
......@@ -134,10 +134,8 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
// if share variables, we need not create variables
VLOG(4) << "Run prepared context";
executor_->RunPreparedContext(
ctx_.get(),
sub_scope_ != nullptr ? sub_scope_ : scope_.get(),
&feed_targets,
&fetch_targets,
ctx_.get(), sub_scope_ != nullptr ? sub_scope_ : scope_.get(),
&feed_targets, &fetch_targets,
false /* don't create variable each time */);
VLOG(4) << "Finish prepared context";
if (!GetFetch(fetchs, output_data)) {
......@@ -181,8 +179,7 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
}
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std::memcpy(static_cast<void *>(input_ptr),
inputs[i].data.data(),
std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
inputs[i].data.length());
feeds->push_back(input);
}
......@@ -232,8 +229,7 @@ bool NativePaddlePredictor::GetFetch(
size_t start = lod[0][j - 1] * common_dim;
size_t end = lod[0][j] * common_dim;
if (end > start) {
std::copy(output_ptr + start,
output_ptr + end,
std::copy(output_ptr + start, output_ptr + end,
data.begin() + (j - 1) * max_dim * common_dim);
}
}
......@@ -257,15 +253,13 @@ bool NativePaddlePredictor::GetFetch(
}
template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
const NativeConfig &config) {
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) {
VLOG(3) << "create NativePaddlePredictor";
if (config.use_gpu) {
// 1. GPU memory
PADDLE_ENFORCE_GT(
config.fraction_of_gpu_memory,
0.f,
config.fraction_of_gpu_memory, 0.f,
"fraction_of_gpu_memory in the config should be set to range (0., 1.]");
PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
std::vector<std::string> flags;
......
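The factory above validates the NativeConfig before building a predictor (for instance, fraction_of_gpu_memory must fall in (0, 1]). A minimal, hedged sketch of constructing such a config against the relocated header follows; the model directory and memory fraction are placeholder values, not taken from this diff.

#include "paddle/fluid/inference/api/paddle_inference_api.h"  // new header location

int main() {
  paddle::NativeConfig config;
  config.model_dir = "./word2vec.inference.model";  // placeholder model directory
  config.use_gpu = true;
  config.fraction_of_gpu_memory = 0.15f;  // must be in (0, 1] per the check above
  config.device = 0;

  auto predictor =
      paddle::CreatePaddlePredictor<paddle::NativeConfig,
                                    paddle::PaddleEngineKind::kNative>(config);
  // Build std::vector<paddle::PaddleTensor> inputs and call predictor->Run(...) as usual.
  return 0;
}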
......@@ -19,7 +19,7 @@
#include <string>
#include <vector>
#include "paddle/contrib/inference/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/lod_tensor.h"
......
......@@ -12,9 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/contrib/inference/paddle_inference_api.h"
#include "paddle/contrib/inference/paddle_inference_api_impl.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/utils/singleton.h"
namespace paddle {
......@@ -77,8 +77,8 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
ctx_ = executor_->Prepare(*inference_program_, 0);
VLOG(5) << "to create variables";
executor_->CreateVariables(
*inference_program_, sub_scope_ ? sub_scope_ : scope_.get(), 0);
executor_->CreateVariables(*inference_program_,
sub_scope_ ? sub_scope_ : scope_.get(), 0);
// Get the feed_target_names and fetch_target_names
feed_target_names_ = inference_program_->GetFeedTargetNames();
......@@ -98,8 +98,7 @@ CreatePaddlePredictor<TensorRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
if (config.use_gpu) {
// 1. GPU memory
PADDLE_ENFORCE_GT(
config.fraction_of_gpu_memory,
0.f,
config.fraction_of_gpu_memory, 0.f,
"fraction_of_gpu_memory in the config should be set to range (0., 1.]");
PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
std::vector<std::string> flags;
......
......@@ -55,11 +55,11 @@ endif()
# Note: libpaddle_inference_api.so/a must be put before libpaddle_fluid.so/a
if(WITH_STATIC_LIB)
set(DEPS
${PADDLE_LIB}/contrib/inference/libpaddle_inference_api.a
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_inference_api.a
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid.a)
else()
set(DEPS
${PADDLE_LIB}/contrib/inference/libpaddle_inference_api.so
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_inference_api.so
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid.so)
endif()
set(EXTERNAL_LIB "-lrt -ldl -lpthread")
......
......@@ -64,7 +64,7 @@ for WITH_STATIC_LIB in ON OFF; do
-DWITH_GPU=$TEST_GPU_CPU \
-DWITH_STATIC_LIB=$WITH_STATIC_LIB
make -j
for use_gpu in false; do
for use_gpu in $use_gpu_list; do
for vis_demo_name in $vis_demo_list; do
./vis_demo \
--modeldir=../data/$vis_demo_name/model \
......
......@@ -19,8 +19,8 @@ limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <memory>
#include <thread>
#include "contrib/inference/paddle_inference_api.h"
#include <thread> //NOLINT
#include "paddle/fluid/inference/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_string(dirname, "", "Directory of the inference model.");
......@@ -63,8 +63,8 @@ void Main(bool use_gpu) {
PADDLE_ENFORCE(outputs.size(), 1UL);
// Check the output buffer size and result of each tid.
PADDLE_ENFORCE(outputs.front().data.length(), 33168UL);
float result[5] = {
0.00129761, 0.00151112, 0.000423564, 0.00108815, 0.000932706};
float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
......@@ -107,8 +107,8 @@ void MainThreads(int num_threads, bool use_gpu) {
PADDLE_ENFORCE(outputs.size(), 1UL);
// Check the output buffer size and result of each tid.
PADDLE_ENFORCE(outputs.front().data.length(), 33168UL);
float result[5] = {
0.00129761, 0.00151112, 0.000423564, 0.00108815, 0.000932706};
float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
0.000932706};
const size_t num_elements =
outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory.
......
......@@ -13,16 +13,15 @@
// limitations under the License.
#pragma once
#include <algorithm>
#include <string>
#include <vector>
#include "contrib/inference/paddle_inference_api.h"
#include "paddle/fluid/inference/paddle_inference_api.h"
namespace paddle {
namespace demo {
static void split(const std::string& str,
char sep,
static void split(const std::string& str, char sep,
std::vector<std::string>* pieces) {
pieces->clear();
if (str.empty()) {
......
......@@ -29,8 +29,7 @@ DECLARE_double(fraction_of_gpu_memory_to_use);
DEFINE_string(modeldir, "", "Directory of the inference model.");
DEFINE_string(refer, "", "path to reference result for comparison.");
DEFINE_string(
data,
"",
data, "",
"path of data; each line is a record, format is "
"'<space splitted floats as data>\t<space splitted ints as shape'");
DEFINE_bool(use_gpu, false, "Whether use gpu.");
......
......@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/contrib/inference/paddle_inference_api.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace paddle {
......
......@@ -15,10 +15,10 @@ limitations under the License. */
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <thread>
#include <thread> // NOLINT
#include "gflags/gflags.h"
#include "paddle/contrib/inference/paddle_inference_api_impl.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string(dirname, "", "Directory of the inference model.");
......@@ -121,8 +121,8 @@ void MainImageClassification(bool use_gpu) {
// which should be in the range [0.0, 1.0].
feed_target_shapes[0][0] = batch_size;
framework::DDim input_dims = framework::make_ddim(feed_target_shapes[0]);
SetupTensor<float>(
&input, input_dims, static_cast<float>(0), static_cast<float>(1));
SetupTensor<float>(&input, input_dims, static_cast<float>(0),
static_cast<float>(1));
std::vector<framework::LoDTensor*> cpu_feeds;
cpu_feeds.push_back(&input);
......
......@@ -15,7 +15,7 @@
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "paddle/contrib/inference/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace paddle {
......
......@@ -33,22 +33,14 @@ class Im2SequenceOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(in_dim.size(), 4,
"Input(X) format must be 4D tensor, eg., NCHW.");
int batch_size = in_dim[0];
int img_channels = in_dim[1];
int img_height = in_dim[2];
int img_width = in_dim[3];
auto kernels = ctx->Attrs().Get<std::vector<int>>("kernels");
auto strides = ctx->Attrs().Get<std::vector<int>>("strides");
auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0],
paddings[2], strides[0]);
int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1],
paddings[3], strides[1]);
ctx->SetOutputDim("Out", {batch_size * output_height * output_width,
img_channels * kernels[0] * kernels[1]});
ctx->SetOutputDim("Out",
{in_dim[0], img_channels * kernels[0] * kernels[1]});
}
};
......
......@@ -109,12 +109,13 @@ class Im2SequenceKernel : public framework::OpKernel<T> {
}
out->set_lod(lod);
} else {
out->mutable_data<T>(ctx.GetPlace());
int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0],
paddings[2], strides[0]);
int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1],
paddings[3], strides[1]);
out->mutable_data<T>({batch_size * output_height * output_width,
img_channels * kernels[0] * kernels[1]},
ctx.GetPlace());
const std::vector<int> dilations({1, 1});
auto out_dims = out->dims();
out->Resize({batch_size, out->numel() / batch_size});
......
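For context, the output height and width used above come from the usual sliding-window size formula. The sketch below shows what a helper like Im2SeqOutputSize is assumed to compute; the signature is inferred from the call sites in this diff, not copied from the real header.

// Number of window positions along one axis; pad_0 and pad_1 are the paddings
// on the two sides of that axis (assumed, matching the call sites above).
inline int Im2SeqOutputSize(int img_size, int kernel, int pad_0, int pad_1,
                            int stride) {
  return (img_size + pad_0 + pad_1 - kernel) / stride + 1;
}
// With NCHW input {N, C, H, W}, the kernel then shapes the output as
// {N * output_height * output_width, C * kernels[0] * kernels[1]}.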
......@@ -88,7 +88,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
input_format = memory::format::nc;
}
for (int i = in_place ? 1 : 0; i < N; i++) {
for (int i = 0; i < N; i++) {
PADDLE_ENFORCE(in_vars[i]->IsType<LoDTensor>(),
"all inputs must be all LoDTensors");
auto& input = in_vars[i]->Get<LoDTensor>();
......
......@@ -19,6 +19,8 @@
# Utils
#=================================================
set -ex
function print_usage() {
echo -e "\n${RED}Usage${NONE}:
${BOLD}${SCRIPT_NAME}${NONE} [OPTION]"
......@@ -37,6 +39,7 @@ function print_usage() {
${BLUE}fluid_inference_lib${NONE}: deploy fluid inference library
${BLUE}check_style${NONE}: run code style check
${BLUE}cicheck${NONE}: run CI tasks
${BLUE}assert_api_not_changed${NONE}: check api compatibility
"
}
......@@ -326,11 +329,22 @@ function assert_api_not_changed() {
virtualenv .env
source .env/bin/activate
pip install ${PADDLE_ROOT}/build/python/dist/*whl
curl ${PADDLE_API_SPEC_URL:-https://raw.githubusercontent.com/PaddlePaddle/FluidAPISpec/master/API.spec} \
> origin.spec
python ${PADDLE_ROOT}/tools/print_signatures.py paddle.fluid > new.spec
python ${PADDLE_ROOT}/tools/diff_api.py origin.spec new.spec
python ${PADDLE_ROOT}/tools/diff_api.py ${PADDLE_ROOT}/paddle/fluid/API.spec new.spec
deactivate
API_CHANGE=`git diff --name-only HEAD^ | grep "paddle/fluid/API.spec" || true`
echo "checking API.spec change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}"
if [ ${API_CHANGE} ] && [ "${GIT_PR_ID}" != "" ]; then
# TODO: curl -H 'Authorization: token ${TOKEN}'
APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews | \
python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 7845005 2887803 728699 13348433`
echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
if [ "${APPROVALS}" == "FALSE" ]; then
echo "You must have at least 2 approvals for the api change!"
exit 1
fi
fi
}
......@@ -531,13 +545,12 @@ function test_fluid_inference_lib() {
Testing fluid inference library ...
========================================
EOF
cd ${PADDLE_ROOT}/paddle/contrib/inference/demo_ci
cd ${PADDLE_ROOT}/paddle/fluid/inference/api/demo_ci
./run.sh ${PADDLE_ROOT} ${WITH_MKL:-ON} ${WITH_GPU:-OFF}
fi
}
function main() {
set -e
local CMD=$1
init
case $CMD in
......
......@@ -26,8 +26,7 @@ from ..layer_helper import LayerHelper
from ..unique_name import generate as unique_name
__all__ = [
'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'Recv',
'open_recordio_file', 'open_files', 'read_file', 'shuffle', 'batch',
'data', 'open_recordio_file', 'open_files', 'read_file', 'shuffle', 'batch',
'double_buffer', 'random_data_generator', 'py_reader', 'Preprocessor',
'load'
]
......@@ -908,7 +907,7 @@ class Preprocessor(object):
self.sink_var_names = None
self.status = Preprocessor.BEFORE_SUB_BLOCK
def is_completed(self):
def _is_completed(self):
return self.sub_block and self.source_var_names and self.sink_var_names
@contextlib.contextmanager
......@@ -918,7 +917,7 @@ class Preprocessor(object):
yield
self.main_prog.rollback()
self.status = Preprocessor.AFTER_SUB_BLOCK
if not self.is_completed():
if not self._is_completed():
raise RuntimeError(
"The definition of preprocessor is incompleted! "
"Please make sure that you have set input and output "
......
......@@ -1180,16 +1180,16 @@ class ModelAverage(Optimizer):
self._add_average_restore_op(block, param_grad)
def _add_average_apply_op(self, block, param_grad):
param = block.clone_variable(param_grad[0])
grad = block.clone_variable(param_grad[1])
sum_1 = block.clone_variable(self._get_accumulator('sum_1', param))
sum_2 = block.clone_variable(self._get_accumulator('sum_2', param))
sum_3 = block.clone_variable(self._get_accumulator('sum_3', param))
num_accumulates = block.clone_variable(
param = block._clone_variable(param_grad[0])
grad = block._clone_variable(param_grad[1])
sum_1 = block._clone_variable(self._get_accumulator('sum_1', param))
sum_2 = block._clone_variable(self._get_accumulator('sum_2', param))
sum_3 = block._clone_variable(self._get_accumulator('sum_3', param))
num_accumulates = block._clone_variable(
self._get_accumulator('num_accumulates', param))
old_num_accumulates = block.clone_variable(
old_num_accumulates = block._clone_variable(
self._get_accumulator('old_num_accumulates', param))
num_updates = block.clone_variable(
num_updates = block._clone_variable(
self._get_accumulator('num_updates', param))
# backup param value to grad
layers.assign(input=param, output=grad)
......@@ -1203,8 +1203,8 @@ class ModelAverage(Optimizer):
layers.elementwise_div(x=sum, y=tmp, out=param)
def _add_average_restore_op(self, block, param_grad):
param = block.clone_variable(param_grad[0])
grad = block.clone_variable(param_grad[1])
param = block._clone_variable(param_grad[0])
grad = block._clone_variable(param_grad[1])
layers.assign(input=grad, output=param)
def _append_average_accumulate_op(self, param):
......
......@@ -22,6 +22,9 @@ import numpy
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.layers.io import ListenAndServ
from paddle.fluid.layers.io import Recv
from paddle.fluid.layers.io import Send
class TestSendOp(unittest.TestCase):
......@@ -65,8 +68,7 @@ class TestSendOp(unittest.TestCase):
main = fluid.Program()
with fluid.program_guard(main):
serv = layers.ListenAndServ(
"127.0.0.1:0", ["X"], optimizer_mode=False)
serv = ListenAndServ("127.0.0.1:0", ["X"], optimizer_mode=False)
with serv.do():
out_var = main.global_block().create_var(
name="scale_0.tmp_0",
......@@ -99,8 +101,8 @@ class TestSendOp(unittest.TestCase):
persistable=False,
shape=[32, 32])
fluid.initializer.Constant(value=2.3)(get_var, main.global_block())
layers.Send("127.0.0.1:%d" % port, [x])
o = layers.Recv("127.0.0.1:%d" % port, [get_var])
Send("127.0.0.1:%d" % port, [x])
o = Recv("127.0.0.1:%d" % port, [get_var])
exe = fluid.Executor(place)
self.dist_out = exe.run(main, fetch_list=o) # o is a list
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -11,6 +11,39 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
add_subdirectory(inference)
from __future__ import print_function
import sys
import json


def check_approval(count, required_reviewers):
    json_buff = ""
    for line in sys.stdin:
        json_buff = "".join([json_buff, line])
    json_resp = json.loads(json_buff)
    approves = 0
    approved_user_ids = []
    for review in json_resp:
        if review["state"] == "APPROVED":
            approves += 1
            approved_user_ids.append(review["user"]["id"])

    # convert to int
    required_reviewers_int = set()
    for rr in required_reviewers:
        required_reviewers_int.add(int(rr))

    if len(set(approved_user_ids) & required_reviewers_int) >= count:
        print("TRUE")
    else:
        print("FALSE")


if __name__ == "__main__":
    if len(sys.argv) > 1 and sys.argv[1].isdigit():
        check_approval(int(sys.argv[1]), sys.argv[2:])
    else:
        print(
            "Usage: python check_pr_approval.py [count] [required reviewer id] ..."
        )