diff --git a/doc/v2/build_and_install/build_from_source_cn.rst b/doc/v2/build_and_install/build_from_source_cn.rst
index 115b92a33888abf1e1be400e1abbb58b632a2976..f846928954dd3a05e11054ce2ff2ff839fbefd4b 100644
--- a/doc/v2/build_and_install/build_from_source_cn.rst
+++ b/doc/v2/build_and_install/build_from_source_cn.rst
@@ -19,8 +19,9 @@
----------------
PaddlePaddle needs to be built inside a Docker environment, which saves you the step of installing the build dependencies yourself. Docker images for the different optional build environments
-can be found `here `_ . Alternatively,
-follow the optional steps below to build a Docker image for compiling PaddlePaddle from source.
+can be found `here `_ , and you can also
+find how to build and use the paddle_manylinux_devel
+image `here `_ . Alternatively, follow the optional steps below to build a Docker image for compiling PaddlePaddle from source.
If you choose not to use a Docker image, you need to install the `build dependencies`_ listed in the section below on your local machine before you can start the build.
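As context for the doc text above, here is a minimal sketch of the Docker-based build flow it describes; the `paddlepaddle/paddle:latest-dev` image tag and the cmake flags are illustrative assumptions, not taken from this patch.

```bash
# Illustrative sketch of a Docker-based build (image tag and flags assumed).
git clone https://github.com/PaddlePaddle/Paddle.git
cd Paddle
docker run -it -v $PWD:/paddle paddlepaddle/paddle:latest-dev /bin/bash
# Inside the container:
mkdir -p /paddle/build && cd /paddle/build
cmake .. -DWITH_GPU=OFF -DWITH_TESTING=OFF
make -j$(nproc)
```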
diff --git a/doc/v2/build_and_install/build_from_source_en.rst b/doc/v2/build_and_install/build_from_source_en.rst
index 8fef9e7347e8d924026999bfda985381750c6b51..d1b5b88dff81d4c5cee3dd13a7dccbc333ab6a17 100644
--- a/doc/v2/build_and_install/build_from_source_en.rst
+++ b/doc/v2/build_and_install/build_from_source_en.rst
@@ -22,6 +22,8 @@ How To Build
You need to use Docker to build PaddlePaddle
to avoid installing dependencies by yourself. We have several pre-built
Docker images `here `_ .
+You can also find instructions on how to build and use the paddle_manylinux_devel Docker image
+`here `_ .
Or you can build your own image from source as the optional step below:
.. code-block:: bash
diff --git a/paddle/fluid/inference/analysis/dot.h b/paddle/fluid/inference/analysis/dot.h
index 3359987874f2d74d7e4646baa38790431c4b28fd..4bf1840fdda8508b52d7274a338c5b1c95baf354 100644
--- a/paddle/fluid/inference/analysis/dot.h
+++ b/paddle/fluid/inference/analysis/dot.h
@@ -21,6 +21,7 @@
#include <glog/logging.h>
#include <sstream>
+#include <string>
#include <unordered_map>
#include <vector>
diff --git a/paddle/fluid/inference/engine.h b/paddle/fluid/inference/engine.h
index de0375551e16ec53b90414c7446234fda98bf706..ce2b8161715a3fa2278ce950dbac82c6d0042bef 100644
--- a/paddle/fluid/inference/engine.h
+++ b/paddle/fluid/inference/engine.h
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
+#include <cstddef>
#include "paddle/fluid/framework/framework.pb.h"
namespace paddle {
@@ -58,8 +59,8 @@ class EngineBase {
struct Buffer {
void* buffer{nullptr}; // buffer should be allocated only once.
- int max_size; // buffer allocated space.
- int size; // data size.
+ size_t max_size; // buffer allocated space.
+ size_t size; // data size.
DeviceType device{DeviceType::UNK}; // tells which device this buffer is on.
};
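The switch from `int` to `size_t` matches the CUDA APIs, which take byte counts as `size_t`. Below is a minimal sketch of how such a buffer might be allocated, assuming a `cudaMalloc`-backed allocation; this is an illustration, not code from this patch.

```cpp
#include <cuda_runtime.h>
#include <cstddef>

struct Buffer {            // mirrors EngineBase::Buffer above, minus DeviceType
  void* buffer{nullptr};   // device pointer, allocated once
  size_t max_size{0};      // allocated capacity, in bytes
  size_t size{0};          // payload written so far, in bytes
};

// Allocate device memory and record its capacity; cudaMalloc takes a size_t
// byte count, which is why the fields above are size_t as well.
bool AllocDeviceBuffer(Buffer* buf, size_t bytes) {
  if (cudaMalloc(&buf->buffer, bytes) != cudaSuccess) return false;
  buf->max_size = bytes;
  buf->size = 0;
  return true;
}
```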
diff --git a/paddle/fluid/inference/tensorrt/CMakeLists.txt b/paddle/fluid/inference/tensorrt/CMakeLists.txt
index 677b3e04af8e7f5662a15fb32e3b03f45d262733..b52d083f280e5e7713600a7b748dedd37aca0a1e 100644
--- a/paddle/fluid/inference/tensorrt/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/CMakeLists.txt
@@ -1,5 +1,4 @@
nv_library(tensorrt_engine SRCS engine.cc DEPS framework_proto)
nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader)
nv_test(test_tensorrt_engine SRCS test_engine.cc DEPS dynload_cuda tensorrt_engine)
-
add_subdirectory(convert)
diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index 286abf736e8ff8a357482419e85ad1258a6c6acd..4fb4511d99179e4ea14cde66feb13bc9e114581a 100644
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -1,4 +1,4 @@
-nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc op_converter.h DEPS ${FLUID_CORE_MODULES})
-nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
+nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES})
+nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc
DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine)
nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
diff --git a/paddle/fluid/inference/tensorrt/convert/io_converter.cc b/paddle/fluid/inference/tensorrt/convert/io_converter.cc
index 32e8631fde3f748669d2008b4a060455a37e154e..854f434d93e81237dc85c5df62debcf3b3824b78 100644
--- a/paddle/fluid/inference/tensorrt/convert/io_converter.cc
+++ b/paddle/fluid/inference/tensorrt/convert/io_converter.cc
@@ -23,26 +23,42 @@ namespace tensorrt {
using platform::is_gpu_place;
using platform::is_cpu_place;
-class DefaultInputConverter : public EngineInputConverter {
+class DefaultIOConverter : public EngineIOConverter {
public:
- DefaultInputConverter() {}
+ DefaultIOConverter() {}
// NOTE out is GPU memory.
virtual void operator()(const LoDTensor& in, void* out,
size_t max_size) override {
PADDLE_ENFORCE(out != nullptr);
- PADDLE_ENFORCE_LE(in.memory_size(), max_size);
+ PADDLE_ENFORCE(stream_ != nullptr);
const auto& place = in.place();
+ size_t size = in.memory_size();
+ PADDLE_ENFORCE_LE(size, max_size);
if (is_cpu_place(place)) {
- PADDLE_ENFORCE(stream_ != nullptr);
- PADDLE_ENFORCE_EQ(0,
- cudaMemcpyAsync(out, in.data<float>(), in.memory_size(),
- cudaMemcpyHostToDevice, *stream_));
-
+ PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size,
+ cudaMemcpyHostToDevice, *stream_));
} else if (is_gpu_place(place)) {
- PADDLE_ENFORCE_EQ(0,
- cudaMemcpyAsync(out, in.data<float>(), in.memory_size(),
- cudaMemcpyHostToHost, *stream_));
-
+ PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size,
+ cudaMemcpyDeviceToDevice, *stream_));
+ } else {
+ PADDLE_THROW("Unknown device for converter");
+ }
+ cudaStreamSynchronize(*stream_);
+ }
+ // NOTE in is GPU memory.
+ virtual void operator()(const void* in, LoDTensor* out,
+ size_t max_size) override {
+ PADDLE_ENFORCE(in != nullptr);
+ PADDLE_ENFORCE(stream_ != nullptr);
+ const auto& place = out->place();
+ size_t size = out->memory_size();
+ PADDLE_ENFORCE_LE(size, max_size);
+ if (is_cpu_place(place)) {
+ PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size,
+ cudaMemcpyDeviceToHost, *stream_));
+ } else if (is_gpu_place(place)) {
+ PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size,
+ cudaMemcpyDeviceToDevice, *stream_));
} else {
PADDLE_THROW("Unknown device for converter");
}
@@ -50,7 +66,8 @@ class DefaultInputConverter : public EngineInputConverter {
}
};
-REGISTER_TENSORRT_INPUT_CONVERTER(default, DefaultInputConverter);
+// fluid LoDTensor <-> tensorrt ITensor
+REGISTER_TENSORRT_IO_CONVERTER(default, DefaultIOConverter);
} // namespace tensorrt
} // namespace inference
diff --git a/paddle/fluid/inference/tensorrt/convert/io_converter.h b/paddle/fluid/inference/tensorrt/convert/io_converter.h
index 8972dae92be2c2d261a13c48d98e675f64e51d31..71c48e085d25d2bc6720d93735f661f9e3af7b40 100644
--- a/paddle/fluid/inference/tensorrt/convert/io_converter.h
+++ b/paddle/fluid/inference/tensorrt/convert/io_converter.h
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
+#include <string>
#include <unordered_map>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/utils/singleton.h"
@@ -25,43 +26,57 @@ namespace tensorrt {
using framework::LoDTensor;
/*
- * Convert Input from Fluid to an Engine.
- * TensorRT's ITensor follows row major, NCHW. Fluid is also row major, so in
- * most cases just need to copy the data.
+ * Convert Input from Fluid to TensorRT Engine.
+ * Convert Output from TensorRT Engine to Fluid.
+ *
+ * Note that TensorRT's ITensor follows row major, NCHW. Fluid is also row
+ * major, so in the default case we just need to copy the data.
*/
-class EngineInputConverter {
+class EngineIOConverter {
public:
- EngineInputConverter() {}
+ EngineIOConverter() {}
virtual void operator()(const LoDTensor& in, void* out, size_t max_size) {}
+ virtual void operator()(const void* in, LoDTensor* out, size_t max_size) {}
void SetStream(cudaStream_t* stream) { stream_ = stream; }
- static void Run(const std::string& in_op_type, const LoDTensor& in, void* out,
- size_t max_size, cudaStream_t* stream) {
+ static void ConvertInput(const std::string& op_type, const LoDTensor& in,
+ void* out, size_t max_size, cudaStream_t* stream) {
PADDLE_ENFORCE(stream != nullptr);
- auto* converter = Registry<EngineInputConverter>::Lookup(
- in_op_type, "default" /* default_type */);
+ auto* converter = Registry<EngineIOConverter>::Lookup(
+ op_type, "default" /* default_type */);
PADDLE_ENFORCE_NOT_NULL(converter);
converter->SetStream(stream);
(*converter)(in, out, max_size);
}
- virtual ~EngineInputConverter() {}
+ static void ConvertOutput(const std::string& op_type, const void* in,
+ LoDTensor* out, size_t max_size,
+ cudaStream_t* stream) {
+ PADDLE_ENFORCE(stream != nullptr);
+ auto* converter = Registry<EngineIOConverter>::Lookup(
+ op_type, "default" /* default_type */);
+ PADDLE_ENFORCE_NOT_NULL(converter);
+ converter->SetStream(stream);
+ (*converter)(in, out, max_size);
+ }
+
+ virtual ~EngineIOConverter() {}
protected:
cudaStream_t* stream_{nullptr};
};
+#define REGISTER_TENSORRT_IO_CONVERTER(op_type__, Converter__) \
+ struct trt_io_##op_type__##_converter { \
+ trt_io_##op_type__##_converter() { \
+ Registry<EngineIOConverter>::Register<Converter__>(#op_type__); \
+ } \
+ }; \
+ trt_io_##op_type__##_converter trt_io_##op_type__##_converter__;
+
} // namespace tensorrt
} // namespace inference
} // namespace paddle
-
-#define REGISTER_TENSORRT_INPUT_CONVERTER(in_op_type__, Converter__) \
- struct trt_input_##in_op_type__##_converter { \
- trt_input_##in_op_type__##_converter() { \
- ::paddle::inference::Registry<EngineInputConverter>::Register< \
- Converter__>(#in_op_type__); \
- } \
- }; \
- trt_input_##in_op_type__##_converter trt_input_##in_op_type__##_converter__;
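Taken together, the macro and the static helpers give the usage pattern sketched below, assuming the surrounding `paddle::inference::tensorrt` namespace and headers; `MyConverter` and the `my_op` key are hypothetical names for illustration (the only converter this patch registers is `DefaultIOConverter` under `default`).

```cpp
// Hypothetical converter for a specific op type (illustrative names only).
class MyConverter : public EngineIOConverter {
 public:
  void operator()(const LoDTensor& in, void* out, size_t max_size) override {
    // custom LoDTensor -> engine-buffer copy on stream_
  }
  void operator()(const void* in, LoDTensor* out, size_t max_size) override {
    // custom engine-buffer -> LoDTensor copy on stream_
  }
};
REGISTER_TENSORRT_IO_CONVERTER(my_op, MyConverter);

// Callers go through the static helpers, which look up a converter by op type
// (falling back to "default") and bind the CUDA stream before converting.
void RunWithEngine(const LoDTensor& in, LoDTensor* out, void* engine_buf,
                   size_t capacity, cudaStream_t* stream) {
  EngineIOConverter::ConvertInput("my_op", in, engine_buf, capacity, stream);
  // ... engine->Execute(batch_size) ...
  EngineIOConverter::ConvertOutput("my_op", engine_buf, out, capacity, stream);
}
```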
diff --git a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
index 669fba1eb81c5caacea039522ea70a2d0523d022..ec33f97c8240dfc09a203d68599bffe78a4abb12 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
@@ -26,7 +27,7 @@ namespace paddle {
namespace inference {
namespace tensorrt {
-void Compare(float input, float expect) {
+void Compare(const std::string op_type, float input, float expect) {
framework::Scope scope;
platform::CUDAPlace place;
platform::CUDADeviceContext ctx(place);
@@ -35,6 +36,7 @@ void Compare(float input, float expect) {
auto x_var = scope.Var("X");
auto x_tensor = x_var->GetMutable<framework::LoDTensor>();
x_tensor->Resize({1, 1});
+ x_tensor->mutable_data<float>(place);
std::vector<float> init;
init.push_back(input);
framework::TensorFromVector(init, ctx, x_tensor);
@@ -45,14 +47,15 @@ void Compare(float input, float expect) {
out_tensor->mutable_data<float>(place);
framework::OpDesc op_desc;
- op_desc.SetType("relu");
+ op_desc.SetType(op_type);
op_desc.SetInput("X", {"X"});
op_desc.SetOutput("Out", {"Out"});
- auto relu_op = framework::OpRegistry::CreateOp(*op_desc.Proto());
+ auto op = framework::OpRegistry::CreateOp(*op_desc.Proto());
// run fluid op
- relu_op->Run(scope, place);
+ op->Run(scope, place);
+ // get fluid output
std::vector<float> out1;
framework::TensorToVector(*out_tensor, ctx, &out1);
@@ -63,21 +66,28 @@ void Compare(float input, float expect) {
engine->InitNetwork();
engine->DeclareInput("X", nvinfer1::DataType::kFLOAT,
nvinfer1::DimsCHW{1, 1, 1});
-
+ // convert op
OpConverter op_converter;
op_converter.ConvertOp(*op_desc.Proto(), engine);
engine->DeclareOutput("Out");
engine->FreezeNetwork();
- engine->SetInputFromCPU("X", &input, 1 * sizeof(float));
- // run tensorrt op
+ // convert LoDTensor to ITensor
+ size_t size = x_tensor->memory_size();
+ EngineIOConverter::ConvertInput(op_type, *x_tensor,
+ engine->buffer("X").buffer, size, &stream);
+ // run tensorrt op
engine->Execute(1);
-
- float out2;
- engine->GetOutputInCPU("Out", &out2, 1 * sizeof(float));
-
- ASSERT_EQ(out1[0], out2);
+ // convert ITensor to LoDTensor
+ EngineIOConverter::ConvertOutput(op_type, engine->buffer("Out").buffer,
+ out_tensor, size, &stream);
+ // get tensorrt output
+ std::vector<float> out2;
+ framework::TensorToVector(*out_tensor, ctx, &out2);
+
+ // compare
+ ASSERT_EQ(out1[0], out2[0]);
ASSERT_EQ(out1[0], expect);
delete engine;
@@ -85,8 +95,8 @@ void Compare(float input, float expect) {
}
TEST(OpConverter, ConvertRelu) {
- Compare(1, 1); // relu(1) = 1
- Compare(-5, 0); // relu(-5) = 0
+ Compare("relu", 1, 1); // relu(1) = 1
+ Compare("relu", -5, 0); // relu(-5) = 0
}
} // namespace tensorrt
diff --git a/paddle/fluid/inference/tensorrt/convert/test_io_converter.cc b/paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
index afcc516e6b76d58e37ce0e60746704cf3933fac7..8f91309a0a00d5131268f026c319e25ba3cb964a 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
@@ -12,40 +12,63 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
+#include <gtest/gtest.h>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
-#include <gtest/gtest.h>
-
namespace paddle {
namespace inference {
namespace tensorrt {
-class EngineInputConverterTester : public ::testing::Test {
- public:
- void SetUp() override { tensor.Resize({10, 10}); }
+void IOConverterTester(const platform::DeviceContext& ctx) {
+ cudaStream_t stream;
+ ASSERT_EQ(0, cudaStreamCreate(&stream));
- framework::LoDTensor tensor;
-};
+ // init fluid in_tensor
+ framework::LoDTensor in_tensor;
+ in_tensor.Resize({10, 10});
+ auto place = ctx.GetPlace();
+ in_tensor.mutable_data<float>(place);
+ std::vector<float> init;
+ for (int64_t i = 0; i < 10 * 10; ++i) {
+ init.push_back(i);
+ }
+ framework::TensorFromVector(init, ctx, &in_tensor);
-TEST_F(EngineInputConverterTester, DefaultCPU) {
+ // init tensorrt buffer
void* buffer;
- tensor.mutable_data<float>(platform::CPUPlace());
- ASSERT_EQ(cudaMalloc(&buffer, tensor.memory_size()), 0);
+ size_t size = in_tensor.memory_size();
+ ASSERT_EQ(cudaMalloc(&buffer, size), 0);
- cudaStream_t stream;
- EngineInputConverter::Run("test", tensor, buffer, tensor.memory_size(),
- &stream);
+ // convert fluid in_tensor to tensorrt buffer
+ EngineIOConverter::ConvertInput("test", in_tensor, buffer, size, &stream);
+
+ // convert tensorrt buffer to fluid out_tensor
+ framework::LoDTensor out_tensor;
+ out_tensor.Resize({10, 10});
+ out_tensor.mutable_data<float>(place);
+ EngineIOConverter::ConvertOutput("test", buffer, &out_tensor, size, &stream);
+
+ // compare in_tensor and out_tensor
+ std::vector<float> result;
+ framework::TensorToVector(out_tensor, ctx, &result);
+ EXPECT_EQ(init.size(), result.size());
+ for (size_t i = 0; i < init.size(); i++) {
+ EXPECT_EQ(init[i], result[i]);
+ }
+ cudaStreamDestroy(stream);
}
-TEST_F(EngineInputConverterTester, DefaultGPU) {
- void* buffer;
- tensor.mutable_data<float>(platform::CUDAPlace());
- ASSERT_EQ(cudaMalloc(&buffer, tensor.memory_size()), 0);
+TEST(EngineIOConverterTester, DefaultCPU) {
+ platform::CPUPlace place;
+ platform::CPUDeviceContext ctx(place);
+ IOConverterTester(ctx);
+}
- cudaStream_t stream;
- EngineInputConverter::Run("test", tensor, buffer, tensor.memory_size(),
- &stream);
+TEST(EngineIOConverterTester, DefaultGPU) {
+ platform::CUDAPlace place;
+ platform::CUDADeviceContext ctx(place);
+ IOConverterTester(ctx);
}
} // namespace tensorrt
diff --git a/python/paddle/fluid/tests/book/notest_understand_sentiment.py b/python/paddle/fluid/tests/book/notest_understand_sentiment.py
index 241778e303036d068dc0a40e4574a02eb97ad134..792ed7368d646cd9dff9255eb402b6a9b84f69a6 100644
--- a/python/paddle/fluid/tests/book/notest_understand_sentiment.py
+++ b/python/paddle/fluid/tests/book/notest_understand_sentiment.py
@@ -170,7 +170,7 @@ def train(word_dict,
assert save_dirname is None
adagrad = fluid.optimizer.Adagrad(learning_rate=0.002)
- optimize_ops, params_grads = adagrad.minimize(cost)
+ adagrad.minimize(cost)
train_data = paddle.batch(
paddle.reader.shuffle(
diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py
index ecb34699af0dc14782601702ab8afedbca7e1bfd..b1a6b524d33cae97c8982ffb8f780b1b07761a09 100644
--- a/python/paddle/fluid/tests/book/test_fit_a_line.py
+++ b/python/paddle/fluid/tests/book/test_fit_a_line.py
@@ -33,7 +33,7 @@ def train(use_cuda, save_dirname, is_local):
avg_cost = fluid.layers.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
- optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
+ sgd_optimizer.minimize(avg_cost)
BATCH_SIZE = 20
diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py
index dbcdb5766e7d20efdb12da0ea4c6f005d903849b..0f3a4c9242a81a3c1fb90268245715a8e59a207a 100644
--- a/python/paddle/fluid/tests/book/test_image_classification.py
+++ b/python/paddle/fluid/tests/book/test_image_classification.py
@@ -125,7 +125,7 @@ def train(net_type, use_cuda, save_dirname, is_local):
test_program = fluid.default_main_program().clone(for_test=True)
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
- optimize_ops, params_grads = optimizer.minimize(avg_cost)
+ optimizer.minimize(avg_cost)
BATCH_SIZE = 128
PASS_NUM = 1
diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py
index 0faba33032d5dfc0b751a5191e7b2ae0c1f172bf..09793760e5504c04ad4b0bfac5c5d7b7047cf85d 100644
--- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py
+++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py
@@ -175,7 +175,7 @@ def train(use_cuda, save_dirname=None, is_local=True):
decay_steps=100000,
decay_rate=0.5,
staircase=True))
- optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
+ sgd_optimizer.minimize(avg_cost)
# TODO(qiao)
# add dependency track and move this config before optimizer
diff --git a/python/paddle/fluid/tests/book/test_machine_translation.py b/python/paddle/fluid/tests/book/test_machine_translation.py
index 46c6b9c29a265741a99655d5ac29244798f6fec2..e8a75f473f62df528b7f39bf5f9085076e005c25 100644
--- a/python/paddle/fluid/tests/book/test_machine_translation.py
+++ b/python/paddle/fluid/tests/book/test_machine_translation.py
@@ -185,7 +185,7 @@ def train_main(use_cuda, is_sparse, is_local=True):
learning_rate=1e-4,
regularization=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.1))
- optimize_ops, params_grads = optimizer.minimize(avg_cost)
+ optimizer.minimize(avg_cost)
train_data = paddle.batch(
paddle.reader.shuffle(
diff --git a/python/paddle/fluid/tests/book/test_recognize_digits.py b/python/paddle/fluid/tests/book/test_recognize_digits.py
index c115aa4d7d6b514f9207543730e5e76cb0d2040c..578b1162fbd7e3a1b1c0cc934406818f2e07e019 100644
--- a/python/paddle/fluid/tests/book/test_recognize_digits.py
+++ b/python/paddle/fluid/tests/book/test_recognize_digits.py
@@ -95,7 +95,7 @@ def train(nn_type,
test_program = fluid.default_main_program().clone(for_test=True)
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
- optimize_ops, params_grads = optimizer.minimize(avg_loss)
+ optimizer.minimize(avg_loss)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py
index d022dedbff805d597b68b5a47f7931f2dd946615..7be924f762ddeb045dda890dbfdcd96a65449553 100644
--- a/python/paddle/fluid/tests/book/test_recommender_system.py
+++ b/python/paddle/fluid/tests/book/test_recommender_system.py
@@ -160,7 +160,7 @@ def train(use_cuda, save_dirname, is_local=True):
test_program = fluid.default_main_program().clone(for_test=True)
sgd_optimizer = SGDOptimizer(learning_rate=0.2)
- optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
+ sgd_optimizer.minimize(avg_cost)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
diff --git a/python/paddle/fluid/tests/book/test_word2vec.py b/python/paddle/fluid/tests/book/test_word2vec.py
index 6dec0f6857e86b4b9c1c67af934aa9bfdb1c3df7..30e1a5040cc92b02bbbf90dac97001812ec90134 100644
--- a/python/paddle/fluid/tests/book/test_word2vec.py
+++ b/python/paddle/fluid/tests/book/test_word2vec.py
@@ -101,7 +101,7 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
avg_cost = fluid.layers.mean(pd())
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
- optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
+ sgd_optimizer.minimize(avg_cost)
train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..10f8c4f3f0167632bb4a3d454ab026ba73a8f305
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py
@@ -0,0 +1,113 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+import paddle.fluid.layers as layers
+from paddle.fluid.transpiler.distribute_transpiler import delete_ops
+import numpy
+
+
+class TestDistTranspiler(unittest.TestCase):
+ def setUp(self):
+ self.trainer_id = 0
+ self.trainers = 2
+ self.pservers = 2
+ self.pserver_eps = "127.0.0.1:6174,127.0.0.1:6175"
+ self.current_pserver_ep = "127.0.0.1:6174"
+
+ def net_conf(self):
+ x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
+
+ y_predict = fluid.layers.fc(input=x,
+ size=1000,
+ act=None,
+ param_attr=fluid.ParamAttr(name='fc_w'))
+
+ y = fluid.layers.data(name='y', shape=[1], dtype='float32')
+
+ cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+ avg_cost = fluid.layers.mean(cost)
+ sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1)
+
+ optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
+ return optimize_ops, params_grads
+
+ def test_transpiler(self):
+ trainer = self.get_trainer()
+ pserver, startup = self.get_pserver(self.current_pserver_ep)
+
+ self.assertEqual([op.type for op in trainer.global_block().ops],
+ self.get_expect_trainer_ops())
+
+ self.assertEqual(len(pserver.blocks), 3)
+ # block0: listen_and_serv
+ self.assertEqual([op.type for op in pserver.blocks[0].ops],
+ ["listen_and_serv"])
+ # block1: optimize pass
+ self.assertEqual([op.type for op in pserver.blocks[1].ops],
+ ["sum", "scale", "sgd"])
+
+ # confirm startup program
+
+ self.assertEqual([op.type for op in startup.global_block().ops], [
+ "fill_constant", "fill_constant", "uniform_random", "uniform_random"
+ ])
+
+ # the variable fc_w will be split into two blocks
+ fc_w_var = startup.global_block().var("fc_w.block1")
+ self.assertEqual(fc_w_var.shape, (500, 1000))
+
+ def get_main_program(self):
+ main = fluid.Program()
+
+ with fluid.program_guard(main):
+ self.net_conf()
+
+ return main
+
+ def get_expect_trainer_ops(self):
+ trainer = fluid.Program()
+
+ with fluid.program_guard(trainer):
+ optimize_ops, params_grads = self.net_conf()
+
+ delete_ops(trainer.global_block(), optimize_ops)
+ return [op.type for op in trainer.global_block().ops
+ ] + ["split_byref", "send", "concat"]
+
+ def get_trainer(self):
+ return self._transpiler_instance().get_trainer_program()
+
+ def get_pserver(self, ep):
+ t = self._transpiler_instance()
+ pserver = t.get_pserver_program(ep)
+ startup = t.get_startup_program(ep, pserver)
+ return pserver, startup
+
+ def _transpiler_instance(self):
+ main = self.get_main_program()
+ t = fluid.DistributeTranspiler()
+ t.transpile(
+ self.trainer_id,
+ program=main,
+ pservers=self.pserver_eps,
+ trainers=self.trainers)
+ return t
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/python/paddle/fluid/transpiler/__init__.py b/python/paddle/fluid/transpiler/__init__.py
index 6d3c1b947f4acb1335b25e6eb0099d5d532c895a..413c36c5c41bbe0169f1c050ccdac040202d66df 100644
--- a/python/paddle/fluid/transpiler/__init__.py
+++ b/python/paddle/fluid/transpiler/__init__.py
@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
from distribute_transpiler import DistributeTranspiler
from inference_transpiler import InferenceTranspiler
from memory_optimization_transpiler import memory_optimize, release_memory
diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py
index b45cb987d896bd189531e97eb62bddbbee16069d..a323f8d03613e7c4149812daab4ccb57fb940a7e 100644
--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -17,7 +17,7 @@ from __future__ import print_function
import math
import distributed_splitter as splitter
-from .. import core
+from .. import core, framework
from ..framework import Program, default_main_program, \
default_startup_program, \
Variable, Parameter, grad_var_name
@@ -417,7 +417,7 @@ class DistributeTranspiler:
def __append_optimize_op__(op, block, grad_to_block_id):
if self._is_opt_op(op):
self._append_pserver_ops(block, op, endpoint, grad_to_block_id,
- default_main_program())
+ self.origin_program)
else:
self._append_pserver_non_opt_ops(block, op)
diff --git a/tools/manylinux1/README.md b/tools/manylinux1/README.md
index 898e00bd37c7b7bcbcb4a56476ff10c87381e47a..0e5905040175047f5b79939d97a3efcf38992944 100644
--- a/tools/manylinux1/README.md
+++ b/tools/manylinux1/README.md
@@ -28,3 +28,38 @@ git clone https://github.com/paddlepaddle/paddle
cd paddle/tools/manylinux1
REPO=[yourrepo] ./build_all.sh
```
+
+## Build PaddlePaddle for the different Python ABIs
+
+Choose one of the following Python ABIs and set the corresponding environment variables.
+
+- cp27-cp27m
+
+ ```bash
+ export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs4/lib:}
+ export PATH=/opt/python/cp27-cp27m/bin/:${PATH}
+ export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python
+ -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7
+ -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so"
+ ```
+
+- cp27-cp27mu
+
+ ```bash
+ export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:}
+ export PATH=/opt/python/cp27-cp27mu/bin/:${PATH}
+ export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python
+ -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7
+ -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so"
+ ```
+
+Then pass `PYTHON_FLAGS` to cmake as extra flags:
+
+```bash
+cmake .. \
+ ${PYTHON_FLAGS} \
+ -DWITH_GPU=OFF \
+ ...
+```
+
+You can find more details about the cmake flags [here](http://www.paddlepaddle.org/docs/develop/documentation/fluid/en/build_and_install/build_from_source_en.html#appendix-build-options).
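Putting the pieces together, an end-to-end cp27-cp27mu build inside the manylinux container might look like the sketch below; the image name and mount path are assumptions, so adapt them to the image you built with `build_all.sh`.

```bash
# Illustrative end-to-end flow; image name and paths are assumptions.
docker run -it -v $PWD/paddle:/paddle [yourrepo]/paddle_manylinux_devel /bin/bash

# Inside the container: export the cp27-cp27mu variables exactly as shown
# above, then configure and build with those flags.
mkdir -p /paddle/build && cd /paddle/build
cmake .. ${PYTHON_FLAGS} -DWITH_GPU=OFF
make -j$(nproc)
```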