diff --git a/doc/v2/build_and_install/build_from_source_cn.rst b/doc/v2/build_and_install/build_from_source_cn.rst index 115b92a33888abf1e1be400e1abbb58b632a2976..f846928954dd3a05e11054ce2ff2ff839fbefd4b 100644 --- a/doc/v2/build_and_install/build_from_source_cn.rst +++ b/doc/v2/build_and_install/build_from_source_cn.rst @@ -19,8 +19,9 @@ ---------------- PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安装编译依赖的步骤,可选的不同编译环境Docker镜像 -可以在 `这里 `_ 找到。或者 -参考下述可选步骤,从源码中构建用于编译PaddlePaddle的Docker镜像。 +可以在 `这里 `_ 找到,您也可以 +在 `这里 `_ 找到 paddle_manylinux_devel +镜像的编译以及使用方法。或者参考下述可选步骤,从源码中构建用于编译PaddlePaddle的Docker镜像。 如果您选择不使用Docker镜像,则需要在本机安装下面章节列出的 `编译依赖`_ 之后才能开始编译的步骤。 diff --git a/doc/v2/build_and_install/build_from_source_en.rst b/doc/v2/build_and_install/build_from_source_en.rst index 8fef9e7347e8d924026999bfda985381750c6b51..d1b5b88dff81d4c5cee3dd13a7dccbc333ab6a17 100644 --- a/doc/v2/build_and_install/build_from_source_en.rst +++ b/doc/v2/build_and_install/build_from_source_en.rst @@ -22,6 +22,8 @@ How To Build You need to use Docker to build PaddlePaddle to avoid installing dependencies by yourself. We have several pre-built Docker images `here `_ , +you can also find how to build and use paddle_manylinux_devel Docker image from +`here `_ Or you can build your own image from source as the optional step below: .. code-block:: bash diff --git a/paddle/fluid/inference/analysis/dot.h b/paddle/fluid/inference/analysis/dot.h index 3359987874f2d74d7e4646baa38790431c4b28fd..4bf1840fdda8508b52d7274a338c5b1c95baf354 100644 --- a/paddle/fluid/inference/analysis/dot.h +++ b/paddle/fluid/inference/analysis/dot.h @@ -21,6 +21,7 @@ #include #include +#include #include #include diff --git a/paddle/fluid/inference/engine.h b/paddle/fluid/inference/engine.h index de0375551e16ec53b90414c7446234fda98bf706..ce2b8161715a3fa2278ce950dbac82c6d0042bef 100644 --- a/paddle/fluid/inference/engine.h +++ b/paddle/fluid/inference/engine.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include #include "paddle/fluid/framework/framework.pb.h" namespace paddle { @@ -58,8 +59,8 @@ class EngineBase { struct Buffer { void* buffer{nullptr}; // buffer should be allocated only once. - int max_size; // buffer allocated space. - int size; // data size. + size_t max_size; // buffer allocated space. + size_t size; // data size. DeviceType device{DeviceType::UNK}; // tells which device this buffer is on. }; diff --git a/paddle/fluid/inference/tensorrt/CMakeLists.txt b/paddle/fluid/inference/tensorrt/CMakeLists.txt index 677b3e04af8e7f5662a15fb32e3b03f45d262733..b52d083f280e5e7713600a7b748dedd37aca0a1e 100644 --- a/paddle/fluid/inference/tensorrt/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/CMakeLists.txt @@ -1,5 +1,4 @@ nv_library(tensorrt_engine SRCS engine.cc DEPS framework_proto) nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader) nv_test(test_tensorrt_engine SRCS test_engine.cc DEPS dynload_cuda tensorrt_engine) - add_subdirectory(convert) diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 286abf736e8ff8a357482419e85ad1258a6c6acd..4fb4511d99179e4ea14cde66feb13bc9e114581a 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -1,4 +1,4 @@ -nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc op_converter.h DEPS ${FLUID_CORE_MODULES}) -nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc +nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES}) +nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine) nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor) diff --git a/paddle/fluid/inference/tensorrt/convert/io_converter.cc b/paddle/fluid/inference/tensorrt/convert/io_converter.cc index 32e8631fde3f748669d2008b4a060455a37e154e..854f434d93e81237dc85c5df62debcf3b3824b78 100644 --- a/paddle/fluid/inference/tensorrt/convert/io_converter.cc +++ b/paddle/fluid/inference/tensorrt/convert/io_converter.cc @@ -23,26 +23,42 @@ namespace tensorrt { using platform::is_gpu_place; using platform::is_cpu_place; -class DefaultInputConverter : public EngineInputConverter { +class DefaultIOConverter : public EngineIOConverter { public: - DefaultInputConverter() {} + DefaultIOConverter() {} // NOTE out is GPU memory. virtual void operator()(const LoDTensor& in, void* out, size_t max_size) override { PADDLE_ENFORCE(out != nullptr); - PADDLE_ENFORCE_LE(in.memory_size(), max_size); + PADDLE_ENFORCE(stream_ != nullptr); const auto& place = in.place(); + size_t size = in.memory_size(); + PADDLE_ENFORCE_LE(size, max_size); if (is_cpu_place(place)) { - PADDLE_ENFORCE(stream_ != nullptr); - PADDLE_ENFORCE_EQ(0, - cudaMemcpyAsync(out, in.data(), in.memory_size(), - cudaMemcpyHostToDevice, *stream_)); - + PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data(), size, + cudaMemcpyHostToDevice, *stream_)); } else if (is_gpu_place(place)) { - PADDLE_ENFORCE_EQ(0, - cudaMemcpyAsync(out, in.data(), in.memory_size(), - cudaMemcpyHostToHost, *stream_)); - + PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data(), size, + cudaMemcpyDeviceToDevice, *stream_)); + } else { + PADDLE_THROW("Unknown device for converter"); + } + cudaStreamSynchronize(*stream_); + } + // NOTE in is GPU memory. + virtual void operator()(const void* in, LoDTensor* out, + size_t max_size) override { + PADDLE_ENFORCE(in != nullptr); + PADDLE_ENFORCE(stream_ != nullptr); + const auto& place = out->place(); + size_t size = out->memory_size(); + PADDLE_ENFORCE_LE(size, max_size); + if (is_cpu_place(place)) { + PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data(), in, size, + cudaMemcpyDeviceToHost, *stream_)); + } else if (is_gpu_place(place)) { + PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data(), in, size, + cudaMemcpyDeviceToDevice, *stream_)); } else { PADDLE_THROW("Unknown device for converter"); } @@ -50,7 +66,8 @@ class DefaultInputConverter : public EngineInputConverter { } }; -REGISTER_TENSORRT_INPUT_CONVERTER(default, DefaultInputConverter); +// fluid LodTensor <-> tensorrt ITensor +REGISTER_TENSORRT_IO_CONVERTER(default, DefaultIOConverter); } // namespace tensorrt } // namespace inference diff --git a/paddle/fluid/inference/tensorrt/convert/io_converter.h b/paddle/fluid/inference/tensorrt/convert/io_converter.h index 8972dae92be2c2d261a13c48d98e675f64e51d31..71c48e085d25d2bc6720d93735f661f9e3af7b40 100644 --- a/paddle/fluid/inference/tensorrt/convert/io_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/io_converter.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include #include #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/inference/utils/singleton.h" @@ -25,43 +26,57 @@ namespace tensorrt { using framework::LoDTensor; /* - * Convert Input from Fluid to an Engine. - * TensorRT's ITensor follows row major, NCHW. Fluid is also row major, so in - * most cases just need to copy the data. + * Convert Input from Fluid to TensorRT Engine. + * Convert Output from TensorRT Engine to Fluid. + * + * Note that TensorRT's ITensor follows row major, NCHW. Fluid is also row + * major, + * so in the default case just need to copy the data. */ -class EngineInputConverter { +class EngineIOConverter { public: - EngineInputConverter() {} + EngineIOConverter() {} virtual void operator()(const LoDTensor& in, void* out, size_t max_size) {} + virtual void operator()(const void* in, LoDTensor* out, size_t max_size) {} void SetStream(cudaStream_t* stream) { stream_ = stream; } - static void Run(const std::string& in_op_type, const LoDTensor& in, void* out, - size_t max_size, cudaStream_t* stream) { + static void ConvertInput(const std::string& op_type, const LoDTensor& in, + void* out, size_t max_size, cudaStream_t* stream) { PADDLE_ENFORCE(stream != nullptr); - auto* converter = Registry::Lookup( - in_op_type, "default" /* default_type */); + auto* converter = Registry::Lookup( + op_type, "default" /* default_type */); PADDLE_ENFORCE_NOT_NULL(converter); converter->SetStream(stream); (*converter)(in, out, max_size); } - virtual ~EngineInputConverter() {} + static void ConvertOutput(const std::string& op_type, const void* in, + LoDTensor* out, size_t max_size, + cudaStream_t* stream) { + PADDLE_ENFORCE(stream != nullptr); + auto* converter = Registry::Lookup( + op_type, "default" /* default_type */); + PADDLE_ENFORCE_NOT_NULL(converter); + converter->SetStream(stream); + (*converter)(in, out, max_size); + } + + virtual ~EngineIOConverter() {} protected: cudaStream_t* stream_{nullptr}; }; +#define REGISTER_TENSORRT_IO_CONVERTER(op_type__, Converter__) \ + struct trt_io_##op_type__##_converter { \ + trt_io_##op_type__##_converter() { \ + Registry::Register(#op_type__); \ + } \ + }; \ + trt_io_##op_type__##_converter trt_io_##op_type__##_converter__; + } // namespace tensorrt } // namespace inference } // namespace paddle - -#define REGISTER_TENSORRT_INPUT_CONVERTER(in_op_type__, Converter__) \ - struct trt_input_##in_op_type__##_converter { \ - trt_input_##in_op_type__##_converter() { \ - ::paddle::inference::Registry::Register< \ - Converter__>(#in_op_type__); \ - } \ - }; \ - trt_input_##in_op_type__##_converter trt_input_##in_op_type__##_converter__; diff --git a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc index 669fba1eb81c5caacea039522ea70a2d0523d022..ec33f97c8240dfc09a203d68599bffe78a4abb12 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/inference/tensorrt/convert/io_converter.h" #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/place.h" @@ -26,7 +27,7 @@ namespace paddle { namespace inference { namespace tensorrt { -void Compare(float input, float expect) { +void Compare(const std::string op_type, float input, float expect) { framework::Scope scope; platform::CUDAPlace place; platform::CUDADeviceContext ctx(place); @@ -35,6 +36,7 @@ void Compare(float input, float expect) { auto x_var = scope.Var("X"); auto x_tensor = x_var->GetMutable(); x_tensor->Resize({1, 1}); + x_tensor->mutable_data(place); std::vector init; init.push_back(input); framework::TensorFromVector(init, ctx, x_tensor); @@ -45,14 +47,15 @@ void Compare(float input, float expect) { out_tensor->mutable_data(place); framework::OpDesc op_desc; - op_desc.SetType("relu"); + op_desc.SetType(op_type); op_desc.SetInput("X", {"X"}); op_desc.SetOutput("Out", {"Out"}); - auto relu_op = framework::OpRegistry::CreateOp(*op_desc.Proto()); + auto op = framework::OpRegistry::CreateOp(*op_desc.Proto()); // run fluid op - relu_op->Run(scope, place); + op->Run(scope, place); + // get fluid output std::vector out1; framework::TensorToVector(*out_tensor, ctx, &out1); @@ -63,21 +66,28 @@ void Compare(float input, float expect) { engine->InitNetwork(); engine->DeclareInput("X", nvinfer1::DataType::kFLOAT, nvinfer1::DimsCHW{1, 1, 1}); - + // convert op OpConverter op_converter; op_converter.ConvertOp(*op_desc.Proto(), engine); engine->DeclareOutput("Out"); engine->FreezeNetwork(); - engine->SetInputFromCPU("X", &input, 1 * sizeof(float)); - // run tensorrt op + // convert LoDTensor to ITensor + size_t size = x_tensor->memory_size(); + EngineIOConverter::ConvertInput(op_type, *x_tensor, + engine->buffer("X").buffer, size, &stream); + // run tensorrt Outp engine->Execute(1); - - float out2; - engine->GetOutputInCPU("Out", &out2, 1 * sizeof(float)); - - ASSERT_EQ(out1[0], out2); + // convert ITensor to LoDTensor + EngineIOConverter::ConvertOutput(op_type, engine->buffer("Out").buffer, + out_tensor, size, &stream); + // get tensorrt output + std::vector out2; + framework::TensorToVector(*out_tensor, ctx, &out2); + + // compare + ASSERT_EQ(out1[0], out2[0]); ASSERT_EQ(out1[0], expect); delete engine; @@ -85,8 +95,8 @@ void Compare(float input, float expect) { } TEST(OpConverter, ConvertRelu) { - Compare(1, 1); // relu(1) = 1 - Compare(-5, 0); // relu(-5) = 0 + Compare("relu", 1, 1); // relu(1) = 1 + Compare("relu", -5, 0); // relu(-5) = 0 } } // namespace tensorrt diff --git a/paddle/fluid/inference/tensorrt/convert/test_io_converter.cc b/paddle/fluid/inference/tensorrt/convert/test_io_converter.cc index afcc516e6b76d58e37ce0e60746704cf3933fac7..8f91309a0a00d5131268f026c319e25ba3cb964a 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_io_converter.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_io_converter.cc @@ -12,40 +12,63 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/inference/tensorrt/convert/io_converter.h" -#include - namespace paddle { namespace inference { namespace tensorrt { -class EngineInputConverterTester : public ::testing::Test { - public: - void SetUp() override { tensor.Resize({10, 10}); } +void IOConverterTester(const platform::DeviceContext& ctx) { + cudaStream_t stream; + ASSERT_EQ(0, cudaStreamCreate(&stream)); - framework::LoDTensor tensor; -}; + // init fluid in_tensor + framework::LoDTensor in_tensor; + in_tensor.Resize({10, 10}); + auto place = ctx.GetPlace(); + in_tensor.mutable_data(place); + std::vector init; + for (int64_t i = 0; i < 10 * 10; ++i) { + init.push_back(i); + } + framework::TensorFromVector(init, ctx, &in_tensor); -TEST_F(EngineInputConverterTester, DefaultCPU) { + // init tensorrt buffer void* buffer; - tensor.mutable_data(platform::CPUPlace()); - ASSERT_EQ(cudaMalloc(&buffer, tensor.memory_size()), 0); + size_t size = in_tensor.memory_size(); + ASSERT_EQ(cudaMalloc(&buffer, size), 0); - cudaStream_t stream; - EngineInputConverter::Run("test", tensor, buffer, tensor.memory_size(), - &stream); + // convert fluid in_tensor to tensorrt buffer + EngineIOConverter::ConvertInput("test", in_tensor, buffer, size, &stream); + + // convert tensorrt buffer to fluid out_tensor + framework::LoDTensor out_tensor; + out_tensor.Resize({10, 10}); + out_tensor.mutable_data(place); + EngineIOConverter::ConvertOutput("test", buffer, &out_tensor, size, &stream); + + // compare in_tensor and out_tensor + std::vector result; + framework::TensorToVector(out_tensor, ctx, &result); + EXPECT_EQ(init.size(), result.size()); + for (size_t i = 0; i < init.size(); i++) { + EXPECT_EQ(init[i], result[i]); + } + cudaStreamDestroy(stream); } -TEST_F(EngineInputConverterTester, DefaultGPU) { - void* buffer; - tensor.mutable_data(platform::CUDAPlace()); - ASSERT_EQ(cudaMalloc(&buffer, tensor.memory_size()), 0); +TEST(EngineIOConverterTester, DefaultCPU) { + platform::CPUPlace place; + platform::CPUDeviceContext ctx(place); + IOConverterTester(ctx); +} - cudaStream_t stream; - EngineInputConverter::Run("test", tensor, buffer, tensor.memory_size(), - &stream); +TEST(EngineIOConverterTester, DefaultGPU) { + platform::CUDAPlace place; + platform::CUDADeviceContext ctx(place); + IOConverterTester(ctx); } } // namespace tensorrt diff --git a/python/paddle/fluid/tests/book/notest_understand_sentiment.py b/python/paddle/fluid/tests/book/notest_understand_sentiment.py index 241778e303036d068dc0a40e4574a02eb97ad134..792ed7368d646cd9dff9255eb402b6a9b84f69a6 100644 --- a/python/paddle/fluid/tests/book/notest_understand_sentiment.py +++ b/python/paddle/fluid/tests/book/notest_understand_sentiment.py @@ -170,7 +170,7 @@ def train(word_dict, assert save_dirname is None adagrad = fluid.optimizer.Adagrad(learning_rate=0.002) - optimize_ops, params_grads = adagrad.minimize(cost) + adagrad.minimize(cost) train_data = paddle.batch( paddle.reader.shuffle( diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py index ecb34699af0dc14782601702ab8afedbca7e1bfd..b1a6b524d33cae97c8982ffb8f780b1b07761a09 100644 --- a/python/paddle/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/test_fit_a_line.py @@ -33,7 +33,7 @@ def train(use_cuda, save_dirname, is_local): avg_cost = fluid.layers.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) - optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost) + sgd_optimizer.minimize(avg_cost) BATCH_SIZE = 20 diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py index dbcdb5766e7d20efdb12da0ea4c6f005d903849b..0f3a4c9242a81a3c1fb90268245715a8e59a207a 100644 --- a/python/paddle/fluid/tests/book/test_image_classification.py +++ b/python/paddle/fluid/tests/book/test_image_classification.py @@ -125,7 +125,7 @@ def train(net_type, use_cuda, save_dirname, is_local): test_program = fluid.default_main_program().clone(for_test=True) optimizer = fluid.optimizer.Adam(learning_rate=0.001) - optimize_ops, params_grads = optimizer.minimize(avg_cost) + optimizer.minimize(avg_cost) BATCH_SIZE = 128 PASS_NUM = 1 diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py index 0faba33032d5dfc0b751a5191e7b2ae0c1f172bf..09793760e5504c04ad4b0bfac5c5d7b7047cf85d 100644 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py @@ -175,7 +175,7 @@ def train(use_cuda, save_dirname=None, is_local=True): decay_steps=100000, decay_rate=0.5, staircase=True)) - optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost) + sgd_optimizer.minimize(avg_cost) # TODO(qiao) # add dependency track and move this config before optimizer diff --git a/python/paddle/fluid/tests/book/test_machine_translation.py b/python/paddle/fluid/tests/book/test_machine_translation.py index 46c6b9c29a265741a99655d5ac29244798f6fec2..e8a75f473f62df528b7f39bf5f9085076e005c25 100644 --- a/python/paddle/fluid/tests/book/test_machine_translation.py +++ b/python/paddle/fluid/tests/book/test_machine_translation.py @@ -185,7 +185,7 @@ def train_main(use_cuda, is_sparse, is_local=True): learning_rate=1e-4, regularization=fluid.regularizer.L2DecayRegularizer( regularization_coeff=0.1)) - optimize_ops, params_grads = optimizer.minimize(avg_cost) + optimizer.minimize(avg_cost) train_data = paddle.batch( paddle.reader.shuffle( diff --git a/python/paddle/fluid/tests/book/test_recognize_digits.py b/python/paddle/fluid/tests/book/test_recognize_digits.py index c115aa4d7d6b514f9207543730e5e76cb0d2040c..578b1162fbd7e3a1b1c0cc934406818f2e07e019 100644 --- a/python/paddle/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/fluid/tests/book/test_recognize_digits.py @@ -95,7 +95,7 @@ def train(nn_type, test_program = fluid.default_main_program().clone(for_test=True) optimizer = fluid.optimizer.Adam(learning_rate=0.001) - optimize_ops, params_grads = optimizer.minimize(avg_loss) + optimizer.minimize(avg_loss) place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index d022dedbff805d597b68b5a47f7931f2dd946615..7be924f762ddeb045dda890dbfdcd96a65449553 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -160,7 +160,7 @@ def train(use_cuda, save_dirname, is_local=True): test_program = fluid.default_main_program().clone(for_test=True) sgd_optimizer = SGDOptimizer(learning_rate=0.2) - optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost) + sgd_optimizer.minimize(avg_cost) place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() diff --git a/python/paddle/fluid/tests/book/test_word2vec.py b/python/paddle/fluid/tests/book/test_word2vec.py index 6dec0f6857e86b4b9c1c67af934aa9bfdb1c3df7..30e1a5040cc92b02bbbf90dac97001812ec90134 100644 --- a/python/paddle/fluid/tests/book/test_word2vec.py +++ b/python/paddle/fluid/tests/book/test_word2vec.py @@ -101,7 +101,7 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True): avg_cost = fluid.layers.mean(pd()) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) - optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost) + sgd_optimizer.minimize(avg_cost) train_reader = paddle.batch( paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py new file mode 100644 index 0000000000000000000000000000000000000000..10f8c4f3f0167632bb4a3d454ab026ba73a8f305 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -0,0 +1,113 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.fluid.layers as layers +from paddle.fluid.transpiler.distribute_transpiler import delete_ops +import numpy + + +class TestDistTranspiler(unittest.TestCase): + def setUp(self): + self.trainer_id = 0 + self.trainers = 2 + self.pservers = 2 + self.pserver_eps = "127.0.0.1:6174,127.0.0.1:6175" + self.current_pserver_ep = "127.0.0.1:6174" + + def net_conf(self): + x = fluid.layers.data(name='x', shape=[1000], dtype='float32') + + y_predict = fluid.layers.fc(input=x, + size=1000, + act=None, + param_attr=fluid.ParamAttr(name='fc_w')) + + y = fluid.layers.data(name='y', shape=[1], dtype='float32') + + cost = fluid.layers.square_error_cost(input=y_predict, label=y) + avg_cost = fluid.layers.mean(cost) + sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1) + + optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost) + return optimize_ops, params_grads + + def test_transpiler(self): + trainer = self.get_trainer() + pserver, startup = self.get_pserver(self.current_pserver_ep) + + self.assertEqual([op.type for op in trainer.global_block().ops], + self.get_expect_trainer_ops()) + + self.assertEqual(len(pserver.blocks), 3) + # block0: listen_and_serv + self.assertEqual([op.type for op in pserver.blocks[0].ops], + ["listen_and_serv"]) + # block2: optimize pass + self.assertEqual([op.type for op in pserver.blocks[1].ops], + ["sum", "scale", "sgd"]) + + # confirm startup program + + self.assertEqual([op.type for op in startup.global_block().ops], [ + "fill_constant", "fill_constant", "uniform_random", "uniform_random" + ]) + + # the variable #fc_w will be split into two blocks + fc_w_var = startup.global_block().var("fc_w.block1") + self.assertEqual(fc_w_var.shape, (500, 1000)) + + def get_main_program(self): + main = fluid.Program() + + with fluid.program_guard(main): + self.net_conf() + + return main + + def get_expect_trainer_ops(self): + trainer = fluid.Program() + + with fluid.program_guard(trainer): + optimize_ops, params_grads = self.net_conf() + + delete_ops(trainer.global_block(), optimize_ops) + return [op.type for op in trainer.global_block().ops + ] + ["split_byref", "send", "concat"] + + def get_trainer(self): + return self._transpiler_instance().get_trainer_program() + + def get_pserver(self, ep): + t = self._transpiler_instance() + pserver = t.get_pserver_program(ep) + startup = t.get_startup_program(ep, pserver) + return pserver, startup + + def _transpiler_instance(self): + main = self.get_main_program() + t = fluid.DistributeTranspiler() + t.transpile( + self.trainer_id, + program=main, + pservers=self.pserver_eps, + trainers=self.trainers) + return t + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/transpiler/__init__.py b/python/paddle/fluid/transpiler/__init__.py index 6d3c1b947f4acb1335b25e6eb0099d5d532c895a..413c36c5c41bbe0169f1c050ccdac040202d66df 100644 --- a/python/paddle/fluid/transpiler/__init__.py +++ b/python/paddle/fluid/transpiler/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from distribute_transpiler import DistributeTranspiler from inference_transpiler import InferenceTranspiler from memory_optimization_transpiler import memory_optimize, release_memory diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index b45cb987d896bd189531e97eb62bddbbee16069d..a323f8d03613e7c4149812daab4ccb57fb940a7e 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -17,7 +17,7 @@ from __future__ import print_function import math import distributed_splitter as splitter -from .. import core +from .. import core, framework from ..framework import Program, default_main_program, \ default_startup_program, \ Variable, Parameter, grad_var_name @@ -417,7 +417,7 @@ class DistributeTranspiler: def __append_optimize_op__(op, block, grad_to_block_id): if self._is_opt_op(op): self._append_pserver_ops(block, op, endpoint, grad_to_block_id, - default_main_program()) + self.origin_program) else: self._append_pserver_non_opt_ops(block, op) diff --git a/tools/manylinux1/README.md b/tools/manylinux1/README.md index 898e00bd37c7b7bcbcb4a56476ff10c87381e47a..0e5905040175047f5b79939d97a3efcf38992944 100644 --- a/tools/manylinux1/README.md +++ b/tools/manylinux1/README.md @@ -28,3 +28,38 @@ git clone https://github.com/paddlepaddle/paddle cd paddle/tools/manylinux1 REPO=[yourrepo] ./build_all.sh ``` + +## Build PaddlePaddle for the different Python ABIs + +Choose one of the following Python ABI and set the correct environment variables. + +- cp27-cp27m + + ```bash + export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs4/lib:} + export PATH=/opt/python/cp27-cp27m/bin/:${PATH} + export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python + -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7 + -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so" + ``` + +- cp27-cp27mu + + ```bash + export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:} + export PATH=/opt/python/cp27-cp27mu/bin/:${PATH} + export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python + -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7 + -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so" + ``` + +And then add the `PYTHON_FLAGS` as your cmake flags: + +```bash +cmake .. + ${PYTHON_FLAGS} \ + -DWITH_GPU=OFF \ + ... +``` + +You can find more details about cmake flags at [here](http://www.paddlepaddle.org/docs/develop/documentation/fluid/en/build_and_install/build_from_source_en.html#appendix-build-options)