diff --git a/Dockerfile b/Dockerfile index fc5069a6c080ed23317695e6822c4c46b5b5c7f9..48c750358cfcb227667c429f19befcaa2f51ebbd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,7 +23,7 @@ ENV HOME /root COPY ./paddle/scripts/docker/root/ /root/ RUN apt-get update && \ - apt-get install -y --allow-downgrades \ + apt-get install -y --allow-downgrades patchelf \ git python-pip python-dev python-opencv openssh-server bison \ libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ diff --git a/doc/fluid/api/transpiler.rst b/doc/fluid/api/transpiler.rst index 943d39331d26c05764c90cb24f6774997c976bfe..d2ac04f1449c32cb414cea1b76d7469bbe9ccb85 100644 --- a/doc/fluid/api/transpiler.rst +++ b/doc/fluid/api/transpiler.rst @@ -14,6 +14,15 @@ DistributeTranspiler :members: :noindex: +.. _api_fluid_transpiler_InferenceTranspiler: + +InferenceTranspiler +------------------- + +.. autoclass:: paddle.fluid.transpiler.InferenceTranspiler + :members: + :noindex: + .. _api_fluid_transpiler_memory_optimize: memory_optimize diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h index 9c2c845c6efb206fb1ad5150189430b9a6fe9ea3..b2e5399e2376a86c1cd310b29c768832665af87f 100644 --- a/paddle/fluid/framework/details/build_strategy.h +++ b/paddle/fluid/framework/details/build_strategy.h @@ -34,7 +34,7 @@ struct BuildStrategy { std::string debug_graphviz_path_{""}; - bool enable_data_balance_{true}; + bool enable_data_balance_{false}; }; } // namespace details diff --git a/paddle/fluid/framework/details/data_balance_op_handle.cc b/paddle/fluid/framework/details/data_balance_op_handle.cc index d07235df5856591f8ad707c86fa5b3b65868c3d1..68896c8ac1bae7d4bfcfa79cc8ec5c26bf2d93ee 100644 --- a/paddle/fluid/framework/details/data_balance_op_handle.cc +++ b/paddle/fluid/framework/details/data_balance_op_handle.cc @@ -86,9 +86,9 @@ std::vector> DataBalanceOpHandle::GetBalancePlan( } void DataBalanceOpHandle::RunImpl() { - if (places_.size() == 1) { - return; - } + PADDLE_ENFORCE_GT(places_.size(), 1, + "Data balance can only be enabled when the number of " + "places to run larger than 1."); auto in_var_handles = DynamicCast(inputs_); auto out_var_handles = DynamicCast(outputs_); PADDLE_ENFORCE(in_var_handles.size() % places_.size() == 0); diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc index 46d0c2769cb334f5cb75ae0ef5e48da45448c48f..b82c2ef4082110f1621eb38d50361396511a4825 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc @@ -59,6 +59,11 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder( grad_names_.insert(GradVarName(p)); } balance_vars_.resize(places_.size(), 0); + if (strategy_.enable_data_balance_ && places_.size() == 1) { + LOG(WARNING) << "It is no need to enable data balance when there is only " + "one place. enable_data_balance is set to False."; + strategy_.enable_data_balance_ = false; + } } void MultiDevSSAGraphBuilder::CreateOpHandleIOs(SSAGraph *result, diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h index 3314e41cc51d74f87be0e2cd5eba9bb260c16be7..e7dfa608b48f89a2155e43c7e63e31154675cd38 100644 --- a/paddle/fluid/framework/op_registry.h +++ b/paddle/fluid/framework/op_registry.h @@ -182,21 +182,15 @@ struct OpKernelRegistrarFunctorEx \ - __op_registrar_##op_type##__(#op_type); \ - int TouchOpRegistrar_##op_type() { \ - __op_registrar_##op_type##__.Touch(); \ - return 0; \ +#define REGISTER_OPERATOR(op_type, op_class, ...) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_op__##op_type, \ + "REGISTER_OPERATOR must be called in global namespace"); \ + static ::paddle::framework::OperatorRegistrar \ + __op_registrar_##op_type##__(#op_type); \ + int TouchOpRegistrar_##op_type() { \ + __op_registrar_##op_type##__.Touch(); \ + return 0; \ } #define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) \ diff --git a/paddle/fluid/framework/op_registry_test.cc b/paddle/fluid/framework/op_registry_test.cc index 18b1649cc71d5edd5b07740bbad1fe8f81128898..04996d7b09cecc3c330a47153c9b10310f1792f4 100644 --- a/paddle/fluid/framework/op_registry_test.cc +++ b/paddle/fluid/framework/op_registry_test.cc @@ -193,15 +193,10 @@ TEST(OpRegistry, CustomChecker) { ASSERT_EQ(test_attr, 4); } -class CosineOpComplete : public paddle::framework::CosineOp { - public: - DEFINE_OP_CONSTRUCTOR(CosineOpComplete, paddle::framework::CosineOp); - DEFINE_OP_CLONE_METHOD(CosineOpComplete); -}; - TEST(OperatorRegistrar, Test) { paddle::framework::OperatorRegistrar< - CosineOpComplete, paddle::framework::CosineOpProtoAndCheckerMaker> + paddle::framework::CosineOp, + paddle::framework::CosineOpProtoAndCheckerMaker> reg("cos"); } diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 01d750efbb8aaa35701f6caa7ec103ec21dd529e..1040eb882baea624e972faf4af3094119df72308 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -121,10 +121,6 @@ class OperatorBase { //! Get all outputs variable names virtual std::vector OutputVars(bool has_intermediate) const; - // Return a new operator instance, which is as same as this. - // Use unique_ptr to prevent caller forget to delete this pointer. - virtual std::unique_ptr Clone() const = 0; - protected: std::string type_; // NOTE: in case of OpGrad, inputs_ contains: @@ -145,37 +141,6 @@ class OperatorBase { const platform::Place& place) const = 0; }; -// Macro for define a clone method. -// If you are writing an kernel operator, `Clone` will be defined when you -// register it. i.e. `Clone` method is not needed to define by yourself. -#define DEFINE_OP_CLONE_METHOD(cls) \ - std::unique_ptr<::paddle::framework::OperatorBase> Clone() const final { \ - return std::unique_ptr<::paddle::framework::OperatorBase>(new cls(*this)); \ - } - -// Macro for define a default constructor for Operator. -// You can also use -// using PARENT_CLASS::PARENT_CLASS; -// to use parent's constructor. -#define DEFINE_OP_CONSTRUCTOR(cls, parent_cls) \ - cls(const std::string& type, \ - const ::paddle::framework::VariableNameMap& inputs, \ - const ::paddle::framework::VariableNameMap& outputs, \ - const paddle::framework::AttributeMap& attrs) \ - : parent_cls(type, inputs, outputs, attrs) {} - -class NOP : public OperatorBase { - public: - using OperatorBase::OperatorBase; - std::unique_ptr Clone() const override { - return std::unique_ptr(new NOP(*this)); - } - - private: - void RunImpl(const Scope& scope, - const platform::Place& place) const override {} -}; - class ExecutionContext { public: ExecutionContext(const OperatorBase& op, const Scope& scope, diff --git a/paddle/fluid/framework/operator_test.cc b/paddle/fluid/framework/operator_test.cc index e211c678f8d61ddb69b6c612338bfc6a0afe8cd7..ac9dd8245ad4e0e8842f219b23d3866b03fdaedb 100644 --- a/paddle/fluid/framework/operator_test.cc +++ b/paddle/fluid/framework/operator_test.cc @@ -247,26 +247,3 @@ TEST(OpKernel, multi_inputs) { auto op = paddle::framework::OpRegistry::CreateOp(op_desc); op->Run(scope, cpu_place); } - -class OperatorClone : public paddle::framework::OperatorBase { - public: - DEFINE_OP_CLONE_METHOD(OperatorClone); - OperatorClone(const std::string& type, - const paddle::framework::VariableNameMap& inputs, - const paddle::framework::VariableNameMap& outputs, - const paddle::framework::AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - private: - void RunImpl(const paddle::framework::Scope& scope, - const paddle::platform::Place& place) const override {} -}; - -TEST(Operator, Clone) { - paddle::framework::InitDevices(true); - OperatorClone a("ABC", paddle::framework::VariableNameMap{}, - paddle::framework::VariableNameMap{}, - paddle::framework::AttributeMap{}); - auto b = a.Clone(); - ASSERT_EQ(a.Type(), b->Type()); -} diff --git a/paddle/fluid/framework/var_type_inference_test.cc b/paddle/fluid/framework/var_type_inference_test.cc index 14b81ddfecb8c996ae8709910c022a074e91eb3c..7842168f603885ce7dc87d2a01dfa4f544389faa 100644 --- a/paddle/fluid/framework/var_type_inference_test.cc +++ b/paddle/fluid/framework/var_type_inference_test.cc @@ -22,6 +22,17 @@ limitations under the License. */ namespace paddle { namespace framework { +class NOP : public OperatorBase { + public: + NOP(const std::string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, const AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + + private: + void RunImpl(const Scope &scope, + const platform::Place &place) const override {} +}; + class SumOpMaker : public OpProtoAndCheckerMaker { public: void Make() { diff --git a/paddle/fluid/operators/read_op.cc b/paddle/fluid/operators/read_op.cc index 695d7ea83df952d9f2212cc0aaca5c90c7b47ee7..65fcce8bb019965a805ad09d50be0aba64e4f24e 100644 --- a/paddle/fluid/operators/read_op.cc +++ b/paddle/fluid/operators/read_op.cc @@ -92,9 +92,13 @@ class ReadOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput("Reader", "(ReaderHolder) The executed reader."); AddOutput("Out", "(LoDTensor) The output data.").AsDuplicable(); - AddAttr("throw_eof_exp", - "If set true, an exception will be thrown when the Reader " - "yields empty (which means there is no next data).") + AddAttr( + "throw_eof_exp", + "If set true, an exception will be thrown when the Reader " + "yields empty (which means there is no next data).\n" + "NOTES: This flag must be true always. It will be set to false" + " only when the data-balance is enabled in ParallelExecutor" + " and it is set by ParallelExecutor instance, not users.") .SetDefault(true); AddComment(R"DOC( Read Operator diff --git a/paddle/legacy/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/legacy/gserver/gradientmachines/MultiGradientMachine.cpp index 637686e443ceb740d52d42524246ae48a85d52f0..3ef0dfbfe2e5842918500a3b0706c1a55024ce46 100644 --- a/paddle/legacy/gserver/gradientmachines/MultiGradientMachine.cpp +++ b/paddle/legacy/gserver/gradientmachines/MultiGradientMachine.cpp @@ -532,6 +532,7 @@ void TrainerThread::computeThread() { break; } } + hl_fini(); } void TrainerThread::prefetch() { @@ -651,6 +652,7 @@ void TrainerThread::copyGradToBufferThread() { } partnerThread->notifyGradientCollect(pid); } + hl_fini(); } void TrainerThread::gradCollectThread() { @@ -693,6 +695,7 @@ void TrainerThread::gradCollectThread() { notifyCopyGradToBuffer(pid); } } + hl_fini(); } void TrainerThread::doCallback(int pid) { @@ -741,6 +744,7 @@ void TrainerThread::valueDispatchThread() { thread->notifyValueReady(pid); } + hl_fini(); } void TrainerThread::notifyValueReady(int paramId) { diff --git a/paddle/legacy/gserver/gradientmachines/ParallelNeuralNetwork.cpp b/paddle/legacy/gserver/gradientmachines/ParallelNeuralNetwork.cpp index 450514ca88a319b30ca3ebae669c78502087540a..33d24b5b832fe9011591606860e0f50361367790 100644 --- a/paddle/legacy/gserver/gradientmachines/ParallelNeuralNetwork.cpp +++ b/paddle/legacy/gserver/gradientmachines/ParallelNeuralNetwork.cpp @@ -197,6 +197,7 @@ void ParallelThread::computeThread() { job_work.layer_->markAllInputGrad(); } } + hl_fini(); } void ParallelThread::start() { diff --git a/python/paddle/fluid/tests/unittests/test_data_balance.py b/python/paddle/fluid/tests/unittests/test_data_balance.py index cffa3329ac556dc77f3cb508b807cbd49bb974f7..6d810920d55ccf069ff408c553069e8f5e590271 100644 --- a/python/paddle/fluid/tests/unittests/test_data_balance.py +++ b/python/paddle/fluid/tests/unittests/test_data_balance.py @@ -103,8 +103,12 @@ class TestDataBalance(unittest.TestCase): exe = fluid.Executor(place) exe.run(startup_prog) + build_strategy = fluid.BuildStrategy() + build_strategy.enable_data_balance = True parallel_exe = fluid.ParallelExecutor( - use_cuda=self.use_cuda, main_program=main_prog) + use_cuda=self.use_cuda, + main_program=main_prog, + build_strategy=build_strategy) if (parallel_exe.device_count > self.batch_size): print("WARNING: Unittest TestDataBalance skipped. \ @@ -145,9 +149,12 @@ class TestDataBalance(unittest.TestCase): place = fluid.CUDAPlace(0) if self.use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(startup_prog) - + build_strategy = fluid.BuildStrategy() + build_strategy.enable_data_balance = True parallel_exe = fluid.ParallelExecutor( - use_cuda=self.use_cuda, main_program=main_prog) + use_cuda=self.use_cuda, + main_program=main_prog, + build_strategy=build_strategy) if (parallel_exe.device_count > self.batch_size): print("WARNING: Unittest TestDataBalance skipped. \ diff --git a/python/paddle/fluid/transpiler/inference_transpiler.py b/python/paddle/fluid/transpiler/inference_transpiler.py index d32c69d148dfa1633ce344611ca3fe7879a234e9..b8afeae5ebd6ef7948a7c0c2775f419af461da04 100644 --- a/python/paddle/fluid/transpiler/inference_transpiler.py +++ b/python/paddle/fluid/transpiler/inference_transpiler.py @@ -19,7 +19,7 @@ from ..framework import Program from ..executor import global_scope -class InferenceTranspiler: +class InferenceTranspiler(object): ''' Convert the fluid program to optimized inference program. diff --git a/python/paddle/libs/__init__.py b/python/paddle/libs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..34d4f4d07ed0d452c1965c5f1f198230571931aa --- /dev/null +++ b/python/paddle/libs/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# used for setup.py.in to store the thirdparty shared libraries diff --git a/python/setup.py.in b/python/setup.py.in index 51380149d0b09224c02050902897f23f53600de2..5506443733650631fe045be3f701a41519352e8d 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -1,5 +1,7 @@ from setuptools import setup, Distribution, Extension import subprocess +import shutil +import os class BinaryDistribution(Distribution): def has_ext_modules(foo): return True @@ -62,6 +64,7 @@ write_version_py(filename='@PADDLE_BINARY_DIR@/python/paddle/version.py') packages=['paddle', + 'paddle.libs', 'paddle.utils', 'paddle.dataset', 'paddle.reader', @@ -113,12 +116,35 @@ package_dir={ } if '${WITH_FLUID_ONLY}'== 'OFF': package_dir['py_paddle']='${PADDLE_BINARY_DIR}/python/py_paddle' - -paddle_rt_lib_dir = 'lib' -paddle_rt_libs = ['${WARPCTC_LIBRARIES}'] -if '${MKL_SHARED_LIBS}'!= '': - paddle_rt_libs += '${MKL_SHARED_LIBS}'.split(';') +# put all thirdparty libraries in paddle.libs +package_data['paddle.libs']=['libwarpctc.so'] +libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs' +shutil.copy('${WARPCTC_LIBRARIES}', libs_path) +if '${WITH_MKL}' == 'ON': + shutil.copy('${MKLML_LIB}', libs_path) + shutil.copy('${MKLML_IOMP_LIB}', libs_path) + package_data['paddle.libs']+=['libmklml_intel.so','libiomp5.so'] +if '${WITH_MKLDNN}' == 'ON': + # change rpath of libmkldnn.so.0, add $ORIGIN/ to it. + # The reason is that all thirdparty libraries in the same directory, + # thus, libmkldnn.so.0 will find libmklml_intel.so and libiomp5.so. + command = "patchelf --set-rpath '$ORIGIN/' ${MKLDNN_SHARED_LIB}" + if os.system(command) != 0: + raise Exception("patchelf --set-rpath for libmkldnn.so.0 fails") + package_data['paddle.libs']+=['libmkldnn.so.0'] + shutil.copy('${MKLDNN_SHARED_LIB}', libs_path) +# remove unused paddle/libs/__init__.py +os.remove(libs_path+'/__init__.py') +package_dir['paddle.libs']=libs_path + +# change rpath of core.so, add $ORIGIN/../libs/ to it. +# The reason is that libwarpctc.so, libiomp5.so etc are in paddle.libs, and +# core.so is in paddle.fluid, thus paddle/fluid/../libs will pointer to above libraries. +# This operation will fix https://github.com/PaddlePaddle/Paddle/issues/3213 +command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so" +if os.system(command) != 0: + raise Exception("patchelf --set-rpath for core.so fails") setup(name='${PACKAGE_NAME}', version='${PADDLE_VERSION}', @@ -128,6 +154,5 @@ setup(name='${PACKAGE_NAME}', ext_modules=[Extension('_foo', ['stub.cc'])], package_data=package_data, package_dir=package_dir, - scripts=paddle_bins, - data_files=[(paddle_rt_lib_dir, paddle_rt_libs)] + scripts=paddle_bins )