提交 1377b332 编写于 作者: M minqiyang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fix_grpc_destroy_bug

......@@ -23,7 +23,7 @@ ENV HOME /root
COPY ./paddle/scripts/docker/root/ /root/
RUN apt-get update && \
apt-get install -y --allow-downgrades \
apt-get install -y --allow-downgrades patchelf \
git python-pip python-dev python-opencv openssh-server bison \
libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \
wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
......
......@@ -14,6 +14,15 @@ DistributeTranspiler
:members:
:noindex:
.. _api_fluid_transpiler_InferenceTranspiler:
InferenceTranspiler
-------------------
.. autoclass:: paddle.fluid.transpiler.InferenceTranspiler
:members:
:noindex:
.. _api_fluid_transpiler_memory_optimize:
memory_optimize
......
......@@ -34,7 +34,7 @@ struct BuildStrategy {
std::string debug_graphviz_path_{""};
bool enable_data_balance_{true};
bool enable_data_balance_{false};
};
} // namespace details
......
......@@ -86,9 +86,9 @@ std::vector<std::array<int, 3>> DataBalanceOpHandle::GetBalancePlan(
}
void DataBalanceOpHandle::RunImpl() {
if (places_.size() == 1) {
return;
}
PADDLE_ENFORCE_GT(places_.size(), 1,
"Data balance can only be enabled when the number of "
"places to run larger than 1.");
auto in_var_handles = DynamicCast<VarHandle>(inputs_);
auto out_var_handles = DynamicCast<VarHandle>(outputs_);
PADDLE_ENFORCE(in_var_handles.size() % places_.size() == 0);
......
......@@ -59,6 +59,11 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
grad_names_.insert(GradVarName(p));
}
balance_vars_.resize(places_.size(), 0);
if (strategy_.enable_data_balance_ && places_.size() == 1) {
LOG(WARNING) << "It is no need to enable data balance when there is only "
"one place. enable_data_balance is set to False.";
strategy_.enable_data_balance_ = false;
}
}
void MultiDevSSAGraphBuilder::CreateOpHandleIOs(SSAGraph *result,
......
......@@ -186,13 +186,7 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_op__##op_type, \
"REGISTER_OPERATOR must be called in global namespace"); \
class _OpClass_##op_type##_ : public op_class { \
public: \
DEFINE_OP_CLONE_METHOD(_OpClass_##op_type##_); \
DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_, op_class); \
}; \
static ::paddle::framework::OperatorRegistrar<_OpClass_##op_type##_, \
##__VA_ARGS__> \
static ::paddle::framework::OperatorRegistrar<op_class, ##__VA_ARGS__> \
__op_registrar_##op_type##__(#op_type); \
int TouchOpRegistrar_##op_type() { \
__op_registrar_##op_type##__.Touch(); \
......
......@@ -193,15 +193,10 @@ TEST(OpRegistry, CustomChecker) {
ASSERT_EQ(test_attr, 4);
}
class CosineOpComplete : public paddle::framework::CosineOp {
public:
DEFINE_OP_CONSTRUCTOR(CosineOpComplete, paddle::framework::CosineOp);
DEFINE_OP_CLONE_METHOD(CosineOpComplete);
};
TEST(OperatorRegistrar, Test) {
paddle::framework::OperatorRegistrar<
CosineOpComplete, paddle::framework::CosineOpProtoAndCheckerMaker>
paddle::framework::CosineOp,
paddle::framework::CosineOpProtoAndCheckerMaker>
reg("cos");
}
......
......@@ -121,10 +121,6 @@ class OperatorBase {
//! Get all outputs variable names
virtual std::vector<std::string> OutputVars(bool has_intermediate) const;
// Return a new operator instance, which is as same as this.
// Use unique_ptr to prevent caller forget to delete this pointer.
virtual std::unique_ptr<OperatorBase> Clone() const = 0;
protected:
std::string type_;
// NOTE: in case of OpGrad, inputs_ contains:
......@@ -145,37 +141,6 @@ class OperatorBase {
const platform::Place& place) const = 0;
};
// Macro for define a clone method.
// If you are writing an kernel operator, `Clone` will be defined when you
// register it. i.e. `Clone` method is not needed to define by yourself.
#define DEFINE_OP_CLONE_METHOD(cls) \
std::unique_ptr<::paddle::framework::OperatorBase> Clone() const final { \
return std::unique_ptr<::paddle::framework::OperatorBase>(new cls(*this)); \
}
// Macro for define a default constructor for Operator.
// You can also use
// using PARENT_CLASS::PARENT_CLASS;
// to use parent's constructor.
#define DEFINE_OP_CONSTRUCTOR(cls, parent_cls) \
cls(const std::string& type, \
const ::paddle::framework::VariableNameMap& inputs, \
const ::paddle::framework::VariableNameMap& outputs, \
const paddle::framework::AttributeMap& attrs) \
: parent_cls(type, inputs, outputs, attrs) {}
class NOP : public OperatorBase {
public:
using OperatorBase::OperatorBase;
std::unique_ptr<OperatorBase> Clone() const override {
return std::unique_ptr<OperatorBase>(new NOP(*this));
}
private:
void RunImpl(const Scope& scope,
const platform::Place& place) const override {}
};
class ExecutionContext {
public:
ExecutionContext(const OperatorBase& op, const Scope& scope,
......
......@@ -247,26 +247,3 @@ TEST(OpKernel, multi_inputs) {
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
op->Run(scope, cpu_place);
}
class OperatorClone : public paddle::framework::OperatorBase {
public:
DEFINE_OP_CLONE_METHOD(OperatorClone);
OperatorClone(const std::string& type,
const paddle::framework::VariableNameMap& inputs,
const paddle::framework::VariableNameMap& outputs,
const paddle::framework::AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
private:
void RunImpl(const paddle::framework::Scope& scope,
const paddle::platform::Place& place) const override {}
};
TEST(Operator, Clone) {
paddle::framework::InitDevices(true);
OperatorClone a("ABC", paddle::framework::VariableNameMap{},
paddle::framework::VariableNameMap{},
paddle::framework::AttributeMap{});
auto b = a.Clone();
ASSERT_EQ(a.Type(), b->Type());
}
......@@ -22,6 +22,17 @@ limitations under the License. */
namespace paddle {
namespace framework {
class NOP : public OperatorBase {
public:
NOP(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
private:
void RunImpl(const Scope &scope,
const platform::Place &place) const override {}
};
class SumOpMaker : public OpProtoAndCheckerMaker {
public:
void Make() {
......
......@@ -92,9 +92,13 @@ class ReadOpMaker : public framework::OpProtoAndCheckerMaker {
void Make() override {
AddInput("Reader", "(ReaderHolder) The executed reader.");
AddOutput("Out", "(LoDTensor) The output data.").AsDuplicable();
AddAttr<bool>("throw_eof_exp",
AddAttr<bool>(
"throw_eof_exp",
"If set true, an exception will be thrown when the Reader "
"yields empty (which means there is no next data).")
"yields empty (which means there is no next data).\n"
"NOTES: This flag must be true always. It will be set to false"
" only when the data-balance is enabled in ParallelExecutor"
" and it is set by ParallelExecutor instance, not users.")
.SetDefault(true);
AddComment(R"DOC(
Read Operator
......
......@@ -532,6 +532,7 @@ void TrainerThread::computeThread() {
break;
}
}
hl_fini();
}
void TrainerThread::prefetch() {
......@@ -651,6 +652,7 @@ void TrainerThread::copyGradToBufferThread() {
}
partnerThread->notifyGradientCollect(pid);
}
hl_fini();
}
void TrainerThread::gradCollectThread() {
......@@ -693,6 +695,7 @@ void TrainerThread::gradCollectThread() {
notifyCopyGradToBuffer(pid);
}
}
hl_fini();
}
void TrainerThread::doCallback(int pid) {
......@@ -741,6 +744,7 @@ void TrainerThread::valueDispatchThread() {
thread->notifyValueReady(pid);
}
hl_fini();
}
void TrainerThread::notifyValueReady(int paramId) {
......
......@@ -197,6 +197,7 @@ void ParallelThread::computeThread() {
job_work.layer_->markAllInputGrad();
}
}
hl_fini();
}
void ParallelThread::start() {
......
......@@ -103,8 +103,12 @@ class TestDataBalance(unittest.TestCase):
exe = fluid.Executor(place)
exe.run(startup_prog)
build_strategy = fluid.BuildStrategy()
build_strategy.enable_data_balance = True
parallel_exe = fluid.ParallelExecutor(
use_cuda=self.use_cuda, main_program=main_prog)
use_cuda=self.use_cuda,
main_program=main_prog,
build_strategy=build_strategy)
if (parallel_exe.device_count > self.batch_size):
print("WARNING: Unittest TestDataBalance skipped. \
......@@ -145,9 +149,12 @@ class TestDataBalance(unittest.TestCase):
place = fluid.CUDAPlace(0) if self.use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
build_strategy = fluid.BuildStrategy()
build_strategy.enable_data_balance = True
parallel_exe = fluid.ParallelExecutor(
use_cuda=self.use_cuda, main_program=main_prog)
use_cuda=self.use_cuda,
main_program=main_prog,
build_strategy=build_strategy)
if (parallel_exe.device_count > self.batch_size):
print("WARNING: Unittest TestDataBalance skipped. \
......
......@@ -19,7 +19,7 @@ from ..framework import Program
from ..executor import global_scope
class InferenceTranspiler:
class InferenceTranspiler(object):
'''
Convert the fluid program to optimized inference program.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# used for setup.py.in to store the thirdparty shared libraries
from setuptools import setup, Distribution, Extension
import subprocess
import shutil
import os
class BinaryDistribution(Distribution):
def has_ext_modules(foo):
return True
......@@ -62,6 +64,7 @@ write_version_py(filename='@PADDLE_BINARY_DIR@/python/paddle/version.py')
packages=['paddle',
'paddle.libs',
'paddle.utils',
'paddle.dataset',
'paddle.reader',
......@@ -114,11 +117,34 @@ package_dir={
if '${WITH_FLUID_ONLY}'== 'OFF':
package_dir['py_paddle']='${PADDLE_BINARY_DIR}/python/py_paddle'
paddle_rt_lib_dir = 'lib'
paddle_rt_libs = ['${WARPCTC_LIBRARIES}']
if '${MKL_SHARED_LIBS}'!= '':
paddle_rt_libs += '${MKL_SHARED_LIBS}'.split(';')
# put all thirdparty libraries in paddle.libs
package_data['paddle.libs']=['libwarpctc.so']
libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs'
shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
if '${WITH_MKL}' == 'ON':
shutil.copy('${MKLML_LIB}', libs_path)
shutil.copy('${MKLML_IOMP_LIB}', libs_path)
package_data['paddle.libs']+=['libmklml_intel.so','libiomp5.so']
if '${WITH_MKLDNN}' == 'ON':
# change rpath of libmkldnn.so.0, add $ORIGIN/ to it.
# The reason is that all thirdparty libraries in the same directory,
# thus, libmkldnn.so.0 will find libmklml_intel.so and libiomp5.so.
command = "patchelf --set-rpath '$ORIGIN/' ${MKLDNN_SHARED_LIB}"
if os.system(command) != 0:
raise Exception("patchelf --set-rpath for libmkldnn.so.0 fails")
package_data['paddle.libs']+=['libmkldnn.so.0']
shutil.copy('${MKLDNN_SHARED_LIB}', libs_path)
# remove unused paddle/libs/__init__.py
os.remove(libs_path+'/__init__.py')
package_dir['paddle.libs']=libs_path
# change rpath of core.so, add $ORIGIN/../libs/ to it.
# The reason is that libwarpctc.so, libiomp5.so etc are in paddle.libs, and
# core.so is in paddle.fluid, thus paddle/fluid/../libs will pointer to above libraries.
# This operation will fix https://github.com/PaddlePaddle/Paddle/issues/3213
command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so"
if os.system(command) != 0:
raise Exception("patchelf --set-rpath for core.so fails")
setup(name='${PACKAGE_NAME}',
version='${PADDLE_VERSION}',
......@@ -128,6 +154,5 @@ setup(name='${PACKAGE_NAME}',
ext_modules=[Extension('_foo', ['stub.cc'])],
package_data=package_data,
package_dir=package_dir,
scripts=paddle_bins,
data_files=[(paddle_rt_lib_dir, paddle_rt_libs)]
scripts=paddle_bins
)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册