提交 ceb150e9 编写于 作者: Y yuyang18

Merge remote-tracking branch 'yx/fix_bce_cdn_link' into feature/refine_parallel_executor

...@@ -159,6 +159,7 @@ def run_benchmark(model, args): ...@@ -159,6 +159,7 @@ def run_benchmark(model, args):
paddle.dataset.mnist.train(), batch_size=args.batch_size) paddle.dataset.mnist.train(), batch_size=args.batch_size)
accuracy = fluid.metrics.Accuracy() accuracy = fluid.metrics.Accuracy()
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
iters, num_samples, start_time = 0, 0, time.time() iters, num_samples, start_time = 0, 0, time.time()
for pass_id in range(args.pass_num): for pass_id in range(args.pass_num):
accuracy.reset() accuracy.reset()
...@@ -175,17 +176,20 @@ def run_benchmark(model, args): ...@@ -175,17 +176,20 @@ def run_benchmark(model, args):
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([len(y_data), 1]) y_data = y_data.reshape([len(y_data), 1])
outs = exe.run( outs = train_exe.run(
fluid.default_main_program(),
feed={"pixel": img_data, feed={"pixel": img_data,
"label": y_data}, "label": y_data},
fetch_list=[avg_cost, batch_acc, batch_size_tensor] fetch_list=[
avg_cost.name, batch_acc.name, batch_size_tensor.name
]
) # The accuracy is the accumulation of batches, but not the current batch. ) # The accuracy is the accumulation of batches, but not the current batch.
accuracy.update(value=outs[1], weight=outs[2]) accuracy.update(
value=np.array(np.mean(outs[1])),
weight=np.mean(np.array(outs[2])))
iters += 1 iters += 1
num_samples += len(y_data) num_samples += len(y_data)
loss = np.array(outs[0]) loss = np.mean(np.array(outs[0]))
acc = np.array(outs[1]) acc = np.mean(np.array(outs[1]))
train_losses.append(loss) train_losses.append(loss)
train_accs.append(acc) train_accs.append(acc)
print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" % print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" %
......
...@@ -241,6 +241,7 @@ def run_benchmark(model, args): ...@@ -241,6 +241,7 @@ def run_benchmark(model, args):
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
accuracy = fluid.average.WeightedAverage() accuracy = fluid.average.WeightedAverage()
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
if args.use_fake_data: if args.use_fake_data:
data = train_reader().next() data = train_reader().next()
image = np.array(map(lambda x: x[0].reshape(dshape), data)).astype( image = np.array(map(lambda x: x[0].reshape(dshape), data)).astype(
...@@ -264,14 +265,17 @@ def run_benchmark(model, args): ...@@ -264,14 +265,17 @@ def run_benchmark(model, args):
data)).astype('float32') data)).astype('float32')
label = np.array(map(lambda x: x[1], data)).astype('int64') label = np.array(map(lambda x: x[1], data)).astype('int64')
label = label.reshape([-1, 1]) label = label.reshape([-1, 1])
loss, acc, weight = exe.run( loss, acc, weight = train_exe.run(
fluid.default_main_program(),
feed={'data': image, feed={'data': image,
'label': label}, 'label': label},
fetch_list=[avg_cost, batch_acc, batch_size_tensor]) fetch_list=[
avg_cost.name, batch_acc.name, batch_size_tensor.name
])
iters += 1 iters += 1
num_samples += len(label) num_samples += len(label)
accuracy.add(value=acc, weight=weight) accuracy.add(value=np.array(np.mean(acc)), weight=np.mean(weight))
loss = np.mean(np.array(loss))
acc = np.mean(np.array(acc))
train_losses.append(loss) train_losses.append(loss)
train_accs.append(acc) train_accs.append(acc)
print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" % print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" %
......
...@@ -169,6 +169,7 @@ def main(): ...@@ -169,6 +169,7 @@ def main():
iters, num_samples, start_time = 0, 0, time.time() iters, num_samples, start_time = 0, 0, time.time()
accuracy = fluid.average.WeightedAverage() accuracy = fluid.average.WeightedAverage()
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
for pass_id in range(args.pass_num): for pass_id in range(args.pass_num):
accuracy.reset() accuracy.reset()
train_accs = [] train_accs = []
...@@ -184,14 +185,17 @@ def main(): ...@@ -184,14 +185,17 @@ def main():
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
loss, acc, weight = exe.run( loss, acc, weight = train_exe.run(
fluid.default_main_program(),
feed={"pixel": img_data, feed={"pixel": img_data,
"label": y_data}, "label": y_data},
fetch_list=[avg_cost, batch_acc, batch_size_tensor]) fetch_list=[
accuracy.add(value=acc, weight=weight) avg_cost.name, batch_acc.name, batch_size_tensor.name
])
accuracy.add(value=np.array(np.mean(acc)), weight=np.mean(weight))
iters += 1 iters += 1
num_samples += len(y_data) num_samples += len(y_data)
loss = np.mean(np.array(loss))
acc = np.mean(np.array(acc))
print( print(
"Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" % "Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
(pass_id, iters, loss, acc) (pass_id, iters, loss, acc)
......
...@@ -24,7 +24,7 @@ set(BOOST_PROJECT "extern_boost") ...@@ -24,7 +24,7 @@ set(BOOST_PROJECT "extern_boost")
# So we use 1.41.0 here. # So we use 1.41.0 here.
set(BOOST_VER "1.41.0") set(BOOST_VER "1.41.0")
set(BOOST_TAR "boost_1_41_0") set(BOOST_TAR "boost_1_41_0")
set(BOOST_URL "http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz") set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz")
set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost) set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost)
set(BOOST_DOWNLOAD_DIR "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}") set(BOOST_DOWNLOAD_DIR "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}")
set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}/${BOOST_TAR}" CACHE PATH "boost include directory." FORCE) set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}/${BOOST_TAR}" CACHE PATH "boost include directory." FORCE)
......
...@@ -21,11 +21,12 @@ else() ...@@ -21,11 +21,12 @@ else()
ExternalProject_Add( ExternalProject_Add(
extern_eigen3 extern_eigen3
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/RLovelett/eigen.git" GIT_REPOSITORY "https://github.com/eigenteam/eigen-git-mirror"
# eigen on cuda9.1 missing header of math_funtions.hpp # eigen on cuda9.1 missing header of math_funtions.hpp
# https://stackoverflow.com/questions/43113508/math-functions-hpp-not-found-when-using-cuda-with-eigen # https://stackoverflow.com/questions/43113508/math-functions-hpp-not-found-when-using-cuda-with-eigen
GIT_TAG 917060c364181f33a735dc023818d5a54f60e54c GIT_TAG 917060c364181f33a735dc023818d5a54f60e54c
PREFIX ${EIGEN_SOURCE_DIR} PREFIX ${EIGEN_SOURCE_DIR}
DOWNLOAD_NAME "eigen"
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
BUILD_COMMAND "" BUILD_COMMAND ""
......
...@@ -28,7 +28,7 @@ INCLUDE(ExternalProject) ...@@ -28,7 +28,7 @@ INCLUDE(ExternalProject)
SET(MKLML_PROJECT "extern_mklml") SET(MKLML_PROJECT "extern_mklml")
SET(MKLML_VER "mklml_lnx_2018.0.3.20180406") SET(MKLML_VER "mklml_lnx_2018.0.3.20180406")
SET(MKLML_URL "http://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.tgz") SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz")
SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml")
SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}") SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}")
SET(MKLML_DST_DIR "mklml") SET(MKLML_DST_DIR "mklml")
......
...@@ -148,4 +148,10 @@ copy(string_lib ...@@ -148,4 +148,10 @@ copy(string_lib
DSTS ${dst_dir}/${module} ${dst_dir}/${module}/tinyformat DSTS ${dst_dir}/${module} ${dst_dir}/${module}/tinyformat
) )
set(module "pybind")
copy(pybind_lib
SRCS ${CMAKE_CURRENT_BINARY_DIR}/paddle/fluid/${module}/pybind.h
DSTS ${dst_dir}/${module}
)
add_custom_target(inference_lib_dist DEPENDS ${inference_lib_dist_dep}) add_custom_target(inference_lib_dist DEPENDS ${inference_lib_dist_dep})
...@@ -40,7 +40,7 @@ template <typename T> ...@@ -40,7 +40,7 @@ template <typename T>
class FCOp : public OperatorBase { class FCOp : public OperatorBase {
public: public:
void Run(...) { void Run(...) {
add(mul(Input<T>("X"), Input<T>("W")), Input<T>("b"); add(mul(Input<T>("X"), Input<T>("W")), Input<T>("b"));
} }
}; };
REGISTER_OP(FCOp, "fc"); REGISTER_OP(FCOp, "fc");
......
...@@ -58,6 +58,7 @@ static DataTypeMap* InitDataTypeMap() { ...@@ -58,6 +58,7 @@ static DataTypeMap* InitDataTypeMap() {
RegType(bool, proto::VarType::BOOL); RegType(bool, proto::VarType::BOOL);
RegType(size_t, proto::VarType::SIZE_T); RegType(size_t, proto::VarType::SIZE_T);
RegType(int16_t, proto::VarType::INT16); RegType(int16_t, proto::VarType::INT16);
RegType(uint8_t, proto::VarType::UINT8);
#undef RegType #undef RegType
return retv; return retv;
......
...@@ -47,8 +47,14 @@ inline void VisitDataType(proto::VarType::Type type, Visitor visitor) { ...@@ -47,8 +47,14 @@ inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
case proto::VarType::BOOL: case proto::VarType::BOOL:
visitor.template operator()<bool>(); visitor.template operator()<bool>();
break; break;
case proto::VarType::UINT8:
visitor.template operator()<uint8_t>();
break;
case proto::VarType::INT16:
visitor.template operator()<int16_t>();
break;
default: default:
PADDLE_THROW("Not supported"); PADDLE_THROW("Not supported %d", type);
} }
} }
......
...@@ -48,17 +48,18 @@ void FetchOpHandle::RunImpl() { ...@@ -48,17 +48,18 @@ void FetchOpHandle::RunImpl() {
WaitInputVarGenerated(platform::CPUPlace()); WaitInputVarGenerated(platform::CPUPlace());
tensors_.resize(inputs_.size()); tensors_.resize(inputs_.size());
auto *var_handle = static_cast<VarHandle *>(inputs_[0]);
auto &var_name = var_handle->name_;
platform::CPUPlace cpu; platform::CPUPlace cpu;
auto &scopes = *local_scopes_; auto &scopes = *local_scopes_;
for (size_t i = 0; i < scopes.size(); ++i) { for (size_t i = 0; i < inputs_.size(); ++i) {
auto &scope = scopes[i]; auto *var_handle = static_cast<VarHandle *>(inputs_[i]);
auto *var = auto &scope = scopes.at(var_handle->scope_idx_);
scope->FindVar(kLocalExecScopeName)->Get<Scope *>()->FindVar(var_name); auto *var = scope->FindVar(kLocalExecScopeName)
->Get<Scope *>()
->FindVar(var_handle->name_);
PADDLE_ENFORCE_NOT_NULL(var, "Cannot find variable %s in execution scope", PADDLE_ENFORCE_NOT_NULL(var, "Cannot find variable %s in execution scope",
var_name); var_handle->name_);
auto &t = var->Get<framework::LoDTensor>(); auto &t = var->Get<framework::LoDTensor>();
if (platform::is_gpu_place(t.place())) { if (platform::is_gpu_place(t.place())) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
......
...@@ -70,6 +70,14 @@ class OpHandleBase { ...@@ -70,6 +70,14 @@ class OpHandleBase {
const std::vector<VarHandleBase *> &Inputs() const { return inputs_; } const std::vector<VarHandleBase *> &Inputs() const { return inputs_; }
size_t NoDupInputSize() const {
std::unordered_set<VarHandleBase *> res;
for (auto *var : inputs_) {
res.emplace(var);
}
return res.size();
}
const std::vector<VarHandleBase *> &Outputs() const { return outputs_; } const std::vector<VarHandleBase *> &Outputs() const { return outputs_; }
protected: protected:
......
...@@ -174,7 +174,7 @@ void ThreadedSSAGraphExecutor::InsertFetchOps( ...@@ -174,7 +174,7 @@ void ThreadedSSAGraphExecutor::InsertFetchOps(
void ThreadedSSAGraphExecutor::InsertPendingOp( void ThreadedSSAGraphExecutor::InsertPendingOp(
std::unordered_map<OpHandleBase *, size_t> *pending_ops, std::unordered_map<OpHandleBase *, size_t> *pending_ops,
OpHandleBase *op_instance) const { OpHandleBase *op_instance) const {
pending_ops->insert({op_instance, op_instance->Inputs().size()}); pending_ops->insert({op_instance, op_instance->NoDupInputSize()});
} }
void ThreadedSSAGraphExecutor::InsertPendingVar( void ThreadedSSAGraphExecutor::InsertPendingVar(
......
...@@ -103,6 +103,7 @@ message VarType { ...@@ -103,6 +103,7 @@ message VarType {
FP64 = 6; FP64 = 6;
// Tensor<size_t> is used in C++. // Tensor<size_t> is used in C++.
SIZE_T = 19; SIZE_T = 19;
UINT8 = 20;
// Other types that may need additional descriptions // Other types that may need additional descriptions
LOD_TENSOR = 7; LOD_TENSOR = 7;
......
...@@ -228,11 +228,12 @@ TEST(LoD, CheckAbsLoD) { ...@@ -228,11 +228,12 @@ TEST(LoD, CheckAbsLoD) {
ASSERT_FALSE(CheckAbsLoD(abs_lod0)); ASSERT_FALSE(CheckAbsLoD(abs_lod0));
} }
TEST(LoDTensor, RecordIO) { template <typename T>
static void TestRecordIO() {
LoDTensor tensor; LoDTensor tensor;
int* tmp = tensor.mutable_data<int>(make_ddim({4, 5}), platform::CPUPlace()); T* tmp = tensor.mutable_data<T>(make_ddim({4, 5}), platform::CPUPlace());
for (int i = 0; i < 20; ++i) { for (int i = 0; i < 20; ++i) {
tmp[i] = i; tmp[i] = static_cast<T>(i);
} }
std::stringstream* stream = new std::stringstream(); std::stringstream* stream = new std::stringstream();
...@@ -247,7 +248,7 @@ TEST(LoDTensor, RecordIO) { ...@@ -247,7 +248,7 @@ TEST(LoDTensor, RecordIO) {
auto assert_tensor_ok = [](const LoDTensor& tensor) { auto assert_tensor_ok = [](const LoDTensor& tensor) {
for (int i = 0; i < 20; ++i) { for (int i = 0; i < 20; ++i) {
ASSERT_EQ(tensor.data<int>()[i], i); ASSERT_EQ(tensor.data<T>()[i], static_cast<T>(i));
} }
}; };
...@@ -265,5 +266,13 @@ TEST(LoDTensor, RecordIO) { ...@@ -265,5 +266,13 @@ TEST(LoDTensor, RecordIO) {
} }
} }
TEST(LoDTensor, RecordIO) {
TestRecordIO<int>();
TestRecordIO<int16_t>();
TestRecordIO<uint8_t>();
TestRecordIO<float>();
TestRecordIO<double>();
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -49,7 +49,7 @@ class OpConverter { ...@@ -49,7 +49,7 @@ class OpConverter {
// convert fluid block to tensorrt network // convert fluid block to tensorrt network
void ConvertBlock(const framework::proto::BlockDesc& block, void ConvertBlock(const framework::proto::BlockDesc& block,
TensorRTEngine* engine) { TensorRTEngine* engine) {
for (size_t i = 0; i < block.ops_size(); i++) { for (int i = 0; i < block.ops_size(); i++) {
const auto& op = block.ops(i); const auto& op = block.ops(i);
OpConverter::Run(op, engine); OpConverter::Run(op, engine);
} }
......
...@@ -38,7 +38,9 @@ template struct SetConstant<platform::CPUDeviceContext, bool>; ...@@ -38,7 +38,9 @@ template struct SetConstant<platform::CPUDeviceContext, bool>;
template struct Transpose<platform::CPUDeviceContext, double, RANK>; \ template struct Transpose<platform::CPUDeviceContext, double, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int, RANK>; \ template struct Transpose<platform::CPUDeviceContext, int, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int64_t, RANK>; \ template struct Transpose<platform::CPUDeviceContext, int64_t, RANK>; \
template struct Transpose<platform::CPUDeviceContext, bool, RANK>; template struct Transpose<platform::CPUDeviceContext, bool, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int16_t, RANK>; \
template struct Transpose<platform::CPUDeviceContext, uint8_t, RANK>;
DEFINE_CPU_TRANS(1); DEFINE_CPU_TRANS(1);
DEFINE_CPU_TRANS(2); DEFINE_CPU_TRANS(2);
......
...@@ -105,7 +105,7 @@ class SmoothL1LossGradOp : public framework::OperatorWithKernel { ...@@ -105,7 +105,7 @@ class SmoothL1LossGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
auto in_dims = ctx->GetInputDim("X"); auto in_dims = ctx->GetInputDim("Diff");
auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
PADDLE_ENFORCE_GE(out_dims.size(), 2, PADDLE_ENFORCE_GE(out_dims.size(), 2,
...@@ -127,12 +127,33 @@ class SmoothL1LossGradOp : public framework::OperatorWithKernel { ...@@ -127,12 +127,33 @@ class SmoothL1LossGradOp : public framework::OperatorWithKernel {
} }
}; };
class SmoothL1LossGradMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
auto* op = new framework::OpDesc();
op->SetType("smooth_l1_loss_grad");
op->SetInput("InsideWeight", Input("InsideWeight"));
op->SetInput("OutsideWeight", Input("OutsideWeight"));
op->SetInput("Diff", Output("Diff"));
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetAttrMap(Attrs());
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
return std::unique_ptr<framework::OpDesc>(op);
}
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OPERATOR(smooth_l1_loss, ops::SmoothL1LossOp, ops::SmoothL1LossOpMaker, REGISTER_OPERATOR(smooth_l1_loss, ops::SmoothL1LossOp, ops::SmoothL1LossOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>); ops::SmoothL1LossGradMaker);
REGISTER_OPERATOR(smooth_l1_loss_grad, ops::SmoothL1LossGradOp); REGISTER_OPERATOR(smooth_l1_loss_grad, ops::SmoothL1LossGradOp);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
smooth_l1_loss, smooth_l1_loss,
......
...@@ -20,19 +20,15 @@ ...@@ -20,19 +20,15 @@
#================================================= #=================================================
function print_usage() { function print_usage() {
RED='\033[0;31m'
BLUE='\033[0;34m'
BOLD='\033[1m'
NONE='\033[0m'
echo -e "\n${RED}Usage${NONE}: echo -e "\n${RED}Usage${NONE}:
${BOLD}$0${NONE} [OPTION]" ${BOLD}${SCRIPT_NAME}${NONE} [OPTION]"
echo -e "\n${RED}Options${NONE}: echo -e "\n${RED}Options${NONE}:
${BLUE}build${NONE}: run build for x86 platform ${BLUE}build${NONE}: run build for x86 platform
${BLUE}build_android${NONE}: run build for android platform ${BLUE}build_android${NONE}: run build for android platform
${BLUE}build_ios${NONE}: run build for ios platform ${BLUE}build_ios${NONE}: run build for ios platform
${BLUE}test${NONE}: run all unit tests ${BLUE}test${NONE}: run all unit tests
${BLUE}single_test${NONE}: run a single unit test
${BLUE}bind_test${NONE}: parallel tests bind to different GPU ${BLUE}bind_test${NONE}: parallel tests bind to different GPU
${BLUE}doc${NONE}: generate paddle documents ${BLUE}doc${NONE}: generate paddle documents
${BLUE}html${NONE}: convert C++ source code into HTML ${BLUE}html${NONE}: convert C++ source code into HTML
...@@ -45,7 +41,15 @@ function print_usage() { ...@@ -45,7 +41,15 @@ function print_usage() {
} }
function init() { function init() {
RED='\033[0;31m'
BLUE='\033[0;34m'
BOLD='\033[1m'
NONE='\033[0m'
PADDLE_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}")/../../" && pwd )" PADDLE_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}")/../../" && pwd )"
if [ -z "${SCRIPT_NAME}" ]; then
SCRIPT_NAME=$0
fi
} }
function cmake_gen() { function cmake_gen() {
...@@ -91,7 +95,6 @@ function cmake_gen() { ...@@ -91,7 +95,6 @@ function cmake_gen() {
-DWITH_AVX=${WITH_AVX:-OFF} -DWITH_AVX=${WITH_AVX:-OFF}
-DWITH_GOLANG=${WITH_GOLANG:-OFF} -DWITH_GOLANG=${WITH_GOLANG:-OFF}
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All}
-DWITH_SWIG_PY=ON
-DWITH_C_API=${WITH_C_API:-OFF} -DWITH_C_API=${WITH_C_API:-OFF}
-DWITH_PYTHON=${WITH_PYTHON:-ON} -DWITH_PYTHON=${WITH_PYTHON:-ON}
-DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON}
...@@ -309,6 +312,25 @@ EOF ...@@ -309,6 +312,25 @@ EOF
fi fi
} }
function single_test() {
TEST_NAME=$1
if [ -z "${TEST_NAME}" ]; then
echo -e "${RED}Usage:${NONE}"
echo -e "${BOLD}${SCRIPT_NAME}${NONE} ${BLUE}single_test${NONE} [test_name]"
exit 1
fi
mkdir -p ${PADDLE_ROOT}/build
cd ${PADDLE_ROOT}/build
if [ ${WITH_TESTING:-ON} == "ON" ] ; then
cat <<EOF
========================================
Running ${TEST_NAME} ...
========================================
EOF
ctest --output-on-failure -R ${TEST_NAME}
fi
}
function bind_test() { function bind_test() {
# the number of process to run tests # the number of process to run tests
NUM_PROC=6 NUM_PROC=6
...@@ -480,6 +502,7 @@ function main() { ...@@ -480,6 +502,7 @@ function main() {
build) build)
cmake_gen ${PYTHON_ABI:-""} cmake_gen ${PYTHON_ABI:-""}
build build
gen_dockerfile
;; ;;
build_android) build_android)
build_android build_android
...@@ -490,6 +513,9 @@ function main() { ...@@ -490,6 +513,9 @@ function main() {
test) test)
run_test run_test
;; ;;
single_test)
single_test $2
;;
bind_test) bind_test)
bind_test bind_test
;; ;;
......
...@@ -63,6 +63,7 @@ EOL ...@@ -63,6 +63,7 @@ EOL
${DOCKER_CMD} run -it \ ${DOCKER_CMD} run -it \
--name $CONTAINER_ID \ --name $CONTAINER_ID \
${DOCKER_ENV} \ ${DOCKER_ENV} \
-e SCRIPT_NAME=$0 \
-v $PADDLE_ROOT:/paddle \ -v $PADDLE_ROOT:/paddle \
-v ${HOME}/.ccache:/root/.ccache \ -v ${HOME}/.ccache:/root/.ccache \
-w /paddle \ -w /paddle \
......
...@@ -54,9 +54,9 @@ class DataToLoDTensorConverter(object): ...@@ -54,9 +54,9 @@ class DataToLoDTensorConverter(object):
self.data.append(data) self.data.append(data)
else: else:
cur_lod_len = len(data) cur_lod_len = len(data)
lod[-1].append(lod[-1][-1] + cur_lod_len) lod[0].append(lod[0][-1] + cur_lod_len)
for each_data in data: for each_data in data:
self._feed_impl_(each_data, lod[:-1], lod_level - 1) self._feed_impl_(each_data, lod[1:], lod_level - 1)
def done(self): def done(self):
arr = numpy.array(self.data, dtype=self.dtype).reshape(self.shape) arr = numpy.array(self.data, dtype=self.dtype).reshape(self.shape)
......
...@@ -1329,6 +1329,8 @@ def sequence_pool(input, pool_type): ...@@ -1329,6 +1329,8 @@ def sequence_pool(input, pool_type):
sqrt : out.data = [2.82, 6.93, 4.24], where 2.82=(1+3)/sqrt(2), sqrt : out.data = [2.82, 6.93, 4.24], where 2.82=(1+3)/sqrt(2),
6.93=(2+4+6)/sqrt(3), 4.24=(5+1)/sqrt(2) 6.93=(2+4+6)/sqrt(3), 4.24=(5+1)/sqrt(2)
max : out.data = [3, 6, 5], where 3=max(1,3), 6=max(2,4,6), 5=max(5,1) max : out.data = [3, 6, 5], where 3=max(1,3), 6=max(2,4,6), 5=max(5,1)
last : out.data = [3, 6, 1], where 3=last(1,3), 6=last(2,4,6), 1=last(5,1)
first : out.data = [1, 2, 5], where 1=first(1,3), 2=first(2,4,6), 5=first(5,1)
Args: Args:
input(variable): The input variable which is a LoDTensor. input(variable): The input variable which is a LoDTensor.
...@@ -1348,6 +1350,8 @@ def sequence_pool(input, pool_type): ...@@ -1348,6 +1350,8 @@ def sequence_pool(input, pool_type):
sum_x = fluid.layers.sequence_pool(input=x, pool_type='sum') sum_x = fluid.layers.sequence_pool(input=x, pool_type='sum')
sqrt_x = fluid.layers.sequence_pool(input=x, pool_type='sqrt') sqrt_x = fluid.layers.sequence_pool(input=x, pool_type='sqrt')
max_x = fluid.layers.sequence_pool(input=x, pool_type='max') max_x = fluid.layers.sequence_pool(input=x, pool_type='max')
last_x = fluid.layers.sequence_pool(input=x, pool_type='last')
first_x = fluid.layers.sequence_pool(input=x, pool_type='first')
""" """
helper = LayerHelper('sequence_pool', **locals()) helper = LayerHelper('sequence_pool', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -3263,35 +3267,35 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): ...@@ -3263,35 +3267,35 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
""" """
**Smooth L1 Loss Operator. ** **Smooth L1 Loss Operator. **
This operator computes the smooth l1 loss for X and Y. This operator computes the smooth L1 loss for X and Y.
The operator takes the first dimension of X and Y as batch size. The operator takes the first dimension of X and Y as batch size.
For each instance, it computes the smooth l1 loss element by element first For each instance, it computes the smooth L1 loss element by element first
and then sums all the losses. So the shape of Out is [batch_size, 1]. and then sums all the losses. So the shape of Out is [batch_size, 1].
Args: Args:
x (Variable): A tensor with rank at least 2. The input value of smooth x (Variable): A tensor with rank at least 2. The input value of smooth
l1 loss op with shape [batch_size, dim1, ..., dimN]. L1 loss op with shape [batch_size, dim1, ..., dimN].
y (Variable): A tensor with rank at least 2. The target value of smooth y (Variable): A tensor with rank at least 2. The target value of smooth
l1 loss op with same shape as x. L1 loss op with same shape as x.
inside_weight (Variable|None): A tensor with rank at least 2. This inside_weight (Variable|None): A tensor with rank at least 2. This
input is optional and should have same shape with x. If provided, input is optional and should have same shape with x. If provided,
the result of (x - y) will be multiplied by this tensor element by the result of (x - y) will be multiplied by this tensor element by
element. element.
outside_weight (Variable|None): A tensor with rank at least 2. This outside_weight (Variable|None): A tensor with rank at least 2. This
input is optional and should have same shape with x. If provided, input is optional and should have same shape with x. If provided,
the out smooth l1 loss will be multiplied by this tensor element the out smooth L1 loss will be multiplied by this tensor element
by element. by element.
sigma (float|None): Hyper parameter of smooth l1 loss op. A float scalar sigma (float|None): Hyper parameter of smooth L1 loss op. A float scalar
with default value 1.0. with default value 1.0.
Returns: Returns:
Variable: A tensor with rank be 2. The output smooth l1 loss with Variable: A tensor with rank be 2. The output smooth L1 loss with
shape [batch_size, 1]. shape [batch_size, 1].
Examples: Examples:
.. code-block:: python .. code-block:: python
data = fluid.layers.data(name='data', shape=[128], dtype='float32') data = fluid.layers.data(name='data', shape=[128], dtype='float32')
label = fluid.layers.data(name='label', shape=[100], dtype='int64') label = fluid.layers.data(name='label', shape=[100], dtype='float32')
fc = fluid.layers.fc(input=data, size=100) fc = fluid.layers.fc(input=data, size=100)
out = fluid.layers.smooth_l1(x=fc, y=label) out = fluid.layers.smooth_l1(x=fc, y=label)
""" """
...@@ -3769,13 +3773,13 @@ def label_smooth(label, ...@@ -3769,13 +3773,13 @@ def label_smooth(label,
def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0): def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
""" """
Region of interest pooling (also known as RoI pooling) is to perform Region of interest pooling (also known as RoI pooling) is to perform
is to perform max pooling on inputs of nonuniform sizes to obtain is to perform max pooling on inputs of nonuniform sizes to obtain
fixed-size feature maps (e.g. 7*7). fixed-size feature maps (e.g. 7*7).
The operator has three steps: The operator has three steps:
1. Dividing each region proposal into equal-sized sections with 1. Dividing each region proposal into equal-sized sections with
the pooled_width and pooled_height the pooled_width and pooled_height
2. Finding the largest value in each section 2. Finding the largest value in each section
3. Copying these max values to the output buffer 3. Copying these max values to the output buffer
Args: Args:
...@@ -3783,8 +3787,8 @@ def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0): ...@@ -3783,8 +3787,8 @@ def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
rois (Variable): ROIs (Regions of Interest) to pool over. It should rois (Variable): ROIs (Regions of Interest) to pool over. It should
be a 2-D one level LoTensor of shape [num_rois, 4]. be a 2-D one level LoTensor of shape [num_rois, 4].
The layout is [x1, y1, x2, y2], where (x1, y1) The layout is [x1, y1, x2, y2], where (x1, y1)
is the top left coordinates, and (x2, y2) is the is the top left coordinates, and (x2, y2) is the
bottom right coordinates. The num_rois is the bottom right coordinates. The num_rois is the
total number of ROIs in this batch data. total number of ROIs in this batch data.
pooled_height (integer): The pooled output height. Default: 1 pooled_height (integer): The pooled output height. Default: 1
pooled_width (integer): The pooled output width. Default: 1 pooled_width (integer): The pooled output width. Default: 1
...@@ -3793,11 +3797,11 @@ def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0): ...@@ -3793,11 +3797,11 @@ def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
to the scale used when pooling. Default: 1.0 to the scale used when pooling. Default: 1.0
Returns: Returns:
pool_out (Variable): The output is a 4-D tensor of the shape pool_out (Variable): The output is a 4-D tensor of the shape
(num_rois, channels, pooled_h, pooled_w). (num_rois, channels, pooled_h, pooled_w).
Examples: Examples:
pool_out = fluid.layers.roi_pool(input=x, rois=rois, 7, 7, 1.0) pool_out = fluid.layers.roi_pool(input=x, rois=rois, 7, 7, 1.0)
""" """
helper = LayerHelper('roi_pool', **locals()) helper = LayerHelper('roi_pool', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
......
...@@ -62,7 +62,10 @@ def train(use_cuda, train_program, save_dirname): ...@@ -62,7 +62,10 @@ def train(use_cuda, train_program, save_dirname):
optimizer = fluid.optimizer.Adam(learning_rate=0.001) optimizer = fluid.optimizer.Adam(learning_rate=0.001)
trainer = fluid.Trainer( trainer = fluid.Trainer(
train_func=train_program, place=place, optimizer=optimizer) train_func=train_program,
place=place,
optimizer=optimizer,
parallel=True)
def event_handler(event): def event_handler(event):
if isinstance(event, fluid.EndEpochEvent): if isinstance(event, fluid.EndEpochEvent):
...@@ -87,6 +90,9 @@ def train(use_cuda, train_program, save_dirname): ...@@ -87,6 +90,9 @@ def train(use_cuda, train_program, save_dirname):
event.epoch + 1, float(avg_cost), float(acc))) event.epoch + 1, float(avg_cost), float(acc)))
if math.isnan(float(avg_cost)): if math.isnan(float(avg_cost)):
sys.exit("got NaN loss, training failed.") sys.exit("got NaN loss, training failed.")
elif isinstance(event, fluid.EndStepEvent):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, map(numpy.array, event.metrics)))
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -131,4 +137,4 @@ def main(use_cuda): ...@@ -131,4 +137,4 @@ def main(use_cuda):
if __name__ == '__main__': if __name__ == '__main__':
# for use_cuda in (False, True): # for use_cuda in (False, True):
main(use_cuda=False) main(use_cuda=True)
...@@ -182,12 +182,6 @@ def train(use_cuda, save_dirname=None, is_local=True): ...@@ -182,12 +182,6 @@ def train(use_cuda, save_dirname=None, is_local=True):
crf_decode = fluid.layers.crf_decoding( crf_decode = fluid.layers.crf_decoding(
input=feature_out, param_attr=fluid.ParamAttr(name='crfw')) input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
chunk_evaluator = fluid.evaluator.ChunkEvaluator(
input=crf_decode,
label=target,
chunk_scheme="IOB",
num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))
train_data = paddle.batch( train_data = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
paddle.dataset.conll05.test(), buf_size=8192), paddle.dataset.conll05.test(), buf_size=8192),
...@@ -203,7 +197,6 @@ def train(use_cuda, save_dirname=None, is_local=True): ...@@ -203,7 +197,6 @@ def train(use_cuda, save_dirname=None, is_local=True):
def train_loop(main_program): def train_loop(main_program):
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
embedding_param = fluid.global_scope().find_var( embedding_param = fluid.global_scope().find_var(
embedding_name).get_tensor() embedding_name).get_tensor()
embedding_param.set( embedding_param.set(
...@@ -213,27 +206,19 @@ def train(use_cuda, save_dirname=None, is_local=True): ...@@ -213,27 +206,19 @@ def train(use_cuda, save_dirname=None, is_local=True):
start_time = time.time() start_time = time.time()
batch_id = 0 batch_id = 0
for pass_id in xrange(PASS_NUM): for pass_id in xrange(PASS_NUM):
chunk_evaluator.reset(exe)
for data in train_data(): for data in train_data():
cost, precision, recall, f1_score = exe.run( cost = exe.run(main_program,
main_program, feed=feeder.feed(data),
feed=feeder.feed(data), fetch_list=[avg_cost])
fetch_list=[avg_cost] + chunk_evaluator.metrics) cost = cost[0]
pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(
exe)
if batch_id % 10 == 0: if batch_id % 10 == 0:
print("avg_cost:" + str(cost) + " precision:" + str( print("avg_cost:" + str(cost))
precision) + " recall:" + str(recall) + " f1_score:" +
str(f1_score) + " pass_precision:" + str(
pass_precision) + " pass_recall:" + str(
pass_recall) + " pass_f1_score:" + str(
pass_f1_score))
if batch_id != 0: if batch_id != 0:
print("second per batch: " + str((time.time( print("second per batch: " + str((time.time(
) - start_time) / batch_id)) ) - start_time) / batch_id))
# Set the threshold low to speed up the CI test # Set the threshold low to speed up the CI test
if float(pass_precision) > 0.01: if float(cost) < 60.0:
if save_dirname is not None: if save_dirname is not None:
# TODO(liuyiqun): Change the target to crf_decode # TODO(liuyiqun): Change the target to crf_decode
fluid.io.save_inference_model(save_dirname, [ fluid.io.save_inference_model(save_dirname, [
......
...@@ -13,15 +13,62 @@ ...@@ -13,15 +13,62 @@
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
import unittest
def test_converter(): class TestDataFeeder(unittest.TestCase):
img = fluid.layers.data(name='image', shape=[1, 28, 28]) def test_lod_level_0_converter(self):
label = fluid.layers.data(name='label', shape=[1], dtype='int64') img = fluid.layers.data(name='image', shape=[1, 28, 28])
feeder = fluid.DataFeeder([img, label], fluid.CPUPlace()) label = fluid.layers.data(name='label', shape=[1], dtype='int64')
result = feeder.feed([[[0] * 784, [9]], [[1] * 784, [1]]]) feeder = fluid.DataFeeder([img, label], fluid.CPUPlace())
print(result) result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])])
print(result)
self.assertEqual(result['image'].shape(), [2, 1, 28, 28])
self.assertEqual(result['label'].shape(), [2, 1])
self.assertEqual(result['image'].lod(), [])
self.assertEqual(result['label'].lod(), [])
def test_lod_level_1_converter(self):
# lod_level = 1
# each sentence has a different number of words
sentences = fluid.layers.data(
name='sentences', shape=[1], dtype='int64', lod_level=1)
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder([sentences, label], fluid.CPUPlace())
# lod = [[0, 3, 5, 9]]
# data = [[1, 2, 3], [4, 5], [6, 7, 8, 9]]
# label = [1] * len(data)
result = feeder.feed(
[([1, 2, 3], [1]), ([4, 5], [1]), ([6, 7, 8, 9], [1])])
print(result)
self.assertEqual(result['sentences'].shape(), [9, 1])
self.assertEqual(result['label'].shape(), [3, 1])
self.assertEqual(result['sentences'].lod(), [[0, 3, 5, 9]])
self.assertEqual(result['label'].lod(), [])
def test_lod_level_2_converter(self):
# lod_level = 2
# paragraphs -> sentences -> words
paragraphs = fluid.layers.data(
name='paragraphs', shape=[1], dtype='int64', lod_level=2)
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder([paragraphs, label], fluid.CPUPlace())
# lod = [[0, 2, 3], [0, 3, 5, 9]]
# data = [[[1, 2, 3], [4, 5]], [[6, 7, 8, 9]]]
# label = [1] * len(data)
result = feeder.feed(
[([[1, 2, 3], [4, 5]], [1]), ([[6, 7, 8, 9]], [1])])
print(result)
self.assertEqual(result['paragraphs'].shape(), [9, 1])
self.assertEqual(result['label'].shape(), [2, 1])
self.assertEqual(result['paragraphs'].lod(), [[0, 2, 3], [0, 3, 5, 9]])
self.assertEqual(result['label'].lod(), [])
if __name__ == '__main__': if __name__ == '__main__':
test_converter() unittest.main()
...@@ -28,11 +28,11 @@ function(py_test_modules TARGET_NAME) ...@@ -28,11 +28,11 @@ function(py_test_modules TARGET_NAME)
if(WITH_TESTING) if(WITH_TESTING)
set(options "") set(options "")
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs MODULES DEPS ARGS ENVS) set(multiValueArgs MODULES DEPS ENVS)
cmake_parse_arguments(py_test_modules "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(py_test_modules "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND env PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_modules_ENVS} COMMAND env PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_modules_ENVS}
${PYTHON_EXECUTABLE} -u -m unittest --verbose ${py_test_modules_MODULES} ${py_test_modules_ARGS} ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif() endif()
endfunction() endfunction()
......
...@@ -778,7 +778,7 @@ class TestCRFModel(unittest.TestCase): ...@@ -778,7 +778,7 @@ class TestCRFModel(unittest.TestCase):
build_strategy = fluid.BuildStrategy() build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
self.check_network_convergence( self.check_network_convergence(
is_sparse=False, build_strategy=build_strategy) is_sparse=True, build_strategy=build_strategy)
def test_update_dense_parameter_reduce(self): def test_update_dense_parameter_reduce(self):
build_strategy = fluid.BuildStrategy() build_strategy = fluid.BuildStrategy()
...@@ -852,8 +852,7 @@ class TestFetchOp(unittest.TestCase): ...@@ -852,8 +852,7 @@ class TestFetchOp(unittest.TestCase):
assert not math.isnan(np.sum(ret[i])) and \ assert not math.isnan(np.sum(ret[i])) and \
not math.isinf(np.sum(ret[i])) not math.isinf(np.sum(ret[i]))
@unittest.skip("this test is buggy") def test_fetch_op(self):
def test_feed(self):
tst_reader = paddle.batch(flowers.test(use_xmap=False), batch_size=16) tst_reader = paddle.batch(flowers.test(use_xmap=False), batch_size=16)
tst_reader_iter = tst_reader() tst_reader_iter = tst_reader()
......
...@@ -20,6 +20,7 @@ import data_feeder ...@@ -20,6 +20,7 @@ import data_feeder
import contextlib import contextlib
import io import io
import unique_name import unique_name
import parallel_executor
# optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module # optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module
import optimizer as opt_module import optimizer as opt_module
...@@ -48,12 +49,14 @@ class BeginStepEvent(object): ...@@ -48,12 +49,14 @@ class BeginStepEvent(object):
def __init__(self, epoch_id, step_id): def __init__(self, epoch_id, step_id):
self.epoch = epoch_id self.epoch = epoch_id
self.step = step_id self.step = step_id
self.fetch_metrics = True
class EndStepEvent(object): class EndStepEvent(object):
def __init__(self, epoch_id, step_id): def __init__(self, epoch_id, step_id, metrics):
self.epoch = epoch_id self.epoch = epoch_id
self.step = step_id self.step = step_id
self.metrics = metrics
def check_and_get_place(place): def check_and_get_place(place):
...@@ -87,12 +90,17 @@ class Trainer(object): ...@@ -87,12 +90,17 @@ class Trainer(object):
Args: Args:
train_func(callable): A function which will return loss. The loss must be a scalar. train_func(callable): A function which will return loss. The loss must be a scalar.
infer_func(callable): A function which will return predict, used to save inference model
optimizer(optimizer.Optimizer): The optimizer should be an instance of Optimizer optimizer(optimizer.Optimizer): The optimizer should be an instance of Optimizer
place: The device place of this trainer. place: The device place of this trainer.
""" """
def __init__(self, train_func, optimizer, param_path=None, place=None): def __init__(self,
train_func,
optimizer,
param_path=None,
place=None,
parallel=False):
self.parallel = parallel
# 1. we need to generate a framework.Program by calling # 1. we need to generate a framework.Program by calling
# program_func. Reference: fluid.program_guard in # program_func. Reference: fluid.program_guard in
# test_word2vec.py # test_word2vec.py
...@@ -106,14 +114,14 @@ class Trainer(object): ...@@ -106,14 +114,14 @@ class Trainer(object):
with framework.program_guard(self.train_program, self.startup_program): with framework.program_guard(self.train_program, self.startup_program):
program_func_outs = train_func() program_func_outs = train_func()
self.test_outputs = program_func_outs if isinstance( self.train_func_outputs = program_func_outs if isinstance(
program_func_outs, list) else [program_func_outs] program_func_outs, list) else [program_func_outs]
self.test_program = self.train_program.clone() self.test_program = self.train_program.clone()
if not isinstance(optimizer, opt_module.Optimizer): if not isinstance(optimizer, opt_module.Optimizer):
raise TypeError( raise TypeError(
"The optimizer should be an instance of Optimizer") "The optimizer should be an instance of Optimizer")
# The fisrt element of program_func_outs is loss. # The fisrt element of program_func_outs is loss.
loss = self.test_outputs[0] loss = self.train_func_outputs[0]
optimize_ops, params_grads = optimizer.minimize(loss) optimize_ops, params_grads = optimizer.minimize(loss)
self.place = check_and_get_place(place) self.place = check_and_get_place(place)
...@@ -131,7 +139,40 @@ class Trainer(object): ...@@ -131,7 +139,40 @@ class Trainer(object):
# load params from param_path into scope # load params from param_path into scope
io.load_persistables(exe, dirname=param_path) io.load_persistables(exe, dirname=param_path)
def _transpile_nccl2_dist(self):
# PADDLE_TRAINER_IPS
if "PADDLE_TRAINER_IPS" not in os.environ:
self.nccl_id_var = None
else:
self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
port = os.getenv("PADDLE_PSERVER_PORT")
worker_ips = os.getenv("PADDLE_TRAINER_IPS")
worker_endpoints = []
for ip in worker_ips.split(","):
worker_endpoints.append(':'.join([ip, port]))
self.num_trainers = len(worker_endpoints)
current_endpoint = os.getenv("POD_IP") + ":" + port
worker_endpoints.remove(current_endpoint)
# TODO(wuyi): use self.nccl_id_var, self.num_trainers and self.trainer_id
# in ParallelExecutor to start
# distributed training using NCCL2
self.nccl_id_var = self.startup_program.global_block().create_var(
name="NCCLID", persistable=True, type=core.VarDesc.VarType.RAW)
self.startup_program.global_block().append_op(
type="gen_nccl_id",
inputs={},
outputs={"NCCLID": self.nccl_id_var},
attrs={
"endpoint": current_endpoint,
"endpoint_list": worker_endpoints,
"trainer_id": self.trainer_id
})
def _dist_transpile_if_necessary(self, optimize_ops, params_grads): def _dist_transpile_if_necessary(self, optimize_ops, params_grads):
self._transpile_nccl2_dist()
if self.nccl_id_var != None:
return
if "PADDLE_TRAINING_ROLE" not in os.environ: if "PADDLE_TRAINING_ROLE" not in os.environ:
return return
...@@ -169,12 +210,7 @@ class Trainer(object): ...@@ -169,12 +210,7 @@ class Trainer(object):
'TRAINING_ROLE environment variable must be either TRAINER or PSERVER' 'TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
) )
def train(self, def train(self, num_epochs, event_handler, reader=None, feed_order=None):
num_epochs,
event_handler,
reader,
feed_order,
parallel=False):
""" """
Train the model. Train the model.
...@@ -182,25 +218,24 @@ class Trainer(object): ...@@ -182,25 +218,24 @@ class Trainer(object):
num_epochs: The number of epoch. An epoch will process all data in reader num_epochs: The number of epoch. An epoch will process all data in reader
event_handler: The event handler. A function with type (ev:Event)->void event_handler: The event handler. A function with type (ev:Event)->void
reader: reader:
parallel: True if use multi-CPUs or multi-GPUs
feed_order: Feeding order of reader. None will following the defining feed_order: Feeding order of reader. None will following the defining
order in program order in program
Returns: Returns:
""" """
if parallel:
raise NotImplementedError(
"Parallel Executor version of trainer is not implemented")
training_role = os.getenv("PADDLE_TRAINING_ROLE", "") training_role = os.getenv("PADDLE_TRAINING_ROLE", "")
if training_role == "PSERVER": if training_role == "PSERVER":
with self._prog_and_scope_guard(): with self._prog_and_scope_guard():
exe = executor.Executor(self.place) exe = executor.Executor(self.place)
exe.run() exe.run()
return return
if self.parallel:
self._train_by_executor(num_epochs, event_handler, reader, feed_order) self._train_by_parallel_executor(num_epochs, event_handler, reader,
feed_order)
else:
self._train_by_executor(num_epochs, event_handler, reader,
feed_order)
def test(self, reader, feed_order): def test(self, reader, feed_order):
""" """
...@@ -212,7 +247,8 @@ class Trainer(object): ...@@ -212,7 +247,8 @@ class Trainer(object):
order in program order in program
""" """
return self._test_by_executor(reader, feed_order, self.test_outputs) return self._test_by_executor(reader, feed_order,
self.train_func_outputs)
def save_params(self, param_path): def save_params(self, param_path):
# reference: save_persistables in io.py # reference: save_persistables in io.py
...@@ -246,13 +282,25 @@ class Trainer(object): ...@@ -246,13 +282,25 @@ class Trainer(object):
feeder = data_feeder.DataFeeder( feeder = data_feeder.DataFeeder(
feed_list=feed_var_list, place=self.place) feed_list=feed_var_list, place=self.place)
exe = executor.Executor(self.place) exe = executor.Executor(self.place)
for epoch_id in range(num_epochs): reader = feeder.decorate_reader(reader, multi_devices=False)
event_handler(BeginEpochEvent(epoch_id)) self._train_by_any_executor(event_handler, exe, num_epochs, reader)
for step_id, data in enumerate(reader()):
event_handler(BeginStepEvent(epoch_id, step_id)) def _train_by_any_executor(self, event_handler, exe, num_epochs, reader):
exe.run(feed=feeder.feed(data), fetch_list=[]) for epoch_id in range(num_epochs):
event_handler(EndStepEvent(epoch_id, step_id)) event_handler(BeginEpochEvent(epoch_id))
event_handler(EndEpochEvent(epoch_id)) for step_id, data in enumerate(reader()):
begin_event = BeginStepEvent(epoch_id, step_id)
event_handler(begin_event)
if begin_event.fetch_metrics:
metrics = exe.run(feed=data,
fetch_list=[
var.name
for var in self.train_func_outputs
])
else:
metrics = exe.run(feed=data, fetch_list=[])
event_handler(EndStepEvent(epoch_id, step_id, metrics))
event_handler(EndEpochEvent(epoch_id))
def _test_by_executor(self, reader, feed_order, fetch_list): def _test_by_executor(self, reader, feed_order, fetch_list):
with executor.scope_guard(self.scope): with executor.scope_guard(self.scope):
...@@ -271,6 +319,28 @@ class Trainer(object): ...@@ -271,6 +319,28 @@ class Trainer(object):
return [x / count for x in accumulated] return [x / count for x in accumulated]
def _train_by_parallel_executor(self, num_epochs, event_handler, reader,
feed_order):
with self._prog_and_scope_guard():
pe = self._get_or_create_parallel_executor()
feed_var_list = build_feed_var_list(self.train_program, feed_order)
feeder = data_feeder.DataFeeder(
feed_list=feed_var_list, place=self.place)
reader = feeder.decorate_reader(reader, multi_devices=True)
for epoch_id in range(num_epochs):
self._train_by_any_executor(event_handler, pe, num_epochs,
reader)
def _get_parallel_executor(self):
return getattr(self, 'parallel_executor', None)
def _get_or_create_parallel_executor(self):
if self._get_parallel_executor() is None:
self.parallel_executor = parallel_executor.ParallelExecutor(
use_cuda=isinstance(self.place, core.CUDAPlace),
loss_name=self.train_func_outputs[0].name)
return self._get_parallel_executor()
def build_feed_var_list(program, feed_order): def build_feed_var_list(program, feed_order):
if not isinstance(program, framework.Program): if not isinstance(program, framework.Program):
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import os
import sys
import paddle.fluid as fluid
import importlib
import cStringIO
def main():
sys.path.append(os.getcwd())
some_test_failed = False
for module_name in sys.argv[1:]:
buffer = cStringIO.StringIO()
main = fluid.Program()
startup = fluid.Program()
scope = fluid.core.Scope()
with fluid.program_guard(main, startup):
with fluid.scope_guard(scope):
with fluid.unique_name.guard():
test_loader = unittest.TestLoader()
module = importlib.import_module(module_name)
tests = test_loader.loadTestsFromModule(module)
res = unittest.TextTestRunner(stream=buffer).run(tests)
if not res.wasSuccessful():
some_test_failed = True
print >> sys.stderr, module_name, 'failed\n', buffer.getvalue(
)
if some_test_failed:
exit(1)
if __name__ == '__main__':
main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册