提交 64a4925c 编写于 作者: T tangwei12

Merge branch 'Pdv' into samplingIdOp

...@@ -27,15 +27,6 @@ script: ...@@ -27,15 +27,6 @@ script:
# 43min timeout # 43min timeout
paddle/scripts/paddle_docker_build.sh ${JOB} paddle/scripts/paddle_docker_build.sh ${JOB}
if [ $? -eq 0 ] || [ $? -eq 142 ]; then true; else exit 1; fi; if [ $? -eq 0 ] || [ $? -eq 142 ]; then true; else exit 1; fi;
- |
if [[ "$JOB" != "doc" ]]; then exit 0; fi;
# For document only
if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v|release/[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then exit 0; fi;
export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/master/scripts/deploy/deploy_docs.sh
export DOCS_DIR=`pwd`
cd ..
curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH $DOCS_DIR $DOCS_DIR/build/doc/
notifications: notifications:
email: email:
on_success: change on_success: change
......
...@@ -65,6 +65,7 @@ option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better d ...@@ -65,6 +65,7 @@ option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better d
option(WITH_ANAKIN "Compile with Anakin library" OFF) option(WITH_ANAKIN "Compile with Anakin library" OFF)
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE}) option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF) option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
option(WITH_INFERENCE "Compile fluid inference library" ON)
option(WITH_SYSTEM_BLAS "Use system blas library" OFF) option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION}) option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
...@@ -72,6 +73,7 @@ option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VER ...@@ -72,6 +73,7 @@ option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VER
if(NOT PY_VERSION) if(NOT PY_VERSION)
set(PY_VERSION 2.7) set(PY_VERSION 2.7)
endif() endif()
set(PYBIND11_PYTHON_VERSION ${PY_VERSION})
# CMAKE_BUILD_TYPE # CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE) if(NOT CMAKE_BUILD_TYPE)
...@@ -174,6 +176,7 @@ include(external/any) # download libn::any ...@@ -174,6 +176,7 @@ include(external/any) # download libn::any
include(external/eigen) # download eigen3 include(external/eigen) # download eigen3
include(external/pybind11) # download pybind11 include(external/pybind11) # download pybind11
include(external/cares) include(external/cares)
include(external/cub)
if(WITH_DISTRIBUTE) if(WITH_DISTRIBUTE)
if(WITH_GRPC) if(WITH_GRPC)
......
# Fetches NVIDIA CUB (a header-only library) as an external project and exposes
# a `cub` target that other targets can depend on so the headers are checked
# out before they are compiled.
#
# Inputs read from the including scope:
#   WITH_GPU                   - CUB is only fetched for GPU builds.
#   THIRD_PARTY_PATH           - root directory for third-party checkouts.
#   EXTERNAL_PROJECT_LOG_ARGS  - logging arguments forwarded to ExternalProject.
if(NOT WITH_GPU)
  # Nothing to do for CPU-only builds; leave this included file.
  return()
endif()

include(ExternalProject)

# With PREFIX set to CUB_SOURCE_DIR, ExternalProject checks the sources out
# into <prefix>/src/<name>, i.e. ${CUB_SOURCE_DIR}/src/extern_cub.
set(CUB_SOURCE_DIR ${THIRD_PARTY_PATH}/cub)
set(CUB_INCLUDE_DIR ${CUB_SOURCE_DIR}/src/extern_cub)
include_directories(${CUB_INCLUDE_DIR})

# Only the checkout step does any work: every other step is explicitly
# disabled because the library consists of headers only.
ExternalProject_Add(
  extern_cub
  ${EXTERNAL_PROJECT_LOG_ARGS}
  GIT_REPOSITORY "https://github.com/NVlabs/cub.git"
  GIT_TAG "v1.8.0"
  PREFIX ${CUB_SOURCE_DIR}
  UPDATE_COMMAND ""
  CONFIGURE_COMMAND ""
  BUILD_COMMAND ""
  INSTALL_COMMAND ""
  TEST_COMMAND ""
)

# CMake older than 3.3 cannot attach add_dependencies() to an INTERFACE
# library, so fall back to a tiny static library built from a generated
# dummy source file.
if(CMAKE_VERSION VERSION_LESS "3.3.0")
  set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/cub_dummy.c)
  file(WRITE "${dummyfile}" "const char *dummy = \"${dummyfile}\";")
  add_library(cub STATIC ${dummyfile})
else()
  add_library(cub INTERFACE)
endif()

# Anything depending on `cub` waits for the checkout to finish.
add_dependencies(cub extern_cub)

# Register cub with the project's list of external dependencies.
# NOTE(review): the original appended to `externl_project_dependencies`, which
# looks like a misspelling that left cub out of the list; the correct name is
# assumed to be `external_project_dependencies` - confirm against the other
# cmake/external/*.cmake modules.
list(APPEND external_project_dependencies cub)
...@@ -264,7 +264,10 @@ function(cc_test TARGET_NAME) ...@@ -264,7 +264,10 @@ function(cc_test TARGET_NAME)
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
if (${cc_test_SERIAL}) if (${cc_test_SERIAL})
set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1) set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true) set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
endif() endif()
endif() endif()
endfunction(cc_test) endfunction(cc_test)
...@@ -329,7 +332,10 @@ function(nv_test TARGET_NAME) ...@@ -329,7 +332,10 @@ function(nv_test TARGET_NAME)
add_test(${TARGET_NAME} ${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME})
if (nv_test_SERIAL) if (nv_test_SERIAL)
set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1) set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true) set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
endif() endif()
endif() endif()
endfunction(nv_test) endfunction(nv_test)
...@@ -577,7 +583,9 @@ function(py_test TARGET_NAME) ...@@ -577,7 +583,9 @@ function(py_test TARGET_NAME)
set(multiValueArgs SRCS DEPS ARGS ENVS) set(multiValueArgs SRCS DEPS ARGS ENVS)
cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND env FLAGS_init_allocated_mem=true PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS} COMMAND env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
FLAGS_cpu_deterministic=true
PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS} ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif() endif()
......
# Operator fusion
Fusing multiple operators together is an important method to optimize the program execution, particularly for GPU or other specialized accelerators. An obvious benefit is to avoid the overhead of saving the intermediate result back into global memory.
There are generally two ways to fuse operators: fusing directly connected operators and fusing operators that are not directly connected. The first method is mainly used by [NNVM Compiler](https://github.com/dmlc/tvm/) and [XLA](https://www.tensorflow.org/performance/xla/). The second method is mainly used by Dynet and TensorFlow Fold to do auto-batching. The principle of operator fusion is to combine multiple operations into one according to some rules; for example, `Y = X * W` and `Z = Y + B` can be fused to `Z = X * W + B`, and `Y1 = X1 * W` and `Y2 = X2 * W` can be fused to `[Y1;Y2] = [X1;X2] * W`. In order to get a short-term benefit, we decided to try to specify these rules manually.
## Challenge
The challenge of fusing operators is:
- how to make the rules.
- how to implement these rules efficiently.
### How to make the rules?
The problem of determining the best single location for a fusion operator is an NP-hard combinatorial problem. After analyzing the operators of DL models, we found that there are two groups of operators that can be fused explicitly: one is simple, adjacent operations, for example, `tmp = x + y` and `z = Relu(tmp)`, and the other is operators that have the same function, for example, a series of `SGD` or `Momentum` operators. They usually appear in a model in large numbers. So we should think about how to fuse them separately first.
### How to implement these rules efficiently?
#### How to fuse the adjacent operations efficiently?
Here we use a template function to represent the fused operations. The pros of using a template function are that it is simple and efficient, and the cons are that it is not easy to expand, and it can only be used to express some simple operations. So taking into account our current needs, the template function is more appropriate.
#### How to fuse the operators that have the same function efficiently?
We take the SGD operator as an example. The training model may have hundreds of parameters and, correspondingly, the same number of SGD operators. The expression (`w = w - lr*w_g`) of those operators is the same, so during training the executor will execute this expression hundreds of times on the CPU or other specialized accelerators. If we can fuse them and make the addresses of all `w` and all `w_g` contiguous respectively, we only need to execute it once. For some accelerators, the time to launch a kernel is not negligible, so the time spent launching and executing a kernel hundreds of times may be larger than launching and executing it only once. There are usually many operators similar to `SGD` in a DL model, such as `AllReduce` and `FC`.
...@@ -336,6 +336,7 @@ paddle.fluid.contrib.BeamSearchDecoder.decode ArgSpec(args=['self'], varargs=Non ...@@ -336,6 +336,7 @@ paddle.fluid.contrib.BeamSearchDecoder.decode ArgSpec(args=['self'], varargs=Non
paddle.fluid.contrib.BeamSearchDecoder.early_stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.contrib.BeamSearchDecoder.early_stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init', 'is_ids', 'is_scores'], varargs=None, keywords=None, defaults=(False, False)) paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init', 'is_ids', 'is_scores'], varargs=None, keywords=None, defaults=(False, False))
paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None) paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.memory_usage ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.DistributeTranspiler.create_splited_vars ArgSpec(args=['self', 'source_var', 'block', 'tag'], varargs=None, keywords=None, defaults=None) paddle.fluid.transpiler.DistributeTranspiler.create_splited_vars ArgSpec(args=['self', 'source_var', 'block', 'tag'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None) paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
......
...@@ -5,5 +5,7 @@ add_subdirectory(operators) ...@@ -5,5 +5,7 @@ add_subdirectory(operators)
add_subdirectory(pybind) add_subdirectory(pybind)
add_subdirectory(string) add_subdirectory(string)
add_subdirectory(recordio) add_subdirectory(recordio)
# NOTE: please add subdirectory inference at last. if(WITH_INFERENCE)
add_subdirectory(inference) # NOTE: please add subdirectory inference at last.
add_subdirectory(inference)
endif()
...@@ -21,6 +21,26 @@ namespace framework { ...@@ -21,6 +21,26 @@ namespace framework {
namespace details { namespace details {
struct BuildStrategy { struct BuildStrategy {
// ParallelExecutor supports two modes of ReduceStrategy, kAllReduce and
// kReduce, for CPU and GPU. If you use kAllReduce, different threads
// optimize their parameters separately. If you use kReduce, the optimizations
// of parameters are distributed to different threads.
// For example, a model has 100 parameters and is running with four threads,
// if you choose kAllReduce, every thread is to optimize 100 parameters
// separately, if you choose kReduce, every thread is to optimize 25
// parameters.
// Of particular note is, if you use kReduce when using CPU training,
// all the parameters are shared between different threads. This feature will
// save memory.
// FIXME(zcd): The results of the two modes (kAllReduce and kReduce) may not
// be equal on GPU, because a different order of summing can give a different
// result; for example, the result of `a+b+c+d` may differ from the result of
// `c+a+b+d`.
// For GPU, both kAllReduce and kReduce are implemented with NCCL,
// so the results of kAllReduce and kReduce may not be equal.
// For CPU, if you want to fix the order of summing to make the result
// of kAllReduce and kReduce no diff, you can add
// `FLAGS_cpu_deterministic=true` to env.
enum class ReduceStrategy { kAllReduce = 0, kReduce = 1 }; enum class ReduceStrategy { kAllReduce = 0, kReduce = 1 };
enum class GradientScaleStrategy { enum class GradientScaleStrategy {
......
...@@ -275,7 +275,8 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl( ...@@ -275,7 +275,8 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
if (strategy_.gradient_scale_ != if (strategy_.gradient_scale_ !=
BuildStrategy::GradientScaleStrategy::kCustomized) { BuildStrategy::GradientScaleStrategy::kCustomized) {
// TODO(paddle-dev): Why is there no input for this op_handle? // TODO(paddle-dev): Why is there no input for this op_handle?
CreateScaleLossGradOp(&result); auto loss_grad_name = node->Op()->OutputArgumentNames()[0];
CreateScaleLossGradOp(&result, loss_grad_name);
} }
// This assumes the backward generating code will ensure IsScaleLossOp // This assumes the backward generating code will ensure IsScaleLossOp
// is true only for the op that scale the final scalar loss. // is true only for the op that scale the final scalar loss.
...@@ -535,7 +536,8 @@ int MultiDevSSAGraphBuilder::GetVarDeviceID(const ir::Graph &graph, ...@@ -535,7 +536,8 @@ int MultiDevSSAGraphBuilder::GetVarDeviceID(const ir::Graph &graph,
return got == sharded_var_device.end() ? -1 : got->second; return got == sharded_var_device.end() ? -1 : got->second;
} }
void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(ir::Graph *result) const { void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(
ir::Graph *result, const std::string &loss_grad_name) const {
for (size_t i = 0; i < places_.size(); ++i) { for (size_t i = 0; i < places_.size(); ++i) {
// Insert ScaleCost OpHandle // Insert ScaleCost OpHandle
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
...@@ -558,10 +560,10 @@ void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(ir::Graph *result) const { ...@@ -558,10 +560,10 @@ void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(ir::Graph *result) const {
// loss->pending_ops_.emplace_back(op_handle); // loss->pending_ops_.emplace_back(op_handle);
// op_handle->inputs_.emplace_back(loss); // op_handle->inputs_.emplace_back(loss);
CreateOpOutput(result, op_handle, CreateOpOutput(
result->CreateEmptyNode(GradVarName(loss_var_name_), result, op_handle,
ir::Node::Type::kVariable), result->CreateEmptyNode(loss_grad_name, ir::Node::Type::kVariable),
places_[i], i); places_[i], i);
} }
} }
......
...@@ -75,7 +75,9 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder { ...@@ -75,7 +75,9 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
void CreateComputationalOps(ir::Graph *result, ir::Node *node, void CreateComputationalOps(ir::Graph *result, ir::Node *node,
size_t num_places) const; size_t num_places) const;
void CreateScaleLossGradOp(ir::Graph *result) const; void CreateScaleLossGradOp(ir::Graph *result,
const std::string &loss_grad_name) const;
VarHandle *CreateReduceOp(ir::Graph *result, const std::string &og, VarHandle *CreateReduceOp(ir::Graph *result, const std::string &og,
int dst_dev_id) const; int dst_dev_id) const;
void CreateComputationalOp(ir::Graph *result, ir::Node *node, void CreateComputationalOp(ir::Graph *result, ir::Node *node,
......
...@@ -18,6 +18,10 @@ ...@@ -18,6 +18,10 @@
#include "paddle/fluid/framework/details/variable_visitor.h" #include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
DEFINE_bool(
cpu_deterministic, false,
"Whether to make the result of computation deterministic in CPU side.");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace details { namespace details {
...@@ -91,11 +95,33 @@ void ReduceOpHandle::RunImpl() { ...@@ -91,11 +95,33 @@ void ReduceOpHandle::RunImpl() {
} else { } else {
std::vector<const LoDTensor *> lod_tensors = std::vector<const LoDTensor *> lod_tensors =
GetInputValues<LoDTensor>(in_var_handles, var_scopes); GetInputValues<LoDTensor>(in_var_handles, var_scopes);
if (paddle::platform::is_cpu_place(lod_tensors[0]->place())) { if (paddle::platform::is_cpu_place(lod_tensors[0]->place())) {
this->RunAndRecordEvent([&] { this->RunAndRecordEvent([&] {
ReduceLoDTensor func(lod_tensors, // FIXME(zcd): The order of summing is important,
out_var->GetMutable<framework::LoDTensor>()); // especially when the type of data is float or double.
VisitDataType(ToDataType(lod_tensors[0]->type()), func); // For example, the result of `a+b+c+d` may be different
// with the result of `c+a+b+d`, so the summing order should be fixed.
if (!FLAGS_cpu_deterministic) {
ReduceLoDTensor func(lod_tensors,
out_var->GetMutable<framework::LoDTensor>());
VisitDataType(ToDataType(lod_tensors[0]->type()), func);
} else {
// We sum lod_tensors to reduce_sum_trg which is in local_scopes_0
// here, but it doesn't mean reduce_sum_trg must be in local_scopes_0.
auto &reduce_sum_trg = *this->local_scopes_[0]
->FindVar(kLocalExecScopeName)
->Get<Scope *>()
->FindVar(out_var_handle->name_)
->GetMutable<framework::LoDTensor>();
ReduceLoDTensor func(lod_tensors, &reduce_sum_trg);
VisitDataType(ToDataType(lod_tensors[0]->type()), func);
auto trg = out_var->GetMutable<framework::LoDTensor>();
if (reduce_sum_trg.data<void>() != trg->data<void>()) {
TensorCopy(reduce_sum_trg, platform::CPUPlace(), trg);
}
}
}); });
} else if (paddle::platform::is_gpu_place(lod_tensors[0]->place())) { } else if (paddle::platform::is_gpu_place(lod_tensors[0]->place())) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
......
...@@ -330,12 +330,7 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, ...@@ -330,12 +330,7 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
} }
for (auto& op : ctx->ops_) { for (auto& op : ctx->ops_) {
VLOG(4) << place_ << " " << op->DebugStringEx(local_scope);
op->Run(*local_scope, place_); op->Run(*local_scope, place_);
// NOTE! Please do not delete this line, it's usefull because the debug
// string before and after op.run are different, after run the output
// will have right shape which is usefull for debug.
VLOG(3) << place_ << " " << op->DebugStringEx(local_scope);
if (FLAGS_benchmark) { if (FLAGS_benchmark) {
VLOG(2) << "Memory used after operator " + op->Type() + " running: " VLOG(2) << "Memory used after operator " + op->Type() + " running: "
......
...@@ -127,7 +127,7 @@ static LoD GetLoD(const Scope& scope, const std::string& name) { ...@@ -127,7 +127,7 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
} }
void OperatorBase::Run(const Scope& scope, const platform::Place& place) { void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
VLOG(10) << "- " << DebugStringEx(&scope); VLOG(4) << place << " " << DebugStringEx(&scope);
if (platform::is_gpu_place(place)) { if (platform::is_gpu_place(place)) {
#ifndef PADDLE_WITH_CUDA #ifndef PADDLE_WITH_CUDA
PADDLE_THROW("Cannot run operator on place %s", place); PADDLE_THROW("Cannot run operator on place %s", place);
...@@ -139,7 +139,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) { ...@@ -139,7 +139,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
platform::RecordEvent record_event(Type(), pool.Get(place)); platform::RecordEvent record_event(Type(), pool.Get(place));
RunImpl(scope, place); RunImpl(scope, place);
VLOG(10) << "+ " << DebugStringEx(&scope); VLOG(3) << place << " " << DebugStringEx(&scope);
} }
bool OperatorBase::HasInputs(const std::string& name) const { bool OperatorBase::HasInputs(const std::string& name) const {
...@@ -778,6 +778,7 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType( ...@@ -778,6 +778,7 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType(
const ExecutionContext& ctx) const { const ExecutionContext& ctx) const {
auto& scope = ctx.scope(); auto& scope = ctx.scope();
int data_type = -1; int data_type = -1;
std::string last_input_name;
for (auto& input : this->inputs_) { for (auto& input : this->inputs_) {
for (auto& ipt_name : input.second) { for (auto& ipt_name : input.second) {
auto* var = scope.FindVar(ipt_name); auto* var = scope.FindVar(ipt_name);
...@@ -794,9 +795,10 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType( ...@@ -794,9 +795,10 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType(
int tmp = static_cast<int>(ToDataType(t->type())); int tmp = static_cast<int>(ToDataType(t->type()));
PADDLE_ENFORCE( PADDLE_ENFORCE(
tmp == data_type || data_type == -1, tmp == data_type || data_type == -1,
"DataType of Paddle Op %s must be the same. Get %d != %d", Type(), "DataType of Paddle Op %s must be the same. Get %s(%d) != %s(%d)",
data_type, tmp); Type(), last_input_name, data_type, ipt_name, tmp);
data_type = tmp; data_type = tmp;
last_input_name = ipt_name;
} }
} }
} }
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
namespace paddle { namespace paddle {
DEFINE_bool(inference_analysis_enable_tensorrt_subgraph_engine, false, DEFINE_bool(inference_analysis_enable_tensorrt_subgraph_engine, true,
"Enable subgraph to TensorRT engine for acceleration"); "Enable subgraph to TensorRT engine for acceleration");
DEFINE_string(inference_analysis_graphviz_log_root, "./", DEFINE_string(inference_analysis_graphviz_log_root, "./",
...@@ -42,10 +42,19 @@ class DfgPassManagerImpl final : public DfgPassManager { ...@@ -42,10 +42,19 @@ class DfgPassManagerImpl final : public DfgPassManager {
// TODO(Superjomn) set the key with pass reprs. // TODO(Superjomn) set the key with pass reprs.
AddPass("fluid-to-data-flow-graph", new FluidToDataFlowGraphPass); AddPass("fluid-to-data-flow-graph", new FluidToDataFlowGraphPass);
if (FLAGS_inference_analysis_enable_tensorrt_subgraph_engine) { if (FLAGS_inference_analysis_enable_tensorrt_subgraph_engine) {
auto trt_teller = [](const Node* node) { auto trt_teller = [&](const Node* node) {
std::unordered_set<std::string> teller_set(
{"elementwise_add", "mul", "conv2d", "pool2d", "relu"});
if (!node->IsFunction()) return false; if (!node->IsFunction()) return false;
return static_cast<const Function*>(node)->func_type() == "mul";
const auto* func = static_cast<const Function*>(node);
if (teller_set.count(func->func_type()))
return true;
else {
return false;
}
}; };
AddPass("tensorrt-subgraph-marker", AddPass("tensorrt-subgraph-marker",
new TensorRTSubgraphNodeMarkPass(trt_teller)); new TensorRTSubgraphNodeMarkPass(trt_teller));
AddPass("tensorrt-subgraph", new TensorRTSubGraphPass(trt_teller)); AddPass("tensorrt-subgraph", new TensorRTSubGraphPass(trt_teller));
......
...@@ -337,6 +337,34 @@ ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph) { // NOLINT ...@@ -337,6 +337,34 @@ ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph) { // NOLINT
std::vector<Node *>(outputs.begin(), outputs.end())); std::vector<Node *>(outputs.begin(), outputs.end()));
} }
// Prunes the outlinks of every live node that is neither a value node nor a
// plain function node: an outlink is kept only if its name is also the name
// of an input of some op that comes later in topological order. Each pruned
// node is required to keep at least one outlink.
void FilterRedundantOutputOfSubGraph(DataFlowGraph *graph) {
  // Live, non-value nodes in topological order.
  std::vector<Node *> ordered_ops;
  for (auto &candidate : GraphTraits<DataFlowGraph>(graph).nodes_in_TS()) {
    if (candidate.type() != Node::Type::kValue && !candidate.deleted()) {
      ordered_ops.push_back(&candidate);
    }
  }

  const size_t total = ordered_ops.size();
  for (size_t cur = 0; cur < total; ++cur) {
    Node *op = ordered_ops[cur];
    // Plain function nodes keep all of their outputs untouched.
    if (op->type() == Node::Type::kFunction) continue;

    // Names of all variables read by the ops that follow `op`.
    std::unordered_set<std::string> consumed_later;
    for (size_t next = cur + 1; next < total; ++next) {
      for (auto *input : ordered_ops[next]->inlinks) {
        consumed_later.insert(input->name());
      }
    }

    // Keep only the outputs that some later op actually consumes.
    std::vector<Node *> kept_outlinks;
    for (auto *output : op->outlinks) {
      if (consumed_later.count(output->name()) != 0) {
        kept_outlinks.push_back(output);
      }
    }
    PADDLE_ENFORCE_GE(kept_outlinks.size(), 1UL);
    op->outlinks = kept_outlinks;
  }
}
} // namespace analysis } // namespace analysis
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
...@@ -178,6 +178,7 @@ struct GraphTraits<DataFlowGraph> { ...@@ -178,6 +178,7 @@ struct GraphTraits<DataFlowGraph> {
std::pair<std::vector<Node *>, std::vector<Node *>> std::pair<std::vector<Node *>, std::vector<Node *>>
ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph); // NOLINT ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph); // NOLINT
void FilterRedundantOutputOfSubGraph(DataFlowGraph *graph);
} // namespace analysis } // namespace analysis
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
namespace paddle { namespace paddle {
namespace inference { namespace inference {
DEFINE_int32(tensorrt_max_batchsize, 300, "TensorRT maximum batch size"); DEFINE_int32(tensorrt_max_batchsize, 3, "TensorRT maximum batch size");
DEFINE_int32(tensorrt_workspace_size, 2048, "TensorRT workspace size"); DEFINE_int32(tensorrt_workspace_size, 2048, "TensorRT workspace size");
namespace analysis { namespace analysis {
...@@ -52,6 +52,7 @@ bool DataFlowGraphToFluidPass::Initialize(Argument *argument) { ...@@ -52,6 +52,7 @@ bool DataFlowGraphToFluidPass::Initialize(Argument *argument) {
bool DataFlowGraphToFluidPass::Finalize() { return true; } bool DataFlowGraphToFluidPass::Finalize() { return true; }
void DataFlowGraphToFluidPass::Run(DataFlowGraph *graph) { void DataFlowGraphToFluidPass::Run(DataFlowGraph *graph) {
FilterRedundantOutputOfSubGraph(graph);
LOG(INFO) << "graph.inputs " << graph->inputs.size(); LOG(INFO) << "graph.inputs " << graph->inputs.size();
for (auto &node : GraphTraits<DataFlowGraph>(graph).nodes_in_TS()) { for (auto &node : GraphTraits<DataFlowGraph>(graph).nodes_in_TS()) {
if (node.deleted()) continue; if (node.deleted()) continue;
...@@ -87,34 +88,113 @@ void DataFlowGraphToFluidPass::AddFluidOp(Node *node) { ...@@ -87,34 +88,113 @@ void DataFlowGraphToFluidPass::AddFluidOp(Node *node) {
} }
void CreateTrtEngineOp(Node *node, const DataFlowGraph &graph, void CreateTrtEngineOp(Node *node, const DataFlowGraph &graph,
const framework::proto::BlockDesc &block) { framework::proto::BlockDesc *block) {
static int counter{0}; static int counter{0};
PADDLE_ENFORCE(node->IsFunctionBlock()); PADDLE_ENFORCE(node->IsFunctionBlock());
framework::OpDesc desc; framework::OpDesc desc;
auto *func = static_cast<FunctionBlock *>(node); auto *func = static_cast<FunctionBlock *>(node);
// collect inputs // collect inputs
std::vector<std::string> io; std::unordered_set<std::string> input_names;
for (auto *x : func->inlinks) { for (auto *x : func->inlinks) {
io.push_back(x->name()); input_names.insert(x->name());
} }
desc.SetInput("Xs", io); desc.SetInput(
"Xs", std::vector<std::string>(input_names.begin(), input_names.end()));
// collect outputs std::unordered_set<std::string> output_names;
io.clear();
for (auto *x : func->outlinks) { for (auto *x : func->outlinks) {
io.push_back(x->name()); output_names.insert(x->name());
} }
desc.SetOutput("Ys", io);
std::vector<std::string> output_temp(output_names.begin(),
output_names.end());
desc.SetOutput("Ys", output_temp);
desc.SetType("tensorrt_engine"); desc.SetType("tensorrt_engine");
PADDLE_ENFORCE(!block.vars().empty(), "the block has no var-desc"); std::unordered_map<std::string, std::string> output_name_map;
// The following procedure is used to rename all the intermediate
// variables and the output variables of the subgraph.
// Why we do this?
// During the transition from fluid OP to tensorrt OP, we map
// the input and output Tensor(fluid data structure) of fluid OP
// to the corresponding ITensor (trt data structure) through the
// Tensor name. When we set up ITensor for an variable, we must
// ensure that it has not been set before.
// If there is variable in the fluid graph, which is not only the
// input of a OP, but also the output of a Op, there will be problems.
// So we have to rename the variable in the subgraph to make sure
// it is either an OP's input or an OP's output.
auto subgraph_nodes = func->subgraph;
for (int index = 0; index < block->ops_size(); index++) {
framework::proto::OpDesc *op = block->mutable_ops(index);
auto correspond_node = subgraph_nodes[index];
PADDLE_ENFORCE_EQ(correspond_node->name(), op->type());
std::unordered_map<std::string, size_t> var2id;
for (auto *in_var : correspond_node->inlinks) {
var2id[in_var->name()] = in_var->id();
}
// rename for the input variables of op inside subgraph
for (int i = 0; i < op->inputs_size(); i++) {
framework::proto::OpDesc_Var *in_var = op->mutable_inputs(i);
std::vector<std::string> replaced_names;
for (int k = 0; k < in_var->arguments_size(); k++) {
std::string arg_value = in_var->arguments(k);
if (input_names.count(arg_value)) {
replaced_names.push_back(arg_value);
} else {
replaced_names.push_back(arg_value +
std::to_string(var2id[arg_value]));
}
}
in_var->clear_arguments();
for (size_t k = 0; k < replaced_names.size(); k++) {
in_var->add_arguments(replaced_names[k]);
}
}
var2id.clear();
for (auto out_var : correspond_node->outlinks) {
var2id[out_var->name()] = out_var->id();
}
// rename for the output variables of op inside subgraph
for (int i = 0; i < op->outputs_size(); i++) {
framework::proto::OpDesc_Var *out_var = op->mutable_outputs(i);
std::vector<std::string> replaced_names;
for (int k = 0; k < out_var->arguments_size(); k++) {
std::string arg_value = out_var->arguments(k);
if (output_names.count(arg_value)) {
output_name_map[arg_value] =
arg_value + std::to_string(var2id[arg_value]);
}
replaced_names.push_back(arg_value + std::to_string(var2id[arg_value]));
}
out_var->clear_arguments();
for (size_t k = 0; k < replaced_names.size(); k++) {
out_var->add_arguments(replaced_names[k]);
}
}
}
// When tensorrt engine runs at the end of the operation,
// output_mapping help us copy the data from the renamed ITensor
// to Tensor.
std::vector<std::string> output_mapping;
for (auto name : output_names) {
PADDLE_ENFORCE(output_name_map.count(name) != 0);
output_mapping.push_back(output_name_map[name]);
}
PADDLE_ENFORCE(!block->vars().empty(), "the block has no var-desc");
// Set attrs // Set attrs
SetAttr(desc.Proto(), "subgraph", block.SerializeAsString()); SetAttr(desc.Proto(), "subgraph", block->SerializeAsString());
SetAttr(desc.Proto(), "engine_uniq_key", "trt-" + std::to_string(counter++)); SetAttr(desc.Proto(), "engine_uniq_key", "trt-" + std::to_string(counter++));
SetAttr(desc.Proto(), "max_batch", FLAGS_tensorrt_max_batchsize); SetAttr(desc.Proto(), "max_batch", FLAGS_tensorrt_max_batchsize);
SetAttr(desc.Proto(), "max_workspace", FLAGS_tensorrt_workspace_size); SetAttr(desc.Proto(), "max_workspace", FLAGS_tensorrt_workspace_size);
SetAttr(desc.Proto(), "parameters", ExtractParameters(graph.nodes.nodes())); SetAttr(desc.Proto(), "parameters", ExtractParameters(graph.nodes.nodes()));
SetAttr(desc.Proto(), "output_name_mapping", output_mapping);
node->SetPbMsg(desc.Proto()->SerializeAsString()); node->SetPbMsg(desc.Proto()->SerializeAsString());
} }
...@@ -146,15 +226,17 @@ void DataFlowGraphToFluidPass::AddEngineOp(Node *node) { ...@@ -146,15 +226,17 @@ void DataFlowGraphToFluidPass::AddEngineOp(Node *node) {
LOG(INFO) << "transformed variable size: " LOG(INFO) << "transformed variable size: "
<< block_desc.Proto()->vars().size(); << block_desc.Proto()->vars().size();
// copy ops. // copy ops.
for (auto *node : block_node->subgraph) { for (auto *node : block_node->subgraph) {
auto *op = block_desc.AppendOp(); auto *op = block_desc.AppendOp();
PADDLE_ENFORCE(!node->pb_msg().empty()); PADDLE_ENFORCE(!node->pb_msg().empty());
op->Proto()->ParseFromString(node->pb_msg()); op->Proto()->ParseFromString(node->pb_msg());
} }
*block_desc.Proto()->mutable_vars() = *block_desc.Proto()->mutable_vars() =
argument_->origin_program_desc->blocks(0).vars(); argument_->origin_program_desc->blocks(0).vars();
PADDLE_ENFORCE(!block_desc.Proto()->vars().empty()); PADDLE_ENFORCE(!block_desc.Proto()->vars().empty());
CreateTrtEngineOp(node, *argument_->main_dfg, *block_desc.Proto()); CreateTrtEngineOp(node, *argument_->main_dfg, block_desc.Proto());
auto *main_block = desc_->mutable_blocks(framework::kRootBlockIndex); auto *main_block = desc_->mutable_blocks(framework::kRootBlockIndex);
auto *op = main_block->add_ops(); auto *op = main_block->add_ops();
PADDLE_ENFORCE(!node->pb_msg().empty(), "failed to set desc for block"); PADDLE_ENFORCE(!node->pb_msg().empty(), "failed to set desc for block");
......
...@@ -46,9 +46,9 @@ std::string DFG_GraphvizDrawPass::Draw(DataFlowGraph *graph) { ...@@ -46,9 +46,9 @@ std::string DFG_GraphvizDrawPass::Draw(DataFlowGraph *graph) {
for (size_t i = 0; i < graph->nodes.size(); i++) { for (size_t i = 0; i < graph->nodes.size(); i++) {
const Node &node = graph->nodes.Get(i); const Node &node = graph->nodes.Get(i);
if (!config_.display_deleted_node && node.deleted()) continue; if (!config_.display_deleted_node && node.deleted()) continue;
for (auto &in : node.inlinks) { for (auto &out : node.outlinks) {
if (!config_.display_deleted_node && in->deleted()) continue; if (!config_.display_deleted_node && out->deleted()) continue;
dot.AddEdge(in->repr(), node.repr(), {}); dot.AddEdge(node.repr(), out->repr(), {});
} }
} }
return dot.Build(); return dot.Build();
......
...@@ -76,7 +76,7 @@ void UnionFindCombine(const node_map_t &node_map, size_t a, size_t b) { ...@@ -76,7 +76,7 @@ void UnionFindCombine(const node_map_t &node_map, size_t a, size_t b) {
std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() { std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
std::vector<Node *> marked_nodes; std::vector<Node *> marked_nodes;
for (auto &node : GraphTraits<DataFlowGraph>(graph_).nodes()) { for (auto &node : GraphTraits<DataFlowGraph>(graph_).nodes_in_TS()) {
if (node.attr(kMarkerAttrName).Bool()) { if (node.attr(kMarkerAttrName).Bool()) {
marked_nodes.push_back(&node); marked_nodes.push_back(&node);
} }
......
...@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <glog/logging.h>
#include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace paddle { namespace paddle {
...@@ -40,19 +41,36 @@ PaddleBuf::PaddleBuf(PaddleBuf&& other) ...@@ -40,19 +41,36 @@ PaddleBuf::PaddleBuf(PaddleBuf&& other)
PaddleBuf::PaddleBuf(const PaddleBuf& other) { *this = other; } PaddleBuf::PaddleBuf(const PaddleBuf& other) { *this = other; }
// Copy assignment.
//  - If `other` wraps external memory, this buffer becomes another non-owning
//    view of the same memory.
//  - If `other` owns its memory, this buffer receives its own deep copy.
// Any memory previously owned by this buffer is released via Free(); external
// memory previously referenced by this buffer is never written to or freed.
PaddleBuf& PaddleBuf::operator=(const PaddleBuf& other) {
  // Self-assignment must not free the buffer we are about to read from.
  if (this == &other) return *this;

  if (!other.memory_owned_) {
    // Release our own allocation (no-op when we do not own one) before
    // aliasing the external memory, otherwise it would leak.
    Free();
    data_ = other.data_;
    length_ = other.length_;
    memory_owned_ = false;
    return *this;
  }

  // Deep copy. Allocate before releasing so that a failed allocation leaves
  // this buffer untouched. A fresh allocation is always used: reusing `data_`
  // is unsafe because it may point at external memory of the same length.
  const size_t bytes = other.length_;
  char* fresh = bytes > 0 ? new char[bytes] : nullptr;
  if (fresh != nullptr && other.data_ != nullptr) {
    memcpy(fresh, other.data_, bytes);
  }
  Free();
  data_ = fresh;
  length_ = bytes;
  memory_owned_ = true;
  return *this;
}
// Move assignment: takes over `other`'s pointer, length and ownership flag,
// and leaves `other` empty and non-owning so that it will not free the
// memory that was transferred.
PaddleBuf& PaddleBuf::operator=(PaddleBuf&& other) {
  // Self-move would otherwise reset this buffer to empty.
  if (this == &other) return *this;
  // Release our current allocation first (no-op when we do not own one);
  // overwriting data_ directly would leak it.
  Free();
  data_ = other.data_;
  length_ = other.length_;
  memory_owned_ = other.memory_owned_;
  other.data_ = nullptr;
  other.length_ = 0;
  other.memory_owned_ = false;
  return *this;
}
void PaddleBuf::Resize(size_t length) { void PaddleBuf::Resize(size_t length) {
// Only the owned memory can be reset, the external memory can't be changed. // Only the owned memory can be reset, the external memory can't be changed.
if (length_ == length) return; if (length_ == length) return;
assert(memory_owned_); if (memory_owned_) {
Free(); Free();
}
data_ = new char[length]; data_ = new char[length];
length_ = length; length_ = length;
memory_owned_ = true; memory_owned_ = true;
...@@ -68,7 +86,7 @@ void PaddleBuf::Reset(void* data, size_t length) { ...@@ -68,7 +86,7 @@ void PaddleBuf::Reset(void* data, size_t length) {
void PaddleBuf::Free() { void PaddleBuf::Free() {
if (memory_owned_ && data_) { if (memory_owned_ && data_) {
assert(length_ > 0); assert(length_ > 0);
delete static_cast<char*>(data_); delete[] static_cast<char*>(data_);
data_ = nullptr; data_ = nullptr;
length_ = 0; length_ = 0;
} }
......
...@@ -40,11 +40,12 @@ class PaddleBuf { ...@@ -40,11 +40,12 @@ class PaddleBuf {
// Copy only available when memory is managed externally. // Copy only available when memory is managed externally.
explicit PaddleBuf(const PaddleBuf&); explicit PaddleBuf(const PaddleBuf&);
PaddleBuf& operator=(const PaddleBuf&); PaddleBuf& operator=(const PaddleBuf&);
PaddleBuf& operator=(PaddleBuf&&);
// Do not own the memory. // Do not own the memory.
PaddleBuf(void* data, size_t length) PaddleBuf(void* data, size_t length)
: data_(data), length_(length), memory_owned_{false} {} : data_(data), length_(length), memory_owned_{false} {}
// Own memory. // Own memory.
explicit PaddleBuf(size_t length) PaddleBuf(size_t length)
: data_(new char[length]), length_(length), memory_owned_(true) {} : data_(new char[length]), length_(length), memory_owned_(true) {}
// Resize to `length` bytes. // Resize to `length` bytes.
void Resize(size_t length); void Resize(size_t length);
......
# Add TRT tests # Add TRT tests
nv_library(tensorrt_converter nv_library(tensorrt_converter
SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc
activation_op.cc
DEPS tensorrt_engine operator scope framework_proto op_registry) DEPS tensorrt_engine operator scope framework_proto op_registry)
nv_test(test_op_converter SRCS test_op_converter.cc DEPS nv_test(test_op_converter SRCS test_op_converter.cc DEPS
......
...@@ -55,7 +55,6 @@ class OpConverter { ...@@ -55,7 +55,6 @@ class OpConverter {
it = Registry<OpConverter>::Lookup("fc"); it = Registry<OpConverter>::Lookup("fc");
} }
} }
if (op_desc.Type().find("elementwise") != std::string::npos) { if (op_desc.Type().find("elementwise") != std::string::npos) {
static std::unordered_set<std::string> add_tensor_op_set{ static std::unordered_set<std::string> add_tensor_op_set{
"add", "mul", "sub", "div", "max", "min", "pow"}; "add", "mul", "sub", "div", "max", "min", "pow"};
...@@ -72,6 +71,8 @@ class OpConverter { ...@@ -72,6 +71,8 @@ class OpConverter {
"Unsupported elementwise type" + op_type); "Unsupported elementwise type" + op_type);
it = it =
Registry<OpConverter>::Lookup("elementwise_" + op_type + "_weight"); Registry<OpConverter>::Lookup("elementwise_" + op_type + "_weight");
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]",
op_desc.Type());
} else { } else {
PADDLE_ENFORCE(add_tensor_op_set.count(op_type) > 0, PADDLE_ENFORCE(add_tensor_op_set.count(op_type) > 0,
"Unsupported elementwise type" + op_type); "Unsupported elementwise type" + op_type);
......
...@@ -280,12 +280,16 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -280,12 +280,16 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
* ('any') which lets a primitive (convolution in this case) choose * ('any') which lets a primitive (convolution in this case) choose
* the memory format preferred for best performance * the memory format preferred for best performance
*/ */
std::string data_format = ctx.Attr<std::string>("data_format");
auto chosen_memory_format =
platform::data_format_to_memory_format(data_format);
auto src_md = platform::MKLDNNMemDesc( auto src_md = platform::MKLDNNMemDesc(
src_tz, platform::MKLDNNGetDataType<T>(), memory::format::any); src_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
auto weights_md = platform::MKLDNNMemDesc( auto weights_md = platform::MKLDNNMemDesc(
weights_tz, platform::MKLDNNGetDataType<T>(), memory::format::any); weights_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
auto dst_md = platform::MKLDNNMemDesc( auto dst_md = platform::MKLDNNMemDesc(
dst_tz, platform::MKLDNNGetDataType<T>(), memory::format::any); dst_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
// create a conv primitive descriptor and save it for usage in backward // create a conv primitive descriptor and save it for usage in backward
std::shared_ptr<mkldnn::convolution_forward::primitive_desc> conv_pd = std::shared_ptr<mkldnn::convolution_forward::primitive_desc> conv_pd =
...@@ -423,16 +427,20 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -423,16 +427,20 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
* ('any') which lets a primitive (conv backward in this case) choose * ('any') which lets a primitive (conv backward in this case) choose
* the memory format preferred for best performance * the memory format preferred for best performance
*/ */
std::string data_format = ctx.Attr<std::string>("data_format");
auto chosen_memory_format =
platform::data_format_to_memory_format(data_format);
auto src_md = platform::MKLDNNMemDesc( auto src_md = platform::MKLDNNMemDesc(
src_tz, platform::MKLDNNGetDataType<T>(), memory::format::any); src_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
auto diff_src_md = platform::MKLDNNMemDesc( auto diff_src_md = platform::MKLDNNMemDesc(
src_tz, platform::MKLDNNGetDataType<T>(), memory::format::any); src_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
auto weights_md = platform::MKLDNNMemDesc( auto weights_md = platform::MKLDNNMemDesc(
weights_tz, platform::MKLDNNGetDataType<T>(), memory::format::any); weights_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
auto diff_weights_md = platform::MKLDNNMemDesc( auto diff_weights_md = platform::MKLDNNMemDesc(
weights_tz, platform::MKLDNNGetDataType<T>(), memory::format::any); weights_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
auto diff_dst_md = platform::MKLDNNMemDesc( auto diff_dst_md = platform::MKLDNNMemDesc(
dst_tz, platform::MKLDNNGetDataType<T>(), memory::format::any); dst_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
// Retrieve conv_pd from device context // Retrieve conv_pd from device context
auto conv_pd = auto conv_pd =
......
...@@ -534,8 +534,8 @@ void ElemwiseGradCompute(const framework::ExecutionContext& ctx, ...@@ -534,8 +534,8 @@ void ElemwiseGradCompute(const framework::ExecutionContext& ctx,
const framework::Tensor& dout, int axis, const framework::Tensor& dout, int axis,
framework::Tensor* dx, framework::Tensor* dy, framework::Tensor* dx, framework::Tensor* dy,
DX_OP dx_op, DY_OP dy_op) { DX_OP dx_op, DY_OP dy_op) {
const framework::DDim x_dim = x.dims(); const framework::DDim& x_dim = x.dims();
const framework::DDim y_dim = y.dims(); const framework::DDim& y_dim = y.dims();
if (x.dims() == y.dims()) { if (x.dims() == y.dims()) {
ElemwiseGradComputeNoBroadcast<DeviceContext, T, DX_OP, DY_OP>( ElemwiseGradComputeNoBroadcast<DeviceContext, T, DX_OP, DY_OP>(
ctx, x_dim, y_dim, x, y, out, dout, axis, dx, dy, dx_op, dy_op); ctx, x_dim, y_dim, x, y, out, dout, axis, dx, dy, dx_op, dy_op);
...@@ -558,19 +558,19 @@ void ElemwiseExplicitGradCompute(const framework::ExecutionContext& ctx, ...@@ -558,19 +558,19 @@ void ElemwiseExplicitGradCompute(const framework::ExecutionContext& ctx,
framework::Tensor* dx, framework::Tensor* dy, framework::Tensor* dx, framework::Tensor* dy,
DX_OP dx_op, DY_OP dy_op) { DX_OP dx_op, DY_OP dy_op) {
if (dy == nullptr) { if (dy == nullptr) {
const framework::DDim dx_dims = dout.dims(); const framework::DDim& dx_dims = dout.dims();
auto dy_dims = dx_dims; auto dy_dims = dx_dims;
ElemwiseGradComputeNoBroadcast<DeviceContext, T, DX_OP, DY_OP>( ElemwiseGradComputeNoBroadcast<DeviceContext, T, DX_OP, DY_OP>(
ctx, dx_dims, dy_dims, x, y, out, dout, axis, dx, dy, dx_op, dy_op); ctx, dx_dims, dy_dims, x, y, out, dout, axis, dx, dy, dx_op, dy_op);
} else { } else {
if (dout.dims() == dy->dims()) { if (dout.dims() == dy->dims()) {
const framework::DDim dx_dims = dout.dims(); const framework::DDim& dx_dims = dout.dims();
const framework::DDim dy_dims = dy->dims(); const framework::DDim& dy_dims = dy->dims();
ElemwiseGradComputeNoBroadcast<DeviceContext, T, DX_OP, DY_OP>( ElemwiseGradComputeNoBroadcast<DeviceContext, T, DX_OP, DY_OP>(
ctx, dx_dims, dy_dims, x, y, out, dout, axis, dx, dy, dx_op, dy_op); ctx, dx_dims, dy_dims, x, y, out, dout, axis, dx, dy, dx_op, dy_op);
} else { // Y is a scalar } else { // Y is a scalar
auto dx_dims = dout.dims(); auto dx_dims = dout.dims();
const framework::DDim dy_dims = dy->dims(); const framework::DDim& dy_dims = dy->dims();
ElemwiseGradComputeWithBroadcast<DeviceContext, T, DX_OP, DY_OP>( ElemwiseGradComputeWithBroadcast<DeviceContext, T, DX_OP, DY_OP>(
ctx, dx_dims, dy_dims, x, y, out, dout, axis, dx, dy, dx_op, dy_op); ctx, dx_dims, dy_dims, x, y, out, dout, axis, dx, dy, dx_op, dy_op);
} }
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/operators/fused_elemwise_activation_op.h"
namespace paddle {
namespace operators {
// Forward operator definition for fused_elemwise_activation.
// InferShape requires inputs X, Y and output Out, requires rank(X) >= rank(Y),
// and gives Out the dims and LoD of X. The kernel type is taken from the
// element type of X, which must equal that of Y.
class FusedElemwiseActivationOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(
        ctx->HasInput("X"),
        "Input(X) of FusedElemwiseActivationOp op should not be null.");
    PADDLE_ENFORCE(
        ctx->HasInput("Y"),
        "Input(Y) of FusedElemwiseActivationOp op should not be null.");
    PADDLE_ENFORCE(
        ctx->HasOutput("Out"),
        "Output(Out) of FusedElemwiseActivationOp op should not be null.");

    const auto lhs_dims = ctx->GetInputDim("X");
    const auto rhs_dims = ctx->GetInputDim("Y");
    PADDLE_ENFORCE_GE(lhs_dims.size(), rhs_dims.size(),
                      "Rank of first input must >= rank of second input.");

    // Out mirrors X in both shape and LoD.
    ctx->SetOutputDim("Out", lhs_dims);
    ctx->ShareLoD("X", /*->*/ "Out");
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    const auto x_type = ctx.Input<framework::Tensor>("X")->type();
    PADDLE_ENFORCE_EQ(x_type, ctx.Input<framework::Tensor>("Y")->type(),
                      "The element's type of input should be the same.");
    return framework::OpKernelType(framework::ToDataType(x_type),
                                   ctx.GetPlace());
  }
};
// Declares the inputs, outputs and attributes of fused_elemwise_activation.
// The "functor_list" attribute is validated by ValidCheck: it must name
// exactly two functors, one binary ("elementwise_add") and one unary
// ("scale" or "relu"), in either order.
class FusedElemwiseActivationMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "(vector<Tensor>)");
    AddInput("Y", "(vector<Tensor>)");
    AddOutput("Out", "vector<Tensor>");
    AddAttr<int>("axis",
                 "axis is used by elementwise_op, the default value is -1.")
        .SetDefault(-1);
    AddAttr<float>("scale",
                   "scale is used by scale_op, the default value is 0.0.")
        .SetDefault(0.0);
    AddAttr<bool>("recomputation",
                  "Whether to recompute the Out."
                  "fused_elemwise_activation_grad has two methods to get the "
                  "dx and dy, one "
                  "is to use the 'Out', and the other is not to use it. "
                  "The former method will save the time of recomputing the "
                  "'Out', but it must occupy the memory to store the 'out'. "
                  "While, the later method can avoid occupying the memory, "
                  "but it must recompute the 'Out'. The default value is true.")
        .SetDefault(true);
    // The checker is stored by AddCustomChecker, so it must not capture
    // `this`; ValidCheck is static and needs no instance state.
    AddAttr<std::vector<std::string>>("functor_list",
                                      "The functors that should be fused.")
        .AddCustomChecker([](const std::vector<std::string> &functor_list) {
          PADDLE_ENFORCE(
              FusedElemwiseActivationMaker::ValidCheck(functor_list));
        });
    AddComment(R"DOC(
FusedElemwiseActivation Operator.
At present, FusedElemwiseActivation only supports Two kinds of compound
operators (elementwise_op and activation_op):
Z = Binary(X, Unary(Y))
Z = Unary(Binary(X, Y))
The attributions of activation_op can be get from fused_elemwise_activation_op's
attributions. functor_list records the functors to be fused, for example
"scale,elementwise_add".
)DOC");
  }

 private:
  // Returns true when `functors` is a supported (binary, unary) or
  // (unary, binary) pair; raises a Paddle enforce error otherwise.
  static bool ValidCheck(const std::vector<std::string> &functors) {
    // Guard the indexed accesses below against a malformed attribute.
    PADDLE_ENFORCE_EQ(functors.size(), static_cast<size_t>(2),
                      "functor_list should contain exactly two functors.");

    const std::unordered_set<std::string> unary_fun = {"scale", "relu"};
    const std::unordered_set<std::string> binary_fun = {"elementwise_add"};

    std::string unary_fun_str;
    if (binary_fun.count(functors[0])) {
      unary_fun_str = functors[1];
    } else if (binary_fun.count(functors[1])) {
      unary_fun_str = functors[0];
    } else {
      PADDLE_THROW("%s and %s are not included in fused_list.", functors[0],
                   functors[1]);
    }
    PADDLE_ENFORCE_EQ(unary_fun.count(unary_fun_str), 1,
                      "%s is not included in fused_list.", unary_fun_str);
    return true;
  }
};
// Builds the OpDesc of the backward op for fused_elemwise_activation.
// The grad op is named "<forward type>_grad". It receives every forward
// input, every forward output and every output gradient as inputs, and
// produces the input gradients as outputs. The forward attributes are copied,
// except that both entries of "functor_list" get a "_grad" suffix.
class FusedElemwiseActivationGradMaker
    : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    // Owned by a unique_ptr from the start so that an exception raised while
    // the desc is being filled in cannot leak it.
    std::unique_ptr<framework::OpDesc> grad_op(new framework::OpDesc());
    grad_op->SetType(this->ForwardOpType() + "_grad");

    for (auto &input_param : this->InputNames()) {
      grad_op->SetInput(input_param, this->Input(input_param));
      grad_op->SetOutput(framework::GradVarName(input_param),
                         this->InputGrad(input_param, true));
    }

    for (auto &output_param : this->OutputNames()) {
      grad_op->SetInput(output_param, this->Output(output_param));
      grad_op->SetInput(framework::GradVarName(output_param),
                        this->OutputGrad(output_param));
    }

    grad_op->SetAttrMap(this->Attrs());

    std::vector<std::string> functor_names =
        boost::get<std::vector<std::string>>(grad_op->GetAttr("functor_list"));
    // Guard the indexed accesses below against a malformed attribute.
    PADDLE_ENFORCE_EQ(functor_names.size(), static_cast<size_t>(2),
                      "functor_list should contain exactly two functors.");
    functor_names[0] += "_grad";
    functor_names[1] += "_grad";
    grad_op->SetAttr("functor_list", functor_names);
    return grad_op;
  }
};
// Backward operator definition for fused_elemwise_activation.
// InferShape requires inputs X, Y and Out@GRAD, requires rank(X) >= rank(Y),
// and gives X@GRAD / Y@GRAD (when present as outputs) the dims of X / Y.
// The kernel type is taken from the element type of X, which must equal that
// of Y and of Out@GRAD.
class FusedElemwiseActivationOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) should not be null");

    auto x_dims = ctx->GetInputDim("X");
    auto y_dims = ctx->GetInputDim("Y");

    PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
                      "Rank of first input must >= rank of second input.");

    // Each gradient output is optional; only set dims for those requested.
    auto x_grad_name = framework::GradVarName("X");
    auto y_grad_name = framework::GradVarName("Y");
    if (ctx->HasOutput(x_grad_name)) {
      ctx->SetOutputDim(x_grad_name, x_dims);
    }
    if (ctx->HasOutput(y_grad_name)) {
      ctx->SetOutputDim(y_grad_name, y_dims);
    }
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    auto input_data_type_index = ctx.Input<framework::Tensor>("X")->type();
    PADDLE_ENFORCE_EQ(input_data_type_index,
                      ctx.Input<framework::Tensor>("Y")->type(),
                      "The element's type of input should be the same.");
    PADDLE_ENFORCE_EQ(
        input_data_type_index,
        ctx.Input<framework::Tensor>(framework::GradVarName("Out"))->type(),
        "The element's type of input should be the same.");

    auto input_data_type = framework::ToDataType(input_data_type_index);
    return framework::OpKernelType(input_data_type, ctx.GetPlace());
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
// Register the forward operator together with its proto/checker maker and
// the maker that builds its gradient op description.
REGISTER_OPERATOR(fused_elemwise_activation, ops::FusedElemwiseActivationOp,
ops::FusedElemwiseActivationMaker,
ops::FusedElemwiseActivationGradMaker);
// Register the backward operator.
REGISTER_OPERATOR(fused_elemwise_activation_grad,
ops::FusedElemwiseActivationOpGrad);
// CPU forward kernels, instantiated for float and double.
REGISTER_OP_CPU_KERNEL(
fused_elemwise_activation,
ops::FusedElemwiseActivationKernel<paddle::platform::CPUDeviceContext,
float>,
ops::FusedElemwiseActivationKernel<paddle::platform::CPUDeviceContext,
double>);
// CPU backward kernels, instantiated for float and double.
REGISTER_OP_CPU_KERNEL(
fused_elemwise_activation_grad,
ops::FusedElemwiseActivationGradKernel<paddle::platform::CPUDeviceContext,
float>,
ops::FusedElemwiseActivationGradKernel<paddle::platform::CPUDeviceContext,
double>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fused_elemwise_activation_op.h"
namespace ops = paddle::operators;
// CUDA forward kernels, instantiated for float and double.
REGISTER_OP_CUDA_KERNEL(
fused_elemwise_activation,
ops::FusedElemwiseActivationKernel<paddle::platform::CUDADeviceContext,
float>,
ops::FusedElemwiseActivationKernel<paddle::platform::CUDADeviceContext,
double>);
// CUDA backward kernels, instantiated for float and double.
REGISTER_OP_CUDA_KERNEL(
fused_elemwise_activation_grad,
ops::FusedElemwiseActivationGradKernel<paddle::platform::CUDADeviceContext,
float>,
ops::FusedElemwiseActivationGradKernel<paddle::platform::CUDADeviceContext,
double>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/operators/elementwise_op_function.h"
#include "paddle/fluid/operators/math/functors.h"
namespace math = paddle::operators::math;
namespace paddle {
namespace operators {
// CompoundFunctors
// Composition with the binary functor outermost: Z = Binary(X, Unary(Y)).
// The unary functor is applied to y first, then the binary functor combines
// x with that result.
template <typename T, typename BinaryFun, typename UnaryFun>
struct BinaryCompoundFunctor {
  BinaryCompoundFunctor(const BinaryFun &outer, const UnaryFun &inner)
      : binary_fun_(outer), unary_fun_(inner) {}

  inline HOSTDEVICE T operator()(T x, T y) {
    auto transformed_y = unary_fun_(y);
    return binary_fun_(x, transformed_y);
  }

 private:
  BinaryFun binary_fun_;
  UnaryFun unary_fun_;
};
// Composition with the unary functor outermost: Z = Unary(Binary(X, Y)).
// The binary functor combines x and y first, then the unary functor is
// applied to that result.
template <typename T, typename UnaryFun, typename BinaryFun>
struct UnaryCompoundFunctor {
  UnaryCompoundFunctor(const UnaryFun &outer, const BinaryFun &inner)
      : unary_fun_(outer), binary_fun_(inner) {}

  inline HOSTDEVICE T operator()(T x, T y) {
    auto combined = binary_fun_(x, y);
    return unary_fun_(combined);
  }

 private:
  UnaryFun unary_fun_;
  BinaryFun binary_fun_;
};
// FIXME(zcd): DBinaryFun and DUnaryFun have two ways to compute the
// gradient: one uses 'out' and the other does not. Using 'out' saves the
// time of recomputing it but requires keeping 'out' in memory; not using it
// avoids that memory cost but requires recomputing 'out'.
//
// dX for Z = Binary(X, Unary(Y)).
// With Recomputation == true the binary gradient functor is called without
// 'out'; otherwise 'out' is forwarded to it as a third argument.
template <typename T, typename DBinaryFun, typename UnaryFun,
          bool Recomputation = true>
struct BinaryCompoundGradDxFunctor {
  BinaryCompoundGradDxFunctor(const DBinaryFun &outer_grad,
                              const UnaryFun &inner)
      : d_binary_fun_(outer_grad), unary_fun_(inner) {}

  inline HOSTDEVICE T operator()(T x, T y, T out, T dout) {
    if (!Recomputation) {
      return dout * d_binary_fun_(x, unary_fun_(y), out);
    }
    return dout * d_binary_fun_(x, unary_fun_(y));
  }

 private:
  DBinaryFun d_binary_fun_;
  UnaryFun unary_fun_;
};
// dY for Z = Binary(X, Unary(Y)).
// Multiplies dout by the binary gradient evaluated at (Unary(y), x) and then
// by the unary gradient at y. With Recomputation == false, 'out' is forwarded
// to the binary gradient functor as a third argument.
template <typename T, typename DBinaryFun, typename UnaryFun,
          typename DUnaryFun, bool Recomputation = true>
struct BinaryCompoundGradDyFunctor {
  BinaryCompoundGradDyFunctor(const DBinaryFun &outer_grad,
                              const UnaryFun &inner,
                              const DUnaryFun &inner_grad)
      : d_binary_fun_(outer_grad),
        unary_fun_(inner),
        d_unary_fun_(inner_grad) {}

  inline HOSTDEVICE T operator()(T x, T y, T out, T dout) {
    if (!Recomputation) {
      return dout * d_binary_fun_(unary_fun_(y), x, out) * d_unary_fun_(y);
    }
    return dout * d_binary_fun_(unary_fun_(y), x) * d_unary_fun_(y);
  }

 private:
  DBinaryFun d_binary_fun_;
  UnaryFun unary_fun_;
  DUnaryFun d_unary_fun_;
};
// dX for Z = Unary(Binary(X, Y)).
// Computes dout times the unary gradient at Binary(x, y), then multiplies by
// the binary gradient evaluated at (x, y). With Recomputation == false, 'out'
// is forwarded to the unary gradient functor as a second argument.
template <typename T, typename DUnaryFun, typename BinaryFun,
          typename DBinaryFun, bool Recomputation = true>
struct UnaryCompoundGradDxFunctor {
  UnaryCompoundGradDxFunctor(const DUnaryFun &outer_grad,
                             const BinaryFun &inner,
                             const DBinaryFun &inner_grad)
      : d_unary_fun_(outer_grad),
        binary_fun_(inner),
        d_binary_fun_(inner_grad) {}

  inline HOSTDEVICE T operator()(T x, T y, T out, T dout) {
    T upstream = Recomputation
                     ? dout * d_unary_fun_(binary_fun_(x, y))
                     : dout * d_unary_fun_(binary_fun_(x, y), out);
    return upstream * d_binary_fun_(x, y);
  }

 private:
  DUnaryFun d_unary_fun_;
  BinaryFun binary_fun_;
  DBinaryFun d_binary_fun_;
};
// dY for Z = Unary(Binary(X, Y)).
// Computes dout times the unary gradient at Binary(x, y), then multiplies by
// the binary gradient evaluated with swapped arguments (y, x). With
// Recomputation == false, 'out' is forwarded to the unary gradient functor as
// a second argument.
template <typename T, typename DUnaryFun, typename BinaryFun,
          typename DBinaryFun, bool Recomputation = true>
struct UnaryCompoundGradDyFunctor {
  UnaryCompoundGradDyFunctor(const DUnaryFun &outer_grad,
                             const BinaryFun &inner,
                             const DBinaryFun &inner_grad)
      : d_unary_fun_(outer_grad),
        binary_fun_(inner),
        d_binary_fun_(inner_grad) {}

  inline HOSTDEVICE T operator()(T x, T y, T out, T dout) {
    T upstream = Recomputation
                     ? dout * d_unary_fun_(binary_fun_(x, y))
                     : dout * d_unary_fun_(binary_fun_(x, y), out);
    return upstream * d_binary_fun_(y, x);
  }

 private:
  DUnaryFun d_unary_fun_;
  BinaryFun binary_fun_;
  DBinaryFun d_binary_fun_;
};
// Forward pass for Z = Binary(X, Unary(Y)): wraps the two functors in a
// BinaryCompoundFunctor and hands it to ElementwiseComputeEx together with
// the op's "axis" attribute.
template <typename DeviceContext, typename T, typename BinaryFunctor,
          typename UnaryFunctor>
static void RunBinaryCompoundFunctor(const framework::ExecutionContext &ctx,
                                     const BinaryFunctor &binary_functor,
                                     const UnaryFunctor &unary_functor,
                                     const framework::Tensor *in_x,
                                     const framework::Tensor *in_y,
                                     framework::Tensor *output) {
  using Fused = BinaryCompoundFunctor<T, BinaryFunctor, UnaryFunctor>;
  const int axis_attr = ctx.Attr<int>("axis");
  Fused fused(binary_functor, unary_functor);
  ElementwiseComputeEx<Fused, DeviceContext, T>(ctx, in_x, in_y, axis_attr,
                                                fused, output);
}
// Forward pass for Z = Unary(Binary(X, Y)): wraps the two functors in a
// UnaryCompoundFunctor and hands it to ElementwiseComputeEx together with
// the op's "axis" attribute.
template <typename DeviceContext, typename T, typename UnaryFunctor,
          typename BinaryFunctor>
static void RunUnaryCompoundFunctors(const framework::ExecutionContext &ctx,
                                     const UnaryFunctor &unary_functor,
                                     const BinaryFunctor &binary_functor,
                                     const framework::Tensor *in_x,
                                     const framework::Tensor *in_y,
                                     framework::Tensor *output) {
  using Fused = UnaryCompoundFunctor<T, UnaryFunctor, BinaryFunctor>;
  const int axis_attr = ctx.Attr<int>("axis");
  Fused fused(unary_functor, binary_functor);
  ElementwiseComputeEx<Fused, DeviceContext, T>(ctx, in_x, in_y, axis_attr,
                                                fused, output);
}
// Backward pass for Z = Binary(X, Unary(Y)): builds the dX and dY compound
// gradient functors and passes them to ElemwiseGradCompute together with the
// op's "axis" attribute. Recomputation is forwarded to both functors.
template <typename DeviceContext, typename T, typename BinaryGradFunctor,
          typename UnaryFunctor, typename UnaryGradFunctor,
          bool Recomputation = true>
static void RunBinaryCompoundGradFunctors(
    const framework::ExecutionContext &ctx,
    const BinaryGradFunctor &binary_grad_functor,
    const UnaryFunctor &unary_functor,
    const UnaryGradFunctor &unary_grad_functor, const framework::Tensor *in_x,
    const framework::Tensor *in_y, const framework::Tensor *in_out,
    const framework::Tensor *in_out_grad, framework::Tensor *x_grad,
    framework::Tensor *y_grad) {
  using DxOp = BinaryCompoundGradDxFunctor<T, BinaryGradFunctor, UnaryFunctor,
                                           Recomputation>;
  using DyOp = BinaryCompoundGradDyFunctor<T, BinaryGradFunctor, UnaryFunctor,
                                           UnaryGradFunctor, Recomputation>;

  const int axis_attr = ctx.Attr<int>("axis");
  DxOp dx_op(binary_grad_functor, unary_functor);
  DyOp dy_op(binary_grad_functor, unary_functor, unary_grad_functor);

  ElemwiseGradCompute<DeviceContext, T, DxOp, DyOp>(
      ctx, *in_x, *in_y, *in_out, *in_out_grad, axis_attr, x_grad, y_grad,
      dx_op, dy_op);
}
// Backward pass for Z = Unary(Binary(X, Y)): builds the dX and dY compound
// gradient functors and passes them to ElemwiseGradCompute together with the
// op's "axis" attribute. Recomputation is forwarded to both functors.
template <typename DeviceContext, typename T, typename UnaryGradFunctor,
          typename BinaryFunctor, typename BinaryGradFunctor,
          bool Recomputation = true>
static void RunUnaryCompoundGradFunctors(
    const framework::ExecutionContext &ctx,
    const UnaryGradFunctor &unary_grad_functor,
    const BinaryFunctor &binary_functor,
    const BinaryGradFunctor &binary_grad_functor, const framework::Tensor *in_x,
    const framework::Tensor *in_y, const framework::Tensor *in_out,
    const framework::Tensor *in_out_grad, framework::Tensor *x_grad,
    framework::Tensor *y_grad) {
  using DxOp = UnaryCompoundGradDxFunctor<T, UnaryGradFunctor, BinaryFunctor,
                                          BinaryGradFunctor, Recomputation>;
  using DyOp = UnaryCompoundGradDyFunctor<T, UnaryGradFunctor, BinaryFunctor,
                                          BinaryGradFunctor, Recomputation>;

  const int axis_attr = ctx.Attr<int>("axis");
  DxOp dx_op(unary_grad_functor, binary_functor, binary_grad_functor);
  DyOp dy_op(unary_grad_functor, binary_functor, binary_grad_functor);

  ElemwiseGradCompute<DeviceContext, T, DxOp, DyOp>(
      ctx, *in_x, *in_y, *in_out, *in_out_grad, axis_attr, x_grad, y_grad,
      dx_op, dy_op);
}
// Forward dispatcher: reads the two names in the "functor_list" attribute,
// joins them with a comma and runs the matching compound functor. Supported
// combinations are elementwise_add with scale or relu, in either order; any
// other combination throws. The "scale" attribute is read only by the
// branches that involve scale.
template <typename DeviceContext, typename T>
static void RunFunctors(const framework::ExecutionContext &ctx,
                        const framework::Tensor *in_x,
                        const framework::Tensor *in_y,
                        framework::Tensor *output) {
  using Add = math::AddFunctor<T>;
  using Scale = math::ScaleFunctor<T>;
  using Relu = math::ReluFunctor<T>;

  auto &names = ctx.Attr<std::vector<std::string>>("functor_list");
  auto funcs_str = names[0] + "," + names[1];

  // TODO(zcd): The following code can be refined.
  if (funcs_str == "elementwise_add,scale") {
    // Z = Binary(X, Unary(Y))
    T coeff = static_cast<T>(ctx.Attr<float>("scale"));
    RunBinaryCompoundFunctor<DeviceContext, T, Add, Scale>(
        ctx, Add(), Scale(coeff), in_x, in_y, output);
    return;
  }
  if (funcs_str == "scale,elementwise_add") {
    // Z = Unary(Binary(X, Y))
    T coeff = static_cast<T>(ctx.Attr<float>("scale"));
    RunUnaryCompoundFunctors<DeviceContext, T, Scale, Add>(
        ctx, Scale(coeff), Add(), in_x, in_y, output);
    return;
  }
  if (funcs_str == "elementwise_add,relu") {
    RunBinaryCompoundFunctor<DeviceContext, T, Add, Relu>(
        ctx, Add(), Relu(), in_x, in_y, output);
    return;
  }
  if (funcs_str == "relu,elementwise_add") {
    RunUnaryCompoundFunctors<DeviceContext, T, Relu, Add>(
        ctx, Relu(), Add(), in_x, in_y, output);
    return;
  }
  PADDLE_THROW("%s has not been implemented.", funcs_str);
}
// Backward dispatcher of the fused elementwise/activation op.
//
// The first two entries of the "functor_list" attribute are joined with a
// comma to form the dispatch key. The boolean "recomputation" attribute is
// forwarded to the compound grad runners as their last (compile-time)
// template argument, which is why every branch spells out both the `true` and
// the `false` instantiation. The "scale" attribute is read only by the
// branches that involve scaling. Any other key raises through PADDLE_THROW.
template <typename DeviceContext, typename T>
static void RunGradFunctors(const framework::ExecutionContext &ctx,
                            const framework::Tensor *in_x,
                            const framework::Tensor *in_y,
                            const framework::Tensor *in_out,
                            const framework::Tensor *in_out_grad,
                            framework::Tensor *x_grad,
                            framework::Tensor *y_grad) {
  using Add = math::AddFunctor<T>;
  using AddGrad = math::AddGradFunctor<T>;
  using Scale = math::ScaleFunctor<T>;
  using ScaleGrad = math::ScaleGradFunctor<T>;
  using Relu = math::ReluFunctor<T>;
  using ReluGrad = math::ReluGradFunctor<T>;

  auto &functor_names = ctx.Attr<std::vector<std::string>>("functor_list");
  auto compound_key = functor_names[0] + "," + functor_names[1];
  bool recompute = ctx.Attr<bool>("recomputation");

  // TODO(zcd): The following code can be refined, for example by using a
  // registration mechanism.
  if (compound_key == "elementwise_add_grad,scale_grad") {
    // The backward of Z = Binary(X, Unary(Y))
    T coeff = static_cast<T>(ctx.Attr<float>("scale"));
    if (recompute) {
      RunBinaryCompoundGradFunctors<DeviceContext, T, AddGrad, Scale,
                                    ScaleGrad, true>(
          ctx, AddGrad(), Scale(coeff), ScaleGrad(coeff), in_x, in_y, in_out,
          in_out_grad, x_grad, y_grad);
    } else {
      RunBinaryCompoundGradFunctors<DeviceContext, T, AddGrad, Scale,
                                    ScaleGrad, false>(
          ctx, AddGrad(), Scale(coeff), ScaleGrad(coeff), in_x, in_y, in_out,
          in_out_grad, x_grad, y_grad);
    }
    return;
  }

  if (compound_key == "scale_grad,elementwise_add_grad") {
    // The backward of Z = Unary(Binary(X, Y))
    T coeff = static_cast<T>(ctx.Attr<float>("scale"));
    if (recompute) {
      RunUnaryCompoundGradFunctors<DeviceContext, T, ScaleGrad, Add, AddGrad,
                                   true>(ctx, ScaleGrad(coeff), Add(),
                                         AddGrad(), in_x, in_y, in_out,
                                         in_out_grad, x_grad, y_grad);
    } else {
      RunUnaryCompoundGradFunctors<DeviceContext, T, ScaleGrad, Add, AddGrad,
                                   false>(ctx, ScaleGrad(coeff), Add(),
                                          AddGrad(), in_x, in_y, in_out,
                                          in_out_grad, x_grad, y_grad);
    }
    return;
  }

  if (compound_key == "elementwise_add_grad,relu_grad") {
    if (recompute) {
      RunBinaryCompoundGradFunctors<DeviceContext, T, AddGrad, Relu, ReluGrad,
                                    true>(ctx, AddGrad(), Relu(), ReluGrad(),
                                          in_x, in_y, in_out, in_out_grad,
                                          x_grad, y_grad);
    } else {
      RunBinaryCompoundGradFunctors<DeviceContext, T, AddGrad, Relu, ReluGrad,
                                    false>(ctx, AddGrad(), Relu(), ReluGrad(),
                                           in_x, in_y, in_out, in_out_grad,
                                           x_grad, y_grad);
    }
    return;
  }

  if (compound_key == "relu_grad,elementwise_add_grad") {
    if (recompute) {
      RunUnaryCompoundGradFunctors<DeviceContext, T, ReluGrad, Add, AddGrad,
                                   true>(ctx, ReluGrad(), Add(), AddGrad(),
                                         in_x, in_y, in_out, in_out_grad,
                                         x_grad, y_grad);
    } else {
      RunUnaryCompoundGradFunctors<DeviceContext, T, ReluGrad, Add, AddGrad,
                                   false>(ctx, ReluGrad(), Add(), AddGrad(),
                                          in_x, in_y, in_out, in_out_grad,
                                          x_grad, y_grad);
    }
    return;
  }

  PADDLE_THROW("%s has not been implemented.", compound_key);
}
// Forward kernel of the fused elementwise/activation op.
//
// Compute() resolves the "X" and "Y" inputs and the "Out" output from the
// execution context (detail::Ref raises with the given message when a pointer
// is null) and hands them to RunFunctors, which picks the functor pair from
// the op attributes.
template <typename DeviceContext, typename T>
class FusedElemwiseActivationKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto &in_x = detail::Ref(ctx.Input<framework::Tensor>("X"),
                             "Cannot get input tensor %s, variable name = %s",
                             "X", ctx.op().Input("X"));
    auto &in_y = detail::Ref(ctx.Input<framework::Tensor>("Y"),
                             "Cannot get input tensor %s, variable name = %s",
                             "Y", ctx.op().Input("Y"));
    // "Out" is fetched through ctx.Output, so the diagnostic must say
    // "output"; the previous text claimed an *input* tensor was missing.
    auto &output =
        detail::Ref(ctx.Output<framework::Tensor>("Out"),
                    "Cannot get output tensor %s, variable name = %s", "Out",
                    ctx.op().Output("Out"));

    RunFunctors<DeviceContext, T>(ctx, &in_x, &in_y, &output);
  }
};
// Backward kernel of the fused elementwise/activation op.
//
// Compute() looks up the forward inputs ("X", "Y"), the forward result
// ("Out") and its gradient, all as inputs, plus the gradient outputs for "X"
// and "Y", then delegates to RunGradFunctors. The four inputs are checked via
// detail::Ref; the two gradient outputs are passed on as raw pointers without
// a check here.
template <typename DeviceContext, typename T>
class FusedElemwiseActivationGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto &x = detail::Ref(ctx.Input<framework::Tensor>("X"),
                          "Cannot get input tensor %s, variable name = %s",
                          "X", ctx.op().Input("X"));
    auto &y = detail::Ref(ctx.Input<framework::Tensor>("Y"),
                          "Cannot get input tensor %s, variable name = %s",
                          "Y", ctx.op().Input("Y"));
    auto &out = detail::Ref(ctx.Input<framework::Tensor>("Out"),
                            "Cannot get input tensor %s, variable name = %s",
                            "Out", ctx.op().Input("Out"));

    // Name of the variable carrying d(Out); used for the lookup and for the
    // error message alike.
    const std::string out_grad_name = framework::GradVarName("Out");
    auto &out_grad =
        detail::Ref(ctx.Input<framework::Tensor>(out_grad_name),
                    "Cannot get input tensor %s, variable name = %s",
                    out_grad_name, ctx.op().Input(out_grad_name));

    framework::Tensor *dx =
        ctx.Output<framework::Tensor>(framework::GradVarName("X"));
    framework::Tensor *dy =
        ctx.Output<framework::Tensor>(framework::GradVarName("Y"));

    RunGradFunctors<DeviceContext, T>(ctx, &x, &y, &out, &out_grad, dx, dy);
  }
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
namespace paddle {
namespace operators {
namespace math {
// AddFunctor: binary functor computing out = x + y.
template <typename T>
struct AddFunctor {
  // The functor holds no state, so the call operator is const-qualified; this
  // lets it be invoked through const objects and references as well, matching
  // the const overload already present on AddGradFunctor.
  inline HOSTDEVICE T operator()(T x, T y) const { return x + y; }
};
// Gradient of AddFunctor: the result is always 1, whatever the operands.
// Two arities are provided: (x, y) and (x, y, out). All arguments are
// ignored.
template <typename T>
struct AddGradFunctor {
  // Both overloads are const-qualified (previously only the three-argument
  // one was), so the functor behaves the same through const and non-const
  // access.
  inline HOSTDEVICE T operator()(T x, T y) const { return 1; }
  inline HOSTDEVICE T operator()(T x, T y, T out) const { return 1; }
};
// Unary functor that multiplies its argument by a fixed coefficient:
// out = ele * coeff.
template <typename T>
struct ScaleFunctor {
  // coeff: the multiplier applied by every call.
  explicit ScaleFunctor(const T coeff) : coeff_(coeff) {}

  // Calling does not modify the stored coefficient, hence const.
  inline HOSTDEVICE T operator()(T ele) const { return ele * coeff_; }

 private:
  T coeff_;
};
// Gradient of ScaleFunctor: d(x * coeff)/dx = coeff, independent of x.
// Two arities are provided: (x) and (x, out). The arguments are ignored.
template <typename T>
struct ScaleGradFunctor {
  // coeff: the multiplier of the forward ScaleFunctor; returned by every call.
  explicit ScaleGradFunctor(T coeff) : coeff_(coeff) {}

  // Neither overload modifies the stored coefficient, hence const.
  inline HOSTDEVICE T operator()(T x) const { return coeff_; }
  inline HOSTDEVICE T operator()(T x, T out) const { return coeff_; }

 private:
  T coeff_;
};
// Unary functor computing the rectified linear unit: out = max(x, 0).
template <typename T>
struct ReluFunctor {
  // Implemented with a comparison instead of `x * (x > 0)`: for floating
  // point T the product form evaluates -inf * 0 to NaN and maps negative
  // finite inputs to -0. Written as `x < 0 ? 0 : x`, a NaN input still comes
  // back as NaN (the comparison is false), as it did with the product form.
  inline HOSTDEVICE T operator()(T x) const {
    return x < 0 ? static_cast<T>(0) : x;
  }
};
// Gradient of ReluFunctor: 1 where x > 0, otherwise 0.
// Two arities are provided: (x) and (x, out). Only x is inspected.
template <typename T>
struct ReluGradFunctor {
  // Stateless, so both overloads are const-qualified.
  inline HOSTDEVICE T operator()(T x) const { return x > 0 ? 1 : 0; }
  inline HOSTDEVICE T operator()(T x, T out) const { return x > 0 ? 1 : 0; }
};
} // namespace math
} // namespace operators
} // namespace paddle
...@@ -163,12 +163,11 @@ class ParallelDoOp : public framework::OperatorBase { ...@@ -163,12 +163,11 @@ class ParallelDoOp : public framework::OperatorBase {
auto &place = places[place_idx]; auto &place = places[place_idx];
auto *cur_scope = sub_scopes[place_idx]; auto *cur_scope = sub_scopes[place_idx];
workers.emplace_back( workers.emplace_back(framework::Async([program, cur_scope, place, block] {
framework::Async([program, cur_scope, place, block, place_idx] { framework::Executor executor(place);
framework::Executor executor(place); executor.Run(*program, cur_scope, block->ID(),
executor.Run(*program, cur_scope, block->ID(), false /*create_local_scope*/);
false /*create_local_scope*/); }));
}));
} }
for (auto &worker : workers) { for (auto &worker : workers) {
worker.wait(); worker.wait();
...@@ -239,12 +238,11 @@ class ParallelDoGradOp : public framework::OperatorBase { ...@@ -239,12 +238,11 @@ class ParallelDoGradOp : public framework::OperatorBase {
auto *cur_scope = sub_scopes[i]; auto *cur_scope = sub_scopes[i];
// execute // execute
workers.emplace_back( workers.emplace_back(framework::Async([program, cur_scope, place, block] {
framework::Async([program, cur_scope, place, block, i] { framework::Executor executor(place);
framework::Executor executor(place); executor.Run(*program, cur_scope, block->ID(),
executor.Run(*program, cur_scope, block->ID(), false /*create_local_scope*/);
false /*create_local_scope*/); }));
}));
} }
for (auto &worker : workers) { for (auto &worker : workers) {
worker.wait(); worker.wait();
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h" #include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/operators/detail/safe_ref.h" #include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -65,6 +66,12 @@ class ReadOp : public framework::OperatorBase { ...@@ -65,6 +66,12 @@ class ReadOp : public framework::OperatorBase {
.GetMutable<framework::ReaderHolder>(); .GetMutable<framework::ReaderHolder>();
std::vector<std::string> out_arg_names = Outputs("Out"); std::vector<std::string> out_arg_names = Outputs("Out");
std::vector<framework::LoDTensor> ins; std::vector<framework::LoDTensor> ins;
// For profiling
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto& ctx = *pool.Get(dev_place);
platform::RecordEvent record_event(Type(), &ctx);
reader->ReadNext(&ins); reader->ReadNext(&ins);
if (ins.empty()) { if (ins.empty()) {
if (Attr<bool>("throw_eof_exp")) { if (Attr<bool>("throw_eof_exp")) {
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
...@@ -14,6 +14,8 @@ limitations under the License. */ ...@@ -14,6 +14,8 @@ limitations under the License. */
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include <cub/cub.cuh>
#include "paddle/fluid/operators/math/cross_entropy.h"
#include "paddle/fluid/operators/softmax_with_cross_entropy_op.h" #include "paddle/fluid/operators/softmax_with_cross_entropy_op.h"
namespace paddle { namespace paddle {
...@@ -53,8 +55,196 @@ __global__ void SoftCrossEntropyGradientKernel(T* logit_grad, ...@@ -53,8 +55,196 @@ __global__ void SoftCrossEntropyGradientKernel(T* logit_grad,
logit_grad[ids] = loss_grad[row_ids] * (logit_grad[ids] - labels[ids]); logit_grad[ids] = loss_grad[row_ids] * (logit_grad[ids] - labels[ids]);
} }
} }
} // namespace } // namespace
// Precision-dispatching device helpers: overload resolution on the argument
// type picks the single- or double-precision math routine.

// exp(x) in single precision.
static __device__ __forceinline__ float real_exp(float x) {
  const float result = expf(x);
  return result;
}

// exp(x) in double precision.
static __device__ __forceinline__ double real_exp(double x) {
  const double result = exp(x);
  return result;
}

// log(x) in single precision; the raw logarithm is passed through
// math::TolerableValue<float> before being returned.
static __device__ __forceinline__ float real_log(float x) {
  math::TolerableValue<float> tolerable;
  return tolerable(logf(x));
}

// log(x) in double precision; the raw logarithm is passed through
// math::TolerableValue<double> before being returned.
static __device__ __forceinline__ double real_log(double x) {
  math::TolerableValue<double> tolerable;
  return tolerable(log(x));
}
/** In the following codes, 3 CUDA kernels are implemented to calculate softmax
* and loss **/
/*
Suppose x is `logits` and y is `labels`; the equations are as
follows:
cross\_entropy_i = \sum_{j}[- y_i_j * log({e^{x_i_j}/\sum_{j}e^{x_i_j}})]
= \sum_{j}[- y_i_j * log({e^{x_i_j - max_i}/\sum_{j}e^{x_i_j-max_i}})]
= \sum_{j}[-y_i_j * (x_i_j - max_i - log\sum_{j}e^{x_i_j - max_i})]
= \sum_{j}[-y_i_j * (x_i_j - max_i - logDiffMaxSum_i)]
= \sum_{j}(-y_i_j * tmp_i_j)
softmax_i_j = e^{tmp_i_j}
where:
max_i = \max_{j}{x_i_j}
logDiffMaxSum_i = log\sum_{j}e^{x_i_j - max_i}
tmp_i_j = x_i_j - max_i - logDiffMaxSum_i
Therefore, the calculation can be separated into 3 steps:
Step 1: row-wise operation to calculate max_i
Step 2: row-wise operation to calculate logDiffMaxSum_i
Step 3: calculate tmp_i_j, and finally get softmax_i_j and cross\_entropy_i
To save memory, we can share memory among max_i, logDiffMaxSum_i and
cross\_entropy_i.
In this way, the 3 steps should be changed to:
Step 1 (RowReductionForMax): row-wise operation to calculate max_i
Step 2 (RowReductionForDiffMaxSum): calculate intermediate result of softmax'_i_j =
x_i_j - max_i, and row-wise operation to calculate logDiffMaxSum_i
Step 3 (RowReductionForSoftmaxAndCrossEntropy): calculate tmp_i_j = softmax'_i_j
- logDiffMaxSum_i, and finally get softmax_i_j and cross\_entropy_i
*/
// There are 3 kinds of reduce algorithms in cub:
// BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY
// BLOCK_REDUCE_RAKING
// BLOCK_REDUCE_WARP_REDUCTIONS (default)
// The algorithm template argument is left commented out in the alias below,
// so whatever cub uses by default is selected.
template <typename T, int BlockDim>
using BlockReduce =
cub::BlockReduce<T, BlockDim /*, cub::BLOCK_REDUCE_WARP_REDUCTIONS*/>;
// Scratch storage type of BlockReduce<T, BlockDim>; the kernels in this file
// declare one instance of it as __shared__ and pass it to the reducer.
template <typename T, int BlockDim>
using BlockReduceTempStorage = typename BlockReduce<T, BlockDim>::TempStorage;
// Step 1: computes the maximum element of each row of `logits_data`.
//
// Each block handles the row selected by blockIdx.x; thread t scans elements
// t, t + BlockDim, t + 2 * BlockDim, ... of that row, then the per-thread
// maxima are combined with a block-wide reduction. Thread 0 stores the result
// in max_data[blockIdx.x], raised to -64 when the row maximum is below -64.
//
// Make sure that BlockDim <= feature_size: every thread reads its first
// element unconditionally.
template <typename T, int BlockDim>
__global__ void RowReductionForMax(const T* logits_data, T* max_data,
                                   int feature_size) {
  __shared__ BlockReduceTempStorage<T, BlockDim> temp_storage;

  auto idx = feature_size * blockIdx.x + threadIdx.x;
  auto row_end = feature_size * (blockIdx.x + 1);

  T thread_max = logits_data[idx];
  for (idx += BlockDim; idx < row_end; idx += BlockDim) {
    if (thread_max < logits_data[idx]) {
      thread_max = logits_data[idx];
    }
  }

  thread_max =
      BlockReduce<T, BlockDim>(temp_storage).Reduce(thread_max, cub::Max());

  if (threadIdx.x == 0) {
    max_data[blockIdx.x] = thread_max < -64 ? -64 : thread_max;
  }
}
// Step 2: for each row, stores softmax'[j] = logits[j] - max into `softmax`
// and replaces max_data[row] with log(sum_j exp(softmax'[j])).
//
// On entry max_data[blockIdx.x] holds the row maximum; on exit thread 0 has
// overwritten it with the log of the summed exponentials. Each thread covers
// elements t, t + BlockDim, ... of the row and the partial sums are combined
// with a block-wide reduction.
//
// Make sure that BlockDim <= feature_size: every thread processes its first
// element unconditionally.
template <typename T, int BlockDim>
__global__ void RowReductionForDiffMaxSum(const T* logits_data, T* max_data,
                                          T* softmax, int feature_size) {
  __shared__ BlockReduceTempStorage<T, BlockDim> temp_storage;

  auto idx = feature_size * blockIdx.x + threadIdx.x;
  auto row_end = feature_size * (blockIdx.x + 1);
  auto row_max = max_data[blockIdx.x];

  softmax[idx] = logits_data[idx] - row_max;
  T exp_sum = real_exp(softmax[idx]);
  for (idx += BlockDim; idx < row_end; idx += BlockDim) {
    softmax[idx] = logits_data[idx] - row_max;
    exp_sum += real_exp(softmax[idx]);
  }

  exp_sum =
      BlockReduce<T, BlockDim>(temp_storage).Reduce(exp_sum, cub::Sum());

  if (threadIdx.x == 0) {
    max_data[blockIdx.x] = real_log(exp_sum);
  }
}
// Step 3: turns the intermediate values in `softmax` into the final softmax
// and accumulates the cross-entropy loss of each row.
//
// On entry loss_data[blockIdx.x] holds the row's log-sum value and `softmax`
// holds logits - max. For every element, tmp = softmax[j] - log_sum is
// computed, softmax[j] is overwritten with exp(tmp) and -labels[j] * tmp is
// added to the thread's partial loss. The partial losses are combined with a
// block-wide reduction and thread 0 writes the total back to
// loss_data[blockIdx.x]. `logits_data` is not read by this kernel.
//
// Make sure that BlockDim <= feature_size: every thread processes its first
// element unconditionally.
template <typename T, int BlockDim>
__global__ void RowReductionForSoftmaxAndCrossEntropy(const T* logits_data,
                                                      const T* labels_data,
                                                      T* loss_data, T* softmax,
                                                      int feature_size) {
  __shared__ BlockReduceTempStorage<T, BlockDim> temp_storage;

  auto idx = feature_size * blockIdx.x + threadIdx.x;
  auto row_end = feature_size * (blockIdx.x + 1);

  // log_diff_max_sum shares memory with loss
  auto log_sum = loss_data[blockIdx.x];

  auto shifted = softmax[idx] - log_sum;
  softmax[idx] = real_exp(shifted);
  auto partial_loss = -labels_data[idx] * shifted;
  for (idx += BlockDim; idx < row_end; idx += BlockDim) {
    shifted = softmax[idx] - log_sum;
    softmax[idx] = real_exp(shifted);
    partial_loss -= (labels_data[idx] * shifted);
  }

  partial_loss =
      BlockReduce<T, BlockDim>(temp_storage).Reduce(partial_loss, cub::Sum());

  if (threadIdx.x == 0) {
    loss_data[blockIdx.x] = partial_loss;
  }
}
// Writes 1 into the first `batch_size` entries of `out`, one entry per
// thread; threads whose global index is past `batch_size` do nothing.
template <typename T>
__global__ void SetSoftmaxToOneWhenFeatureSizeIsOne(T* out, int batch_size) {
  auto global_idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (global_idx >= batch_size) return;
  out[global_idx] = static_cast<T>(1);
}
// Host-side launcher of the fused softmax + soft-label cross-entropy
// computation.
//
// logits_data / labels_data: inputs, indexed as row * feature_size + column.
// softmax_data: receives the softmax of every row.
// loss_data: receives one loss value per row; it is also passed to the first
//   two kernels as their per-row scratch buffer (row max, then log-sum).
// batch_size: number of rows, used as the grid size.
// feature_size: number of columns per row.
// stream: CUDA stream all work is enqueued on.
//
// The block size is the largest power of two that does not exceed
// feature_size, capped at kMaxBlockDim. Because the block size is a template
// parameter of the kernels, the switch below instantiates one launch sequence
// per supported power of two. feature_size == 1 is special-cased: softmax is
// set to 1 and the loss to 0 for every row.
template <typename T>
static void SoftmaxWithCrossEntropyFusedKernel(const T* logits_data,
                                               const T* labels_data,
                                               T* softmax_data, T* loss_data,
                                               int batch_size, int feature_size,
                                               cudaStream_t stream) {
  constexpr int kMaxBlockDim = 512;
  int block_dim = feature_size >= kMaxBlockDim
                      ? kMaxBlockDim
                      : (1 << static_cast<int>(std::log2(feature_size)));

// Launches the three row-reduction kernels back to back on `stream` for one
// compile-time block size.
#define CALL_SOFTMAX_WITH_CROSS_ENTROPY_FUSED_KERNEL(BlockDim)                 \
  case BlockDim:                                                               \
    RowReductionForMax<T, BlockDim><<<batch_size, BlockDim, 0, stream>>>(      \
        logits_data, loss_data, feature_size);                                 \
    RowReductionForDiffMaxSum<T,                                               \
                              BlockDim><<<batch_size, BlockDim, 0, stream>>>(  \
        logits_data, loss_data, softmax_data, feature_size);                   \
    RowReductionForSoftmaxAndCrossEntropy<                                     \
        T, BlockDim><<<batch_size, BlockDim, 0, stream>>>(                     \
        logits_data, labels_data, loss_data, softmax_data, feature_size);      \
    break

  switch (block_dim) {
    CALL_SOFTMAX_WITH_CROSS_ENTROPY_FUSED_KERNEL(512);
    CALL_SOFTMAX_WITH_CROSS_ENTROPY_FUSED_KERNEL(256);
    CALL_SOFTMAX_WITH_CROSS_ENTROPY_FUSED_KERNEL(128);
    CALL_SOFTMAX_WITH_CROSS_ENTROPY_FUSED_KERNEL(64);
    CALL_SOFTMAX_WITH_CROSS_ENTROPY_FUSED_KERNEL(32);
    CALL_SOFTMAX_WITH_CROSS_ENTROPY_FUSED_KERNEL(16);
    CALL_SOFTMAX_WITH_CROSS_ENTROPY_FUSED_KERNEL(8);
    CALL_SOFTMAX_WITH_CROSS_ENTROPY_FUSED_KERNEL(4);
    CALL_SOFTMAX_WITH_CROSS_ENTROPY_FUSED_KERNEL(2);
    case 1:
      SetSoftmaxToOneWhenFeatureSizeIsOne<<<(batch_size + kMaxBlockDim - 1) /
                                                kMaxBlockDim,
                                            kMaxBlockDim, 0, stream>>>(
          softmax_data, batch_size);
      // cudaMemsetAsync takes a size in BYTES. Passing batch_size alone
      // cleared only the first batch_size bytes, leaving most of the loss
      // buffer uninitialised for any T wider than one byte.
      cudaMemsetAsync(loss_data, 0, batch_size * sizeof(T), stream);
      break;
    default:
      PADDLE_THROW("BlockDim must be 2^n in softmax_with_cross_entropy_op");
      break;
  }
#undef CALL_SOFTMAX_WITH_CROSS_ENTROPY_FUSED_KERNEL
}
template <typename T> template <typename T>
class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel<T> { class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel<T> {
public: public:
...@@ -66,14 +256,24 @@ class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel<T> { ...@@ -66,14 +256,24 @@ class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel<T> {
Tensor* softmax = context.Output<Tensor>("Softmax"); Tensor* softmax = context.Output<Tensor>("Softmax");
Tensor* loss = context.Output<Tensor>("Loss"); Tensor* loss = context.Output<Tensor>("Loss");
softmax->mutable_data<T>(context.GetPlace()); auto* softmax_data = softmax->mutable_data<T>(context.GetPlace());
loss->mutable_data<T>(context.GetPlace()); auto* loss_data = loss->mutable_data<T>(context.GetPlace());
math::SoftmaxFunctor<platform::CUDADeviceContext, T>()( auto soft_label = context.Attr<bool>("soft_label");
context.cuda_device_context(), logits, softmax); if (soft_label) {
math::CrossEntropyFunctor<platform::CUDADeviceContext, T>()( int batch_size = logits->dims()[0];
context.cuda_device_context(), loss, softmax, labels, int feature_size = logits->dims()[1];
context.Attr<bool>("soft_label")); auto* logits_data = logits->data<T>();
auto* labels_data = labels->data<T>();
SoftmaxWithCrossEntropyFusedKernel(
logits_data, labels_data, softmax_data, loss_data, batch_size,
feature_size, context.cuda_device_context().stream());
} else {
math::SoftmaxCUDNNFunctor<T>()(context.cuda_device_context(), logits,
softmax);
math::CrossEntropyFunctor<platform::CUDADeviceContext, T>()(
context.cuda_device_context(), loss, softmax, labels, false);
}
} }
}; };
......
...@@ -55,18 +55,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) { ...@@ -55,18 +55,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) {
"TensorRT' tensor input requires at least 2 dimensions"); "TensorRT' tensor input requires at least 2 dimensions");
PADDLE_ENFORCE_LE(shape.size(), 4UL, PADDLE_ENFORCE_LE(shape.size(), 4UL,
"TensorRT' tensor input requires at most 4 dimensions"); "TensorRT' tensor input requires at most 4 dimensions");
PADDLE_ENFORCE_EQ(shape.size(), 4UL);
switch (shape.size()) { return nvinfer1::DimsCHW(shape[1], shape[2], shape[3]);
case 2:
return nvinfer1::Dims2(1, shape[1]);
case 3:
return nvinfer1::Dims3(1, shape[1], shape[2]);
case 4:
return nvinfer1::Dims4(1, shape[1], shape[2], shape[3]);
default:
return nvinfer1::Dims();
}
return nvinfer1::Dims();
} }
} // namespace } // namespace
...@@ -86,6 +76,9 @@ void TensorRTEngineKernel<DeviceContext, T>::Prepare( ...@@ -86,6 +76,9 @@ void TensorRTEngineKernel<DeviceContext, T>::Prepare(
parameters.insert(param); parameters.insert(param);
} }
std::vector<std::string> output_maps =
context.Attr<std::vector<std::string>>("output_name_mapping");
// TODO(Superjomn) replace this with a different stream // TODO(Superjomn) replace this with a different stream
auto *engine = Singleton<TRT_EngineManager>::Global().Create( auto *engine = Singleton<TRT_EngineManager>::Global().Create(
max_batch, max_workspace, nullptr /*engine hold its own stream*/, max_batch, max_workspace, nullptr /*engine hold its own stream*/,
...@@ -97,6 +90,7 @@ void TensorRTEngineKernel<DeviceContext, T>::Prepare( ...@@ -97,6 +90,7 @@ void TensorRTEngineKernel<DeviceContext, T>::Prepare(
// Add inputs // Add inputs
VLOG(4) << "declare inputs"; VLOG(4) << "declare inputs";
for (auto &input : context.Inputs("Xs")) { for (auto &input : context.Inputs("Xs")) {
if (parameters.count(input)) continue;
VLOG(4) << "declare input " << input; VLOG(4) << "declare input " << input;
auto *var = block.FindVar(input); auto *var = block.FindVar(input);
// TensorRT engine need to create parameters. The parameter's description // TensorRT engine need to create parameters. The parameter's description
...@@ -122,7 +116,7 @@ void TensorRTEngineKernel<DeviceContext, T>::Prepare( ...@@ -122,7 +116,7 @@ void TensorRTEngineKernel<DeviceContext, T>::Prepare(
block_desc, parameters, context.scope(), engine); block_desc, parameters, context.scope(), engine);
// Add outputs // Add outputs
for (auto &output : context.Outputs("Ys")) { for (auto &output : output_maps) {
engine->DeclareOutput(output); engine->DeclareOutput(output);
} }
......
...@@ -66,8 +66,17 @@ class TensorRTEngineKernel : public framework::OpKernel<T> { ...@@ -66,8 +66,17 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_LE(FLAGS_tensorrt_engine_batch_size, PADDLE_ENFORCE_LE(FLAGS_tensorrt_engine_batch_size,
context.Attr<int>("max_batch")); context.Attr<int>("max_batch"));
std::vector<std::string> output_maps =
context.Attr<std::vector<std::string>>("output_name_mapping");
auto params = context.Attr<std::vector<std::string>>("parameters");
std::unordered_set<std::string> parameters;
for (const auto& param : params) {
parameters.insert(param);
}
// Convert input tensor from fluid to engine. // Convert input tensor from fluid to engine.
for (const auto& x : context.Inputs("Xs")) { for (const auto& x : context.Inputs("Xs")) {
if (parameters.count(x)) continue;
// convert input and copy to TRT engine's buffer // convert input and copy to TRT engine's buffer
auto& t = inference::analysis::GetFromScope<framework::LoDTensor>( auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(
context.scope(), x); context.scope(), x);
...@@ -82,10 +91,12 @@ class TensorRTEngineKernel : public framework::OpKernel<T> { ...@@ -82,10 +91,12 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
// Execute the engine. // Execute the engine.
PADDLE_ENFORCE_GT(FLAGS_tensorrt_engine_batch_size, 0); PADDLE_ENFORCE_GT(FLAGS_tensorrt_engine_batch_size, 0);
engine->Execute(FLAGS_tensorrt_engine_batch_size); engine->Execute(FLAGS_tensorrt_engine_batch_size);
// Convert output tensor from engine to fluid // Convert output tensor from engine to fluid
int output_index = 0;
for (const auto& y : context.Outputs("Ys")) { for (const auto& y : context.Outputs("Ys")) {
// convert output and copy to fluid. // convert output and copy to fluid.
nvinfer1::ITensor* trt_t = engine->GetITensor(y); nvinfer1::ITensor* trt_t = engine->GetITensor(output_maps[output_index]);
auto dims = trt_t->getDimensions(); auto dims = trt_t->getDimensions();
// Use the output ITensor's dims to reshape the Fluid Tensor. // Use the output ITensor's dims to reshape the Fluid Tensor.
std::vector<int> ddim(dims.d, dims.d + dims.nbDims); std::vector<int> ddim(dims.d, dims.d + dims.nbDims);
...@@ -102,7 +113,7 @@ class TensorRTEngineKernel : public framework::OpKernel<T> { ...@@ -102,7 +113,7 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
// TODO(Superjomn) change this float to dtype size. // TODO(Superjomn) change this float to dtype size.
auto size = inference::analysis::AccuDims(dims.d, dims.nbDims) * auto size = inference::analysis::AccuDims(dims.d, dims.nbDims) *
FLAGS_tensorrt_engine_batch_size; FLAGS_tensorrt_engine_batch_size;
engine->GetOutputInCPU(y, engine->GetOutputInCPU(output_maps[output_index],
fluid_t->mutable_data<float>(platform::CPUPlace()), fluid_t->mutable_data<float>(platform::CPUPlace()),
size * sizeof(float)); size * sizeof(float));
//} else { //} else {
...@@ -110,6 +121,7 @@ class TensorRTEngineKernel : public framework::OpKernel<T> { ...@@ -110,6 +121,7 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
// y, fluid_t->mutable_data<float>(platform::CUDAPlace()), // y, fluid_t->mutable_data<float>(platform::CUDAPlace()),
// size * sizeof(float)); // size * sizeof(float));
//} //}
output_index += 1;
} }
cudaStreamSynchronize(*engine->stream()); cudaStreamSynchronize(*engine->stream());
......
...@@ -103,6 +103,9 @@ TEST(TensorRTEngineOp, manual) { ...@@ -103,6 +103,9 @@ TEST(TensorRTEngineOp, manual) {
SetAttr<std::string>(engine_op_desc.Proto(), "engine_uniq_key", "a_engine"); SetAttr<std::string>(engine_op_desc.Proto(), "engine_uniq_key", "a_engine");
SetAttr<std::vector<std::string>>(engine_op_desc.Proto(), "parameters", SetAttr<std::vector<std::string>>(engine_op_desc.Proto(), "parameters",
std::vector<std::string>({})); std::vector<std::string>({}));
SetAttr<std::vector<std::string>>(engine_op_desc.Proto(),
"output_name_mapping",
std::vector<std::string>({"z0"}));
LOG(INFO) << "create engine op"; LOG(INFO) << "create engine op";
auto engine_op = framework::OpRegistry::CreateOp(*engine_op_desc.Proto()); auto engine_op = framework::OpRegistry::CreateOp(*engine_op_desc.Proto());
...@@ -196,6 +199,10 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) { ...@@ -196,6 +199,10 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
std::vector<std::string>({"y0", "y1", "y2", "y3"})); std::vector<std::string>({"y0", "y1", "y2", "y3"}));
SetAttr<std::string>(engine_op_desc.Proto(), "engine_uniq_key", "b_engine"); SetAttr<std::string>(engine_op_desc.Proto(), "engine_uniq_key", "b_engine");
SetAttr<std::vector<std::string>>(engine_op_desc.Proto(),
"output_name_mapping",
std::vector<std::string>({"z3"}));
auto engine_op = framework::OpRegistry::CreateOp(*engine_op_desc.Proto()); auto engine_op = framework::OpRegistry::CreateOp(*engine_op_desc.Proto());
// Execute them. // Execute them.
......
...@@ -223,7 +223,7 @@ class MKLDNNHandler { ...@@ -223,7 +223,7 @@ class MKLDNNHandler {
static std::string GetHash(mkldnn::memory::dims& operand_dims, // NOLINT static std::string GetHash(mkldnn::memory::dims& operand_dims, // NOLINT
const std::string& suffix) { const std::string& suffix) {
return dims2str(operand_dims) + suffix; return dims2str(operand_dims) + suffix;
}; }
protected: protected:
static std::string dims2str(const mkldnn::memory::dims& operand_dims) { static std::string dims2str(const mkldnn::memory::dims& operand_dims) {
...@@ -251,5 +251,17 @@ inline mkldnn::memory::format MKLDNNFormatForSize( ...@@ -251,5 +251,17 @@ inline mkldnn::memory::format MKLDNNFormatForSize(
return data_format; return data_format;
} }
// Maps a data-layout name to the corresponding MKLDNN memory format.
//
// The string is parsed with framework::StringToDataLayout; kNHWC maps to
// format::nhwc, kNCHW to format::nchw, and every other layout to format::any.
inline mkldnn::memory::format data_format_to_memory_format(
    const std::string& data_format) {
  const auto layout = framework::StringToDataLayout(data_format);
  if (layout == framework::DataLayout::kNHWC) {
    return mkldnn::memory::format::nhwc;
  }
  if (layout == framework::DataLayout::kNCHW) {
    return mkldnn::memory::format::nchw;
  }
  return mkldnn::memory::format::any;
}
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -394,8 +394,10 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -394,8 +394,10 @@ All parameter, weight, gradient are variables in Paddle.
InferenceOptimize(*(origin.Proto()), &pruned_desc); InferenceOptimize(*(origin.Proto()), &pruned_desc);
return new ProgramDesc(pruned_desc); return new ProgramDesc(pruned_desc);
}); });
m.def("empty_var_name", []() { return framework::kEmptyVarName; }); m.def("empty_var_name",
m.def("grad_var_suffix", []() { return framework::kGradVarSuffix; }); []() { return std::string(framework::kEmptyVarName); });
m.def("grad_var_suffix",
[]() { return std::string(framework::kGradVarSuffix); });
m.def_submodule( m.def_submodule(
"var_names", "var_names",
"The module will return special predefined variable name in Paddle") "The module will return special predefined variable name in Paddle")
......
...@@ -419,6 +419,25 @@ EOF ...@@ -419,6 +419,25 @@ EOF
linkchecker doc/v2/en/html/index.html linkchecker doc/v2/en/html/index.html
linkchecker doc/v2/cn/html/index.html linkchecker doc/v2/cn/html/index.html
linkchecker doc/v2/api/en/html/index.html linkchecker doc/v2/api/en/html/index.html
if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
# Deploy to the content server if it is a "develop" or "release/version" branch
# The "develop_doc" branch is reserved to test full deploy process without impacting the real content.
if [ "$TRAVIS_BRANCH" == "develop_doc" ]; then
PPO_SCRIPT_BRANCH=develop
elif [[ "$TRAVIS_BRANCH" == "develop" || "$TRAVIS_BRANCH" =~ ^v|release/[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then
PPO_SCRIPT_BRANCH=master
else
# Early exit, this branch doesn't require documentation build
return 0;
fi
# Fetch the paddlepaddle.org deploy_docs.sh from the appropriate branch
export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/$PPO_SCRIPT_BRANCH/scripts/deploy/deploy_docs.sh
export PYTHONPATH=$PYTHONPATH:${PADDLE_ROOT}/build/python:/paddle/build/python
cd ..
curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH ${PADDLE_ROOT} ${PADDLE_ROOT}/build/doc/ ${PPO_SCRIPT_BRANCH}
cd -
} }
function gen_html() { function gen_html() {
......
...@@ -52,6 +52,9 @@ EOL ...@@ -52,6 +52,9 @@ EOL
${DOCKER_CMD} run -it \ ${DOCKER_CMD} run -it \
${DOCKER_ENV} \ ${DOCKER_ENV} \
-e SCRIPT_NAME=$0 \ -e SCRIPT_NAME=$0 \
-e CONTENT_DEC_PASSWD=$CONTENT_DEC_PASSWD \
-e TRAVIS_BRANCH=$TRAVIS_BRANCH \
-e TRAVIS_PULL_REQUEST=$TRAVIS_PULL_REQUEST \
-v $PADDLE_ROOT:/paddle \ -v $PADDLE_ROOT:/paddle \
-v ${HOME}/.ccache:/root/.ccache \ -v ${HOME}/.ccache:/root/.ccache \
-w /paddle \ -w /paddle \
......
...@@ -28,11 +28,12 @@ images per class. ...@@ -28,11 +28,12 @@ images per class.
""" """
import cPickle
import itertools import itertools
import numpy import numpy
import paddle.dataset.common import paddle.dataset.common
import tarfile import tarfile
from six.moves import zip
from six.moves import cPickle as pickle
__all__ = ['train100', 'test100', 'train10', 'test10', 'convert'] __all__ = ['train100', 'test100', 'train10', 'test10', 'convert']
...@@ -48,7 +49,7 @@ def reader_creator(filename, sub_name, cycle=False): ...@@ -48,7 +49,7 @@ def reader_creator(filename, sub_name, cycle=False):
data = batch['data'] data = batch['data']
labels = batch.get('labels', batch.get('fine_labels', None)) labels = batch.get('labels', batch.get('fine_labels', None))
assert labels is not None assert labels is not None
for sample, label in itertools.izip(data, labels): for sample, label in zip(data, labels):
yield (sample / 255.0).astype(numpy.float32), int(label) yield (sample / 255.0).astype(numpy.float32), int(label)
def reader(): def reader():
...@@ -58,7 +59,7 @@ def reader_creator(filename, sub_name, cycle=False): ...@@ -58,7 +59,7 @@ def reader_creator(filename, sub_name, cycle=False):
while True: while True:
for name in names: for name in names:
batch = cPickle.load(f.extractfile(name)) batch = pickle.load(f.extractfile(name))
for item in read_batch(batch): for item in read_batch(batch):
yield item yield item
if not cycle: if not cycle:
......
...@@ -20,9 +20,8 @@ import shutil ...@@ -20,9 +20,8 @@ import shutil
import sys import sys
import importlib import importlib
import paddle.dataset import paddle.dataset
import cPickle import six.moves.cPickle as pickle
import glob import glob
import cPickle as pickle
__all__ = [ __all__ = [
'DATA_HOME', 'DATA_HOME',
...@@ -75,13 +74,13 @@ def download(url, module_name, md5sum, save_name=None): ...@@ -75,13 +74,13 @@ def download(url, module_name, md5sum, save_name=None):
retry_limit = 3 retry_limit = 3
while not (os.path.exists(filename) and md5file(filename) == md5sum): while not (os.path.exists(filename) and md5file(filename) == md5sum):
if os.path.exists(filename): if os.path.exists(filename):
print "file md5", md5file(filename), md5sum print("file md5", md5file(filename), md5sum)
if retry < retry_limit: if retry < retry_limit:
retry += 1 retry += 1
else: else:
raise RuntimeError("Cannot download {0} within retry limit {1}". raise RuntimeError("Cannot download {0} within retry limit {1}".
format(url, retry_limit)) format(url, retry_limit))
print "Cache file %s not found, downloading %s" % (filename, url) print("Cache file %s not found, downloading %s" % (filename, url))
r = requests.get(url, stream=True) r = requests.get(url, stream=True)
total_length = r.headers.get('content-length') total_length = r.headers.get('content-length')
...@@ -104,8 +103,9 @@ def download(url, module_name, md5sum, save_name=None): ...@@ -104,8 +103,9 @@ def download(url, module_name, md5sum, save_name=None):
def fetch_all(): def fetch_all():
for module_name in filter(lambda x: not x.startswith("__"), for module_name in [
dir(paddle.dataset)): x for x in dir(paddle.dataset) if not x.startswith("__")
]:
if "fetch" in dir( if "fetch" in dir(
importlib.import_module("paddle.dataset.%s" % module_name)): importlib.import_module("paddle.dataset.%s" % module_name)):
getattr( getattr(
...@@ -114,8 +114,9 @@ def fetch_all(): ...@@ -114,8 +114,9 @@ def fetch_all():
def fetch_all_recordio(path): def fetch_all_recordio(path):
for module_name in filter(lambda x: not x.startswith("__"), for module_name in [
dir(paddle.dataset)): x for x in dir(paddle.dataset) if not x.startswith("__")
]:
if "convert" in dir( if "convert" in dir(
importlib.import_module("paddle.dataset.%s" % module_name)) and \ importlib.import_module("paddle.dataset.%s" % module_name)) and \
not module_name == "common": not module_name == "common":
...@@ -126,7 +127,7 @@ def fetch_all_recordio(path): ...@@ -126,7 +127,7 @@ def fetch_all_recordio(path):
"convert")(ds_path) "convert")(ds_path)
def split(reader, line_count, suffix="%05d.pickle", dumper=cPickle.dump): def split(reader, line_count, suffix="%05d.pickle", dumper=pickle.dump):
""" """
you can call the function as: you can call the function as:
...@@ -167,7 +168,7 @@ def split(reader, line_count, suffix="%05d.pickle", dumper=cPickle.dump): ...@@ -167,7 +168,7 @@ def split(reader, line_count, suffix="%05d.pickle", dumper=cPickle.dump):
def cluster_files_reader(files_pattern, def cluster_files_reader(files_pattern,
trainer_count, trainer_count,
trainer_id, trainer_id,
loader=cPickle.load): loader=pickle.load):
""" """
Create a reader that yield element from the given files, select Create a reader that yield element from the given files, select
a file set according trainer count and trainer_id a file set according trainer count and trainer_id
...@@ -188,7 +189,7 @@ def cluster_files_reader(files_pattern, ...@@ -188,7 +189,7 @@ def cluster_files_reader(files_pattern,
my_file_list = [] my_file_list = []
for idx, fn in enumerate(file_list): for idx, fn in enumerate(file_list):
if idx % trainer_count == trainer_id: if idx % trainer_count == trainer_id:
print "append file: %s" % fn print("append file: %s" % fn)
my_file_list.append(fn) my_file_list.append(fn)
for fn in my_file_list: for fn in my_file_list:
with open(fn, "r") as f: with open(fn, "r") as f:
...@@ -221,7 +222,7 @@ def convert(output_path, reader, line_count, name_prefix): ...@@ -221,7 +222,7 @@ def convert(output_path, reader, line_count, name_prefix):
for l in lines: for l in lines:
# FIXME(Yancey1989): # FIXME(Yancey1989):
# dumps with protocol: pickle.HIGHEST_PROTOCOL # dumps with protocol: pickle.HIGHEST_PROTOCOL
writer.write(cPickle.dumps(l)) writer.write(pickle.dumps(l))
writer.close() writer.close()
lines = [] lines = []
......
...@@ -24,18 +24,19 @@ import tarfile ...@@ -24,18 +24,19 @@ import tarfile
import gzip import gzip
import itertools import itertools
import paddle.dataset.common import paddle.dataset.common
from six.moves import zip
__all__ = ['test, get_dict', 'get_embedding', 'convert'] __all__ = ['test, get_dict', 'get_embedding', 'convert']
DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz' DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
DATA_MD5 = '387719152ae52d60422c016e92a742fc' DATA_MD5 = '387719152ae52d60422c016e92a742fc'
WORDDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt' WORDDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FwordDict.txt'
WORDDICT_MD5 = 'ea7fb7d4c75cc6254716f0177a506baa' WORDDICT_MD5 = 'ea7fb7d4c75cc6254716f0177a506baa'
VERBDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt' VERBDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FverbDict.txt'
VERBDICT_MD5 = '0d2977293bbb6cbefab5b0f97db1e77c' VERBDICT_MD5 = '0d2977293bbb6cbefab5b0f97db1e77c'
TRGDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt' TRGDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FtargetDict.txt'
TRGDICT_MD5 = 'd8c7f03ceb5fc2e5a0fa7503a4353751' TRGDICT_MD5 = 'd8c7f03ceb5fc2e5a0fa7503a4353751'
EMB_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb' EMB_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2Femb'
EMB_MD5 = 'bf436eb0faa1f6f9103017f8be57cdb7' EMB_MD5 = 'bf436eb0faa1f6f9103017f8be57cdb7'
UNK_IDX = 0 UNK_IDX = 0
...@@ -87,12 +88,12 @@ def corpus_reader(data_path, words_name, props_name): ...@@ -87,12 +88,12 @@ def corpus_reader(data_path, words_name, props_name):
sentences = [] sentences = []
labels = [] labels = []
one_seg = [] one_seg = []
for word, label in itertools.izip(words_file, props_file): for word, label in zip(words_file, props_file):
word = word.strip() word = word.strip()
label = label.strip().split() label = label.strip().split()
if len(label) == 0: # end of sentence if len(label) == 0: # end of sentence
for i in xrange(len(one_seg[0])): for i in range(len(one_seg[0])):
a_kind_lable = [x[i] for x in one_seg] a_kind_lable = [x[i] for x in one_seg]
labels.append(a_kind_lable) labels.append(a_kind_lable)
......
...@@ -28,10 +28,9 @@ Graphics and Image Processing (2008) ...@@ -28,10 +28,9 @@ Graphics and Image Processing (2008)
http://www.robots.ox.ac.uk/~vgg/publications/papers/nilsback08.{pdf,ps.gz}. http://www.robots.ox.ac.uk/~vgg/publications/papers/nilsback08.{pdf,ps.gz}.
""" """
import cPickle
import itertools import itertools
import functools import functools
from common import download from .common import download
import tarfile import tarfile
import scipy.io as scio import scipy.io as scio
from paddle.dataset.image import * from paddle.dataset.image import *
...@@ -39,6 +38,8 @@ from paddle.reader import * ...@@ -39,6 +38,8 @@ from paddle.reader import *
import os import os
import numpy as np import numpy as np
from multiprocessing import cpu_count from multiprocessing import cpu_count
from six.moves import cPickle as pickle
from six.moves import zip
__all__ = ['train', 'test', 'valid'] __all__ = ['train', 'test', 'valid']
DATA_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz' DATA_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz'
...@@ -116,10 +117,10 @@ def reader_creator(data_file, ...@@ -116,10 +117,10 @@ def reader_creator(data_file,
file = file.strip() file = file.strip()
batch = None batch = None
with open(file, 'r') as f: with open(file, 'r') as f:
batch = cPickle.load(f) batch = pickle.load(f)
data = batch['data'] data = batch['data']
labels = batch['label'] labels = batch['label']
for sample, label in itertools.izip(data, batch['label']): for sample, label in zip(data, batch['label']):
yield sample, int(label) - 1 yield sample, int(label) - 1
if not cycle: if not cycle:
break break
......
...@@ -36,7 +36,7 @@ except ImportError: ...@@ -36,7 +36,7 @@ except ImportError:
cv2 = None cv2 = None
import os import os
import tarfile import tarfile
import cPickle import six.moves.cPickle as pickle
__all__ = [ __all__ = [
"load_image_bytes", "load_image", "resize_short", "to_chw", "center_crop", "load_image_bytes", "load_image", "resize_short", "to_chw", "center_crop",
...@@ -86,10 +86,10 @@ def batch_images_from_tar(data_file, ...@@ -86,10 +86,10 @@ def batch_images_from_tar(data_file,
output = {} output = {}
output['label'] = labels output['label'] = labels
output['data'] = data output['data'] = data
cPickle.dump( pickle.dump(
output, output,
open('%s/batch_%d' % (out_path, file_id), 'w'), open('%s/batch_%d' % (out_path, file_id), 'w'),
protocol=cPickle.HIGHEST_PROTOCOL) protocol=pickle.HIGHEST_PROTOCOL)
file_id += 1 file_id += 1
data = [] data = []
labels = [] labels = []
...@@ -97,10 +97,10 @@ def batch_images_from_tar(data_file, ...@@ -97,10 +97,10 @@ def batch_images_from_tar(data_file,
output = {} output = {}
output['label'] = labels output['label'] = labels
output['data'] = data output['data'] = data
cPickle.dump( pickle.dump(
output, output,
open('%s/batch_%d' % (out_path, file_id), 'w'), open('%s/batch_%d' % (out_path, file_id), 'w'),
protocol=cPickle.HIGHEST_PROTOCOL) protocol=pickle.HIGHEST_PROTOCOL)
with open(meta_file, 'a') as meta: with open(meta_file, 'a') as meta:
for file in os.listdir(out_path): for file in os.listdir(out_path):
......
...@@ -42,13 +42,13 @@ def tokenize(pattern): ...@@ -42,13 +42,13 @@ def tokenize(pattern):
# sequential access of member files, other than # sequential access of member files, other than
# tarfile.extractfile, which does random access and might # tarfile.extractfile, which does random access and might
# destroy hard disks. # destroy hard disks.
tf = tarf.next() tf = next(tarf)
while tf != None: while tf != None:
if bool(pattern.match(tf.name)): if bool(pattern.match(tf.name)):
# newline and punctuations removal and ad-hoc tokenization. # newline and punctuations removal and ad-hoc tokenization.
yield tarf.extractfile(tf).read().rstrip("\n\r").translate( yield tarf.extractfile(tf).read().rstrip("\n\r").translate(
None, string.punctuation).lower().split() None, string.punctuation).lower().split()
tf = tarf.next() tf = next(tarf)
def build_dict(pattern, cutoff): def build_dict(pattern, cutoff):
...@@ -62,11 +62,11 @@ def build_dict(pattern, cutoff): ...@@ -62,11 +62,11 @@ def build_dict(pattern, cutoff):
word_freq[word] += 1 word_freq[word] += 1
# Not sure if we should prune less-frequent words here. # Not sure if we should prune less-frequent words here.
word_freq = filter(lambda x: x[1] > cutoff, word_freq.items()) word_freq = [x for x in list(word_freq.items()) if x[1] > cutoff]
dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0])) dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0]))
words, _ = list(zip(*dictionary)) words, _ = list(zip(*dictionary))
word_idx = dict(zip(words, xrange(len(words)))) word_idx = dict(list(zip(words, list(range(len(words))))))
word_idx['<unk>'] = len(words) word_idx['<unk>'] = len(words)
return word_idx return word_idx
......
...@@ -64,11 +64,11 @@ def build_dict(min_word_freq=50): ...@@ -64,11 +64,11 @@ def build_dict(min_word_freq=50):
# remove <unk> for now, since we will set it as last index # remove <unk> for now, since we will set it as last index
del word_freq['<unk>'] del word_freq['<unk>']
word_freq = filter(lambda x: x[1] > min_word_freq, word_freq.items()) word_freq = [x for x in list(word_freq.items()) if x[1] > min_word_freq]
word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0])) word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0]))
words, _ = list(zip(*word_freq_sorted)) words, _ = list(zip(*word_freq_sorted))
word_idx = dict(zip(words, xrange(len(words)))) word_idx = dict(list(zip(words, list(range(len(words))))))
word_idx['<unk>'] = len(words) word_idx['<unk>'] = len(words)
return word_idx return word_idx
......
...@@ -65,7 +65,7 @@ def reader_creator(image_filename, label_filename, buffer_size): ...@@ -65,7 +65,7 @@ def reader_creator(image_filename, label_filename, buffer_size):
images = images / 255.0 * 2.0 - 1.0 images = images / 255.0 * 2.0 - 1.0
for i in xrange(buffer_size): for i in range(buffer_size):
yield images[i, :], int(labels[i]) yield images[i, :], int(labels[i])
finally: finally:
try: try:
......
...@@ -16,7 +16,7 @@ Movielens 1-M dataset. ...@@ -16,7 +16,7 @@ Movielens 1-M dataset.
Movielens 1-M dataset contains 1 million ratings from 6000 users on 4000 Movielens 1-M dataset contains 1 million ratings from 6000 users on 4000
movies, which was collected by GroupLens Research. This module will download movies, which was collected by GroupLens Research. This module will download
Movielens 1-M dataset from Movielens 1-M dataset from
http://files.grouplens.org/datasets/movielens/ml-1m.zip and parse training http://files.grouplens.org/datasets/movielens/ml-1m.zip and parse training
set and test set into paddle reader creators. set and test set into paddle reader creators.
...@@ -187,7 +187,7 @@ def max_movie_id(): ...@@ -187,7 +187,7 @@ def max_movie_id():
Get the maximum value of movie id. Get the maximum value of movie id.
""" """
__initialize_meta_info__() __initialize_meta_info__()
return reduce(__max_index_info__, MOVIE_INFO.viewvalues()).index return reduce(__max_index_info__, list(MOVIE_INFO.values())).index
def max_user_id(): def max_user_id():
...@@ -195,7 +195,7 @@ def max_user_id(): ...@@ -195,7 +195,7 @@ def max_user_id():
Get the maximum value of user id. Get the maximum value of user id.
""" """
__initialize_meta_info__() __initialize_meta_info__()
return reduce(__max_index_info__, USER_INFO.viewvalues()).index return reduce(__max_index_info__, list(USER_INFO.values())).index
def __max_job_id_impl__(a, b): def __max_job_id_impl__(a, b):
...@@ -210,7 +210,7 @@ def max_job_id(): ...@@ -210,7 +210,7 @@ def max_job_id():
Get the maximum value of job id. Get the maximum value of job id.
""" """
__initialize_meta_info__() __initialize_meta_info__()
return reduce(__max_job_id_impl__, USER_INFO.viewvalues()).job_id return reduce(__max_job_id_impl__, list(USER_INFO.values())).job_id
def movie_categories(): def movie_categories():
...@@ -243,7 +243,7 @@ def unittest(): ...@@ -243,7 +243,7 @@ def unittest():
for test_count, _ in enumerate(test()()): for test_count, _ in enumerate(test()()):
pass pass
print train_count, test_count print(train_count, test_count)
def fetch(): def fetch():
......
...@@ -26,7 +26,7 @@ http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ20 ...@@ -26,7 +26,7 @@ http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ20
import os import os
import functools import functools
import rarfile import rarfile
from common import download from .common import download
import numpy as np import numpy as np
# URL = "http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2007.rar" # URL = "http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2007.rar"
...@@ -53,7 +53,7 @@ class Query(object): ...@@ -53,7 +53,7 @@ class Query(object):
---------- ----------
query_id : int query_id : int
query_id in dataset, mapping from query to relevance documents query_id in dataset, mapping from query to relevance documents
relevance_score : int relevance_score : int
relevance score of query and document pair relevance score of query and document pair
feature_vector : array, dense feature feature_vector : array, dense feature
feature in vector format feature in vector format
...@@ -92,7 +92,7 @@ class Query(object): ...@@ -92,7 +92,7 @@ class Query(object):
sys.stdout.write("expect 48 space split parts, get %d" % sys.stdout.write("expect 48 space split parts, get %d" %
(len(parts))) (len(parts)))
return None return None
# format : 0 qid:10 1:0.000272 2:0.000000 .... # format : 0 qid:10 1:0.000272 2:0.000000 ....
self.relevance_score = int(parts[0]) self.relevance_score = int(parts[0])
self.query_id = int(parts[1].split(':')[1]) self.query_id = int(parts[1].split(':')[1])
for p in parts[2:]: for p in parts[2:]:
...@@ -295,7 +295,7 @@ def __reader__(filepath, format="pairwise", shuffle=False, fill_missing=-1): ...@@ -295,7 +295,7 @@ def __reader__(filepath, format="pairwise", shuffle=False, fill_missing=-1):
-------- --------
filename : string filename : string
fill_missing : fill the missing value. default in MQ2007 is -1 fill_missing : fill the missing value. default in MQ2007 is -1
Returns Returns
------ ------
yield yield
...@@ -330,4 +330,4 @@ if __name__ == "__main__": ...@@ -330,4 +330,4 @@ if __name__ == "__main__":
mytest = functools.partial( mytest = functools.partial(
__reader__, filepath="MQ2007/MQ2007/Fold1/sample", format="listwise") __reader__, filepath="MQ2007/MQ2007/Fold1/sample", format="listwise")
for label, query in mytest(): for label, query in mytest():
print label, query print(label, query)
...@@ -43,11 +43,11 @@ def download_data_if_not_yet(): ...@@ -43,11 +43,11 @@ def download_data_if_not_yet():
nltk.data.path.append(paddle.dataset.common.DATA_HOME) nltk.data.path.append(paddle.dataset.common.DATA_HOME)
movie_reviews.categories() movie_reviews.categories()
except LookupError: except LookupError:
print "Downloading movie_reviews data set, please wait....." print("Downloading movie_reviews data set, please wait.....")
nltk.download( nltk.download(
'movie_reviews', download_dir=paddle.dataset.common.DATA_HOME) 'movie_reviews', download_dir=paddle.dataset.common.DATA_HOME)
print "Download data set success....." print("Download data set success.....")
print "Path is " + nltk.data.find('corpora/movie_reviews').path print("Path is " + nltk.data.find('corpora/movie_reviews').path)
def get_word_dict(): def get_word_dict():
...@@ -64,7 +64,7 @@ def get_word_dict(): ...@@ -64,7 +64,7 @@ def get_word_dict():
for field in movie_reviews.fileids(category): for field in movie_reviews.fileids(category):
for words in movie_reviews.words(field): for words in movie_reviews.words(field):
word_freq_dict[words] += 1 word_freq_dict[words] += 1
words_sort_list = word_freq_dict.items() words_sort_list = list(word_freq_dict.items())
words_sort_list.sort(cmp=lambda a, b: b[1] - a[1]) words_sort_list.sort(cmp=lambda a, b: b[1] - a[1])
for index, word in enumerate(words_sort_list): for index, word in enumerate(words_sort_list):
words_freq_sorted.append((word[0], index)) words_freq_sorted.append((word[0], index))
...@@ -80,7 +80,8 @@ def sort_files(): ...@@ -80,7 +80,8 @@ def sort_files():
files_list = list() files_list = list()
neg_file_list = movie_reviews.fileids('neg') neg_file_list = movie_reviews.fileids('neg')
pos_file_list = movie_reviews.fileids('pos') pos_file_list = movie_reviews.fileids('pos')
files_list = list(chain.from_iterable(zip(neg_file_list, pos_file_list))) files_list = list(
chain.from_iterable(list(zip(neg_file_list, pos_file_list))))
return files_list return files_list
......
...@@ -36,7 +36,7 @@ class TestCommon(unittest.TestCase): ...@@ -36,7 +36,7 @@ class TestCommon(unittest.TestCase):
def test_split(self): def test_split(self):
def test_reader(): def test_reader():
def reader(): def reader():
for x in xrange(10): for x in range(10):
yield x yield x
return reader return reader
...@@ -49,7 +49,7 @@ class TestCommon(unittest.TestCase): ...@@ -49,7 +49,7 @@ class TestCommon(unittest.TestCase):
def test_cluster_file_reader(self): def test_cluster_file_reader(self):
_, temp_path = tempfile.mkstemp() _, temp_path = tempfile.mkstemp()
for x in xrange(5): for x in range(5):
with open(temp_path + '/%05d.test' % x) as f: with open(temp_path + '/%05d.test' % x) as f:
f.write('%d\n' % x) f.write('%d\n' % x)
reader = paddle.dataset.common.cluster_files_reader( reader = paddle.dataset.common.cluster_files_reader(
...@@ -63,7 +63,7 @@ class TestCommon(unittest.TestCase): ...@@ -63,7 +63,7 @@ class TestCommon(unittest.TestCase):
def test_reader(): def test_reader():
def reader(): def reader():
for x in xrange(record_num): for x in range(record_num):
yield x yield x
return reader return reader
......
...@@ -59,7 +59,7 @@ class TestMikolov(unittest.TestCase): ...@@ -59,7 +59,7 @@ class TestMikolov(unittest.TestCase):
self.assertEqual(first_line, read_line) self.assertEqual(first_line, read_line)
def test_total(self): def test_total(self):
_, idx = zip(*WORD_DICT.items()) _, idx = list(zip(*list(WORD_DICT.items())))
self.assertEqual(sorted(idx)[-1], len(WORD_DICT) - 1) self.assertEqual(sorted(idx)[-1], len(WORD_DICT) - 1)
......
...@@ -24,9 +24,8 @@ from nltk.corpus import movie_reviews ...@@ -24,9 +24,8 @@ from nltk.corpus import movie_reviews
class TestSentimentMethods(unittest.TestCase): class TestSentimentMethods(unittest.TestCase):
def test_get_word_dict(self): def test_get_word_dict(self):
word_dict = st.get_word_dict()[0:10] word_dict = st.get_word_dict()[0:10]
test_word_list = [(u',', 0), (u'the', 1), (u'.', 2), (u'a', 3), test_word_list = [(',', 0), ('the', 1), ('.', 2), ('a', 3), ('and', 4),
(u'and', 4), (u'of', 5), (u'to', 6), (u"'", 7), ('of', 5), ('to', 6), ("'", 7), ('is', 8), ('in', 9)]
(u'is', 8), (u'in', 9)]
for idx, each in enumerate(word_dict): for idx, each in enumerate(word_dict):
self.assertEqual(each, test_word_list[idx]) self.assertEqual(each, test_word_list[idx])
self.assertTrue("/root/.cache/paddle/dataset" in nltk.data.path) self.assertTrue("/root/.cache/paddle/dataset" in nltk.data.path)
......
...@@ -49,9 +49,12 @@ def feature_range(maximums, minimums): ...@@ -49,9 +49,12 @@ def feature_range(maximums, minimums):
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
fig, ax = plt.subplots() fig, ax = plt.subplots()
feature_num = len(maximums) feature_num = len(maximums)
ax.bar(range(feature_num), maximums - minimums, color='r', align='center') ax.bar(list(range(feature_num)),
maximums - minimums,
color='r',
align='center')
ax.set_title('feature scale') ax.set_title('feature scale')
plt.xticks(range(feature_num), feature_names) plt.xticks(list(range(feature_num)), feature_names)
plt.xlim([-1, feature_num]) plt.xlim([-1, feature_num])
fig.set_figheight(6) fig.set_figheight(6)
fig.set_figwidth(10) fig.set_figwidth(10)
...@@ -71,7 +74,7 @@ def load_data(filename, feature_num=14, ratio=0.8): ...@@ -71,7 +74,7 @@ def load_data(filename, feature_num=14, ratio=0.8):
maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum( maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum(
axis=0) / data.shape[0] axis=0) / data.shape[0]
feature_range(maximums[:-1], minimums[:-1]) feature_range(maximums[:-1], minimums[:-1])
for i in xrange(feature_num - 1): for i in range(feature_num - 1):
data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i]) data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i])
offset = int(data.shape[0] * ratio) offset = int(data.shape[0] * ratio)
UCI_TRAIN_DATA = data[:offset] UCI_TRAIN_DATA = data[:offset]
......
...@@ -40,7 +40,7 @@ URL_TRAIN = ('http://paddlepaddle.cdn.bcebos.com/demo/' ...@@ -40,7 +40,7 @@ URL_TRAIN = ('http://paddlepaddle.cdn.bcebos.com/demo/'
'wmt_shrinked_data/wmt14.tgz') 'wmt_shrinked_data/wmt14.tgz')
MD5_TRAIN = '0791583d57d5beb693b9414c5b36798c' MD5_TRAIN = '0791583d57d5beb693b9414c5b36798c'
# BLEU of this trained model is 26.92 # BLEU of this trained model is 26.92
URL_MODEL = 'http://paddlepaddle.bj.bcebos.com/demo/wmt_14/wmt14_model.tar.gz' URL_MODEL = 'http://paddlemodels.bj.bcebos.com/wmt%2Fwmt14.tgz'
MD5_MODEL = '0cb4a5366189b6acba876491c8724fa3' MD5_MODEL = '0cb4a5366189b6acba876491c8724fa3'
START = "<s>" START = "<s>"
...@@ -154,8 +154,8 @@ def get_dict(dict_size, reverse=True): ...@@ -154,8 +154,8 @@ def get_dict(dict_size, reverse=True):
tar_file = paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) tar_file = paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN)
src_dict, trg_dict = __read_to_dict(tar_file, dict_size) src_dict, trg_dict = __read_to_dict(tar_file, dict_size)
if reverse: if reverse:
src_dict = {v: k for k, v in src_dict.items()} src_dict = {v: k for k, v in list(src_dict.items())}
trg_dict = {v: k for k, v in trg_dict.items()} trg_dict = {v: k for k, v in list(trg_dict.items())}
return src_dict, trg_dict return src_dict, trg_dict
......
...@@ -70,7 +70,9 @@ def __build_dict(tar_file, dict_size, save_path, lang): ...@@ -70,7 +70,9 @@ def __build_dict(tar_file, dict_size, save_path, lang):
fout.write("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK)) fout.write("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK))
for idx, word in enumerate( for idx, word in enumerate(
sorted( sorted(
word_dict.iteritems(), key=lambda x: x[1], reverse=True)): iter(list(word_dict.items())),
key=lambda x: x[1],
reverse=True)):
if idx + 3 == dict_size: break if idx + 3 == dict_size: break
fout.write("%s\n" % (word[0])) fout.write("%s\n" % (word[0]))
......
...@@ -14,49 +14,49 @@ ...@@ -14,49 +14,49 @@
from __future__ import print_function from __future__ import print_function
# import all class inside framework into fluid module # import all class inside framework into fluid module
import framework from . import framework
from framework import * from .framework import *
# import all class inside executor into fluid module # import all class inside executor into fluid module
import executor from . import executor
from executor import * from .executor import *
import trainer from . import trainer
from trainer import Trainer from .trainer import Trainer
from trainer import BeginEpochEvent from .trainer import BeginEpochEvent
from trainer import EndEpochEvent from .trainer import EndEpochEvent
from trainer import BeginStepEvent from .trainer import BeginStepEvent
from trainer import EndStepEvent from .trainer import EndStepEvent
from trainer import CheckpointConfig from .trainer import CheckpointConfig
import inferencer from . import inferencer
from inferencer import Inferencer from .inferencer import Inferencer
import io from . import io
import evaluator from . import evaluator
import initializer from . import initializer
import layers from . import layers
import contrib from . import contrib
import nets from . import nets
import optimizer from . import optimizer
import backward from . import backward
import regularizer from . import regularizer
import average from . import average
import metrics from . import metrics
import transpiler from . import transpiler
from param_attr import ParamAttr, WeightNormParamAttr from .param_attr import ParamAttr, WeightNormParamAttr
from data_feeder import DataFeeder from .data_feeder import DataFeeder
from core import LoDTensor, LoDTensorArray, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope from .core import LoDTensor, LoDTensorArray, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope
from transpiler import DistributeTranspiler, InferenceTranspiler, \ from .transpiler import DistributeTranspiler, InferenceTranspiler, \
memory_optimize, release_memory, DistributeTranspilerConfig memory_optimize, release_memory, DistributeTranspilerConfig
from concurrency import (Go, make_channel, channel_send, channel_recv, from .concurrency import (Go, make_channel, channel_send, channel_recv,
channel_close, Select) channel_close, Select)
from lod_tensor import create_lod_tensor, create_random_int_lodtensor from .lod_tensor import create_lod_tensor, create_random_int_lodtensor
import clip from . import clip
import profiler from . import profiler
import unique_name from . import unique_name
import recordio_writer from . import recordio_writer
import parallel_executor from . import parallel_executor
from parallel_executor import * from .parallel_executor import *
from paddle.fluid.layers.math_op_patch import monkey_patch_variable from paddle.fluid.layers.math_op_patch import monkey_patch_variable
Tensor = LoDTensor Tensor = LoDTensor
...@@ -99,8 +99,8 @@ def __bootstrap__(): ...@@ -99,8 +99,8 @@ def __bootstrap__():
None None
""" """
import sys import sys
import core
import os import os
from . import core
in_test = 'unittest' in sys.modules in_test = 'unittest' in sys.modules
...@@ -123,7 +123,8 @@ def __bootstrap__(): ...@@ -123,7 +123,8 @@ def __bootstrap__():
read_env_flags = [ read_env_flags = [
'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir', 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb', 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb',
'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads' 'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads',
'cpu_deterministic'
] ]
if core.is_compiled_with_dist(): if core.is_compiled_with_dist():
read_env_flags.append('rpc_deadline') read_env_flags.append('rpc_deadline')
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import functools import functools
import sys import sys
...@@ -28,7 +29,7 @@ def deprecated(since, instead, extra_message=""): ...@@ -28,7 +29,7 @@ def deprecated(since, instead, extra_message=""):
@functools.wraps(func) @functools.wraps(func)
def wrapper(*args, **kwargs): def wrapper(*args, **kwargs):
print >> sys.stderr, err_msg print(err_msg, file=sys.stderr)
return func(*args, **kwargs) return func(*args, **kwargs)
wrapper.__doc__ += "\n " wrapper.__doc__ += "\n "
......
...@@ -16,7 +16,8 @@ from paddle.fluid import framework as framework ...@@ -16,7 +16,8 @@ from paddle.fluid import framework as framework
from . import core from . import core
import collections import collections
import copy import copy
import unique_name import six
from . import unique_name
__all__ = ['append_backward'] __all__ = ['append_backward']
...@@ -44,17 +45,25 @@ def _create_op_desc_(op_type, inputs, outputs, attrs): ...@@ -44,17 +45,25 @@ def _create_op_desc_(op_type, inputs, outputs, attrs):
""" """
op_desc = core.OpDesc() op_desc = core.OpDesc()
op_desc.set_type(op_type) op_desc.set_type(op_type)
for para, args in inputs.iteritems(): for para, args in list(inputs.items()):
op_desc.set_input(para, args) op_desc.set_input(
for para, args in outputs.iteritems(): para,
op_desc.set_output(para, args) list(
map(lambda arg: arg.decode() if isinstance(arg, six.binary_type) else arg,
args)))
for para, args in list(outputs.items()):
op_desc.set_output(
para,
list(
map(lambda arg: arg.decode() if isinstance(arg, six.binary_type) else arg,
args)))
op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName() op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName()
if op_role_attr_name not in attrs: if op_role_attr_name not in attrs:
attrs[ attrs[
op_role_attr_name] = core.op_proto_and_checker_maker.OpRole.Backward op_role_attr_name] = core.op_proto_and_checker_maker.OpRole.Backward
for name, val in attrs.iteritems(): for name, val in list(attrs.items()):
if isinstance(val, framework.Block): if isinstance(val, framework.Block):
op_desc.set_block_attr(name, val.desc) op_desc.set_block_attr(name, val.desc)
else: else:
...@@ -105,7 +114,9 @@ def _strip_grad_suffix_(name): ...@@ -105,7 +114,9 @@ def _strip_grad_suffix_(name):
e.g. x@GRAD ==> x e.g. x@GRAD ==> x
y@GRAD@RENAME@1 ==> y y@GRAD@RENAME@1 ==> y
""" """
pos = name.find(core.grad_var_suffix()) if isinstance(name, six.text_type):
name = name.encode()
pos = name.find(six.b(core.grad_var_suffix()))
return name[:pos] if pos != -1 else name return name[:pos] if pos != -1 else name
...@@ -114,7 +125,9 @@ def _append_grad_suffix_(name): ...@@ -114,7 +125,9 @@ def _append_grad_suffix_(name):
Append grad suffix to the given variable name Append grad suffix to the given variable name
e.g. x ==> x@GRAD e.g. x ==> x@GRAD
""" """
return name + core.grad_var_suffix() if isinstance(name, six.text_type):
name = name.encode()
return name + six.b(core.grad_var_suffix())
def _addup_repetitive_outputs_(op_descs): def _addup_repetitive_outputs_(op_descs):
...@@ -174,7 +187,7 @@ def _addup_repetitive_outputs_(op_descs): ...@@ -174,7 +187,7 @@ def _addup_repetitive_outputs_(op_descs):
op_desc.set_output(param_name, arg_names) op_desc.set_output(param_name, arg_names)
renamed_vars[var_name].append(new_name) renamed_vars[var_name].append(new_name)
for var_name, inputs in renamed_vars.iteritems(): for var_name, inputs in list(renamed_vars.items()):
if len(inputs) > 1: if len(inputs) > 1:
pending_sum_ops.append( pending_sum_ops.append(
(_create_op_desc_("sum", {"X": inputs}, {"Out": [var_name]}, (_create_op_desc_("sum", {"X": inputs}, {"Out": [var_name]},
...@@ -198,16 +211,19 @@ def _remove_no_grad_branch_(op_descs, no_grad_set): ...@@ -198,16 +211,19 @@ def _remove_no_grad_branch_(op_descs, no_grad_set):
out_arg_names = op_desc.output_arg_names() out_arg_names = op_desc.output_arg_names()
if len(out_arg_names) == 0 or _all_in_set_(out_arg_names, no_grad_set): if len(out_arg_names) == 0 or _all_in_set_(out_arg_names, no_grad_set):
return True return True
if _all_in_set_( if _all_in_set_([
filter(lambda name: name.find(core.grad_var_suffix()) != -1, name for name in op_desc.input_arg_names()
op_desc.input_arg_names()), no_grad_set): if name.find(core.grad_var_suffix()) != -1
], no_grad_set):
no_grad_set.update(out_arg_names) no_grad_set.update(out_arg_names)
return True return True
return False return False
# Remove ops whose outputs are all in no_grad_dict # Remove ops whose outputs are all in no_grad_dict
op_descs = filter( op_descs = [
lambda op_desc: not _op_can_be_removed_(op_desc, no_grad_set), op_descs) op_desc for op_desc in op_descs
if not _op_can_be_removed_(op_desc, no_grad_set)
]
# Insert fill_zeros_like_op # Insert fill_zeros_like_op
to_insert = [] to_insert = []
for idx, op_desc in enumerate(op_descs): for idx, op_desc in enumerate(op_descs):
...@@ -217,12 +233,12 @@ def _remove_no_grad_branch_(op_descs, no_grad_set): ...@@ -217,12 +233,12 @@ def _remove_no_grad_branch_(op_descs, no_grad_set):
"X": [_strip_grad_suffix_(arg)] "X": [_strip_grad_suffix_(arg)]
}, {"Out": [arg]}, {}), idx)) }, {"Out": [arg]}, {}), idx))
map(lambda p: op_descs.insert(p[1], p[0]), reversed(to_insert)) list([op_descs.insert(p[1], p[0]) for p in reversed(to_insert)])
return op_descs return op_descs
import proto.framework_pb2 as framework_pb2 from .proto import framework_pb2
def serialize_op_decs(op_desc): def serialize_op_decs(op_desc):
...@@ -244,8 +260,10 @@ def _callback_lookup_(op): ...@@ -244,8 +260,10 @@ def _callback_lookup_(op):
if op.type == 'parallel_do' and op.attr('use_nccl'): if op.type == 'parallel_do' and op.attr('use_nccl'):
all_vars = op.block.vars all_vars = op.block.vars
param_names = set(op.input('parameters')) param_names = set(op.input('parameters'))
param_names = filter(lambda name: all_vars[name].stop_gradient is False, param_names = [
param_names) name for name in param_names
if all_vars[name].stop_gradient is False
]
param_grad_names = [n + "@GRAD" for n in param_names] param_grad_names = [n + "@GRAD" for n in param_names]
class ParallelDoCallBack(object): class ParallelDoCallBack(object):
...@@ -399,7 +417,7 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map): ...@@ -399,7 +417,7 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
continue continue
block.desc.var(grad_var_name) block.desc.var(grad_var_name)
new_vars.add(grad_var_name) new_vars.add(grad_var_name)
if not grad_to_var.has_key(grad_var_name): if grad_var_name not in grad_to_var:
continue continue
grad_info_map[grad_to_var[grad_var_name]] = (grad_var_name, block) grad_info_map[grad_to_var[grad_var_name]] = (grad_var_name, block)
# infer_shape and infer_type # infer_shape and infer_type
...@@ -427,7 +445,7 @@ def _rename_grad_(block, start_op_idx, grad_to_var, target_grad_map): ...@@ -427,7 +445,7 @@ def _rename_grad_(block, start_op_idx, grad_to_var, target_grad_map):
op_desc.rename_output(name, new_name) op_desc.rename_output(name, new_name)
var_map[name] = new_name var_map[name] = new_name
for g, ng in var_map.iteritems(): for g, ng in list(var_map.items()):
if g in grad_to_var: if g in grad_to_var:
grad_to_var[ng] = grad_to_var[g] grad_to_var[ng] = grad_to_var[g]
grad_to_var.pop(g) grad_to_var.pop(g)
...@@ -439,7 +457,7 @@ def _get_stop_gradients_(program): ...@@ -439,7 +457,7 @@ def _get_stop_gradients_(program):
for block in program.blocks: for block in program.blocks:
assert isinstance(block, framework.Block) assert isinstance(block, framework.Block)
block_no_grad_set = set() block_no_grad_set = set()
for var in block.vars.itervalues(): for var in list(block.vars.values()):
assert isinstance(var, framework.Variable) assert isinstance(var, framework.Variable)
if var.stop_gradient: if var.stop_gradient:
block_no_grad_set.add(_append_grad_suffix_(var.name)) block_no_grad_set.add(_append_grad_suffix_(var.name))
...@@ -452,51 +470,51 @@ def append_backward(loss, parameter_list=None, no_grad_set=None, ...@@ -452,51 +470,51 @@ def append_backward(loss, parameter_list=None, no_grad_set=None,
""" """
Append backward part to main_program. Append backward part to main_program.
A complete neural network training is made up of forward and backward A complete neural network training is made up of forward and backward
propagation. However, when we configure a network, we only need to propagation. However, when we configure a network, we only need to
specify its forward part. The backward part is generated automatically specify its forward part. The backward part is generated automatically
according to the forward part by this function. according to the forward part by this function.
In most cases, users do not need to invoke this function manually. It In most cases, users do not need to invoke this function manually. It
will be automatically invoked by the optimizer's `minimize` function. will be automatically invoked by the optimizer's `minimize` function.
Args: Args:
loss(Variable): The loss variable of the network. loss(Variable): The loss variable of the network.
parameter_list(list[string]|None): Names of parameters that need parameter_list(list[string]|None): Names of parameters that need
to be updated by optimizers. to be updated by optimizers.
If it is None, all parameters If it is None, all parameters
will be updated. will be updated.
Default: None Default: None
no_grad_set(set|None): Variables in the Block 0 whose gradients no_grad_set(set|None): Variables in the Block 0 whose gradients
should be ignored. All variables with should be ignored. All variables with
`stop_gradient=True` from all blocks will `stop_gradient=True` from all blocks will
be automatically added into this set. be automatically added into this set.
Default: None Default: None
callbacks(list[callable object]|None): The callbacks are used for callbacks(list[callable object]|None): The callbacks are used for
doing some custom jobs during doing some custom jobs during
backward part building. All backward part building. All
callable objects in it will callable objects in it will
be invoked once each time a be invoked once each time a
new gradient operator is added new gradient operator is added
into the program. The callable into the program. The callable
object must has two input object must has two input
parameters: 'block' and 'context'. parameters: 'block' and 'context'.
The 'block' is the block which The 'block' is the block which
the new gradient operator will the new gradient operator will
be added to. The 'context' is a be added to. The 'context' is a
map, whose keys are gradient map, whose keys are gradient
variable names and values are variable names and values are
corresponding original variables. corresponding original variables.
In addition to this, the 'context' In addition to this, the 'context'
has another special key-value pair: has another special key-value pair:
the key is string '__current_op_desc__' the key is string '__current_op_desc__'
and the value is the op_desc of the and the value is the op_desc of the
gradient operator who has just gradient operator who has just
triggered the callable object. triggered the callable object.
Returns: Returns:
list[(Variable,Variable)]: Pairs of parameter and its list[(Variable,Variable)]: Pairs of parameter and its
corresponding gradients. The key is the parameter and the corresponding gradients. The key is the parameter and the
value is gradient variable. value is gradient variable.
Raises: Raises:
...@@ -535,7 +553,7 @@ def append_backward(loss, parameter_list=None, no_grad_set=None, ...@@ -535,7 +553,7 @@ def append_backward(loss, parameter_list=None, no_grad_set=None,
no_grad_set = set() no_grad_set = set()
no_grad_set = copy.copy(no_grad_set) no_grad_set = copy.copy(no_grad_set)
no_grad_dict = _get_stop_gradients_(program) no_grad_dict = _get_stop_gradients_(program)
no_grad_dict[0].update(map(_append_grad_suffix_, no_grad_set)) no_grad_dict[0].update(list(map(_append_grad_suffix_, no_grad_set)))
grad_info_map = dict() grad_info_map = dict()
root_block = program.block(0) root_block = program.block(0)
...@@ -558,7 +576,7 @@ def append_backward(loss, parameter_list=None, no_grad_set=None, ...@@ -558,7 +576,7 @@ def append_backward(loss, parameter_list=None, no_grad_set=None,
block_no_grad_set = set(map(_strip_grad_suffix_, no_grad_dict[0])) block_no_grad_set = set(map(_strip_grad_suffix_, no_grad_dict[0]))
op_path = _find_op_path_(root_block, [loss], [], block_no_grad_set) op_path = _find_op_path_(root_block, [loss], [], block_no_grad_set)
no_grad_dict[0].update(map(_append_grad_suffix_, block_no_grad_set)) no_grad_dict[0].update(list(map(_append_grad_suffix_, block_no_grad_set)))
_append_backward_ops_(root_block, op_path, root_block, no_grad_dict, _append_backward_ops_(root_block, op_path, root_block, no_grad_dict,
grad_to_var, callbacks) grad_to_var, callbacks)
...@@ -572,8 +590,6 @@ def append_backward(loss, parameter_list=None, no_grad_set=None, ...@@ -572,8 +590,6 @@ def append_backward(loss, parameter_list=None, no_grad_set=None,
program.current_block_idx = current_block_idx program.current_block_idx = current_block_idx
program._sync_with_cpp() program._sync_with_cpp()
# FIXME(zcd): prevent loss.grad optimized by mem_opt.
loss.block.var(_append_grad_suffix_(loss.name)).persistable = True
if parameter_list is not None: if parameter_list is not None:
parameters = parameter_list parameters = parameter_list
...@@ -699,7 +715,7 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None): ...@@ -699,7 +715,7 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None):
no_grad_set = set() no_grad_set = set()
no_grad_set = copy.copy(no_grad_set) no_grad_set = copy.copy(no_grad_set)
no_grad_dict = _get_stop_gradients_(prog) no_grad_dict = _get_stop_gradients_(prog)
no_grad_dict[0].update(map(_append_grad_suffix_, no_grad_set)) no_grad_dict[0].update(list(map(_append_grad_suffix_, no_grad_set)))
fwd_op_num = block.desc.op_size() fwd_op_num = block.desc.op_size()
...@@ -733,7 +749,7 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None): ...@@ -733,7 +749,7 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None):
block_no_grad_set = set(map(_strip_grad_suffix_, no_grad_dict[0])) block_no_grad_set = set(map(_strip_grad_suffix_, no_grad_dict[0]))
op_path = _find_op_path_(block, targets, inputs, block_no_grad_set) op_path = _find_op_path_(block, targets, inputs, block_no_grad_set)
no_grad_dict[0].update(map(_append_grad_suffix_, block_no_grad_set)) no_grad_dict[0].update(list(map(_append_grad_suffix_, block_no_grad_set)))
grad_to_var = dict() grad_to_var = dict()
grad_info_map = dict() grad_info_map = dict()
_append_backward_ops_(block, op_path, block, no_grad_dict, grad_to_var) _append_backward_ops_(block, op_path, block, no_grad_dict, grad_to_var)
......
...@@ -13,10 +13,11 @@ ...@@ -13,10 +13,11 @@
# limitations under the License. # limitations under the License.
import copy import copy
import six
import functools import functools
import layers from . import layers
import framework from . import framework
from . import core from . import core
__all__ = [ __all__ = [
...@@ -80,8 +81,7 @@ def error_clip_callback(block, context): ...@@ -80,8 +81,7 @@ def error_clip_callback(block, context):
# the context is a grad_to_var map # the context is a grad_to_var map
grad_to_var = context grad_to_var = context
op_desc = block.desc.op(block.desc.op_size() - 1) op_desc = block.desc.op(block.desc.op_size() - 1)
for grad_n in filter(lambda n: grad_to_var.has_key(n), for grad_n in [n for n in op_desc.output_arg_names() if n in grad_to_var]:
op_desc.output_arg_names()):
fwd_var = block._var_recursive(grad_to_var[grad_n]) fwd_var = block._var_recursive(grad_to_var[grad_n])
error_clip = getattr(fwd_var, "error_clip", None) error_clip = getattr(fwd_var, "error_clip", None)
if not (error_clip is None or isinstance(error_clip, if not (error_clip is None or isinstance(error_clip,
...@@ -247,8 +247,8 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): ...@@ -247,8 +247,8 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
""" """
def __init__(self, clip_norm, group_name="default_group"): def __init__(self, clip_norm, group_name="default_group"):
if not isinstance(group_name, basestring): if not isinstance(group_name, six.string_types):
raise TypeError("'group_name' must be a basestring.") raise TypeError("'group_name' must be a %s." % (six.string_types))
self.clip_norm = clip_norm self.clip_norm = clip_norm
self.group_name = group_name self.group_name = group_name
...@@ -284,7 +284,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): ...@@ -284,7 +284,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
x=clip_var, x=clip_var,
y=layers.elementwise_max( y=layers.elementwise_max(
x=clip_var, y=group_norm_var)) x=clip_var, y=group_norm_var))
assert group_scale_var.shape == (1L, ) assert group_scale_var.shape == (1, )
self.context[group_scale_name] = group_scale_var self.context[group_scale_name] = group_scale_var
new_grad = layers.elementwise_mul( new_grad = layers.elementwise_mul(
...@@ -313,7 +313,7 @@ def set_gradient_clip(clip, param_list=None, program=None): ...@@ -313,7 +313,7 @@ def set_gradient_clip(clip, param_list=None, program=None):
program = framework.default_main_program() program = framework.default_main_program()
if param_list is None: if param_list is None:
param_list = program.block(0).all_parameters() param_list = program.block(0).all_parameters()
if all(isinstance(elem, basestring) for elem in param_list): if all(isinstance(elem, six.string_types) for elem in param_list):
param_list = [program.block(0).var(elem) for elem in param_list] param_list = [program.block(0).var(elem) for elem in param_list]
if not all(isinstance(elem, framework.Parameter) for elem in param_list): if not all(isinstance(elem, framework.Parameter) for elem in param_list):
raise TypeError( raise TypeError(
......
...@@ -12,11 +12,11 @@ ...@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from layers.control_flow import BlockGuard, equal from .layers.control_flow import BlockGuard, equal
from .framework import Operator from .framework import Operator
from layer_helper import LayerHelper, unique_name from .layer_helper import LayerHelper, unique_name
from layers import fill_constant from .layers import fill_constant
import core from . import core
__all__ = [ __all__ = [
'Go', 'make_channel', 'channel_send', 'channel_recv', 'channel_close', 'Go', 'make_channel', 'channel_send', 'channel_recv', 'channel_close',
......
...@@ -12,7 +12,9 @@ ...@@ -12,7 +12,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import decoder from . import decoder
from decoder import * from .decoder import *
from . import memory_usage_calc
from .memory_usage_calc import *
__all__ = decoder.__all__ __all__ = decoder.__all__ + memory_usage_calc.__all__
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import beam_search_decoder from . import beam_search_decoder
from beam_search_decoder import * from .beam_search_decoder import *
__all__ = beam_search_decoder.__all__ __all__ = beam_search_decoder.__all__
...@@ -22,6 +22,7 @@ This API is still under active development and may change drastically. ...@@ -22,6 +22,7 @@ This API is still under active development and may change drastically.
import contextlib import contextlib
import numpy as np import numpy as np
import six
from ... import layers from ... import layers
from ...framework import Variable from ...framework import Variable
...@@ -191,7 +192,7 @@ class StateCell(object): ...@@ -191,7 +192,7 @@ class StateCell(object):
self._helper = LayerHelper('state_cell', name=name) self._helper = LayerHelper('state_cell', name=name)
self._cur_states = {} self._cur_states = {}
self._state_names = [] self._state_names = []
for state_name, state in states.items(): for state_name, state in six.iteritems(states):
if not isinstance(state, InitState): if not isinstance(state, InitState):
raise ValueError('state must be an InitState object.') raise ValueError('state must be an InitState object.')
self._cur_states[state_name] = state self._cur_states[state_name] = state
...@@ -346,7 +347,7 @@ class StateCell(object): ...@@ -346,7 +347,7 @@ class StateCell(object):
if self._in_decoder and not self._switched_decoder: if self._in_decoder and not self._switched_decoder:
self._switch_decoder() self._switch_decoder()
for input_name, input_value in inputs.items(): for input_name, input_value in six.iteritems(inputs):
if input_name not in self._inputs: if input_name not in self._inputs:
raise ValueError('Unknown input %s. ' raise ValueError('Unknown input %s. '
'Please make sure %s in input ' 'Please make sure %s in input '
...@@ -361,7 +362,7 @@ class StateCell(object): ...@@ -361,7 +362,7 @@ class StateCell(object):
if self._in_decoder and not self._switched_decoder: if self._in_decoder and not self._switched_decoder:
self._switched_decoder() self._switched_decoder()
for state_name, decoder_state in self._states_holder.items(): for state_name, decoder_state in six.iteritems(self._states_holder):
if id(self._cur_decoder_obj) not in decoder_state: if id(self._cur_decoder_obj) not in decoder_state:
raise ValueError('Unknown decoder object, please make sure ' raise ValueError('Unknown decoder object, please make sure '
'switch_decoder been invoked.') 'switch_decoder been invoked.')
...@@ -671,7 +672,7 @@ class BeamSearchDecoder(object): ...@@ -671,7 +672,7 @@ class BeamSearchDecoder(object):
feed_dict = {} feed_dict = {}
update_dict = {} update_dict = {}
for init_var_name, init_var in self._input_var_dict.items(): for init_var_name, init_var in six.iteritems(self._input_var_dict):
if init_var_name not in self.state_cell._inputs: if init_var_name not in self.state_cell._inputs:
raise ValueError('Variable ' + init_var_name + raise ValueError('Variable ' + init_var_name +
' not found in StateCell!\n') ' not found in StateCell!\n')
...@@ -721,7 +722,8 @@ class BeamSearchDecoder(object): ...@@ -721,7 +722,8 @@ class BeamSearchDecoder(object):
self.state_cell.update_states() self.state_cell.update_states()
self.update_array(prev_ids, selected_ids) self.update_array(prev_ids, selected_ids)
self.update_array(prev_scores, selected_scores) self.update_array(prev_scores, selected_scores)
for update_name, var_to_update in update_dict.items(): for update_name, var_to_update in six.iteritems(
update_dict):
self.update_array(var_to_update, feed_dict[update_name]) self.update_array(var_to_update, feed_dict[update_name])
def read_array(self, init, is_ids=False, is_scores=False): def read_array(self, init, is_ids=False, is_scores=False):
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module provides a memory usage calculation function for users.
The purpose of this API is to allow users to estimate the memory usage of
a program under a specific batch size, so that they can set an appropriate
batch size to fully utilize a GPU.
This API is still under active development and may change drastically.
"""
from .. import core
from ..framework import Program, Variable
__all__ = ['memory_usage']
# Size in bytes of a single element for each supported variable data type.
# memory_usage() multiplies a variable's element count by the value stored
# here, so a dtype missing from this table makes that lookup raise KeyError.
dtype_to_size = {
core.VarDesc.VarType.FP16: 2,
core.VarDesc.VarType.FP32: 4,
core.VarDesc.VarType.FP64: 8,
core.VarDesc.VarType.INT16: 2,
core.VarDesc.VarType.INT32: 4,
core.VarDesc.VarType.INT64: 8,
core.VarDesc.VarType.BOOL: 1,
core.VarDesc.VarType.UINT8: 1,
}
# When True, memory_usage() prints the size of every variable it visits and
# the accumulated total (in bytes, before unit conversion).
DEBUG = False
def memory_usage(program, batch_size):
    """
    Get the estimated memory usage of a program for a given input batch size.

    For every variable in the program's global block, the element count is
    computed from its shape (each ``-1`` dimension is replaced by
    ``batch_size``) and multiplied by the per-element size found in
    ``dtype_to_size``. The summed total is converted to B, KB or MB and then
    padded by 5% (lower bound) and 10% (upper bound).

    Args:
        program(Program): The current Program.
        batch_size(int): The current input data batch_size.

    Returns:
        min_total_memory(float): the estimate memory usage lower bound.
        max_total_memory(float): the estimate memory usage upper bound.
        unit_str(string): the unit of estimate usage result.

    Raises:
        TypeError: if ``program`` is not a Program.
        ValueError: if ``batch_size`` is not positive.
        KeyError: if a variable's dtype has no entry in ``dtype_to_size``.

    Examples:

        >>> import paddle.fluid as fluid
        >>> lower_usage, upper_usage, unit = fluid.contrib.memory_usage(
                fluid.default_main_program(), batch_size=10)
        >>> print("memory usage is about %.3f - %.3f %s" %
                (lower_usage, upper_usage, unit))

    """

    # Parameters check
    if not isinstance(program, Program):
        # The original referenced a misspelled name here, which raised
        # NameError instead of the intended TypeError.
        raise TypeError(
            "Calculating Memory Usage requires Program as its Parameter. "
            "But you passed in %s" % (type(program)))
    if batch_size <= 0:
        raise ValueError("The batch size need to be positive.")

    # Walk the variables of the first (global) block and accumulate bytes.
    # dict.values() is used instead of itervalues() so the loop runs on both
    # Python 2 and Python 3.
    total_memory = 0.0
    for var in program.global_block().vars.values():
        data_count = 1
        for x in var.shape:
            # A dimension of -1 stands for the batch dimension.
            if x == -1:
                data_count *= batch_size
            else:
                data_count *= x
        var_memory = data_count * dtype_to_size[var.dtype]
        if DEBUG:
            # Single parenthesised argument: valid as a Python 2 print
            # statement and as a Python 3 print() call.
            print("%s memory usage: %d" % (var.name, var_memory))
        total_memory += var_memory
    if DEBUG:
        print("total memory usage: %.2f" % (total_memory))

    # Convert appropriate unit
    unit_str = "B"
    if total_memory > 1024:
        total_memory /= 1024
        unit_str = "KB"
        if total_memory > 1024:
            total_memory /= 1024
            unit_str = "MB"

    # Append extra memory consumption (5% - 10%)
    min_total_memory = total_memory * 1.05
    max_total_memory = total_memory * 1.1

    return min_total_memory, max_total_memory, unit_str
...@@ -12,14 +12,14 @@ ...@@ -12,14 +12,14 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from . import core
import core
import numpy import numpy
import os import os
import six.moves as six import six
from six.moves import zip, range, xrange
import multiprocessing import multiprocessing
from framework import Variable, default_main_program from .framework import Variable, default_main_program
__all__ = ['DataFeeder'] __all__ = ['DataFeeder']
...@@ -53,7 +53,7 @@ class DataToLoDTensorConverter(object): ...@@ -53,7 +53,7 @@ class DataToLoDTensorConverter(object):
self.data = [] self.data = []
self.lod = [] self.lod = []
for i in six.range(lod_level): for i in six.moves.range(lod_level):
self.lod.append([]) self.lod.append([])
def feed(self, data): def feed(self, data):
...@@ -142,7 +142,7 @@ class DataFeeder(object): ...@@ -142,7 +142,7 @@ class DataFeeder(object):
if program is None: if program is None:
program = default_main_program() program = default_main_program()
for each_var in feed_list: for each_var in feed_list:
if isinstance(each_var, basestring): if isinstance(each_var, six.string_types):
each_var = program.block(0).var(each_var) each_var = program.block(0).var(each_var)
if not isinstance(each_var, Variable): if not isinstance(each_var, Variable):
raise TypeError("Feed list should contain a list of variable") raise TypeError("Feed list should contain a list of variable")
...@@ -174,7 +174,7 @@ class DataFeeder(object): ...@@ -174,7 +174,7 @@ class DataFeeder(object):
dict: the result of conversion. dict: the result of conversion.
""" """
converter = [] converter = []
for lod_level, shape, dtype in six.zip( for lod_level, shape, dtype in six.moves.zip(
self.feed_lod_level, self.feed_shapes, self.feed_dtypes): self.feed_lod_level, self.feed_shapes, self.feed_dtypes):
converter.append( converter.append(
DataToLoDTensorConverter( DataToLoDTensorConverter(
...@@ -187,10 +187,12 @@ class DataFeeder(object): ...@@ -187,10 +187,12 @@ class DataFeeder(object):
assert len(each_sample) == len(converter), ( assert len(each_sample) == len(converter), (
"The number of fields in data (%s) does not match " + "The number of fields in data (%s) does not match " +
"len(feed_list) (%s)") % (len(each_sample), len(converter)) "len(feed_list) (%s)") % (len(each_sample), len(converter))
for each_converter, each_slot in six.zip(converter, each_sample): for each_converter, each_slot in six.moves.zip(converter,
each_sample):
each_converter.feed(each_slot) each_converter.feed(each_slot)
ret_dict = {} ret_dict = {}
for each_name, each_converter in six.zip(self.feed_names, converter): for each_name, each_converter in six.moves.zip(self.feed_names,
converter):
ret_dict[each_name] = each_converter.done() ret_dict[each_name] = each_converter.done()
return ret_dict return ret_dict
...@@ -212,12 +214,14 @@ class DataFeeder(object): ...@@ -212,12 +214,14 @@ class DataFeeder(object):
if isinstance(self.place, core.CUDAPlace): if isinstance(self.place, core.CUDAPlace):
places = [ places = [
core.CUDAPlace(i) core.CUDAPlace(i)
for i in six.xrange(self._get_number_of_places_(num_places)) for i in six.moves.xrange(
self._get_number_of_places_(num_places))
] ]
else: else:
places = [ places = [
core.CPUPlace() core.CPUPlace()
for _ in six.xrange(self._get_number_of_places_(num_places)) for _ in six.moves.xrange(
self._get_number_of_places_(num_places))
] ]
if len(iterable) != len(places): if len(iterable) != len(places):
...@@ -227,7 +231,7 @@ class DataFeeder(object): ...@@ -227,7 +231,7 @@ class DataFeeder(object):
"must be same.") "must be same.")
place = self.place place = self.place
for p, batch in six.zip(places, iterable): for p, batch in six.moves.zip(places, iterable):
self.place = p self.place = p
yield self.feed(batch) yield self.feed(batch)
self.place = place self.place = place
......
...@@ -14,8 +14,8 @@ ...@@ -14,8 +14,8 @@
import sys import sys
import re import re
from graphviz import GraphPreviewGenerator from .graphviz import GraphPreviewGenerator
import proto.framework_pb2 as framework_pb2 from .proto import framework_pb2
from google.protobuf import text_format from google.protobuf import text_format
_vartype2str_ = [ _vartype2str_ = [
......
...@@ -15,11 +15,11 @@ ...@@ -15,11 +15,11 @@
import warnings import warnings
import numpy as np import numpy as np
import layers from . import layers
from framework import Program, Variable, program_guard from .framework import Program, Variable, program_guard
import unique_name from . import unique_name
from layer_helper import LayerHelper from .layer_helper import LayerHelper
from initializer import Constant from .initializer import Constant
__all__ = [ __all__ = [
'ChunkEvaluator', 'ChunkEvaluator',
......
...@@ -14,7 +14,8 @@ ...@@ -14,7 +14,8 @@
import numpy as np import numpy as np
import contextlib import contextlib
from framework import Program, default_main_program, Variable import six
from .framework import Program, default_main_program, Variable
from . import core from . import core
__all__ = [ __all__ = [
...@@ -204,19 +205,19 @@ def fetch_var(name, scope=None, return_numpy=True): ...@@ -204,19 +205,19 @@ def fetch_var(name, scope=None, return_numpy=True):
def _get_program_cache_key(feed, fetch_list): def _get_program_cache_key(feed, fetch_list):
feed_var_names = feed.keys() feed_var_names = list(feed.keys())
def to_name_str(var): def to_name_str(var):
if isinstance(var, Variable): if isinstance(var, Variable):
return var.desc.name() return var.desc.name()
elif isinstance(var, str): elif isinstance(var, str):
return var return var
elif isinstance(var, basestring): elif isinstance(var, six.string_types):
return str(var) return str(var)
else: else:
raise TypeError(str(var) + " should be Variable or str") raise TypeError(str(var) + " should be Variable or str")
fetch_var_names = map(to_name_str, fetch_list) fetch_var_names = list(map(to_name_str, fetch_list))
return str(feed_var_names + fetch_var_names) return str(feed_var_names + fetch_var_names)
...@@ -229,8 +230,8 @@ class Executor(object): ...@@ -229,8 +230,8 @@ class Executor(object):
to feed map and fetch_list. Feed map provides input data for the program. fetch_list provides to feed map and fetch_list. Feed map provides input data for the program. fetch_list provides
the variables(or names) that user want to get after program run. Note: the executor will run all the variables(or names) that user want to get after program run. Note: the executor will run all
operators in the program but not only the operators dependent by the fetch_list. operators in the program but not only the operators dependent by the fetch_list.
It store the global variables into the global scope, and create a local scope for the temporary It store the global variables into the global scope, and create a local scope for the temporary
variables. The local scope contents will be discarded after every minibatch forward/backward finished. variables. The local scope contents will be discarded after every minibatch forward/backward finished.
But the global scope variables will be persistent through different runs. But the global scope variables will be persistent through different runs.
All of ops in program will be running in sequence. All of ops in program will be running in sequence.
...@@ -345,7 +346,7 @@ class Executor(object): ...@@ -345,7 +346,7 @@ class Executor(object):
def _fetch_data(self, fetch_list, fetch_var_name, scope): def _fetch_data(self, fetch_list, fetch_var_name, scope):
outs = [ outs = [
core.get_fetch_variable(scope, fetch_var_name, i) core.get_fetch_variable(scope, fetch_var_name, i)
for i in xrange(len(fetch_list)) for i in range(len(fetch_list))
] ]
return outs return outs
......
...@@ -15,21 +15,22 @@ ...@@ -15,21 +15,22 @@
import collections import collections
import contextlib import contextlib
import re import re
import six
import numpy as np import numpy as np
import proto.framework_pb2 as framework_pb2 from .proto import framework_pb2
try: try:
from . import core from . import core
except ImportError, e: except ImportError as e:
raise ImportError( raise ImportError(
"""NOTE: You may need to run \"export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH\" """NOTE: You may need to run \"export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH\"
if you encounters \"libmkldnn.so not found\" errors. If you have python if you encounters \"libmkldnn.so not found\" errors. If you have python
installed in other directory, replace \"/usr/local/lib\" with your own installed in other directory, replace \"/usr/local/lib\" with your own
directory. The original error is: \n""" + e.message) directory. The original error is: \n""" + e.message)
except Exception, e: except Exception as e:
raise e raise e
import unique_name from . import unique_name
__all__ = [ __all__ = [
'Program', 'Program',
...@@ -86,7 +87,7 @@ def convert_np_dtype_to_dtype_(np_dtype): ...@@ -86,7 +87,7 @@ def convert_np_dtype_to_dtype_(np_dtype):
elif dtype == np.uint8: elif dtype == np.uint8:
return core.VarDesc.VarType.UINT8 return core.VarDesc.VarType.UINT8
else: else:
raise ValueError("Not supported numpy dtype " + str(dtype)) raise ValueError("Not supported numpy dtype " + six.binary_type(dtype))
def dtype_is_floating(dtype): def dtype_is_floating(dtype):
...@@ -129,15 +130,15 @@ def _debug_string_(proto, throw_on_error=True): ...@@ -129,15 +130,15 @@ def _debug_string_(proto, throw_on_error=True):
class Variable(object): class Variable(object):
""" """
In Fluid, every input and output of an operator is a variable. In most In Fluid, every input and output of an operator is a variable. In most
cases, variables are used for holding different kinds of data or training cases, variables are used for holding different kinds of data or training
labels. A variable belongs to a block. All variable has its own name and labels. A variable belongs to a block. All variable has its own name and
two variables in different blocks could have the same name. two variables in different blocks could have the same name.
There are many kinds of variables. Each kind of them has its own attributes There are many kinds of variables. Each kind of them has its own attributes
and usages. Please reference the framework.proto for details. and usages. Please reference the framework.proto for details.
Most of a Variable's member variables can be setted to be None. It mean Most of a Variable's member variables can be setted to be None. It mean
it is not available or will be specified later. it is not available or will be specified later.
Args: Args:
...@@ -197,6 +198,7 @@ class Variable(object): ...@@ -197,6 +198,7 @@ class Variable(object):
if name is None: if name is None:
name = unique_name.generate('_generated_var') name = unique_name.generate('_generated_var')
is_new_var = False is_new_var = False
name = name if isinstance(name, six.binary_type) else name.encode()
self.desc = self.block.desc.find_var(name) self.desc = self.block.desc.find_var(name)
if self.desc is None: if self.desc is None:
...@@ -290,13 +292,13 @@ class Variable(object): ...@@ -290,13 +292,13 @@ class Variable(object):
assert isinstance(throw_on_error, bool) and isinstance(with_details, assert isinstance(throw_on_error, bool) and isinstance(with_details,
bool) bool)
protostr = self.desc.serialize_to_string() protostr = self.desc.serialize_to_string()
proto = framework_pb2.VarDesc.FromString(str(protostr)) proto = framework_pb2.VarDesc.FromString(six.binary_type(protostr))
res_str = _debug_string_(proto, throw_on_error) res_str = _debug_string_(proto, throw_on_error)
if with_details: if with_details:
additional_attr = ("error_clip", "stop_gradient") additional_attr = ("error_clip", "stop_gradient")
for attr_name in additional_attr: for attr_name in additional_attr:
res_str += "%s: %s\n" % (attr_name, res_str += "%s: %s\n" % (
str(getattr(self, attr_name))) attr_name, six.binary_type(getattr(self, attr_name)))
return res_str return res_str
__repr__ = __str__ __repr__ = __str__
...@@ -369,7 +371,7 @@ def get_all_op_protos(): ...@@ -369,7 +371,7 @@ def get_all_op_protos():
protostrs = core.get_all_op_protos() protostrs = core.get_all_op_protos()
ret_values = [] ret_values = []
for pbstr in protostrs: for pbstr in protostrs:
op_proto = framework_pb2.OpProto.FromString(str(pbstr)) op_proto = framework_pb2.OpProto.FromString(six.binary_type(pbstr))
ret_values.append(op_proto) ret_values.append(op_proto)
return ret_values return ret_values
...@@ -472,7 +474,6 @@ class Operator(object): ...@@ -472,7 +474,6 @@ class Operator(object):
inputs=None, inputs=None,
outputs=None, outputs=None,
attrs=None): attrs=None):
self.block = block self.block = block
self.desc = desc self.desc = desc
self.attrs = attrs self.attrs = attrs
...@@ -523,10 +524,19 @@ class Operator(object): ...@@ -523,10 +524,19 @@ class Operator(object):
% (in_proto.name, len(in_args))) % (in_proto.name, len(in_args)))
in_arg_names = [] in_arg_names = []
for arg in in_args: for arg in in_args:
if isinstance(arg, basestring): if isinstance(arg, six.string_types):
in_arg_names.append(arg) in_arg_names.append(arg)
elif isinstance(arg, six.binary_type):
in_arg_names.append(arg.decode())
else: else:
in_arg_names.append(arg.name) if isinstance(arg.name, six.string_types):
in_arg_names.append(arg.name)
elif isinstance(arg.name, six.binary_type):
in_arg_names.append(arg.name.decode())
else:
raise TypeError(
"arguments require unicode, str or bytes, but get %s instead."
% (type(arg.name)))
self.desc.set_input(in_proto.name, in_arg_names) self.desc.set_input(in_proto.name, in_arg_names)
else: else:
self.desc.set_input(in_proto.name, []) self.desc.set_input(in_proto.name, [])
...@@ -541,8 +551,9 @@ class Operator(object): ...@@ -541,8 +551,9 @@ class Operator(object):
if not given == need: if not given == need:
raise ValueError(("Incorrect setting for output(s) of " raise ValueError(("Incorrect setting for output(s) of "
"operator \"%s\". Need: [%s] Given: [%s]") % "operator \"%s\". Need: [%s] Given: [%s]") %
(type, ", ".join(str(e) for e in need), (type,
", ".join(str(e) for e in given))) ", ".join(six.binary_type(e) for e in need),
", ".join(six.binary_type(e) for e in given)))
for out_proto in proto.outputs: for out_proto in proto.outputs:
out_args = outputs[out_proto.name] out_args = outputs[out_proto.name]
...@@ -554,7 +565,14 @@ class Operator(object): ...@@ -554,7 +565,14 @@ class Operator(object):
(out_proto.name, len(out_args))) (out_proto.name, len(out_args)))
out_arg_names = [] out_arg_names = []
for arg in out_args: for arg in out_args:
out_arg_names.append(arg.name) if isinstance(arg.name, six.string_types):
out_arg_names.append(arg.name)
elif isinstance(arg.name, six.binary_type):
out_arg_names.append(arg.name.decode())
else:
raise TypeError(
"arguments require unicode, str or bytes, but get %s instead."
% (type(arg.name)))
arg.op = self arg.op = self
self.desc.set_output(out_proto.name, out_arg_names) self.desc.set_output(out_proto.name, out_arg_names)
...@@ -590,7 +608,7 @@ class Operator(object): ...@@ -590,7 +608,7 @@ class Operator(object):
""" """
protostr = self.desc.serialize_to_string() protostr = self.desc.serialize_to_string()
proto = framework_pb2.OpDesc.FromString(str(protostr)) proto = framework_pb2.OpDesc.FromString(six.binary_type(protostr))
return _debug_string_(proto, throw_on_error) return _debug_string_(proto, throw_on_error)
def __str__(self): def __str__(self):
...@@ -845,7 +863,7 @@ class Block(object): ...@@ -845,7 +863,7 @@ class Block(object):
re_add_indent = re.compile(r"\n(.)") re_add_indent = re.compile(r"\n(.)")
res_str = "blocks {\n idx: %d\n parent_idx: %d" % ( res_str = "blocks {\n idx: %d\n parent_idx: %d" % (
self.idx, self.parent_idx) self.idx, self.parent_idx)
for var in self.vars.itervalues(): for var in list(self.vars.values()):
res_str += "\n vars {\n %s }" % re_add_indent.sub( res_str += "\n vars {\n %s }" % re_add_indent.sub(
r"\n \1", var.to_string(throw_on_error, with_details)) r"\n \1", var.to_string(throw_on_error, with_details))
for op in self.ops: for op in self.ops:
...@@ -854,7 +872,8 @@ class Block(object): ...@@ -854,7 +872,8 @@ class Block(object):
res_str += "\n}" res_str += "\n}"
else: else:
protostr = self.desc.serialize_to_string() protostr = self.desc.serialize_to_string()
proto = framework_pb2.BlockDesc.FromString(str(protostr)) proto = framework_pb2.BlockDesc.FromString(
six.binary_type(protostr))
res_str = _debug_string_(proto, throw_on_error) res_str = _debug_string_(proto, throw_on_error)
return res_str return res_str
...@@ -898,10 +917,11 @@ class Block(object): ...@@ -898,10 +917,11 @@ class Block(object):
Returns: Returns:
Variable: the Variable with the giving name. Variable: the Variable with the giving name.
""" """
if not isinstance(name, basestring): if not isinstance(name, six.string_types):
raise TypeError( if not isinstance(name, six.binary_type):
"var require string as parameter, but get %s instead." % raise TypeError(
(type(name))) "var require string as parameter, but get %s instead." %
(type(name)))
v = self.vars.get(name, None) v = self.vars.get(name, None)
if v is None: if v is None:
raise ValueError("var %s not in this block" % name) raise ValueError("var %s not in this block" % name)
...@@ -949,10 +969,10 @@ class Block(object): ...@@ -949,10 +969,10 @@ class Block(object):
raise ValueError("Var {0} is not found recursively".format(name)) raise ValueError("Var {0} is not found recursively".format(name))
def all_parameters(self): def all_parameters(self):
return list(self._iter_parameters()) return list(self.iter_parameters())
def _iter_parameters(self): def iter_parameters(self):
return (item[1] for item in self.vars.iteritems() return (item[1] for item in list(self.vars.items())
if isinstance(item[1], Parameter)) if isinstance(item[1], Parameter))
def create_var(self, *args, **kwargs): def create_var(self, *args, **kwargs):
...@@ -1038,7 +1058,26 @@ class Block(object): ...@@ -1038,7 +1058,26 @@ class Block(object):
global_block = self.program.global_block() global_block = self.program.global_block()
param = Parameter(global_block, *args, **kwargs) param = Parameter(global_block, *args, **kwargs)
if 'initializer' in kwargs: if 'initializer' in kwargs:
kwargs['initializer'](param, self)
def _is_inited_by(block, var):
init_ops = []
for op in block.ops:
if var.name in op.output_arg_names:
init_ops.append(op)
return init_ops
initializer = kwargs['initializer']
init_ops = _is_inited_by(global_block, param)
init_ops_len = len(init_ops)
if init_ops_len > 1:
raise RuntimeError("param " + param.name +
" is inited by multiple init ops " + str(
init_ops))
elif init_ops_len == 1:
#TODO already inited, do nothing, should log a warning
pass
else:
initializer(param, self)
return param return param
def append_op(self, *args, **kwargs): def append_op(self, *args, **kwargs):
...@@ -1113,7 +1152,7 @@ class Block(object): ...@@ -1113,7 +1152,7 @@ class Block(object):
self.create_var(name=var.name(), desc=var, type=var.type()) self.create_var(name=var.name(), desc=var, type=var.type())
# sync variables removed from c++ end # sync variables removed from c++ end
for var in self.vars.keys(): for var in list(self.vars.keys()):
if not self.desc.find_var(var): if not self.desc.find_var(var):
self.vars.pop(var) self.vars.pop(var)
...@@ -1185,7 +1224,7 @@ class Block(object): ...@@ -1185,7 +1224,7 @@ class Block(object):
if not isinstance(other, Block): if not isinstance(other, Block):
raise TypeError( raise TypeError(
"_copy_param_info_from should be invoked with Block") "_copy_param_info_from should be invoked with Block")
for p in other._iter_parameters(): for p in other.iter_parameters():
assert isinstance(p, Parameter) assert isinstance(p, Parameter)
v = self.vars.get(p.name, None) v = self.vars.get(p.name, None)
if v is None: if v is None:
...@@ -1384,7 +1423,8 @@ class Program(object): ...@@ -1384,7 +1423,8 @@ class Program(object):
res_str += block.to_string(throw_on_error, with_details) res_str += block.to_string(throw_on_error, with_details)
else: else:
protostr = self.desc.serialize_to_string() protostr = self.desc.serialize_to_string()
proto = framework_pb2.ProgramDesc.FromString(str(protostr)) proto = framework_pb2.ProgramDesc.FromString(
six.binary_type(protostr))
res_str = _debug_string_(proto, throw_on_error) res_str = _debug_string_(proto, throw_on_error)
return res_str return res_str
...@@ -1482,7 +1522,7 @@ class Program(object): ...@@ -1482,7 +1522,7 @@ class Program(object):
else: else:
p = Program() p = Program()
p.desc = core.ProgramDesc(self.desc) p.desc = core.ProgramDesc(self.desc)
p.blocks = [Block(p, i) for i in xrange(self.desc.num_blocks())] p.blocks = [Block(p, i) for i in range(self.desc.num_blocks())]
p._sync_with_cpp() p._sync_with_cpp()
p._copy_param_info_from(self) p._copy_param_info_from(self)
...@@ -1534,7 +1574,7 @@ class Program(object): ...@@ -1534,7 +1574,7 @@ class Program(object):
targets_idx.append([t.block.idx, t.idx]) targets_idx.append([t.block.idx, t.idx])
res = Program() res = Program()
res.desc = core.prune(self.desc, targets_idx) res.desc = core.prune(self.desc, targets_idx)
res.blocks = [Block(res, i) for i in xrange(res.desc.num_blocks())] res.blocks = [Block(res, i) for i in range(res.desc.num_blocks())]
res._sync_with_cpp() res._sync_with_cpp()
return res return res
...@@ -1545,7 +1585,7 @@ class Program(object): ...@@ -1545,7 +1585,7 @@ class Program(object):
2. Remove the :code:`read_op` if exists. 2. Remove the :code:`read_op` if exists.
3. change the :code:`is_test` 3. change the :code:`is_test`
attribute of operators to :code:`True`. All the :code:`Parameter` attribute of operators to :code:`True`. All the :code:`Parameter`
information will be lost. information will be lost.
...@@ -1575,13 +1615,13 @@ class Program(object): ...@@ -1575,13 +1615,13 @@ class Program(object):
root_block._remove_var(var.name()) root_block._remove_var(var.name())
# change all `is_test` attributes to True # change all `is_test` attributes to True
for i in xrange(res.desc.num_blocks()): for i in range(res.desc.num_blocks()):
block = res.desc.block(i) block = res.desc.block(i)
for j in xrange(block.op_size()): for j in range(block.op_size()):
op = block.op(j) op = block.op(j)
if op.has_attr('is_test'): if op.has_attr('is_test'):
op.set_attr('is_test', True) op.set_attr('is_test', True)
res.blocks = [Block(res, i) for i in xrange(res.desc.num_blocks())] res.blocks = [Block(res, i) for i in range(res.desc.num_blocks())]
res._sync_with_cpp() res._sync_with_cpp()
return res return res
...@@ -1594,14 +1634,14 @@ class Program(object): ...@@ -1594,14 +1634,14 @@ class Program(object):
and deserialization. and deserialization.
Args: Args:
binary_str(str): The binary prootbuf string. binary_str_type(str): The binary prootbuf string.
Returns: Returns:
Program: A deserialized program desc. Program: A deserialized program desc.
""" """
p = Program() p = Program()
p.desc = core.ProgramDesc(binary_str) p.desc = core.ProgramDesc(binary_str)
p.blocks = [Block(p, i) for i in xrange(p.desc.num_blocks())] p.blocks = [Block(p, i) for i in range(p.desc.num_blocks())]
p._sync_with_cpp() p._sync_with_cpp()
return p return p
...@@ -1629,7 +1669,7 @@ class Program(object): ...@@ -1629,7 +1669,7 @@ class Program(object):
self._seed = seed self._seed = seed
def __repr__(self): def __repr__(self):
return str(self) return self.__str__()
def global_block(self): def global_block(self):
""" """
...@@ -1740,7 +1780,7 @@ class Program(object): ...@@ -1740,7 +1780,7 @@ class Program(object):
if len(self.blocks) != len(other.blocks): if len(self.blocks) != len(other.blocks):
raise ValueError("_copy_param_info_from should be invoked with two " raise ValueError("_copy_param_info_from should be invoked with two "
"program, with represent the same topology") "program, with represent the same topology")
for var in other.global_block().vars.itervalues(): for var in list(other.global_block().vars.values()):
if var.is_data: if var.is_data:
self.global_block().var(var.name).is_data = True self.global_block().var(var.name).is_data = True
...@@ -1752,15 +1792,15 @@ class Program(object): ...@@ -1752,15 +1792,15 @@ class Program(object):
iterable: The generator will yield every variable in this program. iterable: The generator will yield every variable in this program.
""" """
for each_block in self.blocks: for each_block in self.blocks:
for each_var in each_block.vars.itervalues(): for each_var in list(each_block.vars.values()):
yield each_var yield each_var
class Parameter(Variable): class Parameter(Variable):
""" """
Parameter is derived from Variable. A parameter is a persistable Parameter is derived from Variable. A parameter is a persistable
Variable, and will be updated by optimizers after each iteration. Variable, and will be updated by optimizers after each iteration.
The training of a neural network is essentially the updating of The training of a neural network is essentially the updating of
its parameters. its parameters.
Relative to a general Variable, a Parameter has several its own Relative to a general Variable, a Parameter has several its own
...@@ -1826,8 +1866,8 @@ class Parameter(Variable): ...@@ -1826,8 +1866,8 @@ class Parameter(Variable):
additional_attr = ("trainable", "optimize_attr", "regularizer", additional_attr = ("trainable", "optimize_attr", "regularizer",
"gradient_clip_attr", "do_model_average") "gradient_clip_attr", "do_model_average")
for attr_name in additional_attr: for attr_name in additional_attr:
res_str += "%s: %s\n" % (attr_name, res_str += "%s: %s\n" % (
str(getattr(self, attr_name))) attr_name, six.binary_type(getattr(self, attr_name)))
else: else:
res_str = Variable.to_string(self, throw_on_error, False) res_str = Variable.to_string(self, throw_on_error, False)
return res_str return res_str
......
...@@ -14,12 +14,13 @@ ...@@ -14,12 +14,13 @@
import os import os
import random import random
import six
import subprocess import subprocess
import logging import logging
def crepr(v): def crepr(v):
if type(v) is str or type(v) is unicode: if isinstance(v, six.string_types):
return '"%s"' % v return '"%s"' % v
return str(v) return str(v)
...@@ -104,7 +105,7 @@ class Graph(object): ...@@ -104,7 +105,7 @@ class Graph(object):
def _rank_repr(self): def _rank_repr(self):
ranks = sorted( ranks = sorted(
self.rank_groups.items(), list(self.rank_groups.items()),
cmp=lambda a, b: a[1].priority > b[1].priority) cmp=lambda a, b: a[1].priority > b[1].priority)
repr = [] repr = []
for x in ranks: for x in ranks:
...@@ -148,7 +149,7 @@ class Node(object): ...@@ -148,7 +149,7 @@ class Node(object):
name=self.name, name=self.name,
label=self.label, label=self.label,
extra=',' + ','.join("%s=%s" % (key, crepr(value)) extra=',' + ','.join("%s=%s" % (key, crepr(value))
for key, value in self.attrs.items()) for key, value in list(self.attrs.items()))
if self.attrs else "") if self.attrs else "")
return reprs return reprs
...@@ -172,7 +173,7 @@ class Edge(object): ...@@ -172,7 +173,7 @@ class Edge(object):
target=self.target.name, target=self.target.name,
extra="" if not self.attrs else extra="" if not self.attrs else
"[" + ','.join("{}={}".format(attr[0], crepr(attr[1])) "[" + ','.join("{}={}".format(attr[0], crepr(attr[1]))
for attr in self.attrs.items()) + "]") for attr in list(self.attrs.items())) + "]")
return repr return repr
......
...@@ -14,14 +14,14 @@ ...@@ -14,14 +14,14 @@
import contextlib import contextlib
import core from . import core
import executor from . import executor
import framework from . import framework
import io from . import io
import parallel_executor from . import parallel_executor
import unique_name from . import unique_name
from trainer import check_and_get_place from .trainer import check_and_get_place
__all__ = ['Inferencer', ] __all__ = ['Inferencer', ]
......
...@@ -12,11 +12,11 @@ ...@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import framework from . import framework
import numpy as np import numpy as np
import contextlib import contextlib
from framework import convert_np_dtype_to_dtype_ from .framework import convert_np_dtype_to_dtype_
from core import VarDesc from .core import VarDesc
__all__ = [ __all__ = [
'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'MSRA', 'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'MSRA',
......
此差异已折叠。
...@@ -14,12 +14,14 @@ ...@@ -14,12 +14,14 @@
import copy import copy
import itertools import itertools
import six
from framework import Variable, Parameter, default_main_program, default_startup_program, dtype_is_floating from .framework import Variable, Parameter, default_main_program, default_startup_program, dtype_is_floating
import unique_name from . import unique_name
from paddle.fluid.initializer import Constant, Xavier from paddle.fluid.initializer import Constant, Xavier
from param_attr import ParamAttr, WeightNormParamAttr from .param_attr import ParamAttr, WeightNormParamAttr
import core from . import core
from six.moves import zip
class LayerHelper(object): class LayerHelper(object):
...@@ -83,7 +85,7 @@ class LayerHelper(object): ...@@ -83,7 +85,7 @@ class LayerHelper(object):
raise ValueError("parameter number mismatch") raise ValueError("parameter number mismatch")
elif len(param_attr) == 1 and length != 1: elif len(param_attr) == 1 and length != 1:
tmp = [None] * length tmp = [None] * length
for i in xrange(length): for i in range(length):
tmp[i] = copy.deepcopy(param_attr[0]) tmp[i] = copy.deepcopy(param_attr[0])
param_attr = tmp param_attr = tmp
return param_attr return param_attr
...@@ -91,7 +93,7 @@ class LayerHelper(object): ...@@ -91,7 +93,7 @@ class LayerHelper(object):
def iter_inputs_and_params(self, input_param_name='input'): def iter_inputs_and_params(self, input_param_name='input'):
inputs = self.multiple_input(input_param_name) inputs = self.multiple_input(input_param_name)
param_attrs = self.multiple_param_attr(len(inputs)) param_attrs = self.multiple_param_attr(len(inputs))
for ipt, param_attr in itertools.izip(inputs, param_attrs): for ipt, param_attr in zip(inputs, param_attrs):
yield ipt, param_attr yield ipt, param_attr
def input_dtype(self, input_param_name='input'): def input_dtype(self, input_param_name='input'):
...@@ -218,7 +220,7 @@ class LayerHelper(object): ...@@ -218,7 +220,7 @@ class LayerHelper(object):
norm = __norm_op(reshape, dim=0, block=block) norm = __norm_op(reshape, dim=0, block=block)
__reshape_op(norm, out=out, shape=out_shape, block=block) __reshape_op(norm, out=out, shape=out_shape, block=block)
else: else:
perm = range(len(x.shape)) perm = list(range(len(x.shape)))
perm[0], perm[dim] = dim, 0 perm[0], perm[dim] = dim, 0
transpose = __transpose_op(x, perm, block=block) transpose = __transpose_op(x, perm, block=block)
norm = __norm_op(transpose, dim=0, block=block) norm = __norm_op(transpose, dim=0, block=block)
...@@ -397,8 +399,10 @@ class LayerHelper(object): ...@@ -397,8 +399,10 @@ class LayerHelper(object):
act = self.kwargs.get('act', None) act = self.kwargs.get('act', None)
if act is None: if act is None:
return input_var return input_var
if isinstance(act, basestring): if isinstance(act, six.string_types):
act = {'type': act} act = {'type': act}
else:
raise TypeError(str(act) + " should be unicode or str")
if 'use_cudnn' in self.kwargs and self.kwargs.get('use_cudnn'): if 'use_cudnn' in self.kwargs and self.kwargs.get('use_cudnn'):
act['use_cudnn'] = self.kwargs.get('use_cudnn') act['use_cudnn'] = self.kwargs.get('use_cudnn')
......
...@@ -12,25 +12,25 @@ ...@@ -12,25 +12,25 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import ops from . import ops
from ops import * from .ops import *
import nn from . import nn
from nn import * from .nn import *
import io from . import io
from io import * from .io import *
import tensor from . import tensor
from tensor import * from .tensor import *
import control_flow from . import control_flow
from control_flow import * from .control_flow import *
import device from . import device
from device import * from .device import *
import math_op_patch from . import math_op_patch
from math_op_patch import * from .math_op_patch import *
import detection from . import detection
from detection import * from .detection import *
import metric_op from . import metric_op
from metric_op import * from .metric_op import *
from learning_rate_scheduler import * from .learning_rate_scheduler import *
__all__ = [] __all__ = []
__all__ += nn.__all__ __all__ += nn.__all__
......
...@@ -13,15 +13,16 @@ ...@@ -13,15 +13,16 @@
# limitations under the License. # limitations under the License.
import contextlib import contextlib
from layer_function_generator import autodoc, templatedoc from .layer_function_generator import autodoc, templatedoc
from tensor import assign, fill_constant from .tensor import assign, fill_constant
from .. import core from .. import core
from ..framework import Program, Variable, Operator from ..framework import Program, Variable, Operator
from ..layer_helper import LayerHelper, unique_name from ..layer_helper import LayerHelper, unique_name
from ..initializer import force_init_on_cpu from ..initializer import force_init_on_cpu
from ops import logical_and, logical_not, logical_or from .ops import logical_and, logical_not, logical_or
import numpy import numpy
import warnings import warnings
from functools import reduce
__all__ = [ __all__ = [
'While', 'While',
...@@ -276,7 +277,7 @@ class ParallelDo(object): ...@@ -276,7 +277,7 @@ class ParallelDo(object):
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(x=cost)
.. warning:: .. warning::
It will be soon deprecated, please use ParallelExecutor instead. It will be soon deprecated, please use ParallelExecutor instead.
""" """
...@@ -601,7 +602,7 @@ class StaticRNN(object): ...@@ -601,7 +602,7 @@ class StaticRNN(object):
boot_memories = [] boot_memories = []
pre_memories = [] pre_memories = []
memories = [] memories = []
for _, mem in self.memories.iteritems(): for _, mem in list(self.memories.items()):
boot_memories.append(mem.init) boot_memories.append(mem.init)
pre_memories.append(mem.pre_mem.name) pre_memories.append(mem.pre_mem.name)
mem_var = rnn_block.var(mem.mem.name) mem_var = rnn_block.var(mem.mem.name)
...@@ -819,21 +820,21 @@ def max_sequence_len(rank_table): ...@@ -819,21 +820,21 @@ def max_sequence_len(rank_table):
def lod_tensor_to_array(x, table): def lod_tensor_to_array(x, table):
""" """
Convert a LoDTensor to a LoDTensorArray. Convert a LoDTensor to a LoDTensorArray.
This function split a LoDTesnor to a LoDTensorArray according to its LoD This function split a LoDTesnor to a LoDTensorArray according to its LoD
information. LoDTensorArray is an alias of C++ std::vector<LoDTensor> in information. LoDTensorArray is an alias of C++ std::vector<LoDTensor> in
PaddlePaddle. The generated LoDTensorArray of this function can be further read PaddlePaddle. The generated LoDTensorArray of this function can be further read
or written by `read_from_array()` and `write_to_array()` operators. However, or written by `read_from_array()` and `write_to_array()` operators. However,
this function is generally an internal component of PaddlePaddle `DynamicRNN`. this function is generally an internal component of PaddlePaddle `DynamicRNN`.
Users should not use it directly. Users should not use it directly.
Args: Args:
x (Variable|list): The LoDTensor to be converted to a LoDTensorArray. x (Variable|list): The LoDTensor to be converted to a LoDTensorArray.
table (ParamAttr|list): The variable that stores the level of lod table (ParamAttr|list): The variable that stores the level of lod
which is ordered by sequence length in which is ordered by sequence length in
descending order. It is generally generated descending order. It is generally generated
by `layers.lod_rank_table()` API. by `layers.lod_rank_table()` API.
Returns: Returns:
...@@ -1067,9 +1068,9 @@ def array_read(array, i): ...@@ -1067,9 +1068,9 @@ def array_read(array, i):
Given: Given:
array = [0.6, 0.1, 0.3, 0.1] array = [0.6, 0.1, 0.3, 0.1]
And: And:
i = 2 i = 2
Then: Then:
...@@ -1176,9 +1177,9 @@ def array_length(array): ...@@ -1176,9 +1177,9 @@ def array_length(array):
class ConditionalBlockGuard(BlockGuard): class ConditionalBlockGuard(BlockGuard):
""" """
ConditionalBlockGuard is derived from BlockGuard. It is dedicated for ConditionalBlockGuard is derived from BlockGuard. It is dedicated for
holding a ConditionalBlock, and helping users entering and exiting the holding a ConditionalBlock, and helping users entering and exiting the
ConditionalBlock via Python's 'with' keyword. However, ConditionalBlockGuard ConditionalBlock via Python's 'with' keyword. However, ConditionalBlockGuard
is generally an internal component of IfElse, users should not use it directly. is generally an internal component of IfElse, users should not use it directly.
""" """
...@@ -1512,7 +1513,7 @@ class IfElse(object): ...@@ -1512,7 +1513,7 @@ class IfElse(object):
def __call__(self): def __call__(self):
if self.status != self.OUT_IF_ELSE_BLOCKS: if self.status != self.OUT_IF_ELSE_BLOCKS:
raise ValueError("IfElse::__call__ must be out of sub-block") raise ValueError("IfElse::__call__ must be out of sub-block")
false_len, true_len = map(len, self.output_table) false_len, true_len = list(map(len, self.output_table))
if false_len == 0 and true_len == 0: if false_len == 0 and true_len == 0:
raise ValueError("Must invoke true_block/false_block before " raise ValueError("Must invoke true_block/false_block before "
"__call__") "__call__")
...@@ -1932,7 +1933,7 @@ def is_empty(x, cond=None, **ignored): ...@@ -1932,7 +1933,7 @@ def is_empty(x, cond=None, **ignored):
Args: Args:
x (Variable): The Variable to be tested. x (Variable): The Variable to be tested.
cond (Variable|None): Output parameter. Returns the test result cond (Variable|None): Output parameter. Returns the test result
of given 'x'. Default: None of given 'x'. Default: None
Returns: Returns:
......
...@@ -15,12 +15,13 @@ ...@@ -15,12 +15,13 @@
All layers just related to the detection neural network. All layers just related to the detection neural network.
""" """
from layer_function_generator import generate_layer_fn from .layer_function_generator import generate_layer_fn
from layer_function_generator import autodoc, templatedoc from .layer_function_generator import autodoc, templatedoc
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
import tensor from . import tensor
import nn from . import nn
import math import math
from functools import reduce
__all__ = [ __all__ = [
'prior_box', 'prior_box',
...@@ -1032,7 +1033,7 @@ def multi_box_head(inputs, ...@@ -1032,7 +1033,7 @@ def multi_box_head(inputs,
min_sizes = [] min_sizes = []
max_sizes = [] max_sizes = []
step = int(math.floor(((max_ratio - min_ratio)) / (num_layer - 2))) step = int(math.floor(((max_ratio - min_ratio)) / (num_layer - 2)))
for ratio in xrange(min_ratio, max_ratio + 1, step): for ratio in range(min_ratio, max_ratio + 1, step):
min_sizes.append(base_size * ratio / 100.) min_sizes.append(base_size * ratio / 100.)
max_sizes.append(base_size * (ratio + step) / 100.) max_sizes.append(base_size * (ratio + step) / 100.)
min_sizes = [base_size * .10] + min_sizes min_sizes = [base_size * .10] + min_sizes
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
All util layers. All util layers.
""" """
from layer_function_generator import autodoc from .layer_function_generator import autodoc
from ..framework import unique_name from ..framework import unique_name
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
from ..annotations import deprecated from ..annotations import deprecated
......
...@@ -16,8 +16,8 @@ import multiprocessing ...@@ -16,8 +16,8 @@ import multiprocessing
import threading import threading
from ..data_feeder import DataFeeder from ..data_feeder import DataFeeder
from control_flow import BlockGuard from .control_flow import BlockGuard
from layer_function_generator import templatedoc from .layer_function_generator import templatedoc
from .. import core from .. import core
from ..executor import global_scope from ..executor import global_scope
from ..framework import convert_np_dtype_to_dtype_, default_main_program, \ from ..framework import convert_np_dtype_to_dtype_, default_main_program, \
...@@ -69,7 +69,7 @@ def data(name, ...@@ -69,7 +69,7 @@ def data(name,
""" """
helper = LayerHelper('data', **locals()) helper = LayerHelper('data', **locals())
shape = list(shape) shape = list(shape)
for i in xrange(len(shape)): for i in range(len(shape)):
if shape[i] is None: if shape[i] is None:
shape[i] = -1 shape[i] = -1
append_batch_size = False append_batch_size = False
...@@ -387,9 +387,9 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True): ...@@ -387,9 +387,9 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
Create a uniform random data generator Create a uniform random data generator
This layer returns a Reader Variable. This layer returns a Reader Variable.
Instead of opening a file and reading data from it, this Instead of opening a file and reading data from it, this
Reader Variable generates float uniform random data by itself. Reader Variable generates float uniform random data by itself.
It can be used as a dummy reader to test a network without It can be used as a dummy reader to test a network without
opening a real file. opening a real file.
Args: Args:
...@@ -707,9 +707,9 @@ def open_files(filenames, ...@@ -707,9 +707,9 @@ def open_files(filenames,
""" """
Open files Open files
This layer takes a list of files to read from and returns a Reader Variable. This layer takes a list of files to read from and returns a Reader Variable.
Via the Reader Variable, we can get data from given files. All files must Via the Reader Variable, we can get data from given files. All files must
have name suffixs to indicate their formats, e.g., '*.recordio'. have name suffixs to indicate their formats, e.g., '*.recordio'.
Args: Args:
filenames(list): The list of file names. filenames(list): The list of file names.
...@@ -825,9 +825,9 @@ def shuffle(reader, buffer_size): ...@@ -825,9 +825,9 @@ def shuffle(reader, buffer_size):
def batch(reader, batch_size): def batch(reader, batch_size):
""" """
This layer is a reader decorator. It takes a reader and adds This layer is a reader decorator. It takes a reader and adds
'batching' decoration on it. When reading with the result 'batching' decoration on it. When reading with the result
decorated reader, output data will be automatically organized decorated reader, output data will be automatically organized
to the form of batches. to the form of batches.
Args: Args:
...@@ -852,11 +852,11 @@ def batch(reader, batch_size): ...@@ -852,11 +852,11 @@ def batch(reader, batch_size):
# If we read data with the raw_reader: # If we read data with the raw_reader:
# data = fluid.layers.read_file(raw_reader) # data = fluid.layers.read_file(raw_reader)
# We can only get data instance by instance. # We can only get data instance by instance.
# #
# However, if we read data with the batch_reader: # However, if we read data with the batch_reader:
# data = fluid.layers.read_file(batch_reader) # data = fluid.layers.read_file(batch_reader)
# Each 5 adjacent instances will be automatically combined together # Each 5 adjacent instances will be automatically combined together
# to become a batch. So what we get('data') is a batch data instead # to become a batch. So what we get('data') is a batch data instead
# of an instance. # of an instance.
""" """
return __create_unshared_decorated_reader__( return __create_unshared_decorated_reader__(
...@@ -903,8 +903,8 @@ def read_file(reader): ...@@ -903,8 +903,8 @@ def read_file(reader):
""" """
Execute the given reader and get data via it. Execute the given reader and get data via it.
A reader is also a Variable. It can be a raw reader generated by A reader is also a Variable. It can be a raw reader generated by
`fluid.layers.open_files()` or a decorated one generated by `fluid.layers.open_files()` or a decorated one generated by
`fluid.layers.double_buffer()` and so on. `fluid.layers.double_buffer()` and so on.
Args: Args:
...@@ -1005,7 +1005,7 @@ class Preprocessor(object): ...@@ -1005,7 +1005,7 @@ class Preprocessor(object):
source_lod_levels = self.underlying_reader.desc.lod_levels() source_lod_levels = self.underlying_reader.desc.lod_levels()
self.source_var_names = [ self.source_var_names = [
unique_name("preprocessor_source") unique_name("preprocessor_source")
for _ in xrange(len(source_shapes)) for _ in range(len(source_shapes))
] ]
source_vars = [] source_vars = []
for var_name, shape, dtype, lod_level in zip( for var_name, shape, dtype, lod_level in zip(
......
...@@ -12,11 +12,11 @@ ...@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import re import re
import cStringIO
import functools import functools
import warnings import warnings
import string import string
from six.moves import cStringIO
from ..proto import framework_pb2 from ..proto import framework_pb2
from ..framework import OpProtoHolder, Variable from ..framework import OpProtoHolder, Variable
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
...@@ -70,7 +70,7 @@ def _generate_doc_string_(op_proto): ...@@ -70,7 +70,7 @@ def _generate_doc_string_(op_proto):
if not isinstance(op_proto, framework_pb2.OpProto): if not isinstance(op_proto, framework_pb2.OpProto):
raise TypeError("OpProto should be `framework_pb2.OpProto`") raise TypeError("OpProto should be `framework_pb2.OpProto`")
buf = cStringIO.StringIO() buf = cStringIO()
buf.write(escape_math(op_proto.comment)) buf.write(escape_math(op_proto.comment))
buf.write('\nArgs:\n') buf.write('\nArgs:\n')
for each_input in op_proto.inputs: for each_input in op_proto.inputs:
...@@ -119,9 +119,9 @@ def generate_layer_fn(op_type): ...@@ -119,9 +119,9 @@ def generate_layer_fn(op_type):
""" """
op_proto = OpProtoHolder.instance().get_op_proto(op_type) op_proto = OpProtoHolder.instance().get_op_proto(op_type)
not_intermediate_outputs = \ not_intermediate_outputs = \
filter(lambda output: not output.intermediate, op_proto.outputs) [output for output in op_proto.outputs if not output.intermediate]
intermediate_outputs = \ intermediate_outputs = \
filter(lambda output: output.intermediate, op_proto.outputs) [output for output in op_proto.outputs if output.intermediate]
if len(not_intermediate_outputs) != 1: if len(not_intermediate_outputs) != 1:
raise ValueError("Only one non intermediate output operator can be", raise ValueError("Only one non intermediate output operator can be",
......
...@@ -20,10 +20,10 @@ User can also implement their own learning_rate_decay ...@@ -20,10 +20,10 @@ User can also implement their own learning_rate_decay
strategy according to this module. strategy according to this module.
""" """
import control_flow from . import control_flow
import nn from . import nn
import ops from . import ops
import tensor from . import tensor
from ..initializer import init_on_cpu from ..initializer import init_on_cpu
from ..framework import default_main_program, Parameter from ..framework import default_main_program, Parameter
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
from ..framework import Variable, unique_name from ..framework import Variable, unique_name
from layer_function_generator import OpProtoHolder from .layer_function_generator import OpProtoHolder
from ..initializer import force_init_on_cpu from ..initializer import force_init_on_cpu
......
...@@ -20,7 +20,7 @@ from ..layer_helper import LayerHelper ...@@ -20,7 +20,7 @@ from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant from ..initializer import Normal, Constant
from ..framework import Variable from ..framework import Variable
from ..param_attr import ParamAttr from ..param_attr import ParamAttr
import nn from . import nn
__all__ = ['accuracy', 'auc'] __all__ = ['accuracy', 'auc']
......
...@@ -33,11 +33,12 @@ from ..layer_helper import LayerHelper ...@@ -33,11 +33,12 @@ from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant from ..initializer import Normal, Constant
from ..framework import Variable from ..framework import Variable
from ..param_attr import ParamAttr from ..param_attr import ParamAttr
from layer_function_generator import autodoc, templatedoc from .layer_function_generator import autodoc, templatedoc
from tensor import concat from .tensor import concat
import utils from . import utils
import random import random
from .. import unique_name from .. import unique_name
from functools import reduce
__all__ = [ __all__ = [
'fc', 'fc',
...@@ -949,6 +950,10 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None): ...@@ -949,6 +950,10 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None):
helper = LayerHelper('dropout', **locals()) helper = LayerHelper('dropout', **locals())
out = helper.create_tmp_variable(dtype=x.dtype) out = helper.create_tmp_variable(dtype=x.dtype)
mask = helper.create_tmp_variable(dtype=x.dtype, stop_gradient=True) mask = helper.create_tmp_variable(dtype=x.dtype, stop_gradient=True)
if (seed is None or seed == 0) and helper.main_program.random_seed != 0:
seed = helper.main_program.random_seed
helper.append_op( helper.append_op(
type='dropout', type='dropout',
inputs={'X': [x]}, inputs={'X': [x]},
...@@ -4845,7 +4850,7 @@ def dice_loss(input, label, epsilon=0.00001): ...@@ -4845,7 +4850,7 @@ def dice_loss(input, label, epsilon=0.00001):
loss = fluid.layers.dice_loss(input=predictions, label=label, 2) loss = fluid.layers.dice_loss(input=predictions, label=label, 2)
""" """
label = one_hot(label, depth=input.shape[-1]) label = one_hot(label, depth=input.shape[-1])
reduce_dim = range(1, len(input.shape)) reduce_dim = list(range(1, len(input.shape)))
inse = reduce_sum(input * label, dim=reduce_dim) inse = reduce_sum(input * label, dim=reduce_dim)
dice_denominator = reduce_sum( dice_denominator = reduce_sum(
input, dim=reduce_dim) + reduce_sum( input, dim=reduce_dim) + reduce_sum(
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from layer_function_generator import generate_layer_fn from .layer_function_generator import generate_layer_fn
__activations__ = [ __activations__ = [
'sigmoid', 'sigmoid',
......
...@@ -18,7 +18,7 @@ from ..framework import convert_np_dtype_to_dtype_ ...@@ -18,7 +18,7 @@ from ..framework import convert_np_dtype_to_dtype_
from ..framework import Variable from ..framework import Variable
from ..initializer import Constant, force_init_on_cpu from ..initializer import Constant, force_init_on_cpu
from ..core import VarDesc from ..core import VarDesc
from layer_function_generator import templatedoc from .layer_function_generator import templatedoc
import numpy import numpy
__all__ = [ __all__ = [
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import core from . import core
import numpy as np import numpy as np
__all__ = ['create_lod_tensor', 'create_random_int_lodtensor'] __all__ = ['create_lod_tensor', 'create_random_int_lodtensor']
...@@ -24,7 +24,7 @@ def create_lod_tensor(data, recursive_seq_lens, place): ...@@ -24,7 +24,7 @@ def create_lod_tensor(data, recursive_seq_lens, place):
Create a lod tensor by doing the following: Create a lod tensor by doing the following:
1. Check that the length-based level of detail (LoD) also known as 1. Check that the length-based level of detail (LoD) also known as
recursive_sequence_lengths of the input is valid. recursive_sequence_lengths of the input is valid.
2. Convert recursive_sequence_lengths to a offset-based LoD. 2. Convert recursive_sequence_lengths to a offset-based LoD.
...@@ -33,7 +33,7 @@ def create_lod_tensor(data, recursive_seq_lens, place): ...@@ -33,7 +33,7 @@ def create_lod_tensor(data, recursive_seq_lens, place):
CPU or GPU device (based on input place). CPU or GPU device (based on input place).
4. Set the level of detail (LoD) using the offset-based LoD. 4. Set the level of detail (LoD) using the offset-based LoD.
Examples: Examples:
Suppose we want LoDTensor to hold data for sequences of word, where each Suppose we want LoDTensor to hold data for sequences of word, where each
...@@ -51,7 +51,7 @@ def create_lod_tensor(data, recursive_seq_lens, place): ...@@ -51,7 +51,7 @@ def create_lod_tensor(data, recursive_seq_lens, place):
Args: Args:
data(numpy.ndarray|list|LoDTensor): a numpy array or a LoDTensor or a data(numpy.ndarray|list|LoDTensor): a numpy array or a LoDTensor or a
list holding the data to be copied. list holding the data to be copied.
recursive_seq_lens(list): a list of lists indicating the length-based level of detail recursive_seq_lens(list): a list of lists indicating the length-based level of detail
info specified by the user. info specified by the user.
place(Place): CPU or GPU place indicating where the data in the new place(Place): CPU or GPU place indicating where the data in the new
LoDTensor will be stored. LoDTensor will be stored.
...@@ -62,10 +62,10 @@ def create_lod_tensor(data, recursive_seq_lens, place): ...@@ -62,10 +62,10 @@ def create_lod_tensor(data, recursive_seq_lens, place):
if isinstance(data, core.LoDTensor): if isinstance(data, core.LoDTensor):
return create_lod_tensor(np.array(data), recursive_seq_lens, place) return create_lod_tensor(np.array(data), recursive_seq_lens, place)
elif isinstance(data, list): elif isinstance(data, list):
# When input data is a list, it only deal with the case where the base element # When input data is a list, it only deal with the case where the base element
# is an index of shape [1] and dtype int64 (e.g., word id). Hence, the generated # is an index of shape [1] and dtype int64 (e.g., word id). Hence, the generated
# LoDTensor will be of shape [n, 1] and dtype int64, where `n` is the total number # LoDTensor will be of shape [n, 1] and dtype int64, where `n` is the total number
# of words or other indexes in the sequence. # of words or other indexes in the sequence.
new_recursive_seq_lens = [] new_recursive_seq_lens = []
for seq in data: for seq in data:
new_recursive_seq_lens.append(len(seq)) new_recursive_seq_lens.append(len(seq))
...@@ -109,12 +109,12 @@ def create_random_int_lodtensor(recursive_seq_lens, base_shape, place, low, ...@@ -109,12 +109,12 @@ def create_random_int_lodtensor(recursive_seq_lens, base_shape, place, low,
Suppose we want LoDTensor to hold data for sequences of word, where each Suppose we want LoDTensor to hold data for sequences of word, where each
word is represented by an integer. If we want to create a LoDTensor to word is represented by an integer. If we want to create a LoDTensor to
represent two sentences, one of 2 words, and one of 3 words. Then represent two sentences, one of 2 words, and one of 3 words. Then
'base_shape' is [1], input length-based 'recursive_seq_lens' is [[2, 3]]. 'base_shape' is [1], input length-based 'recursive_seq_lens' is [[2, 3]].
Then the overall shape of the LoDTensor would be [5, 1], holding 5 words Then the overall shape of the LoDTensor would be [5, 1], holding 5 words
for two sentences. for two sentences.
Args: Args:
recursive_seq_lens(list): a list of lists indicating the length-based recursive_seq_lens(list): a list of lists indicating the length-based
level of detail info specified by the user. level of detail info specified by the user.
base_shape(list): the shape of the basic element to be held by the base_shape(list): the shape of the basic element to be held by the
LoDTensor. LoDTensor.
...@@ -124,11 +124,11 @@ def create_random_int_lodtensor(recursive_seq_lens, base_shape, place, low, ...@@ -124,11 +124,11 @@ def create_random_int_lodtensor(recursive_seq_lens, base_shape, place, low,
high(int): the upper bound of the random integers. high(int): the upper bound of the random integers.
Returns: Returns:
A fluid LoDTensor object with tensor data and recursive_seq_lens info. A fluid LoDTensor object with tensor data and recursive_seq_lens info.
""" """
assert isinstance(base_shape, list), "base_shape should be a list" assert isinstance(base_shape, list), "base_shape should be a list"
# append the total number of basic elements to the front of its shape # append the total number of basic elements to the front of its shape
overall_shape = [sum(recursive_seq_lens[-1])] + base_shape overall_shape = [sum(recursive_seq_lens[-1])] + base_shape
# the range of integer data elements is [low, high] # the range of integer data elements is [low, high]
data = np.random.random_integers(low, high, overall_shape).astype("int64") data = np.random.random_integers(low, high, overall_shape).astype("int64")
return create_lod_tensor(data, recursive_seq_lens, place) return create_lod_tensor(data, recursive_seq_lens, place)
...@@ -79,10 +79,10 @@ class MetricBase(object): ...@@ -79,10 +79,10 @@ class MetricBase(object):
""" """
states = { states = {
attr: value attr: value
for attr, value in self.__dict__.iteritems() for attr, value in list(self.__dict__.items())
if not attr.startswith("_") if not attr.startswith("_")
} }
for attr, value in states.iteritems(): for attr, value in list(states.items()):
if isinstance(value, int): if isinstance(value, int):
setattr(self, attr, 0) setattr(self, attr, 0)
elif isinstance(value, float): elif isinstance(value, float):
...@@ -105,7 +105,7 @@ class MetricBase(object): ...@@ -105,7 +105,7 @@ class MetricBase(object):
""" """
states = { states = {
attr: value attr: value
for attr, value in self.__dict__.iteritems() for attr, value in list(self.__dict__.items())
if not attr.startswith("_") if not attr.startswith("_")
} }
config = {} config = {}
......
...@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__) ...@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
try: try:
from graphviz import Digraph from .graphviz import Digraph
except ImportError: except ImportError:
logger.info( logger.info(
'Cannot import graphviz, which is required for drawing a network. This ' 'Cannot import graphviz, which is required for drawing a network. This '
...@@ -77,7 +77,7 @@ def parse_graph(program, graph, var_dict, **kwargs): ...@@ -77,7 +77,7 @@ def parse_graph(program, graph, var_dict, **kwargs):
# fill the known variables # fill the known variables
for block in program.blocks: for block in program.blocks:
for var in block.vars: for var in block.vars:
if not var_dict.has_key(var): if var not in var_dict:
var_dict[var] = "Feed" var_dict[var] = "Feed"
temp_id = 0 temp_id = 0
...@@ -93,17 +93,17 @@ def parse_graph(program, graph, var_dict, **kwargs): ...@@ -93,17 +93,17 @@ def parse_graph(program, graph, var_dict, **kwargs):
var_dict[arg] = op.type var_dict[arg] = op.type
for e in op.inputs: for e in op.inputs:
for arg in e.arguments: for arg in e.arguments:
if var_dict.has_key(arg): if arg in var_dict:
graph.edge(**draw_edge(var_dict, op, e, arg)) graph.edge(**draw_edge(var_dict, op, e, arg))
break # only plot the first block break # only plot the first block
def draw_graph(startup_program, main_program, **kwargs): def draw_graph(startup_program, main_program, **kwargs):
if kwargs.has_key("graph_attr"): if "graph_attr" in kwargs:
GRAPH_STYLE.update(kwargs[graph_attr]) GRAPH_STYLE.update(kwargs[graph_attr])
if kwargs.has_key("node_attr"): if "node_attr" in kwargs:
OP_STYLE.update(kwargs[node_attr]) OP_STYLE.update(kwargs[node_attr])
if kwargs.has_key("edge_attr"): if "edge_attr" in kwargs:
VAR_STYLE.update(kwargs[edge_attr]) VAR_STYLE.update(kwargs[edge_attr])
graph_id = unique_id() graph_id = unique_id()
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import layers from . import layers
__all__ = [ __all__ = [
"simple_img_conv_pool", "simple_img_conv_pool",
...@@ -210,7 +210,7 @@ def img_conv_group(input, ...@@ -210,7 +210,7 @@ def img_conv_group(input,
conv_with_batchnorm = __extend_list__(conv_with_batchnorm) conv_with_batchnorm = __extend_list__(conv_with_batchnorm)
conv_batchnorm_drop_rate = __extend_list__(conv_batchnorm_drop_rate) conv_batchnorm_drop_rate = __extend_list__(conv_batchnorm_drop_rate)
for i in xrange(len(conv_num_filter)): for i in range(len(conv_num_filter)):
local_conv_act = conv_act local_conv_act = conv_act
if conv_with_batchnorm[i]: if conv_with_batchnorm[i]:
local_conv_act = None local_conv_act = None
...@@ -488,10 +488,11 @@ def scaled_dot_product_attention(queries, ...@@ -488,10 +488,11 @@ def scaled_dot_product_attention(queries,
trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
return layers.reshape( return layers.reshape(
x=trans_x, x=trans_x,
shape=map(int, [ shape=list(
trans_x.shape[0], trans_x.shape[1], map(int, [
trans_x.shape[2] * trans_x.shape[3] trans_x.shape[0], trans_x.shape[1], trans_x.shape[2] *
])) trans_x.shape[3]
])))
q, k, v = __compute_qkv(queries, keys, values, num_heads) q, k, v = __compute_qkv(queries, keys, values, num_heads)
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import six
import paddle.fluid.core as core import paddle.fluid.core as core
import paddle.fluid.proto.framework_pb2 as framework_pb2 import paddle.fluid.proto.framework_pb2 as framework_pb2
...@@ -24,13 +26,13 @@ def get_all_op_protos(): ...@@ -24,13 +26,13 @@ def get_all_op_protos():
protostrs = core.get_all_op_protos() protostrs = core.get_all_op_protos()
ret_values = [] ret_values = []
for pbstr in protostrs: for pbstr in protostrs:
op_proto = framework_pb2.OpProto.FromString(str(pbstr)) op_proto = framework_pb2.OpProto.FromString(six.binary_type(pbstr))
ret_values.append(op_proto) ret_values.append(op_proto)
return ret_values return ret_values
def is_str(s): def is_str(s):
return isinstance(s, str) or isinstance(s, unicode) return isinstance(s, six.string_types)
class OpDescCreationMethod(object): class OpDescCreationMethod(object):
...@@ -189,7 +191,7 @@ class OperatorFactory(object): ...@@ -189,7 +191,7 @@ class OperatorFactory(object):
return self.get_op_info(t).method(**kwargs) return self.get_op_info(t).method(**kwargs)
def types(self): def types(self):
return self.op_methods.keys() return list(self.op_methods.keys())
def get_op_info(self, t): def get_op_info(self, t):
if t not in self.op_methods: if t not in self.op_methods:
...@@ -197,13 +199,13 @@ class OperatorFactory(object): ...@@ -197,13 +199,13 @@ class OperatorFactory(object):
return self.op_methods.get(t) return self.op_methods.get(t)
def get_op_input_names(self, type): def get_op_input_names(self, type):
return map(lambda x: x[0], self.get_op_info(type).inputs) return [x[0] for x in self.get_op_info(type).inputs]
def get_op_inputs(self, type): def get_op_inputs(self, type):
return self.get_op_info(type).inputs return self.get_op_info(type).inputs
def get_op_output_names(self, type): def get_op_output_names(self, type):
return map(lambda x: x[0], self.get_op_info(type).outputs) return [x[0] for x in self.get_op_info(type).outputs]
def get_op_outputs(self, type): def get_op_outputs(self, type):
return self.get_op_info(type).outputs return self.get_op_info(type).outputs
......
...@@ -14,15 +14,15 @@ ...@@ -14,15 +14,15 @@
import re import re
from collections import defaultdict from collections import defaultdict
from paddle.fluid.framework import Program, Variable from paddle.fluid.framework import Program, Variable
import framework from . import framework
import layers from . import layers
from backward import append_backward from .backward import append_backward
from framework import program_guard from .framework import program_guard
import unique_name from . import unique_name
from initializer import Constant from .initializer import Constant
from layer_helper import LayerHelper from .layer_helper import LayerHelper
from regularizer import append_regularization_ops from .regularizer import append_regularization_ops
from clip import append_gradient_clip_ops, error_clip_callback from .clip import append_gradient_clip_ops, error_clip_callback
from contextlib import contextmanager from contextlib import contextmanager
__all__ = [ __all__ = [
......
...@@ -12,10 +12,11 @@ ...@@ -12,10 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import core from __future__ import print_function
import multiprocessing import multiprocessing
import framework from . import core
import executor from . import framework
from . import executor
import warnings import warnings
import sys import sys
import os import os
...@@ -94,7 +95,7 @@ class ParallelExecutor(object): ...@@ -94,7 +95,7 @@ class ParallelExecutor(object):
self._places = [] self._places = []
self._act_places = [] self._act_places = []
if use_cuda: if use_cuda:
for i in xrange(core.get_cuda_device_count()): for i in range(core.get_cuda_device_count()):
p = core.Place() p = core.Place()
self._act_places.append(core.CUDAPlace(i)) self._act_places.append(core.CUDAPlace(i))
p.set_place(self._act_places[-1]) p.set_place(self._act_places[-1])
...@@ -102,7 +103,7 @@ class ParallelExecutor(object): ...@@ -102,7 +103,7 @@ class ParallelExecutor(object):
else: else:
cpu_num = int( cpu_num = int(
os.environ.get('CPU_NUM', multiprocessing.cpu_count())) os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
for i in xrange(cpu_num): for i in range(cpu_num):
p = core.Place() p = core.Place()
self._act_places.append(core.CPUPlace()) self._act_places.append(core.CPUPlace())
p.set_place(self._act_places[-1]) p.set_place(self._act_places[-1])
...@@ -143,16 +144,16 @@ class ParallelExecutor(object): ...@@ -143,16 +144,16 @@ class ParallelExecutor(object):
) if share_vars_from else [] ) if share_vars_from else []
self.persistable_vars = [ self.persistable_vars = [
v.name v.name for v in [
for v in filter( var for var in main.list_vars()
lambda var: var.persistable and var.type != core.VarDesc.VarType.RAW, if var.persistable and var.type != core.VarDesc.VarType.RAW
main.list_vars()) ]
] ]
self.executor = core.ParallelExecutor( self.executor = core.ParallelExecutor(
self._places, self._places,
set([ set([
p.name for p in main.global_block()._iter_parameters() p.name for p in main.global_block().iter_parameters()
if not p.stop_gradient if not p.stop_gradient
]), ]),
set(self.persistable_vars), main.desc, loss_name set(self.persistable_vars), main.desc, loss_name
...@@ -227,7 +228,9 @@ class ParallelExecutor(object): ...@@ -227,7 +228,9 @@ class ParallelExecutor(object):
""" """
if feed is None and feed_dict is not None: if feed is None and feed_dict is not None:
feed = feed_dict feed = feed_dict
print >> sys.stderr, "`feed_dict` is deprecated. Please use `feed=`" print(
"`feed_dict` is deprecated. Please use `feed=`",
file=sys.stderr)
if isinstance(feed, dict): if isinstance(feed, dict):
feed_tensor_dict = dict() feed_tensor_dict = dict()
......
...@@ -12,8 +12,10 @@ ...@@ -12,8 +12,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from initializer import Initializer, Xavier, Constant import six
from regularizer import WeightDecayRegularizer
from .initializer import Initializer, Xavier, Constant
from .regularizer import WeightDecayRegularizer
__all__ = [ __all__ = [
'ParamAttr', 'ParamAttr',
...@@ -134,7 +136,7 @@ class ParamAttr(object): ...@@ -134,7 +136,7 @@ class ParamAttr(object):
return [ParamAttr._to_attr(a) for a in arg] return [ParamAttr._to_attr(a) for a in arg]
elif isinstance(arg, ParamAttr): elif isinstance(arg, ParamAttr):
return arg return arg
elif isinstance(arg, str) or isinstance(arg, unicode): elif isinstance(arg, six.string_types):
return ParamAttr(name=arg) return ParamAttr(name=arg)
elif isinstance(arg, Initializer): elif isinstance(arg, Initializer):
return ParamAttr(initializer=arg) return ParamAttr(initializer=arg)
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import core from . import core
from contextlib import contextmanager from contextlib import contextmanager
import os import os
...@@ -224,7 +224,7 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'): ...@@ -224,7 +224,7 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
If the state == 'All', a profile proto file will be written to If the state == 'All', a profile proto file will be written to
`profile_path`. This file records timeline information during the execution. `profile_path`. This file records timeline information during the execution.
Then users can visualize this file to see the timeline, please refer Then users can visualize this file to see the timeline, please refer
https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/howto/optimization/timeline.md https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/howto/optimization/timeline.md
Args: Args:
......
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
# limitations under the License. # limitations under the License.
import os import os
import core
import contextlib import contextlib
from . import core
__all__ = [ __all__ = [
'convert_reader_to_recordio_file', 'convert_reader_to_recordio_files' 'convert_reader_to_recordio_file', 'convert_reader_to_recordio_files'
] ]
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import framework from . import framework
from . import core from . import core
__all__ = ['L1Decay', 'L2Decay', 'L1DecayRegularizer', 'L2DecayRegularizer'] __all__ = ['L1Decay', 'L2Decay', 'L1DecayRegularizer', 'L2DecayRegularizer']
......
...@@ -63,7 +63,7 @@ def train(use_cuda, train_program, params_dirname): ...@@ -63,7 +63,7 @@ def train(use_cuda, train_program, params_dirname):
if event.step == 10: if event.step == 10:
test_metrics = trainer.test( test_metrics = trainer.test(
reader=test_reader, feed_order=['x', 'y']) reader=test_reader, feed_order=['x', 'y'])
print test_metrics print(test_metrics)
''' '''
... ...
['25.768919467926025'] ['25.768919467926025']
......
...@@ -28,11 +28,12 @@ images per class. ...@@ -28,11 +28,12 @@ images per class.
""" """
import cPickle
import itertools import itertools
import numpy import numpy
import paddle.v2.dataset.common import paddle.v2.dataset.common
import tarfile import tarfile
from six.moves import cPickle as pickle
from six.moves import zip
__all__ = ['train10'] __all__ = ['train10']
...@@ -46,7 +47,7 @@ def reader_creator(filename, sub_name, batch_size=None): ...@@ -46,7 +47,7 @@ def reader_creator(filename, sub_name, batch_size=None):
data = batch['data'] data = batch['data']
labels = batch.get('labels', batch.get('fine_labels', None)) labels = batch.get('labels', batch.get('fine_labels', None))
assert labels is not None assert labels is not None
for sample, label in itertools.izip(data, labels): for sample, label in zip(data, labels):
yield (sample / 255.0).astype(numpy.float32), int(label) yield (sample / 255.0).astype(numpy.float32), int(label)
def reader(): def reader():
...@@ -56,7 +57,7 @@ def reader_creator(filename, sub_name, batch_size=None): ...@@ -56,7 +57,7 @@ def reader_creator(filename, sub_name, batch_size=None):
batch_count = 0 batch_count = 0
for name in names: for name in names:
batch = cPickle.load(f.extractfile(name)) batch = pickle.load(f.extractfile(name))
for item in read_batch(batch): for item in read_batch(batch):
if isinstance(batch_size, int) and batch_count > batch_size: if isinstance(batch_size, int) and batch_count > batch_size:
break break
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册