# protobuf_generate_python(<out_var> <proto>...)
#
# Adds one custom command per .proto file that runs protoc with
# `--python_out ${CMAKE_CURRENT_BINARY_DIR}` and returns the list of generated
# `*_pb2.py` paths in <out_var> (set in the caller's scope).
#
# Adapted from
# https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake
#
# Variables read:
#   PROTOBUF_GENERATE_CPP_APPEND_PATH  if true, the directory of every proto
#                                      file is passed to protoc with -I;
#                                      otherwise only CMAKE_CURRENT_SOURCE_DIR.
#   Protobuf_IMPORT_DIRS               extra -I directories; falls back to
#                                      PROTOBUF_IMPORT_DIRS when undefined.
#   PROTOBUF_PROTOC_EXECUTABLE         protoc binary used in the command and
#                                      listed as a dependency of each output.
function(protobuf_generate_python SRCS)
  if(NOT ARGN)
    message(SEND_ERROR "Error: PROTOBUF_GENERATE_PYTHON() called without any proto files")
    return()
  endif()

  # ---- assemble the -I search path handed to protoc -------------------------
  if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH)
    set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR})
  else()
    # One include directory per proto file, skipping directories already listed.
    foreach(FIL ${ARGN})
      get_filename_component(proto_abs ${FIL} ABSOLUTE)
      get_filename_component(proto_dir ${proto_abs} DIRECTORY)
      list(FIND _protobuf_include_path ${proto_dir} _pos)
      if(_pos EQUAL -1)
        list(APPEND _protobuf_include_path -I ${proto_dir})
      endif()
    endforeach()
  endif()

  # Accept the upper-case spelling when the mixed-case one is not defined.
  if(NOT DEFINED Protobuf_IMPORT_DIRS AND DEFINED PROTOBUF_IMPORT_DIRS)
    set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}")
  endif()

  if(DEFINED Protobuf_IMPORT_DIRS)
    foreach(import_dir ${Protobuf_IMPORT_DIRS})
      get_filename_component(import_abs ${import_dir} ABSOLUTE)
      list(FIND _protobuf_include_path ${import_abs} _pos)
      if(_pos EQUAL -1)
        list(APPEND _protobuf_include_path -I ${import_abs})
      endif()
    endforeach()
  endif()

  # ---- one protoc rule per proto file ---------------------------------------
  set(_generated_py)
  foreach(FIL ${ARGN})
    get_filename_component(proto_abs ${FIL} ABSOLUTE)
    get_filename_component(stem ${FIL} NAME_WE)
    if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH)
      # Without APPEND_PATH the output keeps the proto's directory component.
      get_filename_component(rel_dir ${FIL} DIRECTORY)
      if(rel_dir)
        set(stem "${rel_dir}/${stem}")
      endif()
    endif()

    set(py_out "${CMAKE_CURRENT_BINARY_DIR}/${stem}_pb2.py")
    list(APPEND _generated_py "${py_out}")
    add_custom_command(
      OUTPUT "${py_out}"
      COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --python_out ${CMAKE_CURRENT_BINARY_DIR} ${_protobuf_include_path} ${proto_abs}
      DEPENDS ${proto_abs} ${PROTOBUF_PROTOC_EXECUTABLE}
      COMMENT "Running Python protocol buffer compiler on ${FIL}"
      VERBATIM)
  endforeach()

  # Hand the list of generated files back to the caller.
  set(${SRCS} ${_generated_py} PARENT_SCOPE)
endfunction()
None)) @@ -201,6 +202,7 @@ paddle.fluid.layers.create_tensor ArgSpec(args=['dtype', 'name', 'persistable'], paddle.fluid.layers.create_parameter ArgSpec(args=['shape', 'dtype', 'name', 'attr', 'is_bias', 'default_initializer'], varargs=None, keywords=None, defaults=(None, None, False, None)) paddle.fluid.layers.create_global_var ArgSpec(args=['shape', 'value', 'dtype', 'persistable', 'force_cpu', 'name'], varargs=None, keywords=None, defaults=(False, False, None)) paddle.fluid.layers.cast ArgSpec(args=['x', 'dtype'], varargs=None, keywords=None, defaults=None) +paddle.fluid.layers.tensor_array_to_tensor ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None)) paddle.fluid.layers.concat ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(0, None)) paddle.fluid.layers.sums ArgSpec(args=['input', 'out'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.assign ArgSpec(args=['input', 'output'], varargs=None, keywords=None, defaults=(None,)) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 0506907ab56bd4eeb720c43601dcf20e6ebc9d67..5624878d439873e5f6aee6ec9234e31d5c77ff97 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -259,6 +259,8 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const { if (row_size >= 0) { ss << "[row_size=" << row_size << "]"; } + std::string dtype = GetDtype(*scope, output.second[i]); + ss << ":" << dtype; ss << "[" << GetDims(*scope, var_name, true) << "]"; ss << "(" << GetLoD(*scope, var_name) << ")"; } diff --git a/paddle/fluid/framework/var_type_inference.h b/paddle/fluid/framework/var_type_inference.h index f3035cd712bdea517068b4c172bb2794d5fccddb..64236b78d2e390ea5f6c43c76a4b33b62c67629f 100644 --- a/paddle/fluid/framework/var_type_inference.h +++ b/paddle/fluid/framework/var_type_inference.h @@ -13,6 +13,9 @@ See the License for the specific language governing 
permissions and limitations under the License. */ #pragma once +#include +#include "paddle/fluid/framework/block_desc.h" +#include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/type_defs.h" namespace paddle { @@ -24,5 +27,27 @@ class VarTypeInference { virtual void operator()(const OpDesc& op_desc, BlockDesc* block) const = 0; }; +class PassInDtypeAndVarTypeToOutput : public framework::VarTypeInference { + public: + void operator()(const framework::OpDesc& op_desc, + framework::BlockDesc* block) const final { + auto in_out_var_names = this->GetInputOutputWithSameType(); + + for (auto& i_o_n : in_out_var_names) { + auto& x_name = op_desc.Input(i_o_n.first).at(0); + auto& out_name = op_desc.Output(i_o_n.second).at(0); + + auto& x = block->FindRecursiveOrCreateVar(x_name); + auto& out = block->FindRecursiveOrCreateVar(out_name); + out.SetType(x.GetType()); + out.SetDataType(x.GetDataType()); + } + } + + protected: + virtual std::unordered_map + GetInputOutputWithSameType() const = 0; +}; + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc index a3440cfc78e9172beef7dbe4f4154bcffcb62b6e..d55303a51e9fee3057455470b4b3139dc5f85e89 100644 --- a/paddle/fluid/inference/analysis/analyzer.cc +++ b/paddle/fluid/inference/analysis/analyzer.cc @@ -113,7 +113,9 @@ void Analyzer::Run(Argument* argument) { passes.push_back("infer_clean_graph_pass"); passes.push_back("graph_viz_pass"); // add graphviz for debug. for (auto& pass : ir_passes_) { - if (!disabled_ir_passes_.count(pass)) { + // skip mkldnn pass when use_mkldnn_ = false; + bool skip_pass = (!use_mkldnn_) && pass.find("mkldnn") != std::string::npos; + if (!disabled_ir_passes_.count(pass) && !skip_pass) { passes.push_back(pass); passes.push_back("graph_viz_pass"); // add graphviz for debug. 
} diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 2a7de024bf4ec1f49f7672de782f68ba8b353bbd..7599313070bea790b910eccb012cc4aaf0bdac0c 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -317,6 +317,7 @@ op_library(save_op DEPS lod_tensor) op_library(load_op DEPS lod_tensor) op_library(save_combine_op DEPS lod_tensor) op_library(load_combine_op DEPS lod_tensor) +op_library(tensor_array_to_tensor_op DEPS concat_op) op_library(concat_op DEPS concat_and_split) list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 9ddb3a5d29f973047507855b43b226913a3600b5..ea260a3e92b775023085fd02eec33e6ecfaf2e81 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -91,16 +91,12 @@ class ActivationOp : public framework::OperatorWithKernel { } }; -class ActivationOpInferVarType : public framework::VarTypeInference { - public: - void operator()(const framework::OpDesc& op_desc, - framework::BlockDesc* block) const override { - auto x_name = op_desc.Input("X")[0]; - auto out_name = op_desc.Output("Out")[0]; - auto& x = block->FindRecursiveOrCreateVar(x_name); - auto& out = block->FindRecursiveOrCreateVar(out_name); - out.SetType(x.GetType()); - out.SetDataType(x.GetDataType()); +class ActivationOpInferVarType + : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Out"}}; } }; diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index 3eb473832577bd348b33ba9b0be9e597b78f26bc..cf245f5038f5f5ad1b623542aa14686eff8aad32 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -170,6 +170,15 @@ The required data format for this layer is one of the following: } }; 
+class BatchNormOpInferVarType + : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Y"}}; + } +}; + template class BatchNormKernel : public framework::OpKernel { @@ -525,7 +534,7 @@ class BatchNormGradMaker : public framework::SingleGradOpDescMaker { namespace ops = paddle::operators; REGISTER_OPERATOR(batch_norm, ops::BatchNormOp, ops::BatchNormOpMaker, - ops::BatchNormGradMaker); + ops::BatchNormOpInferVarType, ops::BatchNormGradMaker); REGISTER_OPERATOR(batch_norm_grad, ops::BatchNormGradOp); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index 7401f100d723e0d331f403caff336620305879e1..4d370746382a4247f51aafa189e86eece941c320 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -224,6 +224,15 @@ $$ )DOC"); } +class ConvOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{ + {"Input", /*->*/ "Output"}}; + } +}; + void Conv3DOpMaker::Make() { AddInput( "Input", @@ -365,6 +374,7 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType( namespace ops = paddle::operators; REGISTER_OPERATOR(conv2d, ops::ConvOp, ops::Conv2DOpMaker, + ops::ConvOpInferVarType, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad); @@ -372,7 +382,9 @@ REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad); REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad); + REGISTER_OPERATOR(conv3d, ops::ConvOp, ops::Conv3DOpMaker, + ops::ConvOpInferVarType, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(conv3d_grad, ops::ConvOpGrad); diff --git a/paddle/fluid/operators/cross_entropy_op.cc 
b/paddle/fluid/operators/cross_entropy_op.cc index 66f19fe7ecfa51b2ce917f0c5fcb6d486f1a7307..a904dd91302c951560dc32ac107d4d73b6024c25 100644 --- a/paddle/fluid/operators/cross_entropy_op.cc +++ b/paddle/fluid/operators/cross_entropy_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/cross_entropy_op.h" +#include namespace paddle { namespace operators { @@ -179,6 +180,15 @@ or not. But the output only shares the LoD information with input X. )DOC"); } }; + +class CrossEntropyOpInferVarType + : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Y"}}; + } +}; } // namespace operators } // namespace paddle @@ -186,6 +196,7 @@ namespace ops = paddle::operators; using CPUCtx = paddle::platform::CPUDeviceContext; REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOp, ops::CrossEntropyOpMaker, + ops::CrossEntropyOpInferVarType, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(cross_entropy_grad, ops::CrossEntropyGradientOp); REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel, diff --git a/paddle/fluid/operators/elementwise_op.h b/paddle/fluid/operators/elementwise_op.h index 5eb4233344e1c49e69dd9830178fd6fb2ae7e51c..f01f67692e1e5dd040971cb0dd1dd793648da97a 100644 --- a/paddle/fluid/operators/elementwise_op.h +++ b/paddle/fluid/operators/elementwise_op.h @@ -75,16 +75,12 @@ class ElementwiseOp : public framework::OperatorWithKernel { } }; -class ElementwiseOpInferVarType : public framework::VarTypeInference { - public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - auto x_name = op_desc.Input("X")[0]; - auto out_name = op_desc.Output("Out")[0]; - auto &x = block->FindRecursiveOrCreateVar(x_name); - auto &out = block->FindRecursiveOrCreateVar(out_name); - 
out.SetType(x.GetType()); - out.SetDataType(x.GetDataType()); +class ElementwiseOpInferVarType + : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Out"}}; } }; diff --git a/paddle/fluid/operators/mean_op.cc b/paddle/fluid/operators/mean_op.cc index 19426b3c204095bd415cebcd87cff18468acd564..820636defad0be9fb2e6decefc938658ae70ea9b 100644 --- a/paddle/fluid/operators/mean_op.cc +++ b/paddle/fluid/operators/mean_op.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/mean_op.h" - +#include namespace paddle { namespace operators { @@ -42,6 +42,14 @@ Mean Operator calculates the mean of all elements in X. } }; +class MeanOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Out"}}; + } +}; + class MeanGradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -50,6 +58,14 @@ class MeanGradOp : public framework::OperatorWithKernel { ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); ctx->ShareLoD("X", framework::GradVarName("X")); } + + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + auto input_data_type = + framework::ToDataType(ctx.Input("X")->type()); + + return framework::OpKernelType(input_data_type, ctx.GetPlace()); + } }; class MeanGradMaker : public framework::SingleGradOpDescMaker { @@ -71,7 +87,8 @@ class MeanGradMaker : public framework::SingleGradOpDescMaker { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OPERATOR(mean, ops::MeanOp, ops::MeanOpMaker, ops::MeanGradMaker); +REGISTER_OPERATOR(mean, ops::MeanOp, ops::MeanOpMaker, 
ops::MeanOpInferVarType, + ops::MeanGradMaker); REGISTER_OPERATOR(mean_grad, ops::MeanGradOp); REGISTER_OP_CPU_KERNEL( mean, ops::MeanKernel, diff --git a/paddle/fluid/operators/mul_op.cc b/paddle/fluid/operators/mul_op.cc index a2140ddc792a69e794a89c8056b1dd19a3661e38..08f2949d4a3774894912ae5251806b46e6240702 100644 --- a/paddle/fluid/operators/mul_op.cc +++ b/paddle/fluid/operators/mul_op.cc @@ -126,6 +126,14 @@ or not. But the output only shares the LoD information with input $X$. } }; +class MulOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Out"}}; + } +}; + class MulGradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -178,7 +186,8 @@ class MulOpGradMaker : public framework::SingleGradOpDescMaker { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OPERATOR(mul, ops::MulOp, ops::MulOpMaker, ops::MulOpGradMaker); +REGISTER_OPERATOR(mul, ops::MulOp, ops::MulOpMaker, ops::MulOpInferVarType, + ops::MulOpGradMaker); REGISTER_OPERATOR(mul_grad, ops::MulGradOp); REGISTER_OP_CPU_KERNEL( mul, ops::MulKernel, diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index 484cb65746612343fafc49fe61b607f2e919cf4f..46a95350a7293c18313811ba9b367fd65955145a 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -40,7 +40,7 @@ int PoolOutputSize(int input_size, int filter_size, int padding, int stride, return output_size; } -void PoolOp::InferShape(framework::InferShapeContext *ctx) const { +void PoolOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE(ctx->HasInput("X"), "X(Input) of Pooling should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), "Out(Output) of Pooling should not be null."); @@ -81,7 +81,7 @@ void PoolOp::InferShape(framework::InferShapeContext *ctx) 
const { } framework::OpKernelType PoolOp::GetExpectedKernelType( - const framework::ExecutionContext &ctx) const { + const framework::ExecutionContext& ctx) const { framework::LibraryType library_{framework::LibraryType::kPlain}; std::string data_format = ctx.Attr("data_format"); framework::DataLayout layout_ = framework::StringToDataLayout(data_format); @@ -104,7 +104,7 @@ framework::OpKernelType PoolOp::GetExpectedKernelType( layout_, library_); } -void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const { +void PoolOpGrad::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), "Input(X@GRAD) should not be null."); @@ -112,7 +112,7 @@ void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const { } framework::OpKernelType PoolOpGrad::GetExpectedKernelType( - const framework::ExecutionContext &ctx) const { + const framework::ExecutionContext& ctx) const { framework::LibraryType library_{framework::LibraryType::kPlain}; std::string data_format = ctx.Attr("data_format"); framework::DataLayout layout_ = framework::StringToDataLayout(data_format); @@ -262,6 +262,14 @@ Example: )DOC"); } +class PoolOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Out"}}; + } +}; + void Pool3dOpMaker::Make() { AddInput("X", "(Tensor) The input tensor of pooling operator. 
" @@ -372,6 +380,7 @@ Example: namespace ops = paddle::operators; REGISTER_OPERATOR(pool2d, ops::PoolOp, ops::Pool2dOpMaker, + ops::PoolOpInferVarType, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(pool2d_grad, ops::PoolOpGrad); @@ -383,6 +392,7 @@ REGISTER_OP_CPU_KERNEL( ops::PoolGradKernel); REGISTER_OPERATOR(pool3d, ops::PoolOp, ops::Pool3dOpMaker, + ops::PoolOpInferVarType, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(pool3d_grad, ops::PoolOpGrad); diff --git a/paddle/fluid/operators/similarity_focus_op.cc b/paddle/fluid/operators/similarity_focus_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..9612f82b6d45dc4e08bfe288ddd1c7790875ee4d --- /dev/null +++ b/paddle/fluid/operators/similarity_focus_op.cc @@ -0,0 +1,87 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/similarity_focus_op.h" + +namespace paddle { +namespace operators { +class SimilarityFocusOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", + "(Tensor, default Tensor), a 4-D tensor with shape," + " [BatchSize, X, Y, Z]"); + AddOutput("Out", + "(Tensor, default Tensor), the similarity focus mask" + " with the same shape of input X."); + AddAttr("axis", + "(int32), indicating the dimension to be select. 
It can" + " only be 1, 2, or 3."); + AddAttr>("indexes", + "(std::vector), indicating the indexes" + " of the selected dimension."); + AddComment(R"DOC( +SimilarityFocus Operator. + +Generate a similarity focus mask with the same shape of input using the following method: +1. Extract the 3-D tensor(here the first dimension is BatchSize) corresponding + to the axis according to the indexes. For example, if axis=1 and indexes=[a], + it will get the matrix T=X[:, a, :, :]. In this case, if the shape of input X + is (BatchSize, A, B, C), the shape of tensor T is (BatchSize, B, C). +2. For each index, find the largest numbers in the tensor T, so that the same + row and same column has at most one number(what it means is that if the + largest number has been found in the i-th row and the j-th column, then + the numbers in the i-th row or j-th column will be skipped. And then the + next largest number will be selected from the remaining numbers. Obviously + there will be min(B, C) numbers), and mark the corresponding position of the + 3-D similarity focus mask as 1, otherwise as 0. Do elementwise-or for + each index. +3. Broadcast the 3-D similarity focus mask to the same shape of input X. 
+ +Refer to `Similarity Focus Layer `_ +)DOC"); + } +}; + +class SimilarityFocusOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should be not null."); + auto x_dims = ctx->GetInputDim("X"); + PADDLE_ENFORCE_EQ(x_dims.size(), 4, "Input(X)'s rank should be 4."); + ctx->SetOutputDim("Out", x_dims); + ctx->ShareLoD("X", /*->*/ "Out"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + platform::CPUPlace()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(similarity_focus, ops::SimilarityFocusOp, + ops::SimilarityFocusOpMaker, + paddle::framework::EmptyGradOpMaker); +REGISTER_OP_CPU_KERNEL(similarity_focus, ops::SimilarityFocusKernel, + ops::SimilarityFocusKernel); diff --git a/paddle/fluid/operators/similarity_focus_op.h b/paddle/fluid/operators/similarity_focus_op.h new file mode 100644 index 0000000000000000000000000000000000000000..bf3fed2aaf2cf92d5619ae5bce6dd70d9dfe9621 --- /dev/null +++ b/paddle/fluid/operators/similarity_focus_op.h @@ -0,0 +1,168 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { +using Tensor = framework::Tensor; + +template +class SimilarityFocusKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + Tensor* out = context.Output("Out"); + const Tensor* x = context.Input("X"); + T* out_data = out->mutable_data(context.GetPlace()); + const T* x_data = x->data(); + + int axis = context.Attr("axis"); + std::vector indexes = context.Attr>("indexes"); + + int64_t batch_size = x->dims()[0]; + int64_t dim[4]; + for (int i = 1; i <= 3; ++i) { + dim[i] = x->dims()[i]; + } + + if (indexes.size() < 1) { + PADDLE_THROW("Indexes' size can not be 0."); + } + for (auto index : indexes) { + if (dim[axis] < index) { + PADDLE_THROW("Index exceeds tensor shape limit."); + } + } + + int64_t array_size = 1; + for (int i = 1; i <= 3; ++i) { + if (i != axis) { + array_size *= dim[i]; + } + } + + std::vector> array(array_size); + + bool (*cmp)(std::pair, std::pair) = []( + std::pair x, std::pair y) { + return x.first > y.first; + }; + + int64_t (*compute_index)(int64_t*, int, int, int, int) = []( + int64_t* dim, int d1, int d2, int d3, int d4) { + return d1 * dim[1] * dim[2] * dim[3] + d2 * dim[2] * dim[3] + + d3 * dim[3] + d4; + }; + + memset(out_data, 0, sizeof(T) * batch_size * dim[1] * dim[2] * dim[3]); + for (int i = 0; i < batch_size; ++i) { + for (auto index : indexes) { + if (axis == 1) { + for (int j = 0; j < dim[2]; ++j) { + for (int k = 0; k < dim[3]; ++k) { + array[j * dim[3] + k] = std::make_pair( + x_data[compute_index(dim, i, index, j, k)], j * dim[3] + k); + } + } + + std::sort(array.begin(), array.end(), cmp); + int tag_num = 0; + std::vector tag2(dim[2]), tag3(dim[3]); + for (auto x 
: array) { + int idx2 = x.second / dim[3]; + int idx3 = x.second % dim[3]; + if (tag2[idx2] || tag3[idx3]) { + continue; + } + tag_num++; + tag2[idx2] = true; + tag3[idx3] = true; + for (int j = 0; j < dim[1]; ++j) { + out_data[compute_index(dim, i, j, idx2, idx3)] = 1; + } + if (tag_num == std::min(dim[2], dim[3])) { + break; + } + } + } else if (axis == 2) { + for (int j = 0; j < dim[1]; ++j) { + for (int k = 0; k < dim[3]; ++k) { + array[j * dim[3] + k] = std::make_pair( + x_data[compute_index(dim, i, j, index, k)], j * dim[3] + k); + } + } + + std::sort(array.begin(), array.end(), cmp); + int tag_num = 0; + std::vector tag1(dim[1]), tag3(dim[3]); + for (auto x : array) { + int idx1 = x.second / dim[3]; + int idx3 = x.second % dim[3]; + if (tag1[idx1] || tag3[idx3]) { + continue; + } + tag_num++; + tag1[idx1] = true; + tag3[idx3] = true; + for (int j = 0; j < dim[2]; ++j) { + out_data[compute_index(dim, i, idx1, j, idx3)] = 1; + } + if (tag_num == std::min(dim[1], dim[3])) { + break; + } + } + } else if (axis == 3) { + for (int j = 0; j < dim[1]; ++j) { + for (int k = 0; k < dim[2]; ++k) { + array[j * dim[2] + k] = std::make_pair( + x_data[compute_index(dim, i, j, k, index)], j * dim[2] + k); + } + } + + std::sort(array.begin(), array.end(), cmp); + int tag_num = 0; + std::vector tag1(dim[1]), tag2(dim[2]); + for (auto x : array) { + int idx1 = x.second / dim[2]; + int idx2 = x.second % dim[2]; + if (tag1[idx1] || tag2[idx2]) { + continue; + } + tag_num++; + tag1[idx1] = true; + tag2[idx2] = true; + for (int j = 0; j < dim[3]; ++j) { + out_data[compute_index(dim, i, idx1, idx2, j)] = 1; + } + if (tag_num == std::min(dim[1], dim[2])) { + break; + } + } + } else { + PADDLE_THROW("Axis must be 1 or 2 or 3"); + } + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc index a4bdbe6648afa7c91a056af4737bb5d826229022..9e21b6c824bfd7d1c1090e5ba3ba2f6aa9bdb230 100644 --- 
a/paddle/fluid/operators/softmax_op.cc +++ b/paddle/fluid/operators/softmax_op.cc @@ -124,6 +124,14 @@ For each row $i$ and each column $j$ in the matrix, we have: } }; +class SoftmaxOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Out"}}; + } +}; + class SoftmaxOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -196,7 +204,7 @@ class SoftmaxOpGradMaker : public framework::SingleGradOpDescMaker { namespace ops = paddle::operators; REGISTER_OPERATOR(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker, - ops::SoftmaxOpGradMaker); + ops::SoftmaxOpInferVarType, ops::SoftmaxOpGradMaker); REGISTER_OPERATOR(softmax_grad, ops::SoftmaxOpGrad); REGISTER_OP_CPU_KERNEL( softmax, ops::SoftmaxKernel, diff --git a/paddle/fluid/operators/tensor_array_to_tensor_op.cc b/paddle/fluid/operators/tensor_array_to_tensor_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..96dc123f6a36e1a2b6ae04e0d97dffe1e10ac4ea --- /dev/null +++ b/paddle/fluid/operators/tensor_array_to_tensor_op.cc @@ -0,0 +1,246 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include + +#include "paddle/fluid/framework/lod_tensor_array.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/variable.h" + +namespace paddle { +namespace operators { +using framework::Tensor; + +void LodTensorArray2LodTensorVector(const framework::Scope &scope, + const std::string &base_name, + const std::string &lod_tensor_array_name, + std::vector *res_names) { + auto &inx = + scope.FindVar(lod_tensor_array_name)->Get(); + for (size_t i = 0; i < inx.size(); i++) { + std::string var_name = base_name + std::to_string(i); + framework::Variable *g_feed_value = + const_cast(scope).Var(var_name); + auto &feed_input = + *(g_feed_value->GetMutable()); + feed_input.ShareDataWith(inx[i]); + res_names->push_back(var_name); + } +} + +void LodTensorVectorResizeFromLodTensorArray( + const framework::Scope &scope, const std::string &base_name, + const std::string &lod_tensor_array_name, + std::vector *res_names) { + auto &inx = + scope.FindVar(lod_tensor_array_name)->Get(); + for (size_t i = 0; i < inx.size(); i++) { + std::string var_name = base_name + std::to_string(i); + framework::Variable *g_feed_value = + const_cast(scope).Var(var_name); + auto &feed_input = + *(g_feed_value->GetMutable()); + auto dims = inx[i].dims(); + feed_input.Resize(dims); + res_names->push_back(var_name); + } +} + +void LodTensorArrayCreateFromLodTensorArray( + const framework::Scope &scope, + const std::string &input_lod_tensor_array_name, + const std::string &output_lod_tensor_array_name) { + auto &inx = scope.FindVar(input_lod_tensor_array_name) + ->Get(); + auto &grad_inx = *scope.FindVar(output_lod_tensor_array_name) + ->GetMutable(); + + for (size_t i = 0; i < inx.size(); i++) { + std::string var_name = output_lod_tensor_array_name + std::to_string(i); + framework::Variable *g_feed_value = + const_cast(scope).Var(var_name); + auto &feed_input = + *(g_feed_value->GetMutable()); + grad_inx.push_back(feed_input); + } +} + +class 
LoDTensorArray2TensorOp : public framework::OperatorBase { + public: + using OperatorBase::OperatorBase; + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { + auto axis = Attr("axis"); + + framework::AttributeMap attrs; + attrs["axis"] = axis; + + auto &inx = scope.FindVar(Input("X"))->Get(); + auto &out = + *scope.FindVar(Output("Out"))->GetMutable(); + auto &out_inx = + *scope.FindVar(Output("OutIndex"))->GetMutable(); + + const size_t n = inx.size(); + PADDLE_ENFORCE_GT(n, 0, "Input tensorarray size should > 0."); + + std::string base_name = Inputs("X")[0]; + std::vector names; + + // get the input tensorarray items' dim in out_inx + auto out_inx_dim = out_inx.dims(); + out_inx_dim[0] = inx.size(); + out_inx.Resize(out_inx_dim); + + std::string var_name = "out_index"; + framework::Variable *tmp_index_var = + const_cast(scope).Var(var_name); + auto &tmp_index_tensor = + *(tmp_index_var->GetMutable()); + tmp_index_tensor.Resize(out_inx_dim); + int *tmp_index_data = + tmp_index_tensor.mutable_data(platform::CPUPlace()); + + auto out_dims = inx[0].dims(); + size_t out_dim_sum = 0; + for (size_t index = 0; index < inx.size(); index++) { + auto inx_dims = inx[index].dims(); + out_dim_sum += inx_dims[axis]; + tmp_index_data[index] = inx_dims[axis]; + } + out_inx.ShareDataWith(tmp_index_tensor); + + // get input array items' dims + out_dims[axis] = out_dim_sum; + out.Resize(out_dims); + + LodTensorArray2LodTensorVector(scope, base_name, Input("X"), &names); + // Invoke Reshape Op + auto concat_op = framework::OpRegistry::CreateOp( + "concat", {{"X", names}}, {{"Out", {Output("Out")}}}, attrs); + + concat_op->Run(scope, place); + } +}; + +class LoDTensorArray2TensorOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "Input LoDTensorArray of tensor_array_to_tensor operator."); + AddOutput("Out", "Output tensor of tensor_array_to_tensor operator."); + 
AddOutput("OutIndex", + "Output input LoDTensorArray items' dims of " + "tensor_array_to_tensor operator."); + AddAttr("axis", + "The axis along which the input tensors will be concatenated.") + .SetDefault(0); + AddComment(R"DOC( +tensor_array_to_tensor Operator. + +Concatenate the input LoDTensorArray along dimension axis to the output Tensor. +Examples: + Input = {[1,2], [3,4], [5,6]} + axis = 0 + Output = [[1,2], + [3,4], + [5,6]] + OutputIndex = [1,1,1] + +)DOC"); + } +}; + +class LoDTensorArray2TensorOpInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *ctx) const override {} +}; + +class LoDTensorArray2TensorGradInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override {} +}; + +class LoDTensorArray2TensorGradInferVarType + : public framework::VarTypeInference { + public: + void operator()(const framework::OpDesc &op_desc, + framework::BlockDesc *block) const override { + for (auto &out_var : op_desc.Output(framework::GradVarName("X"))) { + block->Var(out_var)->SetType(framework::proto::VarType::LOD_TENSOR_ARRAY); + } + } +}; + +class LoDTensorArray2TensorGradOp : public framework::OperatorBase { + public: + using OperatorBase::OperatorBase; + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { + auto axis = Attr("axis"); + framework::AttributeMap attrs; + attrs["axis"] = axis; + + auto &inx = scope.FindVar(Input("X"))->Get(); + const size_t n = inx.size(); + PADDLE_ENFORCE_GT(n, 0, "Input tensorarray size should > 0."); + + std::string base_name = Inputs("X")[0]; + std::vector names; + + LodTensorArray2LodTensorVector(scope, base_name, Input("X"), &names); + + // grad + auto dx_name = Output(framework::GradVarName("X")); + auto dout_name = Input(framework::GradVarName("Out")); + + std::vector grad_names; + + LodTensorVectorResizeFromLodTensorArray(scope, "grad_name", Input("X"), + 
&grad_names); + + auto concat_grad_op = framework::OpRegistry::CreateOp( + "concat_grad", {{"X", names}, {"Out@GRAD", {dout_name}}}, + {{"X@GRAD", grad_names}}, attrs); + + concat_grad_op->Run(scope, place); + + LodTensorArrayCreateFromLodTensorArray(scope, Input("X"), dx_name); + auto &grad_inx = + *scope.FindVar(dx_name)->GetMutable(); + + for (size_t i = 0; i < grad_names.size(); i++) { + std::string var_name = grad_names[i]; + auto &feed_input = scope.FindVar(var_name)->Get(); + grad_inx[i].ShareDataWith(feed_input); + } + } +}; + +} // namespace operators +} // namespace paddle +USE_OP(concat); + +namespace ops = paddle::operators; +REGISTER_OPERATOR(tensor_array_to_tensor, ops::LoDTensorArray2TensorOp, + ops::LoDTensorArray2TensorOpMaker, + ops::LoDTensorArray2TensorOpInferShape, + paddle::framework::DefaultGradOpDescMaker); +REGISTER_OPERATOR(tensor_array_to_tensor_grad, ops::LoDTensorArray2TensorGradOp, + ops::LoDTensorArray2TensorGradInferShape, + ops::LoDTensorArray2TensorGradInferVarType); diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index b79b00846ecd21398bb5b546e757694932f772c2..0b997009bffebb6c7a02c16b643c9d6c49af103b 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -34,6 +34,7 @@ from . import regularizer from . import average from . import metrics from . import transpiler +from . import distribute_lookup_table from .param_attr import ParamAttr, WeightNormParamAttr from .data_feeder import DataFeeder from .core import LoDTensor, LoDTensorArray, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope diff --git a/python/paddle/fluid/distribute_lookup_table.py b/python/paddle/fluid/distribute_lookup_table.py new file mode 100644 index 0000000000000000000000000000000000000000..52d9ce75f8d73eb3c3e8683bc0793e9dd8fbe48d --- /dev/null +++ b/python/paddle/fluid/distribute_lookup_table.py @@ -0,0 +1,39 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +LOOKUP_TABLE_TYPE = "lookup_table" + + +def find_distributed_lookup_table(program): + """ + Find distribute lookup table in program. + We only support one distribute table now. + :param program: + :return: table_name or None + """ + table_name = None + + for op in program.global_block().ops: + if op.type == LOOKUP_TABLE_TYPE: + if op.attr('is_distributed') is True: + if table_name is None: + table_name = op.input("W")[0] + if table_name != op.input("W")[0]: + raise RuntimeError("all distributed lookup_table_ops" + " should have only one table") + else: + if table_name is not None: + assert op.input("W")[0] != table_name + + return table_name diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 0a39587574c06a093b9f9e407bb82fc28612ff0d..c1a93e831b6df02c2c4cfa34cd498b7b147a8b1b 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -160,6 +160,7 @@ __all__ = [ 'affine_grid', 'sequence_reverse', 'affine_channel', + 'similarity_focus', 'hash', 'grid_sampler', 'log_loss', @@ -7933,6 +7934,118 @@ def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None): return out +def similarity_focus(input, axis, indexes, name=None): + """ + SimilarityFocus Operator + + Generate a similarity focus mask with the same shape of input using the following method: + 1. 
Extract the 3-D tensor(here the first dimension is BatchSize) corresponding + to the axis according to the indexes. For example, if axis=1 and indexes=[a], + it will get the matrix T=X[:, a, :, :]. In this case, if the shape of input X + is (BatchSize, A, B, C), the shape of tensor T is (BatchSize, B, C). + 2. For each index, find the largest numbers in the tensor T, so that the same + row and same column has at most one number(what it means is that if the + largest number has been found in the i-th row and the j-th column, then + the numbers in the i-th row or j-th column will be skipped. And then the + next largest number will be selected from the remaining numbers. Obviously + there will be min(B, C) numbers), and mark the corresponding position of the + 3-D similarity focus mask as 1, otherwise as 0. Do elementwise-or for + each index. + 3. Broadcast the 3-D similarity focus mask to the same shape of input X. + + Refer to `Similarity Focus Layer `_ + + .. code-block:: text + + * Example : + + Given a 4-D tensor x with the shape (BatchSize, C, A, B), where C is + the number of channels and the shape of feature map is (A, B): + x.shape = (2, 3, 2, 2) + x.data = [[[[0.8, 0.1], + [0.4, 0.5]], + + [[0.9, 0.7], + [0.9, 0.9]], + + [[0.8, 0.9], + [0.1, 0.2]]], + + + [[[0.2, 0.5], + [0.3, 0.4]], + + [[0.9, 0.7], + [0.8, 0.4]], + + [[0.0, 0.2], + [0.4, 0.7]]]] + + Given axis: 1 (the axis of the channel) + Given indexes: [0] + + then we get a 4-D tensor out with the same shape of input x: + out.shape = (2, 3, 2, 2) + out.data = [[[[1.0, 0.0], + [0.0, 1.0]], + + [[1.0, 0.0], + [0.0, 1.0]], + + [[1.0, 0.0], + [0.0, 1.0]]], + + [[[0.0, 1.0], + [1.0, 0.0]], + + [[0.0, 1.0], + [1.0, 0.0]], + + [[0.0, 1.0], + [1.0, 0.0]]]] + + Args: + input(Variable): The input tensor variable(default float). It should + be a 4-D tensor with shape [BatchSize, A, B, C]. + axis(int): Indicating the dimension to be selected. It can only be + 1, 2 or 3. 
+ indexes(list): Indicating the indexes of the selected dimension. + + Returns: + Variable: A tensor variable with the same shape and same type + as the input. + + Examples: + .. code-block:: python + data = fluid.layers.data( + name='data', shape=[2, 3, 2, 2], dtype='float32') + x = fluid.layers.layer_norm(input=data, axis=1, indexes=[0]) + """ + helper = LayerHelper('similarity_focus', **locals()) + # check attrs + if isinstance(axis, int) is False: + raise TypeError("axis must be int type.") + if isinstance(indexes, list) is False: + raise TypeError("indexes must be list type.") + if axis != 1 and axis != 2 and axis != 3: + raise ValueError("axis must be 1, 2 or 3.") + if len(indexes) == 0: + raise ValueError("indexes can not be empty.") + + if name is None: + out = helper.create_variable_for_type_inference(dtype=input.dtype) + else: + out = helper.create_variable( + name=name, dtype=input.dtype, persistable=False) + helper.append_op( + type='similarity_focus', + inputs={'X': input}, + outputs={'Out': out}, + attrs={"axis": axis, + "indexes": indexes}) + return out + + def hash(input, hash_size, num_hash=1, name=None): """ Hash the input to an integer whose value is less than the given hash size. 
diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 09a7cb8dc9339afa666f8cf09e92a27ffba8a9b3..57e5d197b618615b32a7f446df0a81e18c25b097 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -24,10 +24,10 @@ from .layer_function_generator import templatedoc import numpy __all__ = [ - 'create_tensor', 'create_parameter', 'create_global_var', 'cast', 'concat', - 'sums', 'assign', 'fill_constant_batch_size_like', 'fill_constant', - 'argmin', 'argmax', 'argsort', 'ones', 'zeros', 'reverse', 'has_inf', - 'has_nan', 'isfinite' + 'create_tensor', 'create_parameter', 'create_global_var', 'cast', + 'tensor_array_to_tensor', 'concat', 'sums', 'assign', + 'fill_constant_batch_size_like', 'fill_constant', 'argmin', 'argmax', + 'argsort', 'ones', 'zeros', 'reverse', 'has_inf', 'has_nan', 'isfinite' ] @@ -193,6 +193,60 @@ def concat(input, axis=0, name=None): return out +def tensor_array_to_tensor(input, axis=1, name=None): + """ + This function concatenates the input LodTensorArray along the axis mentioned + and returns that as the output. + + A simple example as below: + + .. code-block:: text + + Given: + + input.data = {[[0.6, 0.1, 0.3], + [0.5, 0.3, 0.2]], + [[1.3], + [1.8]], + [[2.3, 2.1], + [2.5, 2.4]]} + + axis = 1 + + Then: + + output.data = [[0.6, 0.1, 0.3, 1.3, 2.3, 2.1], + [0.5, 0.3, 0.2, 1.8, 2.5, 2.4]] + + output_index.data = [3, 1, 2] + + Args: + input(list): Input LodTensorArray + axis(int): Integer axis along which the tensors will be concatenated + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + Variable: Output variable of the concatenation + Variable: The input LodTensorArray items' dims along the axis + + Examples: + .. 
code-block:: python + + output, output_index = fluid.layers.tensor_array_to_tensor(input=tensor_array) + """ + helper = LayerHelper('tensor_array_concat', **locals()) + out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) + out_index = helper.create_variable_for_type_inference(dtype="int32") + helper.append_op( + type='tensor_array_concat', + inputs={'X': input}, + outputs={'Out': [out], + 'OutIndex': [out_index]}, + attrs={'axis': axis}) + return out, out_index + + def sums(input, out=None): """ This function performs the sum operation on the input and returns the diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 7e2364a5a872cdd8cf590438cc081ab070db767d..da92826d410505c9a80820f655162dd22e6b5966 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -13,21 +13,23 @@ # limitations under the License. from __future__ import print_function -import re -import sys + from collections import defaultdict +from contextlib import contextmanager + from paddle.fluid.framework import Program, Variable, name_scope, default_main_program +from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table + from . import framework from . import layers +from . import unique_name from .backward import append_backward +from .clip import append_gradient_clip_ops, error_clip_callback from .framework import program_guard -from . 
import unique_name from .initializer import Constant from .layer_helper import LayerHelper -from .regularizer import append_regularization_ops -from .clip import append_gradient_clip_ops, error_clip_callback -from contextlib import contextmanager from .layers import ops +from .regularizer import append_regularization_ops __all__ = [ 'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl', @@ -85,7 +87,7 @@ class Optimizer(object): name=unique_name.generate("learning_rate"), shape=[1], value=float(self._learning_rate), - dtype='float32' if self._dtype == None else self._dtype, + dtype='float32' if self._dtype is None else self._dtype, persistable=True) def _global_learning_rate(self, program=None): @@ -245,6 +247,50 @@ class Optimizer(object): end = len(global_block.ops) return global_block._slice_ops(start, end) + def _process_distribute_lookuptable(self, param_grads, loss, + startup_program): + """ + Because distribute lookup table only support SGD optimizer for now, not support + other optimizer and regularization, so we should find the table parameter out, + and avoid to add regularization and other op for it, and add sgd optimize op + for it independently. + :param param_grads(list((Var, Var))): list of (param, grad) pair. + :param loss: the loss variable. 
+ :param startup_program: the startup program + """ + program = loss.block.program + table_name = find_distributed_lookup_table(program) + table_param = None + table_grad = None + new_param_grads = [] + for p, g in param_grads: + if p.name == table_name: + if table_param is not None: + raise RuntimeError( + "multi dist table var found, only support one now!") + table_param = p + table_grad = g + else: + new_param_grads.append((p, g)) + sgd_op = None + if table_param is not None: + with program_guard(program, startup_program): + param_and_grad = [table_param, table_grad] + with table_param.block.program._optimized_guard(param_and_grad), \ + framework.name_scope("optimizer"): + self._create_global_learning_rate() + # create the optimize op + sgd_op = loss.block.append_op( + type='sgd', + inputs={ + "Param": table_param, + "Grad": table_grad, + "LearningRate": + self._create_param_lr(param_and_grad) + }, + outputs={"ParamOut": param_and_grad[0]}) + return new_param_grads, (table_param, table_grad), sgd_op + def minimize(self, loss, startup_program=None, @@ -260,6 +306,9 @@ class Optimizer(object): params_grads = sorted(params_grads, key=lambda x: x[0].name) + params_grads, table_param_and_grad, table_optimize_op = \ + self._process_distribute_lookuptable(params_grads, loss, startup_program) + params_grads = append_gradient_clip_ops(params_grads) # Add regularization if any @@ -268,6 +317,9 @@ class Optimizer(object): optimize_ops = self._create_optimization_pass(params_grads, loss, startup_program) + if table_optimize_op is not None: + optimize_ops.append(table_optimize_op) + params_grads.append(table_param_and_grad) return optimize_ops, params_grads diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py index f63387a90617dc4e9b7c9ee7caa2d01595237a03..42ab9b231153f7ede7b8f8dd4e754f8cc92f65fe 100644 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ 
b/python/paddle/fluid/tests/book/test_label_semantic_roles.py @@ -38,7 +38,7 @@ depth = 8 mix_hidden_lr = 1e-3 IS_SPARSE = True -PASS_NUM = 10 +PASS_NUM = 1 BATCH_SIZE = 10 embedding_name = 'emb' diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index 3a5b6b5cb8ee4f83c26a96e868e7c75933d28c15..d132dd3c48f55c07725515e40faeb5076398adeb 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -567,7 +567,6 @@ class TestDistLookupTable(TestDistLookupTableBase): 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', - 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'uniform_random', 'uniform_random', 'recv', 'recv', 'recv', 'fetch_barrier', 'concat', 'fake_init' @@ -639,7 +638,7 @@ class TestAsyncDistLookupTable(TestDistLookupTableBase): # 5 save table self.assertEqual([op.type for op in pserver1.blocks[5].ops], ["save"]) - trainer, _ = self.get_trainer(config) + trainer, trainer_startup = self.get_trainer(config) self.assertEqual(len(trainer.blocks), 1) ops = [ 'split_ids', 'prefetch', 'merge_ids', 'sequence_pool', @@ -653,6 +652,16 @@ class TestAsyncDistLookupTable(TestDistLookupTableBase): 'recv', 'concat' ] self.assertEqual([op.type for op in trainer.blocks[0].ops], ops) + startup_ops = [ + 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', + 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', + 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', + 'fill_constant', 'fill_constant', 'uniform_random', + 'uniform_random', 'recv', 'recv', 'recv', 'fetch_barrier', 'concat', + 'fake_init' + ] + self.assertEqual([op.type for op in 
trainer_startup.blocks[0].ops], + startup_ops) class TestDistLookupTableSliceSize(TestDistLookupTableBase): diff --git a/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py b/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py new file mode 100755 index 0000000000000000000000000000000000000000..b3833f05f1aa3aac7b5bcc5b6fdc138870cc8844 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py @@ -0,0 +1,217 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import numpy as np +import paddle.fluid.core as core +from op_test import OpTest + + +class TestSimilarityFocusOp(OpTest): + def setUp(self): + self.op_type = "similarity_focus" + batch_size = 2 + x_dim, y_dim, z_dim = 3, 2, 2 + self.inputs = { + 'X': np.array([[[[0.8, 0.1], [0.4, 0.5]], [[0.9, 0.7], [0.9, 0.9]], + [[0.8, 0.9], [0.1, 0.2]]], + [[[0.2, 0.5], [0.3, 0.4]], [[0.9, 0.7], [0.8, 0.4]], + [[0.0, 0.2], [0.4, 0.7]]]]), + } + self.attrs = { + 'axis': 1, + 'indexes': [0], + } + + output = None + for batch in range(batch_size): + res = np.zeros((1, y_dim, z_dim)).astype("float32").reshape(-1) + for index in self.attrs['indexes']: + channel = self.inputs['X'][batch, index, :, :].reshape(-1).copy( + ) + tag1 = [0 for i in range(y_dim)] + tag2 = [0 for i in range(z_dim)] + cnt = 0 + for i in range(channel.size): + index = channel.argmax() + idx1 = index // z_dim + idx2 = index % z_dim + if tag1[idx1] + tag2[idx2] == 0: + tag1[idx1] = 1 + tag2[idx2] = 1 + res[index] = 1 + cnt += 1 + if cnt == min(y_dim, z_dim): + break + channel[index] = -1 + res = res.reshape(1, y_dim, z_dim).repeat([x_dim], axis=0) + res = res.reshape(1, x_dim, y_dim, z_dim) + if output is not None: + output = np.concatenate((output, res), axis=0) + else: + output = res + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + +class TestSimilarityFocusOp_axis1(OpTest): + def setUp(self): + self.op_type = "similarity_focus" + batch_size = 3 + x_dim, y_dim, z_dim = 4, 5, 6 + self.inputs = { + 'X': np.random.random( + (batch_size, x_dim, y_dim, z_dim)).astype("float32"), + } + self.attrs = { + 'axis': 1, + 'indexes': [0, 3], + } + + output = None + for batch in range(batch_size): + res = np.zeros((1, y_dim, z_dim)).astype("float32").reshape(-1) + for index in self.attrs['indexes']: + channel = self.inputs['X'][batch, index, :, :].reshape(-1).copy( + ) + tag1 = [0 for i in range(y_dim)] + tag2 = [0 for i 
in range(z_dim)] + cnt = 0 + for i in range(channel.size): + index = channel.argmax() + idx1 = index // z_dim + idx2 = index % z_dim + if tag1[idx1] + tag2[idx2] == 0: + tag1[idx1] = 1 + tag2[idx2] = 1 + res[index] = 1 + cnt += 1 + if cnt == min(y_dim, z_dim): + break + channel[index] = -1 + res = res.reshape(1, y_dim, z_dim) + res = res.repeat([x_dim], axis=0) + res = res.reshape(1, x_dim, y_dim, z_dim) + if output is not None: + output = np.concatenate((output, res), axis=0) + else: + output = res + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + +class TestSimilarityFocusOp_axis2(OpTest): + def setUp(self): + self.op_type = "similarity_focus" + batch_size = 6 + x_dim, y_dim, z_dim = 7, 8, 9 + self.inputs = { + 'X': np.random.random( + (batch_size, x_dim, y_dim, z_dim)).astype("float32"), + } + self.attrs = { + 'axis': 2, + 'indexes': [0, 3, 5], + } + + output = None + for batch in range(batch_size): + res = np.zeros((x_dim, 1, z_dim)).astype("float32").reshape(-1) + for index in self.attrs['indexes']: + channel = self.inputs['X'][batch, :, index, :].reshape(-1).copy( + ) + tag1 = [0 for i in range(x_dim)] + tag2 = [0 for i in range(z_dim)] + cnt = 0 + for i in range(channel.size): + index = channel.argmax() + idx1 = index // z_dim + idx2 = index % z_dim + if tag1[idx1] + tag2[idx2] == 0: + tag1[idx1] = 1 + tag2[idx2] = 1 + res[index] = 1 + cnt += 1 + if cnt == min(x_dim, z_dim): + break + channel[index] = -1 + res = res.reshape(x_dim, 1, z_dim) + res = res.repeat([y_dim], axis=1) + res = res.reshape(1, x_dim, y_dim, z_dim) + if output is not None: + output = np.concatenate((output, res), axis=0) + else: + output = res + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + +class TestSimilarityFocusOp_axis3(OpTest): + def setUp(self): + self.op_type = "similarity_focus" + batch_size = 64 + x_dim, y_dim, z_dim = 48, 48, 13 + self.inputs = { + 'X': np.random.random( + (batch_size, x_dim, 
y_dim, z_dim)).astype("float32"), + } + self.attrs = { + 'axis': 3, + 'indexes': [0, 2, 7, 9], + } + + output = None + for batch in range(batch_size): + res = np.zeros((x_dim, y_dim, 1)).astype("float32").reshape(-1) + for index in self.attrs['indexes']: + channel = self.inputs['X'][batch, :, :, index].reshape(-1).copy( + ) + tag1 = [0 for i in range(x_dim)] + tag2 = [0 for i in range(y_dim)] + cnt = 0 + for i in range(channel.size): + index = channel.argmax() + idx1 = index // y_dim + idx2 = index % y_dim + if tag1[idx1] + tag2[idx2] == 0: + tag1[idx1] = 1 + tag2[idx2] = 1 + res[index] = 1 + cnt += 1 + if cnt == min(x_dim, y_dim): + break + channel[index] = -1 + res = res.reshape(x_dim, y_dim, 1) + res = res.repeat([z_dim], axis=2) + res = res.reshape(1, x_dim, y_dim, z_dim) + if output is not None: + output = np.concatenate((output, res), axis=0) + else: + output = res + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py new file mode 100644 index 0000000000000000000000000000000000000000..78b95de7e07b1d1fcdeeae63498e740c2b474c6d --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py @@ -0,0 +1,142 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.op import Operator +from paddle.fluid.executor import Executor + + +class TestLoDTensorArrayConcat(unittest.TestCase): + def setUp(self): + self.op_type = "tensor_array_to_tensor" + self.attrs = {"axis": 0} + self.outputs = ["Out"] + + def test_get_set(self): + scope = core.Scope() + program = fluid.Program() + block = program.global_block() + + input_arr = block.create_var( + name="tmp_lod_tensor_array", + type=core.VarDesc.VarType.LOD_TENSOR_ARRAY) + input_arr.persistable = True + input_arr_var = scope.var('tmp_lod_tensor_array') + input_tensor_array = input_arr_var.get_lod_tensor_array() + self.assertEqual(0, len(input_tensor_array)) + + cpu = core.CPUPlace() + for i in range(10): + t = core.LoDTensor() + if i == 0: + t.set(numpy.array([[i], [i]], dtype='float32'), cpu) + else: + t.set(numpy.array([[i]], dtype='float32'), cpu) + input_tensor_array.append(t) + + self.assertEqual(10, len(input_tensor_array)) + + random_grad = numpy.random.random_sample([11]).astype(numpy.float32) + + y_out = block.create_var(name="Out") + y_out.persistable = True + y_out_index = block.create_var(name="OutIndex") + y_out_index.persistable = True + + y_grad_arr = block.create_var( + name='Out@GRAD', dtype='float32', shape=[11]) + y_grad_arr.persistable = True + y_grad = scope.var('Out@GRAD') + y_grad_tensor = y_grad.get_tensor() + y_grad_tensor.set(random_grad, cpu) + + op = block.append_op( + type=self.op_type, + inputs={"X": input_arr}, + outputs={"Out": y_out, + "OutIndex": y_out_index}, + attrs=self.attrs) + + out_grad = block.create_var( + name="tmp_lod_tensor_array@GRAD", + type=core.VarDesc.VarType.LOD_TENSOR_ARRAY) + out_grad.persistable = True + + grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(op.desc, + 
set(), []) + grad_op_desc = grad_op_desc_list[0] + new_op_desc = block.desc.append_op() + new_op_desc.copy_from(grad_op_desc) + for var_name in grad_op_desc.output_arg_names(): + block.desc.var(var_name.encode("ascii")) + + grad_op_desc.infer_var_type(block.desc) + grad_op_desc.infer_shape(block.desc) + for arg in grad_op_desc.output_arg_names(): + grad_var = block.desc.find_var(arg.encode("ascii")) + grad_var.set_dtype(core.VarDesc.VarType.FP32) + + fetch_list = [] + fetch_list.append(block.var('Out')) + fetch_list.append(block.var('OutIndex')) + + exe = fluid.Executor(fluid.CPUPlace()) + out = exe.run(program, fetch_list=fetch_list, scope=scope) + #print ("index: ", numpy.array(out[1])) + + # test forward + tensor_res = numpy.array(out[0]) + tensor_res_out_idx = numpy.array(out[1]) + tensor_gt = numpy.array( + [0] + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='float32') + + self.assertEqual(len(tensor_res), len(tensor_gt)) + self.assertEqual(len(tensor_res_out_idx), 10) + + for i in range(len(tensor_res)): + self.assertEqual(tensor_res[i], tensor_gt[i]) + + for i in range(len(tensor_res_out_idx)): + if i == 0: + self.assertEqual(tensor_res_out_idx[i], 2) + else: + self.assertEqual(tensor_res_out_idx[i], 1) + + # test backward + grad_tensor = scope.var('tmp_lod_tensor_array@GRAD') + grad_tensor_array = grad_tensor.get_lod_tensor_array() + + self.assertEqual(10, len(grad_tensor_array)) + + for i in range(len(grad_tensor_array)): + if i == 0: + self.assertEqual( + numpy.array(grad_tensor_array[i])[0], + numpy.array(random_grad[i])) + self.assertEqual( + numpy.array(grad_tensor_array[i])[1], + numpy.array(random_grad[i + 1])) + if i == 1: + self.assertEqual( + numpy.array(grad_tensor_array[i]), + numpy.array(random_grad[i + 1])) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 
094eaeb59ce7ab73012f6e6a5fc24778933270c1..89bc24802751340b6d4657be8673d714f3d3dc2b 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -31,18 +31,17 @@ Steps to transpile pserver: """ import math -import sys import numpy as np import collections -import six import logging -from .ps_dispatcher import RoundRobin, HashName, PSDispatcher +from .ps_dispatcher import RoundRobin, PSDispatcher from .. import core, framework, unique_name from ..framework import Program, default_main_program, \ default_startup_program, Block, \ Parameter, grad_var_name from .details import * +from ..distribute_lookup_table import find_distributed_lookup_table from functools import reduce LOOKUP_TABLE_TYPE = "lookup_table" @@ -292,7 +291,8 @@ class DistributeTranspiler(object): self.optimize_ops, self.params_grads = self._get_optimize_pass() ps_dispatcher = self.config.split_method(self.pserver_endpoints) - self.has_distributed_lookup_table = self._has_distributed_lookup_table() + self.table_name = find_distributed_lookup_table(self.origin_program) + self.has_distributed_lookup_table = self.table_name != None self.param_name_to_grad_name = dict() self.grad_name_to_param_name = dict() for param_var, grad_var in self.params_grads: @@ -966,28 +966,6 @@ to transpile() call.") # ====================== private transpiler functions ===================== - def _has_distributed_lookup_table(self): - # process lookup_table_op - # 1. check all lookup_table_op is distributed - # 2. check all lookup_table_op share the same table. 
- distributed_lookup_table_ops = [] - # support only one distributed_lookup_table now - self.table_name = None - for op in self.origin_program.global_block().ops: - if op.type == LOOKUP_TABLE_TYPE: - if op.attr('is_distributed') is True: - if self.table_name is None: - self.table_name = op.input("W")[0] - if self.table_name != op.input("W")[0]: - raise RuntimeError("all distributed lookup_table_ops" - " should have only one table") - distributed_lookup_table_ops.append(op) - else: - if self.table_name is not None: - assert op.input("W")[0] != self.table_name - - return len(distributed_lookup_table_ops) > 0 - def _update_dist_lookup_table_vars(self, param_list, grad_list, params_grads): # TODO(wuyi): put find a way to put dist lookup table stuff all together. @@ -1341,7 +1319,6 @@ to transpile() call.") """ create a new block to handle save checkpoint. """ - import os pserver_program.global_block().create_var( name="kLookupTablePath",