diff --git a/cmake/configure.cmake b/cmake/configure.cmake index ce1857582bd3e8ab3077158384beaae36a83a4b2..e9852f00b1835adec31373f58ac538f9685251e0 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -62,8 +62,26 @@ if(NOT CMAKE_CROSSCOMPILING) endif() if(WIN32) - # windows stupid compile option for all targets. + # windows header option for all targets. add_definitions(-D_XKEYCHECK_H) + # Use symbols instead of absolute path, reduce the cmake link command length. + SET(CMAKE_C_USE_RESPONSE_FILE_FOR_LIBRARIES 1) + SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_LIBRARIES 1) + SET(CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1) + SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS 1) + SET(CMAKE_C_USE_RESPONSE_FILE_FOR_INCLUDES 1) + SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_INCLUDES 1) + SET(CMAKE_C_RESPONSE_FILE_LINK_FLAG "@") + SET(CMAKE_CXX_RESPONSE_FILE_LINK_FLAG "@") + + # Specify the program to use when building static libraries + SET(CMAKE_C_CREATE_STATIC_LIBRARY " lib ") + SET(CMAKE_CXX_CREATE_STATIC_LIBRARY " lib ") + + # set defination for the dll export + if (NOT MSVC) + message(FATAL "Windows build only support msvc. Which was binded by the nvcc compiler of NVIDIA.") + endif(NOT MSVC) endif(WIN32) if(NOT WITH_GOLANG) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index e0556a0babc74ba6efa0a190d4f7b77416bef3bf..331b2af367bdf261ffbf96fb88f61cc6958ee647 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -27,7 +27,6 @@ endfunction() CheckCompilerCXX11Flag() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") - # safe_set_flag # # Set a compile flag only if compiler is support @@ -71,6 +70,20 @@ macro(safe_set_nvflag flag_name) endif() endmacro() +macro(safe_set_static_flag) # set c_flags and cxx_flags to static or shared + if (BUILD_SHARED_LIBS) + return() # if build shared libs, the flags keep same with '/MD' + endif(BUILD_SHARED_LIBS) + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "/MD") + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endif(${flag_var} MATCHES "/MD") + endforeach(flag_var) +endmacro() CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS) if(NOT UINT64_MAX_EXISTS) @@ -97,9 +110,13 @@ SET(CMAKE_EXTRA_INCLUDE_FILES "") # Common flags. the compiler flag used for C/C++ sources whenever release or debug # Do not care if this flag is support for gcc. + +# https://github.com/PaddlePaddle/Paddle/issues/12773 +if (NOT WIN32) set(COMMON_FLAGS -fPIC -fno-omit-frame-pointer + -Werror -Wall -Wextra -Wnon-virtual-dtor @@ -114,11 +131,6 @@ set(COMMON_FLAGS -Wno-error=terminate # Warning in PADDLE_ENFORCE ) -# https://github.com/PaddlePaddle/Paddle/issues/12773 -if (NOT WIN32) -list(APPEND COMMON_FLAGS -Werror) -endif() - set(GPU_COMMON_FLAGS -fPIC -fno-omit-frame-pointer @@ -133,30 +145,53 @@ set(GPU_COMMON_FLAGS -Wno-error=array-bounds # Warnings in Eigen::array ) +else(NOT WIN32) +set(COMMON_FLAGS + "/w") #disable all warnings. +set(GPU_COMMON_FLAGS + "/w") #disable all warnings +endif(NOT WIN32) + if (APPLE) if(NOT CMAKE_CROSSCOMPILING) # On Mac OS X build fat binaries with x86_64 architectures by default. set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE) endif() -else() +endif(APPLE) + +if(LINUX) set(GPU_COMMON_FLAGS -Wall -Wextra -Werror ${GPU_COMMON_FLAGS}) -endif() +endif(LINUX) if(UNIX AND NOT APPLE) # except apple from nix*Os family set(LINUX TRUE) endif(UNIX AND NOT APPLE) - foreach(flag ${COMMON_FLAGS}) safe_set_cflag(CMAKE_C_FLAGS ${flag}) safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag}) + endforeach() foreach(flag ${GPU_COMMON_FLAGS}) safe_set_nvflag(${flag}) endforeach() + +if(WIN32) +# windows build turn off warnings. +safe_set_static_flag() + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "/W3") + string(REGEX REPLACE "/W3" "/w" ${flag_var} "${${flag_var}}") + endif(${flag_var} MATCHES "/W3") + endforeach(flag_var) +endif(WIN32) diff --git a/paddle/contrib/float16/float16_transpiler.py b/paddle/contrib/float16/float16_transpiler.py index 66e0345c299730c113ffbdc8dd3c1fa32f872f3d..8d95dc0591e1d6bd815cc697528191c2ee8c1cfe 100644 --- a/paddle/contrib/float16/float16_transpiler.py +++ b/paddle/contrib/float16/float16_transpiler.py @@ -102,8 +102,8 @@ class Float16Transpiler: continue for input_arg in current_op.input_arg_names: if input_arg in self.input_map: - current_op.rename_input(input_arg, - self.input_map[input_arg]) + current_op._rename_input(input_arg, + self.input_map[input_arg]) def _remove_unused_var(self): ''' @@ -187,7 +187,7 @@ class Float16Transpiler: shape=var.shape, persistable=var.persistable) find_op(var) - var.op.rename_output(var_name, tmp_var_name) + var.op._rename_output(var_name, tmp_var_name) self.block._insert_op( i, type="cast", diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 98ff3da108cb713cd6d6c1a429b7049156949543..130558b09165dbe31f2062c2aebbab0a84d56b24 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -6,26 +6,9 @@ paddle.fluid.Program.global_block ArgSpec(args=['self'], varargs=None, keywords= paddle.fluid.Program.list_vars ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.Program.parse_from_string ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None) paddle.fluid.Program.to_string ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)) -paddle.fluid.Operator.__init__ ArgSpec(args=['self', 'block', 'desc', 'type', 'inputs', 'outputs', 'attrs'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.Operator.all_attrs ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Operator.attr ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Operator.attr_type ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Operator.block_attr ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Operator.block_attr_id ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Operator.blocks_attr ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Operator.blocks_attr_ids ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Operator.has_attr ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Operator.has_kernel ArgSpec(args=['self', 'op_type'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Operator.input ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Operator.output ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Operator.rename_input ArgSpec(args=['self', 'old_name', 'new_name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Operator.rename_output ArgSpec(args=['self', 'old_name', 'new_name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Operator.set_attr ArgSpec(args=['self', 'name', 'val'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Operator.to_string ArgSpec(args=['self', 'throw_on_error'], varargs=None, keywords=None, defaults=None) paddle.fluid.default_startup_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None) paddle.fluid.default_main_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None) paddle.fluid.program_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None) -paddle.fluid.get_var ArgSpec(args=['name', 'program'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.name_scope ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None) paddle.fluid.Executor.__init__ ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None) paddle.fluid.Executor.close ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) @@ -41,7 +24,7 @@ paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0)) paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.DistributeTranspilerConfig.__init__ -paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords='kwargs', defaults=(None, None, None, None, None, 1, 0, None)) +paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)) paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True)) paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ExecutionStrategy) -> None paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.GradientScaleStrategy, arg0: int) -> None @@ -162,14 +145,14 @@ paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, key paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None)) paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None)) -paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) -paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) -paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) -paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) -paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) -paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) -paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) +paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'out', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None, None)) +paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) +paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) +paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) +paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) +paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) +paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) +paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)) paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)) paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None) @@ -374,7 +357,7 @@ paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> Non paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None paddle.fluid.ParamAttr.__init__ ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False)) -paddle.fluid.WeightNormParamAttr.__init__ ArgSpec(args=['self', 'dim'], varargs=None, keywords='kwargs', defaults=(None,)) +paddle.fluid.WeightNormParamAttr.__init__ ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)) paddle.fluid.DataFeeder.__init__ ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.DataFeeder.decorate_reader ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)) paddle.fluid.DataFeeder.feed ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None) diff --git a/paddle/fluid/CMakeLists.txt b/paddle/fluid/CMakeLists.txt index ee1f655e25dedb8846bb26275072fd9f6c1f123e..519a00fb073b08f6c88de8186de187476b548fd3 100644 --- a/paddle/fluid/CMakeLists.txt +++ b/paddle/fluid/CMakeLists.txt @@ -13,3 +13,5 @@ if(WITH_INFERENCE) # NOTE: please add subdirectory inference at last. add_subdirectory(inference) endif() + +add_subdirectory(train) diff --git a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc index 09c5ec59d66445bdbd5349447b125be89cb2efdf..d7df6389cfd595324e284e0da10f65213ccee80f 100644 --- a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc @@ -26,8 +26,6 @@ std::unique_ptr ConvReLUFusePass::ApplyImpl( PADDLE_ENFORCE(graph.get()); FusePassBase::Init("conv_relu_mkldnn_fuse", graph.get()); - std::unordered_set nodes2delete; - GraphPatternDetector gpd; auto* conv_input = gpd.mutable_pattern() ->NewNode("conv_relu_mkldnn_fuse/conv_input") @@ -42,36 +40,20 @@ std::unique_ptr ConvReLUFusePass::ApplyImpl( Graph* g) { VLOG(4) << "handle ConvReLU fuse"; GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight, - conv_relu_pattern); // Filter - GET_IR_NODE_FROM_SUBGRAPH(conv_bias, conv_bias, conv_relu_pattern); // Bias - GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern); // tmp + conv_relu_pattern); // Filter + GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern); // tmp GET_IR_NODE_FROM_SUBGRAPH(conv, conv, conv_relu_pattern); // CONV op GET_IR_NODE_FROM_SUBGRAPH(relu_out, relu_out, conv_relu_pattern); // Out GET_IR_NODE_FROM_SUBGRAPH(relu, relu, conv_relu_pattern); // ReLU op - // Create an ConvReLU Node. - OpDesc desc; - std::string conv_relu_i_in = subgraph.at(conv_input)->Name(); - std::string conv_relu_w_in = conv_weight->Name(); - std::string conv_relu_b_in = conv_bias->Name(); - std::string conv_relu_out = relu_out->Name(); - desc.SetInput("Input", std::vector({conv_relu_i_in})); - desc.SetInput("Filter", std::vector({conv_relu_w_in})); - desc.SetInput("Bias", std::vector({conv_relu_b_in})); - desc.SetOutput("Output", std::vector({conv_relu_out})); - desc.SetType("conv2d"); - for (auto& attr : conv->Op()->GetAttrMap()) { - desc.SetAttr(attr.first, attr.second); - } - desc.SetAttr("fuse_relu", true); - auto conv_relu_node = g->CreateOpNode(&desc); // OpDesc will be copied. - GraphSafeRemoveNodes(graph.get(), {conv, relu, conv_out}); + // Transform Conv node into ConvReLU node. + OpDesc* desc = conv->Op(); + desc->SetOutput("Output", std::vector({relu_out->Name()})); + desc->SetAttr("fuse_relu", true); + GraphSafeRemoveNodes(graph.get(), {relu, conv_out}); PADDLE_ENFORCE(subgraph.count(conv_input)); - IR_NODE_LINK_TO(subgraph.at(conv_input), conv_relu_node); - IR_NODE_LINK_TO(conv_weight, conv_relu_node); - IR_NODE_LINK_TO(conv_bias, conv_relu_node); - IR_NODE_LINK_TO(conv_relu_node, relu_out); + IR_NODE_LINK_TO(conv, relu_out); found_conv_relu_count++; }; diff --git a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc index 82b5fa1886098ca3b19c147c307d3f2fc3ba03d6..9dd780ec89ab991d6d99cb66fa2a9b683be2b9ca 100644 --- a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc @@ -85,16 +85,13 @@ TEST(ConvReLUFusePass, basic) { for (auto* node : graph->Nodes()) { if (node->IsOp() && node->Op()->Type() == "conv2d") { - if (node->Op()->HasAttr("use_mkldnn")) { - bool use_mkldnn = boost::get(node->Op()->GetAttr("use_mkldnn")); - if (use_mkldnn) { - if (node->Op()->HasAttr("fuse_relu")) { - bool fuse_relu = boost::get(node->Op()->GetAttr("fuse_relu")); - if (fuse_relu) { - ++conv_relu_count; - } - } - } + auto* op = node->Op(); + ASSERT_TRUE(op->HasAttr("use_mkldnn")); + EXPECT_TRUE(boost::get(op->GetAttr("use_mkldnn"))); + ASSERT_TRUE(op->HasAttr("fuse_relu")); + bool fuse_relu = boost::get(op->GetAttr("fuse_relu")); + if (fuse_relu) { + ++conv_relu_count; } } } diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index ef5113819696238d4e06b826bf43064b0f368dea..6d2c51b0e9bed8461f6491b84a36a3bf6663a138 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -638,11 +638,6 @@ PDNode *patterns::ConvReLU::operator()( ->AsInput() ->assert_is_persistable_var() ->assert_is_op_input("conv2d", "Filter"); - // Bias - auto *conv_bias_var = pattern->NewNode(conv_bias_repr()) - ->AsInput() - ->assert_is_persistable_var() - ->assert_is_op_input("conv2d", "Bias"); // intermediate variable, will be removed in the IR after fuse. auto *conv_out_var = pattern->NewNode(conv_out_repr()) ->AsIntermediate() @@ -653,8 +648,7 @@ PDNode *patterns::ConvReLU::operator()( ->AsOutput() ->assert_is_op_output("relu"); - conv_op->LinksFrom({conv_input, conv_weight_var, conv_bias_var}) - .LinksTo({conv_out_var}); + conv_op->LinksFrom({conv_input, conv_weight_var}).LinksTo({conv_out_var}); relu_op->LinksFrom({conv_out_var}).LinksTo({relu_out_var}); return relu_out_var; } diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index 46950ed877cda7cd83ead4e9aa9a3aaae5d5ecfa..69b486c29d8bd1102a8372f5041051c25ce19359 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -379,7 +379,7 @@ struct PatternBase { // op: conv + relu // named nodes: // conv_input, conv_weight, -// conv_bias, conv_out, conv, +// conv_out, conv, // relu_out, relu struct ConvReLU : public PatternBase { ConvReLU(PDPattern* pattern, const std::string& name_scope) @@ -392,7 +392,6 @@ struct ConvReLU : public PatternBase { PATTERN_DECL_NODE(relu); // declare variable node's name PATTERN_DECL_NODE(conv_weight); - PATTERN_DECL_NODE(conv_bias); PATTERN_DECL_NODE(conv_out); PATTERN_DECL_NODE(relu_out); }; diff --git a/paddle/fluid/framework/ir/graph_traits.cc b/paddle/fluid/framework/ir/graph_traits.cc index 8f548913e4e1d9d5bc5bdace8b92db9065cf3b5e..084a4ba2def87eaa8badb3ca2c39865c6e5cb981 100644 --- a/paddle/fluid/framework/ir/graph_traits.cc +++ b/paddle/fluid/framework/ir/graph_traits.cc @@ -14,6 +14,8 @@ #include "paddle/fluid/framework/ir/graph_traits.h" +#include + namespace paddle { namespace framework { namespace ir { diff --git a/paddle/fluid/framework/op_info.h b/paddle/fluid/framework/op_info.h index 19e5c2c73eac74dee030a4f7820531800f737e4e..06cf4a0f9f33af67343437baeb9623a35ddad183 100644 --- a/paddle/fluid/framework/op_info.h +++ b/paddle/fluid/framework/op_info.h @@ -38,27 +38,31 @@ struct OpInfo { OpAttrChecker* checker_{nullptr}; InferVarTypeFN infer_var_type_; InferShapeFN infer_shape_; + std::string op_type_; bool HasOpProtoAndChecker() const { return proto_ != nullptr && checker_ != nullptr; } const proto::OpProto& Proto() const { - PADDLE_ENFORCE_NOT_NULL(proto_, "Operator Proto has not been registered"); + PADDLE_ENFORCE_NOT_NULL(proto_, "Operator %s Proto has not been registered", + op_type_); PADDLE_ENFORCE(proto_->IsInitialized(), - "Operator Proto must be initialized in op info"); + "Operator %s Proto must be initialized in op info", + op_type_); return *proto_; } const OpCreator& Creator() const { - PADDLE_ENFORCE_NOT_NULL(creator_, - "Operator Creator has not been registered"); + PADDLE_ENFORCE_NOT_NULL( + creator_, "Operator %s Creator has not been registered", op_type_); return creator_; } const GradOpMakerFN& GradOpMaker() const { PADDLE_ENFORCE_NOT_NULL(grad_op_maker_, - "Operator GradOpMaker has not been registered."); + "Operator %s GradOpMaker has not been registered.", + op_type_); return grad_op_maker_; } @@ -73,8 +77,9 @@ class OpInfoMap { return map_.find(op_type) != map_.end(); } - void Insert(const std::string& type, const OpInfo& info) { + void Insert(const std::string& type, OpInfo info) { PADDLE_ENFORCE(!Has(type), "Operator %s has been registered", type); + info.op_type_ = type; map_.insert({type, info}); } diff --git a/paddle/fluid/framework/op_proto_maker.cc b/paddle/fluid/framework/op_proto_maker.cc index df2a7a27ca4a6011b214202ac9bf4f30dc482ece..2663c9be41a834523fb896b490e7e75df256de05 100644 --- a/paddle/fluid/framework/op_proto_maker.cc +++ b/paddle/fluid/framework/op_proto_maker.cc @@ -132,7 +132,9 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto, AddAttr(OpNamescopeAttrName(), "Operator name with namesope.") .SetDefault(""); - + AddAttr>(OpCreationCallstackAttrName(), + "Callstack for Op Creatation.") + .SetDefault({}); Validate(); } diff --git a/paddle/fluid/framework/op_proto_maker.h b/paddle/fluid/framework/op_proto_maker.h index 4ed3cc45d66849267ef4945a03da1db76b53e4ea..f13196959705bad473a6f7b3ef88f8faa8abe2b8 100644 --- a/paddle/fluid/framework/op_proto_maker.h +++ b/paddle/fluid/framework/op_proto_maker.h @@ -46,6 +46,7 @@ class OpProtoAndCheckerMaker { static const char *OpRoleAttrName() { return "op_role"; } static const char *OpRoleVarAttrName() { return "op_role_var"; } static const char *OpNamescopeAttrName() { return "op_namescope"; } + static const char *OpCreationCallstackAttrName() { return "op_callstack"; } void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker); diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index a103be7191d02a96ee97d76f786f9364938c1c65..e800cb9993ddde45de7c33b11994359e77710daf 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -14,15 +14,17 @@ limitations under the License. */ #define GLOG_NO_ABBREVIATED_SEVERITIES #define GOOGLE_GLOG_DLL_DECL +#include "paddle/fluid/framework/operator.h" #include #include - #include - +#include +#include +#include #include "paddle/fluid/framework/data_transform.h" #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/shape_inference.h" #include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/platform/profiler.h" @@ -140,19 +142,48 @@ static LoD GetLoD(const Scope& scope, const std::string& name) { } void OperatorBase::Run(const Scope& scope, const platform::Place& place) { - VLOG(4) << place << " " << DebugStringEx(&scope); - if (platform::is_gpu_place(place)) { + try { + if (VLOG_IS_ON(4)) { + VLOG(4) << place << " " << DebugStringEx(&scope); + } + if (platform::is_gpu_place(place)) { #ifndef PADDLE_WITH_CUDA - PADDLE_THROW("Cannot run operator on place %s", place); + PADDLE_THROW("Cannot run operator on place %s", place); #else - auto dev_id = boost::get(place).device; - platform::SetDeviceId(dev_id); + auto dev_id = boost::get(place).device; + platform::SetDeviceId(dev_id); #endif + } + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + platform::RecordEvent record_event(Type(), pool.Get(place)); + RunImpl(scope, place); + if (VLOG_IS_ON(3)) { + VLOG(3) << place << " " << DebugStringEx(&scope); + } + } catch (platform::EnforceNotMet exception) { + if (Attrs().count("sub_block") != 0) { + throw exception; + } + + auto& callstack = Attr>( + OpProtoAndCheckerMaker::OpCreationCallstackAttrName()); + + if (callstack.empty()) { + throw exception; + } + std::ostringstream sout; + sout << "Invoke operator " << Type() << " error.\n"; + sout << "Python Callstacks: \n"; + for (auto& line : callstack) { + sout << line; + } + sout << "C++ Callstacks: \n"; + sout << exception.err_str_; + exception.err_str_ = sout.str(); + throw exception; + } catch (...) { + std::rethrow_exception(std::current_exception()); } - platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - platform::RecordEvent record_event(Type(), pool.Get(place)); - RunImpl(scope, place); - VLOG(3) << place << " " << DebugStringEx(&scope); } bool OperatorBase::HasInputs(const std::string& name) const { @@ -180,7 +211,7 @@ const std::vector& OperatorBase::Inputs( } bool OperatorBase::HasOutputs(const std::string& name) const { - if (outputs_.find(name) != outputs_.end()) { + if (outputs_.end() != outputs_.find(name)) { return true; } else { return false; diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 00cbe28d45dc4393ba1c141912aee7d1b7469a89..1032aadcbda4f1b05841e08e1abe7c737c3aeb9c 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -76,10 +76,10 @@ bool AnalysisPredictor::Init( } OptimizeInferenceProgram(); - ctx_ = executor_->Prepare(*inference_program_, 0); if (config_._use_mkldnn) { executor_->EnableMKLDNN(*inference_program_); } + ctx_ = executor_->Prepare(*inference_program_, 0); VLOG(5) << "to create variables"; PADDLE_ENFORCE(scope_.get()); diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index c57fc64bb6bfeebc7935f19d0e977e8fccd4c9a0..dca4386b21b4a064c21b52218682321258f368c4 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -22,6 +22,7 @@ limitations under the License. */ #include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/inference/api/api_impl.h" +#include "paddle/fluid/inference/api/helper.h" #include "paddle/fluid/inference/api/timer.h" #include "paddle/fluid/platform/profiler.h" @@ -215,57 +216,20 @@ bool NativePaddlePredictor::SetFeed(const std::vector &inputs, template void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch, PaddleTensor *output) { - std::vector shape; - auto dims_i = fetch.dims(); - auto lod = fetch.lod(); - const T *output_ptr = fetch.data(); - auto num = fetch.numel(); - std::vector data; - if (0 == lod.size()) { - std::copy(output_ptr, output_ptr + num, std::back_inserter(data)); - for (int j = 0; j < dims_i.size(); ++j) { - shape.push_back(dims_i[j]); - } - } else { - // for batch detection - // image[0] -> output[0] shape {145, 6} - // image[1] -> output[1] shape {176, 6} - // then, - // the batch output shape {321, 6} - // the lod {{0, 145, 321}} - // so we should append output[0] to {176, 6} - size_t max_dim = 0; - for (size_t j = 1; j < lod[0].size(); j++) { - max_dim = std::max(max_dim, lod[0][j] - lod[0][j - 1]); - } - size_t common_dim = lod[0].back() == 0 ? 0 : num / lod[0].back(); - if (max_dim > 0) { - data.resize((lod[0].size() - 1) * max_dim * common_dim, 0); - } - for (size_t j = 1; j < lod[0].size(); j++) { - size_t start = lod[0][j - 1] * common_dim; - size_t end = lod[0][j] * common_dim; - if (end > start) { - std::copy(output_ptr + start, output_ptr + end, - data.begin() + (j - 1) * max_dim * common_dim); - } - } - shape.push_back(lod[0].size() - 1); - shape.push_back(max_dim); - for (int j = 1; j < dims_i.size(); ++j) { - shape.push_back(dims_i[j]); - } - } - - output->shape = shape; - auto &buffer = output->data; - if (buffer.empty() || buffer.length() < sizeof(T) * data.size()) { - buffer.Resize(sizeof(T) * data.size()); - } - std::memcpy(buffer.data(), data.data(), sizeof(T) * data.size()); - // copy LoD - for (const auto &level : fetch.lod()) { - output->lod.emplace_back(level); + // set shape. + auto shape = framework::vectorize(fetch.dims()); + output->shape.assign(shape.begin(), shape.end()); + // set data. + const T *data = fetch.data(); + int num_elems = inference::VecReduceToInt(shape); + output->data.Resize(num_elems * sizeof(T)); + // The fetched tensor output by fetch op, should always in CPU memory, so just + // copy. + memcpy(output->data.data(), data, num_elems * sizeof(T)); + // set lod + output->lod.clear(); + for (auto &level : fetch.lod()) { + output->lod.emplace_back(level.begin(), level.end()); } } diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h index 8e359a67738c0df180933421b45f15b39fd0e78c..1fec2f96da0f9d978a3537b2d78e4ce5ef628c81 100644 --- a/paddle/fluid/inference/api/helper.h +++ b/paddle/fluid/inference/api/helper.h @@ -74,13 +74,17 @@ template <> std::string to_string>>( const std::vector>> &vec); +template +int VecReduceToInt(const std::vector &v) { + return std::accumulate(v.begin(), v.end(), 1, [](T a, T b) { return a * b; }); +} + template static void TensorAssignData(PaddleTensor *tensor, const std::vector> &data) { // Assign buffer - int dim = std::accumulate(tensor->shape.begin(), tensor->shape.end(), 1, - [](int a, int b) { return a * b; }); - tensor->data.Resize(sizeof(T) * dim); + int num_elems = VecReduceToInt(tensor->shape); + tensor->data.Resize(sizeof(T) * num_elems); int c = 0; for (const auto &f : data) { for (T v : f) { @@ -89,7 +93,7 @@ static void TensorAssignData(PaddleTensor *tensor, } } -std::string DescribeTensor(const PaddleTensor &tensor) { +static std::string DescribeTensor(const PaddleTensor &tensor) { std::stringstream os; os << "Tensor [" << tensor.name << "]\n"; os << " - type: "; @@ -113,8 +117,7 @@ std::string DescribeTensor(const PaddleTensor &tensor) { os << "\n"; os << " - data: "; - int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1, - [](int a, int b) { return a * b; }); + int dim = VecReduceToInt(tensor.shape); for (int i = 0; i < dim; i++) { os << static_cast(tensor.data.data())[i] << " "; } @@ -122,8 +125,8 @@ std::string DescribeTensor(const PaddleTensor &tensor) { return os.str(); } -void PrintTime(int batch_size, int repeat, int num_threads, int tid, - double latency, int epoch = 1) { +static void PrintTime(int batch_size, int repeat, int num_threads, int tid, + double latency, int epoch = 1) { LOG(INFO) << "====== batch_size: " << batch_size << ", repeat: " << repeat << ", threads: " << num_threads << ", thread id: " << tid << ", latency: " << latency << "ms ======"; diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 508ef1ce40aa0882a0f39a85f97511fd9ea2a8a5..d7ab2ac980af2cf3bd9d95bfdbfa1887ef9a64d7 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -58,6 +58,11 @@ set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classifi download_model_and_data(${TEXT_CLASSIFICATION_INSTALL_DIR} "text-classification-Senta.tar.gz" "text_classification_data.txt.tar.gz") inference_analysis_api_test(test_analyzer_text_classification ${TEXT_CLASSIFICATION_INSTALL_DIR} analyzer_text_classification_tester.cc) +# seq_conv1 +set(SEQ_CONV1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/seq_conv1") +download_model_and_data(${SEQ_CONV1_INSTALL_DIR} "seq_conv1_model.tar.gz" "seq_conv1_data.txt.tar.gz") +inference_analysis_api_test(test_analyzer_seq_conv1 ${SEQ_CONV1_INSTALL_DIR} analyzer_seq_conv1_tester.cc) + # ocr set(OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/ocr") if (NOT EXISTS ${OCR_INSTALL_DIR}) diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc new file mode 100644 index 0000000000000000000000000000000000000000..2f71ed46ffc9fd5f853f5b5b46de1446d28b9e69 --- /dev/null +++ b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc @@ -0,0 +1,199 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/tests/api/tester_helper.h" + +namespace paddle { +namespace inference { + +struct DataRecord { + std::vector> title1_all, title2_all, title3_all, l1_all; + std::vector> title1, title2, title3, l1; + std::vector title1_lod, title2_lod, title3_lod, l1_lod; + size_t batch_iter{0}; + size_t batch_size{1}; + size_t num_samples; // total number of samples + DataRecord() = default; + explicit DataRecord(const std::string &path, int batch_size = 1) + : batch_size(batch_size) { + Load(path); + } + DataRecord NextBatch() { + DataRecord data; + size_t batch_end = batch_iter + batch_size; + // NOTE skip the final batch, if no enough data is provided. + if (batch_end <= title1_all.size()) { + data.title1_all.assign(title1_all.begin() + batch_iter, + title1_all.begin() + batch_end); + data.title2_all.assign(title2_all.begin() + batch_iter, + title2_all.begin() + batch_end); + data.title3_all.assign(title3_all.begin() + batch_iter, + title3_all.begin() + batch_end); + data.l1_all.assign(l1_all.begin() + batch_iter, + l1_all.begin() + batch_end); + // Prepare LoDs + data.title1_lod.push_back(0); + data.title2_lod.push_back(0); + data.title3_lod.push_back(0); + data.l1_lod.push_back(0); + CHECK(!data.title1_all.empty()); + CHECK(!data.title2_all.empty()); + CHECK(!data.title3_all.empty()); + CHECK(!data.l1_all.empty()); + CHECK_EQ(data.title1_all.size(), data.title2_all.size()); + CHECK_EQ(data.title1_all.size(), data.title3_all.size()); + CHECK_EQ(data.title1_all.size(), data.l1_all.size()); + for (size_t j = 0; j < data.title1_all.size(); j++) { + data.title1.push_back(data.title1_all[j]); + data.title2.push_back(data.title2_all[j]); + data.title3.push_back(data.title3_all[j]); + data.l1.push_back(data.l1_all[j]); + // calculate lod + data.title1_lod.push_back(data.title1_lod.back() + + data.title1_all[j].size()); + data.title2_lod.push_back(data.title2_lod.back() + + data.title2_all[j].size()); + data.title3_lod.push_back(data.title3_lod.back() + + data.title3_all[j].size()); + data.l1_lod.push_back(data.l1_lod.back() + data.l1_all[j].size()); + } + } + batch_iter += batch_size; + return data; + } + void Load(const std::string &path) { + std::ifstream file(path); + std::string line; + int num_lines = 0; + while (std::getline(file, line)) { + num_lines++; + std::vector data; + split(line, '\t', &data); + // load title1 data + std::vector title1_data; + split_to_int64(data[0], ' ', &title1_data); + // load title2 data + std::vector title2_data; + split_to_int64(data[1], ' ', &title2_data); + // load title3 data + std::vector title3_data; + split_to_int64(data[2], ' ', &title3_data); + // load l1 data + std::vector l1_data; + split_to_int64(data[3], ' ', &l1_data); + title1_all.push_back(std::move(title1_data)); + title2_all.push_back(std::move(title2_data)); + title3_all.push_back(std::move(title3_data)); + l1_all.push_back(std::move(l1_data)); + } + num_samples = num_lines; + } +}; + +void PrepareInputs(std::vector *input_slots, DataRecord *data, + int batch_size) { + PaddleTensor title1_tensor, title2_tensor, title3_tensor, l1_tensor; + title1_tensor.name = "title1"; + title2_tensor.name = "title2"; + title3_tensor.name = "title3"; + l1_tensor.name = "l1"; + auto one_batch = data->NextBatch(); + int title1_size = one_batch.title1_lod[one_batch.title1_lod.size() - 1]; + title1_tensor.shape.assign({title1_size, 1}); + title1_tensor.lod.assign({one_batch.title1_lod}); + int title2_size = one_batch.title2_lod[one_batch.title2_lod.size() - 1]; + title2_tensor.shape.assign({title2_size, 1}); + title2_tensor.lod.assign({one_batch.title2_lod}); + int title3_size = one_batch.title3_lod[one_batch.title3_lod.size() - 1]; + title3_tensor.shape.assign({title3_size, 1}); + title3_tensor.lod.assign({one_batch.title3_lod}); + int l1_size = one_batch.l1_lod[one_batch.l1_lod.size() - 1]; + l1_tensor.shape.assign({l1_size, 1}); + l1_tensor.lod.assign({one_batch.l1_lod}); + + // assign data + TensorAssignData(&title1_tensor, one_batch.title1); + TensorAssignData(&title2_tensor, one_batch.title2); + TensorAssignData(&title3_tensor, one_batch.title3); + TensorAssignData(&l1_tensor, one_batch.l1); + // Set inputs. + input_slots->assign({title1_tensor, title2_tensor, title3_tensor, l1_tensor}); + for (auto &tensor : *input_slots) { + tensor.dtype = PaddleDType::INT64; + } +} + +void SetConfig(AnalysisConfig *cfg) { + cfg->model_dir = FLAGS_infer_model; + cfg->use_gpu = false; + cfg->device = 0; + cfg->specify_input_name = true; + cfg->enable_ir_optim = true; +} + +void SetInput(std::vector> *inputs) { + DataRecord data(FLAGS_infer_data, FLAGS_batch_size); + std::vector input_slots; + int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1; + LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size; + for (int bid = 0; bid < epoch; ++bid) { + PrepareInputs(&input_slots, &data, FLAGS_batch_size); + (*inputs).emplace_back(input_slots); + } +} + +// Easy for profiling independently. +TEST(Analyzer_seq_conv1, profile) { + AnalysisConfig cfg; + SetConfig(&cfg); + std::vector outputs; + + std::vector> input_slots_all; + SetInput(&input_slots_all); + TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads); + + if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { + // the first inference result + PADDLE_ENFORCE_EQ(outputs.size(), 1UL); + size_t size = GetSize(outputs[0]); + PADDLE_ENFORCE_GT(size, 0); + float *result = static_cast(outputs[0].data.data()); + // output is probability, which is in (0, 1). + for (size_t i = 0; i < size; i++) { + EXPECT_GT(result[i], 0); + EXPECT_LT(result[i], 1); + } + } +} + +// Check the fuse status +TEST(Analyzer_seq_conv1, fuse_statis) { + AnalysisConfig cfg; + SetConfig(&cfg); + int num_ops; + auto fuse_statis = GetFuseStatis(cfg, &num_ops); +} + +// Compare result of NativeConfig and AnalysisConfig +TEST(Analyzer_seq_conv1, compare) { + AnalysisConfig cfg; + SetConfig(&cfg); + + std::vector> input_slots_all; + SetInput(&input_slots_all); + CompareNativeAndAnalysis(cfg, input_slots_all); +} + +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index 7189df775227680726a9d4840386280c5ad44c23..9fcb5129d268a7730c11e5910077ad233050484e 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -47,11 +47,8 @@ void CompareResult(const std::vector &outputs, for (size_t i = 0; i < outputs.size(); i++) { auto &out = outputs[i]; auto &ref_out = ref_outputs[i]; - size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1, - [](int a, int b) { return a * b; }); - size_t ref_size = - std::accumulate(ref_out.shape.begin(), ref_out.shape.end(), 1, - [](int a, int b) { return a * b; }); + size_t size = VecReduceToInt(out.shape); + size_t ref_size = VecReduceToInt(ref_out.shape); EXPECT_GT(size, 0); EXPECT_EQ(size, ref_size); EXPECT_EQ(out.dtype, ref_out.dtype); @@ -87,10 +84,7 @@ std::unique_ptr CreateTestPredictor( } } -size_t GetSize(const PaddleTensor &out) { - return std::accumulate(out.shape.begin(), out.shape.end(), 1, - [](int a, int b) { return a * b; }); -} +size_t GetSize(const PaddleTensor &out) { return VecReduceToInt(out.shape); } std::unordered_map GetFuseStatis(AnalysisConfig config, int *num_ops) { diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 286b03d7b7d11a50f33f0190c1a5b9097ed0f4a2..c091476d6d132db17a656d5c8dee65e3a88d9ac2 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/operators/activation_op.h" #include #include "paddle/fluid/operators/mkldnn_activation_op.h" +#include "paddle/fluid/platform/port.h" namespace paddle { namespace operators { @@ -105,105 +106,105 @@ class ActivationOpGrad : public framework::OperatorWithKernel { } }; -__attribute__((unused)) constexpr char SigmoidDoc[] = R"DOC( +UNUSED constexpr char SigmoidDoc[] = R"DOC( Sigmoid Activation Operator $$out = \frac{1}{1 + e^{-x}}$$ )DOC"; -__attribute__((unused)) constexpr char LogSigmoidDoc[] = R"DOC( +UNUSED constexpr char LogSigmoidDoc[] = R"DOC( Logsigmoid Activation Operator $$out = \\log \\frac{1}{1 + e^{-x}}$$ )DOC"; -__attribute__((unused)) constexpr char ExpDoc[] = R"DOC( +UNUSED constexpr char ExpDoc[] = R"DOC( Exp Activation Operator. $out = e^x$ )DOC"; -__attribute__((unused)) constexpr char ReluDoc[] = R"DOC( +UNUSED constexpr char ReluDoc[] = R"DOC( Relu Activation Operator. $out = \max(x, 0)$ )DOC"; -__attribute__((unused)) constexpr char TanhDoc[] = R"DOC( +UNUSED constexpr char TanhDoc[] = R"DOC( Tanh Activation Operator. $$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ )DOC"; -__attribute__((unused)) constexpr char TanhShrinkDoc[] = R"DOC( +UNUSED constexpr char TanhShrinkDoc[] = R"DOC( TanhShrink Activation Operator. $$out = x - \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ )DOC"; -__attribute__((unused)) constexpr char SqrtDoc[] = R"DOC( +UNUSED constexpr char SqrtDoc[] = R"DOC( Sqrt Activation Operator. $out = \sqrt{x}$ )DOC"; -__attribute__((unused)) constexpr char AbsDoc[] = R"DOC( +UNUSED constexpr char AbsDoc[] = R"DOC( Abs Activation Operator. $out = |x|$ )DOC"; -__attribute__((unused)) constexpr char CeilDoc[] = R"DOC( +UNUSED constexpr char CeilDoc[] = R"DOC( Ceil Activation Operator. $out = ceil(x)$ )DOC"; -__attribute__((unused)) constexpr char FloorDoc[] = R"DOC( +UNUSED constexpr char FloorDoc[] = R"DOC( Floor Activation Operator. $out = floor(x)$ )DOC"; -__attribute__((unused)) constexpr char CosDoc[] = R"DOC( +UNUSED constexpr char CosDoc[] = R"DOC( Cosine Activation Operator. $out = cos(x)$ )DOC"; -__attribute__((unused)) constexpr char SinDoc[] = R"DOC( +UNUSED constexpr char SinDoc[] = R"DOC( Sine Activation Operator. $out = sin(x)$ )DOC"; -__attribute__((unused)) constexpr char RoundDoc[] = R"DOC( +UNUSED constexpr char RoundDoc[] = R"DOC( Round Activation Operator. $out = [x]$ )DOC"; -__attribute__((unused)) constexpr char ReciprocalDoc[] = R"DOC( +UNUSED constexpr char ReciprocalDoc[] = R"DOC( Reciprocal Activation Operator. $$out = \\frac{1}{x}$$ )DOC"; -__attribute__((unused)) constexpr char LogDoc[] = R"DOC( +UNUSED constexpr char LogDoc[] = R"DOC( Log Activation Operator. $out = \ln(x)$ @@ -212,21 +213,21 @@ Natural logarithm of x. )DOC"; -__attribute__((unused)) constexpr char SquareDoc[] = R"DOC( +UNUSED constexpr char SquareDoc[] = R"DOC( Square Activation Operator. $out = x^2$ )DOC"; -__attribute__((unused)) constexpr char SoftplusDoc[] = R"DOC( +UNUSED constexpr char SoftplusDoc[] = R"DOC( Softplus Activation Operator. $out = \ln(1 + e^{x})$ )DOC"; -__attribute__((unused)) constexpr char SoftsignDoc[] = R"DOC( +UNUSED constexpr char SoftsignDoc[] = R"DOC( Softsign Activation Operator. $$out = \frac{x}{1 + |x|}$$ diff --git a/paddle/fluid/operators/pool_mkldnn_op.cc b/paddle/fluid/operators/pool_mkldnn_op.cc index 5341187d1ce9400ac34750ab691608e76158ae0d..56cef91e29cc7da27384c27a7ec63e90cfadfc3b 100644 --- a/paddle/fluid/operators/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/pool_mkldnn_op.cc @@ -46,6 +46,25 @@ static std::string gethash(const memory::dims& input_dims, dims2str(paddings) + pooling_type + suffix; } +static inline int ComputeCeiledOutput(int input_size, int kernel_size, + int padding, int stride) { + return (input_size - kernel_size + 2 * padding) / stride + 1; +} + +static inline void CorrectOutputSize( + const std::vector& src_tz, const std::vector& dst_tz, + const std::vector& kernel_size, const std::vector& paddings, + const std::vector& strides, + std::vector& right_bot_padding) { // NOLINT + for (size_t i = 0; i < right_bot_padding.size(); i++) { + int desired_size = ComputeCeiledOutput(src_tz[i + 2], kernel_size[i], + paddings[i], strides[i]); + if (desired_size != dst_tz[i + 2]) { + right_bot_padding[i] += strides[i]; + } + } +} + template class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { public: @@ -103,6 +122,13 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { auto pool_p = std::static_pointer_cast(dev_ctx.GetBlob(key_pool_p)); if (pool_p == nullptr) { + const std::vector& padding_left_top(paddings); + std::vector padding_right_bottom(paddings); + bool ceil_mode = ctx.Attr("ceil_mode"); + if (ceil_mode) { + CorrectOutputSize(src_tz, dst_tz, ksize, paddings, strides, + padding_right_bottom); + } auto src_md = platform::MKLDNNMemDesc( src_tz, platform::MKLDNNGetDataType(), input_format); @@ -114,8 +140,9 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { mkldnn::memory::format::any); std::shared_ptr pool_pd = - CreatePrimitiveDesc(src_md, dst_md, strides, paddings, ksize, - pooling_type, mkldnn_engine); + CreatePrimitiveDesc(src_md, dst_md, strides, padding_left_top, + padding_right_bottom, ksize, pooling_type, + mkldnn_engine, ceil_mode); // save pool_pd into global device context to be referred in backward path dev_ctx.SetBlob(key_pool_pd, pool_pd); @@ -171,14 +198,16 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { private: std::unique_ptr CreatePrimitiveDesc( const mkldnn::memory::desc& src, const mkldnn::memory::desc& dst, - const std::vector& stride, const std::vector& padding, - const std::vector& kernel, const std::string& pooling_type, - const mkldnn::engine& engine) const { + const std::vector& stride, const std::vector& padding_left_top, + const std::vector& padding_right_bot, const std::vector& kernel, + const std::string& pooling_type, const mkldnn::engine& engine, + bool ceil_mode) const { auto pool_desc = mkldnn::pooling_forward::desc( mkldnn::prop_kind::forward, pooling_type == "max" ? mkldnn::algorithm::pooling_max : mkldnn::algorithm::pooling_avg, - src, dst, stride, kernel, padding, padding, mkldnn::padding_kind::zero); + src, dst, stride, kernel, padding_left_top, padding_right_bot, + mkldnn::padding_kind::zero); auto p_pool_pd = new mkldnn::pooling_forward::primitive_desc(pool_desc, engine); diff --git a/paddle/fluid/operators/read_op.cc b/paddle/fluid/operators/read_op.cc index a0d640b2020958af53a4405ae886eadb2a1e117e..326c58ee1c09d6f745e6c8abfb92030d11d8c1c6 100644 --- a/paddle/fluid/operators/read_op.cc +++ b/paddle/fluid/operators/read_op.cc @@ -45,10 +45,12 @@ class ReadInferVarType : public framework::VarTypeInference { framework::VarDesc* reader = block->FindVarRecursive(reader_name); auto dtypes = reader->GetDataTypes(); PADDLE_ENFORCE_EQ(dtypes.size(), out_names.size()); + auto lod_levels = reader->GetLoDLevels(); for (size_t i = 0; i < dtypes.size(); ++i) { framework::VarDesc& out = block->FindRecursiveOrCreateVar(out_names[i]); out.SetType(framework::proto::VarType::LOD_TENSOR); out.SetDataType(dtypes[i]); + out.SetLoDLevel(lod_levels[i]); } } }; diff --git a/paddle/fluid/operators/sequence_slice_op.h b/paddle/fluid/operators/sequence_slice_op.h index b5ea6ff49bbb29571f9a6ef6358ef881acd9be9e..03b59d71cc0ca2eddd1d9912e7ca25348507ba03 100644 --- a/paddle/fluid/operators/sequence_slice_op.h +++ b/paddle/fluid/operators/sequence_slice_op.h @@ -75,11 +75,11 @@ class SequenceSliceOpKernel : public framework::OpKernel { } for (size_t i = 0; i < n; ++i) { - PADDLE_ENFORCE_LT(0, offset_data[i], + PADDLE_ENFORCE_LE(0, offset_data[i], "The offset[%d] must greater than zero.", i); PADDLE_ENFORCE_LT(0, length_data[i], "The length[%d] must greater than zero.", i); - PADDLE_ENFORCE_LT(lod[0][i] + offset_data[i] + length_data[i], + PADDLE_ENFORCE_LE(lod[0][i] + offset_data[i] + length_data[i], lod[0][i + 1], "The target tensor's length overflow."); } diff --git a/paddle/fluid/operators/sgd_op.cu b/paddle/fluid/operators/sgd_op.cu index 4722be7a666d3e8f3c25c9499f88ddda835f60e3..9527e7ba300e10a6af1a0dd4b312c0323115256e 100644 --- a/paddle/fluid/operators/sgd_op.cu +++ b/paddle/fluid/operators/sgd_op.cu @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#define EIGEN_USE_GPU +#include #include "paddle/fluid/operators/sgd_op.h" #include "paddle/fluid/platform/cuda_primitives.h" @@ -33,22 +33,21 @@ __global__ void SGDKernel(const T* g, const T* p, const T* learning_rate, } } -template +template __global__ void SparseSGDFunctorKernel(const T* selected_rows, const int64_t* rows, const T* learning_rate, T* tensor_out, - int64_t row_numel) { - const int ty = blockIdx.y; - int tid = threadIdx.x; - - selected_rows += ty * row_numel; - tensor_out += rows[ty] * row_numel; - - for (int index = tid; index < row_numel; index += block_size) { - // Since index in rows of SelectedRows can be duplicate, we have to use - // Atomic Operation to avoid concurrent write error. - paddle::platform::CudaAtomicAdd( - tensor_out + index, -1.0 * learning_rate[0] * selected_rows[index]); + int64_t row_numel, int64_t limit) { + for (int64_t i = blockIdx.x; i < limit; i += gridDim.x) { + const T* selected_rows_ptr = selected_rows + i * row_numel; + T* tensor_out_ptr = tensor_out + rows[i] * row_numel; + for (int64_t index = threadIdx.x; index < row_numel; index += blockDim.x) { + // Since index in rows of SelectedRows can be duplicate, we have to use + // Atomic Operation to avoid concurrent write error. + paddle::platform::CudaAtomicAdd( + tensor_out_ptr + index, + -1.0 * learning_rate[0] * selected_rows_ptr[index]); + } } } } // namespace @@ -97,13 +96,15 @@ class SGDOpCUDAKernel : public framework::OpKernel { auto* in_data = in_value.data(); auto* out_data = param_out->data(); - const int block_size = 256; - dim3 threads(block_size, 1); - dim3 grid(1, in_rows.size()); - SparseSGDFunctorKernel< - T, 256><<>>( + const int kThreadsPerBlock = 256; + int thread_x = kThreadsPerBlock; + int max_threads = ctx.cuda_device_context().GetMaxPhysicalThreadCount(); + int max_blocks = std::max(max_threads / kThreadsPerBlock, 1); + + SparseSGDFunctorKernel<<>>( in_data, in_rows.CUDAData(ctx.GetPlace()), learning_rate->data(), - out_data, in_row_numel); + out_data, in_row_numel, in_rows.size()); } else { PADDLE_THROW("Unsupported Variable Type of Grad"); diff --git a/paddle/fluid/operators/shrink_rnn_memory_op.cc b/paddle/fluid/operators/shrink_rnn_memory_op.cc index 29d2fb989754f5621222768a279a1c898ea1c355..e008e130e34f60a78bf44e211c42c4b7786d1721 100644 --- a/paddle/fluid/operators/shrink_rnn_memory_op.cc +++ b/paddle/fluid/operators/shrink_rnn_memory_op.cc @@ -52,16 +52,26 @@ class ShrinkRNNMemoryOp : public ArrayOp { size_t height = dst_num_rows; // do shrink for the top level LoD + if (x_tensor.lod().size() > 0 && x_tensor.lod()[0].size() > static_cast(dst_num_rows)) { - auto lod_offset = framework::GetSubLoDAndAbsoluteOffset(x_tensor.lod(), 0, - dst_num_rows, 0); - height = lod_offset.second.second; - auto out_lod = out_tensor.mutable_lod(); - framework::AppendLoD(out_lod, lod_offset.first); + if (x_tensor.lod().size() > 1) { // MultiLevel LoD + auto lod_offset = framework::GetSubLoDAndAbsoluteOffset( + x_tensor.lod(), 0, dst_num_rows, 0); + height = lod_offset.second.second; + auto out_lod = out_tensor.mutable_lod(); + framework::AppendLoD(out_lod, lod_offset.first); + } else { + // Shrink LoD + auto lod_item = x_tensor.lod()[0]; + lod_item.resize(dst_num_rows + 1); + out_tensor.set_lod({lod_item}); + const auto &const_lod_item = lod_item; + height = const_lod_item.back(); + } } - if (dst_num_rows != 0) { + if (height != 0) { out_tensor.mutable_data(place, x_tensor.type()); auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); framework::TensorCopy(x_tensor.Slice(0, height), place, *dev_ctx, @@ -134,8 +144,11 @@ class ShrinkRNNMemoryGradOp : public ArrayOp { } else { auto &dout_tensor = dout_var->Get(); auto height = dout_tensor.dims()[0]; - auto slice = dx_tensor.Slice(0, static_cast(height)); - framework::TensorCopy(dout_tensor, dout_tensor.place(), dev_ctx, &slice); + if (height != 0) { + auto slice = dx_tensor.Slice(0, static_cast(height)); + framework::TensorCopy(dout_tensor, dout_tensor.place(), dev_ctx, + &slice); + } if (dx_tensor.dims()[0] > height) { auto rest_tensor = dx_tensor.Slice( static_cast(height), static_cast(dx_tensor.dims()[0])); diff --git a/paddle/fluid/operators/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt_engine_op.h index 79e75ea9a035b654f0bb7026d3a491bebe0b23c4..69173ff5178d32634f9ab291b7d709a3f91cb368 100644 --- a/paddle/fluid/operators/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt_engine_op.h @@ -36,7 +36,7 @@ namespace operators { using FluidDT = framework::proto::VarType_Type; using TRT_DT = nvinfer1::DataType; -namespace { +namespace { // NOLINT TRT_DT FluidDataType2TRT(FluidDT type) { switch (type) { diff --git a/paddle/fluid/operators/top_k_op.cc b/paddle/fluid/operators/top_k_op.cc index 4a8ac441cfaf642fde58ee30865a22e83c065498..92a0697e27ba0da66fa3b0f5380e7bd52575640d 100644 --- a/paddle/fluid/operators/top_k_op.cc +++ b/paddle/fluid/operators/top_k_op.cc @@ -30,6 +30,8 @@ class TopkOp : public framework::OperatorWithKernel { "Output(Indices) of TopkOp should not be null."); auto input_dims = ctx->GetInputDim("X"); + PADDLE_ENFORCE_EQ(input_dims.size(), 2, + "Rank of TopK op's input must be 2."); const int k = static_cast(ctx->Attrs().Get("k")); PADDLE_ENFORCE_GE(k, 1, "k must >= 1"); diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index dfc079e986e93c7f02f17b299e5d6293edbedd05..1b283fc9725fb8d01da913312844d0faea29daf6 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -201,6 +201,7 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place) compute_capability = GetCUDAComputeCapability(place_.device); multi_process = GetCUDAMultiProcessors(place_.device); max_threads_per_mp = GetCUDAMaxThreadsPerMultiProcessor(place_.device); + grid_max_dims_ = GpuMaxGridDim(place_.device); PADDLE_ENFORCE(cudaStreamCreate(&stream_)); eigen_stream_.reset(new EigenCudaStreamDevice()); eigen_stream_->Reinitialize(&stream_, place); @@ -239,6 +240,10 @@ int CUDADeviceContext::GetMaxPhysicalThreadCount() const { return multi_process * max_threads_per_mp; } +std::tuple CUDADeviceContext::GetMaxGridDims() const { + return grid_max_dims_; +} + Eigen::GpuDevice* CUDADeviceContext::eigen_device() const { return eigen_device_.get(); } diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 79539195157d74d4d757edee5e008cbb76c93ee2..da32b0dad4b8cfe75bf82f59ec58db8136b899f2 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -13,6 +13,7 @@ limitations under the License. */ #include #include // NOLINT #include +#include #include #include @@ -91,6 +92,8 @@ class CUDADeviceContext : public DeviceContext { /*! \brief Return the max physical thread count in the device context */ int GetMaxPhysicalThreadCount() const; + std::tuple GetMaxGridDims() const; + /*! \brief Return eigen device in the device context. */ Eigen::GpuDevice* eigen_device() const; @@ -135,6 +138,8 @@ class CUDADeviceContext : public DeviceContext { cudaStream_t stream_; cublasHandle_t cublas_handle_; + std::tuple grid_max_dims_; + int compute_capability; int multi_process; int max_threads_per_mp; diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index 61a653d9313daff96d39c08e80f17d7e33acceb1..f04395a8ac00f33501008aa12f22773ddda9b138 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -21,6 +21,7 @@ limitations under the License. */ #if defined(_WIN32) #define NOMINMAX // msvc max/min macro conflict with std::min/max #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h +#define GOOGLE_GLOG_DLL_DECL #endif #ifdef PADDLE_WITH_CUDA @@ -47,7 +48,7 @@ limitations under the License. */ #include "paddle/fluid/platform/dynload/cublas.h" #include "paddle/fluid/platform/dynload/cudnn.h" #include "paddle/fluid/platform/dynload/curand.h" -#if !defined(__APPLE__) and !defined(_WIN32) +#if !defined(__APPLE__) && !defined(_WIN32) #include "paddle/fluid/platform/dynload/nccl.h" #endif // __APPLE__ #endif // PADDLE_WITH_CUDA @@ -216,7 +217,7 @@ inline typename std::enable_if::type throw_on_error( #endif } -#if !defined(__APPLE__) and !defined(_WIN32) +#if !defined(__APPLE__) && !defined(_WIN32) template inline typename std::enable_if::type throw_on_error( ncclResult_t stat, const Args&... args) { @@ -260,14 +261,8 @@ inline void throw_on_error(T e) { } \ } while (false) -#define PADDLE_THROW_EOF() \ - do { \ - throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \ - __LINE__); \ - } while (false) - #else -#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__) +#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__); #endif // REPLACE_ENFORCE_GLOG #else // !_WIN32 @@ -281,6 +276,12 @@ inline void throw_on_error(T e) { #define PADDLE_ENFORCE(x, ...) x #endif // !_WIN32 +#define PADDLE_THROW_EOF() \ + do { \ + throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \ + __LINE__); \ + } while (false) + /* * Some enforce helpers here, usage: * int a = 1; @@ -294,7 +295,7 @@ inline void throw_on_error(T e) { * extra messages is also supported, for example: * PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2) */ - +#if !defined(_WIN32) #define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) \ __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, ==, !=, __VA_ARGS__) #define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) \ @@ -307,6 +308,7 @@ inline void throw_on_error(T e) { __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__) #define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \ __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__) + #define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \ do { \ if (UNLIKELY(nullptr == (__VAL))) { \ @@ -326,6 +328,27 @@ inline void throw_on_error(T e) { paddle::string::Sprintf("" __VA_ARGS__)); \ } \ } while (0) +#else +#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) ((__VAL0) == (__VAL1)) +#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) ((__VAL0) != (__VAL1)) +#define PADDLE_ENFORCE_GT(__VAL0, __VAL1, ...) ((__VAL0) > (__VAL1)) +#define PADDLE_ENFORCE_GE(__VAL0, __VAL1, ...) ((__VAL0) >= (__VAL1)) +#define PADDLE_ENFORCE_LT(__VAL0, __VAL1, ...) ((__VAL0) < (__VAL1)) +#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) ((__VAL0) <= (__VAL1)) + +#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \ + do { \ + if (!((__VAL0)__CMP(__VAL1))) { \ + PADDLE_THROW("Windows disable the enforce. Enforce failed."); \ + } \ + } while (0) +#define PADDLE_ENFORCE_NOT_NULL(__VAL1, ...) \ + do { \ + if (nullptr == (__VAL1)) { \ + PADDLE_THROW("Windows disable the enforce. Enforce failed"); \ + } \ + } while (0) +#endif // !_WIN32 } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/for_range.h b/paddle/fluid/platform/for_range.h index c153e80fe42aecb33d3aa97874d2881bce9029be..2806d726d2b1ac6b717a9041af19e7ee62be6883 100644 --- a/paddle/fluid/platform/for_range.h +++ b/paddle/fluid/platform/for_range.h @@ -48,35 +48,54 @@ __global__ static void ForRangeElemwiseOpGridIsOne(Function func) { } template -__global__ static void ForRangeElemwiseOp(Function func, int limit) { +__global__ static void ForRangeElemwiseOp(Function func, size_t limit) { size_t idx = static_cast(blockIdx.x * blockDim.x + threadIdx.x); if (idx < limit) { func(idx); } } +template +__global__ static void ForRangeElemwiseOpGridLarge(Function func, size_t limit, + int grid_dim) { + size_t idx = static_cast(blockIdx.x * blockDim.x + threadIdx.x); + while (idx < limit) { + func(idx); + idx += grid_dim; + } +} + template <> struct ForRange { ForRange(const CUDADeviceContext& dev_ctx, size_t limit) - : dev_ctx_(dev_ctx), limit_(static_cast(limit)) {} + : dev_ctx_(dev_ctx), limit_(limit) {} template inline void operator()(Function func) const { constexpr int num_threads = 1024; int block_size = limit_ <= num_threads ? limit_ : num_threads; - int grid_size = (limit_ + num_threads - 1) / num_threads; - - if (grid_size == 1) { - ForRangeElemwiseOpGridIsOne<<<1, block_size, 0, dev_ctx_.stream()>>>( - func); + size_t grid_size = (limit_ + num_threads - 1) / num_threads; + + int max_grid_dim = std::get<0>(dev_ctx_.GetMaxGridDims()); + + if (grid_size < max_grid_dim) { + int grid_size_int = static_cast(grid_size); + if (grid_size == 1) { + ForRangeElemwiseOpGridIsOne<<<1, block_size, 0, dev_ctx_.stream()>>>( + func); + } else { + ForRangeElemwiseOp<<>>( + func, limit_); + } } else { - ForRangeElemwiseOp<<>>( - func, limit_); + ForRangeElemwiseOpGridLarge<<>>(func, limit_, + max_grid_dim); } } const CUDADeviceContext& dev_ctx_; - int limit_; + size_t limit_; }; #endif diff --git a/paddle/fluid/platform/gpu_info.cc b/paddle/fluid/platform/gpu_info.cc index 126636d879213b1c8f242db8fbdf6a358a1d2da9..b88523728407803a1ea9d343dc2d33c6a38d5de9 100644 --- a/paddle/fluid/platform/gpu_info.cc +++ b/paddle/fluid/platform/gpu_info.cc @@ -152,5 +152,22 @@ void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream) { PADDLE_ENFORCE(cudaMemsetAsync(dst, value, count, stream), "cudaMemsetAsync failed in paddle::platform::GpuMemsetAsync"); } + +std::tuple GpuMaxGridDim(int id) { + std::tuple result; + PADDLE_ENFORCE( + cudaDeviceGetAttribute(&std::get<0>(result), cudaDevAttrMaxBlockDimX, id), + "cudaDeviceGetAttribute failed in " + "cudaDevAttrMaxBlockDim"); + PADDLE_ENFORCE( + cudaDeviceGetAttribute(&std::get<1>(result), cudaDevAttrMaxBlockDimY, id), + "cudaDeviceGetAttribute failed in " + "cudaDevAttrMaxBlockDim"); + PADDLE_ENFORCE( + cudaDeviceGetAttribute(&std::get<2>(result), cudaDevAttrMaxBlockDimZ, id), + "cudaDeviceGetAttribute failed in " + "cudaDevAttrMaxBlockDim"); + return result; +} } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/gpu_info.h b/paddle/fluid/platform/gpu_info.h index f4640d3eaa2165c35e8e14690d83e9e7e7168c0b..b748c6e8a519d27acd211f815a210c7a74ff32c8 100644 --- a/paddle/fluid/platform/gpu_info.h +++ b/paddle/fluid/platform/gpu_info.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include #include +#include namespace paddle { namespace platform { @@ -72,6 +73,8 @@ void GpuMemcpyPeerSync(void *dst, int dst_device, const void *src, //! Set memory dst with value count size asynchronously void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream); +std::tuple GpuMaxGridDim(int id); + } // namespace platform } // namespace paddle diff --git a/paddle/fluid/pybind/const_value.cc b/paddle/fluid/pybind/const_value.cc index 1f61a0e289f32196ead04d71d07b513cbe4655b1..882e6332e8174b59eb6e19e788c8cced808d552c 100644 --- a/paddle/fluid/pybind/const_value.cc +++ b/paddle/fluid/pybind/const_value.cc @@ -48,6 +48,9 @@ void BindConstValue(pybind11::module* m) { op_proto_and_checker_maker.def( "kOpNameScopeAttrName", framework::OpProtoAndCheckerMaker::OpNamescopeAttrName); + op_proto_and_checker_maker.def( + "kOpCreationCallstackAttrName", + framework::OpProtoAndCheckerMaker::OpCreationCallstackAttrName); } } // namespace pybind diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc index 67501186d150171728194f23bc02d2c014848dd7..a5bc44122028c1191f511157bdde2e7c2d30c6aa 100644 --- a/paddle/fluid/pybind/protobuf.cc +++ b/paddle/fluid/pybind/protobuf.cc @@ -285,12 +285,12 @@ void BindOpDesc(pybind11::module *m) { .def("set_output", &pd::OpDesc::SetOutput) .def("input_arg_names", &pd::OpDesc::InputArgumentNames) .def("output_arg_names", &pd::OpDesc::OutputArgumentNames) - .def("rename_input", &pd::OpDesc::RenameInput) - .def("rename_output", &pd::OpDesc::RenameOutput) + .def("_rename_input", &pd::OpDesc::RenameInput) + .def("_rename_output", &pd::OpDesc::RenameOutput) .def("has_attr", &pd::OpDesc::HasAttr) .def("attr_type", &pd::OpDesc::GetAttrType) .def("attr_names", &pd::OpDesc::AttrNames) - .def("set_attr", &pd::OpDesc::SetAttr) + .def("_set_attr", &pd::OpDesc::SetAttr) .def("attr", &pd::OpDesc::GetAttr) .def("set_block_attr", &pd::OpDesc::SetBlockAttr) .def("set_blocks_attr", &pd::OpDesc::SetBlocksAttr) @@ -300,8 +300,8 @@ void BindOpDesc(pybind11::module *m) { std::string ser(seriralized); self.SetAttr(name, ser); }) - .def("block_attr_id", &pd::OpDesc::GetBlockAttrId) - .def("blocks_attr_ids", &pd::OpDesc::GetBlocksAttrIds) + .def("_block_attr_id", &pd::OpDesc::GetBlockAttrId) + .def("_blocks_attr_ids", &pd::OpDesc::GetBlocksAttrIds) .def("check_attrs", &pd::OpDesc::CheckAttrs) .def("infer_shape", &pd::OpDesc::InferShape) .def("infer_var_type", &pd::OpDesc::InferVarType) diff --git a/paddle/fluid/train/CMakeLists.txt b/paddle/fluid/train/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..6cd9cbe379874e5ab7e40c1349e0483ff45bb63a --- /dev/null +++ b/paddle/fluid/train/CMakeLists.txt @@ -0,0 +1,30 @@ +function(train_test TARGET_NAME) + set(options "") + set(oneValueArgs "") + set(multiValueArgs ARGS) + cmake_parse_arguments(train_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests) + set(arg_list "") + if(train_test_ARGS) + foreach(arg ${train_test_ARGS}) + list(APPEND arg_list "_${arg}") + endforeach() + else() + list(APPEND arg_list "_") + endif() + foreach(arg ${arg_list}) + string(REGEX REPLACE "^_$" "" arg "${arg}") + cc_test(test_train_${TARGET_NAME}${arg} + SRCS test_train_${TARGET_NAME}.cc + DEPS paddle_fluid_origin + ARGS --dirname=${PYTHON_TESTS_DIR}/book/${TARGET_NAME}${arg}.train.model/) + set_tests_properties(test_train_${TARGET_NAME}${arg} + PROPERTIES DEPENDS test_${TARGET_NAME}) + endforeach() +endfunction(train_test) + + +if(WITH_TESTING) + train_test(recognize_digits ARGS mlp conv) +endif() diff --git a/paddle/fluid/train/test_train_recognize_digits.cc b/paddle/fluid/train/test_train_recognize_digits.cc new file mode 100644 index 0000000000000000000000000000000000000000..e8731dd51ad698e53b7f10cc781c52134f2d17a8 --- /dev/null +++ b/paddle/fluid/train/test_train_recognize_digits.cc @@ -0,0 +1,89 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include + +#include "gflags/gflags.h" +#include "gtest/gtest.h" + +#include "paddle/fluid/framework/executor.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/inference/io.h" +#include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/init.h" +#include "paddle/fluid/platform/place.h" + +DEFINE_string(dirname, "", "Directory of the train model."); + +namespace paddle { + +void Train() { + CHECK(!FLAGS_dirname.empty()); + framework::InitDevices(false); + const auto cpu_place = platform::CPUPlace(); + framework::Executor executor(cpu_place); + framework::Scope scope; + + auto train_program = inference::Load( + &executor, &scope, FLAGS_dirname + "__model_combined__.main_program", + FLAGS_dirname + "__params_combined__"); + + std::string loss_name = ""; + for (auto op_desc : train_program->Block(0).AllOps()) { + if (op_desc->Type() == "mean") { + loss_name = op_desc->Output("Out")[0]; + break; + } + } + + PADDLE_ENFORCE_NE(loss_name, "", "loss not found"); + + // prepare data + auto x_var = scope.Var("img"); + auto x_tensor = x_var->GetMutable(); + x_tensor->Resize({64, 1, 28, 28}); + + auto x_data = x_tensor->mutable_data(cpu_place); + for (int i = 0; i < 64 * 28 * 28; ++i) { + x_data[i] = 1.0; + } + + auto y_var = scope.Var("label"); + auto y_tensor = y_var->GetMutable(); + y_tensor->Resize({64, 1}); + auto y_data = y_tensor->mutable_data(cpu_place); + for (int i = 0; i < 64 * 1; ++i) { + y_data[i] = static_cast(1); + } + + auto loss_var = scope.Var(loss_name); + float first_loss = 0.0; + float last_loss = 0.0; + for (int i = 0; i < 100; ++i) { + executor.Run(*train_program.get(), &scope, 0, false, true); + if (i == 0) { + first_loss = loss_var->Get().data()[0]; + } else if (i == 99) { + last_loss = loss_var->Get().data()[0]; + } + } + EXPECT_LT(last_loss, first_loss); +} + +TEST(train, recognize_digits) { Train(); } + +} // namespace paddle diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index f50a68c54114d5cce15418ad22f38c83163ba866..e8d2e8e6861b95c94077a72f8e4f7d11b0ab11be 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -147,6 +147,7 @@ function cmake_gen() { -DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} -DPY_VERSION=${PY_VERSION:-2.7} + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build} ======================================== EOF # Disable UNITTEST_USE_VIRTUALENV in docker because @@ -178,7 +179,8 @@ EOF -DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} \ -DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} \ -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \ - -DPY_VERSION=${PY_VERSION:-2.7} + -DPY_VERSION=${PY_VERSION:-2.7} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build} } @@ -361,7 +363,7 @@ EOF ctest --output-on-failure # make install should also be test when unittest make install -j `nproc` - pip install /usr/local/opt/paddle/share/wheels/*.whl + pip install ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]] ; then paddle version fi @@ -379,7 +381,7 @@ function run_mac_test() { EOF # TODO: jiabin need to refine this part when these tests fixed on mac - ctest --output-on-failure -j8 + ctest --output-on-failure -j $1 # make install should also be test when unittest make install -j 8 pip install /usr/local/opt/paddle/share/wheels/*.whl @@ -727,7 +729,7 @@ function main() { maccheck) cmake_gen ${PYTHON_ABI:-""} build_mac - run_mac_test + run_mac_test ${PROC_RUN:-1} ;; cicheck_py35) cmake_gen ${PYTHON_ABI:-""} diff --git a/python/paddle/dataset/wmt14.py b/python/paddle/dataset/wmt14.py index f8c1a33574e642b21feb6843d115b7f4205ef250..adc0c1aac80cbdb0b0c04535fc39b6a172d23eec 100644 --- a/python/paddle/dataset/wmt14.py +++ b/python/paddle/dataset/wmt14.py @@ -89,7 +89,8 @@ def reader_creator(tar_file, file_name, dict_size): ] for name in names: for line in f.extractfile(name): - line_split = line.strip().split(six.b('\t')) + line = cpt.to_text(line) + line_split = line.strip().split('\t') if len(line_split) != 2: continue src_seq = line_split[0] # one source sequence diff --git a/python/paddle/dataset/wmt16.py b/python/paddle/dataset/wmt16.py index f30dcd518ea6c0c685d027ede3ad6e0a1cb0c82c..9c02e0f41b04e113251e0fda72ca8abd976ab6f7 100644 --- a/python/paddle/dataset/wmt16.py +++ b/python/paddle/dataset/wmt16.py @@ -64,7 +64,8 @@ def __build_dict(tar_file, dict_size, save_path, lang): word_dict = defaultdict(int) with tarfile.open(tar_file, mode="r") as f: for line in f.extractfile("wmt16/train"): - line_split = line.strip().split(six.b("\t")) + line = cpt.to_text(line) + line_split = line.strip().split("\t") if len(line_split) != 2: continue sen = line_split[0] if lang == "en" else line_split[1] for w in sen.split(): @@ -123,7 +124,8 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang): with tarfile.open(tar_file, mode="r") as f: for line in f.extractfile(file_name): - line_split = line.strip().split(six.b("\t")) + line = cpt.to_text(line) + line_split = line.strip().split("\t") if len(line_split) != 2: continue src_words = line_split[src_col].split() diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 88eaae10dd55edcc7e811163acf17579eb32cbf1..17fe8dc3c8a28ad129e2d377820da95e8e7a02d9 100644 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -38,8 +38,8 @@ def _rename_arg_(op_descs, old_name, new_name, begin_idx=None, end_idx=None): op_desc = op_descs[i] if isinstance(op_desc, tuple): op_desc = op_desc[0] - op_desc.rename_input(old_name, new_name) - op_desc.rename_output(old_name, new_name) + op_desc._rename_input(old_name, new_name) + op_desc._rename_output(old_name, new_name) def _create_op_desc_(op_type, inputs, outputs, attrs): @@ -70,7 +70,7 @@ def _create_op_desc_(op_type, inputs, outputs, attrs): if isinstance(val, framework.Block): op_desc.set_block_attr(name, val.desc) else: - op_desc.set_attr(name, val) + op_desc._set_attr(name, val) return op_desc @@ -346,7 +346,7 @@ def _append_backward_ops_(block, grad_sub_block_list = [] # If the op has its own sub-block, deal with the sub-block first if op.has_attr("sub_block"): - sub_block = program.block(op.block_attr_id("sub_block")) + sub_block = program.block(op._block_attr_id("sub_block")) grad_sub_block = program._create_block() grad_sub_block._set_forward_block_idx(sub_block.idx) cb = _callback_lookup_(op) @@ -382,7 +382,7 @@ def _append_backward_ops_(block, for op_desc in grad_op_descs: new_op_desc = target_block.desc.append_op() new_op_desc.copy_from(op_desc) - new_op_desc.set_attr(op_role_attr_name, backward) + new_op_desc._set_attr(op_role_attr_name, backward) grad_to_var["__current_op_desc__"] = new_op_desc if callbacks is not None: assert (isinstance(callbacks, list)) @@ -408,7 +408,7 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map): for op_idx in range(start_op_idx, block.desc.op_size()): op_desc = block.desc.op(op_idx) if op_desc.has_attr("sub_block"): - sub_block = block.program.block(op_desc.block_attr_id("sub_block")) + sub_block = block.program.block(op_desc._block_attr_id("sub_block")) _append_backward_vars_(sub_block, 0, grad_to_var, grad_info_map) new_vars = set() # create new gradient variables @@ -438,12 +438,12 @@ def _rename_grad_(block, start_op_idx, grad_to_var, target_grad_map): op_desc = block.desc.op(op_idx) for name in op_desc.input_arg_names(): if name in var_map: - op_desc.rename_input(name, var_map[name]) + op_desc._rename_input(name, var_map[name]) for name in op_desc.output_arg_names(): if block.desc.find_var(name.encode("ascii")): new_name = unique_name.generate(name) - op_desc.rename_output(name, new_name) + op_desc._rename_output(name, new_name) var_map[name] = new_name for g, ng in six.iteritems(var_map): @@ -542,9 +542,9 @@ def append_backward(loss, parameter_list=None, no_grad_set=None, if loss.op is None: raise ValueError("loss.op is None. Should not happend") - loss.op.set_attr(core.op_proto_and_checker_maker.kOpRoleAttrName(), - int(core.op_proto_and_checker_maker.OpRole.Forward) | - int(core.op_proto_and_checker_maker.OpRole.Loss)) + loss.op._set_attr(core.op_proto_and_checker_maker.kOpRoleAttrName(), + int(core.op_proto_and_checker_maker.OpRole.Forward) | + int(core.op_proto_and_checker_maker.OpRole.Loss)) if callbacks is not None: isinstance(callbacks, list) @@ -631,7 +631,7 @@ def append_backward(loss, parameter_list=None, no_grad_set=None, attr_val = [p.name, g.name] if g.op.has_attr(op_role_var_attr_name): attr_val.extend(g.op.attr(op_role_var_attr_name)) - g.op.set_attr(op_role_var_attr_name, attr_val) + g.op._set_attr(op_role_var_attr_name, attr_val) return params_and_grads diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index 32b8f1189fd65ba1e8da5aeaf316fc0ae05af552..e884185528282021fd16289ccc6a3533e22b9967 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -75,8 +75,8 @@ class ErrorClipByValue(BaseErrorClipAttr): clip_op_desc.set_type("clip") clip_op_desc.set_input("X", [grad_name]) clip_op_desc.set_output("Out", [grad_name]) - clip_op_desc.set_attr("min", self.min) - clip_op_desc.set_attr("max", self.max) + clip_op_desc._set_attr("min", self.min) + clip_op_desc._set_attr("max", self.max) def error_clip_callback(block, context): diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index fc61bcbea66de07350ee778abb16e81f8f8bc8db..d795b92d79b2b9c616639d2fc56f3d2be383f376 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -18,6 +18,7 @@ import collections import contextlib import re import six +import traceback import numpy as np @@ -34,14 +35,14 @@ except ImportError as e: except Exception as e: raise e from . import unique_name +import os +PADDLE_ON_MODEL_CE = os.environ.get('PADDLE_ON_MODEL_CE', None) is not None __all__ = [ 'Program', - 'Operator', 'default_startup_program', 'default_main_program', 'program_guard', - 'get_var', 'name_scope', ] @@ -489,7 +490,8 @@ class OpProtoHolder(object): return { core.op_proto_and_checker_maker.kOpRoleAttrName(), core.op_proto_and_checker_maker.kOpRoleVarAttrName(), - core.op_proto_and_checker_maker.kOpNameScopeAttrName() + core.op_proto_and_checker_maker.kOpNameScopeAttrName(), + core.op_proto_and_checker_maker.kOpCreationCallstackAttrName() } @@ -572,6 +574,11 @@ class Operator(object): if role_var_name in op_attrs and len(op_attrs[role_var_name]) == 0: del op_attrs[role_var_name] + if not PADDLE_ON_MODEL_CE: + callstack_var_name = op_maker.kOpCreationCallstackAttrName() + op_attrs[callstack_var_name] = list( + reversed(traceback.format_stack()))[1:] + if len(self.desc.type()) != 0: return if type is None: @@ -654,11 +661,11 @@ class Operator(object): self._update_desc_attr(attr_name, attr_val) self.desc.check_attrs() - if self.has_kernel(type): + if self._has_kernel(type): self.desc.infer_var_type(self.block.desc) self.desc.infer_shape(self.block.desc) - def has_kernel(self, op_type): + def _has_kernel(self, op_type): return op_type not in self.OP_WITHOUT_KERNEL_SET def to_string(self, throw_on_error): @@ -699,7 +706,7 @@ class Operator(object): """ return self.desc.input(name) - def rename_input(self, old_name, new_name): + def _rename_input(self, old_name, new_name): """ Rename the `old_name` to `new_name`. @@ -710,9 +717,9 @@ class Operator(object): Returns: None """ - self.desc.rename_input(old_name, new_name) + self.desc._rename_input(old_name, new_name) - def rename_output(self, old_name, new_name): + def _rename_output(self, old_name, new_name): """ Rename the `old_name` to `new_name`. @@ -723,7 +730,7 @@ class Operator(object): Returns: None """ - self.desc.rename_output(old_name, new_name) + self.desc._rename_output(old_name, new_name) @property def input_names(self): @@ -787,7 +794,7 @@ class Operator(object): """ return self.desc.attr_type(name) - def set_attr(self, name, val): + def _set_attr(self, name, val): """ Set the value of attribute by attribute's name. @@ -820,7 +827,7 @@ class Operator(object): isinstance(val, core.ProgramDesc): self.desc.set_serialized_attr(name, val.serialize_to_string()) else: - self.desc.set_attr(name, val) + self.desc._set_attr(name, val) @property def attr_names(self): @@ -839,7 +846,7 @@ class Operator(object): """ return self.desc.attr(name) - def block_attr_id(self, name): + def _block_attr_id(self, name): """ Get the block attribute's id by name. @@ -849,9 +856,9 @@ class Operator(object): Returns: int: the block index. """ - return self.desc.block_attr_id(name) + return self.desc._block_attr_id(name) - def block_attr(self, name): + def _block_attr(self, name): """ Get the block attribute by name. @@ -862,11 +869,11 @@ class Operator(object): block: the block attribute. """ - id = self.block_attr_id(name) + id = self._block_attr_id(name) assert (id >= 0 and id < len(self.block.program.blocks)) return self.block.program.blocks[id] - def blocks_attr(self, name): + def _blocks_attr(self, name): """ Get the blocks attribute by name. @@ -877,13 +884,13 @@ class Operator(object): list: list of the blocks attribute. """ attrs = [] - for i in self.blocks_attr_ids(name): + for i in self._blocks_attr_ids(name): assert (i >= 0 and i < len(self.block.program.blocks)) attrs.append(self.block.program.blocks[i]) return attrs - def blocks_attr_ids(self, name): + def _blocks_attr_ids(self, name): """ Get the blocks attribute's ids by name. @@ -894,7 +901,7 @@ class Operator(object): list: list of the blocks ids. """ - return self.desc.blocks_attr_ids(name) + return self.desc._blocks_attr_ids(name) def all_attrs(self): """ @@ -908,11 +915,11 @@ class Operator(object): for n in attr_names: attr_type = self.desc.attr_type(n) if attr_type == core.AttrType.BLOCK: - attr_map[n] = self.block_attr(n) + attr_map[n] = self._block_attr(n) continue if attr_type == core.AttrType.BLOCKS: - attr_map[n] = self.blocks_attr(n) + attr_map[n] = self._blocks_attr(n) continue attr_map[n] = self.attr(n) @@ -1786,7 +1793,7 @@ class Program(object): for j in six.moves.range(block.op_size()): op = block.op(j) if op.has_attr('is_test'): - op.set_attr('is_test', True) + op._set_attr('is_test', True) res.blocks = [ Block(res, i) for i in six.moves.range(res.desc.num_blocks()) ] @@ -2160,7 +2167,7 @@ def program_guard(main_program, startup_program=None): switch_startup_program(startup_program) -def get_var(name, program=None): +def _get_var(name, program=None): """ Get a variable by name from the global block of a program. diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index e703e5ac7943b006741f12886a14bf344a6b9b28..604f3eacd75beff306915b224b30c369dd3a486f 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -600,7 +600,7 @@ def save_inference_model(dirname, """ if isinstance(feeded_var_names, six.string_types): feeded_var_names = [feeded_var_names] - else: + elif export_for_deployment: if len(feeded_var_names) > 0: # TODO(paddle-dev): polish these code blocks if not (bool(feeded_var_names) and all( @@ -610,61 +610,60 @@ def save_inference_model(dirname, if isinstance(target_vars, Variable): target_vars = [target_vars] - else: + elif export_for_deployment: if not (bool(target_vars) and all( isinstance(var, Variable) for var in target_vars)): raise ValueError("'target_vars' should be a list of Variable.") if main_program is None: main_program = default_main_program() - copy_program = main_program.clone() + + # if there is lookup table, the trainer 0 will notify all pserver to save. + if main_program._is_distributed and main_program._is_chief and main_program._distributed_lookup_table: + lookup_table_filename = os.path.join(dirname, "__lookup_table__") + _save_lookup_tables_by_notify(executor, lookup_table_filename, + main_program._distributed_lookup_table, + main_program._endpoints) if not os.path.isdir(dirname): os.makedirs(dirname) + if model_filename is not None: + model_basename = os.path.basename(model_filename) + else: + model_basename = "__model__" + model_basename = os.path.join(dirname, model_basename) # When export_for_deployment is true, we modify the program online so that # it can only be loaded for inference directly. If it's false, the whole # original program and related meta are saved so that future usage can be # more flexible. if export_for_deployment: - global_block = copy_program.global_block() + main_program = main_program.clone() + global_block = main_program.global_block() for i, op in enumerate(global_block.ops): op.desc.set_is_target(False) if op.type == "feed" or op.type == "fetch": global_block._remove_op(i) - copy_program.desc.flush() + main_program.desc.flush() - pruned_program = copy_program._prune(targets=target_vars) - saved_program = pruned_program._inference_optimize(prune_read_op=True) + main_program = main_program._prune(targets=target_vars) + main_program = main_program._inference_optimize(prune_read_op=True) fetch_var_names = [v.name for v in target_vars] - prepend_feed_ops(saved_program, feeded_var_names) - append_fetch_ops(saved_program, fetch_var_names) + prepend_feed_ops(main_program, feeded_var_names) + append_fetch_ops(main_program, fetch_var_names) + + with open(model_basename, "wb") as f: + f.write(main_program.desc.serialize_to_string()) else: # TODO(panyx0718): Save more information so that it can also be used # for training and more flexible post-processing. - saved_program = copy_program - - if model_filename is not None: - model_filename = os.path.basename(model_filename) - else: - model_filename = "__model__" - model_filename = os.path.join(dirname, model_filename) + with open(model_basename + ".main_program", "wb") as f: + f.write(main_program.desc.serialize_to_string()) if params_filename is not None: params_filename = os.path.basename(params_filename) - - with open(model_filename, "wb") as f: - f.write(saved_program.desc.serialize_to_string()) - - save_persistables(executor, dirname, saved_program, params_filename) - - # if there is lookup table, the trainer 0 will notify all pserver to save. - if main_program._is_distributed and main_program._is_chief and main_program._distributed_lookup_table: - lookup_table_filename = os.path.join(dirname, "__lookup_table__") - _save_lookup_tables_by_notify(executor, lookup_table_filename, - main_program._distributed_lookup_table, - main_program._endpoints) + save_persistables(executor, dirname, main_program, params_filename) def load_inference_model(dirname, diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index d56fa76300e7054ef71a7729483a579fa35f1dac..75c29b12724d53783b9748d6df066c52bd232482 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -311,6 +311,7 @@ def _copy_reader_var_(block, var): new_var = block.create_var(name=var.name, type=core.VarDesc.VarType.READER) new_var.desc.set_shapes(var.desc.shapes()) new_var.desc.set_dtypes(var.desc.dtypes()) + new_var.desc.set_lod_levels(var.desc.lod_levels()) new_var.persistable = True return new_var @@ -632,6 +633,7 @@ def py_reader(capacity, }) startup_var.desc.set_dtypes(dtypes) + startup_var.desc.set_lod_levels(lod_levels) startup_var.persistable = True main_prog_var = _copy_reader_var_(default_main_program().current_block(), diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 6e0f3de4141aff3efb98b6d1162823a13f83a7bb..2cb61a9cd25c744710ab7ac9ea591902740f78da 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -6471,12 +6471,14 @@ def _elementwise_op(helper): assert y is not None, 'y cannot be None in {}'.format(op_type) axis = helper.kwargs.get('axis', -1) use_mkldnn = helper.kwargs.get('use_mkldnn', False) - name = helper.kwargs.get('name', None) - if name is None: - out = helper.create_tmp_variable(dtype=x.dtype) - else: - out = helper.create_variable( - name=name, dtype=x.dtype, persistable=False) + out = helper.kwargs.get('out', None) + if out is None: + name = helper.kwargs.get('name', None) + if name is None: + out = helper.create_tmp_variable(dtype=x.dtype) + else: + out = helper.create_variable( + name=name, dtype=x.dtype, persistable=False) helper.append_op( type=op_type, @@ -6489,7 +6491,13 @@ def _elementwise_op(helper): @templatedoc() -def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None): +def scale(x, + scale=1.0, + bias=0.0, + bias_after_scale=True, + out=None, + act=None, + name=None): """ ${comment} @@ -6498,6 +6506,7 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None): scale(${scale_type}): ${scale_comment} bias(${bias_type}): ${bias_comment} bias_after_scale(${bias_after_scale_type}): ${bias_after_scale_comment} + out(Tensor): Output tensor. act(basestring|None): Activation applied to the output. name(basestring|None): Name of the output. @@ -6506,11 +6515,12 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None): """ helper = LayerHelper('scale', **locals()) - if name is None: - out = helper.create_tmp_variable(dtype=x.dtype) - else: - out = helper.create_variable( - name=name, dtype=x.dtype, persistable=False) + if out is None: + if name is None: + out = helper.create_tmp_variable(dtype=x.dtype) + else: + out = helper.create_variable( + name=name, dtype=x.dtype, persistable=False) helper.append_op( type='scale', @@ -6524,31 +6534,73 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None): return helper.append_activation(out) -def elementwise_add(x, y, axis=-1, use_mkldnn=False, act=None, name=None): +def elementwise_add(x, + y, + out=None, + axis=-1, + use_mkldnn=False, + act=None, + name=None): return _elementwise_op(LayerHelper('elementwise_add', **locals())) -def elementwise_div(x, y, axis=-1, use_mkldnn=False, act=None, name=None): +def elementwise_div(x, + y, + out=None, + axis=-1, + use_mkldnn=False, + act=None, + name=None): return _elementwise_op(LayerHelper('elementwise_div', **locals())) -def elementwise_sub(x, y, axis=-1, use_mkldnn=False, act=None, name=None): +def elementwise_sub(x, + y, + out=None, + axis=-1, + use_mkldnn=False, + act=None, + name=None): return _elementwise_op(LayerHelper('elementwise_sub', **locals())) -def elementwise_mul(x, y, axis=-1, use_mkldnn=False, act=None, name=None): +def elementwise_mul(x, + y, + out=None, + axis=-1, + use_mkldnn=False, + act=None, + name=None): return _elementwise_op(LayerHelper('elementwise_mul', **locals())) -def elementwise_max(x, y, axis=-1, use_mkldnn=False, act=None, name=None): +def elementwise_max(x, + y, + out=None, + axis=-1, + use_mkldnn=False, + act=None, + name=None): return _elementwise_op(LayerHelper('elementwise_max', **locals())) -def elementwise_min(x, y, axis=-1, use_mkldnn=False, act=None, name=None): +def elementwise_min(x, + y, + out=None, + axis=-1, + use_mkldnn=False, + act=None, + name=None): return _elementwise_op(LayerHelper('elementwise_min', **locals())) -def elementwise_pow(x, y, axis=-1, use_mkldnn=False, act=None, name=None): +def elementwise_pow(x, + y, + out=None, + axis=-1, + use_mkldnn=False, + act=None, + name=None): return _elementwise_op(LayerHelper('elementwise_pow', **locals())) @@ -6560,6 +6612,7 @@ for func in [ func.__doc__ = _generate_doc_string_( op_proto, additional_args_lines=[ + "out (Tensor): The output tensor of elementwise op.", "act (basestring|None): Activation applied to the output.", "name (basestring|None): Name of the output." ]) diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index 44af29d3390e35129d0ee65b31eacad6b28a9d60..57d272cbfb948840679e80e8db40379c57603113 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -74,28 +74,7 @@ class ParallelExecutor(object): build_strategy=None, num_trainers=1, trainer_id=0, - scope=None, - **kwargs): - if len(kwargs) != 0: - err_msg = "" - for key in kwargs: - if key in dir(ExecutionStrategy): - err_msg += \ - "Setting {0} by constructor is deprecated. Use " \ - "strategy=ExecutionStrategy(); strategy.{0}=xxx; " \ - "pe=ParallelExecutor(exec_strategy=strategy) " \ - "instead.\n ".format(key) - elif key in dir(BuildStrategy): - err_msg += \ - "Setting {0} by constructor is deprecated. Use " \ - "strategy=BuildStrategy(); See help(" \ - "paddle.fluid.ParallelExecutor.BuildStrategy) \n".format( - key) - else: - err_msg += "Setting {0} by constructor is deprecated. Use strategy.\n".format( - key) - raise ValueError(err_msg) - + scope=None): self._places = [] self._act_places = [] if use_cuda: diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py index f0be794327f51cbbc4202b8b7b401b712b6d66a3..a51607bfdb1dde3d25f490770cc2ba368ceb27ff 100644 --- a/python/paddle/fluid/param_attr.py +++ b/python/paddle/fluid/param_attr.py @@ -185,7 +185,17 @@ class WeightNormParamAttr(ParamAttr): Args: dim(list): The parameter's name. Default None. - kwargs: Any field in ParamAttr. Default None. + name(str): The parameter's name. Default None. + initializer(Initializer): The method to initial this parameter. Default None. + learning_rate(float): The parameter's learning rate. The learning rate when + optimize is :math:`global\_lr * parameter\_lr * scheduler\_factor`. + Default 1.0. + regularizer(WeightDecayRegularizer): Regularization factor. Default None. + trainable(bool): Whether this parameter is trainable. Default True. + gradient_clip(BaseGradientClipAttr): The method to clip this parameter's + gradient. Default None. + do_model_average(bool): Whether this parameter should do model average. + Default False. Examples: .. code-block:: python @@ -204,6 +214,21 @@ class WeightNormParamAttr(ParamAttr): # these paramters for inference. params_with_weight_norm = [] - def __init__(self, dim=None, **kwargs): - super(WeightNormParamAttr, self).__init__(**kwargs) + def __init__(self, + dim=None, + name=None, + initializer=None, + learning_rate=1.0, + regularizer=None, + trainable=True, + gradient_clip=None, + do_model_average=False): + super(WeightNormParamAttr, self).__init__( + name=name, + initializer=initializer, + learning_rate=learning_rate, + regularizer=regularizer, + trainable=trainable, + gradient_clip=gradient_clip, + do_model_average=do_model_average) self.dim = dim diff --git a/python/paddle/fluid/tests/book/test_recognize_digits.py b/python/paddle/fluid/tests/book/test_recognize_digits.py index 4b4f3e403776625fb5ca2f9b03d14ee7efe23d53..4a70976a4837c668a5e0ba6d49b598d046a8ec5d 100644 --- a/python/paddle/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/fluid/tests/book/test_recognize_digits.py @@ -67,6 +67,7 @@ def train(nn_type, use_cuda, parallel, save_dirname=None, + save_full_dirname=None, model_filename=None, params_filename=None, is_local=True): @@ -143,6 +144,13 @@ def train(nn_type, exe, model_filename=model_filename, params_filename=params_filename) + if save_full_dirname is not None: + fluid.io.save_inference_model( + save_full_dirname, [], [], + exe, + model_filename=model_filename, + params_filename=params_filename, + export_for_deployment=False) return else: print( @@ -214,10 +222,12 @@ def infer(use_cuda, def main(use_cuda, parallel, nn_type, combine): save_dirname = None + save_full_dirname = None model_filename = None params_filename = None if not use_cuda and not parallel: save_dirname = "recognize_digits_" + nn_type + ".inference.model" + save_full_dirname = "recognize_digits_" + nn_type + ".train.model" if combine == True: model_filename = "__model_combined__" params_filename = "__params_combined__" @@ -228,6 +238,7 @@ def main(use_cuda, parallel, nn_type, combine): use_cuda=use_cuda, parallel=parallel, save_dirname=save_dirname, + save_full_dirname=save_full_dirname, model_filename=model_filename, params_filename=params_filename) infer( diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py index 175bd130e5a8324227953eeeb769474e78f94fd2..f53f7f3b354e60619b17d601ff3f55d2b8b059db 100644 --- a/python/paddle/fluid/tests/unittests/dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -1488,7 +1488,7 @@ def wrap_decoder(trg_vocab_size, if weight_sharing: predict = layers.matmul( x=dec_output, - y=fluid.get_var(word_emb_param_names[0]), + y=fluid.framework._get_var(word_emb_param_names[0]), transpose_y=True) else: predict = layers.fc(input=dec_output, diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index 1b7f0745cd758a7534dec9f48132e9cb44a994e6..ecde407e6d85ea1bfc0181b4b60e095ea496fb1a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -661,22 +661,25 @@ class TestLoadSliceVar(TranspilerTest): class TestNCCL2Transpile(TranspilerTest): def test_nccl2_transpile(self): - main = fluid.Program() - startup = fluid.Program() - with fluid.program_guard(main, startup): - self.net_conf() - - config = fluid.DistributeTranspilerConfig() - config.mode = "nccl2" - t = fluid.DistributeTranspiler(config=config) - t.transpile( - 0, - trainers="127.0.0.1:6174,127.0.0.1:6175", - current_endpoint="127.0.0.1:6174", - startup_program=startup) - print([op.type for op in startup.global_block().ops]) - self.assertEqual(startup.global_block().ops[-1].type, "gen_nccl_id") - self.assertIsNotNone(startup.global_block().vars.get("NCCLID")) + if fluid.core.is_compiled_with_cuda(): #test nccl2 only with cuda + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + self.net_conf() + + config = fluid.DistributeTranspilerConfig() + config.mode = "nccl2" + t = fluid.DistributeTranspiler(config=config) + t.transpile( + 0, + trainers="127.0.0.1:6174,127.0.0.1:6175", + current_endpoint="127.0.0.1:6174", + startup_program=startup) + print([op.type for op in startup.global_block().ops]) + self.assertEqual(startup.global_block().ops[-1].type, "gen_nccl_id") + self.assertIsNotNone(startup.global_block().vars.get("NCCLID")) + else: + pass if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_infer_shape.py b/python/paddle/fluid/tests/unittests/test_infer_shape.py index a3d700aad8236fea7bb0e6d043323ad3bd0851f2..fdff22cacc28731a91ff4fd17407bd9edbdd9d8b 100644 --- a/python/paddle/fluid/tests/unittests/test_infer_shape.py +++ b/python/paddle/fluid/tests/unittests/test_infer_shape.py @@ -76,8 +76,8 @@ class TestInferShape(unittest.TestCase): mul_op_desc.set_input("X", ["x"]) mul_op_desc.set_input("Y", ["y"]) mul_op_desc.set_output("Out", ["out"]) - mul_op_desc.set_attr("x_num_col_dims", 1) - mul_op_desc.set_attr("y_num_col_dims", 1) + mul_op_desc._set_attr("x_num_col_dims", 1) + mul_op_desc._set_attr("y_num_col_dims", 1) mul_op_desc.check_attrs() mul_op_desc.infer_shape(block) diff --git a/python/paddle/fluid/tests/unittests/test_operator_desc.py b/python/paddle/fluid/tests/unittests/test_operator_desc.py index 4153394c1da776d0a41e1415a09fa7d6f4b14d6d..37b9a9188ab44df81029ae6d9925ae21c1929cff 100644 --- a/python/paddle/fluid/tests/unittests/test_operator_desc.py +++ b/python/paddle/fluid/tests/unittests/test_operator_desc.py @@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase): set(mul_op.attr_names), set([ "x_num_col_dims", "y_num_col_dims", "op_role", "op_role_var", - "op_namescope" + "op_namescope", "op_callstack" ])) self.assertEqual(mul_op.has_attr("x_num_col_dims"), True) self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT) diff --git a/python/paddle/fluid/tests/unittests/test_protobuf_descs.py b/python/paddle/fluid/tests/unittests/test_protobuf_descs.py index d24b5cbd06ddf9f332c1369ebd513bef27b77e14..7fb2171f611adea434d6f2710465810fb69d6979 100644 --- a/python/paddle/fluid/tests/unittests/test_protobuf_descs.py +++ b/python/paddle/fluid/tests/unittests/test_protobuf_descs.py @@ -38,40 +38,40 @@ class TestOpDesc(unittest.TestCase): self.assertEqual(['z'], op.output("Out")) self.assertEqual(["Out"], op.output_names()) - op.set_attr("int_attr", 1) + op._set_attr("int_attr", 1) self.assertEqual(1, op.attr("int_attr")) self.assertTrue(op.has_attr("int_attr")) self.assertEqual(core.AttrType.INT, op.attr_type("int_attr")) - op.set_attr("float_attr", -1.32) + op._set_attr("float_attr", -1.32) self.assertAlmostEqual(-1.32, op.attr("float_attr"), delta=1e-4) self.assertTrue(op.has_attr("float_attr")) - op.set_attr("bool_attr", False) + op._set_attr("bool_attr", False) self.assertFalse(op.attr("bool_attr")) - op.set_attr("string_attr", "abc") + op._set_attr("string_attr", "abc") self.assertEqual("abc", op.attr("string_attr")) self.assertTrue(op.has_attr("string_attr")) - op.set_attr("ints_attr", [1, 2, 3]) + op._set_attr("ints_attr", [1, 2, 3]) self.assertEqual([1, 2, 3], op.attr("ints_attr")) expected = [1.2, 2.3, 3.4] - op.set_attr("floats_attr", expected) + op._set_attr("floats_attr", expected) for e, a in zip(expected, op.attr("floats_attr")): self.assertAlmostEqual(e, a, delta=1e-4) - op.set_attr("strings_attr", ["a", "b", "c"]) + op._set_attr("strings_attr", ["a", "b", "c"]) self.assertEqual(["a", "b", "c"], op.attr("strings_attr")) - op.set_attr("bools_attr", [True, False, True]) + op._set_attr("bools_attr", [True, False, True]) self.assertEqual([True, False, True], op.attr("bools_attr")) self.assertEqual(8, len(op.attr_names())) - op.set_block_attr("block_attr", program_desc.block(0)) - self.assertEqual(0, op.block_attr_id("block_attr")) + op.set_block_attr("_block_attr", program_desc.block(0)) + self.assertEqual(0, op._block_attr_id("_block_attr")) mul_op = block.append_op() mul_op.set_type("mul") diff --git a/python/paddle/fluid/transpiler/details/program_utils.py b/python/paddle/fluid/transpiler/details/program_utils.py index 59899e7e9ab98f661699d5ac0645c92bd23a1512..391d6aa12bdd70b9ef988898bee8e86cd0a0d765 100644 --- a/python/paddle/fluid/transpiler/details/program_utils.py +++ b/python/paddle/fluid/transpiler/details/program_utils.py @@ -128,7 +128,7 @@ def op_to_code(op): attr_type = op.desc.attr_type(name) if attr_type == core.AttrType.BLOCK: a = "{name} = block[{value}]".format( - name=name, type=attr_type, value=op.block_attr_id(name)) + name=name, type=attr_type, value=op._block_attr_id(name)) attrs_str += a if i != len(attr_names) - 1: attrs_str += ", " @@ -136,7 +136,7 @@ def op_to_code(op): if attr_type == core.AttrType.BLOCKS: a = "{name} = blocks{value}".format( - name=name, type=attr_type, value=op.blocks_attr_ids(name)) + name=name, type=attr_type, value=op._blocks_attr_ids(name)) attrs_str += a if i != len(attr_names) - 1: attrs_str += ", " diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 43071def7a906e585909e50e4c0c52c56d981cde..d9cc709f7428abcf415fe9a27b0d48a157ed0b41 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -668,7 +668,7 @@ in a single call.") __clone_lr_op_sub_block__(cloned_op, program, new_sub_block) # reset the block of op - op.set_attr('sub_block', new_sub_block) + op._set_attr('sub_block', new_sub_block) # append lr decay ops to the child block if exists lr_ops = self._get_lr_ops() @@ -864,7 +864,7 @@ to transpile() call.") if op.type in [ "gaussian_random", "fill_constant", "uniform_random" ]: - op.set_attr("shape", list(new_outputs["Out"].shape)) + op._set_attr("shape", list(new_outputs["Out"].shape)) s_prog.global_block().append_op( type=op.type, inputs=new_inputs, diff --git a/python/paddle/fluid/transpiler/inference_transpiler.py b/python/paddle/fluid/transpiler/inference_transpiler.py index 49ba2cfd55bc881ed753fcefbd41f5b8fd4ebaf7..43d51b03e81895d7322d9e28a9c40b6d7cc69206 100644 --- a/python/paddle/fluid/transpiler/inference_transpiler.py +++ b/python/paddle/fluid/transpiler/inference_transpiler.py @@ -163,7 +163,7 @@ class InferenceTranspiler(object): next_op = self.block.ops[i + 1] if next_op.type == 'relu': # modify bnorm OP to include relu - current_op.set_attr("fuse_with_relu", True) + current_op._set_attr("fuse_with_relu", True) # remove relu OP self.block._remove_op(i + 1) i = i + 1 @@ -377,7 +377,7 @@ class InferenceTranspiler(object): type=old_var.type, dtype=old_var.dtype, shape=old_var.shape) - op.rename_input(old_param_name, new_param_name) + op._rename_input(old_param_name, new_param_name) self.scope.var(new_param_name) tensor = self.scope.find_var(new_param_name).get_tensor() @@ -463,8 +463,8 @@ class InferenceTranspiler(object): current_op = self.block.ops[i] for input_arg in current_op.input_arg_names: if input_arg in self.input_map: - current_op.rename_input(input_arg, - self.input_map[input_arg]) + current_op._rename_input(input_arg, + self.input_map[input_arg]) def _remove_unused_var(self): '''