未验证 提交 5bb04ea4 编写于 作者: L lujun 提交者: GitHub

Merge pull request #12 from PaddlePaddle/develop

merge to local
...@@ -24,6 +24,8 @@ message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: " ...@@ -24,6 +24,8 @@ message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
"${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: " message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
"${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}") "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
message(STATUS "AR tools: ${CMAKE_AR}")
if(WIN32) if(WIN32)
set(CMAKE_SUPPRESS_REGENERATION ON) set(CMAKE_SUPPRESS_REGENERATION ON)
set(CMAKE_STATIC_LIBRARY_PREFIX lib) set(CMAKE_STATIC_LIBRARY_PREFIX lib)
......
...@@ -12,7 +12,7 @@ paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], va ...@@ -12,7 +12,7 @@ paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], va
paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '0ef753f5cec69fef9ae6ad8b867b33a2')) paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '0ef753f5cec69fef9ae6ad8b867b33a2'))
paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f5369953dd0c443961cf79f7a00e1a03')) paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f5369953dd0c443961cf79f7a00e1a03'))
paddle.fluid.Executor.run (ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)), ('document', 'aba8093edebf2d5c869b735b92811e45')) paddle.fluid.Executor.run (ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)), ('document', 'f482e93b38b4018796969a2e1dde479d'))
paddle.fluid.global_scope (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'e148d3ab1ed8edf3e928212a375959c0')) paddle.fluid.global_scope (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'e148d3ab1ed8edf3e928212a375959c0'))
paddle.fluid.scope_guard (ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None), ('document', 'b94d1f6bcc29c4fb58fc0058561250c2')) paddle.fluid.scope_guard (ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None), ('document', 'b94d1f6bcc29c4fb58fc0058561250c2'))
paddle.fluid.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
...@@ -277,7 +277,7 @@ paddle.fluid.layers.DynamicRNN.block (ArgSpec(args=['self'], varargs=None, keywo ...@@ -277,7 +277,7 @@ paddle.fluid.layers.DynamicRNN.block (ArgSpec(args=['self'], varargs=None, keywo
paddle.fluid.layers.DynamicRNN.memory (ArgSpec(args=['self', 'init', 'shape', 'value', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, False, 'float32')), ('document', 'b9174d4e91505b0c8ecc193eb51e248d')) paddle.fluid.layers.DynamicRNN.memory (ArgSpec(args=['self', 'init', 'shape', 'value', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, False, 'float32')), ('document', 'b9174d4e91505b0c8ecc193eb51e248d'))
paddle.fluid.layers.DynamicRNN.output (ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None), ('document', 'b439a176a3328de8a75bdc5c08eece4a')) paddle.fluid.layers.DynamicRNN.output (ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None), ('document', 'b439a176a3328de8a75bdc5c08eece4a'))
paddle.fluid.layers.DynamicRNN.static_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', 'f29ad2478b6b2ad4f413d2936a331ea0')) paddle.fluid.layers.DynamicRNN.static_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', 'f29ad2478b6b2ad4f413d2936a331ea0'))
paddle.fluid.layers.DynamicRNN.step_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '169d694d2224f62b4f3afdc3dbc19e95')) paddle.fluid.layers.DynamicRNN.step_input (ArgSpec(args=['self', 'x', 'level'], varargs=None, keywords=None, defaults=(0,)), ('document', '7568c5ac7622a10288d3307a94134655'))
paddle.fluid.layers.DynamicRNN.update_memory (ArgSpec(args=['self', 'ex_mem', 'new_mem'], varargs=None, keywords=None, defaults=None), ('document', '5d83987da13b98363d6a807a52d8024f')) paddle.fluid.layers.DynamicRNN.update_memory (ArgSpec(args=['self', 'ex_mem', 'new_mem'], varargs=None, keywords=None, defaults=None), ('document', '5d83987da13b98363d6a807a52d8024f'))
paddle.fluid.layers.StaticRNN.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.layers.StaticRNN.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.StaticRNN.memory (ArgSpec(args=['self', 'init', 'shape', 'batch_ref', 'init_value', 'init_batch_dim_idx', 'ref_batch_dim_idx'], varargs=None, keywords=None, defaults=(None, None, None, 0.0, 0, 1)), ('document', 'c24e368e23afac1ed91a78a639d7a9c7')) paddle.fluid.layers.StaticRNN.memory (ArgSpec(args=['self', 'init', 'shape', 'batch_ref', 'init_value', 'init_batch_dim_idx', 'ref_batch_dim_idx'], varargs=None, keywords=None, defaults=(None, None, None, 0.0, 0, 1)), ('document', 'c24e368e23afac1ed91a78a639d7a9c7'))
...@@ -393,9 +393,9 @@ paddle.fluid.contrib.MagnitudePruner.__init__ (ArgSpec(args=['self', 'threshold' ...@@ -393,9 +393,9 @@ paddle.fluid.contrib.MagnitudePruner.__init__ (ArgSpec(args=['self', 'threshold'
paddle.fluid.contrib.MagnitudePruner.prune (ArgSpec(args=['self', 'param', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.MagnitudePruner.prune (ArgSpec(args=['self', 'param', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.RatioPruner.__init__ (ArgSpec(args=['self', 'ratios'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e7a81a325b296a9ca502ee5adb4fc85d')) paddle.fluid.contrib.RatioPruner.__init__ (ArgSpec(args=['self', 'ratios'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e7a81a325b296a9ca502ee5adb4fc85d'))
paddle.fluid.contrib.RatioPruner.prune (ArgSpec(args=['self', 'param', 'ratio'], varargs=None, keywords=None, defaults=(None,)), ('document', '358cbf2978c91028fb96a195a9884645')) paddle.fluid.contrib.RatioPruner.prune (ArgSpec(args=['self', 'param', 'ratio'], varargs=None, keywords=None, defaults=(None,)), ('document', '358cbf2978c91028fb96a195a9884645'))
paddle.fluid.contrib.load_persistables_for_increment (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None), ('document', '11fbf7e8dd2289805de291b453a33ee7')) paddle.fluid.contrib.load_persistables_for_increment (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None), ('document', '2ab36d4f7a564f5f65e455807ad06c67'))
paddle.fluid.contrib.load_persistables_for_inference (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None), ('document', '5b5577bb3d24070da819674255d16196')) paddle.fluid.contrib.load_persistables_for_inference (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None), ('document', '59066bac9db0ac6ce414d05780b7333f'))
paddle.fluid.contrib.convert_dist_to_sparse_program (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '4efbd93876832d4d35497cdbc7a1e6d8')) paddle.fluid.contrib.convert_dist_to_sparse_program (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '74c39c595dc70d6be2f16d8e462d282b'))
paddle.fluid.contrib.HDFSClient.__init__ (ArgSpec(args=['self', 'hadoop_home', 'configs'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.HDFSClient.__init__ (ArgSpec(args=['self', 'hadoop_home', 'configs'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.HDFSClient.delete (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None), ('document', 'c3721aa2d4d9ef5a857dd47b2681c03e')) paddle.fluid.contrib.HDFSClient.delete (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None), ('document', 'c3721aa2d4d9ef5a857dd47b2681c03e'))
paddle.fluid.contrib.HDFSClient.download (ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'unzip'], varargs=None, keywords=None, defaults=(False, False)), ('document', 'ca55bde92184d3fd0f9f5c963b25e634')) paddle.fluid.contrib.HDFSClient.download (ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'unzip'], varargs=None, keywords=None, defaults=(False, False)), ('document', 'ca55bde92184d3fd0f9f5c963b25e634'))
...@@ -494,7 +494,7 @@ paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinne ...@@ -494,7 +494,7 @@ paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinne
paddle.fluid.ParamAttr.__init__ (ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.ParamAttr.__init__ (ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.WeightNormParamAttr.__init__ (ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.WeightNormParamAttr.__init__ (ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', '0eed2f198dc73c08a41b61edbc755753')) paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'f8f3df23c5633c614db781a91b81fb62'))
paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', '459e316301279dfd82001b46f0b8ffca')) paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', '459e316301279dfd82001b46f0b8ffca'))
paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '543863d1f9d4853758adb613b8659e85')) paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '543863d1f9d4853758adb613b8659e85'))
paddle.fluid.clip.ErrorClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.clip.ErrorClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
...@@ -518,11 +518,11 @@ paddle.reader.compose (ArgSpec(args=[], varargs='readers', keywords='kwargs', de ...@@ -518,11 +518,11 @@ paddle.reader.compose (ArgSpec(args=[], varargs='readers', keywords='kwargs', de
paddle.reader.chain (ArgSpec(args=[], varargs='readers', keywords=None, defaults=None), ('document', 'd22c34e379a53901ae67a6bca7f4def4')) paddle.reader.chain (ArgSpec(args=[], varargs='readers', keywords=None, defaults=None), ('document', 'd22c34e379a53901ae67a6bca7f4def4'))
paddle.reader.shuffle (ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None), ('document', 'e42ea6fee23ce26b23cb142cd1d6522d')) paddle.reader.shuffle (ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None), ('document', 'e42ea6fee23ce26b23cb142cd1d6522d'))
paddle.reader.firstn (ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None), ('document', 'c5bb8f7dd4f917f1569a368aab5b8aad')) paddle.reader.firstn (ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None), ('document', 'c5bb8f7dd4f917f1569a368aab5b8aad'))
paddle.reader.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '283bc0b8a0e26ae186b8b9bee4aec560')) paddle.reader.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '9c804a42f8a4dbaa76b3c98e0ab7f796'))
paddle.reader.PipeReader.__init__ (ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.reader.PipeReader.__init__ (ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.reader.PipeReader.get_line (ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n')), ('document', '5f80a7ed70052f01665e4c74acccfa69')) paddle.reader.PipeReader.get_line (ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n')), ('document', '9621ae612e595b6c34eb3bb5f3eb1a45'))
paddle.reader.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '7d8b3a96e592107c893d5d51ce968ba0')) paddle.reader.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '7d8b3a96e592107c893d5d51ce968ba0'))
paddle.reader.Fake.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.reader.Fake.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.reader.creator.np_array (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '28d457fbc9a71efa4ac91a3be179cada')) paddle.reader.creator.np_array (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '28d457fbc9a71efa4ac91a3be179cada'))
paddle.reader.creator.text_file (ArgSpec(args=['path'], varargs=None, keywords=None, defaults=None), ('document', '44fe286ab6175a5464d3a961a68c266a')) paddle.reader.creator.text_file (ArgSpec(args=['path'], varargs=None, keywords=None, defaults=None), ('document', 'f45fcb7add066c8e042c6774fc7c3db2'))
paddle.reader.creator.recordio (ArgSpec(args=['paths', 'buf_size'], varargs=None, keywords=None, defaults=(100,)), ('document', '11b3704ea42cfd537953387a7e58dae8')) paddle.reader.creator.recordio (ArgSpec(args=['paths', 'buf_size'], varargs=None, keywords=None, defaults=(100,)), ('document', 'b4a94ee0e2cefb495619275c2f8c61d2'))
...@@ -68,11 +68,11 @@ class SplitOpMaker : public OpProtoAndCheckerMaker { ...@@ -68,11 +68,11 @@ class SplitOpMaker : public OpProtoAndCheckerMaker {
class DummyVarTypeInference : public VarTypeInference { class DummyVarTypeInference : public VarTypeInference {
public: public:
void operator()(const OpDesc& op_desc, BlockDesc* block) const override { void operator()(framework::InferVarTypeContext* ctx) const override {
auto& inputs = op_desc.Input("X"); auto& inputs = ctx->Input("X");
auto type = block->Var(inputs.front())->GetType(); auto type = ctx->GetType(inputs.front());
auto out_var_name = op_desc.Output("Out").front(); auto out_var_name = ctx->Output("Out").front();
block->Var(out_var_name)->SetType(type); ctx->SetType(out_var_name, type);
} }
}; };
......
...@@ -16,6 +16,8 @@ limitations under the License. */ ...@@ -16,6 +16,8 @@ limitations under the License. */
#include <string> #include <string>
#include <tuple> #include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector> #include <vector>
#include "paddle/fluid/framework/grad_op_desc_maker.h" #include "paddle/fluid/framework/grad_op_desc_maker.h"
#include "paddle/fluid/framework/inplace_op_inference.h" #include "paddle/fluid/framework/inplace_op_inference.h"
...@@ -127,9 +129,9 @@ struct OpInfoFiller<T, kGradOpDescMaker> { ...@@ -127,9 +129,9 @@ struct OpInfoFiller<T, kGradOpDescMaker> {
template <typename T> template <typename T>
struct OpInfoFiller<T, kVarTypeInference> { struct OpInfoFiller<T, kVarTypeInference> {
void operator()(const char* op_type, OpInfo* info) const { void operator()(const char* op_type, OpInfo* info) const {
info->infer_var_type_ = [](const OpDesc& fwd_op, BlockDesc* block) { info->infer_var_type_ = [](InferVarTypeContext* context) {
T inference; T inference;
inference(fwd_op, block); inference(context);
}; };
} }
}; };
......
...@@ -14,7 +14,9 @@ limitations under the License. */ ...@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#include <algorithm> #include <algorithm>
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_desc.h"
...@@ -55,11 +57,11 @@ class GradOpDescMakerBase { ...@@ -55,11 +57,11 @@ class GradOpDescMakerBase {
std::back_inserter(ret_val), std::back_inserter(ret_val),
[this](const std::string& fwd_var_name) -> std::string { [this](const std::string& fwd_var_name) -> std::string {
auto g_name = GradVarName(fwd_var_name); auto g_name = GradVarName(fwd_var_name);
if (no_grad_set_.count(g_name)) { if (no_grad_set_.empty() || !no_grad_set_.count(g_name)) {
return kEmptyVarName;
} else {
(*this->grad_to_var_)[g_name] = fwd_var_name; (*this->grad_to_var_)[g_name] = fwd_var_name;
return g_name; return g_name;
} else {
return kEmptyVarName;
} }
}); });
if (!drop_empty_grad) { if (!drop_empty_grad) {
......
...@@ -46,6 +46,7 @@ cc_library(fuse_pass_base SRCS fuse_pass_base.cc DEPS pass) ...@@ -46,6 +46,7 @@ cc_library(fuse_pass_base SRCS fuse_pass_base.cc DEPS pass)
pass_library(graph_to_program_pass base) pass_library(graph_to_program_pass base)
pass_library(graph_viz_pass base) pass_library(graph_viz_pass base)
pass_library(lock_free_optimize_pass base) pass_library(lock_free_optimize_pass base)
pass_library(cpu_quantize_placement_pass base)
pass_library(cpu_quantize_pass inference) pass_library(cpu_quantize_pass inference)
pass_library(cpu_quantize_squash_pass inference) pass_library(cpu_quantize_squash_pass inference)
pass_library(fc_fuse_pass inference) pass_library(fc_fuse_pass inference)
...@@ -103,6 +104,7 @@ cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS g ...@@ -103,6 +104,7 @@ cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS g
cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto) cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto)
cc_test(test_seqpool_concat_fuse_pass SRCS seqpool_concat_fuse_pass_tester.cc DEPS seqpool_concat_fuse_pass framework_proto) cc_test(test_seqpool_concat_fuse_pass SRCS seqpool_concat_fuse_pass_tester.cc DEPS seqpool_concat_fuse_pass framework_proto)
cc_test(test_is_test_pass SRCS is_test_pass_tester.cc DEPS is_test_pass) cc_test(test_is_test_pass SRCS is_test_pass_tester.cc DEPS is_test_pass)
cc_test(test_cpu_quantize_placement_pass SRCS cpu_quantize_placement_pass_tester.cc DEPS cpu_quantize_placement_pass)
cc_test(test_cpu_quantize_pass SRCS cpu_quantize_pass_tester.cc DEPS cpu_quantize_pass naive_executor) cc_test(test_cpu_quantize_pass SRCS cpu_quantize_pass_tester.cc DEPS cpu_quantize_pass naive_executor)
cc_test(test_cpu_quantize_squash_pass SRCS cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor) cc_test(test_cpu_quantize_squash_pass SRCS cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor)
if(NOT WIN32) if(NOT WIN32)
......
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/cpu_quantize_placement_pass.h"
#include <string>
#include <unordered_set>
namespace paddle {
namespace framework {
namespace ir {
std::unique_ptr<ir::Graph> CPUQuantizePlacementPass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const {
VLOG(3) << "Marks operators which are to be quantized.";
const auto& excluded_ids_list =
Get<std::unordered_set<int>>("quantize_excluded_op_ids");
const auto& op_types_list =
Get<std::unordered_set<std::string>>("quantize_enabled_op_types");
for (const Node* n : graph->Nodes()) {
if (n->IsOp()) {
if (std::find(excluded_ids_list.begin(), excluded_ids_list.end(),
n->id()) != excluded_ids_list.end())
continue;
auto* op = n->Op();
if (op->HasAttr("use_quantizer") || op->HasProtoAttr("use_quantizer")) {
if (op_types_list.empty()) {
op->SetAttr("use_quantizer", true);
} else if (std::find(op_types_list.begin(), op_types_list.end(),
n->Name()) != op_types_list.end()) {
op->SetAttr("use_quantizer", true);
}
}
}
}
return graph;
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(cpu_quantize_placement_pass,
paddle::framework::ir::CPUQuantizePlacementPass)
// a vector of operator type names to be quantized ("conv2d" etc.)
.RequirePassAttr("quantize_enabled_op_types")
// a vector of operator ids that are to be excluded from quantization
.RequirePassAttr("quantize_excluded_op_ids");
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include "paddle/fluid/framework/ir/pass.h"
namespace paddle {
namespace framework {
namespace ir {
/*
* Specifies which operators should be quantized.
*/
class CPUQuantizePlacementPass : public Pass {
protected:
std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/cpu_quantize_placement_pass.h"
#include <gtest/gtest.h>
#include <boost/logic/tribool.hpp>
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs,
boost::tribool use_quantizer) {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
if (!boost::indeterminate(use_quantizer))
op->SetAttr("use_quantizer", use_quantizer);
if (type == "conv2d") {
op->SetAttr("name", name);
op->SetInput("Input", {inputs[0]});
op->SetInput("Filter", {inputs[1]});
op->SetInput("Bias", {inputs[2]});
} else if (type == "relu") {
op->SetInput("X", inputs);
} else if (type == "concat") {
op->SetAttr("axis", 1);
op->SetInput("X", {inputs[0], inputs[1]});
} else if (type == "pool2d") {
op->SetInput("X", {inputs[0]});
} else {
FAIL() << "Unexpected operator type.";
}
op->SetOutput("Out", {outputs[0]});
}
// operator use_quantizer
// ---------------------------------------
// (a,b)->concat->c none
// (c,weights,bias)->conv->f false
// f->relu->g none
// g->pool->h false
// (h,weights2,bias2)->conv->k false
// k->pool->l false
ProgramDesc BuildProgramDesc() {
ProgramDesc prog;
for (auto& v :
std::vector<std::string>({"a", "b", "c", "weights", "bias", "f", "g",
"h", "weights2", "bias2", "k", "l"})) {
auto* var = prog.MutableBlock(0)->Var(v);
var->SetType(proto::VarType::SELECTED_ROWS);
if (v == "weights" || v == "bias") {
var->SetPersistable(true);
}
}
SetOp(&prog, "concat", "concat1", {"a", "b"}, {"c"}, boost::indeterminate);
SetOp(&prog, "conv2d", "conv1", {"c", "weights", "bias"}, {"f"}, false);
SetOp(&prog, "relu", "relu1", {"f"}, {"g"}, boost::indeterminate);
SetOp(&prog, "pool2d", "pool1", {"g"}, {"h"}, false);
SetOp(&prog, "conv2d", "conv2", {"h", "weights2", "bias2"}, {"k"}, false);
SetOp(&prog, "pool2d", "pool2", {"k"}, {"l"}, false);
return prog;
}
void MainTest(std::initializer_list<std::string> quantize_enabled_op_types,
std::initializer_list<int> quantize_excluded_op_ids,
unsigned expected_use_quantizer_true_count) {
auto prog = BuildProgramDesc();
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto pass = PassRegistry::Instance().Get("cpu_quantize_placement_pass");
pass->Set("quantize_enabled_op_types",
new std::unordered_set<std::string>(quantize_enabled_op_types));
pass->Set("quantize_excluded_op_ids",
new std::unordered_set<int>(quantize_excluded_op_ids));
graph = pass->Apply(std::move(graph));
unsigned use_quantizer_true_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
auto* op = node->Op();
if (op->HasAttr("use_quantizer") &&
boost::get<bool>(op->GetAttr("use_quantizer"))) {
++use_quantizer_true_count;
}
}
}
EXPECT_EQ(use_quantizer_true_count, expected_use_quantizer_true_count);
}
TEST(QuantizerPlacementPass, enabled_pool) { MainTest({"pool2d"}, {}, 2); }
TEST(QuantizerPlacementPass, enabled_conv_excluded_one) {
MainTest({"conv2d"}, {4}, 1);
}
TEST(QuantizerPlacementPass, excluded_none) {
// 2 conv + 2 pool
MainTest({}, {}, 4);
}
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(cpu_quantize_placement_pass);
...@@ -43,20 +43,20 @@ class SumOpMaker : public OpProtoAndCheckerMaker { ...@@ -43,20 +43,20 @@ class SumOpMaker : public OpProtoAndCheckerMaker {
class SumOpVarTypeInference : public VarTypeInference { class SumOpVarTypeInference : public VarTypeInference {
public: public:
void operator()(const OpDesc &op_desc, BlockDesc *block) const override { void operator()(InferVarTypeContext *ctx) const override {
auto &inputs = op_desc.Input("X"); auto &inputs = ctx->Input("X");
auto default_var_type = proto::VarType::SELECTED_ROWS; auto default_var_type = proto::VarType::SELECTED_ROWS;
bool any_input_is_lod_tensor = std::any_of( bool any_input_is_lod_tensor = std::any_of(
inputs.begin(), inputs.end(), [block](const std::string &name) { inputs.begin(), inputs.end(), [&ctx](const std::string &name) {
return block->Var(name)->GetType() == proto::VarType::LOD_TENSOR; return ctx->GetType(name) == proto::VarType::LOD_TENSOR;
}); });
if (any_input_is_lod_tensor) { if (any_input_is_lod_tensor) {
default_var_type = proto::VarType::LOD_TENSOR; default_var_type = proto::VarType::LOD_TENSOR;
} }
auto out_var_name = op_desc.Output("Out").front(); auto out_var_name = ctx->Output("Out").front();
block->Var(out_var_name)->SetType(default_var_type); ctx->SetType(out_var_name, default_var_type);
} }
}; };
...@@ -71,7 +71,7 @@ class DummyOpMaker : public OpProtoAndCheckerMaker { ...@@ -71,7 +71,7 @@ class DummyOpMaker : public OpProtoAndCheckerMaker {
class DummyOpVarTypeInference : public VarTypeInference { class DummyOpVarTypeInference : public VarTypeInference {
public: public:
void operator()(const OpDesc &op_desc, BlockDesc *block) const override {} void operator()(framework::InferVarTypeContext *ctx) const override {}
}; };
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
......
...@@ -24,6 +24,7 @@ limitations under the License. */ ...@@ -24,6 +24,7 @@ limitations under the License. */
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/shape_inference.h" #include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/var_type_inference.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -677,7 +678,8 @@ void OpDesc::InferVarType(BlockDesc *block) const { ...@@ -677,7 +678,8 @@ void OpDesc::InferVarType(BlockDesc *block) const {
// var type inference. Hence, we don't do any "default" setting here. // var type inference. Hence, we don't do any "default" setting here.
auto &info = OpInfoMap::Instance().Get(this->Type()); auto &info = OpInfoMap::Instance().Get(this->Type());
if (info.infer_var_type_) { if (info.infer_var_type_) {
info.infer_var_type_(*this, block); InferVarTypeContext context(this, block);
info.infer_var_type_(&context);
} }
} }
......
...@@ -44,6 +44,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -44,6 +44,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
<< dst_place; << dst_place;
return; return;
} }
#ifdef PADDLE_WITH_MKLDNN
if (src.layout() == DataLayout::kMKLDNN) {
dst->set_mkldnn_prim_desc(src.get_mkldnn_prim_desc());
}
#endif
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr, memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
boost::get<platform::CPUPlace>(src_place), src_ptr, size); boost::get<platform::CPUPlace>(src_place), src_ptr, size);
} }
......
...@@ -27,6 +27,7 @@ namespace framework { ...@@ -27,6 +27,7 @@ namespace framework {
class OperatorBase; class OperatorBase;
class OpDesc; class OpDesc;
class InferShapeContext; class InferShapeContext;
class InferVarTypeContext;
class BlockDesc; class BlockDesc;
class Variable; class Variable;
...@@ -53,7 +54,7 @@ using GradOpMakerFN = std::function<std::vector<std::unique_ptr<OpDesc>>( ...@@ -53,7 +54,7 @@ using GradOpMakerFN = std::function<std::vector<std::unique_ptr<OpDesc>>(
const std::vector<BlockDesc*>& grad_block)>; const std::vector<BlockDesc*>& grad_block)>;
using InferVarTypeFN = using InferVarTypeFN =
std::function<void(const OpDesc& /*op_desc*/, BlockDesc* /*block*/)>; std::function<void(framework::InferVarTypeContext* /*context*/)>;
using InferShapeFN = std::function<void(InferShapeContext*)>; using InferShapeFN = std::function<void(InferShapeContext*)>;
......
...@@ -14,6 +14,8 @@ limitations under the License. */ ...@@ -14,6 +14,8 @@ limitations under the License. */
#pragma once #pragma once
#include <string> #include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/framework/type_defs.h"
...@@ -21,26 +23,123 @@ limitations under the License. */ ...@@ -21,26 +23,123 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
class OpDesc;
class BlockDesc;
// default infer var type context
class InferVarTypeContext {
public:
InferVarTypeContext(const OpDesc* op, BlockDesc* block)
: op_(op), block_(block) {}
virtual ~InferVarTypeContext() {}
virtual Attribute GetAttr(const std::string& name) const {
PADDLE_ENFORCE_NOT_NULL(op_);
return op_->GetAttr(name);
}
virtual bool HasVar(const std::string& name) const {
PADDLE_ENFORCE_NOT_NULL(block_);
return block_->FindVarRecursive(name) != nullptr;
}
virtual bool HasInput(const std::string& name) const {
PADDLE_ENFORCE_NOT_NULL(op_);
return op_->Inputs().count(name) > 0;
}
virtual bool HasOutput(const std::string& name) const {
PADDLE_ENFORCE_NOT_NULL(op_);
return op_->Outputs().count(name) > 0;
}
virtual const std::vector<std::string>& Input(const std::string& name) const {
PADDLE_ENFORCE_NOT_NULL(op_);
return op_->Input(name);
}
virtual const std::vector<std::string>& Output(
const std::string& name) const {
PADDLE_ENFORCE_NOT_NULL(op_);
return op_->Output(name);
}
virtual proto::VarType::Type GetType(const std::string& name) const {
PADDLE_ENFORCE_NOT_NULL(block_);
return block_->FindRecursiveOrCreateVar(name).GetType();
}
virtual void SetType(const std::string& name, proto::VarType::Type type) {
PADDLE_ENFORCE_NOT_NULL(block_);
block_->FindRecursiveOrCreateVar(name).SetType(type);
}
virtual proto::VarType::Type GetDataType(const std::string& name) const {
PADDLE_ENFORCE_NOT_NULL(block_);
return block_->FindRecursiveOrCreateVar(name).GetDataType();
}
virtual void SetDataType(const std::string& name, proto::VarType::Type type) {
PADDLE_ENFORCE_NOT_NULL(block_);
block_->FindRecursiveOrCreateVar(name).SetDataType(type);
}
virtual std::vector<proto::VarType::Type> GetDataTypes(
const std::string& name) const {
PADDLE_ENFORCE_NOT_NULL(block_);
return block_->FindRecursiveOrCreateVar(name).GetDataTypes();
}
virtual void SetDataTypes(
const std::string& name,
const std::vector<proto::VarType::Type>& multiple_data_type) {
PADDLE_ENFORCE_NOT_NULL(block_);
block_->FindRecursiveOrCreateVar(name).SetDataTypes(multiple_data_type);
}
virtual std::vector<int64_t> GetShape(const std::string& name) const {
PADDLE_ENFORCE_NOT_NULL(block_);
return block_->FindRecursiveOrCreateVar(name).GetShape();
}
virtual void SetShape(const std::string& name,
const std::vector<int64_t>& dims) {
PADDLE_ENFORCE_NOT_NULL(block_);
block_->FindRecursiveOrCreateVar(name).SetShape(dims);
}
virtual int32_t GetLoDLevel(const std::string& name) const {
PADDLE_ENFORCE_NOT_NULL(block_);
return block_->FindRecursiveOrCreateVar(name).GetLoDLevel();
}
virtual void SetLoDLevel(const std::string& name, int32_t lod_level) {
PADDLE_ENFORCE_NOT_NULL(block_);
block_->FindRecursiveOrCreateVar(name).SetLoDLevel(lod_level);
}
protected:
const OpDesc* op_;
BlockDesc* block_;
};
class VarTypeInference { class VarTypeInference {
public: public:
virtual ~VarTypeInference() {} virtual ~VarTypeInference() {}
virtual void operator()(const OpDesc& op_desc, BlockDesc* block) const = 0; virtual void operator()(InferVarTypeContext* context) const = 0; // NOLINT
}; };
class PassInDtypeAndVarTypeToOutput : public framework::VarTypeInference { class PassInDtypeAndVarTypeToOutput : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc& op_desc, void operator()(framework::InferVarTypeContext* ctx) const final { // NOLINT
framework::BlockDesc* block) const final {
auto in_out_var_names = this->GetInputOutputWithSameType(); auto in_out_var_names = this->GetInputOutputWithSameType();
for (auto& i_o_n : in_out_var_names) { for (auto& i_o_n : in_out_var_names) {
auto& x_name = op_desc.Input(i_o_n.first).at(0); auto& x_name = ctx->Input(i_o_n.first).at(0);
auto& out_name = op_desc.Output(i_o_n.second).at(0); auto& out_name = ctx->Output(i_o_n.second).at(0);
auto& x = block->FindRecursiveOrCreateVar(x_name); ctx->SetType(out_name, ctx->GetType(x_name));
auto& out = block->FindRecursiveOrCreateVar(out_name); ctx->SetDataType(out_name, ctx->GetDataType(x_name));
out.SetType(x.GetType());
out.SetDataType(x.GetDataType());
} }
} }
......
...@@ -44,20 +44,20 @@ class SumOpMaker : public OpProtoAndCheckerMaker { ...@@ -44,20 +44,20 @@ class SumOpMaker : public OpProtoAndCheckerMaker {
class SumOpVarTypeInference : public VarTypeInference { class SumOpVarTypeInference : public VarTypeInference {
public: public:
void operator()(const OpDesc &op_desc, BlockDesc *block) const override { void operator()(framework::InferVarTypeContext *ctx) const override {
auto &inputs = op_desc.Input("X"); auto &inputs = ctx->Input("X");
auto default_var_type = proto::VarType::SELECTED_ROWS; auto default_var_type = proto::VarType::SELECTED_ROWS;
bool any_input_is_lod_tensor = std::any_of( bool any_input_is_lod_tensor = std::any_of(
inputs.begin(), inputs.end(), [block](const std::string &name) { inputs.begin(), inputs.end(), [&ctx](const std::string &name) {
return block->Var(name)->GetType() == proto::VarType::LOD_TENSOR; return ctx->GetType(name) == proto::VarType::LOD_TENSOR;
}); });
if (any_input_is_lod_tensor) { if (any_input_is_lod_tensor) {
default_var_type = proto::VarType::LOD_TENSOR; default_var_type = proto::VarType::LOD_TENSOR;
} }
auto out_var_name = op_desc.Output("Out").front(); auto out_var_name = ctx->Output("Out").front();
block->Var(out_var_name)->SetType(default_var_type); ctx->SetType(out_var_name, default_var_type);
} }
}; };
} // namespace framework } // namespace framework
......
...@@ -2,4 +2,5 @@ if(WITH_PYTHON) ...@@ -2,4 +2,5 @@ if(WITH_PYTHON)
cc_library(layer SRCS layer.cc DEPS proto_desc operator device_context blas pybind) cc_library(layer SRCS layer.cc DEPS proto_desc operator device_context blas pybind)
cc_library(tracer SRCS tracer.cc DEPS proto_desc device_context pybind) cc_library(tracer SRCS tracer.cc DEPS proto_desc device_context pybind)
cc_library(engine SRCS engine.cc) cc_library(engine SRCS engine.cc)
cc_library(imperative_profiler SRCS profiler.cc)
endif() endif()
...@@ -214,13 +214,11 @@ framework::LoDTensor& VarBase::GradValue() { ...@@ -214,13 +214,11 @@ framework::LoDTensor& VarBase::GradValue() {
} }
std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() { std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
if (grad_op_descs_.empty() && backward_id_ <= 0) { PADDLE_ENFORCE(!grad_op_descs_.empty() || backward_id_ > 0,
VLOG(3) << "op with no grad: " << Type(); "%s has no backward implementation", Type());
return {};
}
VLOG(3) << "apply op grad: " << Type(); VLOG(3) << "apply op grad: " << Type();
std::vector<framework::VariableValueMap> tmp_grad_outputs; std::vector<VarBasePtrMap> tmp_grad_outputs;
if (backward_id_ > 0) { if (backward_id_ > 0) {
VLOG(3) << "py_layer_grad"; VLOG(3) << "py_layer_grad";
tmp_grad_outputs.resize(1); tmp_grad_outputs.resize(1);
...@@ -239,30 +237,66 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() { ...@@ -239,30 +237,66 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
VLOG(3) << "apply grad op " << grad_op_desc->Type(); VLOG(3) << "apply grad op " << grad_op_desc->Type();
// Allocate tmp grad output variable // Allocate tmp grad output variable
for (auto it : grad_output_variable_map) { for (const auto& it : grad_output_variable_map) {
auto& outputs = tmp_grad_outputs[k][it.first]; auto& outputs = tmp_grad_outputs[k][it.first];
outputs.reserve(it.second.size()); outputs.reserve(it.second.size());
for (size_t i = 0; i < it.second.size(); ++i) { for (size_t i = 0; i < it.second.size(); ++i) {
VarBase* origin_grad_var_base = it.second[i];
// Allocate a new variable // Allocate a new variable
Variable* tmp_var = new framework::Variable(); VarBase* tmp_grad_var_base = new VarBase(
tmp_var->GetMutable<framework::LoDTensor>(); string::Sprintf("%s@IGrad", origin_grad_var_base->Name()),
outputs.emplace_back(tmp_var); origin_grad_var_base->DataType(), origin_grad_var_base->Dims(),
place_, true, false);
outputs.emplace_back(tmp_grad_var_base);
} }
} }
// Run grad op
framework::RuntimeContext ctx(grad_input_vars_[k], tmp_grad_outputs[k]);
// No need to do compile time infer shape here. // No need to do compile time infer shape here.
// grad_op_desc_->InferShape(*block_); // grad_op_desc_->InferShape(*block_);
// grad_op_desc->InferVarType(block_); // grad_op_desc->InferVarType(block_);
std::unique_ptr<framework::OperatorBase> opbase = std::unique_ptr<framework::OperatorBase> opbase =
framework::OpRegistry::CreateOp(*grad_op_desc); framework::OpRegistry::CreateOp(*grad_op_desc);
auto& info = framework::OpInfoMap::Instance().Get(grad_op_desc->Type());
if (info.infer_var_type_) {
RuntimeInferVarTypeContext infer_var_type_ctx(
&grad_input_vars_[k], &tmp_grad_outputs[k], &attrs_);
info.infer_var_type_(&infer_var_type_ctx);
}
framework::OperatorWithKernel* op_kernel = framework::OperatorWithKernel* op_kernel =
dynamic_cast<framework::OperatorWithKernel*>(opbase.get()); dynamic_cast<framework::OperatorWithKernel*>(opbase.get());
PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel"); PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
// Run grad op
framework::VariableValueMap grad_invars_map;
framework::VariableValueMap grad_outvars_map;
for (const auto& it : grad_input_vars_[k]) {
auto& grad_invars = grad_invars_map[it.first];
grad_invars.reserve(it.second.size());
for (const VarBase* grad_inp : it.second) {
PADDLE_ENFORCE_NOT_NULL(grad_inp->var_, "op %s input %s nullptr",
grad_op_desc->Type(), grad_inp->Name());
grad_invars.emplace_back(grad_inp->var_);
}
}
for (const auto& it : tmp_grad_outputs[k]) {
auto& grad_outvars = grad_outvars_map[it.first];
grad_outvars.reserve(it.second.size());
for (VarBase* grad_out : it.second) {
PADDLE_ENFORCE_NOT_NULL(grad_out->var_, "op %s output %s nullptr",
grad_op_desc->Type(), grad_out->Name());
grad_outvars.emplace_back(grad_out->var_);
}
}
framework::RuntimeContext ctx(grad_invars_map, grad_outvars_map);
framework::Scope scope; framework::Scope scope;
PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place_); PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place_);
p.op.RuntimeInferShape(scope, place_, ctx); p.op.RuntimeInferShape(scope, place_, ctx);
...@@ -273,14 +307,14 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() { ...@@ -273,14 +307,14 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
// Add tmp grad outputs to original grad vars // Add tmp grad outputs to original grad vars
for (size_t k = 0; k < grad_output_vars_.size(); ++k) { for (size_t k = 0; k < grad_output_vars_.size(); ++k) {
for (auto it : grad_output_vars_[k]) { for (const auto& it : grad_output_vars_[k]) {
auto& outputs = tmp_grad_outputs[k][it.first]; auto& outputs = tmp_grad_outputs[k][it.first];
auto& origin_outputs = it.second; const auto& origin_outputs = it.second;
PADDLE_ENFORCE_EQ(outputs.size(), origin_outputs.size()); PADDLE_ENFORCE_EQ(outputs.size(), origin_outputs.size());
for (size_t i = 0; i < outputs.size(); ++i) { for (size_t i = 0; i < outputs.size(); ++i) {
framework::Variable* grad = outputs[i]; framework::Variable* grad = outputs[i]->var_;
framework::Variable* orig_grad = origin_outputs[i]; framework::Variable* orig_grad = origin_outputs[i]->var_;
AddTo(grad, orig_grad, place_); AddTo(grad, orig_grad, place_);
delete grad; delete grad;
} }
...@@ -328,28 +362,35 @@ void PyLayer::RegisterFunc(int func_id, const py::object& py_func) { ...@@ -328,28 +362,35 @@ void PyLayer::RegisterFunc(int func_id, const py::object& py_func) {
int PyLayer::NumFuncs() { return py_funcs_.size(); } int PyLayer::NumFuncs() { return py_funcs_.size(); }
std::vector<Variable*> PyLayer::Apply(int func_id, std::vector<framework::Variable*> PyLayer::Apply(
const std::vector<VarBase*>& inputs) { int func_id, const std::vector<VarBase*>& inputs) {
std::vector<framework::Variable*> invars;
for (const VarBase* in : inputs) {
invars.push_back(in->var_);
}
PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end()); PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end());
return CallPythonFunc(py_funcs_[func_id], invars); return CallPythonFunc(py_funcs_[func_id], inputs);
} }
std::vector<Variable*> PyLayer::ApplyGrad( std::vector<VarBase*> PyLayer::ApplyGrad(int func_id,
int func_id, const std::vector<framework::Variable*>& inputs) { const std::vector<VarBase*>& inputs) {
PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end()); PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end());
return CallPythonFunc(py_funcs_[func_id], inputs); auto rets = CallPythonFunc(py_funcs_[func_id], inputs);
std::vector<VarBase*> outs;
outs.reserve(rets.size());
for (size_t i = 0U; i != rets.size(); ++i) {
outs.emplace_back(new VarBase(
string::Sprintf("%s_out_%d", framework::GradVarName(PyLayer::kFwdOut),
i),
rets[i], nullptr, true));
}
return outs;
} }
std::vector<framework::Variable*> PyLayer::CallPythonFunc( std::vector<framework::Variable*> PyLayer::CallPythonFunc(
const py::object& callable, const std::vector<framework::Variable*>& ins) { const py::object& callable, const std::vector<VarBase*>& ins) {
py::gil_scoped_acquire guard; py::gil_scoped_acquire guard;
py::tuple in_args(ins.size()); py::tuple in_args(ins.size());
for (size_t i = 0; i < ins.size(); ++i) { for (size_t i = 0; i < ins.size(); ++i) {
const framework::LoDTensor& t = ins[i]->Get<framework::LoDTensor>(); const framework::LoDTensor& t = ins[i]->var_->Get<framework::LoDTensor>();
in_args[i] = t.IsInitialized() ? py::cast(t) : py::cast(nullptr); in_args[i] = t.IsInitialized() ? py::cast(t) : py::cast(nullptr);
} }
VLOG(3) << "pyfunc in " << py::len(in_args); VLOG(3) << "pyfunc in " << py::len(in_args);
...@@ -359,6 +400,7 @@ std::vector<framework::Variable*> PyLayer::CallPythonFunc( ...@@ -359,6 +400,7 @@ std::vector<framework::Variable*> PyLayer::CallPythonFunc(
auto ret_tuple = py::cast<py::tuple>(ret); auto ret_tuple = py::cast<py::tuple>(ret);
size_t ret_num = py::len(ret_tuple); size_t ret_num = py::len(ret_tuple);
std::vector<framework::Variable*> outs; std::vector<framework::Variable*> outs;
outs.reserve(ret_num);
VLOG(3) << "pyfunc out " << ret_num; VLOG(3) << "pyfunc out " << ret_num;
for (size_t i = 0; i < ret_num; ++i) { for (size_t i = 0; i < ret_num; ++i) {
try { try {
...@@ -369,7 +411,7 @@ std::vector<framework::Variable*> PyLayer::CallPythonFunc( ...@@ -369,7 +411,7 @@ std::vector<framework::Variable*> PyLayer::CallPythonFunc(
auto* tensor = var->GetMutable<framework::LoDTensor>(); auto* tensor = var->GetMutable<framework::LoDTensor>();
tensor->ShareDataWith(*py_out_tensor); tensor->ShareDataWith(*py_out_tensor);
tensor->set_lod(py_out_tensor->lod()); tensor->set_lod(py_out_tensor->lod());
outs.push_back(var); outs.emplace_back(var);
} catch (py::cast_error&) { } catch (py::cast_error&) {
PADDLE_THROW("The %d-th output must be LoDTensor", i); PADDLE_THROW("The %d-th output must be LoDTensor", i);
} }
......
...@@ -18,14 +18,16 @@ ...@@ -18,14 +18,16 @@
#include "paddle/fluid/framework/python_headers.h" #include "paddle/fluid/framework/python_headers.h"
// clang-format on // clang-format on
#include <map> // NOLINT #include <map> // NOLINT
#include <string> // NOLINT #include <string> // NOLINT
#include <vector> // NOLINT #include <vector> // NOLINT
#include <memory> // NOLINT #include <memory> // NOLINT
#include <unordered_map> // NOLINT
#include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/math_function.h"
...@@ -135,13 +137,13 @@ class VarBase { ...@@ -135,13 +137,13 @@ class VarBase {
persistable) {} persistable) {}
private: private:
// TODO(minqiyang): need support SelectedRows
VarBase(const std::string& name, framework::proto::VarType::Type dtype, VarBase(const std::string& name, framework::proto::VarType::Type dtype,
const framework::DDim& shape, const platform::Place& place, const framework::DDim& shape, const platform::Place& place,
framework::Variable* var, VarBase* grad, bool stop_gradient, framework::Variable* var, VarBase* grad, bool stop_gradient,
bool persistable) bool persistable)
: name_(name), : name_(name),
dtype_(dtype), type_(framework::proto::VarType::LOD_TENSOR),
place_(place),
var_(var), var_(var),
grads_(grad), grads_(grad),
stop_gradient_(stop_gradient), stop_gradient_(stop_gradient),
...@@ -151,10 +153,12 @@ class VarBase { ...@@ -151,10 +153,12 @@ class VarBase {
pre_op_out_idx_(-1) { pre_op_out_idx_(-1) {
if (!var_) { if (!var_) {
var_ = new framework::Variable(); var_ = new framework::Variable();
auto tensor = var_->GetMutable<framework::LoDTensor>();
tensor->Resize(shape);
tensor->mutable_data(place_, dtype_);
} }
auto tensor = var_->GetMutable<framework::LoDTensor>();
tensor->Resize(shape);
tensor->mutable_data(place, dtype);
VLOG(10) << "create varbase: " << name_ << " type: " << dtype
<< " place: " << place;
} }
public: public:
...@@ -184,7 +188,23 @@ class VarBase { ...@@ -184,7 +188,23 @@ class VarBase {
} }
} }
inline framework::proto::VarType::Type DType() const { return dtype_; } inline framework::DDim Dims() const {
return var_->Get<framework::LoDTensor>().dims();
}
// data type. e.g.. FP32
inline void SetDataType(framework::proto::VarType::Type type) {
auto tensor = var_->GetMutable<framework::LoDTensor>();
tensor->mutable_data(tensor->place(), type);
}
inline framework::proto::VarType::Type DataType() const {
auto tensor = var_->Get<framework::LoDTensor>();
return tensor.type();
}
// tensor type. e.g.. LoDTensor
inline void SetType(framework::proto::VarType::Type type) { type_ = type; }
inline framework::proto::VarType::Type Type() const { return type_; }
inline void SetStopGradient(bool stop_gradient) { inline void SetStopGradient(bool stop_gradient) {
stop_gradient_ = stop_gradient; stop_gradient_ = stop_gradient;
...@@ -238,7 +258,7 @@ class VarBase { ...@@ -238,7 +258,7 @@ class VarBase {
} }
std::string name_; std::string name_;
framework::proto::VarType::Type dtype_; framework::proto::VarType::Type type_;
platform::Place place_; platform::Place place_;
framework::Variable* var_; framework::Variable* var_;
...@@ -294,17 +314,23 @@ class PYBIND11_HIDDEN OpBase { ...@@ -294,17 +314,23 @@ class PYBIND11_HIDDEN OpBase {
void InvokeBackwardHooks(); void InvokeBackwardHooks();
void TrackPreOp(const VarBase* inp_var, const std::string& inp_name) { void TrackPreOp(const std::string& inp_name,
if (inp_var->PreOp() && !inp_var->IsStopGradient()) { const std::vector<VarBase*>& inputs) {
VLOG(3) << "add pre op " << inp_var->PreOp()->Type() << " in slot " auto& pre_ops_list = pre_ops_[inp_name];
<< inp_name; pre_ops_list.reserve(inputs.size());
pre_ops_[inp_name].push_back(inp_var->PreOp()); auto& pre_ops_out_idx_list = pre_ops_out_idx_[inp_name];
pre_ops_out_idx_[inp_name].push_back(inp_var->PreOpOutIdx()); for (VarBase* inp_var : inputs) {
} else { if (inp_var->PreOp() && !inp_var->IsStopGradient()) {
VLOG(3) << "no pre op in slot " << inp_name VLOG(3) << "add pre op " << inp_var->PreOp()->Type() << " in slot "
<< " input var stop_gradient: " << inp_var->IsStopGradient(); << inp_name;
pre_ops_[inp_name].push_back(nullptr); pre_ops_list.emplace_back(inp_var->PreOp());
// pre_ops_out_idx_[inp_name].push_back(-1); pre_ops_out_idx_list.push_back(inp_var->PreOpOutIdx());
} else {
VLOG(3) << "no pre op in slot " << inp_name
<< " input var stop_gradient: " << inp_var->IsStopGradient();
pre_ops_list.emplace_back(nullptr);
// pre_ops_out_idx_list.push_back(-1);
}
} }
} }
...@@ -328,11 +354,13 @@ class PYBIND11_HIDDEN OpBase { ...@@ -328,11 +354,13 @@ class PYBIND11_HIDDEN OpBase {
std::map<std::string, std::vector<int>> pre_ops_out_idx_; std::map<std::string, std::vector<int>> pre_ops_out_idx_;
// Inputs to a vector of bwd ops. // Inputs to a vector of bwd ops.
std::vector<framework::VariableValueMap> grad_input_vars_; std::vector<VarBasePtrMap> grad_input_vars_;
// Outputs to a vector of bwd ops. // Outputs to a vector of bwd ops.
std::vector<framework::VariableValueMap> grad_output_vars_; std::vector<VarBasePtrMap> grad_output_vars_;
std::vector<py::object> backward_hooks_; std::vector<py::object> backward_hooks_;
framework::AttributeMap attrs_;
}; };
class Layer { class Layer {
...@@ -359,12 +387,131 @@ class PyLayer { ...@@ -359,12 +387,131 @@ class PyLayer {
static std::vector<framework::Variable*> Apply( static std::vector<framework::Variable*> Apply(
int func_id, const std::vector<VarBase*>& inputs); int func_id, const std::vector<VarBase*>& inputs);
static std::vector<framework::Variable*> ApplyGrad( static std::vector<VarBase*> ApplyGrad(int func_id,
int func_id, const std::vector<framework::Variable*>& inputs); const std::vector<VarBase*>& inputs);
private: private:
static std::vector<framework::Variable*> CallPythonFunc( static std::vector<framework::Variable*> CallPythonFunc(
const py::object& callable, const std::vector<framework::Variable*>& ins); const py::object& callable, const std::vector<VarBase*>& ins);
};
// infer var type context for imperative mode
class PYBIND11_HIDDEN RuntimeInferVarTypeContext
: public framework::InferVarTypeContext {
public:
RuntimeInferVarTypeContext(const imperative::VarBasePtrMap* inputs,
imperative::VarBasePtrMap* outputs,
const framework::AttributeMap* attrs_map)
: InferVarTypeContext(nullptr, nullptr),
inputs_(inputs),
outputs_(outputs),
attrs_(attrs_map),
input_names_(),
output_names_(),
var_set_() {
input_names_.reserve(inputs_->size());
for (auto& it : *inputs_) {
for (imperative::VarBase* var : it.second) {
input_names_[it.first].emplace_back(var->Name());
var_set_[var->Name()] = var;
}
}
output_names_.reserve(outputs_->size());
for (auto& it : *outputs_) {
for (imperative::VarBase* var : it.second) {
output_names_[it.first].emplace_back(var->Name());
var_set_[var->Name()] = var;
}
}
}
virtual ~RuntimeInferVarTypeContext() {}
framework::Attribute GetAttr(const std::string& name) const override {
PADDLE_ENFORCE_NOT_NULL(attrs_);
return attrs_->at(name);
}
bool HasVar(const std::string& name) const override {
return var_set_.count(name) > 0;
}
bool HasInput(const std::string& name) const override {
PADDLE_ENFORCE_NOT_NULL(inputs_);
return inputs_->count(name) > 0;
}
bool HasOutput(const std::string& name) const override {
PADDLE_ENFORCE_NOT_NULL(outputs_);
return outputs_->count(name) > 0;
}
const std::vector<std::string>& Input(
const std::string& name) const override {
return input_names_.at(name);
}
const std::vector<std::string>& Output(
const std::string& name) const override {
return output_names_.at(name);
}
framework::proto::VarType::Type GetType(
const std::string& name) const override {
return var_set_.at(name)->Type();
}
void SetType(const std::string& name,
framework::proto::VarType::Type type) override {
var_set_[name]->SetType(type);
}
framework::proto::VarType::Type GetDataType(
const std::string& name) const override {
return var_set_.at(name)->DataType();
}
void SetDataType(const std::string& name,
framework::proto::VarType::Type type) override {
var_set_[name]->SetDataType(type);
}
std::vector<framework::proto::VarType::Type> GetDataTypes(
const std::string& name) const override {
PADDLE_THROW("GetDataTypes is not supported in runtime InferVarType");
}
void SetDataTypes(const std::string& name,
const std::vector<framework::proto::VarType::Type>&
multiple_data_type) override {
PADDLE_THROW("SetDataTypes is not supported in runtime InferVarType");
}
std::vector<int64_t> GetShape(const std::string& name) const override {
PADDLE_THROW("Do not handle Shape in runtime InferVarType");
}
void SetShape(const std::string& name,
const std::vector<int64_t>& dims) override {
PADDLE_THROW("Do not handle Shape in runtime InferVarType");
}
int32_t GetLoDLevel(const std::string& name) const override {
PADDLE_THROW("Do not handle LoDLevel in runtime InferVarType");
}
void SetLoDLevel(const std::string& name, int32_t lod_level) override {
PADDLE_THROW("Do not handle LoDLevel in runtime InferVarType");
}
private:
const imperative::VarBasePtrMap* inputs_;
imperative::VarBasePtrMap* outputs_;
const framework::AttributeMap* attrs_;
std::unordered_map<std::string, std::vector<std::string>> input_names_;
std::unordered_map<std::string, std::vector<std::string>> output_names_;
std::unordered_map<std::string, imperative::VarBase*> var_set_;
}; };
} // namespace imperative } // namespace imperative
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/imperative/profiler.h"
#ifdef WITH_GPERFTOOLS
#include "gperftools/profiler.h"
#endif
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <mutex> // NOLINT
#include <thread> // NOLINT
DEFINE_string(
tracer_profile_fname, "xxgperf",
"Profiler filename for imperative tracer, which generated by gperftools."
"Only valid when compiled `WITH_PROFILER=ON`. Empty if disable.");
namespace paddle {
namespace imperative {
static std::once_flag gTracerProfileOnce;
#ifdef WITH_GPERFTOOLS
static bool gTracerProfilerStarted = false;
#endif
void StartProfile() {
if (!FLAGS_tracer_profile_fname.empty()) {
std::call_once(gTracerProfileOnce, [] {
#ifdef WITH_GPERFTOOLS
ProfilerStart(FLAGS_tracer_profile_fname.c_str());
gTracerProfilerStarted = true;
#else
LOG(WARNING) << "Paddle is not compiled with gperftools. "
"FLAGS_tracer_profile_fname will be ignored";
#endif
});
}
}
void StopProfile() {
#ifdef WITH_GPERFTOOLS
ProfilerFlush();
#else
LOG(WARNING) << "Paddle is not compiled with gperftools. "
"FLAGS_tracer_profile_fname will be ignored";
#endif
}
} // namespace imperative
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
namespace paddle {
namespace imperative {
extern void StartProfile();
extern void StopProfile();
} // namespace imperative
} // namespace paddle
...@@ -19,38 +19,26 @@ ...@@ -19,38 +19,26 @@
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#ifdef WITH_GPERFTOOLS
#include "gperftools/profiler.h"
#endif
DEFINE_string(
tracer_profile_fname, "",
"Profiler filename for imperative tracer, which generated by gperftools."
"Only valid when compiled `WITH_PROFILER=ON`. Empty if disable.");
namespace paddle { namespace paddle {
namespace imperative { namespace imperative {
static std::once_flag gTracerProfileOnce;
#ifdef WITH_GPERFTOOLS
static bool gTracerProfilerStarted = false;
#endif
void CreateGradOp(const framework::OpDesc& op_desc, void CreateGradOp(const framework::OpDesc& op_desc,
const std::unordered_set<std::string>& no_grad_set, const std::unordered_set<std::string>& no_grad_set,
const std::vector<framework::BlockDesc*>& grad_sub_block, const std::vector<framework::BlockDesc*>& grad_sub_block,
std::vector<framework::OpDesc*>* grad_op_descs, std::vector<framework::OpDesc*>* grad_op_descs,
std::unordered_map<std::string, std::string>* grad_to_var) { std::unordered_map<std::string, std::string>* grad_to_var) {
PADDLE_ENFORCE(grad_op_descs->empty()); PADDLE_ENFORCE(grad_op_descs->empty());
std::vector<std::unique_ptr<framework::OpDesc>> descs = const framework::OpInfo& op_info =
framework::OpInfoMap::Instance() framework::OpInfoMap::Instance().Get(op_desc.Type());
.Get(op_desc.Type()) if (!op_info.grad_op_maker_) return;
.GradOpMaker()(op_desc, no_grad_set, grad_to_var, grad_sub_block);
std::vector<std::unique_ptr<framework::OpDesc>> descs =
op_info.GradOpMaker()(op_desc, no_grad_set, grad_to_var, grad_sub_block);
for (auto& desc : descs) { for (auto& desc : descs) {
grad_op_descs->emplace_back(desc.release()); grad_op_descs->emplace_back(desc.release());
} }
...@@ -145,31 +133,13 @@ framework::VariableNameMap CreateOutputVarNameMap( ...@@ -145,31 +133,13 @@ framework::VariableNameMap CreateOutputVarNameMap(
return result; return result;
} }
Tracer::Tracer(framework::BlockDesc* root_block) : root_block_(root_block) { Tracer::Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {}
if (!FLAGS_tracer_profile_fname.empty()) {
std::call_once(gTracerProfileOnce, [] {
#ifdef WITH_GPERFTOOLS
ProfilerStart(FLAGS_tracer_profile_fname.c_str());
gTracerProfilerStarted = true;
#else
LOG(WARNING) << "Paddle is not compiled with gperftools. "
"FLAGS_tracer_profile_fname will be ignored";
#endif
});
}
}
std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
const VarBasePtrMap& outputs, VarBasePtrMap* outputs,
framework::AttributeMap attrs_map, framework::AttributeMap attrs_map,
const platform::Place expected_place, const platform::Place expected_place,
const bool stop_gradient) { const bool stop_gradient) {
#ifdef WITH_GPERFTOOLS
if (gTracerProfilerStarted) {
ProfilerFlush();
}
#endif
framework::VariableValueMap invars_map; framework::VariableValueMap invars_map;
framework::VariableValueMap outvars_map; framework::VariableValueMap outvars_map;
...@@ -184,7 +154,6 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, ...@@ -184,7 +154,6 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
inp->Name()); inp->Name());
invars.emplace_back(inp->var_); invars.emplace_back(inp->var_);
op->TrackPreOp(inp, it.first);
if (!stop_gradient) { if (!stop_gradient) {
current_vars_map[inp->Name()] = inp; current_vars_map[inp->Name()] = inp;
} }
...@@ -192,9 +161,10 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, ...@@ -192,9 +161,10 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
<< " inited: " << inp->var_->IsInitialized() << " inited: " << inp->var_->IsInitialized()
<< " stop_grad: " << inp->IsStopGradient(); << " stop_grad: " << inp->IsStopGradient();
} }
op->TrackPreOp(it.first, it.second);
} }
op->output_vars_ = outputs; op->output_vars_ = *outputs;
for (auto it : op->output_vars_) { for (auto it : op->output_vars_) {
auto& outvars = outvars_map[it.first]; auto& outvars = outvars_map[it.first];
const std::vector<VarBase*>& outputs = it.second; const std::vector<VarBase*>& outputs = it.second;
...@@ -217,7 +187,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, ...@@ -217,7 +187,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
framework::VariableNameMap invars_name_map = framework::VariableNameMap invars_name_map =
CreateInputVarNameMap(op, inputs); CreateInputVarNameMap(op, inputs);
framework::VariableNameMap outvars_name_map = framework::VariableNameMap outvars_name_map =
CreateOutputVarNameMap(op, outputs); CreateOutputVarNameMap(op, *outputs);
auto& info = framework::OpInfoMap::Instance().Get(op->Type()); auto& info = framework::OpInfoMap::Instance().Get(op->Type());
if (info.Checker() != nullptr) { if (info.Checker() != nullptr) {
...@@ -228,6 +198,11 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, ...@@ -228,6 +198,11 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
framework::OpRegistry::CreateOp(op->Type(), invars_name_map, framework::OpRegistry::CreateOp(op->Type(), invars_name_map,
outvars_name_map, attrs_map); outvars_name_map, attrs_map);
if (info.infer_var_type_) {
RuntimeInferVarTypeContext infer_var_type_ctx(&inputs, outputs, &attrs_map);
info.infer_var_type_(&infer_var_type_ctx);
}
// TODO(minqiyang): Support infer var type in imperative mode // TODO(minqiyang): Support infer var type in imperative mode
// Run forward op // Run forward op
VLOG(3) << "tracer running " << op->Type(); VLOG(3) << "tracer running " << op->Type();
...@@ -252,6 +227,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, ...@@ -252,6 +227,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
VLOG(5) << "start construct backward op"; VLOG(5) << "start construct backward op";
// construct grad op descs // construct grad op descs
op->attrs_ = attrs_map;
std::unique_ptr<framework::OpDesc> fwd_op_desc(new framework::OpDesc( std::unique_ptr<framework::OpDesc> fwd_op_desc(new framework::OpDesc(
op->Type(), invars_name_map, outvars_name_map, attrs_map)); op->Type(), invars_name_map, outvars_name_map, attrs_map));
std::unique_ptr<std::unordered_map<std::string, std::string>> grad_to_var( std::unique_ptr<std::unordered_map<std::string, std::string>> grad_to_var(
...@@ -278,12 +254,12 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, ...@@ -278,12 +254,12 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
auto fwd_var_it = current_vars_map.find(grad_invar); auto fwd_var_it = current_vars_map.find(grad_invar);
PADDLE_ENFORCE(fwd_var_it != current_vars_map.end()); PADDLE_ENFORCE(fwd_var_it != current_vars_map.end());
// Forward inputs or outputs. // Forward inputs or outputs.
grad_in_vars.emplace_back(fwd_var_it->second->var_); grad_in_vars.emplace_back(fwd_var_it->second);
} else { } else {
VarBase* var = current_vars_map[var_it->second]; VarBase* var = current_vars_map[var_it->second];
InitGrad(var, prepared_op.GetDeviceContext()); InitGrad(var, prepared_op.GetDeviceContext());
// Douts. // Douts.
grad_in_vars.emplace_back(var->grads_->var_); grad_in_vars.emplace_back(var->grads_);
} }
vars_saved_for_backward.insert(it.first); vars_saved_for_backward.insert(it.first);
...@@ -300,7 +276,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, ...@@ -300,7 +276,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
op->Type()); op->Type());
VarBase* var = current_vars_map[var_it->second]; VarBase* var = current_vars_map[var_it->second];
InitGrad(var, prepared_op.GetDeviceContext()); InitGrad(var, prepared_op.GetDeviceContext());
grad_out_vars.push_back(var->grads_->var_); grad_out_vars.push_back(var->grads_);
} }
} }
} }
...@@ -319,9 +295,7 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op, ...@@ -319,9 +295,7 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op,
std::vector<framework::Variable*> ret_vars = std::vector<framework::Variable*> ret_vars =
PyLayer::Apply(op->forward_id_, inputs); PyLayer::Apply(op->forward_id_, inputs);
for (VarBase* inp : inputs) { op->TrackPreOp(PyLayer::kFwdInp, inputs);
op->TrackPreOp(inp, PyLayer::kFwdInp);
}
std::vector<VarBase*>& outputs = op->output_vars_[PyLayer::kFwdOut]; std::vector<VarBase*>& outputs = op->output_vars_[PyLayer::kFwdOut];
outputs.reserve(ret_vars.size()); outputs.reserve(ret_vars.size());
...@@ -342,23 +316,23 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op, ...@@ -342,23 +316,23 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op,
auto& grad_output_vars = auto& grad_output_vars =
op->grad_output_vars_[0][framework::GradVarName(PyLayer::kFwdOut)]; op->grad_output_vars_[0][framework::GradVarName(PyLayer::kFwdOut)];
for (const VarBase* inp : inputs) { for (VarBase* inp : inputs) {
grad_input_vars.push_back(inp->var_); grad_input_vars.push_back(inp);
} }
for (VarBase* out : outputs) { for (VarBase* out : outputs) {
grad_input_vars.push_back(out->var_); grad_input_vars.push_back(out);
} }
// TODO(minqiyang): Add GPU support for PyLayer, only support CPU now // TODO(minqiyang): Add GPU support for PyLayer, only support CPU now
platform::CPUPlace place; platform::CPUPlace place;
for (VarBase* out : outputs) { for (VarBase* out : outputs) {
InitGrad(out, platform::DeviceContextPool::Instance().Get(place)); InitGrad(out, platform::DeviceContextPool::Instance().Get(place));
grad_input_vars.push_back(out->grads_->var_); grad_input_vars.push_back(out->grads_);
} }
for (VarBase* inp : inputs) { for (VarBase* inp : inputs) {
InitGrad(inp, platform::DeviceContextPool::Instance().Get(place)); InitGrad(inp, platform::DeviceContextPool::Instance().Get(place));
grad_output_vars.push_back(inp->grads_->var_); grad_output_vars.push_back(inp->grads_);
} }
} }
return outputs; return outputs;
......
...@@ -48,7 +48,7 @@ class Tracer { ...@@ -48,7 +48,7 @@ class Tracer {
virtual ~Tracer() {} virtual ~Tracer() {}
std::set<std::string> Trace(OpBase* op, const VarBasePtrMap& inputs, std::set<std::string> Trace(OpBase* op, const VarBasePtrMap& inputs,
const VarBasePtrMap& outputs, VarBasePtrMap* outputs, // NOLINT
framework::AttributeMap attrs_map, framework::AttributeMap attrs_map,
const platform::Place expected_place, const platform::Place expected_place,
const bool stop_gradient = false); const bool stop_gradient = false);
......
...@@ -25,6 +25,7 @@ class VarBase; ...@@ -25,6 +25,7 @@ class VarBase;
class OpBase; class OpBase;
typedef std::map<std::string, std::vector<VarBase*>> VarBasePtrMap; typedef std::map<std::string, std::vector<VarBase*>> VarBasePtrMap;
typedef std::map<std::string, std::vector<const VarBase*>> ConstVarBasePtrMap;
typedef std::map<std::string, std::vector<OpBase*>> OpBasePtrMap; typedef std::map<std::string, std::vector<OpBase*>> OpBasePtrMap;
} // namespace imperative } // namespace imperative
......
...@@ -91,5 +91,5 @@ if(WITH_TESTING) ...@@ -91,5 +91,5 @@ if(WITH_TESTING)
add_subdirectory(tests/book) add_subdirectory(tests/book)
if(WITH_INFERENCE_API_TEST) if(WITH_INFERENCE_API_TEST)
add_subdirectory(tests/api) add_subdirectory(tests/api)
endif() endif()
endif() endif()
...@@ -110,7 +110,7 @@ set(TRANSFORMER_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/transformer") ...@@ -110,7 +110,7 @@ set(TRANSFORMER_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/transformer")
download_model_and_data(${TRANSFORMER_INSTALL_DIR} "temp%2Ftransformer_model.tar.gz" "temp%2Ftransformer_data.txt.tar.gz") download_model_and_data(${TRANSFORMER_INSTALL_DIR} "temp%2Ftransformer_model.tar.gz" "temp%2Ftransformer_data.txt.tar.gz")
inference_analysis_test(test_analyzer_transformer SRCS analyzer_transformer_tester.cc inference_analysis_test(test_analyzer_transformer SRCS analyzer_transformer_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt --batch_size=8) ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt --batch_size=8 SERIAL)
# ocr # ocr
set(OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/ocr") set(OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/ocr")
......
...@@ -183,10 +183,13 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) { ...@@ -183,10 +183,13 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
} }
// Easy for profiling independently. // Easy for profiling independently.
TEST(Analyzer_Transformer, profile) { void profile(bool use_mkldnn = false) {
AnalysisConfig cfg; AnalysisConfig cfg;
SetConfig(&cfg); SetConfig(&cfg);
std::vector<PaddleTensor> outputs; std::vector<PaddleTensor> outputs;
if (use_mkldnn) {
cfg.EnableMKLDNN();
}
std::vector<std::vector<PaddleTensor>> input_slots_all; std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all); SetInput(&input_slots_all);
...@@ -194,6 +197,11 @@ TEST(Analyzer_Transformer, profile) { ...@@ -194,6 +197,11 @@ TEST(Analyzer_Transformer, profile) {
input_slots_all, &outputs, FLAGS_num_threads); input_slots_all, &outputs, FLAGS_num_threads);
} }
TEST(Analyzer_Transformer, profile) { profile(); }
#ifdef PADDLE_WITH_MKLDNN
TEST(Analyzer_Transformer, profile_mkldnn) { profile(true); }
#endif
// Check the fuse status // Check the fuse status
TEST(Analyzer_Transformer, fuse_statis) { TEST(Analyzer_Transformer, fuse_statis) {
AnalysisConfig cfg; AnalysisConfig cfg;
...@@ -206,9 +214,12 @@ TEST(Analyzer_Transformer, fuse_statis) { ...@@ -206,9 +214,12 @@ TEST(Analyzer_Transformer, fuse_statis) {
} }
// Compare result of NativeConfig and AnalysisConfig // Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_Transformer, compare) { void compare(bool use_mkldnn = false) {
AnalysisConfig cfg; AnalysisConfig cfg;
SetConfig(&cfg); SetConfig(&cfg);
if (use_mkldnn) {
cfg.EnableMKLDNN();
}
std::vector<std::vector<PaddleTensor>> input_slots_all; std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all); SetInput(&input_slots_all);
...@@ -216,5 +227,10 @@ TEST(Analyzer_Transformer, compare) { ...@@ -216,5 +227,10 @@ TEST(Analyzer_Transformer, compare) {
reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all); reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
} }
TEST(Analyzer_Transformer, compare) { compare(); }
#ifdef PADDLE_WITH_MKLDNN
TEST(Analyzer_Transformer, compare_mkldnn) { compare(true /* use_mkldnn */); }
#endif
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
...@@ -178,10 +178,10 @@ Beam Search Decode Operator. This Operator constructs the full hypotheses for ...@@ -178,10 +178,10 @@ Beam Search Decode Operator. This Operator constructs the full hypotheses for
each source sentence by walking back along the LoDTensorArray Input(ids) each source sentence by walking back along the LoDTensorArray Input(ids)
whose lods can be used to restore the path in the beam search tree. whose lods can be used to restore the path in the beam search tree.
The Output(SentenceIds) and Output(SentenceScores) separately contain the The Output(SentenceIds) and Output(SentenceScores) separately contain the
generated id sequences and the corresponding scores. The shapes and lods of the generated id sequences and the corresponding scores. The shapes and lods of the
two LodTensor are same. The lod level is 2 and the two levels separately two LodTensor are same. The lod level is 2 and the two levels separately
indicate how many hypotheses each source sentence has and how many ids each indicate how many hypotheses each source sentence has and how many ids each
hypothesis has. hypothesis has.
)DOC"); )DOC");
} }
...@@ -203,15 +203,12 @@ class BeamSearchDecodeInferShape : public framework::InferShapeBase { ...@@ -203,15 +203,12 @@ class BeamSearchDecodeInferShape : public framework::InferShapeBase {
class BeamSearchDecodeInferVarType : public framework::VarTypeInference { class BeamSearchDecodeInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc& op_desc, void operator()(framework::InferVarTypeContext* ctx) const override {
framework::BlockDesc* block) const override { for (auto& o : ctx->Output("SentenceIds")) {
for (auto& o : op_desc.Output("SentenceIds")) { ctx->SetType(o, framework::proto::VarType::LOD_TENSOR);
auto& sentence_ids = block->FindRecursiveOrCreateVar(o);
sentence_ids.SetType(framework::proto::VarType::LOD_TENSOR);
} }
for (auto& o : op_desc.Output("SentenceScores")) { for (auto& o : ctx->Output("SentenceScores")) {
auto& sentence_scores = block->FindRecursiveOrCreateVar(o); ctx->SetType(o, framework::proto::VarType::LOD_TENSOR);
sentence_scores.SetType(framework::proto::VarType::LOD_TENSOR);
} }
} }
}; };
......
...@@ -65,7 +65,7 @@ class BeamSearchOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -65,7 +65,7 @@ class BeamSearchOpMaker : public framework::OpProtoAndCheckerMaker {
.SetDefault(true); .SetDefault(true);
AddComment(R"DOC( AddComment(R"DOC(
This operator does the search in beams for one time step. This operator does the search in beams for one time step.
Specifically, it selects the top-K candidate word ids of current step from Specifically, it selects the top-K candidate word ids of current step from
Input(ids) according to their Input(scores) for all source sentences, Input(ids) according to their Input(scores) for all source sentences,
where K is Attr(beam_size) and Input(ids), Input(scores) are predicted results where K is Attr(beam_size) and Input(ids), Input(scores) are predicted results
...@@ -120,15 +120,12 @@ class BeamSearchOp : public framework::OperatorWithKernel { ...@@ -120,15 +120,12 @@ class BeamSearchOp : public framework::OperatorWithKernel {
class BeamSearchInferVarType : public framework::VarTypeInference { class BeamSearchInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { for (auto &o : ctx->Output("selected_ids")) {
for (auto &o : op_desc.Output("selected_ids")) { ctx->SetType(o, framework::proto::VarType::LOD_TENSOR);
auto &selected_ids = block->FindRecursiveOrCreateVar(o);
selected_ids.SetType(framework::proto::VarType::LOD_TENSOR);
} }
for (auto &o : op_desc.Output("selected_scores")) { for (auto &o : ctx->Output("selected_scores")) {
auto &selected_scores = block->FindRecursiveOrCreateVar(o); ctx->SetType(o, framework::proto::VarType::LOD_TENSOR);
selected_scores.SetType(framework::proto::VarType::LOD_TENSOR);
} }
} }
}; };
......
...@@ -50,9 +50,19 @@ class ConcatOp : public framework::OperatorWithKernel { ...@@ -50,9 +50,19 @@ class ConcatOp : public framework::OperatorWithKernel {
if (j == axis) { if (j == axis) {
out_dims[axis] += ins[i][j]; out_dims[axis] += ins[i][j];
} else { } else {
PADDLE_ENFORCE_EQ(out_dims[j], ins[i][j], if (ctx->IsRuntime()) {
"Input tensors should have the same " // check all shape in run time
"elements except the specify axis."); PADDLE_ENFORCE_EQ(out_dims[j], ins[i][j],
"Input tensors should have the same "
"elements except the specify axis.");
} else {
// not check -1 with other in compile time
if (out_dims[j] != -1 && ins[i][j] != -1) {
PADDLE_ENFORCE_EQ(out_dims[j], ins[i][j],
"Input tensors should have the same "
"elements except the specify axis.");
}
}
} }
} }
} }
......
...@@ -93,11 +93,9 @@ execution. ...@@ -93,11 +93,9 @@ execution.
class GetPlacesInferVarType : public framework::VarTypeInference { class GetPlacesInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { for (auto &o_name : ctx->Output("Out")) {
for (auto &o_name : op_desc.Output("Out")) { ctx->SetType(o_name, framework::proto::VarType::PLACE_LIST);
block->FindRecursiveOrCreateVar(o_name).SetType(
framework::proto::VarType::PLACE_LIST);
} }
} }
}; };
......
...@@ -100,16 +100,13 @@ class WriteToArrayInferShape : public framework::InferShapeBase { ...@@ -100,16 +100,13 @@ class WriteToArrayInferShape : public framework::InferShapeBase {
class WriteToArrayInferVarType : public framework::VarTypeInference { class WriteToArrayInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { auto x_name = ctx->Input("X")[0];
auto x_name = op_desc.Input("X")[0]; auto out_name = ctx->Output("Out")[0];
auto out_name = op_desc.Output("Out")[0];
VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY"; VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY";
auto &out = block->FindRecursiveOrCreateVar(out_name); ctx->SetType(out_name, framework::proto::VarType::LOD_TENSOR_ARRAY);
out.SetType(framework::proto::VarType::LOD_TENSOR_ARRAY); if (ctx->HasVar(x_name)) {
auto *x = block->FindVarRecursive(x_name); ctx->SetDataType(out_name, ctx->GetDataType(x_name));
if (x != nullptr) {
out.SetDataType(x->GetDataType());
} }
} }
}; };
......
...@@ -365,19 +365,16 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker { ...@@ -365,19 +365,16 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
class WhileGradOpVarTypeInference : public framework::VarTypeInference { class WhileGradOpVarTypeInference : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { auto p_names = ctx->Input(kX);
auto p_names = op_desc.Input(kX); auto pg_ig_names = ctx->Output(framework::GradVarName(kX));
auto pg_ig_names = op_desc.Output(framework::GradVarName(kX));
for (size_t i = 0; i < p_names.size(); ++i) { for (size_t i = 0; i < p_names.size(); ++i) {
auto &p_var = detail::Ref(block->FindVarRecursive(p_names[i])); if (ctx->HasVar(pg_ig_names[i])) {
auto *g_var = block->FindVarRecursive(pg_ig_names[i]);
if (g_var != nullptr) { // Gradient could be @EMPTY@
VLOG(5) << "Setting " << pg_ig_names[i] << " following " << p_names[i] VLOG(5) << "Setting " << pg_ig_names[i] << " following " << p_names[i]
<< " type: " << p_var.GetType(); << " type: " << ctx->GetType(p_names[i]);
g_var->SetType(p_var.GetType()); ctx->SetType(pg_ig_names[i], ctx->GetType(p_names[i]));
g_var->SetDataType(p_var.GetDataType()); ctx->SetDataType(pg_ig_names[i], ctx->GetDataType(p_names[i]));
} }
} }
} }
......
...@@ -56,8 +56,7 @@ class FakeInitOp : public framework::OperatorBase { ...@@ -56,8 +56,7 @@ class FakeInitOp : public framework::OperatorBase {
class FakeInitOpVarTypeInference : public framework::VarTypeInference { class FakeInitOpVarTypeInference : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {}
framework::BlockDesc *block) const override {}
}; };
class FakeInitOpMaker : public framework::OpProtoAndCheckerMaker { class FakeInitOpMaker : public framework::OpProtoAndCheckerMaker {
......
...@@ -114,11 +114,10 @@ class MergeIdsOp : public framework::OperatorWithKernel { ...@@ -114,11 +114,10 @@ class MergeIdsOp : public framework::OperatorWithKernel {
class MergeIdsOpInferVarType : public framework::VarTypeInference { class MergeIdsOpInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { auto input_type = ctx->GetType(ctx->Input("Ids")[0]);
auto *input_var = block->Var(op_desc.Input("Ids")[0]); for (auto &out_var : ctx->Output("Out")) {
for (auto &out_var : op_desc.Output("Out")) { ctx->SetType(out_var, input_type);
block->Var(out_var)->SetType(input_var->GetType());
} }
} }
}; };
......
...@@ -14,6 +14,8 @@ limitations under the License. */ ...@@ -14,6 +14,8 @@ limitations under the License. */
#include "paddle/fluid/operators/distributed_ops/split_ids_op.h" #include "paddle/fluid/operators/distributed_ops/split_ids_op.h"
#include <memory>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -71,11 +73,10 @@ class SplitIdsOp : public framework::OperatorWithKernel { ...@@ -71,11 +73,10 @@ class SplitIdsOp : public framework::OperatorWithKernel {
class SplitIdsOpInferVarType : public framework::VarTypeInference { class SplitIdsOpInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { auto input_type = ctx->GetType(ctx->Input("Ids")[0]);
auto *input_var = block->Var(op_desc.Input("Ids")[0]); for (auto &out_var : ctx->Output("Out")) {
for (auto &out_var : op_desc.Output("Out")) { ctx->SetType(out_var, input_type);
block->Var(out_var)->SetType(input_var->GetType());
} }
} }
}; };
......
...@@ -55,17 +55,8 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const { ...@@ -55,17 +55,8 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const {
"The input tensor Input's rank of FCOp should be larger than " "The input tensor Input's rank of FCOp should be larger than "
"in_num_col_dims."); "in_num_col_dims.");
auto in_mat_dims = framework::flatten_to_2d(in_dims, in_num_col_dims);
PADDLE_ENFORCE_EQ(
in_mat_dims[1], w_dims[0],
"Fully Connected input and weigth size do not match. %s, %s");
std::vector<int64_t> output_dims; std::vector<int64_t> output_dims;
output_dims.reserve(static_cast<size_t>(in_num_col_dims + 1)); FCOutputSize(in_dims, w_dims, output_dims, in_num_col_dims);
for (int i = 0; i < in_num_col_dims; ++i) {
output_dims.push_back(in_dims[i]);
}
output_dims.push_back(w_dims[1]);
ctx->SetOutputDim("Out", framework::make_ddim(output_dims)); ctx->SetOutputDim("Out", framework::make_ddim(output_dims));
ctx->ShareLoD("Input", "Out"); ctx->ShareLoD("Input", "Out");
...@@ -128,6 +119,9 @@ void FCOpMaker::Make() { ...@@ -128,6 +119,9 @@ void FCOpMaker::Make() {
AddAttr<bool>("use_mkldnn", AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel") "(bool, default false) Only used in mkldnn kernel")
.SetDefault(false); .SetDefault(false);
AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape,
"Skip calling InferShape() function in the runtime.")
.SetDefault(true);
AddComment(R"DOC( AddComment(R"DOC(
Fully Connected Operator. Fully Connected Operator.
...@@ -142,13 +136,20 @@ class FCOpKernel : public framework::OpKernel<T> { ...@@ -142,13 +136,20 @@ class FCOpKernel : public framework::OpKernel<T> {
void Compute(const paddle::framework::ExecutionContext& ctx) const override { void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()), PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace."); "It must use CPUPlace.");
auto input = ctx.Input<Tensor>("Input"); auto input = ctx.Input<framework::LoDTensor>("Input");
auto w = ctx.Input<Tensor>("W"); auto w = ctx.Input<Tensor>("W");
auto bias = ctx.Input<Tensor>("Bias"); auto bias = ctx.Input<Tensor>("Bias");
auto output = ctx.Output<Tensor>("Out"); auto output = ctx.Output<framework::LoDTensor>("Out");
int in_num_col_dims = ctx.Attr<int>("in_num_col_dims");
auto w_dims = w->dims(); auto w_dims = w->dims();
std::vector<int64_t> output_dims;
FCOutputSize(input->dims(), w_dims, output_dims, in_num_col_dims);
output->Resize(framework::make_ddim(output_dims));
output->set_lod(input->lod());
auto out_dims = output->dims(); auto out_dims = output->dims();
int M = framework::product(out_dims) / out_dims[out_dims.size() - 1]; int M = framework::product(out_dims) / w_dims[1];
const T* input_data = input->data<T>(); const T* input_data = input->data<T>();
const T* w_data = w->data<T>(); const T* w_data = w->data<T>();
......
...@@ -48,5 +48,21 @@ class FCOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -48,5 +48,21 @@ class FCOpMaker : public framework::OpProtoAndCheckerMaker {
void Make() override; void Make() override;
}; };
inline void FCOutputSize(const framework::DDim& in_dims,
const framework::DDim& w_dims,
std::vector<int64_t>& out_dims, // NOLINT
int in_num_col_dims) {
auto in_mat_dims = framework::flatten_to_2d(in_dims, in_num_col_dims);
PADDLE_ENFORCE_EQ(
in_mat_dims[1], w_dims[0],
"Fully Connected input and weigth size do not match. %s, %s");
out_dims.reserve(static_cast<size_t>(in_num_col_dims + 1));
for (int i = 0; i < in_num_col_dims; ++i) {
out_dims.push_back(in_dims[i]);
}
out_dims.push_back(w_dims[1]);
}
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -39,12 +39,11 @@ class FillConstantOp : public framework::OperatorWithKernel { ...@@ -39,12 +39,11 @@ class FillConstantOp : public framework::OperatorWithKernel {
class FillConstantOpVarTypeInference : public framework::VarTypeInference { class FillConstantOpVarTypeInference : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc& op_desc, void operator()(framework::InferVarTypeContext* ctx) const override {
framework::BlockDesc* block) const override {
auto data_type = static_cast<framework::proto::VarType::Type>( auto data_type = static_cast<framework::proto::VarType::Type>(
boost::get<int>(op_desc.GetAttr("dtype"))); boost::get<int>(ctx->GetAttr("dtype")));
auto& out_var_name = op_desc.Output("Out").front(); auto& out_var_name = ctx->Output("Out").front();
block->Var(out_var_name)->SetDataType(data_type); ctx->SetDataType(out_var_name, data_type);
} }
}; };
......
...@@ -88,7 +88,8 @@ class FusedEmbeddingSeqPoolOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -88,7 +88,8 @@ class FusedEmbeddingSeqPoolOpMaker : public framework::OpProtoAndCheckerMaker {
"(boolean, default false) " "(boolean, default false) "
"Sparse update.") "Sparse update.")
.SetDefault(false); .SetDefault(false);
AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape, "") AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape,
"Skip calling InferShape() function in the runtime.")
.SetDefault(true); .SetDefault(true);
AddComment(R"DOC( AddComment(R"DOC(
FusedEmbeddingSeqPool Operator. FusedEmbeddingSeqPool Operator.
...@@ -137,22 +138,20 @@ class FusedEmbeddingSeqPoolOpGrad : public framework::OperatorWithKernel { ...@@ -137,22 +138,20 @@ class FusedEmbeddingSeqPoolOpGrad : public framework::OperatorWithKernel {
class FusedEmbeddingSeqPoolOpGradVarTypeInference class FusedEmbeddingSeqPoolOpGradVarTypeInference
: public framework::VarTypeInference { : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc& op_desc, void operator()(framework::InferVarTypeContext* ctx) const override {
framework::BlockDesc* block) const override { auto out_var_name = ctx->Output(framework::GradVarName("W")).front();
auto out_var_name = op_desc.Output(framework::GradVarName("W")).front(); auto attr = ctx->GetAttr("is_sparse");
auto attr = op_desc.GetAttr("is_sparse");
bool is_sparse = boost::get<bool>(attr); bool is_sparse = boost::get<bool>(attr);
if (is_sparse) { if (is_sparse) {
VLOG(3) << "fused_embedding_seq_pool_grad op " VLOG(3) << "fused_embedding_seq_pool_grad op "
<< framework::GradVarName("W") << " is set to SelectedRows"; << framework::GradVarName("W") << " is set to SelectedRows";
block->Var(out_var_name) ctx->SetType(out_var_name, framework::proto::VarType::SELECTED_ROWS);
->SetType(framework::proto::VarType::SELECTED_ROWS);
} else { } else {
VLOG(3) << "fused_embedding_seq_pool_grad op " VLOG(3) << "fused_embedding_seq_pool_grad op "
<< framework::GradVarName("W") << " is set to LoDTensor"; << framework::GradVarName("W") << " is set to LoDTensor";
block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR); ctx->SetType(out_var_name, framework::proto::VarType::LOD_TENSOR);
} }
block->Var(out_var_name)->SetDataType(block->Var("W")->GetDataType()); ctx->SetDataType(out_var_name, ctx->GetDataType(ctx->Input("W")[0]));
} }
}; };
......
...@@ -81,15 +81,12 @@ GetTensorFromSelectedRows is used to get the tensor from SelectedRows. ...@@ -81,15 +81,12 @@ GetTensorFromSelectedRows is used to get the tensor from SelectedRows.
class GetTensorFromSelectedRowsOpVarTypeInference class GetTensorFromSelectedRowsOpVarTypeInference
: public framework::VarTypeInference { : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const { // NOLINT
framework::BlockDesc *block) const final { auto out_var_name = ctx->Output("Out").front();
auto out_var_name = op_desc.Output("Out").front(); auto in_var_name = ctx->Input("X").front();
auto in_var_name = op_desc.Input("X").front();
ctx->SetType(out_var_name, framework::proto::VarType::LOD_TENSOR);
auto out_var = block->FindRecursiveOrCreateVar(out_var_name); ctx->SetDataType(out_var_name, ctx->GetDataType(in_var_name));
auto in_var = block->FindRecursiveOrCreateVar(in_var_name);
out_var.SetType(framework::proto::VarType::LOD_TENSOR);
out_var.SetDataType(in_var.GetDataType());
} }
}; };
......
...@@ -54,7 +54,8 @@ $$Out = scale * X$$ ...@@ -54,7 +54,8 @@ $$Out = scale * X$$
)DOC"); )DOC");
AddAttr<int>("num_hash", "").SetDefault(1); AddAttr<int>("num_hash", "").SetDefault(1);
AddAttr<int>("mod_by", "").SetDefault(100000); AddAttr<int>("mod_by", "").SetDefault(100000);
AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape, "") AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape,
"Skip calling InferShape() function in the runtime.")
.SetDefault(true); .SetDefault(true);
} }
}; };
......
...@@ -197,38 +197,32 @@ class HierarchicalSigmoidGradOp : public framework::OperatorWithKernel { ...@@ -197,38 +197,32 @@ class HierarchicalSigmoidGradOp : public framework::OperatorWithKernel {
class HierarchicalSigmoidGradOpGradVarTypeInference class HierarchicalSigmoidGradOpGradVarTypeInference
: public framework::VarTypeInference { : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc& op_desc, void operator()(framework::InferVarTypeContext* ctx) const override {
framework::BlockDesc* block) const override { auto w_grad_var_name = ctx->Output(framework::GradVarName("W")).front();
auto w_grad_var_name = op_desc.Output(framework::GradVarName("W")).front(); auto bias_grad_var_name_vec = ctx->Output(framework::GradVarName("Bias"));
auto bias_grad_var_name_vec =
op_desc.Output(framework::GradVarName("Bias"));
std::string bias_grad_var_name; std::string bias_grad_var_name;
bool hasBias = false; bool hasBias = false;
if (bias_grad_var_name_vec.size()) { if (bias_grad_var_name_vec.size()) {
hasBias = true; hasBias = true;
bias_grad_var_name = bias_grad_var_name = ctx->Output(framework::GradVarName("Bias")).front();
op_desc.Output(framework::GradVarName("Bias")).front();
} }
auto attr = op_desc.GetAttr("is_sparse"); auto attr = ctx->GetAttr("is_sparse");
bool is_sparse = boost::get<bool>(attr); bool is_sparse = boost::get<bool>(attr);
if (is_sparse) { if (is_sparse) {
VLOG(30) << "hierarchical_sigmoid_grad op " << framework::GradVarName("W") VLOG(30) << "hierarchical_sigmoid_grad op " << framework::GradVarName("W")
<< " is set to SelectedRows"; << " is set to SelectedRows";
block->Var(w_grad_var_name) ctx->SetType(w_grad_var_name, framework::proto::VarType::SELECTED_ROWS);
->SetType(framework::proto::VarType::SELECTED_ROWS);
} else { } else {
VLOG(30) << "hierarchical_sigmoid_grad op " << framework::GradVarName("W") VLOG(30) << "hierarchical_sigmoid_grad op " << framework::GradVarName("W")
<< " is set to LoDTensor"; << " is set to LoDTensor";
block->Var(w_grad_var_name) ctx->SetType(w_grad_var_name, framework::proto::VarType::LOD_TENSOR);
->SetType(framework::proto::VarType::LOD_TENSOR);
} }
if (hasBias) { if (hasBias) {
VLOG(30) << "hierarchical_sigmoid_grad op " VLOG(30) << "hierarchical_sigmoid_grad op "
<< framework::GradVarName("Bias") << " is set to LoDTensor"; << framework::GradVarName("Bias") << " is set to LoDTensor";
block->Var(bias_grad_var_name) ctx->SetType(bias_grad_var_name, framework::proto::VarType::LOD_TENSOR);
->SetType(framework::proto::VarType::LOD_TENSOR);
} }
block->Var(w_grad_var_name)->SetDataType(block->Var("W")->GetDataType()); ctx->SetDataType(w_grad_var_name, ctx->GetDataType(ctx->Input("W")[0]));
} }
}; };
......
...@@ -64,11 +64,9 @@ class LoDRankTableInferShape : public framework::InferShapeBase { ...@@ -64,11 +64,9 @@ class LoDRankTableInferShape : public framework::InferShapeBase {
class LoDRankTableInferVarType : public framework::VarTypeInference { class LoDRankTableInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { for (auto &o : ctx->Output("Out")) {
for (auto &o : op_desc.Output("Out")) { ctx->SetType(o, framework::proto::VarType::LOD_RANK_TABLE);
block->FindRecursiveOrCreateVar(o).SetType(
framework::proto::VarType::LOD_RANK_TABLE);
} }
} }
}; };
......
...@@ -201,10 +201,9 @@ class LoDTensorToArrayInferShape : public framework::InferShapeBase { ...@@ -201,10 +201,9 @@ class LoDTensorToArrayInferShape : public framework::InferShapeBase {
class LoDTensorToArrayInferVarType : public framework::VarTypeInference { class LoDTensorToArrayInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { for (auto &out_var : ctx->Output("Out")) {
for (auto &out_var : op_desc.Output("Out")) { ctx->SetType(out_var, framework::proto::VarType::LOD_TENSOR_ARRAY);
block->Var(out_var)->SetType(framework::proto::VarType::LOD_TENSOR_ARRAY);
} }
} }
}; };
......
...@@ -147,22 +147,20 @@ class LookupTableOpGrad : public framework::OperatorWithKernel { ...@@ -147,22 +147,20 @@ class LookupTableOpGrad : public framework::OperatorWithKernel {
class LookupTableOpGradVarTypeInference : public framework::VarTypeInference { class LookupTableOpGradVarTypeInference : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc& op_desc, void operator()(framework::InferVarTypeContext* ctx) const override {
framework::BlockDesc* block) const override { auto out_var_name = ctx->Output(framework::GradVarName("W")).front();
auto out_var_name = op_desc.Output(framework::GradVarName("W")).front(); auto attr = ctx->GetAttr("is_sparse");
auto attr = op_desc.GetAttr("is_sparse");
bool is_sparse = boost::get<bool>(attr); bool is_sparse = boost::get<bool>(attr);
if (is_sparse) { if (is_sparse) {
VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W") VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W")
<< " is set to SelectedRows"; << " is set to SelectedRows";
block->Var(out_var_name) ctx->SetType(out_var_name, framework::proto::VarType::SELECTED_ROWS);
->SetType(framework::proto::VarType::SELECTED_ROWS);
} else { } else {
VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W") VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W")
<< " is set to LoDTensor"; << " is set to LoDTensor";
block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR); ctx->SetType(out_var_name, framework::proto::VarType::LOD_TENSOR);
} }
block->Var(out_var_name)->SetDataType(block->Var("W")->GetDataType()); ctx->SetDataType(out_var_name, ctx->GetDataType(ctx->Input("W")[0]));
} }
}; };
......
...@@ -123,7 +123,7 @@ class FCMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -123,7 +123,7 @@ class FCMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>(); auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto& mkldnn_engine = dev_ctx.GetEngine();
auto input = ctx.Input<Tensor>("Input"); auto input = ctx.Input<framework::LoDTensor>("Input");
auto w = ctx.Input<Tensor>("W"); auto w = ctx.Input<Tensor>("W");
auto bias = ctx.Input<Tensor>("Bias"); auto bias = ctx.Input<Tensor>("Bias");
...@@ -151,7 +151,13 @@ class FCMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -151,7 +151,13 @@ class FCMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
const T* input_data = input->data<T>(); const T* input_data = input->data<T>();
const T* w_data = w->data<T>(); const T* w_data = w->data<T>();
auto output = ctx.Output<Tensor>("Out"); auto output = ctx.Output<framework::LoDTensor>("Out");
int in_num_col_dims = ctx.Attr<int>("in_num_col_dims");
std::vector<int64_t> output_dims;
FCOutputSize(input->dims(), w->dims(), output_dims, in_num_col_dims);
output->Resize(framework::make_ddim(output_dims));
output->set_lod(input->lod());
T* output_data = output->mutable_data<T>(ctx.GetPlace()); T* output_data = output->mutable_data<T>(ctx.GetPlace());
auto dst_memory = mem.dst(output_data); auto dst_memory = mem.dst(output_data);
...@@ -204,19 +210,21 @@ class FCMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -204,19 +210,21 @@ class FCMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
Tensor* input_grad = ctx.Output<Tensor>(framework::GradVarName("Input")); Tensor* input_grad = ctx.Output<Tensor>(framework::GradVarName("Input"));
Tensor* w_grad = ctx.Output<Tensor>(framework::GradVarName("W")); Tensor* w_grad = ctx.Output<Tensor>(framework::GradVarName("W"));
const Tensor* input = ctx.Input<Tensor>("Input");
const T* input_data = input->data<T>();
const Tensor* w = ctx.Input<Tensor>("W");
const T* w_data = w->data<T>();
if (input_grad) { if (input_grad) {
input_grad->Resize(input->dims());
input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace()); input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
} }
if (w_grad) { if (w_grad) {
w_grad->Resize(w->dims());
w_grad_data = w_grad->mutable_data<T>(ctx.GetPlace()); w_grad_data = w_grad->mutable_data<T>(ctx.GetPlace());
} }
const Tensor* input = ctx.Input<Tensor>("Input");
const T* input_data = input->data<T>();
const Tensor* w = ctx.Input<Tensor>("W");
const T* w_data = w->data<T>();
const Tensor* out_grad = ctx.Input<Tensor>(framework::GradVarName("Out")); const Tensor* out_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
const T* out_grad_data = out_grad->data<T>(); const T* out_grad_data = out_grad->data<T>();
......
...@@ -73,6 +73,29 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -73,6 +73,29 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
} }
}; };
template <typename T>
class TransposeINT8MKLDNNOpKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
std::vector<int> axis_int8 = {0, 2, 3, 1};
if (axis.size() != 1) {
PADDLE_ENFORCE_EQ(axis.size(), axis_int8.size());
for (size_t i = 0; i < axis.size(); i++) {
PADDLE_ENFORCE_EQ(axis[i], axis_int8[i],
"Current INT8 MKLDNN Transpose kernel only surpport "
"axis with [0, 2, 3, 1] due to MKL-DNN kernel "
"implementation.");
}
}
auto* input = ctx.Input<Tensor>("X");
auto* output = ctx.Output<Tensor>("Out");
output->ShareDataWith(*input);
output->set_layout(DataLayout::kMKLDNN);
output->set_format(input->format());
}
};
template <typename T> template <typename T>
class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
public: public:
...@@ -140,7 +163,10 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -140,7 +163,10 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_KERNEL(transpose2, MKLDNN, ::paddle::platform::CPUPlace, REGISTER_OP_KERNEL(transpose2, MKLDNN, ::paddle::platform::CPUPlace,
ops::TransposeMKLDNNOpKernel<float>); ops::TransposeMKLDNNOpKernel<float>,
ops::TransposeINT8MKLDNNOpKernel<uint8_t>,
ops::TransposeINT8MKLDNNOpKernel<int8_t>);
REGISTER_OP_KERNEL(transpose, MKLDNN, ::paddle::platform::CPUPlace, REGISTER_OP_KERNEL(transpose, MKLDNN, ::paddle::platform::CPUPlace,
ops::TransposeMKLDNNOpKernel<float>); ops::TransposeMKLDNNOpKernel<float>);
......
...@@ -60,12 +60,9 @@ class NCCLInitOp : public framework::OperatorBase { ...@@ -60,12 +60,9 @@ class NCCLInitOp : public framework::OperatorBase {
class NCCLInitOpVarTypeInference : public framework::VarTypeInference { class NCCLInitOpVarTypeInference : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { auto out_var_name = ctx->Output("Communicator").front();
auto out_var_name = op_desc.Output("Communicator").front(); ctx->SetType(out_var_name, framework::proto::VarType::RAW);
auto &out_var = block->FindRecursiveOrCreateVar(out_var_name);
auto var_type = framework::proto::VarType::RAW;
out_var.SetType(var_type);
} }
}; };
......
...@@ -237,23 +237,21 @@ class NCEOpGrad : public framework::OperatorWithKernel { ...@@ -237,23 +237,21 @@ class NCEOpGrad : public framework::OperatorWithKernel {
class NCEOpGradVarTypeInference : public framework::VarTypeInference { class NCEOpGradVarTypeInference : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { auto weight_grad = ctx->Output(framework::GradVarName("Weight")).front();
auto weight_grad = op_desc.Output(framework::GradVarName("Weight")).front();
auto attr = op_desc.GetAttr("is_sparse"); auto attr = ctx->GetAttr("is_sparse");
bool is_sparse = boost::get<bool>(attr); bool is_sparse = boost::get<bool>(attr);
if (is_sparse) { if (is_sparse) {
VLOG(3) << "nce_op_grad op " << weight_grad << " and " VLOG(3) << "nce_op_grad op " << weight_grad << " and "
<< " is set to SelectedRows"; << " is set to SelectedRows";
block->Var(weight_grad) ctx->SetType(weight_grad, framework::proto::VarType::SELECTED_ROWS);
->SetType(framework::proto::VarType::SELECTED_ROWS);
} else { } else {
VLOG(3) << "nce_op_grad op " << weight_grad << " and " VLOG(3) << "nce_op_grad op " << weight_grad << " and "
<< " is set to LoDTensor"; << " is set to LoDTensor";
block->Var(weight_grad)->SetType(framework::proto::VarType::LOD_TENSOR); ctx->SetType(weight_grad, framework::proto::VarType::LOD_TENSOR);
} }
block->Var(weight_grad)->SetDataType(block->Var("Input")->GetDataType()); ctx->SetDataType(weight_grad, ctx->GetDataType(ctx->Input("Input")[0]));
} }
}; };
......
...@@ -37,8 +37,7 @@ class NgraphEngineOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -37,8 +37,7 @@ class NgraphEngineOpMaker : public framework::OpProtoAndCheckerMaker {
class NgraphEngineInferVarType : public framework::VarTypeInference { class NgraphEngineInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {}
framework::BlockDesc *block) const override {}
}; };
} // namespace operators } // namespace operators
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once #pragma once
#include <math.h> // for sqrt in CPU and CUDA #include <math.h> // for sqrt in CPU and CUDA
#include <Eigen/Dense> #include <Eigen/Dense>
#include <unordered_map>
#include <vector> #include <vector>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/threadpool.h" #include "paddle/fluid/framework/threadpool.h"
...@@ -311,17 +312,17 @@ struct SparseAdamFunctor<T, CPUAdam> { ...@@ -311,17 +312,17 @@ struct SparseAdamFunctor<T, CPUAdam> {
T beta1_pow = *beta1_pow_; T beta1_pow = *beta1_pow_;
T beta2_pow = *beta2_pow_; T beta2_pow = *beta2_pow_;
lr *= sqrt(1 - beta2_pow) / (1 - beta1_pow); lr *= sqrt(1 - beta2_pow) / (1 - beta1_pow);
size_t row_count = numel / row_numel_; int64_t row_count = static_cast<int64_t>(numel / row_numel_);
for (size_t i = 0U, j = 0U; i != row_count; ++i) { for (int64_t i = 0, j = 0; i != row_count; ++i) {
if (i == *(rows_ + j)) { if (i == *(rows_ + j)) {
for (size_t k = 0U; k != row_numel_; ++k) { for (int64_t k = 0; k != row_numel_; ++k) {
T g = grad_[j * row_numel_ + k]; T g = grad_[j * row_numel_ + k];
adam_update(i * row_numel_ + k, g); adam_update(i * row_numel_ + k, g);
} }
++j; ++j;
} else { } else {
for (size_t k = 0U; k != row_numel_; ++k) { for (int64_t k = 0; k != row_numel_; ++k) {
T mom1 = moment1_[i * row_numel_ + k]; T mom1 = moment1_[i * row_numel_ + k];
T mom2 = moment2_[i * row_numel_ + k]; T mom2 = moment2_[i * row_numel_ + k];
T p = param_[i * row_numel_ + k]; T p = param_[i * row_numel_ + k];
...@@ -427,43 +428,23 @@ class AdamOpKernel : public framework::OpKernel<T> { ...@@ -427,43 +428,23 @@ class AdamOpKernel : public framework::OpKernel<T> {
} }
} }
framework::SelectedRows cpu_grad_merge; framework::SelectedRows tmp_grad_merge;
const framework::SelectedRows* grad_merge_ptr; const framework::SelectedRows* grad_merge_ptr;
if (is_strict_sorted) { if (is_strict_sorted) {
grad_merge_ptr = &grad; grad_merge_ptr = &grad;
} else { } else {
// merge duplicated rows if any. // merge duplicated rows if any.
// The rows of grad_merge have been sorted inside MergeAdd functor // The rows of grad_merge have been sorted inside MergeAdd functor
framework::SelectedRows* grad_merge_var;
scatter::MergeAdd<DeviceContext, T> merge_func; scatter::MergeAdd<DeviceContext, T> merge_func;
if (platform::is_cpu_place(ctx.GetPlace())) {
grad_merge_var = &cpu_grad_merge;
} else {
// FIXME(qiao): GPU also need to fix this
grad_merge_var = const_cast<framework::Scope&>(ctx.scope())
.Var()
->GetMutable<framework::SelectedRows>();
}
merge_func(ctx.template device_context<DeviceContext>(), grad, merge_func(ctx.template device_context<DeviceContext>(), grad,
grad_merge_var, true); &tmp_grad_merge, true);
grad_merge_ptr = grad_merge_var; grad_merge_ptr = &tmp_grad_merge;
} }
auto& grad_merge = *grad_merge_ptr; auto& grad_merge = *grad_merge_ptr;
auto& grad_tensor = grad_merge.value(); auto& grad_tensor = grad_merge.value();
const T* grad_data = grad_tensor.template data<T>(); const T* grad_data = grad_tensor.template data<T>();
const int64_t* rows = nullptr; const int64_t* rows = grad_merge.rows().Data(ctx.GetPlace());
// When compiled without CUDA, the CUDAData() interface should not be
// provided.
#if defined(PADDLE_WITH_CUDA)
if (platform::is_gpu_place(ctx.GetPlace())) {
rows = grad_merge.rows().CUDAData(ctx.GetPlace());
} else {
#endif
rows = grad_merge.rows().data();
#if defined(PADDLE_WITH_CUDA)
}
#endif
auto row_numel = grad_tensor.numel() / grad_merge.rows().size(); auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
if (platform::is_cpu_place(ctx.GetPlace())) { if (platform::is_cpu_place(ctx.GetPlace())) {
...@@ -488,7 +469,7 @@ class AdamOpKernel : public framework::OpKernel<T> { ...@@ -488,7 +469,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
} }
} }
#ifndef _WIN32 #ifndef _WIN32
else if (FLAGS_inner_op_parallelism > 1 && else if (FLAGS_inner_op_parallelism > 1 && // NOLINT
min_row_size_to_use_multithread > 0 && min_row_size_to_use_multithread > 0 &&
param.dims()[0] > min_row_size_to_use_multithread) { param.dims()[0] > min_row_size_to_use_multithread) {
VLOG(3) << "use multi thread, inner_op_parallelism=" VLOG(3) << "use multi thread, inner_op_parallelism="
...@@ -516,11 +497,11 @@ class AdamOpKernel : public framework::OpKernel<T> { ...@@ -516,11 +497,11 @@ class AdamOpKernel : public framework::OpKernel<T> {
for (int i = 0; i < FLAGS_inner_op_parallelism; ++i) { for (int i = 0; i < FLAGS_inner_op_parallelism; ++i) {
int64_t start = i * line_in_each_thread; int64_t start = i * line_in_each_thread;
int64_t end = (i + 1) * line_in_each_thread; int64_t end = (i + 1) * line_in_each_thread;
if (start >= param_row_count) { if (start >= static_cast<int64_t>(param_row_count)) {
break; break;
} }
if (end > param_row_count) { if (end > static_cast<int64_t>(param_row_count)) {
end = param_row_count; end = static_cast<int64_t>(param_row_count);
} }
fs.push_back( fs.push_back(
framework::Async([&functor, &row_id_to_grad_row_offset, framework::Async([&functor, &row_id_to_grad_row_offset,
...@@ -545,8 +526,8 @@ class AdamOpKernel : public framework::OpKernel<T> { ...@@ -545,8 +526,8 @@ class AdamOpKernel : public framework::OpKernel<T> {
} }
for (size_t i = 0; i < fs.size(); ++i) fs[i].wait(); for (size_t i = 0; i < fs.size(); ++i) fs[i].wait();
} }
#endif // !_WIN32 #endif // !_WIN32
else { else { // NOLINT
functor(param.numel()); functor(param.numel());
} }
} else if (platform::is_gpu_place(ctx.GetPlace())) { } else if (platform::is_gpu_place(ctx.GetPlace())) {
......
...@@ -56,9 +56,9 @@ This optimizer use LARS (https://arxiv.org/abs/1708.03888) to optimize each ...@@ -56,9 +56,9 @@ This optimizer use LARS (https://arxiv.org/abs/1708.03888) to optimize each
weight using a local learning rate: weight using a local learning rate:
$$ $$
local\_lr = \eta * local\_lr = \eta *
\frac{\left \| param \right \|}{\left \| grad \right \| + \beta *\left \| param \right \|} \\ \frac{\left \| param \right \|}{\left \| grad \right \| + \beta *\left \| param \right \|} \\
velocity = mu * velocity + velocity = mu * velocity +
local\_lr * (grad + \beta * param) \\ local\_lr * (grad + \beta * param) \\
param = param - velocity. \\ param = param - velocity. \\
$$ $$
...@@ -72,8 +72,7 @@ use L2 regularizers in case of using LARS. ...@@ -72,8 +72,7 @@ use L2 regularizers in case of using LARS.
class LarsMomentumOpVarTypeInference : public framework::VarTypeInference { class LarsMomentumOpVarTypeInference : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext* ctx) const override {}
framework::BlockDesc *block) const override {}
}; };
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
......
...@@ -21,18 +21,14 @@ using Tensor = framework::Tensor; ...@@ -21,18 +21,14 @@ using Tensor = framework::Tensor;
class MomentumOpInferVarType : public framework::VarTypeInference { class MomentumOpInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc& op_desc, void operator()(framework::InferVarTypeContext* ctx) const override {
framework::BlockDesc* block) const override { auto& input_var = ctx->Input("Param")[0];
auto input_var = op_desc.Input("Param")[0]; for (auto& out_var : ctx->Output("ParamOut")) {
for (auto& out_var : op_desc.Output("ParamOut")) { if (ctx->GetType(input_var) == framework::proto::VarType::SELECTED_ROWS) {
if (block->FindRecursiveOrCreateVar(input_var).GetType() == ctx->SetType(out_var, framework::proto::VarType::SELECTED_ROWS);
framework::proto::VarType::SELECTED_ROWS) { } else if (ctx->GetType(input_var) ==
block->FindRecursiveOrCreateVar(out_var).SetType(
framework::proto::VarType::SELECTED_ROWS);
} else if (block->FindRecursiveOrCreateVar(input_var).GetType() ==
framework::proto::VarType::LOD_TENSOR) { framework::proto::VarType::LOD_TENSOR) {
block->FindRecursiveOrCreateVar(out_var).SetType( ctx->SetType(out_var, framework::proto::VarType::LOD_TENSOR);
framework::proto::VarType::LOD_TENSOR);
} else { } else {
PADDLE_THROW( PADDLE_THROW(
"Only support LodTensor and SelectedRows, Unexpected Input Type."); "Only support LodTensor and SelectedRows, Unexpected Input Type.");
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
...@@ -69,6 +70,7 @@ class MomentumOp : public framework::OperatorWithKernel { ...@@ -69,6 +70,7 @@ class MomentumOp : public framework::OperatorWithKernel {
ctx->SetOutputDim("ParamOut", param_dim); ctx->SetOutputDim("ParamOut", param_dim);
ctx->SetOutputDim("VelocityOut", param_dim); ctx->SetOutputDim("VelocityOut", param_dim);
} }
framework::OpKernelType GetExpectedKernelType( framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override { const framework::ExecutionContext& ctx) const override {
auto input_data_type = framework::GetDataTypeOfVar(ctx.InputVar("Param")); auto input_data_type = framework::GetDataTypeOfVar(ctx.InputVar("Param"));
...@@ -351,23 +353,14 @@ class MomentumOpKernel : public framework::OpKernel<T> { ...@@ -351,23 +353,14 @@ class MomentumOpKernel : public framework::OpKernel<T> {
VLOG(3) << "Grad SelectedRows contains no data!"; VLOG(3) << "Grad SelectedRows contains no data!";
return; return;
} }
auto* merged_grad = const_cast<framework::Scope&>(ctx.scope())
.Var() framework::SelectedRows tmp_merged_grad;
->GetMutable<framework::SelectedRows>(); framework::SelectedRows* merged_grad = &tmp_merged_grad;
math::scatter::MergeAdd<DeviceContext, T> merge_func; math::scatter::MergeAdd<DeviceContext, T> merge_func;
merge_func(ctx.template device_context<DeviceContext>(), *grad, merge_func(ctx.template device_context<DeviceContext>(), *grad,
merged_grad); merged_grad);
const int64_t* rows = nullptr; const int64_t* rows = merged_grad->rows().Data(ctx.GetPlace());
#ifdef PADDLE_WITH_CUDA
if (platform::is_gpu_place(ctx.GetPlace())) {
rows = merged_grad->rows().CUDAData(ctx.GetPlace());
} else {
#endif
rows = merged_grad->rows().data();
#ifdef PADDLE_WITH_CUDA
}
#endif
int64_t row_numel = int64_t row_numel =
merged_grad->value().numel() / merged_grad->rows().size(); merged_grad->value().numel() / merged_grad->rows().size();
platform::ForRange<DeviceContext> for_range( platform::ForRange<DeviceContext> for_range(
......
...@@ -216,24 +216,14 @@ class RmspropOpKernel : public framework::OpKernel<T> { ...@@ -216,24 +216,14 @@ class RmspropOpKernel : public framework::OpKernel<T> {
} }
} else if (grad_var->IsType<framework::SelectedRows>()) { } else if (grad_var->IsType<framework::SelectedRows>()) {
auto &grad = grad_var->Get<framework::SelectedRows>(); auto &grad = grad_var->Get<framework::SelectedRows>();
auto *merged_grad = const_cast<framework::Scope &>(ctx.scope()) framework::SelectedRows tmp_merged_grad;
.Var() framework::SelectedRows *merged_grad = &tmp_merged_grad;
->GetMutable<framework::SelectedRows>();
math::scatter::MergeAdd<DeviceContext, T> merge_func; math::scatter::MergeAdd<DeviceContext, T> merge_func;
merge_func(dev_ctx, grad, merged_grad); merge_func(dev_ctx, grad, merged_grad);
platform::ForRange<DeviceContext> for_range(dev_ctx, limit); platform::ForRange<DeviceContext> for_range(dev_ctx, limit);
const int64_t *rows; const int64_t *rows = merged_grad->rows().Data(ctx.GetPlace());
#ifdef PADDLE_WITH_CUDA
if (platform::is_gpu_place(ctx.GetPlace())) {
rows = merged_grad->rows().CUDAData(ctx.GetPlace());
} else {
#endif
rows = merged_grad->rows().data();
#ifdef PADDLE_WITH_CUDA
}
#endif
auto &merged_tensor = merged_grad->value(); auto &merged_tensor = merged_grad->value();
int64_t row_count = merged_grad->rows().size(); int64_t row_count = merged_grad->rows().size();
int64_t row_numel = merged_tensor.numel() / row_count; int64_t row_numel = merged_tensor.numel() / row_count;
......
...@@ -50,20 +50,18 @@ class SGDOp : public framework::OperatorWithKernel { ...@@ -50,20 +50,18 @@ class SGDOp : public framework::OperatorWithKernel {
class SGDOpInferVarType : public framework::VarTypeInference { class SGDOpInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { auto &input_var_n = ctx->Input("Param")[0];
auto input_var_n = op_desc.Input("Param")[0]; auto in_var_type = ctx->GetType(input_var_n);
auto in_var_type = block->FindRecursiveOrCreateVar(input_var_n).GetType();
PADDLE_ENFORCE(in_var_type == framework::proto::VarType::SELECTED_ROWS || PADDLE_ENFORCE(in_var_type == framework::proto::VarType::SELECTED_ROWS ||
in_var_type == framework::proto::VarType::LOD_TENSOR, in_var_type == framework::proto::VarType::LOD_TENSOR,
"The input Var's type should be LoDtensor or SelectedRows," "The input Var's type should be LoDtensor or SelectedRows,"
" but the received var(%s)'s type is %s", " but the received var(%s)'s type is %s",
input_var_n, in_var_type); input_var_n, in_var_type);
for (auto &out_var_n : op_desc.Output("ParamOut")) { for (auto &out_var_n : ctx->Output("ParamOut")) {
auto &out_var = block->FindRecursiveOrCreateVar(out_var_n); if (ctx->GetType(out_var_n) != in_var_type) {
if (out_var.GetType() != in_var_type) { ctx->SetType(out_var_n, in_var_type);
out_var.SetType(in_var_type);
} }
} }
} }
......
...@@ -14,8 +14,11 @@ ...@@ -14,8 +14,11 @@
#include "paddle/fluid/operators/py_func_op.h" #include "paddle/fluid/operators/py_func_op.h"
#include <memory>
#include <set> #include <set>
#include <string> #include <string>
#include <unordered_set>
#include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
...@@ -91,15 +94,12 @@ static void CallPythonFunc(py::object *callable, ...@@ -91,15 +94,12 @@ static void CallPythonFunc(py::object *callable,
} }
} }
class PyFuncOpVarTypInference : public framework::VarTypeInference { class PyFuncOpVarTypeInference : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { bool has_out = (ctx->HasOutput("Out") && !ctx->Output("Out").empty());
auto &outs = op.Outputs();
bool has_out = (outs.count("Out") > 0 && !outs.at("Out").empty());
auto &ins = op.Inputs(); bool has_in = (ctx->HasInput("X") && !ctx->Input("X").empty());
bool has_in = (ins.count("X") > 0 && !ins.at("X").empty());
/** /**
* X or Out can be empty, so that py_func can be more flexible * X or Out can be empty, so that py_func can be more flexible
...@@ -107,8 +107,8 @@ class PyFuncOpVarTypInference : public framework::VarTypeInference { ...@@ -107,8 +107,8 @@ class PyFuncOpVarTypInference : public framework::VarTypeInference {
*/ */
PADDLE_ENFORCE(has_in || has_out, "Input(X) or Output(Out) must exist"); PADDLE_ENFORCE(has_in || has_out, "Input(X) or Output(Out) must exist");
PADDLE_ENFORCE_GE(boost::get<int>(op.GetAttr(kForwardPythonCallableId)), 0, PADDLE_ENFORCE_GE(boost::get<int>(ctx->GetAttr(kForwardPythonCallableId)),
"Function id cannot be less than 0"); 0, "Function id cannot be less than 0");
if (!has_out) return; if (!has_out) return;
...@@ -118,7 +118,7 @@ class PyFuncOpVarTypInference : public framework::VarTypeInference { ...@@ -118,7 +118,7 @@ class PyFuncOpVarTypInference : public framework::VarTypeInference {
* the corresponding forward variable * the corresponding forward variable
*/ */
const std::string kGradVarSuffix = framework::kGradVarSuffix; const std::string kGradVarSuffix = framework::kGradVarSuffix;
auto &out_var_names = outs.at("Out"); auto &out_var_names = ctx->Output("Out");
for (auto &out_var_name : out_var_names) { for (auto &out_var_name : out_var_names) {
if (out_var_name == framework::kEmptyVarName || if (out_var_name == framework::kEmptyVarName ||
out_var_name.size() < kGradVarSuffix.size()) { out_var_name.size() < kGradVarSuffix.size()) {
...@@ -128,18 +128,17 @@ class PyFuncOpVarTypInference : public framework::VarTypeInference { ...@@ -128,18 +128,17 @@ class PyFuncOpVarTypInference : public framework::VarTypeInference {
size_t len = out_var_name.size() - kGradVarSuffix.size(); size_t len = out_var_name.size() - kGradVarSuffix.size();
if (out_var_name.substr(len) == kGradVarSuffix) { if (out_var_name.substr(len) == kGradVarSuffix) {
auto fwd_var_name = out_var_name.substr(0, len); auto fwd_var_name = out_var_name.substr(0, len);
auto *out_var_desc = block->FindVarRecursive(out_var_name); PADDLE_ENFORCE(ctx->HasVar(out_var_name),
auto *fwd_var_desc = block->FindVarRecursive(fwd_var_name); "Backward variable %s not found", out_var_name);
PADDLE_ENFORCE_NOT_NULL(out_var_desc, "Backward variable %s not found", PADDLE_ENFORCE(ctx->HasVar(fwd_var_name),
out_var_name); "Backward variable %s not found", fwd_var_name);
PADDLE_ENFORCE_NOT_NULL(fwd_var_desc, "Forward variable %s not found",
fwd_var_name);
VLOG(10) << "Infer var_desc of Output(" << out_var_name << ") as Input(" VLOG(10) << "Infer var_desc of Output(" << out_var_name << ") as Input("
<< fwd_var_name << ")"; << fwd_var_name << ")";
out_var_desc->SetShape(fwd_var_desc->GetShape());
out_var_desc->SetDataType(fwd_var_desc->GetDataType()); ctx->SetShape(out_var_name, ctx->GetShape(fwd_var_name));
out_var_desc->SetLoDLevel(fwd_var_desc->GetLoDLevel()); ctx->SetDataType(out_var_name, ctx->GetDataType(fwd_var_name));
out_var_desc->SetType(fwd_var_desc->GetType()); ctx->SetLoDLevel(out_var_name, ctx->GetLoDLevel(fwd_var_name));
ctx->SetType(out_var_name, ctx->GetType(fwd_var_name));
} }
} }
} }
...@@ -309,5 +308,5 @@ class PyFuncOp : public framework::OperatorBase { ...@@ -309,5 +308,5 @@ class PyFuncOp : public framework::OperatorBase {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OPERATOR(py_func, ops::PyFuncOp, ops::PyFuncOpMaker, REGISTER_OPERATOR(py_func, ops::PyFuncOp, ops::PyFuncOpMaker,
ops::PyFuncOpVarTypInference, ops::PyFuncOpShapeInference, ops::PyFuncOpVarTypeInference, ops::PyFuncOpShapeInference,
ops::PyFuncOpGradDescMaker); ops::PyFuncOpGradDescMaker);
...@@ -85,10 +85,10 @@ class CreateCustomReaderOpMaker : public DecoratedReaderMakerBase { ...@@ -85,10 +85,10 @@ class CreateCustomReaderOpMaker : public DecoratedReaderMakerBase {
AddComment(R"DOC( AddComment(R"DOC(
CreateCustomReader Operator CreateCustomReader Operator
A custom reader can be used for input data preprocessing. A custom reader can be used for input data preprocessing.
A custom reader holds its own sub-block, which will be executed in CPU A custom reader holds its own sub-block, which will be executed in CPU
in its 'ReadNext()' function. Users can configurate their own in its 'ReadNext()' function. Users can configurate their own
preprocessing pipelines by inserting operators into custom reader's preprocessing pipelines by inserting operators into custom reader's
sub-block. sub-block.
)DOC"); )DOC");
} }
...@@ -123,23 +123,22 @@ class CustomReaderInferShape : public framework::InferShapeBase { ...@@ -123,23 +123,22 @@ class CustomReaderInferShape : public framework::InferShapeBase {
class CustomReaderInferVarType : public framework::VarTypeInference { class CustomReaderInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc& op_desc, void operator()(framework::InferVarTypeContext* ctx) const override {
framework::BlockDesc* block) const override { auto& out_var_name = ctx->Output("Out")[0];
framework::VarDesc* out_reader = block->FindVar(op_desc.Output("Out")[0]); PADDLE_ENFORCE(ctx->HasVar(out_var_name));
PADDLE_ENFORCE_NOT_NULL(out_reader); ctx->SetType(out_var_name, framework::proto::VarType::READER);
out_reader->SetType(framework::proto::VarType::READER);
auto sink_var_names = auto sink_var_names =
boost::get<std::vector<std::string>>(op_desc.GetAttr("sink_var_names")); boost::get<std::vector<std::string>>(ctx->GetAttr("sink_var_names"));
const auto* sub_block = const auto* sub_block =
boost::get<framework::BlockDesc*>(op_desc.GetAttr("sub_block")); boost::get<framework::BlockDesc*>(ctx->GetAttr("sub_block"));
std::vector<framework::proto::VarType::Type> res_data_types; std::vector<framework::proto::VarType::Type> res_data_types;
for (const std::string& var_name : sink_var_names) { for (const std::string& var_name : sink_var_names) {
framework::VarDesc* var = sub_block->FindVar(var_name); framework::VarDesc* var = sub_block->FindVar(var_name);
PADDLE_ENFORCE_NOT_NULL(var); PADDLE_ENFORCE_NOT_NULL(var);
res_data_types.emplace_back(var->GetDataType()); res_data_types.emplace_back(var->GetDataType());
} }
out_reader->SetDataTypes(res_data_types); ctx->SetDataTypes(out_var_name, res_data_types);
} }
}; };
......
...@@ -51,19 +51,16 @@ class ReadInferShape : public framework::InferShapeBase { ...@@ -51,19 +51,16 @@ class ReadInferShape : public framework::InferShapeBase {
class ReadInferVarType : public framework::VarTypeInference { class ReadInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc& op_desc, void operator()(framework::InferVarTypeContext* ctx) const override {
framework::BlockDesc* block) const override { bool infer_out = boost::get<bool>(ctx->GetAttr("infer_out"));
bool infer_out = boost::get<bool>(op_desc.GetAttr("infer_out"));
if (infer_out) { if (infer_out) {
std::string reader_name = op_desc.Input("Reader")[0]; std::string reader_name = ctx->Input("Reader")[0];
std::vector<std::string> out_names = op_desc.Output("Out"); std::vector<std::string> out_names = ctx->Output("Out");
framework::VarDesc* reader = block->FindVarRecursive(reader_name); auto dtypes = ctx->GetDataTypes(reader_name);
auto dtypes = reader->GetDataTypes();
PADDLE_ENFORCE_EQ(dtypes.size(), out_names.size()); PADDLE_ENFORCE_EQ(dtypes.size(), out_names.size());
for (size_t i = 0; i < dtypes.size(); ++i) { for (size_t i = 0; i < dtypes.size(); ++i) {
framework::VarDesc& out = block->FindRecursiveOrCreateVar(out_names[i]); ctx->SetType(out_names[i], framework::proto::VarType::LOD_TENSOR);
out.SetType(framework::proto::VarType::LOD_TENSOR); ctx->SetDataType(out_names[i], dtypes[i]);
out.SetDataType(dtypes[i]);
} }
} }
} }
......
...@@ -98,11 +98,10 @@ void FileReaderInferShape::operator()(framework::InferShapeContext* ctx) const { ...@@ -98,11 +98,10 @@ void FileReaderInferShape::operator()(framework::InferShapeContext* ctx) const {
} }
} }
void FileReaderInferVarType::operator()(const framework::OpDesc& op_desc, void FileReaderInferVarType::operator()(
framework::BlockDesc* block) const { framework::InferVarTypeContext* ctx) const {
std::string reader_name = op_desc.Output("Out")[0]; std::string reader_name = ctx->Output("Out")[0];
framework::VarDesc* reader = block->FindVarRecursive(reader_name); ctx->SetType(reader_name, framework::proto::VarType::READER);
reader->SetType(framework::proto::VarType::READER);
} }
void DecoratedReaderInferShape::operator()( void DecoratedReaderInferShape::operator()(
...@@ -125,13 +124,11 @@ void DecoratedReaderInferShape::operator()( ...@@ -125,13 +124,11 @@ void DecoratedReaderInferShape::operator()(
} }
void DecoratedReaderInferVarType::operator()( void DecoratedReaderInferVarType::operator()(
const framework::OpDesc& op_desc, framework::BlockDesc* block) const { framework::InferVarTypeContext* ctx) const {
std::string in_reader_name = op_desc.Input("UnderlyingReader")[0]; const std::string& in_reader_name = ctx->Input("UnderlyingReader")[0];
framework::VarDesc* in_reader = block->FindVarRecursive(in_reader_name); const std::string& out_reader_name = ctx->Output("Out")[0];
std::string out_reader_name = op_desc.Output("Out")[0]; ctx->SetType(out_reader_name, framework::proto::VarType::READER);
framework::VarDesc* out_reader = block->FindVarRecursive(out_reader_name); ctx->SetDataTypes(out_reader_name, ctx->GetDataTypes(in_reader_name));
out_reader->SetType(framework::proto::VarType::READER);
out_reader->SetDataTypes(in_reader->GetDataTypes());
} }
void DecoratedReaderMakerBase::Make() { void DecoratedReaderMakerBase::Make() {
......
...@@ -14,7 +14,9 @@ ...@@ -14,7 +14,9 @@
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include <vector> #include <vector>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h" #include "paddle/fluid/framework/reader.h"
...@@ -59,8 +61,7 @@ class FileReaderInferShape : public framework::InferShapeBase { ...@@ -59,8 +61,7 @@ class FileReaderInferShape : public framework::InferShapeBase {
class FileReaderInferVarType : public framework::VarTypeInference { class FileReaderInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc& op_desc, void operator()(framework::InferVarTypeContext* ctx) const override;
framework::BlockDesc* block) const override;
}; };
// general infershape for decorated reader // general infershape for decorated reader
...@@ -72,8 +73,7 @@ class DecoratedReaderInferShape : public framework::InferShapeBase { ...@@ -72,8 +73,7 @@ class DecoratedReaderInferShape : public framework::InferShapeBase {
// general var type inference for decorated reader // general var type inference for decorated reader
class DecoratedReaderInferVarType : public framework::VarTypeInference { class DecoratedReaderInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc& op_desc, void operator()(framework::InferVarTypeContext* ctx) const override;
framework::BlockDesc* block) const override;
}; };
class DecoratedReaderMakerBase : public framework::OpProtoAndCheckerMaker { class DecoratedReaderMakerBase : public framework::OpProtoAndCheckerMaker {
......
...@@ -159,12 +159,9 @@ This operator will serialize and write LoDTensor / SelectedRows variable to file ...@@ -159,12 +159,9 @@ This operator will serialize and write LoDTensor / SelectedRows variable to file
class SaveOpVarTypeInference : public framework::VarTypeInference { class SaveOpVarTypeInference : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { auto out_var_name = ctx->Output(LOOKUP_TABLE_PATH).front();
auto out_var_name = op_desc.Output(LOOKUP_TABLE_PATH).front(); ctx->SetType(out_var_name, framework::proto::VarType::RAW);
auto &out_var = block->FindRecursiveOrCreateVar(out_var_name);
auto var_type = framework::proto::VarType::RAW;
out_var.SetType(var_type);
} }
}; };
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/scale_op.h" #include "paddle/fluid/operators/scale_op.h"
#include <memory>
#include <string> #include <string>
#include "paddle/fluid/operators/detail/safe_ref.h" #include "paddle/fluid/operators/detail/safe_ref.h"
...@@ -69,17 +70,13 @@ $$Out = scale*(X + bias)$$ ...@@ -69,17 +70,13 @@ $$Out = scale*(X + bias)$$
class ScaleOpVarTypeInference : public framework::VarTypeInference { class ScaleOpVarTypeInference : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { auto &in_var_name = ctx->Input("X").front();
auto &in_var_name = op_desc.Input("X").front(); auto out_var_name = ctx->Output("Out").front();
auto &in_var = detail::Ref(block->FindVarRecursive(in_var_name));
auto out_var_name = op_desc.Output("Out").front();
auto *out_var = block->FindVarRecursive(out_var_name);
if (in_var_name != out_var_name) { if (in_var_name != out_var_name) {
out_var->SetType(in_var.GetType()); ctx->SetType(out_var_name, ctx->GetType(in_var_name));
out_var->SetDataType(in_var.GetDataType()); ctx->SetDataType(out_var_name, ctx->GetDataType(in_var_name));
} }
} }
}; };
......
...@@ -30,13 +30,6 @@ class SequenceEnumerateOp : public framework::OperatorWithKernel { ...@@ -30,13 +30,6 @@ class SequenceEnumerateOp : public framework::OperatorWithKernel {
"Output(X) of SequenceEnumerate operator should not be null."); "Output(X) of SequenceEnumerate operator should not be null.");
const auto x_dims = ctx->GetInputDim("X"); const auto x_dims = ctx->GetInputDim("X");
PADDLE_ENFORCE_EQ(
x_dims.size(), 2,
"Input(X) of SequenceEnumerate operator's rank should be 2.");
PADDLE_ENFORCE_EQ(x_dims[1], 1,
"Input(X) of SequenceEnumerate operator's 2nd "
"dimension should be 1.");
const auto win_size = ctx->Attrs().Get<int>("win_size"); const auto win_size = ctx->Attrs().Get<int>("win_size");
ctx->SetOutputDim("Out", {x_dims[0], win_size}); ctx->SetOutputDim("Out", {x_dims[0], win_size});
ctx->ShareLoD("X", "Out"); ctx->ShareLoD("X", "Out");
...@@ -59,7 +52,8 @@ class SequenceEnumerateOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -59,7 +52,8 @@ class SequenceEnumerateOpMaker : public framework::OpProtoAndCheckerMaker {
}); });
AddAttr<int>("pad_value", "(int) The enumerate sequence padding value.") AddAttr<int>("pad_value", "(int) The enumerate sequence padding value.")
.SetDefault(0); .SetDefault(0);
AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape, "") AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape,
"Skip calling InferShape() function in the runtime.")
.SetDefault(true); .SetDefault(true);
AddComment(R"DOC( AddComment(R"DOC(
Sequence Enumerate Operator. Sequence Enumerate Operator.
......
...@@ -27,30 +27,47 @@ class SequenceEnumerateKernel : public framework::OpKernel<T> { ...@@ -27,30 +27,47 @@ class SequenceEnumerateKernel : public framework::OpKernel<T> {
auto* in = context.Input<LoDTensor>("X"); auto* in = context.Input<LoDTensor>("X");
auto* out = context.Output<LoDTensor>("Out"); auto* out = context.Output<LoDTensor>("Out");
int win_size = context.Attr<int>("win_size"); int win_size = context.Attr<int>("win_size");
int pad_value = context.Attr<int>("pad_value"); auto pad_value = static_cast<T>(context.Attr<int>("pad_value"));
auto in_dims = in->dims(); auto in_dims = in->dims();
auto in_lod = in->lod(); auto lod0 = in->lod()[0];
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
static_cast<uint64_t>(in_dims[0]), in_lod[0].back(), static_cast<uint64_t>(in_dims[0]), lod0.back(),
"The actual input data's size mismatched with LoD information."); "The actual input data's size mismatched with LoD information.");
PADDLE_ENFORCE_EQ(
in_dims.size(), 2UL,
"Input(X) of SequenceEnumerate operator's rank should be 2.");
PADDLE_ENFORCE_EQ(in_dims[1], 1,
"Input(X) of SequenceEnumerate operator's 2nd "
"dimension should be 1.");
// Generate enumerate sequence set // Generate enumerate sequence set
auto lod0 = in_lod[0];
auto in_data = in->data<T>(); auto in_data = in->data<T>();
out->Resize({in_dims[0], win_size}); out->Resize({in_dims[0], win_size});
out->set_lod(in->lod());
auto out_data = out->mutable_data<T>(context.GetPlace()); auto out_data = out->mutable_data<T>(context.GetPlace());
for (size_t i = 0; i < lod0.size() - 1; ++i) { for (size_t i = 0; i < lod0.size() - 1; ++i) {
for (size_t idx = lod0[i]; idx < lod0[i + 1]; ++idx) { int start = lod0[i];
for (int word_idx = 0; word_idx < win_size; ++word_idx) { int end = lod0[i + 1];
size_t word_pos = idx + word_idx; int copy_size = win_size < end - start + 1 ? win_size : end - start + 1;
out_data[win_size * idx + word_idx] = int mid = end + 1 - copy_size;
word_pos < lod0[i + 1] ? in_data[word_pos] : pad_value; int pad_num = win_size - copy_size;
copy_size *= sizeof(T);
for (int idx = start; idx < mid; ++idx) {
std::memcpy(out_data, in_data + idx, copy_size);
out_data += win_size;
}
for (int idx = mid; idx < end; ++idx) {
copy_size -= sizeof(T);
pad_num++;
std::memcpy(out_data, in_data + idx, copy_size);
T* pdata = out_data + copy_size / sizeof(T);
for (int i = 0; i < pad_num; ++i) {
pdata[i] = pad_value;
} }
out_data += win_size;
} }
} }
out->set_lod(in->lod());
} }
}; };
......
...@@ -31,18 +31,18 @@ __global__ void Padding(const paddle::platform::float16* d_out, ...@@ -31,18 +31,18 @@ __global__ void Padding(const paddle::platform::float16* d_out,
paddle::platform::float16* d_in) { paddle::platform::float16* d_in) {
int64_t out_idx = threadIdx.x + blockDim.x * blockIdx.x; int64_t out_idx = threadIdx.x + blockDim.x * blockIdx.x;
if (out_idx < n) { if (out_idx < n) {
int64_t out_idx_tmp = out_idx;
int coords[D] = {0}; int coords[D] = {0};
for (int i = D - 1; i >= 0; --i) { for (int i = D - 1; i >= 0; --i) {
coords[i] = out_idx % out_dims[i]; coords[i] = out_idx_tmp % out_dims[i];
out_idx /= out_dims[i]; out_idx_tmp /= out_dims[i];
coords[i] += offsets[i]; coords[i] += offsets[i];
} }
int64_t in_idx = 0; int64_t in_idx = 0;
for (int i = 0; i < D - 1; ++i) { for (int i = 0; i < D; ++i) {
in_idx += coords[i] * in_dims[i + 1]; in_idx = in_idx * in_dims[i] + coords[i];
} }
in_idx += coords[D - 1];
d_in[in_idx] = d_out[out_idx]; d_in[in_idx] = d_out[out_idx];
} }
...@@ -80,8 +80,8 @@ class SliceGradKernel<paddle::platform::CUDADeviceContext, ...@@ -80,8 +80,8 @@ class SliceGradKernel<paddle::platform::CUDADeviceContext,
set_zero(dev_ctx, d_in, static_cast<paddle::platform::float16>(0)); set_zero(dev_ctx, d_in, static_cast<paddle::platform::float16>(0));
int64_t numel = d_out->numel(); int64_t numel = d_out->numel();
dim3 blocks((numel - 1) / PADDLE_CUDA_NUM_THREADS + 1, 1, 1); dim3 blocks((numel - 1) / PADDLE_CUDA_NUM_THREADS + 1);
dim3 threads(PADDLE_CUDA_NUM_THREADS, 1, 1); dim3 threads(PADDLE_CUDA_NUM_THREADS);
auto stream = ctx.cuda_device_context().stream(); auto stream = ctx.cuda_device_context().stream();
auto out_shape = framework::vectorize2int(out_dims); auto out_shape = framework::vectorize2int(out_dims);
......
...@@ -14,6 +14,8 @@ limitations under the License. */ ...@@ -14,6 +14,8 @@ limitations under the License. */
#include "paddle/fluid/operators/split_selected_rows_op.h" #include "paddle/fluid/operators/split_selected_rows_op.h"
#include <memory>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -60,10 +62,9 @@ class SplitSelectedRowsOp : public framework::OperatorWithKernel { ...@@ -60,10 +62,9 @@ class SplitSelectedRowsOp : public framework::OperatorWithKernel {
class SplitSelectedRowsOpInferVarType : public framework::VarTypeInference { class SplitSelectedRowsOpInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { for (auto &out_var : ctx->Output("Out")) {
for (auto &out_var : op_desc.Output("Out")) { ctx->SetType(out_var, framework::proto::VarType::SELECTED_ROWS);
block->Var(out_var)->SetType(framework::proto::VarType::SELECTED_ROWS);
} }
} }
}; };
......
...@@ -12,6 +12,7 @@ limitations under the License. */ ...@@ -12,6 +12,7 @@ limitations under the License. */
#include "paddle/fluid/operators/sum_op.h" #include "paddle/fluid/operators/sum_op.h"
#include <algorithm> #include <algorithm>
#include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -159,24 +160,20 @@ the LoD information with the first input. ...@@ -159,24 +160,20 @@ the LoD information with the first input.
class SumOpVarTypeInference : public framework::VarTypeInference { class SumOpVarTypeInference : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc& op_desc, void operator()(framework::InferVarTypeContext* ctx) const override {
framework::BlockDesc* block) const override { auto& inputs = ctx->Input("X");
auto& inputs = op_desc.Input("X");
auto var_type = framework::proto::VarType::SELECTED_ROWS; auto var_type = framework::proto::VarType::SELECTED_ROWS;
for (auto& name : op_desc.Input("X")) { for (auto& name : ctx->Input("X")) {
VLOG(10) << name << " " VLOG(10) << name << " " << ctx->GetType(name);
<< block->FindRecursiveOrCreateVar(name).GetType();
} }
bool any_input_is_lod_tensor = std::any_of( bool any_input_is_lod_tensor = std::any_of(
inputs.begin(), inputs.end(), [block](const std::string& name) { inputs.begin(), inputs.end(), [ctx](const std::string& name) {
return block->FindRecursiveOrCreateVar(name).GetType() == return ctx->GetType(name) == framework::proto::VarType::LOD_TENSOR;
framework::proto::VarType::LOD_TENSOR;
}); });
auto is_tensor_array = [block](const std::string& name) { auto is_tensor_array = [ctx](const std::string& name) {
return block->FindRecursiveOrCreateVar(name).GetType() == return ctx->GetType(name) == framework::proto::VarType::LOD_TENSOR_ARRAY;
framework::proto::VarType::LOD_TENSOR_ARRAY;
}; };
bool any_input_is_tensor_array = bool any_input_is_tensor_array =
...@@ -188,8 +185,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference { ...@@ -188,8 +185,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
if (!all_inputs_are_tensor_array) { if (!all_inputs_are_tensor_array) {
std::ostringstream os; std::ostringstream os;
for (auto& each : inputs) { for (auto& each : inputs) {
os << " " << each << " type is " os << " " << each << " type is " << ctx->GetType(each) << "\n";
<< block->FindRecursiveOrCreateVar(each).GetType() << "\n";
} }
PADDLE_ENFORCE(all_inputs_are_tensor_array, PADDLE_ENFORCE(all_inputs_are_tensor_array,
"Not all inputs are tensor array:\n%s", os.str()); "Not all inputs are tensor array:\n%s", os.str());
...@@ -199,11 +195,9 @@ class SumOpVarTypeInference : public framework::VarTypeInference { ...@@ -199,11 +195,9 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
var_type = framework::proto::VarType::LOD_TENSOR; var_type = framework::proto::VarType::LOD_TENSOR;
} }
auto out_var_name = op_desc.Output("Out").front(); auto out_var_name = ctx->Output("Out").front();
auto& out_var = block->FindRecursiveOrCreateVar(out_var_name); ctx->SetType(out_var_name, var_type);
out_var.SetType(var_type); ctx->SetDataType(out_var_name, ctx->GetDataType(inputs.front()));
auto& in_var = detail::Ref(block->FindVarRecursive(inputs.front()));
out_var.SetDataType(in_var.GetDataType());
} }
}; };
......
...@@ -177,10 +177,9 @@ class LoDTensorArray2TensorGradInferShape : public framework::InferShapeBase { ...@@ -177,10 +177,9 @@ class LoDTensorArray2TensorGradInferShape : public framework::InferShapeBase {
class LoDTensorArray2TensorGradInferVarType class LoDTensorArray2TensorGradInferVarType
: public framework::VarTypeInference { : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { for (auto &out_var : ctx->Output(framework::GradVarName("X"))) {
for (auto &out_var : op_desc.Output(framework::GradVarName("X"))) { ctx->SetType(out_var, framework::proto::VarType::LOD_TENSOR_ARRAY);
block->Var(out_var)->SetType(framework::proto::VarType::LOD_TENSOR_ARRAY);
} }
} }
}; };
......
...@@ -46,8 +46,7 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -46,8 +46,7 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker {
class TensorRTEngineInferVarType : public framework::VarTypeInference { class TensorRTEngineInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {}
framework::BlockDesc *block) const override {}
}; };
} // namespace operators } // namespace operators
......
...@@ -112,17 +112,16 @@ uniform distribution. The random result is in set [min, max]. ...@@ -112,17 +112,16 @@ uniform distribution. The random result is in set [min, max].
class UniformRandomOpVarTypeInference : public framework::VarTypeInference { class UniformRandomOpVarTypeInference : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDesc &op_desc, void operator()(framework::InferVarTypeContext *ctx) const override {
framework::BlockDesc *block) const override { auto out_var_name = ctx->Output("Out").front();
auto out_var_name = op_desc.Output("Out").front();
auto var_data_type = static_cast<framework::proto::VarType::Type>( auto var_data_type = static_cast<framework::proto::VarType::Type>(
boost::get<int>(op_desc.GetAttr("dtype"))); boost::get<int>(ctx->GetAttr("dtype")));
auto out_var = block->FindRecursiveOrCreateVar(out_var_name); if (ctx->GetType(out_var_name) !=
if (out_var.GetType() != framework::proto::VarType::SELECTED_ROWS) { framework::proto::VarType::SELECTED_ROWS) {
out_var.SetType(framework::proto::VarType::LOD_TENSOR); ctx->SetType(out_var_name, framework::proto::VarType::LOD_TENSOR);
} }
out_var.SetDataType(var_data_type); ctx->SetDataType(out_var_name, var_data_type);
} }
}; };
......
set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune
feed_fetch_method pass_builder parallel_executor profiler layer scope_pool feed_fetch_method pass_builder parallel_executor profiler layer scope_pool
tracer analysis_predictor) tracer analysis_predictor imperative_profiler)
if(WITH_PYTHON) if(WITH_PYTHON)
list(APPEND PYBIND_DEPS py_func_op) list(APPEND PYBIND_DEPS py_func_op)
......
...@@ -38,20 +38,22 @@ void BindTracer(pybind11::module* m) { ...@@ -38,20 +38,22 @@ void BindTracer(pybind11::module* m) {
.def("trace", .def("trace",
[](imperative::Tracer& self, imperative::OpBase* op, [](imperative::Tracer& self, imperative::OpBase* op,
const imperative::VarBasePtrMap& inputs, const imperative::VarBasePtrMap& inputs,
const imperative::VarBasePtrMap& outputs, imperative::VarBasePtrMap* outputs,
framework::AttributeMap attrs_map, framework::AttributeMap attrs_map,
const platform::CPUPlace expected_place, const platform::CPUPlace expected_place,
const bool stop_gradient = false) { const bool stop_gradient = false) {
pybind11::gil_scoped_release release;
return self.Trace(op, inputs, outputs, attrs_map, expected_place, return self.Trace(op, inputs, outputs, attrs_map, expected_place,
stop_gradient); stop_gradient);
}) })
.def("trace", .def("trace",
[](imperative::Tracer& self, imperative::OpBase* op, [](imperative::Tracer& self, imperative::OpBase* op,
const imperative::VarBasePtrMap& inputs, const imperative::VarBasePtrMap& inputs,
const imperative::VarBasePtrMap& outputs, imperative::VarBasePtrMap* outputs,
framework::AttributeMap attrs_map, framework::AttributeMap attrs_map,
const platform::CUDAPlace expected_place, const platform::CUDAPlace expected_place,
const bool stop_gradient = false) { const bool stop_gradient = false) {
pybind11::gil_scoped_release release;
return self.Trace(op, inputs, outputs, attrs_map, expected_place, return self.Trace(op, inputs, outputs, attrs_map, expected_place,
stop_gradient); stop_gradient);
}) })
......
...@@ -36,6 +36,7 @@ limitations under the License. */ ...@@ -36,6 +36,7 @@ limitations under the License. */
#include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/version.h" #include "paddle/fluid/framework/version.h"
#include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/profiler.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h" #include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/legacy_allocator.h" #include "paddle/fluid/memory/allocation/legacy_allocator.h"
#include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/activation_op.h"
...@@ -156,6 +157,11 @@ PYBIND11_MODULE(core, m) { ...@@ -156,6 +157,11 @@ PYBIND11_MODULE(core, m) {
m.def("print_mem_usage", m.def("print_mem_usage",
[]() { return memory::allocation::GPUMemMonitor.PrintMemUsage(); }); []() { return memory::allocation::GPUMemMonitor.PrintMemUsage(); });
m.def("start_imperative_gperf_profiler",
[]() { imperative::StartProfile(); });
m.def("stop_imperative_gperf_profiler", []() { imperative::StopProfile(); });
py::class_<imperative::VarBase>(m, "VarBase", R"DOC()DOC") py::class_<imperative::VarBase>(m, "VarBase", R"DOC()DOC")
.def( .def(
py::init<const std::string &, paddle::framework::proto::VarType::Type, py::init<const std::string &, paddle::framework::proto::VarType::Type,
...@@ -194,7 +200,7 @@ PYBIND11_MODULE(core, m) { ...@@ -194,7 +200,7 @@ PYBIND11_MODULE(core, m) {
.def_property("name", &imperative::VarBase::Name, .def_property("name", &imperative::VarBase::Name,
&imperative::VarBase::SetName) &imperative::VarBase::SetName)
.def_property_readonly("shape", &imperative::VarBase::Shape) .def_property_readonly("shape", &imperative::VarBase::Shape)
.def_property_readonly("dtype", &imperative::VarBase::DType) .def_property_readonly("dtype", &imperative::VarBase::DataType)
.def_property("persistable", &imperative::VarBase::IsPersistable, .def_property("persistable", &imperative::VarBase::IsPersistable,
&imperative::VarBase::SetPersistable) &imperative::VarBase::SetPersistable)
.def_property("stop_gradient", &imperative::VarBase::IsStopGradient, .def_property("stop_gradient", &imperative::VarBase::IsStopGradient,
......
...@@ -132,7 +132,8 @@ def __bootstrap__(): ...@@ -132,7 +132,8 @@ def __bootstrap__():
'allocator_strategy', 'reader_queue_speed_test_mode', 'allocator_strategy', 'reader_queue_speed_test_mode',
'print_sub_graph_dir', 'pe_profile_fname', 'warpctc_dir', 'print_sub_graph_dir', 'pe_profile_fname', 'warpctc_dir',
'inner_op_parallelism', 'enable_parallel_graph', 'inner_op_parallelism', 'enable_parallel_graph',
'multiple_of_cupti_buffer_size', 'enable_subgraph_optimize' 'multiple_of_cupti_buffer_size', 'enable_subgraph_optimize',
'tracer_profile_fname'
] ]
if 'Darwin' not in sysstr: if 'Darwin' not in sysstr:
read_env_flags.append('use_pinned_memory') read_env_flags.append('use_pinned_memory')
......
...@@ -18,6 +18,7 @@ import os ...@@ -18,6 +18,7 @@ import os
import time import time
import logging import logging
import paddle
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid import io from paddle.fluid import io
from paddle.fluid import Program from paddle.fluid import Program
...@@ -84,8 +85,9 @@ def convert_dist_to_sparse_program(program): ...@@ -84,8 +85,9 @@ def convert_dist_to_sparse_program(program):
when we train model with distributed lookup table but want to do the local inference, we can use when we train model with distributed lookup table but want to do the local inference, we can use
this function to convert the train program with distributed lookup table to sparse lookup table. this function to convert the train program with distributed lookup table to sparse lookup table.
:param program(Program): the program must be the trainer program, which will be get by the distribute transpiler. Args:
:return: program(Program): the program must be the trainer program, which will be get by the distribute transpiler.
Returns:
program: The `program` is a Program, it's the program replace distributed lookup table to sparse lookup table. program: The `program` is a Program, it's the program replace distributed lookup table to sparse lookup table.
""" """
if not program._distributed_lookup_table: if not program._distributed_lookup_table:
...@@ -128,68 +130,92 @@ def convert_dist_to_sparse_program(program): ...@@ -128,68 +130,92 @@ def convert_dist_to_sparse_program(program):
return program return program
def _load_persistable_vars(executor, dirname, program, lookup_table_vars):
def _is_checkpoint_var(exclude_fluid_vars=None):
"""
the checkpoint will not save or load all the variables.
var type is FEED_MINIBATCH/FETCH_LIST/RAW or var name ends with @GRAD are discarded.
: param var(Variable)
"""
if exclude_fluid_vars is None:
exclude_fluid_vars = []
def is_valid(var):
if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \
var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \
var.desc.type() == core.VarDesc.VarType.RAW:
return False
# @GRAD are named for gradient variables, checkpoint will not save it.
if "@GRAD" in var.name:
return False
# .trainer_ are named for distribute train variables, checkpoint will not save it.
if ".trainer_" in var.name:
return False
# .block is named for distribute train variables, checkpoint will not save it.
if ".block" in var.name:
return False
if "tmp_" in var.name:
return False
if var.name in exclude_fluid_vars:
return False
return var.persistable
return is_valid
io.load_vars(
executor,
dirname=dirname,
main_program=program,
predicate=_is_checkpoint_var(lookup_table_vars),
filename=None)
def load_persistables_for_increment(dirname, executor, program, def load_persistables_for_increment(dirname, executor, program,
lookup_table_var, lookup_table_var_path): lookup_table_var, lookup_table_var_path):
""" """
WARNING: this function will only be used for distributed training with distributed lookup table. WARNING: this function will only be used for distributed training with distributed lookup table.
for increment trainning, the pserver will not only load dense variables, for increment trainning, the pserver will not only load dense variables,
but also load the suitable lookup table var. Because of slice lookup table but also load the suitable lookup table var. Because of sliced lookup table
var with HASH, we must load the correct slice var. var with HASH, we must load the correct sliced var.
Args:
dirname(str): The directory path
executor(Executor): The executor to run for loading inference model.
program(Program): The parameter server program, which will run on Pserver.
lookup_table_var: the distributed lookup tables var name.
lookup_table_var_path: the the distributed lookup tables var location.
Returns:
None
"""
def _load_persistable_vars(executor, dirname, need_load_vars):
load_prog = Program()
load_block = load_prog.global_block()
need_delete_vars = []
for param in need_load_vars:
origin_var = param.origin
slice_var = param.slice
is_slice = param.is_slice
offset = param.offset
if is_slice:
origin = load_block.create_var(
name="{}.load".format(origin_var.name),
type=origin_var.type,
shape=origin_var.shape,
dtype=origin_var.dtype,
persistable=True)
load_block.append_op(
type='load',
inputs={},
outputs={'Out': [origin]},
attrs={
'file_path': os.path.join(dirname, origin_var.name)
})
slice = load_block.create_var(
name=slice_var.name,
type=slice_var.type,
shape=slice_var.shape,
dtype=slice_var.dtype,
persistable=True)
dim1_flatten = reduce(lambda x, y: x * y, slice.shape[1:])
start = int(offset / dim1_flatten)
end = int(offset / dim1_flatten + slice.shape[0])
load_block.append_op(
type="slice",
inputs={'Input': origin},
outputs={'Out': slice},
attrs={'axes': [0],
'starts': [start],
'ends': [end]})
need_delete_vars.append(origin)
else:
origin = load_block.create_var(
name="{}".format(origin_var.name),
type=origin_var.type,
shape=origin_var.shape,
dtype=origin_var.dtype,
persistable=True)
load_block.append_op(
type='load',
inputs={},
outputs={'Out': [origin]},
attrs={
'file_path': os.path.join(dirname, origin_var.name)
})
:param dirname(str): The directory path load_block.append_op(
:param executor(Executor): The executor to run for loading inference model. type='delete_var',
:param program(Program): The parameter server program, which will run on Pserver. inputs={'X': need_delete_vars}, )
:param lookup_table_var: the distributed lookup tables var name.
:param lookup_table_var_path: the the distributed lookup tables var location. executor.run(load_prog)
:return: None
"""
def __load_lookup_table_vars(executor, main_program, lookup_table_var, def __load_lookup_table_vars(executor, main_program, lookup_table_var,
lookup_table_var_path): lookup_table_var_path):
...@@ -217,7 +243,9 @@ def load_persistables_for_increment(dirname, executor, program, ...@@ -217,7 +243,9 @@ def load_persistables_for_increment(dirname, executor, program,
"Distributed Lookup Table Vars from {}, time = {}".format( "Distributed Lookup Table Vars from {}, time = {}".format(
dirname, time.ctime())) dirname, time.ctime()))
_load_persistable_vars(executor, dirname, program, [lookup_table_var]) need_load_vars = program._parameters_on_pservers.get_distributed_vars_by_ep(
program._ps_endpoint)
_load_persistable_vars(executor, dirname, need_load_vars)
__load_lookup_table_vars(executor, program, lookup_table_var, __load_lookup_table_vars(executor, program, lookup_table_var,
lookup_table_var_path) lookup_table_var_path)
...@@ -233,15 +261,62 @@ def load_persistables_for_inference(dirname, executor, program, ...@@ -233,15 +261,62 @@ def load_persistables_for_inference(dirname, executor, program,
Inference with distributed lookup table is a little funky, this function will load distributed Inference with distributed lookup table is a little funky, this function will load distributed
lookup table vars into sparse var, can be used in local inference mode. lookup table vars into sparse var, can be used in local inference mode.
:param dirname(str): The directory path Args:
:param executor(Executor): The executor to run for loading inference model. dirname(str): The directory path
:param program(Program): The parameter server program, which will run on Pserver. executor(Executor): The executor to run for loading inference model.
:param lookup_table_var_name: the distributed lookup tables var name. program(Program): The parameter server program, which will run on Pserver.
:return: None lookup_table_var_name: the distributed lookup tables var name.
Returns:
None
""" """
def __load_lookup_table_vars(executor, dirname, main_program, def _load_persistable_vars(executor, dirname, program, lookup_table_vars):
lookup_table_vars): def _is_checkpoint_var(exclude_fluid_vars=None):
"""
the checkpoint will not save or load all the variables.
var type is FEED_MINIBATCH/FETCH_LIST/RAW or var name ends with @GRAD are discarded.
: param var(Variable)
"""
if exclude_fluid_vars is None:
exclude_fluid_vars = []
def is_valid(var):
if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \
var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \
var.desc.type() == core.VarDesc.VarType.RAW:
return False
# @GRAD are named for gradient variables, checkpoint will not save it.
if "@GRAD" in var.name:
return False
# .trainer_ are named for distribute train variables, checkpoint will not save it.
if ".trainer_" in var.name:
return False
# .block is named for distribute train variables, checkpoint will not save it.
if ".block" in var.name:
return False
if "tmp_" in var.name:
return False
if var.name in exclude_fluid_vars:
return False
return var.persistable
return is_valid
io.load_vars(
executor,
dirname=dirname,
main_program=program,
predicate=_is_checkpoint_var(lookup_table_vars),
filename=None)
def _load_lookup_table_vars(executor, dirname, main_program,
lookup_table_vars):
if not os.path.isdir(dirname): if not os.path.isdir(dirname):
raise ValueError("There is no directory named '%s'", dirname) raise ValueError("There is no directory named '%s'", dirname)
...@@ -313,11 +388,96 @@ def load_persistables_for_inference(dirname, executor, program, ...@@ -313,11 +388,96 @@ def load_persistables_for_inference(dirname, executor, program,
dirname, time.ctime())) dirname, time.ctime()))
_load_persistable_vars(executor, dirname, program, [lookup_table_var_name]) _load_persistable_vars(executor, dirname, program, [lookup_table_var_name])
__load_lookup_table_vars(executor, dirname, program, _load_lookup_table_vars(executor, dirname, program, [lookup_table_var_name])
[lookup_table_var_name])
_logger.info("Finish Load Sparse Program With " _logger.info("Finish Load Sparse Program With "
"Distributed Lookup Table Vars from {}, time = {}".format( "Distributed Lookup Table Vars from {}, time = {}".format(
dirname, time.ctime())) dirname, time.ctime()))
return program return program
def get_inference_model(main_program, feeded_var_names, target_vars):
"""
Prune the given `main_program` to build a new program especially for inference with distributed lookup table ,
and then add `feeded_vars` and `target_vars` in this program.
Args:
main_program(Program|None): The original program, which will be pruned to
build the inference model. If is setted None,
the default main program will be used.
Default: None.
feeded_var_names(list[str]): Names of variables that need to be feeded data
during inference.
target_vars(list[Variable]): Variables from which we can get inference
results.
Returns:
program(Program)
Raises:
ValueError: If `feed_var_names` is not a list of basestring.
ValueError: If `target_vars` is not a list of Variable.
"""
def prepend_feed_ops(inference_program,
feed_target_names,
feed_holder_name='feed'):
if len(feed_target_names) == 0:
return
global_block = inference_program.global_block()
feed_var = global_block.create_var(
name=feed_holder_name,
type=core.VarDesc.VarType.FEED_MINIBATCH,
persistable=True)
for i, name in enumerate(feed_target_names):
out = global_block.var(name)
global_block._prepend_op(
type='feed',
inputs={'X': [feed_var]},
outputs={'Out': [out]},
attrs={'col': i})
def append_fetch_ops(inference_program,
fetch_target_names,
fetch_holder_name='fetch'):
global_block = inference_program.global_block()
fetch_var = global_block.create_var(
name=fetch_holder_name,
type=core.VarDesc.VarType.FETCH_LIST,
persistable=True)
for i, name in enumerate(fetch_target_names):
global_block.append_op(
type='fetch',
inputs={'X': [name]},
outputs={'Out': [fetch_var]},
attrs={'col': i})
origin_program = main_program.clone()
main_program = main_program.clone()
global_block = main_program.global_block()
need_to_remove_op_index = []
for i, op in enumerate(global_block.ops):
op.desc.set_is_target(False)
if op.type == "feed" or op.type == "fetch":
need_to_remove_op_index.append(i)
for index in need_to_remove_op_index[::-1]:
global_block._remove_op(index)
main_program.desc.flush()
main_program = main_program._prune(targets=target_vars)
main_program = main_program._inference_optimize(prune_read_op=True)
fetch_var_names = [v.name for v in target_vars]
prepend_feed_ops(main_program, feeded_var_names)
append_fetch_ops(main_program, fetch_var_names)
return main_program
...@@ -268,8 +268,8 @@ class DataFeeder(object): ...@@ -268,8 +268,8 @@ class DataFeeder(object):
Args: Args:
reader(function): the reader is the function which can generate data. reader(function): the reader is the function which can generate data.
multi_devices(bool): whether to use multiple devices or not. multi_devices(bool): whether to use multiple devices or not.
num_places(int): if the multi_devices is True, you can specify the number num_places(int): if multi_devices is True, you can specify the number
of GPU to use, if 'num_places' is None, the function will use all the of GPU to use, if multi_devices is None, the function will use all the
GPU of the current machine. Default None. GPU of the current machine. Default None.
drop_last(bool): whether to drop the last batch if the drop_last(bool): whether to drop the last batch if the
size of the last batch is less than batch_size. Default True. size of the last batch is less than batch_size. Default True.
...@@ -278,7 +278,7 @@ class DataFeeder(object): ...@@ -278,7 +278,7 @@ class DataFeeder(object):
dict: the result of conversion. dict: the result of conversion.
Raises: Raises:
ValueError: If drop_last is False and the data batch which cannot fit for devices. ValueError: If drop_last is False and the data batch cannot fit for devices.
""" """
def __reader_creator__(): def __reader_creator__():
......
...@@ -470,13 +470,21 @@ class Executor(object): ...@@ -470,13 +470,21 @@ class Executor(object):
program(Program|CompiledProgram): the program that need to run, program(Program|CompiledProgram): the program that need to run,
if not provided, then default_main_program (not compiled) will be used. if not provided, then default_main_program (not compiled) will be used.
feed(dict): feed variable map, e.g. {"image": ImageData, "label": LabelData} feed(dict): feed variable map, e.g. {"image": ImageData, "label": LabelData}
fetch_list(list): a list of variable or variable names that user want to get, run will return them according to this list. fetch_list(list): a list of variable or variable names that user
feed_var_name(str): the name for the input variable of feed Operator. wants to get, this method will return them according to this list.
fetch_var_name(str): the name for the output variable of fetch Operator. feed_var_name(str): the name for the input variable of
scope(Scope): the scope used to run this program, you can switch it to different scope. default is global_scope feed Operator.
fetch_var_name(str): the name for the output variable of
fetch Operator.
scope(Scope): the scope used to run this program, you can switch
it to different scope. default is global_scope
return_numpy(bool): if convert the fetched tensor to numpy return_numpy(bool): if convert the fetched tensor to numpy
use_program_cache(bool): set use_program_cache to true if program not changed compare to the last step. use_program_cache(bool): whether to use the cached program
settings across batches. Setting it be true would be faster
only when (1) the program is not compiled with data parallel,
and (2) program, feed variable names and fetch_list variable
names do not changed compared to the last step.
Returns: Returns:
list(numpy.array): fetch result according to fetch_list. list(numpy.array): fetch result according to fetch_list.
......
...@@ -26,8 +26,12 @@ from .nn import * ...@@ -26,8 +26,12 @@ from .nn import *
from . import tracer from . import tracer
from .tracer import * from .tracer import *
from . import profiler
from .profiler import *
__all__ = [] __all__ = []
__all__ += layers.__all__ __all__ += layers.__all__
__all__ += base.__all__ __all__ += base.__all__
__all__ += nn.__all__ __all__ += nn.__all__
__all__ += tracer.__all__ __all__ += tracer.__all__
__all__ += profiler.__all__
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from .. import core
__all__ = [
'start_gperf_profiler',
'stop_gperf_profiler',
]
def start_gperf_profiler():
core.start_imperative_gperf_profiler()
def stop_gperf_profiler():
core.stop_imperative_gperf_profiler()
...@@ -1448,12 +1448,13 @@ class DynamicRNN(object): ...@@ -1448,12 +1448,13 @@ class DynamicRNN(object):
self.input_array = [] self.input_array = []
self.mem_link = [] self.mem_link = []
def step_input(self, x): def step_input(self, x, level=0):
""" """
Mark a sequence as a dynamic RNN input. Mark a sequence as a dynamic RNN input.
Args: Args:
x(Variable): The input sequence. x(Variable): The input sequence.
level(int): The level of lod used to split steps. Default: 0.
Returns: Returns:
The current timestep in the input sequence. The current timestep in the input sequence.
...@@ -1471,7 +1472,8 @@ class DynamicRNN(object): ...@@ -1471,7 +1472,8 @@ class DynamicRNN(object):
parent_block.append_op( parent_block.append_op(
type='lod_rank_table', type='lod_rank_table',
inputs={"X": x}, inputs={"X": x},
outputs={"Out": self.lod_rank_table}) outputs={"Out": self.lod_rank_table},
attrs={"level": level})
self.max_seq_len = parent_block.create_var( self.max_seq_len = parent_block.create_var(
name=unique_name.generate('dynamic_rnn_max_seq_len'), name=unique_name.generate('dynamic_rnn_max_seq_len'),
dtype='int64') dtype='int64')
......
...@@ -24,6 +24,7 @@ import inspect ...@@ -24,6 +24,7 @@ import inspect
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant, NumpyArrayInitializer from ..initializer import Normal, Constant, NumpyArrayInitializer
from ..framework import Variable, OpProtoHolder, _in_imperative_mode from ..framework import Variable, OpProtoHolder, _in_imperative_mode
from ..imperative import base
from ..param_attr import ParamAttr from ..param_attr import ParamAttr
from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_ from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_
from .tensor import concat, assign from .tensor import concat, assign
...@@ -9138,6 +9139,10 @@ def _elementwise_op(helper): ...@@ -9138,6 +9139,10 @@ def _elementwise_op(helper):
op_type = helper.layer_type op_type = helper.layer_type
x = helper.kwargs.get('x', None) x = helper.kwargs.get('x', None)
y = helper.kwargs.get('y', None) y = helper.kwargs.get('y', None)
if _in_imperative_mode():
x = base.to_variable(x)
y = base.to_variable(y)
assert x is not None, 'x cannot be None in {}'.format(op_type) assert x is not None, 'x cannot be None in {}'.format(op_type)
assert y is not None, 'y cannot be None in {}'.format(op_type) assert y is not None, 'y cannot be None in {}'.format(op_type)
axis = helper.kwargs.get('axis', -1) axis = helper.kwargs.get('axis', -1)
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from paddle.fluid.tests.unittests.op_test import OpTest
from mkldnn_op_test import format_reorder
class TestTransposeOp(OpTest):
def setUp(self):
self.init_op_type()
self.initTestCase()
self.initInputData()
self.use_mkldnn = True
self.axis = (0, 2, 3, 1)
self.inputs = {
'X': format_reorder(self.input_data, self.shape)
} #transform data format to 'NHWC' for INT8 transpose specially.
self.attrs = {
'axis': list(self.axis),
'use_mkldnn': self.use_mkldnn,
}
self.outputs = {
'XShape': np.random.random(self.shape).astype('int8'),
'Out': self.inputs['X'].transpose(self.axis)
}
def init_op_type(self):
self.op_type = "transpose2"
def test_check_output(self):
self.check_output(no_check_set=['XShape'])
def initTestCase(self):
self.shape = (2, 3, 4, 5)
def initInputData(self):
self.input_data = (
np.random.randint(0, 100, self.shape) - 50).astype('int8')
class TestINT8Case(TestTransposeOp):
def initTestCase(self):
self.shape = (2, 4, 6, 8)
def initInputData(self):
self.input_data = (
np.random.randint(0, 100, self.shape) - 50).astype('int8')
class TestUINT8Case(TestTransposeOp):
def initTestCase(self):
self.shape = (1, 3, 5, 7)
def initDataType(self):
self.input_data = (np.random.randint(0, 100,
self.shape)).astype('uint8')
if __name__ == '__main__':
unittest.main()
...@@ -174,6 +174,60 @@ class TestLayer(LayerTest): ...@@ -174,6 +174,60 @@ class TestLayer(LayerTest):
self.assertTrue(np.allclose(static_ret[i], static_ret2[i])) self.assertTrue(np.allclose(static_ret[i], static_ret2[i]))
self.assertTrue(np.allclose(static_ret[i], dy_ret[i]._numpy())) self.assertTrue(np.allclose(static_ret[i], dy_ret[i]._numpy()))
def test_elementwise_math(self):
n = np.ones([3, 3], dtype='float32')
n2 = np.ones([3, 3], dtype='float32') * 1.1
n3 = np.ones([3, 3], dtype='float32') * 2
n4 = np.ones([3, 3], dtype='float32') * 3
n5 = np.ones([3, 3], dtype='float32') * 4
n6 = np.ones([3, 3], dtype='float32') * 5
with self.static_graph():
t = layers.data(name='t', shape=[3, 3], dtype='float32')
t2 = layers.data(name='t2', shape=[3, 3], dtype='float32')
t3 = layers.data(name='t3', shape=[3, 3], dtype='float32')
t4 = layers.data(name='t4', shape=[3, 3], dtype='float32')
t5 = layers.data(name='t5', shape=[3, 3], dtype='float32')
t6 = layers.data(name='t6', shape=[3, 3], dtype='float32')
ret = layers.elementwise_add(t, t2)
ret = layers.elementwise_pow(ret, t3)
ret = layers.elementwise_div(ret, t4)
ret = layers.elementwise_sub(ret, t5)
ret = layers.elementwise_mul(ret, t6)
static_ret = self.get_static_graph_result(
feed={
't': n,
't2': n2,
't3': n3,
't4': n4,
't5': n5,
't6': n6
},
fetch_list=[ret])[0]
with self.dynamic_graph():
ret = layers.elementwise_add(n, n2)
ret = layers.elementwise_pow(ret, n3)
ret = layers.elementwise_div(ret, n4)
ret = layers.elementwise_sub(ret, n5)
dy_ret = layers.elementwise_mul(ret, n6)
self.assertTrue(
np.allclose(static_ret, dy_ret._numpy()),
'%s vs %s' % (static_ret, dy_ret._numpy()))
def test_elementwise_minmax(self):
n = np.ones([3, 3], dtype='float32')
n2 = np.ones([3, 3], dtype='float32') * 2
with self.dynamic_graph():
min_ret = layers.elementwise_min(n, n2)
max_ret = layers.elementwise_max(n, n2)
self.assertTrue(np.allclose(n, min_ret._numpy()))
self.assertTrue(np.allclose(n2, max_ret._numpy()))
class TestBook(unittest.TestCase): class TestBook(unittest.TestCase):
def test_fit_a_line(self): def test_fit_a_line(self):
......
...@@ -87,5 +87,31 @@ class TestFP16(TestSliceOp): ...@@ -87,5 +87,31 @@ class TestFP16(TestSliceOp):
place, ['Input'], 'Out', max_relative_error=0.006) place, ['Input'], 'Out', max_relative_error=0.006)
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestFP16_2(TestSliceOp):
def config(self):
self.dtype = "float16"
self.input = np.random.random([3, 4, 5]).astype(self.dtype)
self.starts = [0]
self.ends = [1]
self.axes = [1]
self.out = self.input[:, 0:1, :]
def test_check_output(self):
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=1e-5)
def test_check_grad_normal(self):
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_grad_with_place(
place, ['Input'],
'Out',
max_relative_error=0.006,
numeric_grad_delta=0.5)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -38,9 +38,8 @@ items. It can be any function with no parameter that creates a iterable ...@@ -38,9 +38,8 @@ items. It can be any function with no parameter that creates a iterable
Element produced from the iterable should be a **single** entry of data, Element produced from the iterable should be a **single** entry of data,
**not** a mini batch. That entry of data could be a single item, or a tuple of **not** a mini batch. That entry of data could be a single item, or a tuple of
items. items.
Item should be of `supported type <http://www.paddlepaddle.org/doc/ui/data_provider Item should be of supported type (e.g., numpy array or list/tuple of float
/pydataprovider2.html?highlight=dense_vector#input-types>`_ (e.g., numpy 1d or int).
array of float32, int, list of int)
An example implementation for single item data reader creator: An example implementation for single item data reader creator:
...@@ -62,8 +61,6 @@ An example implementation for multiple item data reader creator: ...@@ -62,8 +61,6 @@ An example implementation for multiple item data reader creator:
yield numpy.random.uniform(-1, 1, size=width*height), label yield numpy.random.uniform(-1, 1, size=width*height), label
return reader return reader
TODO(yuyang18): Should we add whole design doc here?
""" """
import paddle.reader.decorator import paddle.reader.decorator
......
...@@ -44,8 +44,11 @@ def text_file(path): ...@@ -44,8 +44,11 @@ def text_file(path):
Creates a data reader that outputs text line by line from given text file. Creates a data reader that outputs text line by line from given text file.
Trailing new line ('\\\\n') of each line will be removed. Trailing new line ('\\\\n') of each line will be removed.
:path: path of the text file. Args:
:returns: data reader of text file path (str): path of the text file.
Returns:
callable: data reader of text file.
""" """
def reader(): def reader():
...@@ -59,10 +62,15 @@ def text_file(path): ...@@ -59,10 +62,15 @@ def text_file(path):
def recordio(paths, buf_size=100): def recordio(paths, buf_size=100):
""" """
Creates a data reader from given RecordIO file paths separated by ",", Creates a data reader from given RecordIO file paths separated
glob pattern is supported. by ",", glob pattern is supported.
:path: path of recordio files, can be a string or a string list.
:returns: data reader of recordio files. Args:
paths (str|list(str)): path of recordio files.
buf_size (int): prefetched buffer size.
Returns:
callable: data reader of recordio files.
""" """
import recordio as rec import recordio as rec
......
...@@ -242,20 +242,18 @@ class XmapEndSignal(): ...@@ -242,20 +242,18 @@ class XmapEndSignal():
def xmap_readers(mapper, reader, process_num, buffer_size, order=False): def xmap_readers(mapper, reader, process_num, buffer_size, order=False):
""" """
Use multiprocess to map samples from reader by a mapper defined by user. Use multi-threads to map samples from reader by a mapper defined by user.
And this function contains a buffered decorator.
:param mapper: a function to map sample. Args:
:type mapper: callable mapper (callable): a function to map the data from reader.
:param reader: the data reader to read from reader (callable): a data reader which yields the data.
:type reader: callable process_num (int): thread number to handle original sample.
:param process_num: process number to handle original sample buffer_size (int): size of the queue to read data in.
:type process_num: int order (bool): whether to keep the data order from original reader.
:param buffer_size: max buffer size Default False.
:type buffer_size: int
:param order: keep the order of reader Returns:
:type order: bool callable: a decorated reader with data mapping.
:return: the decarated reader
:rtype: callable
""" """
end = XmapEndSignal() end = XmapEndSignal()
...@@ -477,7 +475,7 @@ class PipeReader: ...@@ -477,7 +475,7 @@ class PipeReader:
""" """
:param cut_lines: cut buffer to lines :param cut_lines: cut buffer to lines
:type cut_lines: bool :type cut_lines: bool
:param line_break: line break of the file, like \n or \r :param line_break: line break of the file, like '\\\\n' or '\\\\r'
:type line_break: string :type line_break: string
:return: one line or a buffer of bytes :return: one line or a buffer of bytes
......
...@@ -153,3 +153,9 @@ done ...@@ -153,3 +153,9 @@ done
# Restore LD_LIBRARY_PATH # Restore LD_LIBRARY_PATH
LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}" LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}"
# According to ar issues: https://lists.gnu.org/archive/html/bug-binutils/2016-05/msg00211.html
# we should install new version ar with 64-bit supported here
wget https://ftp.gnu.org/gnu/binutils/binutils-2.27.tar.gz
tar xzf binutils-2.27.tar.gz && cd binutils-2.27
./configure --prefix=/opt/rh/devtoolset-2/root/usr/ --enable-64-bit-archive && make -j `nproc` && make install
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册