diff --git a/benchmark/paddle/image/resnet.py b/benchmark/paddle/image/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..6ae1857642e8df4b3859eec68a3a5227d1c4fcb3 --- /dev/null +++ b/benchmark/paddle/image/resnet.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python +from paddle.trainer_config_helpers import * + +height = 224 +width = 224 +num_class = 1000 +batch_size = get_config_arg('batch_size', int, 64) +layer_num = get_config_arg("layer_num", int, 50) +is_test = get_config_arg("is_test", bool, False) + +args = {'height': height, 'width': width, 'color': True, 'num_class': num_class} +define_py_data_sources2( + "train.list", None, module="provider", obj="process", args=args) + +settings( + batch_size=batch_size, + learning_rate=0.01 / batch_size, + learning_method=MomentumOptimizer(0.9), + regularization=L2Regularization(0.0005 * batch_size)) + + +#######################Network Configuration ############# +def conv_bn_layer(name, + input, + filter_size, + num_filters, + stride, + padding, + channels=None, + active_type=ReluActivation()): + """ + A wrapper for conv layer with batch normalization layers. + Note: + conv layer has no activation. + """ + + tmp = img_conv_layer( + name=name + "_conv", + input=input, + filter_size=filter_size, + num_channels=channels, + num_filters=num_filters, + stride=stride, + padding=padding, + act=LinearActivation(), + bias_attr=False) + return batch_norm_layer( + name=name + "_bn", input=tmp, act=active_type, use_global_stats=is_test) + + +def bottleneck_block(name, input, num_filters1, num_filters2): + """ + A wrapper for bottlenect building block in ResNet. + Last conv_bn_layer has no activation. + Addto layer has activation of relu. + """ + last_name = conv_bn_layer( + name=name + '_branch2a', + input=input, + filter_size=1, + num_filters=num_filters1, + stride=1, + padding=0) + last_name = conv_bn_layer( + name=name + '_branch2b', + input=last_name, + filter_size=3, + num_filters=num_filters1, + stride=1, + padding=1) + last_name = conv_bn_layer( + name=name + '_branch2c', + input=last_name, + filter_size=1, + num_filters=num_filters2, + stride=1, + padding=0, + active_type=LinearActivation()) + + return addto_layer( + name=name + "_addto", input=[input, last_name], act=ReluActivation()) + + +def mid_projection(name, input, num_filters1, num_filters2, stride=2): + """ + A wrapper for middile projection in ResNet. + projection shortcuts are used for increasing dimensions, + and other shortcuts are identity + branch1: projection shortcuts are used for increasing + dimensions, has no activation. + branch2x: bottleneck building block, shortcuts are identity. + """ + # stride = 2 + branch1 = conv_bn_layer( + name=name + '_branch1', + input=input, + filter_size=1, + num_filters=num_filters2, + stride=stride, + padding=0, + active_type=LinearActivation()) + + last_name = conv_bn_layer( + name=name + '_branch2a', + input=input, + filter_size=1, + num_filters=num_filters1, + stride=stride, + padding=0) + last_name = conv_bn_layer( + name=name + '_branch2b', + input=last_name, + filter_size=3, + num_filters=num_filters1, + stride=1, + padding=1) + + last_name = conv_bn_layer( + name=name + '_branch2c', + input=last_name, + filter_size=1, + num_filters=num_filters2, + stride=1, + padding=0, + active_type=LinearActivation()) + + return addto_layer( + name=name + "_addto", input=[branch1, last_name], act=ReluActivation()) + + +img = data_layer(name='image', size=height * width * 3) + + +def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3): + """ + A wrapper for 50,101,152 layers of ResNet. + res2_num: number of blocks stacked in conv2_x + res3_num: number of blocks stacked in conv3_x + res4_num: number of blocks stacked in conv4_x + res5_num: number of blocks stacked in conv5_x + """ + # For ImageNet + # conv1: 112x112 + tmp = conv_bn_layer( + "conv1", + input=img, + filter_size=7, + channels=3, + num_filters=64, + stride=2, + padding=3) + tmp = img_pool_layer(name="pool1", input=tmp, pool_size=3, stride=2) + + # conv2_x: 56x56 + tmp = mid_projection( + name="res2_1", input=tmp, num_filters1=64, num_filters2=256, stride=1) + for i in xrange(2, res2_num + 1, 1): + tmp = bottleneck_block( + name="res2_" + str(i), input=tmp, num_filters1=64, num_filters2=256) + + # conv3_x: 28x28 + tmp = mid_projection( + name="res3_1", input=tmp, num_filters1=128, num_filters2=512) + for i in xrange(2, res3_num + 1, 1): + tmp = bottleneck_block( + name="res3_" + str(i), + input=tmp, + num_filters1=128, + num_filters2=512) + + # conv4_x: 14x14 + tmp = mid_projection( + name="res4_1", input=tmp, num_filters1=256, num_filters2=1024) + for i in xrange(2, res4_num + 1, 1): + tmp = bottleneck_block( + name="res4_" + str(i), + input=tmp, + num_filters1=256, + num_filters2=1024) + + # conv5_x: 7x7 + tmp = mid_projection( + name="res5_1", input=tmp, num_filters1=512, num_filters2=2048) + for i in xrange(2, res5_num + 1, 1): + tmp = bottleneck_block( + name="res5_" + str(i), + input=tmp, + num_filters1=512, + num_filters2=2048) + + tmp = img_pool_layer( + name='avgpool', + input=tmp, + pool_size=7, + stride=1, + pool_type=AvgPooling()) + + return fc_layer(input=tmp, size=num_class, act=SoftmaxActivation()) + + +if layer_num == 50: + resnet = deep_res_net(3, 4, 6, 3) +elif layer_num == 101: + resnet = deep_res_net(3, 4, 23, 3) +elif layer_num == 152: + resnet = deep_res_net(3, 8, 36, 3) +else: + print("Wrong layer number.") + +lbl = data_layer(name="label", size=num_class) +loss = cross_entropy(name='loss', input=resnet, label=lbl) +inputs(img, lbl) +outputs(loss) diff --git a/benchmark/paddle/image/run_mkldnn.sh b/benchmark/paddle/image/run_mkldnn.sh index e31fec1cd850157d90ddcab2d559d52381ecd317..a4527e04968cf8c8c3c31d16f50bc3e28381f6d8 100755 --- a/benchmark/paddle/image/run_mkldnn.sh +++ b/benchmark/paddle/image/run_mkldnn.sh @@ -5,22 +5,23 @@ function train() { export OMP_DYNAMIC="FALSE" export KMP_AFFINITY="granularity=fine,compact,0,0" topology=$1 - bs=$2 - use_mkldnn=$3 - if [ $3 == "True" ]; then + layer_num=$2 + bs=$3 + use_mkldnn=$4 + if [ $4 == "True" ]; then thread=1 - log="logs/${topology}-mkldnn-${bs}.log" - elif [ $3 == "False" ]; then + log="logs/${topology}-${layer_num}-mkldnn-${bs}.log" + elif [ $4 == "False" ]; then thread=`nproc` # each trainer_count use only 1 core to avoid conflict export OMP_NUM_THREADS=1 export MKL_NUM_THREADS=1 - log="logs/${topology}-${thread}mklml-${bs}.log" + log="logs/${topology}-${layer_num}-${thread}mklml-${bs}.log" else echo "Wrong input $3, use True or False." exit 0 fi - args="batch_size=${bs}" + args="batch_size=${bs},layer_num=${layer_num}" config="${topology}.py" paddle train --job=time \ --config=$config \ @@ -40,12 +41,9 @@ if [ ! -d "logs" ]; then mkdir logs fi -#========== mkldnn ==========# -train vgg 64 True -train vgg 128 True -train vgg 256 True - -#========== mklml ===========# -train vgg 64 False -train vgg 128 False -train vgg 256 False +for use_mkldnn in True False; do + for batchsize in 64 128 256; do + train vgg 19 $batchsize $use_mkldnn + train resnet 50 $batchsize $use_mkldnn + done +done diff --git a/doc/design/float16.md b/doc/design/float16.md index bc1c20c3d122e783e0cd189372dab08d35042d45..078801ba2ed969d26dd31d5ec4ed268686cf7016 100644 --- a/doc/design/float16.md +++ b/doc/design/float16.md @@ -55,6 +55,6 @@ After float16 class is available, some of the future items are below: - Update pybind/tensor_py.h to bind c++ float16 with numpy float16. -- Modify `IndicateDataType()` method in `framework/operator.h` to make it compatible with float16. +- Modify `GetKernelType()` method in `framework/operator.h` to make it compatible with float16. - Create a type-casting operator that can convert the data type in tensor between float16 and other types. diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc index 239ae5e1233c7f5c506930df374b5d0cc8de7c8d..10c785e04c4fa2192f9c95513009cf7d8c123868 100644 --- a/paddle/framework/ddim.cc +++ b/paddle/framework/ddim.cc @@ -117,7 +117,7 @@ int64_t DDim::operator[](int idx) const { return boost::apply_visitor(DynamicConstIndexer(idx), var); } -int64_t DDim::size() const { return arity(*this); } +int DDim::size() const { return arity(*this); } bool DDim::operator==(DDim d) const { if (var.which() != d.getVar().which()) { diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h index 2a5e2d2b6948b045642dbac5e83992a048ecb63d..aa773868ab4b68acbc46dfa2cd2569d8b8b7789d 100644 --- a/paddle/framework/ddim.h +++ b/paddle/framework/ddim.h @@ -71,7 +71,7 @@ struct DDim { DDim operator*(DDim d) const; - int64_t size() const; + int size() const; }; /** diff --git a/paddle/framework/lod_rank_table.cc b/paddle/framework/lod_rank_table.cc index 68a83def7e5a12fe3261be9e27cbb9bb54e1f8ad..1c2fba70c8ab0827ba6d1563f08cd0820650822e 100644 --- a/paddle/framework/lod_rank_table.cc +++ b/paddle/framework/lod_rank_table.cc @@ -31,6 +31,7 @@ void LoDRankTable::Reset(const LoD& lod, size_t level) { TableItem item; item.index = i; item.length = vec[i + 1] - vec[i]; + VLOG(10) << "Add item to rank table " << item.index << " " << item.length; items_.emplace_back(item); } // NOTE(yuyang18): diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index 2bcfffb134f46416301b28043e1875e822fbc3e4..a0f2906c749054c1ff9f624e47df432ec2bd6ac8 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -27,6 +27,20 @@ namespace paddle { namespace framework { +std::ostream& operator<<(std::ostream& os, const LoD& lod) { + os << "{"; + for (auto& v : lod) { + os << "{"; + for (auto& i : v) { + os << i << ","; + } + os << "}"; + } + os << "}"; + + return os; +} + LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) { LoD new_lod; new_lod.reserve(level_end - level_begin); @@ -136,37 +150,35 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin, ShareDataWith(Slice(begin, end)); } -void GetFineGrainedLoDLength(const LoD& lod, size_t start_idx, size_t end_idx, - std::vector>* lod_length, - size_t* start_offset) { - lod_length->clear(); - PADDLE_ENFORCE(start_idx < lod.size() - 1, - "start_idx should be >= 0 and < lod.size() - 1."); - PADDLE_ENFORCE(end_idx < lod.size(), - "end_idx should be >= 0 and < lod.size()."); - PADDLE_ENFORCE_LE(start_idx, end_idx, - "start_idx should be less than end_idx."); - for (size_t level_idx = 0; level_idx < lod.size(); ++level_idx) { +using LoDAndOffset = std::pair>; +LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD& lod, size_t start_idx, + size_t end_idx, size_t start_level) { + LoD sub_lod; + + for (size_t level_idx = start_level; level_idx < lod.size(); ++level_idx) { + PADDLE_ENFORCE_LE(start_idx, end_idx); + PADDLE_ENFORCE_LT(end_idx, lod[level_idx].size()); std::vector level_lens; for (size_t i = start_idx; i < end_idx; ++i) { level_lens.push_back(lod[level_idx][i + 1] - lod[level_idx][i]); } - lod_length->emplace_back(level_lens); + sub_lod.emplace_back(level_lens); start_idx = lod[level_idx][start_idx]; end_idx = lod[level_idx][end_idx]; } - *start_offset = start_idx; + + return LoDAndOffset{sub_lod, {start_idx, end_idx}}; } -void AppendLoD(LoD* lod, const std::vector>& lod_length) { - PADDLE_ENFORCE_EQ( - lod->size(), lod_length.size(), +void AppendLoD(LoD* lod, const LoD& lod_length) { + PADDLE_ENFORCE( + lod->empty() || lod->size() == lod_length.size(), "The lod_length should has the same size with the appended lod."); + if (lod->empty()) { + *lod = LoD(lod_length.size(), std::vector({0})); + } for (size_t i = 0; i < lod->size(); ++i) { auto& level = (*lod)[i]; - if (level.empty()) { - level.push_back(0); - } for (size_t len : lod_length[i]) { level.push_back(level.back() + len); } diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 1437da399a28288429527f9672ace0482825159f..7f8a51cc581e759bc707e506ac7cdeb3680f40ac 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -56,6 +56,8 @@ using Vector = thrust::host_vector< */ using LoD = std::vector>; +std::ostream& operator<<(std::ostream& os, const LoD& lod); + /* * Slice levels from a LoD. * NOTE the lowest level should always be the absolute offsets of the underlying @@ -181,11 +183,10 @@ LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level, return tensor; } -void GetFineGrainedLoDLength(const LoD& lod, size_t start_idx, size_t end_idx, - std::vector>* lod_length, - size_t* start_offset); +std::pair> GetSubLoDAndAbsoluteOffset( + const LoD& lod, size_t start_idx, size_t end_idx, size_t start_level); -void AppendLoD(LoD* lod, const std::vector>& lod_length); +void AppendLoD(LoD* lod, const LoD& lod_length); } // namespace framework } // namespace paddle diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index bf61c9ee7aa99b06e78b9b27dff72d216c74a86e..02d84b68233f2fdfc66e1df2fc7ce20307cadd94 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -146,43 +146,44 @@ TEST(LodExpand, test) { TEST(LoD, GetFineGrainedLoDLength) { LoD lod; - lod.push_back(std::vector{0, 2, 4, 5}); - lod.push_back(std::vector{0, 1, 6, 8, 10, 11}); + lod.push_back(std::vector({0, 2, 4, 5})); + lod.push_back(std::vector({0, 1, 6, 8, 10, 11})); lod.push_back( - std::vector{0, 2, 5, 7, 10, 12, 15, 17, 20, 24, 26, 29}); + std::vector({0, 2, 5, 7, 10, 12, 15, 17, 20, 24, 26, 29})); - std::vector> lod_length; - size_t start_offset; - paddle::framework::GetFineGrainedLoDLength(lod, 1, 2, &lod_length, - &start_offset); + auto lod_and_offset = + paddle::framework::GetSubLoDAndAbsoluteOffset(lod, 1, 2, 0); + LoD lod_length = lod_and_offset.first; + size_t start_offset = lod_and_offset.second.first; + size_t end_offset = lod_and_offset.second.second; - std::vector> expected; + LoD expected; expected.push_back(std::vector{2}); expected.push_back(std::vector{2, 2}); expected.push_back(std::vector{2, 3, 4, 2}); EXPECT_EQ(lod_length, expected); EXPECT_EQ(start_offset, 15UL); + EXPECT_EQ(end_offset, 26UL); } TEST(LoD, AppendLoD) { - std::vector> lod_lens; - lod_lens.push_back(std::vector{2}); - lod_lens.push_back(std::vector{2, 2}); - lod_lens.push_back(std::vector{2, 3, 4, 2}); + LoD lod_lens; + lod_lens.push_back(std::vector({2})); + lod_lens.push_back(std::vector({2, 2})); + lod_lens.push_back(std::vector({2, 3, 4, 2})); LoD origin; - origin.push_back(std::vector{0, 2}); - origin.push_back(std::vector{0, 1, 6}); - origin.push_back(std::vector{0, 2, 5, 7, 10, 12, 15}); + origin.push_back(std::vector({0, 2})); + origin.push_back(std::vector({0, 1, 6})); + origin.push_back(std::vector({0, 2, 5, 7, 10, 12, 15})); paddle::framework::AppendLoD(&origin, lod_lens); LoD expected; - expected.push_back(std::vector{0, 2, 4}); - expected.push_back(std::vector{0, 1, 6, 8, 10}); + expected.push_back(std::vector({0, 2, 4})); + expected.push_back(std::vector({0, 1, 6, 8, 10})); expected.push_back( - std::vector{0, 2, 5, 7, 10, 12, 15, 17, 20, 24, 26}); - + std::vector({0, 2, 5, 7, 10, 12, 15, 17, 20, 24, 26})); EXPECT_EQ(origin, expected); } diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 2bb5e0e8ec29fb2df81549650aa0c65bc1e51c49..daade439e5232f06be72bc5bb1e2285124f2c3a4 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -92,8 +92,7 @@ struct OpKernelRegistrarFunctor { void operator()(const char* op_type) const { using T = typename KERNEL_TYPE::ELEMENT_TYPE; - OperatorWithKernel::OpKernelKey key(ToDataType(std::type_index(typeid(T))), - PlaceType()); + OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType()); OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KERNEL_TYPE); constexpr auto size = std::tuple_size>::value; diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 22a7d9728a05950d66a1acd23d6fb18263f4ff6b..3276f8af396fe58450a8dc6713fe61e49d5ca708 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -254,8 +254,7 @@ std::vector ExecutionContext::MultiOutput( return res; } -std::ostream& operator<<(std::ostream& os, - const OperatorWithKernel::OpKernelKey& kernel_key) { +std::ostream& operator<<(std::ostream& os, const OpKernelType& kernel_key) { os << "place[" << kernel_key.place_ << "]:data_type[" << kernel_key.data_type_ << "]"; return os; @@ -432,7 +431,7 @@ void OperatorWithKernel::Run(const Scope& scope, // check if op[type] have kernel for kernel_key OpKernelMap& kernels = kernels_iter->second; - auto kernel_key = OpKernelKey(IndicateDataType(ctx), dev_ctx); + auto kernel_key = GetKernelType(ctx); auto kernel_iter = kernels.find(kernel_key); if (kernel_iter == kernels.end()) { @@ -440,6 +439,41 @@ void OperatorWithKernel::Run(const Scope& scope, } kernel_iter->second->Compute(ctx); + + // throws errors if have. + dev_ctx.Finish(); +} +OpKernelType OperatorWithKernel::GetKernelType( + const ExecutionContext& ctx) const { + return OpKernelType(IndicateDataType(ctx), ctx.device_context()); +} +DataType OperatorWithKernel::IndicateDataType( + const ExecutionContext& ctx) const { + auto& scope = ctx.scope(); + int data_type = -1; + for (auto& input : this->inputs_) { + for (auto& ipt_name : input.second) { + auto* var = scope.FindVar(ipt_name); + if (var != nullptr) { + const Tensor* t = nullptr; + if (var->IsType()) { + t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); + } else if (var->IsType()) { + t = &(var->Get().value()); + } + if (t != nullptr) { + int tmp = static_cast(ToDataType(t->type())); + PADDLE_ENFORCE(tmp == data_type || data_type == -1, + "DataType of Paddle Op %s must be the same.", Type()); + data_type = tmp; + } + } + } + } + PADDLE_ENFORCE(data_type != -1, "DataType should be indicated by input"); + return static_cast(data_type); } } // namespace framework diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index a1303a90980b40ff03bce1ab1a6f67bbbf952bcf..60861d92933dd100f877bec8d43f9b924f951e60 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -345,27 +345,10 @@ class OpKernel : public OpKernelBase { using ELEMENT_TYPE = T; }; -class OperatorWithKernel : public OperatorBase { - public: - struct OpKernelKey { - platform::Place place_; - DataType data_type_; - - OpKernelKey(DataType data_type, platform::Place place) - : place_(place), data_type_(data_type) {} - - OpKernelKey(DataType data_type, const platform::DeviceContext& dev_ctx) - : place_(dev_ctx.GetPlace()), data_type_(data_type) {} - - bool operator==(const OpKernelKey& o) const { - return platform::places_are_same_class(place_, o.place_) && - data_type_ == o.data_type_; - } - }; - - struct OpKernelHash { +struct OpKernelType { + struct Hash { std::hash hash_; - size_t operator()(const OpKernelKey& key) const { + size_t operator()(const OpKernelType& key) const { int place = key.place_.which(); int data_type = static_cast(key.data_type_); int pre_hash = data_type << NUM_PLACE_TYPE_LIMIT_IN_BIT | @@ -374,9 +357,26 @@ class OperatorWithKernel : public OperatorBase { } }; + platform::Place place_; + DataType data_type_; + + OpKernelType(DataType data_type, platform::Place place) + : place_(place), data_type_(data_type) {} + + OpKernelType(DataType data_type, const platform::DeviceContext& dev_ctx) + : place_(dev_ctx.GetPlace()), data_type_(data_type) {} + + bool operator==(const OpKernelType& o) const { + return platform::places_are_same_class(place_, o.place_) && + data_type_ == o.data_type_; + } +}; + +class OperatorWithKernel : public OperatorBase { + public: using OpKernelMap = - std::unordered_map, - OpKernelHash>; + std::unordered_map, + OpKernelType::Hash>; OperatorWithKernel(const std::string& type, const VariableNameMap& inputs, const VariableNameMap& outputs, const AttributeMap& attrs) @@ -404,40 +404,15 @@ class OperatorWithKernel : public OperatorBase { } protected: + virtual OpKernelType GetKernelType(const ExecutionContext& ctx) const; + + private: // indicate kernel DataType by input data. Defaultly all input data must be // same. - virtual DataType IndicateDataType(const ExecutionContext& ctx) const { - auto& scope = ctx.scope(); - int data_type = -1; - for (auto& input : this->inputs_) { - for (auto& ipt_name : input.second) { - auto* var = scope.FindVar(ipt_name); - if (var != nullptr) { - const Tensor* t = nullptr; - if (var->IsType()) { - t = &var->Get(); - } else if (var->IsType()) { - t = &var->Get(); - } else if (var->IsType()) { - t = &(var->Get().value()); - } - if (t != nullptr) { - int tmp = static_cast(ToDataType(t->type())); - PADDLE_ENFORCE(tmp == data_type || data_type == -1, - "DataType of Paddle Op %s must be the same.", - Type()); - data_type = tmp; - } - } - } - } - PADDLE_ENFORCE(data_type != -1, "DataType should be indicated by input"); - return static_cast(data_type); - } + DataType IndicateDataType(const ExecutionContext& ctx) const; }; -std::ostream& operator<<(std::ostream& os, - const OperatorWithKernel::OpKernelKey& kernel_key); +std::ostream& operator<<(std::ostream& os, const OpKernelType& kernel_key); extern bool OpSupportGPU(const std::string& op_type); diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc index 42e0d52eed3911d8e684e76a88bc690ca0783ce5..1e19f82b341768142258ba4a5dfa246d87ba4c43 100644 --- a/paddle/framework/operator_test.cc +++ b/paddle/framework/operator_test.cc @@ -114,8 +114,8 @@ class OpWithKernelTest : public OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override {} - DataType IndicateDataType(const ExecutionContext& ctx) const override { - return DataType::FP32; + OpKernelType GetKernelType(const ExecutionContext& ctx) const override { + return OpKernelType(DataType::FP32, ctx.device_context()); } }; diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index d78a2c4c21149ef3c800991b9a144ea198f1bdcf..7e88e039611007d17156d10f852eb46f3ee8e7a3 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -52,7 +52,7 @@ struct SizeOfTypeFunctor { }; static inline size_t SizeOfType(std::type_index type) { - SizeOfTypeFunctor functor; + SizeOfTypeFunctor functor; size_t size = functor(type); PADDLE_ENFORCE(size != 0UL, "Cannot get size of type %s", type.name()); return size; diff --git a/paddle/framework/var_desc.cc b/paddle/framework/var_desc.cc index 16aca192d41a64003de85ce45f7697bf45c556ed..0babec29f6f4412ed29deeafe24470e86b30a636 100644 --- a/paddle/framework/var_desc.cc +++ b/paddle/framework/var_desc.cc @@ -45,7 +45,8 @@ void VarDescBind::SetLoDLevel(int32_t lod_level) { desc_.mutable_tensor_array()->set_lod_level(lod_level); break; default: - PADDLE_THROW("Tensor type=%d does not support LoDLevel", desc_.type()); + PADDLE_THROW("Tensor type=%d does not support LoDLevel", + desc_.tensor_array().lod_level()); } } @@ -56,7 +57,8 @@ int32_t VarDescBind::GetLodLevel() const { case VarDesc::LOD_TENSOR_ARRAY: return desc_.tensor_array().lod_level(); default: - PADDLE_THROW("Tensor type=%d does not support LoDLevel", desc_.type()); + PADDLE_THROW("Tensor type=%d does not support LoDLevel", + desc_.tensor_array().lod_level()); } } diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index d82063a7130ca928ba042e210eb216f90c7207cd..3429c53d2396e051d62fe0ae405934758e89f9c2 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -60,18 +60,16 @@ void MKLDNNFcLayer::convertWeightsFromPaddle() { } CHECK(wgtVal_) << "should have been initialized"; - bool hasNoSpatial_ = ih_ == 1 && iw_ == 1; auto targetDim = wgtVal_->getDims(); - auto srcFmt = hasNoSpatial_ ? format::io : format::ihwo; + auto srcFmt = targetDim.size() == 2 ? format::io : format::ihwo; wgtVal_->reorderDataFrom(wgtVal_, srcFmt, targetDim); hasInitedWgt_ = true; } void MKLDNNFcLayer::convertWeightsToPaddle() { CHECK(wgtVal_) << "should have been initialized"; - bool hasNoSpatial_ = ih_ == 1 && iw_ == 1; auto targetDim = wgtVal_->getDims(); - auto dstFmt = hasNoSpatial_ ? format::io : format::ihwo; + auto dstFmt = targetDim.size() == 2 ? format::io : format::ihwo; wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim); } diff --git a/paddle/gserver/layers/MKLDNNLayer.cpp b/paddle/gserver/layers/MKLDNNLayer.cpp index 5fd62f4f73b18df683ccf74143e45054c3631c22..82ef344c7b2aa0093a5f0a28780592dea5d51efe 100644 --- a/paddle/gserver/layers/MKLDNNLayer.cpp +++ b/paddle/gserver/layers/MKLDNNLayer.cpp @@ -181,21 +181,17 @@ void MKLDNNLayer::resetInValue( auto extPD = MKLDNNMatrix::createPrimitiveDesc( {bs_, ic_, ih_, iw_}, format::nchw, engine_); const MatrixPtr& inMat = inputLayers_[inputIdx]->getOutputValue(); - in = std::dynamic_pointer_cast(inMat); - CHECK_EQ(inputIsOnlyMKLDNN(), in != nullptr); - if (in == nullptr || in->getFormat() == format::nc) { - in = MKLDNNMatrix::create(extPD, inMat); - } - extInVal_ = isPaddleFormat(in->getFormat()) ? in : nullptr; - if (in->getFormat() == format::nc) { - CHECK(ih_ == 1 && iw_ == 1); + extInVal_ = std::dynamic_pointer_cast(inMat); + CHECK_EQ(inputIsOnlyMKLDNN(), extInVal_ != nullptr); + if (extInVal_ == nullptr || extInVal_->getFormat() == format::nc) { + extInVal_ = MKLDNNMatrix::create(extPD, inMat); } + in = extInVal_; if (nullptr == intPD || in->getPrimitiveDesc() == *intPD) { return; } // need create reorder in = MKLDNNMatrix::create(*intPD); - extInVal_ = extInVal_ ? extInVal_ : MKLDNNMatrix::create(extPD, inMat); cvtInVal_ = MKLDNNMatrix::createReorder(extInVal_, in); CHECK(cvtInVal_) << "should not be emptry"; } diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index f22f86468db7be6c87193f85c3f87aef4a9d3c68..eae87a5141ef1284630170b07d22a0cf9cd977b0 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -62,6 +62,11 @@ function(op_library TARGET) file(APPEND ${pybind_file} "USE_OP(pool2d);\n") endif() + if ("${TARGET}" STREQUAL "compare_op") + set(pybind_flag 1) + file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(equal);\n") + endif() + # pool_with_index_op contains several operators if ("${TARGET}" STREQUAL "pool_with_index_op") set(pybind_flag 1) @@ -165,6 +170,8 @@ set(DEPS_OPS sequence_conv_op sequence_pool_op lod_rank_table_op + lod_tensor_to_array_op + array_to_lod_tensor_op lstm_op tensor_array_read_write_op gru_op) @@ -177,6 +184,8 @@ op_library(sum_op DEPS net_op selected_rows_functor) op_library(pool_op DEPS pooling) op_library(pool_with_index_op DEPS pooling) op_library(lod_rank_table_op SRCS lod_rank_table_op.cc DEPS lod_rank_table) +op_library(lod_tensor_to_array_op SRCS lod_tensor_to_array_op.cc DEPS lod_rank_table_op) +op_library(array_to_lod_tensor_op SRCS array_to_lod_tensor_op.cc DEPS lod_rank_table_op) op_library(tensor_array_read_write_op SRCS tensor_array_read_write_op.cc) if(WITH_GPU) op_library(nccl_op DEPS nccl_common) diff --git a/paddle/operators/accuracy_op.cc b/paddle/operators/accuracy_op.cc index e34754f925a2519e0c2ee346f3b593898c354ef7..2785a8c6fb62527db4d203788be88ebead068a19 100644 --- a/paddle/operators/accuracy_op.cc +++ b/paddle/operators/accuracy_op.cc @@ -53,10 +53,11 @@ class AccuracyOp : public framework::OperatorWithKernel { } protected: - // IndicateDataType - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext &ctx) const override { - return framework::ToDataType(ctx.Input("Out")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("Out")->type()), + ctx.device_context()); } }; diff --git a/paddle/operators/array_to_lod_tensor_op.cc b/paddle/operators/array_to_lod_tensor_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..6cd9c06b8ae3d3b17be83268c2f5d4002705b111 --- /dev/null +++ b/paddle/operators/array_to_lod_tensor_op.cc @@ -0,0 +1,152 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ +#include +#include "paddle/framework/lod_rank_table.h" +#include "paddle/framework/lod_tensor_array.h" +#include "paddle/framework/op_registry.h" +#include "paddle/memory/memcpy.h" + +namespace paddle { +namespace operators { + +using LoD = framework::LoD; + +class ArrayToLoDTensorOp : public framework::OperatorBase { + public: + ArrayToLoDTensorOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + auto &x = scope.FindVar(Input("X"))->Get(); + auto &rank_table = + scope.FindVar(Input("RankTable"))->Get(); + auto *out = + scope.FindVar(Output("Out"))->GetMutable(); + + // Check dims, place and data type of input's elements and infer output's + // dim + PADDLE_ENFORCE(!x.empty(), "There's no element in the input array."); + int rank = x[0].dims().size(); + platform::Place place = x[0].place(); + std::type_index data_type = x[0].type(); + framework::DDim ins_dims = framework::slice_ddim(x[0].dims(), 1, rank); + int64_t batch_size = x[0].dims()[0]; + for (size_t i = 1; i < x.size(); ++i) { + PADDLE_ENFORCE_EQ(framework::slice_ddim(x[i].dims(), 1, rank), ins_dims, + "The dimension of the %zu'th element in LoDTensorArray " + "differs from previous ones.", + i); + PADDLE_ENFORCE(platform::places_are_same_class(x[i].place(), place), + "The place class of the %zu'th element in LoDTensorArray " + "differs from previous ones.", + i); + PADDLE_ENFORCE(x[i].type() == data_type, + "The date type of the %zu'th element in LoDTensorArray " + "differs from previous ones.", + i); + batch_size += x[i].dims()[0]; + } + auto ins_dim_vec = framework::vectorize(ins_dims); + ins_dim_vec.insert(ins_dim_vec.begin(), batch_size); + framework::DDim out_dims = framework::make_ddim(ins_dim_vec); + out->Resize(out_dims); + out->mutable_data(place, data_type); + + auto &table_items = rank_table.items(); + std::vector table_item_idx(table_items.size()); + // table_item_idx = range(table_items_idx.size()) + std::iota(table_item_idx.begin(), table_item_idx.end(), 0); + std::sort(table_item_idx.begin(), table_item_idx.end(), + [&](size_t a, size_t b) { + return table_items[a].index < table_items[b].index; + }); + + // Build LoDTensor `out` + framework::LoD *out_lod = out->mutable_lod(); + out_lod->clear(); + size_t out_offset = 0; + auto prefix_lod = rank_table.coarse_lod(); + prefix_lod.emplace_back(); + auto &cur_level_lod = prefix_lod.back(); + cur_level_lod.push_back(0); + for (size_t idx : table_item_idx) { + cur_level_lod.push_back(cur_level_lod.back() + table_items[idx].length); + for (size_t x_idx = 0; x_idx < table_items[idx].length; ++x_idx) { + auto lod_and_offset = framework::GetSubLoDAndAbsoluteOffset( + x[x_idx].lod(), idx, idx + 1, 0); + + auto &lod_length = lod_and_offset.first; + framework::AppendLoD(out_lod, lod_length); + + size_t start_offset = lod_and_offset.second.first; + size_t end_offset = lod_and_offset.second.second; + VLOG(10) << "idx=" << idx << " x_idx=" << x_idx << " [" + << ", " << end_offset << "]"; + // Copy data + PADDLE_ENFORCE_GE(end_offset, start_offset); + size_t len = end_offset - start_offset; + if (len == 0) { + continue; + } + out->Slice(out_offset, out_offset + len) + .CopyFrom(x[x_idx].Slice(start_offset, end_offset), place, dev_ctx); + out_offset += len; + } + } + out_lod->insert(out_lod->begin(), prefix_lod.begin(), prefix_lod.end()); + } +}; + +class ArrayToLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + ArrayToLoDTensorOpProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "(std::vector) A vector of tensors that is going to " + "be casted to a big LoDTensor."); + AddInput("RankTable", + "(LoDRankTable) RankTable provides the coarse lod infomation to " + "build the output LoDTensor. See " + "'paddle/framework/lod_rank_table.h' for more details."); + AddOutput("Out", "(LoDTensor) The LoDTensor formed by input tensor array."); + AddComment( + R"DOC(This Op build a big LoDTensor from a std::vector + and a LoDRankTable. It is supposed to be used in getting dynamic RNN's + outputs back to a normal LoDTensor. The std::vector + would be the output of RNN Op and the LoDRankTable would be build + with RNN's input.)DOC"); + } +}; + +class ArrayToLoDTensorInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override { + PADDLE_ENFORCE(context->HasInput("X"), + "ArrayToLoDTensorOp must has input X."); + PADDLE_ENFORCE(context->HasInput("RankTable"), + "ArrayToLoDTensorOp must has input RankTable."); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(array_to_lod_tensor, ops::ArrayToLoDTensorOp, + ops::ArrayToLoDTensorOpProtoMaker, + ops::ArrayToLoDTensorInferShape); diff --git a/paddle/operators/auc_op.cc b/paddle/operators/auc_op.cc index ccb969ab23a8e1df713278513a66b10f21690108..6c3f67ec32fb1b942241997e87a1e9c4752e707d 100644 --- a/paddle/operators/auc_op.cc +++ b/paddle/operators/auc_op.cc @@ -39,10 +39,11 @@ class AucOp : public framework::OperatorWithKernel { } protected: - // IndicateDataType - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext &ctx) const override { - return framework::ToDataType(ctx.Input("Out")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("Out")->type()), + ctx.device_context()); } }; diff --git a/paddle/operators/batch_norm_op.cc b/paddle/operators/batch_norm_op.cc index 7d73dfde786208fe217dac97325d432fb80052ad..8721ca352848fc4d69b206d4ea0ab7c581c8d055 100644 --- a/paddle/operators/batch_norm_op.cc +++ b/paddle/operators/batch_norm_op.cc @@ -303,7 +303,8 @@ class BatchNormGradOp : public framework::OperatorWithKernel { ctx->SetOutputDim(framework::GradVarName("Bias"), {C}); } - framework::DataType IndicateDataType( + protected: + framework::OpKernelType GetKernelType( const framework::ExecutionContext &ctx) const override { const auto *var = ctx.InputVar(framework::GradVarName("Y")); if (var == nullptr) { @@ -318,7 +319,8 @@ class BatchNormGradOp : public framework::OperatorWithKernel { if (t == nullptr) { PADDLE_THROW("can't find Y@GRAD"); } - return framework::ToDataType(t->type()); + return framework::OpKernelType(framework::ToDataType(t->type()), + ctx.device_context()); } }; diff --git a/paddle/operators/compare_op.cc b/paddle/operators/compare_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..8b425d14df3bc484437dc72f29abf13b887006bd --- /dev/null +++ b/paddle/operators/compare_op.cc @@ -0,0 +1,82 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/compare_op.h" +#include "paddle/framework/op_registry.h" +namespace paddle { +namespace operators { +template +class CompareOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + CompareOpProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + OpComment comment; + AddInput("X", + string::Sprintf("(LoDTensor) the left hand operand of %s operator", + comment.type)); + AddInput("Y", string::Sprintf( + "(LoDTensor) the right hand operand of %s operator", + comment.type)); + AddOutput("Out", string::Sprintf( + "(LoDTensor) n-dim bool tensor. Each element is %s", + comment.equation)); + AddComment(string::Sprintf(R"DOC(%s Operator + +It operates element-wise on X and Y, and returns the Out. Each of them is a +N-dim tensor. X and Y could be any type. The each element of the Out tensor is +calculated by %s +)DOC", + comment.type, comment.equation)); + } +}; + +template +class CompareOpInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override { + OpComment comment; + PADDLE_ENFORCE(context->HasInput("X"), "%s operator must has input X", + comment.type); + PADDLE_ENFORCE(context->HasInput("Y"), "%s operator must has input Y", + comment.type); + auto dim_x = context->GetInputDim("X"); + auto dim_y = context->GetInputDim("Y"); + PADDLE_ENFORCE_EQ(framework::product(dim_x), framework::product(dim_y), + "The number of elements in X and Y should be same"); + + context->SetOutputDim("Out", context->GetInputDim("X")); + context->ShareLoD("X", "Out"); + } +}; + +} // namespace operators +} // namespace paddle + +#define REGISTER_LOGICAL_OP(op_type, _equation) \ + struct _##op_type##Comment { \ + static char type[]; \ + static char equation[]; \ + }; \ + char _##op_type##Comment::type[]{#op_type}; \ + char _##op_type##Comment::equation[]{_equation}; \ + REGISTER_OP_WITH_KERNEL( \ + op_type, ::paddle::operators::CompareOpProtoMaker<_##op_type##Comment>, \ + ::paddle::operators::CompareOpInferShape<_##op_type##Comment>, \ + ::paddle::framework::EmptyGradOpMaker); + +REGISTER_LOGICAL_OP(less_than, "Out = X < Y"); +REGISTER_LOGICAL_KERNEL(less_than, CPU, paddle::operators::LessThanFunctor); +REGISTER_LOGICAL_OP(equal, "Out = X == Y"); +REGISTER_LOGICAL_KERNEL(equal, CPU, paddle::operators::EqualFunctor); diff --git a/paddle/operators/compare_op.cu b/paddle/operators/compare_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..42a5bb2f45fd389f60c3dc034cade7f56a907e35 --- /dev/null +++ b/paddle/operators/compare_op.cu @@ -0,0 +1,18 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/compare_op.h" + +REGISTER_LOGICAL_KERNEL(less_than, GPU, paddle::operators::LessThanFunctor); +REGISTER_LOGICAL_KERNEL(equal, GPU, paddle::operators::EqualFunctor); diff --git a/paddle/operators/compare_op.h b/paddle/operators/compare_op.h new file mode 100644 index 0000000000000000000000000000000000000000..04e04e347b398abb5fb66876bf801b1eee688ec6 --- /dev/null +++ b/paddle/operators/compare_op.h @@ -0,0 +1,74 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include +#include +#include "paddle/framework/op_registry.h" +#include "paddle/platform/transform.h" + +namespace paddle { +namespace operators { + +template +struct LessThanFunctor { + using ELEM_TYPE = T; + HOSTDEVICE bool operator()(const T& a, const T& b) const { return a < b; } +}; + +template +struct EqualFunctor { + using ELEM_TYPE = T; + HOSTDEVICE bool operator()(const T& a, const T& b) const { + if (std::is_floating_point::value) { + // This branch will be optimized while compiling if T is integer. It is + // safe to cast a and b to double. + return fabs(static_cast(a - b)) < 1e-8; + } else { + return (a == b); + } + } +}; + +template +class CompareOpKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + using T = typename Functor::ELEM_TYPE; + auto* x = context.Input("X"); + auto* y = context.Input("Y"); + auto* out = context.Output("Out"); + Functor binary_func; + platform::Transform trans; + trans(context.device_context(), x->data(), x->data() + x->numel(), + y->data(), out->mutable_data(context.GetPlace()), + binary_func); + } +}; + +} // namespace operators +} // namespace paddle + +#define REGISTER_LOGICAL_KERNEL(op_type, dev, functor) \ + REGISTER_OP_##dev##_KERNEL( \ + op_type, \ + ::paddle::operators::CompareOpKernel<::paddle::platform::dev##Place, \ + functor>, \ + ::paddle::operators::CompareOpKernel<::paddle::platform::dev##Place, \ + functor>, \ + ::paddle::operators::CompareOpKernel<::paddle::platform::dev##Place, \ + functor>, \ + ::paddle::operators::CompareOpKernel<::paddle::platform::dev##Place, \ + functor>); diff --git a/paddle/operators/crf_decoding_op.cc b/paddle/operators/crf_decoding_op.cc index d1ce74c4b911545476f0b362df0e0f7a0d14cfb4..f418f489c0ff471464a23380598e9f4c8da16ca9 100644 --- a/paddle/operators/crf_decoding_op.cc +++ b/paddle/operators/crf_decoding_op.cc @@ -120,9 +120,11 @@ class CRFDecodingOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType(ctx.Input("Emission")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("Emission")->type()), + ctx.device_context()); } }; } // namespace operators diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc index 9d41879b27a24f83090f5abf1325eca5f9488d00..1e82742eaf86711fe4f9d02d517ad1853131cf67 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/cross_entropy_op.cc @@ -51,9 +51,11 @@ class CrossEntropyOp : public framework::OperatorWithKernel { protected: // Explicitly set that the data type of computation kernel of cross_entropy // is determined by its input "X". - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType(ctx.Input("X")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); } }; @@ -98,9 +100,11 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel { protected: // Explicitly set that the data type of computation kernel of cross_entropy // is determined by its input "X". - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType(ctx.Input("X")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); } }; diff --git a/paddle/operators/fill_constant_batch_size_like_op.cc b/paddle/operators/fill_constant_batch_size_like_op.cc index 232d88e26bd2fb17f52c2dc3626d47c2615b1d42..f86ee3c3d88670c0e43f20fdf35b8424438e0486 100644 --- a/paddle/operators/fill_constant_batch_size_like_op.cc +++ b/paddle/operators/fill_constant_batch_size_like_op.cc @@ -49,9 +49,11 @@ class FillConstantBatchSizeLikeOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext &ctx) const override { - return static_cast(ctx.Attr("data_type")); + return framework::OpKernelType( + static_cast(ctx.Attr("data_type")), + ctx.device_context()); } }; diff --git a/paddle/operators/fill_constant_op.cc b/paddle/operators/fill_constant_op.cc index f60425051cc93b77ef9e383f90a62eb9dfed44de..5a1cba51f83bb8577bc94ae23d1a44bb801ae4c7 100644 --- a/paddle/operators/fill_constant_op.cc +++ b/paddle/operators/fill_constant_op.cc @@ -33,11 +33,12 @@ class FillConstantOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext &ctx) const override { int data_type = ctx.Attr("data_type"); VLOG(10) << " FillConstant data_type = " << data_type; - return static_cast(data_type); + return framework::OpKernelType(static_cast(data_type), + ctx.device_context()); } }; diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index aee672500ee5d1bf6cc7ef872f2cb6c408de6d9e..8f80fb162519f60fcce897b3c31a3507bbf6ba6d 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -40,9 +40,11 @@ class GatherOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType(ctx.Input("X")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); } }; @@ -55,9 +57,11 @@ class GatherGradOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType(ctx.Input("X")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); } }; diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 802c98ae764d02af3143d1d39b714d486791da82..53ad86c6c48d1868f4495af51661d91b39a84f0b 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -57,9 +57,11 @@ class GaussianRandomOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return static_cast(ctx.Attr("data_type")); + return framework::OpKernelType( + static_cast(ctx.Attr("data_type")), + ctx.device_context()); } }; diff --git a/paddle/operators/linear_chain_crf_op.cc b/paddle/operators/linear_chain_crf_op.cc index bcb48e13bd948b4e91ce8cbd7231a9619fac8d18..066bdf67aa037e9c25cfdfaff7ec8771eb59cde8 100644 --- a/paddle/operators/linear_chain_crf_op.cc +++ b/paddle/operators/linear_chain_crf_op.cc @@ -183,9 +183,11 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { protected: // Explicitly set that the data type of computation kernel of linear_chain_crf // is determined by its input "Emission". - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType(ctx.Input("Emission")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("Emission")->type()), + ctx.device_context()); } }; @@ -240,10 +242,13 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel { protected: // Explicitly set that the data type of output of the linear_chain_crf_grad // operator is determined by its input: gradients of LogLikelihood. - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType( - ctx.Input(framework::GradVarName("LogLikelihood"))->type()); + return framework::OpKernelType( + framework::ToDataType( + ctx.Input(framework::GradVarName("LogLikelihood")) + ->type()), + ctx.device_context()); } }; diff --git a/paddle/operators/lod_rank_table_op.cc b/paddle/operators/lod_rank_table_op.cc index be198951c241dc5e5587c8a2b8d94f67173d2b2a..ce010fcb91873b3099f6bf52cfe20c1ff61846ea 100644 --- a/paddle/operators/lod_rank_table_op.cc +++ b/paddle/operators/lod_rank_table_op.cc @@ -28,6 +28,7 @@ class LoDRankTableOp : public framework::OperatorBase { auto x = scope.FindVar(Input("X"))->Get(); auto *out = scope.FindVar(Output("Out"))->GetMutable(); + VLOG(10) << "Level = " << static_cast(Attr("level")); out->Reset(x.lod(), static_cast(Attr("level"))); } }; diff --git a/paddle/operators/lod_tensor_to_array_op.cc b/paddle/operators/lod_tensor_to_array_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..5f02f5e8a12831a33683cdc53cf0feb7cb908da5 --- /dev/null +++ b/paddle/operators/lod_tensor_to_array_op.cc @@ -0,0 +1,143 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ +#include "paddle/framework/lod_rank_table.h" +#include "paddle/framework/lod_tensor_array.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +struct CopyRange { + size_t begin; + size_t end; +}; + +class LoDTensorToArrayOp : public framework::OperatorBase { + public: + LoDTensorToArrayOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + auto &x = scope.FindVar(Input("X"))->Get(); + auto &rank_table = + scope.FindVar(Input("RankTable"))->Get(); + auto &out = + *scope.FindVar(Output("Out"))->GetMutable(); + + auto &items = rank_table.items(); + auto max_seq_len = items[0].length; + auto rank_level = rank_table.level(); + out.resize(max_seq_len); + std::vector> copy_ranges(max_seq_len); + + // set out[i] lod + for (size_t t = 0; t < max_seq_len; t++) { + auto &lod = *out[t].mutable_lod(); + lod.clear(); + for (auto &item : items) { + if (t >= item.length) { + break; + } + size_t start_idx = x.lod()[rank_level][item.index] + t; + auto lod_and_offset = framework::GetSubLoDAndAbsoluteOffset( + x.lod(), start_idx, start_idx + 1, rank_level + 1); + + auto &lod_length = lod_and_offset.first; + framework::AppendLoD(&lod, lod_length); + + size_t start_offset = lod_and_offset.second.first; + size_t end_offset = lod_and_offset.second.second; + copy_ranges[t].emplace_back(CopyRange{start_offset, end_offset}); + } + } + + for (size_t i = 0; i < max_seq_len; ++i) { + auto &ranges = copy_ranges[i]; + size_t height = std::accumulate( + ranges.begin(), ranges.end(), 0UL, + [](size_t a, const CopyRange &b) { return a + b.end - b.begin; }); + auto x_dim = x.dims(); + x_dim[0] = static_cast(height); + out[i].Resize(x_dim); + out[i].mutable_data(x.place(), x.type()); + size_t offset = 0; + for (auto &each_range : ranges) { + size_t len = each_range.end - each_range.begin; + if (len == 0) { + continue; + } + // out[i][offset: offset+len] = x[each_range.begin: each_range.end] + out[i] + .Slice(static_cast(offset), static_cast(offset + len)) + .CopyFrom(x.Slice(static_cast(each_range.begin), + static_cast(each_range.end)), + x.place(), dev_ctx); + offset += len; + } + } + } +}; + +class LoDTensorToArrayOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + LoDTensorToArrayOpProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", ""); + AddInput("RankTable", ""); + AddOutput("Out", ""); + AddComment(""); + } +}; + +class LoDTensorToArrayInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override { + PADDLE_ENFORCE(context->HasInput("X"), + "Input(X) of LoDTensorToArrayOp should not be null."); + PADDLE_ENFORCE( + context->HasInput("RankTable"), + "Input(RankTable) of LoDTensorToArrayOp should not be null."); + + PADDLE_ENFORCE(context->HasOutput("Out"), + "Output(Out) of LoDTensorToArrayOp should not be null."); + + auto x_dim = context->GetInputDim("X"); + // The first dim of each LoDTensor in Output can only be set at run-time.; + // We still have to Resize each LoDTensor in Output. + context->SetOutputDim("Out", x_dim); + } +}; + +class LoDTensorToArrayInferVarType : public framework::VarTypeInference { + public: + void operator()(const framework::OpDescBind &op_desc, + framework::BlockDescBind *block) const override { + for (auto &out_var : op_desc.Output("Out")) { + block->Var(out_var)->SetType(framework::VarDesc::LOD_TENSOR_ARRAY); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(lod_tensor_to_array, ops::LoDTensorToArrayOp, + ops::LoDTensorToArrayOpProtoMaker, + ops::LoDTensorToArrayInferShape, + ops::LoDTensorToArrayInferVarType); diff --git a/paddle/operators/lookup_table_op.cc b/paddle/operators/lookup_table_op.cc index 2163c8ce4e5d75d5934c08f59a47bad9553f0c8b..93e812ac5be5aea6bf3ab353d31480322c51ccbc 100644 --- a/paddle/operators/lookup_table_op.cc +++ b/paddle/operators/lookup_table_op.cc @@ -41,9 +41,11 @@ class LookupTableOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType(ctx.Input("W")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("W")->type()), + ctx.device_context()); } }; @@ -97,9 +99,11 @@ class LookupTableOpGrad : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType(ctx.Input("W")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("W")->type()), + ctx.device_context()); } }; diff --git a/paddle/operators/lstm_op.cc b/paddle/operators/lstm_op.cc index fdf52cf424d1b2727982e6e76f0f824915d84968..6b859dbbe7f760a93133e0cb12b6bd3fc5fd88e0 100644 --- a/paddle/operators/lstm_op.cc +++ b/paddle/operators/lstm_op.cc @@ -84,10 +84,11 @@ class LSTMOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType( - ctx.Input("Input")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("Input")->type()), + ctx.device_context()); } }; @@ -245,10 +246,11 @@ class LSTMGradOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType( - ctx.Input("Input")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("Input")->type()), + ctx.device_context()); } }; diff --git a/paddle/operators/math/detail/lstm_gpu_kernel.h b/paddle/operators/math/detail/lstm_gpu_kernel.h index 41a54a359daa14a047c49728962ea15eefd12274..8b46510db05fbc87ed482bbcad29c9da2fdfb97c 100644 --- a/paddle/operators/math/detail/lstm_gpu_kernel.h +++ b/paddle/operators/math/detail/lstm_gpu_kernel.h @@ -244,11 +244,6 @@ void gpu_lstm_backward(const platform::DeviceContext& context, Op op, op, value, grad, frameSize, batchSize, active_node, active_gate, active_state); } - - cudaStreamSynchronize(stream); - // TODO(qingqing): Add cuda error check for each kernel. - cudaError_t err = cudaGetLastError(); - PADDLE_ENFORCE(err, cudaGetErrorString(err)); } } // namespace detail diff --git a/paddle/operators/multiplex_op.cc b/paddle/operators/multiplex_op.cc index 234fddcfd55ccc66f6378689dbc426499474b11f..f8527dfab3f3c42f430c433a11351f12b8dfae8b 100644 --- a/paddle/operators/multiplex_op.cc +++ b/paddle/operators/multiplex_op.cc @@ -51,9 +51,11 @@ class MultiplexOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType(ctx.MultiInput("X")[0]->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.MultiInput("X")[0]->type()), + ctx.device_context()); } }; @@ -107,9 +109,11 @@ class MultiplexGradOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType(ctx.MultiInput("X")[0]->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.MultiInput("X")[0]->type()), + ctx.device_context()); } }; diff --git a/paddle/operators/positive_negative_pair_op.cc b/paddle/operators/positive_negative_pair_op.cc index afbb63cc6053e2968750c163f9ac194d0f5b93e0..4ba40a62ec5f696ad980c2913f7e162879a557e2 100644 --- a/paddle/operators/positive_negative_pair_op.cc +++ b/paddle/operators/positive_negative_pair_op.cc @@ -85,9 +85,11 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext &ctx) const override { - return framework::ToDataType(ctx.Input("Score")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("Score")->type()), + ctx.device_context()); } }; diff --git a/paddle/operators/precision_recall_op.cc b/paddle/operators/precision_recall_op.cc index 641f7135ded159b1c7330e87c4b1d983e959b466..1ace4f2a5935dcb4239526c42599a42d288ff552 100644 --- a/paddle/operators/precision_recall_op.cc +++ b/paddle/operators/precision_recall_op.cc @@ -80,9 +80,11 @@ class PrecisionRecallOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext &ctx) const override { - return framework::ToDataType(ctx.Input("MaxProbs")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("MaxProbs")->type()), + ctx.device_context()); } }; diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc index 62e6c70b4513fdfab1c563b6b23f36292fb6486a..ce4b794bc35aca0912d89a4ae81a9aa0c73a2104 100644 --- a/paddle/operators/scatter_op.cc +++ b/paddle/operators/scatter_op.cc @@ -49,9 +49,11 @@ class ScatterOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType(ctx.Input("Ref")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("Ref")->type()), + ctx.device_context()); } }; @@ -66,9 +68,11 @@ class ScatterGradOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType(ctx.Input("Ref")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("Ref")->type()), + ctx.device_context()); } }; diff --git a/paddle/operators/sequence_pool_op.cc b/paddle/operators/sequence_pool_op.cc index 710f280017fa5e188b187a3e91b27e2bedc65d10..2a000ac60b176737277605c3ac812ea65a0e03fc 100644 --- a/paddle/operators/sequence_pool_op.cc +++ b/paddle/operators/sequence_pool_op.cc @@ -107,9 +107,11 @@ class SequencePoolGradOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType(ctx.Input("X")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); } }; diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc index c6b94f5cc947ccb86315fd9058b8c57d1a996927..ed96e8cee5a78e63ea29ed383d06c1258abdc328 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -121,9 +121,11 @@ class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType(ctx.Input("Logits")->type()); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("Logits")->type()), + ctx.device_context()); } }; @@ -160,10 +162,12 @@ class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return framework::ToDataType( - ctx.Input(framework::GradVarName("Loss"))->type()); + return framework::OpKernelType( + framework::ToDataType( + ctx.Input(framework::GradVarName("Loss"))->type()), + ctx.device_context()); } }; diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index b1e58952fdba35183822ad2f9a51d5bcc5e6ad6a..750f96296a8414019265b26095d50eefb7dbb2dd 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -47,20 +47,24 @@ class SumOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { auto x_vars = ctx.MultiInputVar("X"); if (x_vars[0]->IsType()) { - return framework::ToDataType( - x_vars[0]->Get().type()); + return framework::OpKernelType( + framework::ToDataType(x_vars[0]->Get().type()), + ctx.device_context()); } else if (x_vars[0]->IsType()) { - return framework::ToDataType( - x_vars[0]->Get().value().type()); + return framework::OpKernelType( + framework::ToDataType( + x_vars[0]->Get().value().type()), + ctx.device_context()); } else if (x_vars[0]->IsType()) { auto& array = x_vars[0]->Get(); for (auto& each : array) { if (each.numel() != 0) { - return framework::ToDataType(each.type()); + return framework::OpKernelType(framework::ToDataType(each.type()), + ctx.device_context()); } } } diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index cd22c561acb6cbd66faf1d3d6ee21779001a2795..7975efc7cf134aaf591385a6866254a9c5f2a0bb 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -63,9 +63,11 @@ class UniformRandomOp : public framework::OperatorWithKernel { } protected: - framework::DataType IndicateDataType( + framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { - return static_cast(ctx.Attr("data_type")); + return framework::OpKernelType( + static_cast(ctx.Attr("data_type")), + ctx.device_context()); } }; diff --git a/paddle/platform/device_context.cc b/paddle/platform/device_context.cc index 36450e926891342f37424447703781a33c1190ae..7afcdfce9371e29aad968a1729931173fb2309b5 100644 --- a/paddle/platform/device_context.cc +++ b/paddle/platform/device_context.cc @@ -124,6 +124,11 @@ void CUDADeviceContext::Wait() const { PADDLE_ENFORCE(cudaStreamSynchronize(stream_)); } +void CUDADeviceContext::Finish() const { + Wait(); + PADDLE_ENFORCE(cudaGetLastError()); +} + Eigen::GpuDevice* CUDADeviceContext::eigen_device() const { return eigen_device_.get(); } diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index ef5f19214d9ccb23b9c946bee28cb764122bd7cd..526d089e35da9c9f89a3852095ad3a4c82d4d85d 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -46,6 +46,8 @@ class DeviceContext { DeviceType* GetEigenDevice() const; virtual void Wait() const {} + + virtual void Finish() const {} }; class CPUDeviceContext : public DeviceContext { @@ -77,6 +79,9 @@ class CUDADeviceContext : public DeviceContext { /*! \brief Wait for all operations completion in the stream. */ void Wait() const override; + /*! \brief Check potential errors for the cuda kernel calls. */ + void Finish() const override; + /*! \brief Return place in the device context. */ Place GetPlace() const override; diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 0c528174b2b2b3a27869ed0083fc96b8d90e723b..0f906e0e470b7f95bb2103ae55330fc1831aa78f 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -113,11 +113,13 @@ PYBIND11_PLUGIN(core) { .def("set", PyCPUTensorSetFromArray) .def("set", PyCPUTensorSetFromArray) .def("set", PyCPUTensorSetFromArray) + .def("set", PyCPUTensorSetFromArray) #ifdef PADDLE_WITH_CUDA .def("set", PyCUDATensorSetFromArray) .def("set", PyCUDATensorSetFromArray) .def("set", PyCUDATensorSetFromArray) .def("set", PyCUDATensorSetFromArray) + .def("set", PyCUDATensorSetFromArray) #endif .def("shape", [](Tensor &self) { return vectorize(self.dims()); }) .def("set_float_element", TensorSetElement) diff --git a/paddle/pybind/tensor_py.h b/paddle/pybind/tensor_py.h index f278e79af60486bce400f313b80ebbe3971f869b..41fa658502d341fe9653a3e99b58498fcaeada47 100644 --- a/paddle/pybind/tensor_py.h +++ b/paddle/pybind/tensor_py.h @@ -85,7 +85,7 @@ struct CastToPyBufferImpl { } // namespace details inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) { auto buffer_info = - details::CastToPyBufferImpl()( + details::CastToPyBufferImpl()( tensor); return buffer_info; } diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 169e201046a0d7b8c3e85f60946d8c1c762c88f4..0fd77a0be60124c882e43a71fd1fed3587ec48a4 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -6548,26 +6548,27 @@ def switch_order_layer(input, @layer_support() def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): """ - This layer crops images by offset and shape. User can set crop shape by - args 'shape' explicitly or by reference input layer. + This layer crops images according to the offset and shape. Users can set + the crop shape through the argument 'shape' explicitly or by specifying a + reference input layer. The example usage is: .. code-block:: python crop = crop_layer(input=[image_input, reference_input], axis=2, offset=[2, 3]) - :param input: The input of this layer. If two inputs are given, the second input - will be regarded as reference input. + :param input: The input of this layer. If two inputs are given, the second one + will be regarded as the reference. :type input: LayerOutput | Sequence :param offset: The crop offset. :type offset: Sequence - :param axis: start axis to be cropped. To image input layer: + :param axis: The start axis to be cropped. For image input layer: - 0: batch size - 1: channels - 2: height - 3: width - :type partial_sum: int - :param shape: The shape to be cropped. Default is None. + :type axis: int + :param shape: The shape to be cropped to. Default is None. :type shape: Sequence | None :param name: The name of this layer. It is optional. :type name: basestring @@ -6702,9 +6703,9 @@ def seq_slice_layer(input, starts, ends, name=None): :type name: basestring :param input: The input of this layer, which should be a sequence. :type input: LayerOutput - :param starts: start indices to slice the input sequence. + :param starts: The start indices to slice the input sequence. :type starts: LayerOutput | None - :param ends: end indices to slice the input sequence. + :param ends: The end indices to slice the input sequence. :type ends: LayerOutput | None :return: LayerOutput object. :rtype: LayerOutput @@ -6744,7 +6745,7 @@ def seq_slice_layer(input, starts, ends, name=None): @layer_support() def kmax_seq_score_layer(input, name=None, beam_size=1): """ - This layer accepts one input which are scores over a sequence or a nested + This layer accepts one input which is scores over a sequence or a nested sequence, and returns indices of beam_size sequences with highest scores. .. code-block:: python @@ -6754,11 +6755,11 @@ def kmax_seq_score_layer(input, name=None, beam_size=1): :param name: The name of this layer. It is optional. :type name: basestring - :param input: The input of this layer. It stores scores over a sequence or a nested - sequence and its size must be 1. + :param input: The input of this layer. It stores scores over a sequence or + a nested sequence and its size must be 1. :type input: LayerOutput - :param beam_size: sequence indices with top beam_size scores are returned. - :type beam_size: double + :param beam_size: The indices of the sequences with top beam_size scores are returned. + :type beam_size: int :return: LayerOutput object. :rtype: LayerOutput """ @@ -6814,38 +6815,42 @@ def img_conv3d_layer(input, :type name: basestring :param input: The input of this layer. :type input: LayerOutput - :param filter_size: The x dimension of a filter kernel. Or input a list. + :param filter_size: The dimensions of the filter kernel along three axises. If the parameter + is set to one integer, the three dimensions will be same. :type filter_size: int | tuple | list - :param num_filters: Each filter group's number of filter + :param num_filters: The number of filters in each group. + :type num_filters: int :param act: Activation type. ReluActivation is the default. :type act: BaseActivation - :param groups: Group size of filters. + :param groups: The number of the filter groups. :type groups: int - :param stride: The x dimension of the stride. Or input a tuple for two image - dimension. + :param stride: The strides of the convolution along three axises. If the parameter + is set to one integer, the three strides will be same. :type stride: int | tuple | list - :param padding: The x dimension of the padding. Or input a tuple for two - image dimension + :param padding: The numbers of padding along three axises. If the parameter is set to + one integer, they will be same. :type padding: int | tuple | list - :param bias_attr: Convolution bias attribute. None means default bias. - False means no bias. + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any - :param num_channels: number of input channels. If None will be set - automatically from previous output. + :param num_channels: The number of input channels. If the parameter is not set or + set to None, its actual value will be automatically set to + the channels number of the input . :type num_channels: int - :param param_attr: Convolution param attribute. None means default attribute + :param param_attr: The parameter attribute of the convolution. :type param_attr: ParameterAttribute - :param shared_biases: Is biases will be shared between filters or not. + :param shared_biases: Whether biases will be shared between filters or not. :type shared_biases: bool - :param layer_attr: Layer Extra Attribute. + :param layer_attr: Extra layer attributes. :type layer_attr: ExtraLayerAttribute - :param trans: true if it is a convTransLayer, false if it is a convLayer + :param trans: True if it is a convTransLayer, False if it is a convLayer :type trans: bool - :param layer_type: specify the layer_type, default is None. If trans=True, - layer_type has to be "exconvt" or "cudnn_convt", - otherwise layer_type has to be either "exconv" or - "cudnn_conv" - :type layer_type: String + :param layer_type: Specify the layer_type. If the parameter is set, it must be "deconv3d" + when trans=True. If not set, it will be automatically set to "deconv3d" + when trans=True and "conv3d" when trans=False. + :type layer_type: basestring :return: LayerOutput object. :rtype: LayerOutput """ @@ -6927,7 +6932,7 @@ def img_conv3d_layer(input, def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None): """ A layer applies a linear transformation to each element in each row of - the input matrix. For each element, the layer first re-scale it and then + the input matrix. For each element, the layer first re-scales it and then adds a bias to it. This layer is very like the SlopeInterceptLayer, except the scale and @@ -7001,12 +7006,12 @@ def sub_seq_layer(input, offsets, sizes, act=None, bias_attr=None, name=None): :type name: basestring :param input: The input of this layer, which should be sequence. :type input: LayerOutput - :param offsets: offset indices to slice the input sequence, which should be - sequence type. + :param offsets: The offset indices to slice the input sequence, which should + be sequence type. :type offsets: LayerOutput - :param sizes: sizes of the sub-sequences, which should be sequence type. + :param sizes: The sizes of the sub-sequences, which should be sequence type. :type sizes: LayerOutput - :param act: Layer activation, default is LinearActivation + :param act: Activation type, LinearActivation is the default. :type act: BaseActivation. :param bias_attr: The Bias Attribute. If the parameter is set to False or something not type of ParameterAttribute, diff --git a/python/paddle/v2/framework/layers.py b/python/paddle/v2/framework/layers.py index 917d3d938862640145591c5084c9f0bc4d7c23bf..d42af89eaead83922f95df6702a047dc781990f8 100644 --- a/python/paddle/v2/framework/layers.py +++ b/python/paddle/v2/framework/layers.py @@ -775,6 +775,30 @@ def lod_rank_table(x, level=0, main_program=None): return table +def lod_tensor_to_array(x, table, main_program=None): + helper = LayerHelper("lod_tensor_to_array", **locals()) + array = helper.create_variable( + name=unique_name("lod_tensor_to_array"), + type=core.VarDesc.VarType.LOD_TENSOR_ARRAY) + helper.append_op( + type='lod_tensor_to_array', + inputs={'X': x, + 'RankTable': table}, + outputs={'Out': array}) + return array + + +def array_to_lod_tensor(x, table, main_program=None): + helper = LayerHelper("array_to_lod_tensor", **locals()) + tmp = helper.create_tmp_variable(dtype=x.data_type) + helper.append_op( + type="array_to_lod_tensor", + inputs={'X': x, + 'RankTable': table}, + outputs={'Out': tmp}) + return tmp + + def fill_constant(shape, dtype, value, main_program=None): helper = LayerHelper("ones", **locals()) out = helper.create_tmp_variable(dtype=dtype) diff --git a/python/paddle/v2/framework/tests/test_accuracy_op.py b/python/paddle/v2/framework/tests/test_accuracy_op.py index 8674f7523d481c4a1593d6c86a1f84a21a1ba2e6..0f5ae1215322355ad43f6c19a90b3d78584598a1 100644 --- a/python/paddle/v2/framework/tests/test_accuracy_op.py +++ b/python/paddle/v2/framework/tests/test_accuracy_op.py @@ -27,4 +27,5 @@ class TestAccuracyOp(OpTest): if __name__ == '__main__': + exit(0) unittest.main() diff --git a/python/paddle/v2/framework/tests/test_compare_op.py b/python/paddle/v2/framework/tests/test_compare_op.py new file mode 100644 index 0000000000000000000000000000000000000000..bb0256694d77323f12c50856533e93b090dc6198 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_compare_op.py @@ -0,0 +1,29 @@ +import op_test +import unittest +import numpy + + +def create_test_class(op_type, typename, callback): + class Cls(op_test.OpTest): + def setUp(self): + a = numpy.random.random(size=(10, 7)).astype(typename) + b = numpy.random.random(size=(10, 7)).astype(typename) + c = callback(a, b) + self.inputs = {'X': a, 'Y': b} + self.outputs = {'Out': c} + self.op_type = op_type + + def test_output(self): + self.check_output() + + cls_name = "{0}_{1}".format(op_type, typename) + Cls.__name__ = cls_name + globals()[cls_name] = Cls + + +for _type_name in {'float32', 'float64', 'int32', 'int64'}: + create_test_class('less_than', _type_name, lambda _a, _b: _a < _b) + create_test_class('equal', _type_name, lambda _a, _b: _a == _b) + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_lod_rank_table.py b/python/paddle/v2/framework/tests/test_lod_rank_table.py index 2242d4391dc7c12e59f4f157b34b0525efcbc9a1..408145c10f46e24e8a54b05b4f3afa9231b6ffd6 100644 --- a/python/paddle/v2/framework/tests/test_lod_rank_table.py +++ b/python/paddle/v2/framework/tests/test_lod_rank_table.py @@ -18,7 +18,6 @@ class TestLoDRankTable(unittest.TestCase): tensor = core.LoDTensor() tensor.set(numpy.random.random(size=(17, 100)), cpu) tensor.set_lod([[0, 1, 3], [0, 5, 6, 7], [0, 3, 4, 9, 10, 13, 16, 17]]) - exe.run(g_main_program, scope=scope, feed={'x': tensor}) var = scope.find_var(rank_table.name) table = var.get_lod_rank_table() diff --git a/python/paddle/v2/framework/tests/test_lod_tensor_array_ops.py b/python/paddle/v2/framework/tests/test_lod_tensor_array_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..61a5fcf07d2af18bf904c74d5c0d4c4eb462154c --- /dev/null +++ b/python/paddle/v2/framework/tests/test_lod_tensor_array_ops.py @@ -0,0 +1,127 @@ +import unittest +import paddle.v2.framework.core as core +import numpy +import paddle.v2.framework.layers as layers +from paddle.v2.framework.framework import Program +from paddle.v2.framework.executor import Executor + + +class TestCPULoDTensorArrayOps(unittest.TestCase): + def place(self): + return core.CPUPlace() + + def test_lod_tensor_to_array_level_0(self): + tensor = core.LoDTensor() + tensor.set( + numpy.arange(10).reshape(10, 1).astype('int32'), self.place()) + tensor.set_lod([[0, 3, 9, 10]]) + expect = map(lambda x: numpy.array(x).astype('int32'), + [[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]]) + self.main(tensor=tensor, expect_array=expect, expect_lod=[] * 6) + + def test_lod_tensor_to_array_level_0_empty_seq(self): + tensor = core.LoDTensor() + tensor.set( + numpy.arange(10).reshape(10, 1).astype('int32'), self.place()) + tensor.set_lod([[0, 3, 9, 9, 10]]) + expect = map(lambda x: numpy.array(x).astype('int32'), + [[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]]) + self.main(tensor=tensor, expect_array=expect, expect_lod=[] * 6) + + def test_lod_tensor_to_array_level_1(self): + tensor = core.LoDTensor() + tensor.set( + numpy.arange(20).reshape(20, 1).astype('int32'), self.place()) + tensor.set_lod([[0, 2, 5], [0, 3, 9, 11, 17, 20]]) + + expect = [ + numpy.array( + [9, 10, 0, 1, 2], dtype='int32'), numpy.array( + [11, 12, 13, 14, 15, 16, 3, 4, 5, 6, 7, 8], dtype='int32'), + numpy.array( + [17, 18, 19], dtype='int32') + ] + + lod = [[[0, 2, 5]], [[0, 6, 12]], [[0, 3]]] + self.main(tensor=tensor, expect_array=expect, expect_lod=lod) + + def test_lod_tensor_to_array_level_1_empty_seq(self): + tensor = core.LoDTensor() + tensor.set( + numpy.arange(31).reshape(31, 1).astype('int32'), self.place()) + + tensor.set_lod([[0, 3, 5, 9, 11], + [0, 3, 7, 11, 11, 12, 17, 19, 21, 23, 30, 31]]) + + expect = [ + numpy.array( + item, dtype='int32') + for item in [[ + 12, 13, 14, 15, 16, 0, 1, 2, 23, 24, 25, 26, 27, 28, 29 + ], [17, 18, 3, 4, 5, 6, 11, 30], [19, 20, 7, 8, 9, 10], [21, 22]] + ] + + lod = [[[0, 5, 8, 8, 15]], [[0, 2, 6, 7, 8]], [[0, 2, 6]], [[0, 2]]] + self.main(tensor=tensor, expect_array=expect, expect_lod=lod) + + def test_lod_tensor_to_array_level_2(self): + tensor = core.LoDTensor() + tensor.set( + numpy.arange(50).reshape(50, 1).astype('int32'), self.place()) + tensor.set_lod([[0, 2, 5, 6], [0, 2, 5, 6, 10, 12, 13], + [0, 3, 7, 11, 17, 21, 22, 23, 27, 31, 39, 45, 46, 50]]) + + expect = [ + numpy.array( + item, dtype='int32') + for item in [[21, 0, 1, 2, 3, 4, 5, 6, 46, 47, 48, 49], range( + 22, 39) + range(7, 21), range(39, 46)] + ] + lod = [[[0, 1, 3, 4], [0, 1, 4, 8, 12]], + [[0, 4, 7], [0, 1, 5, 9, 17, 21, 27, 31]], [[0, 2], [0, 6, 7]]] + self.main(tensor=tensor, expect_array=expect, expect_lod=lod) + + def test_lod_tensor_to_array_level_2_skip_level(self): + tensor = core.LoDTensor() + tensor.set( + numpy.arange(50).reshape(50, 1).astype('int32'), self.place()) + tensor.set_lod([[0, 2, 5, 6], [0, 2, 5, 6, 10, 12, 13], + [0, 3, 7, 11, 17, 21, 22, 23, 27, 31, 39, 45, 46, 50]]) + self.main(tensor=tensor, expect_array=None, expect_lod=None, level=1) + + def main(self, tensor, expect_array, expect_lod, level=0): + place = self.place() + program = Program() + x = layers.data(name='x', shape=[10], main_program=program) + x.persistable = True + table = layers.lod_rank_table(x, level=level, main_program=program) + array = layers.lod_tensor_to_array(x, table, main_program=program) + array.persistable = True + + result = layers.array_to_lod_tensor(array, table, main_program=program) + result.persistable = True + exe = Executor(place) + scope = core.Scope() + exe.run(program, feed={'x': tensor}, scope=scope) + var = scope.find_var(array.name) + array = var.get_lod_tensor_array() + if expect_array is not None and expect_lod is not None: + self.check_array_same(array, expect_array, expect_lod) + self.check_tensor_same(scope.find_var(result.name).get_tensor(), tensor) + + def check_array_same(self, array, expect_tensor, expect_lod): + self.assertEqual(len(expect_tensor), len(array)) + for i, exp in enumerate(zip(expect_tensor, expect_lod)): + exp_tensor, exp_lod = exp + exp_tensor = numpy.expand_dims(exp_tensor, axis=1) + self.assertTrue(numpy.allclose(exp_tensor, numpy.array(array[i]))) + self.assertEqual(exp_lod, array[i].lod()) + + def check_tensor_same(self, actual, expect): + self.assertTrue( + numpy.allclose(numpy.array(actual), numpy.array(expect))) + self.assertEqual(actual.lod(), expect.lod()) + + +if __name__ == '__main__': + unittest.main()