diff --git a/cmake/generic.cmake b/cmake/generic.cmake index b9c1dde97bc444d793d67ff622fd6b13c6435a9a..404717187d08febd7c1486b31159d647f0ef3357 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -463,7 +463,7 @@ function(py_test TARGET_NAME) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_test(NAME ${TARGET_NAME} COMMAND env PYTHONPATH=${PADDLE_PYTHON_BUILD_DIR}/lib-python - python2 ${py_test_SRCS} + ${PYTHON_EXECUTABLE} ${py_test_SRCS} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() endfunction() diff --git a/cmake/util.cmake b/cmake/util.cmake index ad905ab55ba3537054fa5b30b5fca4d83c406702..0dc33ce385175d1e2dc454d41db467d4b9d9cf9a 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -168,17 +168,3 @@ function(create_resources res_file output_file) COMMAND python ARGS ${PADDLE_SOURCE_DIR}/cmake/make_resource.py ${res_file} ${output_file} DEPENDS ${res_file} ${PADDLE_SOURCE_DIR}/cmake/make_resource.py) endfunction() - - -# Create a python unittest using run_python_tests.sh, -# which takes care of making correct running environment -function(add_python_test TEST_NAME) - foreach(arg ${ARGN}) - get_filename_component(py_fn ${arg} NAME_WE) - set(TRG_NAME ${TEST_NAME}_${py_fn}) - add_test(NAME ${TRG_NAME} - COMMAND env PYTHONPATH=${PADDLE_PYTHON_PACKAGE_DIR} - python2 ${arg} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) - endforeach() -endfunction() diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index c08e844847737b1172f6453767cc7f5e7b1a2bda..4b0eff3adb6fff0c9599b8613c5f19daea840674 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -6,7 +6,10 @@ cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context) + cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) +cc_test(tensor_util_test SRCS tensor_util_test.cc DEPS tensor) + 
cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto) @@ -51,10 +54,6 @@ cc_library(executor SRCS executor.cc DEPS op_registry device_context scope frame cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) - -cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor) -cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place) - cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry proto_desc) cc_library(selected_rows SRCS selected_rows.cc DEPS tensor) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index b9018ecdba8303fd6b37c87edd99e192aa604228..8fd2906107c490eee129fc10262df28bfa67800b 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -22,7 +22,6 @@ #include "paddle/framework/block_desc.h" #include "paddle/framework/op_registry.h" -#include "paddle/operators/dynamic_recurrent_op.h" #include "paddle/operators/net_op.h" namespace paddle { @@ -218,21 +217,6 @@ static std::unique_ptr BackwardRecursive( return false; }); - // process recurrent gradient op as a special operator. - if (forwardOp.Type() == "dynamic_recurrent") { - // NOTE clean up cycle call somewhere (RNN's stepnet constains itself), - // or this will result in infinite loop. 
- const auto& rnnop = - *static_cast(&forwardOp); - auto rnn_grad_op = - static_cast(grad_op.get()); - const auto& stepnet_op = - *static_cast(&rnnop.rnn.GetStepUnit()); - // create stepnet's gradient op - rnn_grad_op->rnn.SetStepUnit( - BackwardRecursive(stepnet_op, no_grad_names, grad_to_var, uniq_id)); - } - if (net->ops_.empty()) { // Current no aux op is added to network return grad_op; } @@ -522,7 +506,7 @@ ParamGradInfoMap AppendBackward( new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}}, {{"shape", std::vector{1}}, {"value", static_cast(1.0)}, - {"data_type", target.GetDataType()}})); + {"dtype", target.GetDataType()}})); // infer var type of fill_one_op fill_one_op->InferVarType(root_block); diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index adedd8cb0e8504fd6fc924e62a2ede3c1c7ce698..2ffb5b7dbb27b561092856eac0de23d0c3788f75 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -120,7 +120,7 @@ void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id, for (auto& op_desc : block.AllOps()) { auto op = paddle::framework::OpRegistry::CreateOp(*op_desc); - VLOG(10) << op->DebugString(); + VLOG(3) << op->DebugString(); op->Run(*local_scope, *device); } if (create_local_scope) { diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 7f8a51cc581e759bc707e506ac7cdeb3680f40ac..21bdfca1111f16d5b8ea71be004ddb8da12fd03c 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -24,6 +24,7 @@ #include #include "paddle/framework/ddim.h" #include "paddle/framework/tensor.h" +#include "paddle/framework/tensor_util.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" @@ -175,9 +176,9 @@ LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level, PADDLE_ENFORCE_EQ(num_instances, lod_level.size() - 1); for (size_t ins = 0; ins < num_instances; ins++) { for (size_t elem = lod_level[ins]; elem < 
lod_level[ins + 1]; elem++) { - tensor.Slice(elem, elem + 1) - .CopyFrom(source.Slice(ins, ins + 1), platform::CPUPlace(), - platform::CPUDeviceContext()); + auto slice = tensor.Slice(elem, elem + 1); + CopyFrom(source.Slice(ins, ins + 1), platform::CPUPlace(), + platform::CPUDeviceContext(), &slice); } } return tensor; diff --git a/paddle/framework/prune.cc b/paddle/framework/prune.cc index bf3066983cdcf44ae84f236ac72486e5d4fd5b92..da76052eb4d3067214841af72a35cebb26477e7f 100644 --- a/paddle/framework/prune.cc +++ b/paddle/framework/prune.cc @@ -26,6 +26,8 @@ namespace framework { const std::string kFeedOpType = "feed"; const std::string kFetchOpType = "fetch"; +const std::string kDropOutOpType = "dropout"; +const std::string kBatchNormOpType = "batch_norm"; bool HasDependentVar(const OpDesc& op_desc, const std::set& dependent_vars) { @@ -106,5 +108,26 @@ void Prune(const ProgramDesc& input, ProgramDesc* output) { prune_impl(input, output, 0); } +void inference_optimize_impl(const ProgramDesc& input, ProgramDesc* output, + int block_id) { + *output = input; + auto* op_field = output->mutable_blocks(block_id)->mutable_ops(); + for (auto& op_desc : *op_field) { + if (op_desc.type() == kDropOutOpType || + op_desc.type() == kBatchNormOpType) { + for (auto& attr : *op_desc.mutable_attrs()) { + if (attr.name() == "is_test") { + attr.set_b(true); + break; + } + } + } + } +} + +void InferenceOptimize(const ProgramDesc& input, ProgramDesc* output) { + inference_optimize_impl(input, output, 0); +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/prune.h b/paddle/framework/prune.h index 8cfb16343aa44dcc8a3349b01adecce33f1c2b5b..23db014894348094a98e043aa744c6f0d27b2640 100644 --- a/paddle/framework/prune.h +++ b/paddle/framework/prune.h @@ -22,5 +22,7 @@ namespace framework { void Prune(const ProgramDesc& input, ProgramDesc* output); +void InferenceOptimize(const ProgramDesc& input, ProgramDesc* output); + } // namespace framework } // namespace 
paddle diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 28d0fcf94ec31c82476e093f93ccee222a0c9d9a..6a0c5133c9a6bb326ca51755242e75b6eb9e5474 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -89,34 +89,6 @@ class Tensor { /*! The internal of two tensors share the same memory block. */ inline Tensor& ShareDataWith(const Tensor& src); - /** - * @brief Copy the content of external tensor to a new place. - * - * @param[in] src The external tensor. - * @param[in] dst_place The dst place. - * @param[in] ctx The device context contains device resources. - * - * @note CopyFrom supports CPU <-> GPU, GPU <-> GPU. - */ - // TODO(qijun): https://github.com/PaddlePaddle/Paddle/issues/4647 - // Remove `CopyFrom` and `CopyFromVector` from Tensor interface - // and make them global functions - inline void CopyFrom(const Tensor& src, const platform::Place& dst_place, - const platform::DeviceContext& ctx); - - /** - * @brief Copy the content of an external vector to a tensor. - * - * @param[in] src The external tensor. - * @param[in] ctx The device context contains device resources. - * - * * @note CopyFromVector assumes that the tensor has been resized - * before invoking. - */ - template - inline void CopyFromVector(const std::vector& src, - const platform::DeviceContext& ctx); - /** * @brief Return a sub-tensor of the given tensor. * @@ -141,7 +113,6 @@ class Tensor { size_t memory_size() const; - private: inline void check_memory_size() const; private: diff --git a/paddle/framework/tensor_array.cc b/paddle/framework/tensor_array.cc deleted file mode 100644 index 0947e33548130a923e998f8bad68db00097af909..0000000000000000000000000000000000000000 --- a/paddle/framework/tensor_array.cc +++ /dev/null @@ -1,444 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - - - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/framework/tensor_array.h" - -#include -#include -#include - -#include "paddle/framework/eigen.h" - -namespace paddle { -namespace framework { - -namespace detail { - -/* - * Offer an iterator over the length-sorted lod-tensor's top level. The top - * level of a lod-tensor stores batch-size of sequences, each top-level sequence - * may contains several lower-level sequences, sort top-level lod by the numbers - * of lower-level sequences in descending order, so that during RNN's running, - * the batch-size will keep decreasing, the short sentences will end at the tail - * of each batch. - * - * Let's take a simple lod-tensor for example - * - * |(0) |(1) top-level has two instances - * ||| ||||| lower-level - * - * sort by lower-level's length - * - * |(1) |(0) - * ||||| ||| - * - * when RNN runs, it get 5 batches (equals the number of elements the longest - * sequence has) - * - * ||||| - * ||| - * - * the first three batches has two elements, the last two elements just has 1 - * element each. 
- */ -struct DynamicBatchUnpacker { - using value_type = float; - - DynamicBatchUnpacker(const LoDTensor& source, size_t level, - bool descend = true) - : source(&source), level(level) { - BuildLengthSortedMeta(descend); - } - - LoDTensor GetBatch(size_t index); - - std::vector meta; - - LoDTensor const* source; - size_t level; - - protected: - void BuildLengthSortedMeta(bool descend); -}; - -LoDTensor PackDynamicBatch(const std::vector& source, - const std::vector& meta, const LoD& lod, - size_t level); - -std::vector GenDyBatchIndice(const DySeqMetaBatch& meta, int batch_id) { - // collect indice need to copy to the batch - std::vector indice; - for (const auto& seq : meta) { - size_t id = seq.begin + batch_id; - if (id >= seq.end) break; - indice.push_back(id); - } - return indice; -} - -} // namespace detail - -const LoDTensor& TensorArray::Read(size_t index) const { - PADDLE_ENFORCE_LE(index, MAX_SIZE, "index[%d] too large", index); - if (index >= size()) { - values_.resize(index + 1); - } - return values_[index]; -} - -void TensorArray::Write(size_t index, const LoDTensor& value) { - PADDLE_ENFORCE_LE(index, MAX_SIZE, "index[%d] too large", index); - - if (index >= size()) { - values_.resize(index + 1); - } - - values_[index].set_lod(value.lod()); - values_[index].Resize(value.dims()); - values_[index].mutable_data(value.place()); - values_[index].CopyFrom(value, value.place(), platform::CPUDeviceContext()); -} - -void TensorArray::WriteShared(size_t index, const LoDTensor& value) { - PADDLE_ENFORCE_LE(index, MAX_SIZE, "index[%d] too large", index); - if (index >= size()) { - values_.resize(index + 1); - } - - values_[index].set_lod(value.lod()); - values_[index].ShareDataWith(value); -} - -LoDTensor TensorArray::Pack(size_t level, const std::vector& meta, - const LoD& lod) const { - return detail::PackDynamicBatch(values_, meta, lod, level); -} - -DySeqMetaBatch TensorArray::Unpack(const LoDTensor& source, int level, - bool length_desend) { - 
detail::DynamicBatchUnpacker unpacker(source, level, - length_desend /*descend*/); - - // find max length of all the sequences - size_t max_length = 0; - for (const auto& seq : unpacker.meta) { - max_length = std::max(max_length, seq.end - seq.begin); - } - - // write batches to values - for (size_t batch_id = 0; batch_id < max_length; batch_id++) { - Write(batch_id, unpacker.GetBatch(batch_id)); - } - - PADDLE_ENFORCE(!unpacker.meta.empty()); - return unpacker.meta; -} - -LoDTensor TensorArray::LodPack(size_t level) const { - PADDLE_ENFORCE_GT(size(), 0UL, "no time step exists"); - // the levels should be no less than 2 - LoDTensor merged; - const LoDTensor *pre, *cur; - pre = &Read(0); - - for (size_t step = 1; step < size(); step++) { - cur = &Read(step); - PADDLE_ENFORCE_GT(cur->NumLevels(), 0); - PADDLE_ENFORCE_GT(pre->NumLevels(), 0); - PADDLE_ENFORCE_EQ(pre->NumLevels(), cur->NumLevels()); - PADDLE_ENFORCE_EQ(pre->NumElements(level), cur->NumElements(level)); - - merged = LodPackTwo(*pre, *cur, level); - pre = &merged; - } - return merged; -} - -/* - * NOTE currently, only the lowest level supports packing. - * The lowest LoD will be changed, while the relative offsets in levels above - * stay unchanged. 
- * - * previous step : [0] [1] [3] - * current step: [0 1 2] [2 3] [] - * packed to - * [0 0] [0 1] [0 2] [1 2] [1 3] [3] - */ -LoDTensor TensorArray::LodPackTwo(const LoDTensor& pre, const LoDTensor& cur, - size_t level) const { - PADDLE_ENFORCE_EQ(pre.NumLevels(), cur.NumLevels()); - PADDLE_ENFORCE_EQ(pre.NumLevels(), level + 1, - "Only the lowest LoD level supports pack temporarily."); - // calculate the result tensor's shape first - size_t num_instances = 0; - for (size_t elem = 0; elem < pre.NumElements(level); elem++) { - size_t prefix_size = pre.NumElements(level, elem); - size_t num_candidates = cur.NumElements(level, elem); - if (num_candidates > 0) { - num_instances += num_candidates * (prefix_size + 1); - } else { - num_instances += prefix_size; - } - } - - auto res_dims = pre.dims(); - res_dims[0] = num_instances; - LoDTensor result; - result.Resize(res_dims); - result.mutable_data(cur.place()); - - Vector last_lod_level; - // copy data - size_t index = 0; - last_lod_level.push_back(index); - for (size_t elem = 0; elem < pre.NumElements(level); elem++) { - size_t prefix_size = pre.NumElements(level, elem); - size_t num_candidates = cur.NumElements(level, elem); - - // slice the prefix Tensor - LoDTensor prefix = pre; - prefix.ShrinkInLevel(level, elem, elem + 1); - LoDTensor candidate = cur; - if (num_candidates > 0) { - candidate.ShrinkInLevel(level, elem, elem + 1); - } else { // just push prefix - result.Slice(index, index + prefix_size) - .CopyFrom(prefix, result.place(), platform::CPUDeviceContext()); - index += prefix_size; - last_lod_level.push_back(index); - } - for (size_t candi = 0; candi < num_candidates; candi++) { - // TODO(superjom) support GPU - result.Slice(index, index + prefix_size) - .CopyFrom(prefix, result.place(), platform::CPUDeviceContext()); - index += prefix_size; - // copy candidate record - result.Slice(index, index + 1) - .CopyFrom(candidate.Slice(candi, candi + 1), result.place(), - platform::CPUDeviceContext()); - 
index++; - last_lod_level.push_back(index); - } - } - - // update lod - auto lod = cur.lod(); - lod.back() = last_lod_level; - result.set_lod(lod); - return result; -} - -/* - * source [0 1 2] [3 4] [5 6 7] will be transformd to a list of LoDTensors such - * as - * [0 3 5] [1 4 6] [2 7] with 1-level LoDs: - * - [0 1 2 3] - * - [0 1 2 3] - * - [0 1 1 2], the [1,1) here means the second sequence is empty - * - * NOTE Unpack a LoDTensor in this approach may result in a big LoD. - */ -void TensorArray::LodUnpack(const LoDTensor& source, size_t level) { - PADDLE_ENFORCE_EQ(level, source.NumLevels() - 1, - "only the lowest LoD level supports unpack."); - const size_t non_empty_instances = source.dims()[0]; - size_t index = 0; - Vector lowest_lod_level; - lowest_lod_level.push_back(index); - - for (size_t step = 0; step < non_empty_instances; step++) { - size_t num_instances = 0; - for (size_t id = 0; id < source.NumElements(level); id++) { - auto instance = source; - instance.ShrinkInLevel(level, id, id + 1); - if (static_cast(instance.dims()[0]) > step) { - num_instances++; - index++; - } - lowest_lod_level.push_back(index); - } - - // create tensor for this time step - LoDTensor tensor; - auto dims = source.dims(); - dims[0] = num_instances; - // set lod - auto lod = source.lod(); - lod.back() = lowest_lod_level; - tensor.set_lod(lod); - - index = 0; - for (size_t id = 0; id < source.NumElements(level); id++) { - auto instance = source; - instance.ShrinkInLevel(level, id, id + 1); - if (static_cast(instance.dims()[0]) > step) { - // copy this instance - tensor.Slice(index, index + 1) - .CopyFrom(instance.Slice(step, step + 1), tensor.place(), - platform::CPUDeviceContext()); - index++; - } - } - Write(step, tensor); - } -} - -LoDTensor TensorArray::Stack() const { - LoDTensor result; - if (size() == 0) return result; - - const auto& first_dims = values_.front().dims(); - // check all the values have the same shape - // TODO(superjom) check the same dtypes - for (size_t 
idx = 1; idx < size(); idx++) { - const auto& value_dims = values_[idx].dims(); - PADDLE_ENFORCE_EQ(first_dims, value_dims); - } - - // copy - auto result_dims = vectorize(first_dims); - result_dims.insert(result_dims.begin(), size()); - result.Resize(make_ddim(result_dims)); - result.mutable_data(platform::CPUPlace()); - - for (size_t idx = 0; idx < size(); idx++) { - result.Slice(idx, idx + 1) - .CopyFrom(Read(idx), platform::CPUPlace(), - platform::CPUDeviceContext()); - } - return result; -} - -void TensorArray::Unstack(const LoDTensor& source) const { - Unstack(source, false /*data_shared*/); -} - -void TensorArray::UnstackShared(const LoDTensor& source) const { - Unstack(source, true /*data_shared*/); -} - -void TensorArray::Unstack(const LoDTensor& source, bool data_shared) const { - size_t first_dim = source.dims()[0]; - DDim value_dims = slice_ddim(source.dims(), 1, source.dims().size()); - PADDLE_ENFORCE_GT(first_dim, 0, - "source should have some data to be unstacked"); - - values_.resize(first_dim); - - for (size_t elem = 0; elem < first_dim; elem++) { - // create a new value - auto& value = values_[elem]; - if (data_shared) { - // share memory - value.ShareDataWith(source.Slice(elem, elem + 1)); - } else { - // copy - value.Resize(value_dims); - value.CopyFrom(source.Slice(elem, elem + 1), platform::CPUPlace(), - platform::CPUDeviceContext()); - } - } -} - -size_t TensorArray::size() const { return values_.size(); } - -namespace detail { - -void DynamicBatchUnpacker::BuildLengthSortedMeta(bool descend) { - PADDLE_ENFORCE(meta.empty(), "duplicate build meta"); - // collect meta for each sequence in some level - auto lod = SliceLevels(source->lod(), level, level + 1)[0]; - - for (size_t seq_id = 0; seq_id < lod.size() - 1; seq_id++) { - DySeqMeta seq_meta({lod[seq_id], lod[seq_id + 1], seq_id}); - meta.push_back(seq_meta); - } - - PADDLE_ENFORCE_GT(meta.size(), 0, "meta is empty"); - - // sort by length - sort(meta.begin(), meta.end(), - [descend](const 
DySeqMeta& a, const DySeqMeta& b) { - bool a_ge_b = (a.end - a.begin) > (b.end - b.begin); - return descend ? a_ge_b : !a_ge_b; - }); -} - -LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) { - PADDLE_ENFORCE(!meta.empty(), "should build meta first"); - LoDTensor result; - - auto indice = detail::GenDyBatchIndice(meta, index); - PADDLE_ENFORCE(!indice.empty(), "invalid batch at %d", index); - - // copy the indice of records in LoDTensor - auto record_dims = slice_ddim(source->dims(), 1, source->dims().size()); - auto record_dims_vec = vectorize(record_dims); - record_dims_vec.insert(record_dims_vec.begin(), indice.size()); - result.Resize(make_ddim(record_dims_vec)); - result.mutable_data(platform::CPUPlace()); - - for (size_t i = 0; i < indice.size(); i++) { - auto index = indice[i]; - auto target = result.Slice(i, i + 1); - auto slice = source->Slice(index, index + 1); - - target.CopyFrom(slice, platform::CPUPlace(), platform::CPUDeviceContext()); - } - - return result; -} - -// TODO(supejom) to cache lod if reasonable -LoDTensor PackDynamicBatch(const std::vector& source, - const std::vector& meta, const LoD& lod, - size_t level) { - PADDLE_ENFORCE(!source.empty()); - PADDLE_ENFORCE(!meta.empty()); - PADDLE_ENFORCE(!lod.empty()); - - LoDTensor result; - - // init result space - auto record_dims = slice_ddim(source[0].dims(), 1, source[0].dims().size()); - auto record_dims_vec = vectorize(record_dims); - auto height = lod[level].back(); - record_dims_vec.insert(record_dims_vec.begin(), height); - result.Resize(make_ddim(record_dims_vec)); - result.mutable_data(platform::CPUPlace()); - - for (size_t batch_id = 0; batch_id < source.size(); batch_id++) { - for (size_t seq_id = 0; seq_id < meta.size(); seq_id++) { - const auto& seq_meta = meta[seq_id]; - // source is source[batch_id][seq_id] - // target is result[index] - auto index = seq_meta.begin + batch_id; - if (index >= seq_meta.end) break; - auto source_ = source[batch_id].Slice(seq_id, seq_id + 1); - 
auto target = result.Slice(index, index + 1); - target.CopyFrom(source_, platform::CPUPlace(), - platform::CPUDeviceContext()); - } - } - - result.set_lod(lod); - return result; -} - -} // namespace detail - -} // namespace framework -} // namespace paddle diff --git a/paddle/framework/tensor_array.h b/paddle/framework/tensor_array.h deleted file mode 100644 index 78fad8cab7e27a7f07ca542c2a083460ee9e2b79..0000000000000000000000000000000000000000 --- a/paddle/framework/tensor_array.h +++ /dev/null @@ -1,132 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once -#include - -#include "paddle/framework/lod_tensor.h" - -namespace paddle { -namespace framework { - -/* - * DyBatchSeqPosition stores indices of the basic element in tensor. It is used - * after lod-tensor's re-assembling, its info can be used to recover the order - * in original lod-tensor. - */ -struct DySeqMeta { - DySeqMeta(size_t begin, size_t end, size_t ori_idx) - : begin(begin), end(end), ori_idx(ori_idx) {} - - size_t begin; - size_t end; // not included - size_t ori_idx; -}; - -using DySeqMetaBatch = std::vector; - -/* - * Extract the indices of instances. - */ -std::vector GenDyBatchIndice(const DySeqMetaBatch &metas, int batch_id); - -/* - * TensorArray is a C-array-like array of tensors, it is meant to be used with - * dynamic iteration primitives such as while_loop. 
It is used to segment inputs - * and store states in all time steps. - * - * By providing some methods similar to a C++ array, the difinition of some - * state-based dynamic models such as RNN cound be more natural and highly - * flexible. - */ -class TensorArray { - public: - using value_type = float; - - // max number of values allowed to store. - const size_t MAX_SIZE{100000}; - - /* - * Read the value at location `index` in the `TensorArray`. - */ - const LoDTensor &Read(size_t index) const; - - /* - * Write value into the index of the TensorArray. - */ - void Write(size_t index, const LoDTensor &value); - - /* - * Write value into the index of the TensorArray, with memory shared. - */ - void WriteShared(size_t index, const LoDTensor &value); - - /* - * Recover the original LoD-arranged LoDTensor with the `values`, `level` and - * `indice_map`. - */ - LoDTensor Pack(size_t level, const DySeqMetaBatch &meta, - const LoD &lod) const; - - /* - * Split LoDTensor in some `level` and write the generated batches to - * `values`, if set `desend`, will sort by length in descending order else in - * ascending order. - */ - DySeqMetaBatch Unpack(const LoDTensor &source, int level, bool length_desend); - - /* - * Pack an array of LoDTensors to a LoDTensor. - */ - LoDTensor LodPack(size_t level) const; - - /* - * Unpack a LoDTensor to an array of LoDTensors. - */ - void LodUnpack(const LoDTensor &source, size_t level); - - /* - * Pack the values into a tensor with rank one higher than each tensor in - * values. - */ - LoDTensor Stack() const; - - /* - * Unstacks the given division of a rank-`R` tensor into rank-`(R-1)` tensors. - */ - void Unstack(const LoDTensor &source) const; - - /* - * Unstacks the given division of a rank-`R` tensor into rank-`(R-1)` tensors, - * with memory of tensors shared. - */ - void UnstackShared(const LoDTensor &source) const; - - /* - * Return the number of values. 
- */ - size_t size() const; - - protected: - void Unstack(const LoDTensor &source, bool data_shared) const; - - LoDTensor LodPackTwo(const LoDTensor &pre, const LoDTensor &cur, - size_t level) const; - - private: - mutable std::vector values_; -}; // class TensorArray - -} // namespace framework -} // namespace paddle diff --git a/paddle/framework/tensor_array_test.cc b/paddle/framework/tensor_array_test.cc deleted file mode 100644 index 83b52b442daf9b2f1fc40f23e458fcb67c5040e8..0000000000000000000000000000000000000000 --- a/paddle/framework/tensor_array_test.cc +++ /dev/null @@ -1,182 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include "paddle/framework/tensor_array.h" - -#include - -namespace paddle { -namespace framework { - -class TensorArrayTester : public ::testing::Test { - protected: - void SetUp() override { - LoDTensor source; - source.Resize(make_ddim({batch_size, dim})); - int* data = source.mutable_data(platform::CPUPlace()); - for (int i = 0; i < 16 * 32; i++) { - data[i] = i; - } - ta.Unstack(source); - } - - TensorArray ta; - const int batch_size = 16; - const int dim = 32; -}; - -TEST_F(TensorArrayTester, Read) { - for (int i = 0; i < batch_size; i++) { - const auto& tensor = ta.Read(i); - ASSERT_EQ(tensor.dims()[0], 1); - ASSERT_EQ(tensor.dims()[1], dim); - } -} - -TEST_F(TensorArrayTester, Write) { - LoDTensor source; - source.Resize(make_ddim({1, dim})); - for (int i = 0; i < dim; i++) { - *(source.mutable_data(platform::CPUPlace()) + i) = i; - } - - ta.Write(2, source); - - const auto& tensor = ta.Read(2); - for (int i = 0; i < dim; i++) { - EXPECT_EQ(*(tensor.data() + i), *(source.data() + i)); - } -} - -TEST_F(TensorArrayTester, WriteShared) { - LoDTensor source; - source.Resize(make_ddim({1, dim})); - for (int i = 0; i < dim; i++) { - *(source.mutable_data(platform::CPUPlace()) + i) = i; - } - - ta.WriteShared(2, source); - - const auto& tensor = ta.Read(2); - for (int i = 0; i < dim; i++) { - EXPECT_EQ(*(tensor.data() + i), *(source.data() + i)); - } - - EXPECT_EQ(source.data(), tensor.data()); -} - -class TensorArrayPackTester : public ::testing::Test { - protected: - virtual void SetUp() override { - lod.push_back(std::vector{0, 2, 9, 13}); - - source.set_lod(lod); - source.Resize(make_ddim({13, 128})); - source.mutable_data(platform::CPUPlace()); - - // content of each setence: 0 1 2 3 4 - const auto& level = lod.front(); - for (size_t i = 0; i < level.size() - 1; i++) { - size_t begin = level[i]; - size_t end = level[i + 1]; - for (size_t j = begin; j < end; j++) { - auto record = source.Slice(j, j + 1); - for (int dim = 0; dim < 128; dim++) { - 
record.mutable_data(platform::CPUPlace())[dim] = j - begin; - } - } - } - - // unpack - meta = ta.Unpack(source, 0, true); - } - - LoD lod; - TensorArray ta; - LoDTensor source; - std::vector meta; -}; - -TEST_F(TensorArrayPackTester, Unpack) { - ASSERT_EQ(ta.size(), 7UL); - - const auto& t0 = ta.Read(0); - const auto& t1 = ta.Read(1); - - ASSERT_EQ(t0.data()[0], int(0)); - ASSERT_EQ(t1.data()[0], int(1)); -} - -TEST_F(TensorArrayPackTester, Pack) { - LoDTensor packed = ta.Pack(0, meta, lod); -} - -TEST_F(TensorArrayTester, size) { - ASSERT_EQ(ta.size(), static_cast(batch_size)); -} - -TEST(TensorArray, LodPack) { - // three time steps, each step stores a LoDTensors - // - [0] [1] - // - [2 3], [4 5] - // - [6 7] [] [8], [9, 10] - // try to get a LoDTensor with content: - // - [0 2 6] - // - [0 2 7] - // - [0 3] - // - [1 4 8] - // - [1 5 9] - // - [1 5 10] - std::array tensors; - tensors[0].Resize(make_ddim({2, 1})); - tensors[1].Resize(make_ddim({4, 1})); - tensors[2].Resize(make_ddim({5, 1})); - int index = 0; - for (auto& t : tensors) { - t.mutable_data(platform::CPUPlace()); - for (int i = 0; i < t.dims()[0]; i++) { - t.data()[i] = index; - index++; - } - } - - std::array lods; - std::vector> levels{ - {0, 1, 2}, {0, 2, 4}, {0, 2, 2, 3, 5}}; - for (int i = 0; i < 3; i++) { - lods[i].emplace_back(levels[i].begin(), levels[i].end()); - } - - TensorArray ta; - for (int i = 0; i < 3; i++) { - tensors[i].set_lod(lods[i]); - ta.Write(i, tensors[i]); - } - - auto merged = ta.LodPack(0); - - std::vector target_tensor_data{{0, 2, 6, // 0 - 0, 2, 7, // 1 - 0, 3, // 2 - 1, 4, 8, // 3 - 1, 5, 9, // 5 - 1, 5, 10}}; - EXPECT_EQ(merged.dims()[0], (int)target_tensor_data.size()); - for (size_t i = 0; i < target_tensor_data.size(); i++) { - EXPECT_EQ(target_tensor_data[i], merged.data()[i]); - } -} - -} // namespace framework -} // namespace paddle diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 
7e88e039611007d17156d10f852eb46f3ee8e7a3..aba1f9f09329f890ef190f8820b958c56f017e89 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -150,84 +150,6 @@ inline Tensor& Tensor::ShareDataWith(const Tensor& src) { return *this; } -inline void Tensor::CopyFrom(const Tensor& src, - const platform::Place& dst_place, - const platform::DeviceContext& ctx) { - src.check_memory_size(); - Resize(src.dims()); - - auto src_place = src.holder_->place(); - auto src_ptr = src.data(); - - auto dst_ptr = mutable_data(dst_place, src.type()); - - auto size = src.numel() * SizeOfType(src.type()); - - if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { - memory::Copy(boost::get(dst_place), dst_ptr, - boost::get(src_place), src_ptr, size); - } -#ifdef PADDLE_WITH_CUDA - else if (platform::is_gpu_place(src_place) && - platform::is_cpu_place(dst_place)) { - auto src_gpu_place = boost::get(src_place); - auto dst_cpu_place = boost::get(dst_place); - auto ctx_place = ctx.GetPlace(); - PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); - auto ctx_gpu_place = boost::get(ctx_place); - PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place); - memory::Copy( - dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, - reinterpret_cast(ctx).stream()); - } else if (platform::is_cpu_place(src_place) && - platform::is_gpu_place(dst_place)) { - auto src_cpu_place = boost::get(src_place); - auto dst_gpu_place = boost::get(dst_place); - auto ctx_place = ctx.GetPlace(); - PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); - auto ctx_gpu_place = boost::get(ctx_place); - PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place); - memory::Copy( - dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, - reinterpret_cast(ctx).stream()); - } else if (platform::is_gpu_place(src_place) && - platform::is_gpu_place(dst_place)) { - auto src_gpu_place = boost::get(src_place); - auto dst_gpu_place = boost::get(dst_place); - auto ctx_place = ctx.GetPlace(); - 
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); - auto ctx_gpu_place = boost::get(ctx_place); - PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place); - memory::Copy( - dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, - reinterpret_cast(ctx).stream()); - } -#endif -} - -template -inline void Tensor::CopyFromVector(const std::vector& src, - const platform::DeviceContext& ctx) { - auto dst_place = ctx.GetPlace(); - auto src_ptr = static_cast(src.data()); - platform::CPUPlace src_place; - auto dst_ptr = static_cast(mutable_data(dst_place)); - auto size = src.size() * sizeof(T); - - if (platform::is_cpu_place(dst_place)) { - memory::Copy(boost::get(dst_place), dst_ptr, src_place, - src_ptr, size); - } -#ifdef PADDLE_WITH_CUDA - else if (platform::is_gpu_place(dst_place)) { - memory::Copy( - boost::get(dst_place), dst_ptr, src_place, src_ptr, - size, - reinterpret_cast(ctx).stream()); - } -#endif -} - inline Tensor Tensor::Slice(int begin_idx, int end_idx) const { check_memory_size(); PADDLE_ENFORCE_GE(begin_idx, 0, diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index 1bb0fb71b079940d35a995b78e04a531c074a8b2..ceca64365a1a628642eb374a3e3bbdff490c955a 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -188,178 +188,6 @@ TEST(Tensor, Slice) { #endif } -TEST(Tensor, CopyFrom) { - using namespace paddle::framework; - using namespace paddle::platform; - { - Tensor src_tensor; - Tensor dst_tensor; - CPUDeviceContext cpu_ctx((CPUPlace())); - - int* src_ptr = src_tensor.mutable_data(make_ddim({3, 3}), CPUPlace()); - - int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - memcpy(src_ptr, arr, 9 * sizeof(int)); - - auto cpu_place = new paddle::platform::CPUPlace(); - dst_tensor.CopyFrom(src_tensor, *cpu_place, cpu_ctx); - - const int* dst_ptr = dst_tensor.data(); - ASSERT_NE(src_ptr, dst_ptr); - for (size_t i = 0; i < 9; ++i) { - EXPECT_EQ(src_ptr[i], dst_ptr[i]); - } - - Tensor slice_tensor = src_tensor.Slice(1, 2); - 
dst_tensor.CopyFrom(slice_tensor, *cpu_place, cpu_ctx); - const int* slice_ptr = slice_tensor.data(); - dst_ptr = dst_tensor.data(); - ASSERT_NE(dst_ptr, slice_ptr); - for (size_t i = 0; i < 3; ++i) { - EXPECT_EQ(dst_ptr[i], slice_ptr[i]); - } - } -#ifdef PADDLE_WITH_CUDA - { - Tensor src_tensor; - Tensor gpu_tensor; - Tensor dst_tensor; - - int* src_ptr = src_tensor.mutable_data(make_ddim({3, 3}), CPUPlace()); - - int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - memcpy(src_ptr, arr, 9 * sizeof(int)); - - // CPU Tensor to GPU Tensor - auto gpu_place = new paddle::platform::GPUPlace(0); - CUDADeviceContext gpu_ctx(*gpu_place); - gpu_tensor.CopyFrom(src_tensor, *gpu_place, gpu_ctx); - - // GPU Tensor to CPU Tensor - auto cpu_place = new paddle::platform::CPUPlace(); - dst_tensor.CopyFrom(gpu_tensor, *cpu_place, gpu_ctx); - - // Sync before Compare Tensors - gpu_ctx.Wait(); - const int* dst_ptr = dst_tensor.data(); - ASSERT_NE(src_ptr, dst_ptr); - for (size_t i = 0; i < 9; ++i) { - EXPECT_EQ(src_ptr[i], dst_ptr[i]); - } - - Tensor slice_tensor = src_tensor.Slice(1, 2); - - // CPU Slice Tensor to GPU Tensor - gpu_tensor.CopyFrom(slice_tensor, *gpu_place, gpu_ctx); - - // GPU Tensor to CPU Tensor - dst_tensor.CopyFrom(gpu_tensor, *cpu_place, gpu_ctx); - - // Sync before Compare Slice Tensors - gpu_ctx.Wait(); - const int* slice_ptr = slice_tensor.data(); - dst_ptr = dst_tensor.data(); - ASSERT_NE(dst_ptr, slice_ptr); - for (size_t i = 0; i < 3; ++i) { - EXPECT_EQ(dst_ptr[i], slice_ptr[i]); - } - } -#endif -} - -TEST(Tensor, CopyFromVector) { - using namespace paddle::framework; - using namespace paddle::platform; - { - std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - Tensor cpu_tensor; - - // Copy to CPU Tensor - cpu_tensor.Resize(make_ddim({3, 3})); - auto cpu_place = new paddle::platform::CPUPlace(); - CPUDeviceContext cpu_ctx(*cpu_place); - cpu_tensor.CopyFromVector(src_vec, cpu_ctx); - - // Compare Tensors - const int* cpu_ptr = cpu_tensor.data(); - const int* 
src_ptr = src_vec.data(); - ASSERT_NE(src_ptr, cpu_ptr); - for (size_t i = 0; i < 9; ++i) { - EXPECT_EQ(src_ptr[i], cpu_ptr[i]); - } - - src_vec.erase(src_vec.begin(), src_vec.begin() + 5); - cpu_tensor.Resize(make_ddim({2, 2})); - cpu_tensor.CopyFromVector(src_vec, cpu_ctx); - cpu_ptr = cpu_tensor.data(); - src_ptr = src_vec.data(); - ASSERT_NE(src_ptr, cpu_ptr); - for (size_t i = 0; i < 5; ++i) { - EXPECT_EQ(src_ptr[i], cpu_ptr[i]); - } - - delete cpu_place; - } - -#ifdef PADDLE_WITH_CUDA - { - std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - Tensor cpu_tensor; - Tensor gpu_tensor; - Tensor dst_tensor; - - // Copy to CPU Tensor - cpu_tensor.Resize(make_ddim({3, 3})); - auto cpu_place = new paddle::platform::CPUPlace(); - CPUDeviceContext cpu_ctx(*cpu_place); - cpu_tensor.CopyFromVector(src_vec, cpu_ctx); - - // Copy to GPUTensor - gpu_tensor.Resize(make_ddim({3, 3})); - auto gpu_place = new paddle::platform::GPUPlace(); - CUDADeviceContext gpu_ctx(*gpu_place); - gpu_tensor.CopyFromVector(src_vec, gpu_ctx); - // Copy from GPU to CPU tensor for comparison - dst_tensor.CopyFrom(gpu_tensor, *cpu_place, gpu_ctx); - - // Sync before Compare Tensors - gpu_ctx.Wait(); - const int* src_ptr = src_vec.data(); - const int* cpu_ptr = cpu_tensor.data(); - const int* dst_ptr = dst_tensor.data(); - ASSERT_NE(src_ptr, cpu_ptr); - ASSERT_NE(src_ptr, dst_ptr); - for (size_t i = 0; i < 9; ++i) { - EXPECT_EQ(src_ptr[i], cpu_ptr[i]); - EXPECT_EQ(src_ptr[i], dst_ptr[i]); - } - - src_vec.erase(src_vec.begin(), src_vec.begin() + 5); - - cpu_tensor.Resize(make_ddim({2, 2})); - cpu_tensor.CopyFromVector(src_vec, cpu_ctx); - gpu_tensor.Resize(make_ddim({2, 2})); - gpu_tensor.CopyFromVector(src_vec, gpu_ctx); - dst_tensor.CopyFrom(gpu_tensor, *cpu_place, gpu_ctx); - - // Sync before Compare Tensors - gpu_ctx.Wait(); - src_ptr = src_vec.data(); - cpu_ptr = cpu_tensor.data(); - dst_ptr = dst_tensor.data(); - ASSERT_NE(src_ptr, cpu_ptr); - ASSERT_NE(src_ptr, dst_ptr); - for (size_t i = 0; i 
< 5; ++i) { - EXPECT_EQ(src_ptr[i], cpu_ptr[i]); - EXPECT_EQ(src_ptr[i], dst_ptr[i]); - } - - delete cpu_place; - delete gpu_place; - } -#endif -} - TEST(Tensor, ReshapeToMatrix) { using namespace paddle::framework; using namespace paddle::platform; diff --git a/paddle/framework/tensor_util.h b/paddle/framework/tensor_util.h new file mode 100644 index 0000000000000000000000000000000000000000..8ee2e15a59113e6d17513045e6baa58f8da9026e --- /dev/null +++ b/paddle/framework/tensor_util.h @@ -0,0 +1,153 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include "paddle/framework/tensor.h" + +namespace paddle { +namespace framework { + +/** + * @brief Copy the content of external tensor to a new place. + * + * @param[in] src The external tensor. + * @param[in] dst_place The dst place. + * @param[in] ctx The device context contains device resources. + * + * @note CopyFrom supports CPU <-> GPU, GPU <-> GPU. 
+ */ + +inline void CopyFrom(const Tensor& src, const platform::Place& dst_place, + const platform::DeviceContext& ctx, Tensor* dst) { + src.check_memory_size(); + + dst->Resize(src.dims()); + auto src_place = src.place(); + auto src_ptr = src.data(); + + auto dst_ptr = dst->mutable_data(dst_place, src.type()); + + auto size = src.numel() * SizeOfType(src.type()); + + if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { + memory::Copy(boost::get(dst_place), dst_ptr, + boost::get(src_place), src_ptr, size); + } +#ifdef PADDLE_WITH_CUDA + else if (platform::is_gpu_place(src_place) && // NOLINT + platform::is_cpu_place(dst_place)) { + auto src_gpu_place = boost::get(src_place); + auto dst_cpu_place = boost::get(dst_place); + auto ctx_place = ctx.GetPlace(); + PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); + auto ctx_gpu_place = boost::get(ctx_place); + PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place); + memory::Copy( + dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, + reinterpret_cast(ctx).stream()); + } else if (platform::is_cpu_place(src_place) && + platform::is_gpu_place(dst_place)) { + auto src_cpu_place = boost::get(src_place); + auto dst_gpu_place = boost::get(dst_place); + auto ctx_place = ctx.GetPlace(); + PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); + auto ctx_gpu_place = boost::get(ctx_place); + PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place); + memory::Copy( + dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, + reinterpret_cast(ctx).stream()); + } else if (platform::is_gpu_place(src_place) && + platform::is_gpu_place(dst_place)) { + auto src_gpu_place = boost::get(src_place); + auto dst_gpu_place = boost::get(dst_place); + auto ctx_place = ctx.GetPlace(); + PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); + auto ctx_gpu_place = boost::get(ctx_place); + PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place); + memory::Copy( + dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, + reinterpret_cast(ctx).stream()); + } 
+#endif +} + +/** + * @brief Copy the content of an external vector to a tensor. + * + * @param[in] src The external vector. + * @param[in] ctx The device context; its place is the destination place. + * @param[out] dst The destination tensor. + * @note CopyFromVector resizes dst to a 1-D tensor of src.size() elements + * before copying, so the caller does not need to resize it. + */ +template +inline void CopyFromVector(const std::vector& src, + const platform::DeviceContext& ctx, Tensor* dst) { + auto dst_place = ctx.GetPlace(); + auto src_ptr = static_cast(src.data()); + platform::CPUPlace src_place; + dst->Resize({static_cast(src.size())}); + auto dst_ptr = static_cast(dst->mutable_data(dst_place)); + auto size = src.size() * sizeof(T); + + if (platform::is_cpu_place(dst_place)) { + memory::Copy(boost::get(dst_place), dst_ptr, src_place, + src_ptr, size); + } +#ifdef PADDLE_WITH_CUDA + else if (platform::is_gpu_place(dst_place)) { // NOLINT + memory::Copy( + boost::get(dst_place), dst_ptr, src_place, src_ptr, + size, + reinterpret_cast(ctx).stream()); + } +#endif +} + +/** + * @brief Copy the content of a tensor to a vector. + * + * @param[in] src The source tensor. + * @param[in] ctx The device context; its stream is used for GPU sources. + * @param[out] dst The destination vector. + * @note CopyToVector resizes dst to src.numel() elements before + * copying. 
+ */ +template +inline void CopyToVector(const Tensor& src, const platform::DeviceContext& ctx, + std::vector* dst) { + auto src_ptr = static_cast(src.data()); + auto size = src.numel() * sizeof(T); + + platform::CPUPlace dst_place; + dst->resize(src.numel()); + auto dst_ptr = static_cast(dst->data()); + + if (platform::is_cpu_place(src.place())) { + memory::Copy(dst_place, dst_ptr, boost::get(src.place()), + src_ptr, size); + } +#ifdef PADDLE_WITH_CUDA + else if (platform::is_gpu_place(src.place())) { // NOLINT + memory::Copy( + dst_place, dst_ptr, boost::get(src.place()), src_ptr, + size, + reinterpret_cast(ctx).stream()); + } +#endif + +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/tensor_util_test.cc b/paddle/framework/tensor_util_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..03a70de182d0eb499a81413d38229c81c4378b91 --- /dev/null +++ b/paddle/framework/tensor_util_test.cc @@ -0,0 +1,228 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "paddle/framework/tensor_util.h" +#include +#include + +namespace paddle { +namespace framework { +TEST(CopyFrom, Tensor) { + Tensor src_tensor; + Tensor dst_tensor; + platform::CPUDeviceContext cpu_ctx((platform::CPUPlace())); + + int* src_ptr = + src_tensor.mutable_data(make_ddim({3, 3}), platform::CPUPlace()); + + int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + memcpy(src_ptr, arr, 9 * sizeof(int)); + + auto cpu_place = new platform::CPUPlace(); + CopyFrom(src_tensor, *cpu_place, cpu_ctx, &dst_tensor); + + const int* dst_ptr = dst_tensor.data(); + ASSERT_NE(src_ptr, dst_ptr); + for (size_t i = 0; i < 9; ++i) { + EXPECT_EQ(src_ptr[i], dst_ptr[i]); + } + + Tensor slice_tensor = src_tensor.Slice(1, 2); + CopyFrom(slice_tensor, *cpu_place, cpu_ctx, &dst_tensor); + const int* slice_ptr = slice_tensor.data(); + dst_ptr = dst_tensor.data(); + ASSERT_NE(dst_ptr, slice_ptr); + for (size_t i = 0; i < 3; ++i) { + EXPECT_EQ(dst_ptr[i], slice_ptr[i]); + } +#ifdef PADDLE_WITH_CUDA + { + Tensor src_tensor; + Tensor gpu_tensor; + Tensor dst_tensor; + + int* src_ptr = + src_tensor.mutable_data(make_ddim({3, 3}), platform::CPUPlace()); + + int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + memcpy(src_ptr, arr, 9 * sizeof(int)); + + // CPU Tensor to GPU Tensor + auto gpu_place = new platform::GPUPlace(0); + platform::CUDADeviceContext gpu_ctx(*gpu_place); + CopyFrom(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor); + + // GPU Tensor to CPU Tensor + auto cpu_place = new platform::CPUPlace(); + CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor); + + // Sync before Compare Tensors + gpu_ctx.Wait(); + const int* dst_ptr = dst_tensor.data(); + ASSERT_NE(src_ptr, dst_ptr); + for (size_t i = 0; i < 9; ++i) { + EXPECT_EQ(src_ptr[i], dst_ptr[i]); + } + + Tensor slice_tensor = src_tensor.Slice(1, 2); + + // CPU Slice Tensor to GPU Tensor + CopyFrom(slice_tensor, *gpu_place, gpu_ctx, &gpu_tensor); + + // GPU Tensor to CPU Tensor + CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor); 
+ + // Sync before Compare Slice Tensors + gpu_ctx.Wait(); + const int* slice_ptr = slice_tensor.data(); + dst_ptr = dst_tensor.data(); + ASSERT_NE(dst_ptr, slice_ptr); + for (size_t i = 0; i < 3; ++i) { + EXPECT_EQ(dst_ptr[i], slice_ptr[i]); + } + } +#endif +} + +TEST(CopyFromVector, Tensor) { + using namespace paddle::framework; + using namespace paddle::platform; + { + std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + Tensor cpu_tensor; + + // Copy to CPU Tensor + cpu_tensor.Resize(make_ddim({3, 3})); + auto cpu_place = new paddle::platform::CPUPlace(); + CPUDeviceContext cpu_ctx(*cpu_place); + CopyFromVector(src_vec, cpu_ctx, &cpu_tensor); + + // Compare Tensors + const int* cpu_ptr = cpu_tensor.data(); + const int* src_ptr = src_vec.data(); + ASSERT_NE(src_ptr, cpu_ptr); + for (size_t i = 0; i < 9; ++i) { + EXPECT_EQ(src_ptr[i], cpu_ptr[i]); + } + + src_vec.erase(src_vec.begin(), src_vec.begin() + 5); + cpu_tensor.Resize(make_ddim({2, 2})); + CopyFromVector(src_vec, cpu_ctx, &cpu_tensor); + cpu_ptr = cpu_tensor.data(); + src_ptr = src_vec.data(); + ASSERT_NE(src_ptr, cpu_ptr); + for (size_t i = 0; i < src_vec.size(); ++i) {  // only 4 elements remain after erase + EXPECT_EQ(src_ptr[i], cpu_ptr[i]); + } + + delete cpu_place; + } + +#ifdef PADDLE_WITH_CUDA + { + std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + Tensor cpu_tensor; + Tensor gpu_tensor; + Tensor dst_tensor; + + // Copy to CPU Tensor + cpu_tensor.Resize(make_ddim({3, 3})); + auto cpu_place = new paddle::platform::CPUPlace(); + CPUDeviceContext cpu_ctx(*cpu_place); + CopyFromVector(src_vec, cpu_ctx, &cpu_tensor); + + // Copy to GPUTensor + gpu_tensor.Resize(make_ddim({3, 3})); + auto gpu_place = new paddle::platform::GPUPlace(); + CUDADeviceContext gpu_ctx(*gpu_place); + CopyFromVector(src_vec, gpu_ctx, &gpu_tensor); + // Copy from GPU to CPU tensor for comparison + CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor); + + // Sync before Compare Tensors + gpu_ctx.Wait(); + const int* src_ptr = src_vec.data(); + const int* cpu_ptr = 
cpu_tensor.data(); + const int* dst_ptr = dst_tensor.data(); + ASSERT_NE(src_ptr, cpu_ptr); + ASSERT_NE(src_ptr, dst_ptr); + for (size_t i = 0; i < 9; ++i) { + EXPECT_EQ(src_ptr[i], cpu_ptr[i]); + EXPECT_EQ(src_ptr[i], dst_ptr[i]); + } + + src_vec.erase(src_vec.begin(), src_vec.begin() + 5); + + cpu_tensor.Resize(make_ddim({2, 2})); + CopyFromVector(src_vec, cpu_ctx, &cpu_tensor); + gpu_tensor.Resize(make_ddim({2, 2})); + CopyFromVector(src_vec, gpu_ctx, &gpu_tensor); + CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor); + + // Sync before Compare Tensors + gpu_ctx.Wait(); + src_ptr = src_vec.data(); + cpu_ptr = cpu_tensor.data(); + dst_ptr = dst_tensor.data(); + ASSERT_NE(src_ptr, cpu_ptr); + ASSERT_NE(src_ptr, dst_ptr); + for (size_t i = 0; i < src_vec.size(); ++i) {  // only 4 elements remain after erase + EXPECT_EQ(src_ptr[i], cpu_ptr[i]); + EXPECT_EQ(src_ptr[i], dst_ptr[i]); + } + + delete cpu_place; + delete gpu_place; + } +#endif +} + +TEST(CopyToVector, Tensor) { + using namespace paddle::framework; + using namespace paddle::platform; + { + Tensor src; + int* src_ptr = src.mutable_data({3, 3}, CPUPlace()); + for (int i = 0; i < 3 * 3; ++i) { + src_ptr[i] = i; + } + + CPUPlace place; + CPUDeviceContext cpu_ctx(place); + std::vector dst; + CopyToVector(src, cpu_ctx, &dst); + + for (int i = 0; i < 3 * 3; ++i) { + EXPECT_EQ(src_ptr[i], dst[i]); + } + } +#ifdef PADDLE_WITH_CUDA + { + std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + Tensor gpu_tensor; + GPUPlace place; + CUDADeviceContext gpu_ctx(place); + CopyFromVector(src_vec, gpu_ctx, &gpu_tensor); + + std::vector dst; + CopyToVector(gpu_tensor, gpu_ctx, &dst); + + for (int i = 0; i < 3 * 3; ++i) { + EXPECT_EQ(src_vec[i], dst[i]); + } + } +#endif +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt index aed5275dbf9be707cc6e19e729133ba8eab58195..8841c14ee083fccfd2271efd0c331805919a09d9 100644 --- a/paddle/memory/CMakeLists.txt +++ b/paddle/memory/CMakeLists.txt @@ -1,6 +1,6 @@ 
add_subdirectory(detail) -cc_library(memory SRCS memory.cc DEPS place) +cc_library(memory SRCS memory.cc DEPS place enforce) cc_library(memcpy SRCS memcpy.cc) cc_library(paddle_memory diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index ee25abd6cb5d2963e311ad8788a31518ad0004dd..7c2d2cf213c4d804b119e8da355a76ad142c9aa6 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -178,7 +178,6 @@ set(DEPS_OPS cond_op cross_entropy_op recurrent_op - dynamic_recurrent_op softmax_with_cross_entropy_op softmax_op sequence_softmax_op @@ -227,13 +226,6 @@ op_library(sequence_pool_op DEPS sequence_pooling) op_library(lstm_op DEPS sequence2batch lstm_compute) op_library(conv_transpose_op DEPS vol2col) op_library(gru_op DEPS sequence2batch gru_compute) -if(WITH_TESTING) - op_library(dynamic_recurrent_op SRCS dynamic_recurrent_op.cc rnn/recurrent_op_utils.cc - DEPS net_op tensor_array gtest) -else() - op_library(dynamic_recurrent_op SRCS dynamic_recurrent_op.cc rnn/recurrent_op_utils.cc - DEPS net_op tensor_array) -endif() op_library(recurrent_op SRCS recurrent_op.cc DEPS executor) list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) @@ -248,9 +240,6 @@ cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor) cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory) -cc_test(dynamic_recurrent_op_test SRCS dynamic_recurrent_op_test.cc - rnn/recurrent_op_utils.cc - DEPS dynamic_recurrent_op) if(WITH_GPU) cc_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context) endif() diff --git a/paddle/operators/activation_op.cc b/paddle/operators/activation_op.cc index c66d575d24bb6b410602c34965ab1db6bc81b41d..154c618e8e7c4650b7f22684d3357de9c52a416c 100644 --- a/paddle/operators/activation_op.cc +++ b/paddle/operators/activation_op.cc @@ -223,6 +223,51 @@ $y = 
|x|$ } }; +class CeilOpMaker : public framework::OpProtoAndCheckerMaker { + public: + CeilOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "Input of Ceil operator"); + AddOutput("Y", "Output of Ceil operator"); + AddComment(R"DOC( +Ceil Activation Operator. + +$y = ceil(x)$ + +)DOC"); + } +}; + +class FloorOpMaker : public framework::OpProtoAndCheckerMaker { + public: + FloorOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "Input of Floor operator"); + AddOutput("Y", "Output of Floor operator"); + AddComment(R"DOC( +Floor Activation Operator. + +$y = floor(x)$ + +)DOC"); + } +}; + +class RoundOpMaker : public framework::OpProtoAndCheckerMaker { + public: + RoundOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "Input of Round operator"); + AddOutput("Y", "Output of Round operator"); + AddComment(R"DOC( +Round Activation Operator. 
+ +$y = [x]$ + +)DOC"); + } +}; + class ReciprocalOpMaker : public framework::OpProtoAndCheckerMaker { public: ReciprocalOpMaker(framework::OpProto *proto, @@ -493,6 +538,15 @@ REGISTER_OP(sqrt, ops::ActivationOp, ops::SqrtOpMaker, sqrt_grad, REGISTER_OP(abs, ops::ActivationOp, ops::AbsOpMaker, abs_grad, ops::ActivationOpGrad); +REGISTER_OP(ceil, ops::ActivationOp, ops::CeilOpMaker, ceil_grad, + ops::ActivationOpGrad); + +REGISTER_OP(floor, ops::ActivationOp, ops::FloorOpMaker, floor_grad, + ops::ActivationOpGrad); + +REGISTER_OP(round, ops::ActivationOp, ops::RoundOpMaker, round_grad, + ops::ActivationOpGrad); + REGISTER_OP(reciprocal, ops::ActivationOp, ops::ReciprocalOpMaker, reciprocal_grad, ops::ActivationOpGrad); diff --git a/paddle/operators/activation_op.h b/paddle/operators/activation_op.h index ceb4b4e40b67473f42e67e3f02f8e012e1b1eb50..8cd3bfbbd3f8f3210f94aef3a1586c8295730c1d 100644 --- a/paddle/operators/activation_op.h +++ b/paddle/operators/activation_op.h @@ -283,6 +283,41 @@ struct SqrtGradFunctor : public BaseActivationFunctor { } }; +// ceil(x) = ceiling(x) +template +struct CeilFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Y y) const { + y.device(d) = x.ceil(); + } +}; + +template +struct ZeroGradFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Y y, dY dy, dX dx) const { + dx.device(d) = static_cast(0) / x; + } +}; + +// floor(x) = largest integer not greater than x +template +struct FloorFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Y y) const { + y.device(d) = x.floor(); + } +}; + +// round(x) = [x] +template +struct RoundFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Y y) const { + y.device(d) = x.round(); + } +}; + // abs(x) = |x| template struct AbsFunctor : public BaseActivationFunctor { @@ -677,6 +712,9 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor { __macro(softshrink, SoftShrinkFunctor, 
SoftShrinkGradFunctor); \ __macro(sqrt, SqrtFunctor, SqrtGradFunctor); \ __macro(abs, AbsFunctor, AbsGradFunctor); \ + __macro(ceil, CeilFunctor, ZeroGradFunctor); \ + __macro(floor, FloorFunctor, ZeroGradFunctor); \ + __macro(round, RoundFunctor, ZeroGradFunctor); \ __macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ __macro(log, LogFunctor, LogGradFunctor); \ __macro(square, SquareFunctor, SquareGradFunctor); \ diff --git a/paddle/operators/array_operator.h b/paddle/operators/array_operator.h index 233a81198e336d3190565fb18556f96979cec0ce..1f2b4fdb4b4a99d5baf5de1cc226dc196ab4eb2e 100644 --- a/paddle/operators/array_operator.h +++ b/paddle/operators/array_operator.h @@ -36,7 +36,7 @@ class ArrayOp : public framework::OperatorBase { if (platform::is_gpu_place(i_tensor.place())) { // FIXME: Avoid copy from GPU to CPU framework::Tensor t; - t.CopyFrom(i_tensor, platform::CPUPlace(), dev_ctx); + framework::CopyFrom(i_tensor, platform::CPUPlace(), dev_ctx, &t); dev_ctx.Wait(); offset = static_cast(*t.data()); } else { diff --git a/paddle/operators/array_to_lod_tensor_op.cc b/paddle/operators/array_to_lod_tensor_op.cc index c0903bb4e5ca7f160e19eefab99af7e3e4a8ed76..faeba7f3ed26d05de16775a1de4d42f802111207 100644 --- a/paddle/operators/array_to_lod_tensor_op.cc +++ b/paddle/operators/array_to_lod_tensor_op.cc @@ -102,8 +102,9 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { if (len == 0) { continue; } - out->Slice(out_offset, out_offset + len) - .CopyFrom(x[x_idx].Slice(start_offset, end_offset), place, dev_ctx); + auto slice = out->Slice(out_offset, out_offset + len); + framework::CopyFrom(x[x_idx].Slice(start_offset, end_offset), place, + dev_ctx, &slice); out_offset += len; } } diff --git a/paddle/operators/assign_op.cc b/paddle/operators/assign_op.cc index 609e915b932e2bc4d5abee1e5f868cc07a7619d3..0a37f18729a93b15623c0a17e3689e518c38b844 100644 --- a/paddle/operators/assign_op.cc +++ b/paddle/operators/assign_op.cc @@ -43,7 +43,8 @@ class 
AssignFunctor { out_rows.set_rows(rows.rows()); out_rows.set_height(rows.height()); auto &t = rows.value(); - out_rows.mutable_value()->CopyFrom(t, t.place(), dev_ctx_); + auto *m = out_rows.mutable_value(); + framework::CopyFrom(t, t.place(), dev_ctx_, m); } template @@ -55,7 +56,7 @@ class AssignFunctor { void copy_tensor(const framework::LoDTensor &lod_tensor, framework::LoDTensor *out) const { auto &out_tensor = *out; - out_tensor.CopyFrom(lod_tensor, lod_tensor.place(), dev_ctx_); + CopyFrom(lod_tensor, lod_tensor.place(), dev_ctx_, &out_tensor); out_tensor.set_lod(lod_tensor.lod()); } diff --git a/paddle/operators/beam_search_decode_op.cc b/paddle/operators/beam_search_decode_op.cc index 3904a97d58166cfeeb2be7d2144700dbd8bc5721..c796a0c5d089499e7858c7a427825fdbeb05cb7f 100644 --- a/paddle/operators/beam_search_decode_op.cc +++ b/paddle/operators/beam_search_decode_op.cc @@ -17,6 +17,36 @@ limitations under the License. */ namespace paddle { namespace operators { +struct BeamSearchDecodeFunctor { + BeamSearchDecodeFunctor(const LoDTensorArray& step_ids, + const LoDTensorArray& step_scores, + LoDTensor* id_tensor, LoDTensor* score_tensor) + : step_ids_(step_ids), + step_scores_(step_scores), + id_tensor_(id_tensor), + score_tensor_(score_tensor) {} + + template + void operator()() const; + + const LoDTensorArray& step_ids_; + const LoDTensorArray& step_scores_; + LoDTensor* id_tensor_; + LoDTensor* score_tensor_; +}; + +template +void BeamSearchDecodeFunctor::operator()() const { + BeamSearchDecoder beam_search_decoder; + beam_search_decoder.PackAllSteps(step_ids_, step_scores_, id_tensor_, + score_tensor_); +} + +template <> +void BeamSearchDecodeFunctor::operator()() const { + PADDLE_THROW("beam search decode op does not support bool!"); +} + class BeamSearchDecodeOp : public framework::OperatorBase { public: BeamSearchDecodeOp(const std::string& type, @@ -45,9 +75,9 @@ class BeamSearchDecodeOp : public framework::OperatorBase { LoDTensor* sentenceIds = 
ctx.Output("SentenceIds"); LoDTensor* sentenceScores = ctx.Output("SentenceScores"); - BeamSearchDecoder beam_search_decoder; - beam_search_decoder.PackAllSteps(*ids, *scores, sentenceIds, - sentenceScores); + framework::VisitDataType( + framework::ToDataType(scores->at(0).type()), + BeamSearchDecodeFunctor(*ids, *scores, sentenceIds, sentenceScores)); } }; diff --git a/paddle/operators/beam_search_decode_op.h b/paddle/operators/beam_search_decode_op.h index 0f007ec22f9a66572971516a711317f348e1ec5a..3b1c6cd7a1045bfbb896725c79dc1ae2e22f43dc 100644 --- a/paddle/operators/beam_search_decode_op.h +++ b/paddle/operators/beam_search_decode_op.h @@ -232,12 +232,12 @@ void BeamSearchDecoder::ConvertSentenceVectorToLodTensor( id_tensor->set_lod(lod); id_tensor->Resize({static_cast(id_data.size())}); id_tensor->mutable_data(paddle::platform::CPUPlace()); - id_tensor->CopyFromVector(id_data, cpu_ctx); + framework::CopyFromVector(id_data, cpu_ctx, id_tensor); score_tensor->set_lod(lod); score_tensor->Resize({static_cast(score_data.size())}); score_tensor->mutable_data(paddle::platform::CPUPlace()); - score_tensor->CopyFromVector(score_data, cpu_ctx); + framework::CopyFromVector(score_data, cpu_ctx, score_tensor); } template diff --git a/paddle/operators/bilinear_tensor_product_op.cc b/paddle/operators/bilinear_tensor_product_op.cc index c65ba7eb262f3aabe2c00837b79806c0b40b60fd..c88b2c9beb4497b617078c8ac5582d2f246f43fd 100644 --- a/paddle/operators/bilinear_tensor_product_op.cc +++ b/paddle/operators/bilinear_tensor_product_op.cc @@ -77,11 +77,19 @@ class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker { AddOutput("Out", "The output of bilinear_tensor_product operator."); AddComment(R"DOC( Bilinear Tensor Product operator. -Given input X and Y, a 3D tensor weight, and bias. Each column of the -output is computed by one slice i = 1, . . . 
, k of the tensor: - - M = (X W_i) \cdot Y - Out_i = \sum_i {M_i} + Bias_i +Given input X and Y, a 3D tensor Weight and a Bias. Each column of the +Output is computed by one slice $i = 1, . . . , k$ of the tensor: + +$$ +M = (X W_i) * Y \\ +Out_i = \sum_j {M_j} + Bias_i +$$ + +Where $W_i$ is the $i$-th slice of Input(Weight); + $M_j$ is the $j$-th column of $M$; + $Out_i$ is the $i$-th column of Output(Out); + $Bias_i$ is a column vector, each element of it is equal to + the $i$-th element of $Bias$; )DOC"); } diff --git a/paddle/operators/cast_op.cc b/paddle/operators/cast_op.cc index 70ee7861bab3a982eae60dd85b10c2e41f5827d0..3082a53ccfbe4f8666cfdfc2efed6b46ffdfede9 100644 --- a/paddle/operators/cast_op.cc +++ b/paddle/operators/cast_op.cc @@ -25,8 +25,8 @@ class CastOpProtoMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input tensor of cast op"); AddOutput("Out", "The output tensor of cast op"); - AddAttr("out_data_type", "output data type"); - AddAttr("in_data_type", "input data type"); + AddAttr("out_dtype", "output data type"); + AddAttr("in_dtype", "input data type"); AddComment(R"DOC( Cast Operator. 
@@ -58,8 +58,8 @@ class CastOpGradMaker : public framework::SingleGradOpDescMaker { grad->SetType("cast"); grad->SetInput("X", OutputGrad("Out")); grad->SetOutput("Out", InputGrad("X")); - grad->SetAttr("out_data_type", GetAttr("in_data_type")); - grad->SetAttr("in_data_type", GetAttr("out_data_type")); + grad->SetAttr("out_dtype", GetAttr("in_dtype")); + grad->SetAttr("in_dtype", GetAttr("out_dtype")); return std::unique_ptr(grad); } }; diff --git a/paddle/operators/cast_op.h b/paddle/operators/cast_op.h index ffdbff7030afedab2efc06479ac86ad70c185f48..850dc8e3498351e54d41fcd2b6596c6fe668df14 100644 --- a/paddle/operators/cast_op.h +++ b/paddle/operators/cast_op.h @@ -55,7 +55,7 @@ class CastOpKernel : public framework::OpKernel { auto* in = context.Input("X"); auto* out = context.Output("Out"); framework::VisitDataType( - static_cast(context.Attr("out_data_type")), + static_cast(context.Attr("out_dtype")), CastOpFunctor(in, out, context.device_context())); } }; diff --git a/paddle/operators/conv_op.h b/paddle/operators/conv_op.h index fac5f1d0e25fe205f89fc7eeb9fadfd8431517d5..09bff0a68db82aa723dc08aa83c775910e17c5b8 100644 --- a/paddle/operators/conv_op.h +++ b/paddle/operators/conv_op.h @@ -38,7 +38,7 @@ inline bool IsExpand(std::vector& filter_dim, std::vector& dilations) { bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true; for (size_t j = 0; j < strides.size(); ++j) { - filter_1 = filter_1 && (static_cast(filter_dim[j]) == 1); + filter_1 = filter_1 && (static_cast(filter_dim[j + 2]) == 1); strides_1 = strides_1 && (strides[j] == 1); padding_0 = padding_0 && (paddings[j] == 0); dilation_1 = dilation_1 && (dilations[j] == 1); @@ -91,32 +91,28 @@ class GemmConvKernel : public framework::OpKernel { const int batch_size = static_cast(input->dims()[0]); - // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w} + // filter_shape_vec: {k_o, k_i, k_h, k_w} or {k_o, k_i, k_d, k_h, k_w} std::vector 
filter_shape_vec(framework::vectorize(filter.dims())); - filter_shape_vec.erase(filter_shape_vec.begin(), - filter_shape_vec.begin() + 2); - - // output_shape_vec: {o_h, o_w} or {o_d, o_h, o_w} + // output_shape_vec: {o_n, o_c, o_h, o_w} or {o_n, o_c, o_d, o_h, o_w} std::vector output_shape_vec(framework::vectorize(output->dims())); - output_shape_vec.erase(output_shape_vec.begin(), - output_shape_vec.begin() + 2); // use col_shape in the im2col calculation // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h, k_w, o_d, // o_h, o_w} - std::vector col_shape_vec; - col_shape_vec.push_back(input->dims()[1] / groups); - col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(), - filter_shape_vec.end()); - col_shape_vec.insert(col_shape_vec.end(), output_shape_vec.begin(), - output_shape_vec.end()); + size_t data_dim = filter_shape_vec.size() - 2; + std::vector col_shape_vec(1 + 2 * data_dim); + col_shape_vec[0] = input->dims()[1] / groups; + for (size_t j = 0; j < data_dim; ++j) { + col_shape_vec[j + 1] = filter_shape_vec[j + 2]; + col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; + } framework::DDim col_shape(framework::make_ddim(col_shape_vec)); // use col_matrix_shape in the gemm calculation // size: (i_c/g * k_h * k_w, o_h * o_w) or (i_c/g * k_d * k_h * k_w, o_d * // o_h * o_w) framework::DDim col_matrix_shape = - framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); + framework::flatten_to_2d(col_shape, data_dim + 1); bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations); Tensor col; @@ -159,13 +155,13 @@ class GemmConvKernel : public framework::OpKernel { col.ShareDataWith(in_slice); col_matrix.ShareDataWith(col); col_matrix.Resize(col_matrix_shape); - } else if (filter_shape_vec.size() == 2) { + } else if (data_dim == 2U) { // im2col im2col(context.device_context(), in_slice, dilations, strides, std::vector{paddings[0], paddings[1], paddings[0], paddings[1]}, &col); - } else if 
(filter_shape_vec.size() == 3) { + } else if (data_dim == 3U) { // vol2col vol2col(context.device_context(), in_slice, dilations, strides, paddings, &col); @@ -206,26 +202,22 @@ class GemmConvGradKernel : public framework::OpKernel { const int batch_size = static_cast(input->dims()[0]); - // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w} + // filter_shape_vec: {k_o, k_i, k_h, k_w} or {k_o, k_i, k_d, k_h, k_w} std::vector filter_shape_vec(framework::vectorize(filter.dims())); - filter_shape_vec.erase(filter_shape_vec.begin(), - filter_shape_vec.begin() + 2); - - // output_shape_vec: {o_h, o_w} or {o_d, o_h, o_w} + // output_shape_vec: {o_n, o_c, o_h, o_w} or {o_n, o_c, o_d, o_h, o_w} std::vector output_shape_vec( framework::vectorize(output_grad->dims())); - output_shape_vec.erase(output_shape_vec.begin(), - output_shape_vec.begin() + 2); // use col_shape in the im2col calculation // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h, k_w, o_d, // o_h, o_w} - std::vector col_shape_vec; - col_shape_vec.push_back(input->dims()[1] / groups); - col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(), - filter_shape_vec.end()); - col_shape_vec.insert(col_shape_vec.end(), output_shape_vec.begin(), - output_shape_vec.end()); + size_t data_dim = filter_shape_vec.size() - 2; + std::vector col_shape_vec(1 + 2 * data_dim); + col_shape_vec[0] = input->dims()[1] / groups; + for (size_t j = 0; j < data_dim; ++j) { + col_shape_vec[j + 1] = filter_shape_vec[j + 2]; + col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; + } framework::DDim col_shape(framework::make_ddim(col_shape_vec)); // use col_matrix_shape in the gemm calculation @@ -233,7 +225,7 @@ class GemmConvGradKernel : public framework::OpKernel { // or // (i_c/g * k_d * k_h * k_w, o_d * o_h * o_w) framework::DDim col_matrix_shape = - framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); + framework::flatten_to_2d(col_shape, data_dim + 1); framework::DDim input_shape = 
framework::slice_ddim( input->dims(), 1, static_cast(input->dims().size())); @@ -294,12 +286,12 @@ class GemmConvGradKernel : public framework::OpKernel { out_grad_slice, false, T(1.0), &col_matrix, T(0.0)); - if (is_expand && filter_shape_vec.size() == 2) { + if (is_expand && data_dim == 2U) { col2im(context.device_context(), col, dilations, strides, std::vector{paddings[0], paddings[1], paddings[0], paddings[1]}, &in_grad_slice); - } else if (is_expand && filter_shape_vec.size() == 3) { + } else if (is_expand && data_dim == 3U) { col2vol(context.device_context(), col, dilations, strides, paddings, &in_grad_slice); } @@ -328,12 +320,12 @@ class GemmConvGradKernel : public framework::OpKernel { col.ShareDataWith(in_slice); col_matrix.ShareDataWith(col); col_matrix.Resize(col_matrix_shape); - } else if (filter_shape_vec.size() == 2) { + } else if (data_dim == 2U) { im2col(context.device_context(), in_slice, dilations, strides, std::vector{paddings[0], paddings[1], paddings[0], paddings[1]}, &col); - } else if (filter_shape_vec.size() == 3) { + } else if (data_dim == 3U) { vol2col(context.device_context(), in_slice, dilations, strides, paddings, &col); } diff --git a/paddle/operators/conv_transpose_op.h b/paddle/operators/conv_transpose_op.h index ab336ad23ce1c180b68d04e4c85b299e301d5376..0fc0735788c499c2d520c0cc689e1ce07ba67ce8 100644 --- a/paddle/operators/conv_transpose_op.h +++ b/paddle/operators/conv_transpose_op.h @@ -68,30 +68,26 @@ class GemmConvTransposeKernel : public framework::OpKernel { const int batch_size = static_cast(input->dims()[0]); - // input_shape_vec: {h, w} or {d, h, w} + // input_shape_vec: {n, c, h, w} or {n, c, d, h, w} std::vector input_shape_vec = framework::vectorize(input->dims()); - input_shape_vec.erase(input_shape_vec.begin(), input_shape_vec.begin() + 2); - - // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w} + // filter_shape_vec: {k_o, k_c, k_h, k_w} or {k_o, k_c, k_d, k_h, k_w} std::vector filter_shape_vec = 
framework::vectorize(filter.dims()); - filter_shape_vec.erase(filter_shape_vec.begin(), - filter_shape_vec.begin() + 2); // use col_shape in the im2col and col2im (or vol2col and col2vol) // calculation // col_shape_vec: {c, k_h, k_w, h, w} or {c, k_d, k_h, k_w, d, h, w} - std::vector col_shape_vec; - col_shape_vec.push_back(output->dims()[1]); - col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(), - filter_shape_vec.end()); - col_shape_vec.insert(col_shape_vec.end(), input_shape_vec.begin(), - input_shape_vec.end()); + size_t data_dim = filter_shape_vec.size() - 2; + std::vector col_shape_vec(1 + 2 * data_dim); + col_shape_vec[0] = output->dims()[1]; + for (size_t j = 0; j < data_dim; ++j) { + col_shape_vec[j + 1] = filter_shape_vec[j + 2]; + col_shape_vec[j + 1 + data_dim] = input_shape_vec[j + 2]; + } DDim col_shape(framework::make_ddim(col_shape_vec)); // use col_matrix_shape in the gemm calculation // size: (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w) - DDim col_matrix_shape = - framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); + DDim col_matrix_shape = framework::flatten_to_2d(col_shape, data_dim + 1); Tensor col; col.mutable_data(col_shape, context.GetPlace()); @@ -136,7 +132,7 @@ class GemmConvTransposeKernel : public framework::OpKernel { input_batch, false, static_cast(1.0), &col_matrix, static_cast(0.0)); - if (filter_shape_vec.size() == 2) { + if (data_dim == 2U) { // col2im: col_matrix -> dy // from (c * k_h * k_w, h * w) to (c, o_h, o_w) col2im(context.device_context(), col, @@ -144,7 +140,7 @@ class GemmConvTransposeKernel : public framework::OpKernel { std::vector{paddings[0], paddings[1], paddings[0], paddings[1]}, &output_batch); - } else if (filter_shape_vec.size() == 3) { + } else if (data_dim == 3U) { // col2vol: col_matrix -> dy // from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w) col2vol(context.device_context(), col, dilations, strides, paddings, @@ -176,30 +172,26 @@ class 
GemmConvTransposeGradKernel : public framework::OpKernel { const int batch_size = static_cast(input->dims()[0]); - // input_shape_vec: {h, w} or {d, h, w} + // input_shape_vec: {n, c, h, w} or {n, c, d, h, w} std::vector input_shape_vec = framework::vectorize(input->dims()); - input_shape_vec.erase(input_shape_vec.begin(), input_shape_vec.begin() + 2); - - // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w} + // filter_shape_vec: {k_o, k_c, k_h, k_w} or {k_o, k_c, k_d, k_h, k_w} std::vector filter_shape_vec = framework::vectorize(filter.dims()); - filter_shape_vec.erase(filter_shape_vec.begin(), - filter_shape_vec.begin() + 2); // use col_shape in the im2col and col2im (or vol2col and col2vol) // calculation // col_shape_vec: {c, k_h, k_w, h, w} or {c, k_d, k_h, k_w, d, h, w} - std::vector col_shape_vec; - col_shape_vec.push_back(output_grad->dims()[1]); - col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(), - filter_shape_vec.end()); - col_shape_vec.insert(col_shape_vec.end(), input_shape_vec.begin(), - input_shape_vec.end()); + size_t data_dim = filter_shape_vec.size() - 2; + std::vector col_shape_vec(1 + 2 * data_dim); + col_shape_vec[0] = output_grad->dims()[1]; + for (size_t j = 0; j < data_dim; ++j) { + col_shape_vec[j + 1] = filter_shape_vec[j + 2]; + col_shape_vec[j + 1 + data_dim] = input_shape_vec[j + 2]; + } DDim col_shape(framework::make_ddim(col_shape_vec)); // use col_matrix_shape in the gemm calculation // size: (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w) - DDim col_matrix_shape = - framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); + DDim col_matrix_shape = framework::flatten_to_2d(col_shape, data_dim + 1); // output size: (c, o_h, o_w) or (c, o_d, o_h, o_w) DDim output_shape = framework::slice_ddim(output_grad->dims(), 1, @@ -248,7 +240,7 @@ class GemmConvTransposeGradKernel : public framework::OpKernel { Tensor output_grad_batch = output_grad->Slice(i, i + 1).Resize(output_shape); - if 
(filter_shape_vec.size() == 2) { + if (data_dim == 2U) { // im2col: dy -> col matrix // from (c, o_h, o_w) to (c * k_h * k_w, h * w) im2col(context.device_context(), output_grad_batch, @@ -256,7 +248,7 @@ class GemmConvTransposeGradKernel : public framework::OpKernel { std::vector{paddings[0], paddings[1], paddings[0], paddings[1]}, &col); - } else if (filter_shape_vec.size() == 3) { + } else if (data_dim == 3U) { // vol2col: dy -> col_matrix // from (c, o_d, o_h, o_w) to (c * k_d * k_h * k_w, d * h * w) vol2col(context.device_context(), output_grad_batch, dilations, diff --git a/paddle/operators/dropout_op.cc b/paddle/operators/dropout_op.cc index 818146aca766cb13b93fd024c11c1209655d9e11..932c0bf8fbf6ffdc466516bb7c8578abf0f57209 100644 --- a/paddle/operators/dropout_op.cc +++ b/paddle/operators/dropout_op.cc @@ -30,7 +30,7 @@ class DropoutOp : public framework::OperatorWithKernel { auto x_dims = ctx->GetInputDim("X"); ctx->SetOutputDim("Out", x_dims); - if (ctx->Attrs().Get("is_training") == true) { + if (ctx->Attrs().Get("is_test") == false) { ctx->SetOutputDim("Mask", x_dims); } ctx->ShareLoD("X", /*->*/ "Out"); @@ -49,7 +49,7 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("dropout_prob", "Probability of setting units to zero.") .SetDefault(.5f); - AddAttr("is_training", "True if in training phase.").SetDefault(true); + AddAttr("is_test", "True if in test phase.").SetDefault(false); AddAttr("seed", "Dropout random seed.").SetDefault(0); AddComment(R"DOC( @@ -71,8 +71,8 @@ class DropoutOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->Attrs().Get("is_training"), true, - "GradOp is only callable when is_training is true"); + PADDLE_ENFORCE_EQ(ctx->Attrs().Get("is_test"), false, + "GradOp is only callable when is_test is false"); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be 
null."); PADDLE_ENFORCE(ctx->HasInput("Mask"), "Mask must not be null."); diff --git a/paddle/operators/dropout_op.cu b/paddle/operators/dropout_op.cu index 30c769000f2b98c69eaa78a4c139630dd0956386..db3578b9bf4c081e431f202f0828ec6392c924b2 100644 --- a/paddle/operators/dropout_op.cu +++ b/paddle/operators/dropout_op.cu @@ -59,7 +59,7 @@ class GPUDropoutKernel : public framework::OpKernel { auto Y = EigenMatrix::Reshape(*y, 1); auto place = context.GetEigenDevice(); - if (context.Attr("is_training")) { + if (!context.Attr("is_test")) { auto* mask = context.Output("Mask"); auto* mask_data = mask->mutable_data(context.GetPlace()); int size = framework::product(mask->dims()); diff --git a/paddle/operators/dropout_op.h b/paddle/operators/dropout_op.h index 6000b75fecdff74844605215e9364ac8f8a1525a..d9a130fdc040f745b058c39221f0bb9661473388 100644 --- a/paddle/operators/dropout_op.h +++ b/paddle/operators/dropout_op.h @@ -35,7 +35,7 @@ class CPUDropoutKernel : public framework::OpKernel { auto* y_data = y->mutable_data(context.GetPlace()); float dropout_prob = context.Attr("dropout_prob"); - if (context.Attr("is_training")) { + if (!context.Attr("is_test")) { auto* mask = context.Output("Mask"); auto* mask_data = mask->mutable_data(context.GetPlace()); int seed = context.Attr("seed"); @@ -65,8 +65,8 @@ template class DropoutGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - PADDLE_ENFORCE(context.Attr("is_training"), - "GradOp is only callable when is_training is true"); + PADDLE_ENFORCE(!context.Attr("is_test"), + "GradOp is only callable when is_test is false"); auto* grad_x = context.Output(framework::GradVarName("X")); auto* grad_y = context.Input(framework::GradVarName("Out")); diff --git a/paddle/operators/dynamic_recurrent_op.cc b/paddle/operators/dynamic_recurrent_op.cc deleted file mode 100644 index d48cc4e8df587708ab93e7d788145adc01c1d3e5..0000000000000000000000000000000000000000 --- 
a/paddle/operators/dynamic_recurrent_op.cc +++ /dev/null @@ -1,418 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve . - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/operators/dynamic_recurrent_op.h" - -#include "paddle/framework/op_registry.h" - -namespace paddle { -namespace operators { - -using framework::Scope; -using framework::TensorArray; -using framework::LoDTensor; -using framework::Variable; -using framework::OperatorBase; -using framework::DySeqMetaBatch; - -namespace detail { - -inline void CreateVariables(Scope& scope, - const std::vector& var_names) { - for (const auto& name : var_names) { - scope.Var(name); - } -} - -/* - * The inputs with sequence should be reordered when they are split, so the - * boot_states should be reordered in the same order. - * - * NOTE This may require that the `pre_state` of the first time step should just - * copy the `boot_state` rather than reference it, for that the content should - * be reordered, but the RNN op should not change the `boot_state` as an input - * variable's content. 
- */ -inline void ReorderInitialState(const DySeqMetaBatch& metas, - const LoDTensor& boot_state, LoDTensor* tensor, - const platform::Place& dst_place) { - for (size_t seq_id = 0; seq_id < metas.size(); seq_id++) { - auto slice = tensor->Slice(seq_id, seq_id + 1); - auto boot_slice = - boot_state.Slice(metas[seq_id].ori_idx, metas[seq_id].ori_idx + 1); - // TODO(superjom) pass in device context as an argument - slice.CopyFrom(boot_slice, dst_place, platform::CPUDeviceContext()); - } -} - -inline void RestoreInitialState(const DySeqMetaBatch& metas, - const LoDTensor& tensor, LoDTensor* boot_state, - const platform::Place& dst_place) { - for (size_t seq_id = 0; seq_id < metas.size(); seq_id++) { - auto slice = tensor.Slice(seq_id, seq_id + 1); - auto boot_slice = - boot_state->Slice(metas[seq_id].ori_idx, metas[seq_id].ori_idx + 1); - boot_slice.CopyFrom(slice, dst_place, platform::CPUDeviceContext()); - } -} - -} // namespace detail - -// Implementation for forward propagation. -template <> -void RNNAlgorithm::Run( - const framework::Scope& scope, const framework::OperatorBase& op, - const platform::DeviceContext& dev_ctx) { - SetComputeMode(ComputeMode::kForward); - cache_.Init(kArgNames[mode_], op, scope, &dev_ctx, &arg_); - SplitInputs(); - CreateScopes(); - WriteStepInputs(); - InitStates(); - WriteStepOutputs(); - RunSteps(); - ConcatOutputs(); -} - -// Implementation for backward propagation. -template <> -void RNNAlgorithm::Run( - const framework::Scope& scope, const framework::OperatorBase& op, - const platform::DeviceContext& dev_ctx) { - SetComputeMode(ComputeMode::kBackward); - cache_.Init(kArgNames[mode_], op, scope, &dev_ctx, &arg_); - SplitInputs(); - WriteStepInputs(); - InitStates(); - WriteStepOutputs(); - RunSteps(); - // copy boot-states' gradients back. 
- for (const auto& state : arg_.states) { - ExportInitialStateGradient(state); - } - - ConcatOutputs(); -} - -void RNNAlgorithm::SplitInputs() { - // TODO(superjom) make level a config - // TODO(superjom) check all the inputs has the same LoD - int level = 0; - for (const auto& item : cache_.inputs) { - const auto& var = item.second; - const auto& tensor = var->Get(); - TensorArray& ta = step_inputs_[item.first]; - - dy_seq_metas_[item.first] = - ta.Unpack(tensor, level, true /*length_descend*/); - - if (cache_.num_steps) { - PADDLE_ENFORCE_EQ(ta.size(), cache_.num_steps, - "inputs should have the same steps"); - } else { - cache_.num_steps = ta.size(); - } - } -} - -void RNNAlgorithm::WriteStepInputs() { - for (const auto& item : cache_.inputs) { - auto ta_it = step_inputs_.find(item.first); - PADDLE_ENFORCE(ta_it != step_inputs_.end(), - "step_inputs_ not compatible with memory set"); - TensorArray& ta = ta_it->second; - for (size_t step = 0; step < ta.size(); step++) { - auto tensor = ta.Read(step); - auto& step_scope = cache_.GetScope(step); - Variable* var = step_scope.FindVar(item.first); - if (var == nullptr) { - var = step_scope.Var(item.first); - } - var->GetMutable()->ShareDataWith(tensor); - } - } -} - -void RNNAlgorithm::WriteStepOutputs() { - // initialize step outputs - for (const auto& item : cache_.outputs) { - step_outputs_.emplace(item.first, TensorArray()); - } - PADDLE_ENFORCE_GT(step_outputs_.size(), 0UL); -} - -void RNNAlgorithm::CreateScopes() { - PADDLE_ENFORCE_GT(cache_.num_steps, 0); - // resize scopes - size_t num_scopes_need_create = cache_.num_steps - cache_.scopes->size(); - for (size_t i = 0; i < num_scopes_need_create; i++) { - cache_.scopes->emplace_back(&cache_.scope->NewScope()); - } - - // init temporary inputs - PADDLE_ENFORCE_NOT_NULL(step_unit_, "stepnet should be set first"); - std::vector states; - std::vector ex_states; - std::vector step_unit_outputs; - std::transform(arg_.states.begin(), arg_.states.end(), - 
std::back_inserter(states), - [](const rnn::StateAttr& m) { return m.var; }); - std::transform(arg_.states.begin(), arg_.states.end(), - std::back_inserter(ex_states), - [](const rnn::StateAttr& m) { return m.pre_var; }); - for (const auto& item : step_unit_->Outputs()) { - for (const auto& var : item.second) { - step_unit_outputs.push_back(var); - } - } - - for (size_t step = 0; step < cache_.num_steps; step++) { - auto& scope = cache_.GetScope(step); - detail::CreateVariables(scope, arg_.inlinks); - detail::CreateVariables(scope, arg_.outlinks); - detail::CreateVariables(scope, states); - detail::CreateVariables(scope, ex_states); - detail::CreateVariables(scope, step_unit_outputs); - } -} - -void RNNAlgorithm::ConcatOutputs() { - // TODO(superjom) transform this to a config - int level = 0; - for (size_t step = 0; step < cache_.num_steps; step++) { - auto& scope = cache_.GetScope(step); - for (auto& item : step_outputs_) { - auto* var = scope.FindVar(item.first); - PADDLE_ENFORCE_NOT_NULL(var); - auto* tensor = var->GetMutable(); - tensor->mutable_data(platform::CPUPlace()); - item.second.WriteShared(step, *tensor); - } - } - // the inputs' lods should be the same, so randomly get one lod. 
- const auto& some_lod = - cache_.scope->FindVar(arg_.inlinks.front())->Get().lod(); - const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()]; - for (auto& item : step_outputs_) { - auto tensor = item.second.Pack(level, some_meta, some_lod); - auto* output = cache_.outputs[item.first]->GetMutable(); - const_cast(output)->ShareDataWith(tensor); - } -} - -void RNNAlgorithm::RunSteps() { - if (IsBackward()) { - // call stepnet in all the time steps reversely - for (int step = cache_.num_steps - 1; step >= 0; step--) { - auto& step_scope = cache_.GetScope(step); - step_unit_->Run(step_scope, *cache_.dev_ctx); - } - } else { - for (size_t step = 0; step < cache_.num_steps; step++) { - auto& step_scope = cache_.GetScope(step); - step_unit_->Run(step_scope, *cache_.dev_ctx); - } - } -} - -void RNNAlgorithm::InitStates() { - for (size_t step = 0; step < cache_.num_steps; step++) { - for (const auto& state : arg_.states) { - CreateState(state, step); - LinkState(state, step); - } - } -} - -void RNNAlgorithm::CreateState(const rnn::StateAttr& state_attr, size_t step) { - auto& scope = cache_.GetScope(step); - auto& state = *cache_.GetTensor(scope, state_attr.var); - auto& boot_state = *cache_.GetTensor(*cache_.scope, state_attr.boot_var); - - size_t num_instances = - step_inputs_[arg_.inlinks.front()].Read(step).dims()[0]; - auto dims = boot_state.dims(); - dims[0] = num_instances; - - state.Resize(dims); - state.mutable_data(platform::CPUPlace()); - states_[state_attr.var].WriteShared(step, state); -} - -void RNNAlgorithm::LinkState(const rnn::StateAttr& state, size_t step) { - auto& scope = cache_.GetScope(step); - auto& state_pre = *cache_.GetTensor(scope, state.pre_var); - - // process the first state's boot-state(the 0-step in forward mode or the - // last step in backward mode) - // Only forward mode need to link the boot-state to the `pre-state` in first - // time step. 
In backward mode, need to copy the gradient of `pre-state` in - // first time step to the gradient of `boot-state`. - if (step == 0 && IsForward()) { - LinkInitialState(state); - } else { - size_t num_instances = - step_inputs_[arg_.inlinks.front()].Read(step).dims()[0]; - auto* pre_state = cache_.GetTensor(cache_.GetScope(step - 1), state.var); - // shink and share from previous state - auto shrinked_pre_state = pre_state->Slice(0, num_instances); - state_pre.ShareDataWith(shrinked_pre_state); - } -} - -void RNNAlgorithm::LinkInitialState(const rnn::StateAttr& state) { - // all the step_inputs' metas should be the same, just randomly select one - // and get the dyseq meta. - const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()]; - auto& scope = cache_.GetScope(0); - auto& state_pre = *cache_.GetTensor(scope, state.pre_var); - auto* pre_state = cache_.GetTensor(*cache_.scope, state.boot_var); - pre_state->mutable_data(platform::CPUPlace()); - // allocate state - state_pre.Resize(pre_state->dims()); - state_pre.mutable_data(platform::CPUPlace()); - detail::ReorderInitialState(some_meta, *pre_state, &state_pre, - pre_state->place()); -} - -void RNNAlgorithm::ExportInitialStateGradient(const rnn::StateAttr& state) { - // all the step_inputs' metas should be the same, just randomly select one - // and get the dyseq meta. 
- const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()]; - auto& scope = cache_.GetScope(0); - - auto& state_pre = *cache_.GetTensor(scope, state.pre_var); - auto& pre_state = *cache_.GetTensor(*cache_.scope, state.boot_var); - pre_state.Resize(state_pre.dims()); - detail::RestoreInitialState(some_meta, state_pre, &pre_state, - pre_state.place()); -} - -void RNNAlgorithm::ArgCache::Init(const rnn::ArgumentName& name, - const paddle::framework::OperatorBase& op, - const paddle::framework::Scope& scope, - platform::DeviceContext const* dev_ctx, - rnn::Argument* arg) { - this->scope = &scope; - InitArgument(name, op, arg); - CacheScopes(scope, *arg); - CacheInlinks(scope, arg->inlinks); - CacheOutlinks(scope, arg->outlinks); - this->dev_ctx = dev_ctx; -} - -void RNNAlgorithm::ArgCache::InitArgument(const rnn::ArgumentName& name, - const OperatorBase& op, - rnn::Argument* arg) { - rnn::InitArgument(name, arg, op, false /*is_grad*/); -} - -void RNNAlgorithm::ArgCache::CacheScopes(const Scope& scope, - const rnn::Argument& arg) { - auto scopes_var = scope.FindVar(arg.step_scopes); - PADDLE_ENFORCE(scopes_var != nullptr, - "the step_scopes output argument [%s] should be created first " - "by framework.", - arg.step_scopes); - this->scopes = scopes_var->GetMutable>(); -} - -void RNNAlgorithm::ArgCache::CacheInlinks( - const Scope& scope, const std::vector& names) { - for (auto name : names) { - auto* var = GetVariable(scope, name); - inputs[name] = var; - } -} - -void RNNAlgorithm::ArgCache::CacheOutlinks( - const Scope& scope, const std::vector& names) { - for (auto name : names) { - auto* var = GetVariable(scope, name); - outputs[name] = var; - } -} - -Variable* RNNAlgorithm::ArgCache::GetVariable(const Scope& scope, - const std::string& name) { - auto* var = scope.FindVar(name); - PADDLE_ENFORCE_NOT_NULL(var, "variable [%s] not exist in scope", name); - return var; -} - -LoDTensor* RNNAlgorithm::ArgCache::GetTensor(const framework::Scope& scope, - const 
std::string& name) { - auto* var = GetVariable(scope, name); - return var->GetMutable(); -} - -const std::array RNNAlgorithm::kArgNames{ - {rnn::ArgumentName{"step_unit", "step_scopes", "inputs", "outputs", - "states", "ex_states", "initial_states"}, - rnn::ArgumentName{"step_unit", "step_scopes@GRAD", "outputs@GRAD", - "inputs@GRAD", "states", "ex_states", - "initial_states@GRAD"}}}; - -void DynamicRecurrentOp::Run(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const { - rnn.Run( - scope, *dynamic_cast(this), dev_ctx); -} - -void DynamicRecurrentGradientOp::Run( - const Scope& scope, const platform::DeviceContext& dev_ctx) const { - rnn.Run( - scope, *dynamic_cast(this), dev_ctx); -} - -class DynamicRecurrentOpProtoAndCheckerMaker - : public framework::OpProtoAndCheckerMaker { - public: - DynamicRecurrentOpProtoAndCheckerMaker(framework::OpProto* proto, - framework::OpAttrChecker* op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - const auto& name = - RNNAlgorithm::kArgNames[RNNAlgorithm::ComputeMode::kForward]; - // inputs and outputs stored in proto - AddInput(name.inlinks, - "The inputs that need to be segmented for each step.") - .AsDuplicable(); - AddInput(name.initial_states, "Variables to initialize the states.") - .AsDuplicable(); - - AddOutput(name.outlinks, - "The outputs that need to be concatenated for all steps.") - .AsDuplicable(); - AddOutput(name.step_scopes, "step scopes"); - - // Attributes stored in AttributeMap - AddAttr>(name.ex_states, "names of ex_states"); - AddAttr>(name.states, "names of states"); - - AddComment(R"DOC( -Dynamic Recurrent Operator. - -This is a RNN operator for varience-length sequences. 
- -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OP(dynamic_recurrent, paddle::operators::DynamicRecurrentOp, - paddle::operators::DynamicRecurrentOpProtoAndCheckerMaker, - dynamic_recurrent_grad, - paddle::operators::DynamicRecurrentGradientOp); diff --git a/paddle/operators/dynamic_recurrent_op.h b/paddle/operators/dynamic_recurrent_op.h deleted file mode 100644 index 5b0548c3a44c9f58838ecc567ee41a587883c26a..0000000000000000000000000000000000000000 --- a/paddle/operators/dynamic_recurrent_op.h +++ /dev/null @@ -1,233 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once - -#ifdef PADDLE_WITH_TESTING -#include "gtest/gtest.h" -#endif - -#include "paddle/framework/lod_tensor.h" -#include "paddle/framework/operator.h" -#include "paddle/framework/tensor_array.h" -#include "paddle/framework/variable.h" -#include "paddle/operators/rnn/recurrent_op_utils.h" - -namespace paddle { -namespace operators { - -class RNNAlgorithm { - public: - enum ComputeMode { kForward = 0, kBackward = 1 }; - static const std::array kArgNames; - using value_type = float; - - /* - * Different `Run` method for forward and backward, `_` is just for template - * specifialization. - */ - template - void Run(const framework::Scope& scope, const framework::OperatorBase& op, - const platform::DeviceContext& dev_ctx); - /* - * Split the inputs(LoDTensors) to segments for each time step. 
- */ - void SplitInputs(); - - /* - * Create step-scopes to store temporary outputs in each time steps. - */ - void CreateScopes(); - - /* - * Link TensorArray steps to the corresponding variables located in - * step-scopes. - */ - void WriteStepInputs(); - - /* - * Write output of each step to the corresponding TensorArray. - */ - void WriteStepOutputs(); - - /* - * Initialize the states, each state will have a corresponding pre-state, - * which share the memory with the state in the previous time state. The - * pre-state in the first time step will be initialized with an zero tensor or - * a tensor in parent scope if is provided. - */ - void InitStates(); - - /* - * Create state variables for each time step. - */ - void CreateState(const rnn::StateAttr& state, size_t step); - - /* - * Link pre-state variable in current scope to the state variable in the - * previous time step (scope) by reference. - */ - void LinkState(const rnn::StateAttr& state, size_t step); - - /* - * Link the pre-state of the first time step to the `boot-state` in parent's - * scope. - */ - void LinkInitialState(const rnn::StateAttr& state); - - /* - * Copy the gradient from `pre-state` in the first step-scope to the - * `boot-state` in parent's scope. - */ - void ExportInitialStateGradient(const rnn::StateAttr& state); - - /* - * Calculate time steps. - */ - void RunSteps(); - - /* - * Concatenate outputs in each time step and generate a LoDTensor. - */ - void ConcatOutputs(); - - void SetComputeMode(ComputeMode mode) { mode_ = mode; } - bool IsForward() const { return mode_ == ComputeMode::kForward; } - bool IsBackward() const { return mode_ == ComputeMode::kBackward; } - - /* - * set a step unit that is created according to a RecurrentOp's step unit. 
- */ - void SetStepUnit(std::unique_ptr step_unit) { - PADDLE_ENFORCE_NOT_NULL(step_unit); - step_unit_ = std::move(step_unit); - } - const framework::OperatorBase& GetStepUnit() const { return *step_unit_; } - - const framework::TensorArray& state(const std::string& name) const { - auto it = states_.find(name); - PADDLE_ENFORCE(it != states_.end()); - return it->second; - } - const framework::TensorArray& step_input(const std::string& name) const { - auto it = step_inputs_.find(name); - PADDLE_ENFORCE(it != step_inputs_.end()); - return it->second; - } - const framework::TensorArray& step_output(const std::string& name) const { - auto it = step_outputs_.find(name); - PADDLE_ENFORCE(it != step_outputs_.end()); - return it->second; - } - - protected: - struct ArgCache { - framework::Scope const* scope; - std::vector* scopes; - std::map inputs; - std::map outputs; - platform::DeviceContext const* dev_ctx; - - size_t num_steps{0}; - - void Init(const rnn::ArgumentName& name, const framework::OperatorBase& op, - const framework::Scope& scope, - platform::DeviceContext const* dev_ctx, rnn::Argument* arg); - - framework::Scope& GetScope(size_t index) { - PADDLE_ENFORCE_LT(index, num_steps); - return *scopes->at(index); - } - - framework::LoDTensor* GetTensor(const framework::Scope& scope, - const std::string& name); - - private: - void InitArgument(const rnn::ArgumentName& name, - const framework::OperatorBase& op, rnn::Argument* arg); - void CacheScopes(const framework::Scope& scope, const rnn::Argument& arg); - void CacheInlinks(const framework::Scope& scope, - const std::vector& names); - void CacheOutlinks(const framework::Scope& scope, - const std::vector& names); - framework::Variable* GetVariable(const framework::Scope& scope, - const std::string& name); - }; - - private: - std::unique_ptr step_unit_; - std::map states_; - std::map step_inputs_; - std::map step_outputs_; - std::map> dy_seq_metas_; - rnn::Argument arg_; - ArgCache cache_; - ComputeMode 
mode_{ComputeMode::kForward}; - -#ifdef PADDLE_WITH_TESTING - // test forward - friend class RNNAlgorithmTestHelper; - FRIEND_TEST(RNNAlgorithmTestHelper, SplitInputs); - FRIEND_TEST(RNNAlgorithmTestHelper, CreateCache); - FRIEND_TEST(RNNAlgorithmTestHelper, CreateScopes); - FRIEND_TEST(RNNAlgorithmTestHelper, WriteStepInputs); - FRIEND_TEST(RNNAlgorithmTestHelper, WriteStepOutputs); - FRIEND_TEST(RNNAlgorithmTestHelper, InitStates); - FRIEND_TEST(RNNAlgorithmTestHelper, ConcatOutputs); -// TODO(superjom) test backward -#endif -}; - -class DynamicRecurrentOp : public framework::OperatorBase { - public: - DynamicRecurrentOp(const std::string& type, - const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - DynamicRecurrentOp(const DynamicRecurrentOp& o) - : framework::OperatorBase( - static_cast(o)) { - PADDLE_THROW("Not implemented"); - } - - void Run(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const override; - - mutable RNNAlgorithm rnn; -}; - -class DynamicRecurrentGradientOp : public framework::OperatorBase { - public: - DynamicRecurrentGradientOp(const std::string& type, - const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - DynamicRecurrentGradientOp(const DynamicRecurrentGradientOp& o) - : framework::OperatorBase( - static_cast(o)) { - PADDLE_THROW("Not implemented"); - } - - void Run(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const override; - - mutable RNNAlgorithm rnn; -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/operators/dynamic_recurrent_op_test.cc b/paddle/operators/dynamic_recurrent_op_test.cc deleted file mode 100644 index 8d840e259b190ead86a66df8ab31c5170db4d824..0000000000000000000000000000000000000000 --- 
a/paddle/operators/dynamic_recurrent_op_test.cc +++ /dev/null @@ -1,217 +0,0 @@ -#include "paddle/operators/dynamic_recurrent_op.h" - -#include - -#include "paddle/framework/ddim.h" -#include "paddle/framework/lod_tensor.h" -#include "paddle/framework/op_desc.h" -#include "paddle/framework/op_registry.h" -#include "paddle/operators/net_op.h" - -namespace paddle { -namespace operators { - -using framework::Scope; -using framework::TensorArray; -using framework::LoDTensor; -using framework::Variable; - -class TestOp : public framework::OperatorBase { - public: - using framework::OperatorBase::OperatorBase; - DEFINE_OP_CLONE_METHOD(TestOp); - void Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const override {} -}; - -void OpDescNewVar(const std::string& param_name, - std::initializer_list arguments, - paddle::framework::OpDesc::Var* var) { - var->set_parameter(param_name); - for (auto& arg_name : arguments) { - var->add_arguments(arg_name); - } -} - -// create a LoD tensor in scope with specific dims -LoDTensor* CreateVar(Scope& scope, std::string name, framework::DDim dims, - const platform::Place& place) { - auto* var = scope.Var(name); - auto* tensor = var->GetMutable(); - tensor->Resize(dims); - tensor->mutable_data(place); - return tensor; -} - -class RNNAlgorithmTestHelper : public ::testing::Test { - protected: - const rnn::ArgumentName argname = RNNAlgorithm::kArgNames[0]; - - virtual void SetUp() override { - CreateGlobalVariables(); - - auto op_desc = CreateOpDesc(); - op = paddle::framework::OpRegistry::CreateOp(op_desc); - dop = &(dynamic_cast(op.get())->rnn); - InitCacheManually(); - InitStepNet(); - } - - framework::OpDesc CreateOpDesc() { - // create op - paddle::framework::OpDesc op_desc; - op_desc.set_type("dynamic_recurrent"); - - OpDescNewVar(argname.inlinks, {"in0"}, op_desc.add_inputs()); - OpDescNewVar(argname.initial_states, {"boot_mem"}, op_desc.add_inputs()); - OpDescNewVar(argname.step_scopes, {"step_scopes"}, 
op_desc.add_outputs()); - OpDescNewVar(argname.outlinks, {"out0"}, op_desc.add_outputs()); - - // set pre-states - auto pre_memories = op_desc.mutable_attrs()->Add(); - pre_memories->set_name(argname.ex_states); - pre_memories->set_type(paddle::framework::AttrType::STRINGS); - auto pre_memories_item = pre_memories->add_strings(); - *pre_memories_item = "mem@pre"; - - // set states - auto memories = op_desc.mutable_attrs()->Add(); - memories->set_name(argname.states); - memories->set_type(paddle::framework::AttrType::STRINGS); - auto memories_item = memories->add_strings(); - *memories_item = "mem"; - return op_desc; - } - - void CreateGlobalVariables() { - platform::CPUPlace place; - scope.Var("step_scopes"); - CreateVar(scope, "boot_mem", framework::make_ddim({10, 20}), place); - CreateVar(scope, "out0", framework::make_ddim({10, 20}), place); - auto* in0 = CreateVar(scope, "in0", framework::make_ddim({10, 8}), place); - // 10 instanes with 4 sentences, length is 4, 3, 2, 1 respectively. 
- framework::LoD in0_lod(1); - for (int x : std::vector{0, 4, 7, 9, 10}) { - in0_lod[0].push_back(x); - } - in0->set_lod(in0_lod); - in0->Resize(framework::make_ddim({10, 8})); - // set the content, each sentence content is seqid.batchid - // the seqid starts from 0 - int start = 0; - for (size_t seqid = 0; seqid < in0_lod.size() - 1; seqid++) { - for (size_t batchid = 0; - batchid < in0_lod[0][seqid + 1] - in0_lod[0][seqid]; batchid++) { - float v = seqid + batchid * 0.1; - - for (size_t dim = 0; dim < 8; dim++) { - in0->data()[start * 8 + dim] = v; - } - start++; - } - } - } - - void InitCacheManually() { - dop->cache_.Init(RNNAlgorithm::kArgNames[0], *op, scope, &device_context, - &dop->arg_); - } - - void InitStepNet() { - std::unique_ptr stepnet{new NetOp}; - dynamic_cast(stepnet.get()) - ->AppendOp(std::unique_ptr(new TestOp( - "test", {{"inputs", {"in0"}}, {"initial_states", {"boot_mem"}}}, - {{"outputs", {"out0"}}, {"step_scopes", {"step_scopes"}}}, {}))); - dop->SetStepUnit(std::move(stepnet)); - } - - protected: - RNNAlgorithm* dop; - std::unique_ptr op; - paddle::platform::CPUDeviceContext device_context; - paddle::framework::Scope scope; -}; - -TEST_F(RNNAlgorithmTestHelper, CreateCache) { - const rnn::Argument& arg = dop->arg_; - ASSERT_EQ(arg.inlinks.size(), 1UL); - ASSERT_EQ(arg.outlinks.size(), 1UL); -} - -TEST_F(RNNAlgorithmTestHelper, SplitInputs) { - dop->SplitInputs(); - auto& in0_ta = dop->step_inputs_["in0"]; - ASSERT_EQ(in0_ta.size(), 4UL); - - const auto& batch0 = in0_ta.Read(0); - const auto& batch1 = in0_ta.Read(1); - const auto& batch2 = in0_ta.Read(2); - const auto& batch3 = in0_ta.Read(3); - EXPECT_EQ(batch0.dims()[0], 4); - EXPECT_EQ(batch1.dims()[0], 3); - EXPECT_EQ(batch2.dims()[0], 2); - EXPECT_EQ(batch3.dims()[0], 1); -} - -TEST_F(RNNAlgorithmTestHelper, CreateScopes) { - dop->SplitInputs(); - dop->CreateScopes(); - ASSERT_EQ(dop->cache_.num_steps, 4UL); - ASSERT_EQ(dop->cache_.scopes->size(), 4UL); -} - 
-TEST_F(RNNAlgorithmTestHelper, WriteStepInputs) { - dop->SplitInputs(); - dop->CreateScopes(); - dop->WriteStepInputs(); - - for (size_t step = 0; step < dop->cache_.num_steps; step++) { - auto& scope = dop->cache_.GetScope(step); - for (auto name : std::vector({"in0"})) { - ASSERT_TRUE(scope.FindVar(name) != nullptr); - } - } -} - -TEST_F(RNNAlgorithmTestHelper, WriteStepOutputs) { - dop->SplitInputs(); - dop->CreateScopes(); - dop->WriteStepInputs(); - dop->WriteStepOutputs(); - - for (size_t step = 0; step < dop->cache_.num_steps; step++) { - auto& scope = dop->cache_.GetScope(step); - for (auto name : std::vector({"out0"})) { - ASSERT_TRUE(scope.FindVar(name)); - } - } -} - -TEST_F(RNNAlgorithmTestHelper, ConcatOutputs) { - // Let's leave this test to python unittest. -} - -TEST_F(RNNAlgorithmTestHelper, InitStates) { - dop->SetComputeMode(RNNAlgorithm::ComputeMode::kForward); - dop->SplitInputs(); - dop->CreateScopes(); - dop->WriteStepInputs(); - dop->WriteStepOutputs(); - dop->InitStates(); - - for (size_t step = 0; step < dop->cache_.num_steps; step++) { - auto& scope = dop->cache_.GetScope(step); - auto state = scope.FindVar("mem"); - ASSERT_TRUE(state != nullptr); - - auto* pre_state = scope.FindVar("mem@pre"); - ASSERT_TRUE(pre_state != nullptr); - - auto* boot_state = scope.FindVar("boot_mem"); - ASSERT_TRUE(boot_state != nullptr); - } -} - -} // operators -} // namespace paddle diff --git a/paddle/operators/expand_op.h b/paddle/operators/expand_op.h index 8ae2c11a5d31dafc1b90d129054ebfabfb761bfe..4d7996ad1e744fead1329c35ce6ea43bf0683ce6 100644 --- a/paddle/operators/expand_op.h +++ b/paddle/operators/expand_op.h @@ -125,7 +125,8 @@ class ExpandGradKernel : public framework::OpKernel { auto* in0 = context.Input(framework::GradVarName("Out")); auto* out0 = context.Output(framework::GradVarName("X")); out0->mutable_data(context.GetPlace()); - out0->CopyFrom(*in0, context.GetPlace(), context.device_context()); + framework::CopyFrom(*in0, 
context.GetPlace(), context.device_context(), + out0); } else { switch (dims) { REP_EXPAND_GRAD_TEMPLATE(72) diff --git a/paddle/operators/feed_op.cc b/paddle/operators/feed_op.cc index 0dd84cbeaafbafd45132b0a0b744554ce7475411..ee43c22fb13e203c7de1a7e6d1586423fcbfb25a 100644 --- a/paddle/operators/feed_op.cc +++ b/paddle/operators/feed_op.cc @@ -47,7 +47,7 @@ class FeedOp : public framework::OperatorBase { auto &feed_list = feed_var->Get(); auto &feed_item = feed_list.at(static_cast(col)); auto *out_item = out_var->GetMutable(); - out_item->CopyFrom(feed_item, dev_ctx.GetPlace(), dev_ctx); + framework::CopyFrom(feed_item, dev_ctx.GetPlace(), dev_ctx, out_item); out_item->set_lod(feed_item.lod()); } }; diff --git a/paddle/operators/fetch_op.cc b/paddle/operators/fetch_op.cc index 8108ae69dec4bafd1c04d5ab05eef6f467d4c6e8..1ae07194c235ce6724f59c9c60df80f957787cda 100644 --- a/paddle/operators/fetch_op.cc +++ b/paddle/operators/fetch_op.cc @@ -51,7 +51,7 @@ class FetchOp : public framework::OperatorBase { // FIXME(yuyang18): Should we assume the fetch operator always generate // CPU outputs? 
- dst_item.CopyFrom(src_item, platform::CPUPlace(), dev_ctx); + CopyFrom(src_item, platform::CPUPlace(), dev_ctx, &dst_item); dev_ctx.Wait(); dst_item.set_lod(src_item.lod()); diff --git a/paddle/operators/fill_constant_batch_size_like_op.cc b/paddle/operators/fill_constant_batch_size_like_op.cc index 985b5d1e865e513d833bff72dcd20a8f20851d8c..892922cd3aaec8bf8194320c5c3a0dd0365bb589 100644 --- a/paddle/operators/fill_constant_batch_size_like_op.cc +++ b/paddle/operators/fill_constant_batch_size_like_op.cc @@ -52,7 +52,7 @@ class FillConstantBatchSizeLikeOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( - static_cast(ctx.Attr("data_type")), + static_cast(ctx.Attr("dtype")), ctx.device_context()); } }; @@ -63,7 +63,7 @@ class FillConstantBatchSizeLikeOpMaker FillConstantBatchSizeLikeOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddAttr("data_type", + AddAttr("dtype", "(int, default 5 (FP32)) " "Output data type") .SetDefault(framework::DataType::FP32); diff --git a/paddle/operators/fill_constant_op.cc b/paddle/operators/fill_constant_op.cc index 818f113b90a4c239a857791fb9957e51d3287b97..3d5f84bc239615797a5cf01a74150fdb7dfc1b80 100644 --- a/paddle/operators/fill_constant_op.cc +++ b/paddle/operators/fill_constant_op.cc @@ -34,7 +34,7 @@ class FillConstantOp : public framework::OperatorBase { using framework::OperatorBase::OperatorBase; void Run(const framework::Scope &scope, const platform::DeviceContext &dev_ctx) const override { - auto data_type = static_cast(Attr("data_type")); + auto data_type = static_cast(Attr("dtype")); auto value = Attr("value"); auto force_cpu = Attr("force_cpu"); auto &out = @@ -55,7 +55,7 @@ class FillConstantOpMaker : public framework::OpProtoAndCheckerMaker { FillConstantOpMaker(framework::OpProto *proto, framework::OpAttrChecker 
*op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddAttr("data_type", + AddAttr("dtype", "(int, default 5 (FP32)) " "Output data type") .SetDefault(framework::DataType::FP32); diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 53ad86c6c48d1868f4495af51661d91b39a84f0b..254c83e1378a121d99c89d9d8705935b5f06edc8 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -60,7 +60,7 @@ class GaussianRandomOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( - static_cast(ctx.Attr("data_type")), + static_cast(ctx.Attr("dtype")), ctx.device_context()); } }; @@ -88,7 +88,7 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker { "Random seed of generator." "0 means use system wide seed.") .SetDefault(0); - AddAttr("data_type", + AddAttr("dtype", "(int, default 5(FP32)) " "Output data type.") .SetDefault(framework::DataType::FP32); diff --git a/paddle/operators/gru_unit_op.h b/paddle/operators/gru_unit_op.h index 050430d3252d05236219cd5ced5a792c21413c1f..3398c0934e250cfc292776d08773204bb9b4d87e 100644 --- a/paddle/operators/gru_unit_op.h +++ b/paddle/operators/gru_unit_op.h @@ -28,6 +28,10 @@ template using EigenMatrix = framework::EigenMatrix; +template +using EigenVector = framework::EigenVector; + enum GRUActivationType { identity = 0, sigmoid = 1, tanh = 2, relu = 3 }; template @@ -226,7 +230,7 @@ class GRUUnitGradKernel : public framework::OpKernel { // backward for bias if (bias_grad) { bias_grad->mutable_data(context.GetPlace()); - auto d_b = EigenMatrix::From(*bias_grad); + auto d_b = EigenVector::Flatten(*bias_grad); d_b.device(place) = d_g.sum(Eigen::array({{0}})); } } diff --git a/paddle/operators/huber_loss_op.cc b/paddle/operators/huber_loss_op.cc index 
3435e74b0afb470fcbd1c0f4e06ad363352cac00..938803d5b36177c782fe40bc34fd92504e5bbf7b 100644 --- a/paddle/operators/huber_loss_op.cc +++ b/paddle/operators/huber_loss_op.cc @@ -70,11 +70,18 @@ input value and Y as the target value. Huber loss can evaluate the fitness of X to Y. Different from MSE loss, Huber loss is more robust for outliers. The shape of X and Y are [batch_size, 1]. The equation is: -L_{\delta}(y, f(x)) = +$$ +Out_{\delta}(X, Y)_i = \begin{cases} -0.5 * (y - f(x))^2, \quad |y - f(x)| \leq \delta \\ -\delta * (|y - f(x)| - 0.5 * \delta), \quad otherwise +0.5 * (Y_i - X_i)^2, +\quad |Y_i - X_i| \leq \delta \\ +\delta * (|Y_i - X_i| - 0.5 * \delta), +\quad otherwise \end{cases} +$$ + +In the above equation, $Out_\delta(X, Y)_i$, $X_i$ and $Y_i$ represent the ith +element of Out, X and Y. )DOC"); } diff --git a/paddle/operators/linear_chain_crf_op.cc b/paddle/operators/linear_chain_crf_op.cc index 066bdf67aa037e9c25cfdfaff7ec8771eb59cde8..8e079a14e0a15e8ff803b6087e6b0b02083479ef 100644 --- a/paddle/operators/linear_chain_crf_op.cc +++ b/paddle/operators/linear_chain_crf_op.cc @@ -32,19 +32,19 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { "[(D + 2) x D]. The learnable parameter for the linear_chain_crf " "operator. See more details in the operator's comments."); AddInput("Label", - "(LoDTensor, default LoDTensor) A LoDTensor with shape " + "(LoDTensor, default LoDTensor) A LoDTensor with shape " "[N x 1], where N is the total element number in a mini-batch. " "The ground truth."); AddOutput( "Alpha", "(Tensor, default Tensor) A 2-D Tensor with shape [N x D]. " - "The forward vectors for the entire batch. Denote it as \f$\alpha\f$. " - "\f$\alpha$\f is a memo table used to calculate the normalization " - "factor in CRF. \f$\alpha[k, v]$\f stores the unnormalized " + "The forward vectors for the entire batch. Denote it as $\alpha$. " + "$\alpha$ is a memo table used to calculate the normalization " + "factor in CRF. 
$\alpha[k, v]$ stores the unnormalized " "probabilites of all possible unfinished sequences of tags that end at " - "position \f$k$\f with tag \f$v$\f. For each \f$k$\f, " - "\f$\alpha[k, v]$\f is a vector of length \f$D$\f with a component for " - "each tag value \f$v$\f. This vector is called a forward vecotr and " + "position $k$ with tag $v$. For each $k$, " + "$\alpha[k, v]$ is a vector of length $D$ with a component for " + "each tag value $v$. This vector is called a forward vecotr and " "will also be used in backward computations.") .AsIntermediate(); AddOutput( @@ -73,9 +73,9 @@ LinearChainCRF Operator. Conditional Random Field defines an undirected probabilistic graph with nodes denoting random variables and edges denoting dependencies between these -variables. CRF learns the conditional probability \f$P(Y|X)\f$, where -\f$X = (x_1, x_2, ... , x_n)\f$ are structured inputs and -\f$Y = (y_1, y_2, ... , y_n)\f$ are labels for the inputs. +variables. CRF learns the conditional probability $P(Y|X)$, where +$X = (x_1, x_2, ... , x_n)$ are structured inputs and +$Y = (y_1, y_2, ... , y_n)$ are labels for the inputs. Linear chain CRF is a special case of CRF that is useful for sequence labeling task. Sequence labeling tasks do not assume a lot of conditional @@ -88,21 +88,22 @@ CRF. Please refer to http://www.cs.columbia.edu/~mcollins/fb.pdf and http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for details. Equation: -1. Denote Input(Emission) to this operator as \f$x\f$ here. +1. Denote Input(Emission) to this operator as $x$ here. 2. The first D values of Input(Transition) to this operator are for starting -weights, denoted as \f$a\f$ here. +weights, denoted as $a$ here. 3. The next D values of Input(Transition) of this operator are for ending -weights, denoted as \f$b\f$ here. +weights, denoted as $b$ here. 4. The remaning values of Input(Transition) are for transition weights, -denoted as \f$w\f$ here. -5. Denote Input(Label) as \f$s\f$ here. 
- -The probability of a sequence \f$s\f$ of length \f$L\f$ is defined as: -\f$P(s) = (1/Z) \exp(a_{s_1} + b_{s_L} - + \sum_{l=1}^L x_{s_l} - + \sum_{l=2}^L w_{s_{l-1},s_l})\f$ -where \f$Z\f$ is a normalization value so that the sum of \f$P(s)\f$ over -all possible sequences is \f$1\f$, and \f$x\f$ is the emission feature weight +denoted as $w$ here. +5. Denote Input(Label) as $s$ here. + +The probability of a sequence $s$ of length $L$ is defined as: +$$P(s) = (1/Z) \exp(a_{s_1} + b_{s_L} + + \sum_{l=1}^L x_{s_l} + + \sum_{l=2}^L w_{s_{l-1},s_l})$$ + +where $Z$ is a normalization value so that the sum of $P(s)$ over +all possible sequences is 1, and $x$ is the emission feature weight to the linear chain CRF. Finally, the linear chain CRF operator outputs the logarithm of the conditional diff --git a/paddle/operators/linear_chain_crf_op.h b/paddle/operators/linear_chain_crf_op.h index 872f659fed40d7479d9d8bed6c8469fb28282253..014bbfa7580011e38a2f546e30d1e584965a7815 100644 --- a/paddle/operators/linear_chain_crf_op.h +++ b/paddle/operators/linear_chain_crf_op.h @@ -195,7 +195,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel { auto copyLoDTensor = [](const platform::DeviceContext& ctx, const LoDTensor& src, LoDTensor* dst) { dst->mutable_data(src.dims(), platform::CPUPlace()); - dst->CopyFrom(src, platform::CPUPlace(), ctx); + framework::CopyFrom(src, platform::CPUPlace(), ctx, dst); }; copyLoDTensor(ctx, emission_weights_src, emission_weights_dst); @@ -203,8 +203,8 @@ class LinearChainCRFOpKernel : public framework::OpKernel { transition_weights_dst->mutable_data(transition_weights_src.dims(), platform::CPUPlace()); - transition_weights_dst->CopyFrom(transition_weights_src, - platform::CPUPlace(), ctx); + framework::CopyFrom(transition_weights_src, platform::CPUPlace(), ctx, + transition_weights_dst); } void CopyOutputsToGpuMemory(const platform::DeviceContext& ctx, @@ -219,7 +219,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel { auto 
copyTensor = [](const platform::DeviceContext& ctx, const Tensor& src, Tensor* dst) { dst->mutable_data(platform::GPUPlace()); - dst->CopyFrom(src, platform::GPUPlace(), ctx); + framework::CopyFrom(src, platform::GPUPlace(), ctx, dst); }; copyTensor(ctx, emission_exps_src, emission_exps_dst); copyTensor(ctx, transition_exps_src, transition_exps_dst); @@ -410,12 +410,12 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { // Copy the inputs from GPU memory to CPU memory when this operators runs on // GPU device. label_dst->mutable_data(label_src.dims(), platform::CPUPlace()); - label_dst->CopyFrom(label_src, platform::CPUPlace(), ctx); + framework::CopyFrom(label_src, platform::CPUPlace(), ctx, label_dst); auto copyTensor = [](const platform::DeviceContext& ctx, const Tensor& src, Tensor* dst) { dst->mutable_data(src.dims(), platform::CPUPlace()); - dst->CopyFrom(src, platform::CPUPlace(), ctx); + framework::CopyFrom(src, platform::CPUPlace(), ctx, dst); }; copyTensor(ctx, emission_exps_src, emission_exps_dst); copyTensor(ctx, transition_exps_src, transition_exps_dst); @@ -434,7 +434,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { Tensor* dst) { if (src && dst) { dst->mutable_data(platform::GPUPlace()); - dst->CopyFrom(*src, platform::GPUPlace(), ctx); + framework::CopyFrom(*src, platform::GPUPlace(), ctx, dst); } }; copyTensor(ctx, emission_grad_src, emission_grad_dst); diff --git a/paddle/operators/load_op.cc b/paddle/operators/load_op.cc index b71a33a6b1ce80b545e6d7a4020dafc941dc55d2..b0838eed1611c1d51e57fc2300606f753982dc89 100644 --- a/paddle/operators/load_op.cc +++ b/paddle/operators/load_op.cc @@ -105,7 +105,7 @@ class LoadOp : public framework::OperatorBase { out_var->Clear(); tensor = out_var->GetMutable(); tensor->set_lod(cpu_tensor.lod()); - tensor->CopyFrom(cpu_tensor, place, dev_ctx); + CopyFrom(cpu_tensor, place, dev_ctx, tensor); } } }; diff --git a/paddle/operators/lod_reset_op.h b/paddle/operators/lod_reset_op.h 
index 2bb916ccee80c83a02ea429fe95f5fafc86ccfa6..cbcbf80adc3cf68f9eb28bbe2a69168cc8798347 100644 --- a/paddle/operators/lod_reset_op.h +++ b/paddle/operators/lod_reset_op.h @@ -33,7 +33,8 @@ class LoDResetKernel : public framework::OpKernel { auto* lod = lod_t->data(); if (platform::is_gpu_place(ctx.GetPlace())) { framework::Tensor lod_cpu; - lod_cpu.CopyFrom(*lod_t, platform::CPUPlace(), ctx.device_context()); + framework::CopyFrom(*lod_t, platform::CPUPlace(), ctx.device_context(), + &lod_cpu); lod = lod_cpu.data(); } level0 = std::vector(lod, lod + lod_t->numel()); diff --git a/paddle/operators/lod_tensor_to_array_op.cc b/paddle/operators/lod_tensor_to_array_op.cc index 58af35564d83b9699af4f7783fb6367ff9590682..010c79d4e153463d4b2e48e5fd798d3bc4febaf1 100644 --- a/paddle/operators/lod_tensor_to_array_op.cc +++ b/paddle/operators/lod_tensor_to_array_op.cc @@ -81,11 +81,11 @@ class LoDTensorToArrayOp : public framework::OperatorBase { continue; } // out[i][offset: offset+len] = x[each_range.begin: each_range.end] - out[i] - .Slice(static_cast(offset), static_cast(offset + len)) - .CopyFrom(x.Slice(static_cast(each_range.begin), - static_cast(each_range.end)), - x.place(), dev_ctx); + auto slice = out[i].Slice(static_cast(offset), + static_cast(offset + len)); + framework::CopyFrom(x.Slice(static_cast(each_range.begin), + static_cast(each_range.end)), + x.place(), dev_ctx, &slice); offset += len; } } diff --git a/paddle/operators/math/context_project.h b/paddle/operators/math/context_project.h index 72f4202bace4461d2597204feaa2a21e355bd1ac..d853507188cf8c80aede1e7646736036e30c9678 100644 --- a/paddle/operators/math/context_project.h +++ b/paddle/operators/math/context_project.h @@ -149,7 +149,7 @@ class ContextProjectFunctor { Tensor out_t_sub = out_t.Slice(k * context_length, k * context_length + padding_size); Tensor w_sub = padding_data.Slice(k, k + padding_size); - out_t_sub.CopyFrom(w_sub, context.GetPlace(), context); + framework::CopyFrom(w_sub, 
context.GetPlace(), context, &out_t_sub); } } if (down_pad > 0) { // add down pad @@ -179,7 +179,7 @@ class ContextProjectFunctor { (down_pad_begin_row + t) * context_length); Tensor w_sub = padding_data.Slice( up_pad + padding_idx, up_pad + padding_idx + padding_size); - out_t_sub.CopyFrom(w_sub, context.GetPlace(), context); + framework::CopyFrom(w_sub, context.GetPlace(), context, &out_t_sub); } } out_t.Resize({sequence_height, context_length * sequence_width}); diff --git a/paddle/operators/math/im2col.h b/paddle/operators/math/im2col.h index deb60051beef56437cf75f0fa2cef90bbc0a209a..24fd9a06e9f5fbd50483429379cf3f46ff88bcaa 100644 --- a/paddle/operators/math/im2col.h +++ b/paddle/operators/math/im2col.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/framework/tensor.h" +#include "paddle/framework/tensor_util.h" #include "paddle/platform/device_context.h" namespace paddle { diff --git a/paddle/operators/math/im2col_test.cc b/paddle/operators/math/im2col_test.cc index 10c28da72ba9d3b94bb59c5cf00e7f5a2f28fd06..ae197a97ed8aa089b51be77a59a8ba6a98ac70ec 100644 --- a/paddle/operators/math/im2col_test.cc +++ b/paddle/operators/math/im2col_test.cc @@ -74,7 +74,7 @@ void testIm2col() { if (paddle::platform::is_cpu_place(*place)) { input = input_tmp; } else { - input.CopyFrom(input_tmp, *place, *context); + CopyFrom(input_tmp, *place, *context, &input); } output_cfo.mutable_data( {1, filter_size, filter_size, output_height, output_width}, *place); @@ -99,7 +99,7 @@ void testIm2col() { if (paddle::platform::is_cpu_place(*place)) { out_cfo_ptr = output_cfo.data(); } else { - output_tmp.CopyFrom(output_cfo, paddle::platform::CPUPlace(), *context); + CopyFrom(output_cfo, paddle::platform::CPUPlace(), *context, &output_tmp); out_cfo_ptr = output_tmp.data(); } for (int i = 0; i < 6; ++i) { @@ -110,7 +110,7 @@ void testIm2col() { if (paddle::platform::is_cpu_place(*place)) { out_ocf_ptr = output_ocf.data(); } else { - 
output_tmp.CopyFrom(output_ocf, paddle::platform::CPUPlace(), *context); + CopyFrom(output_ocf, paddle::platform::CPUPlace(), *context, &output_tmp); out_ocf_ptr = output_tmp.data(); } for (int i = 0; i < 6; ++i) { @@ -130,7 +130,7 @@ void testIm2col() { if (paddle::platform::is_cpu_place(*place)) { input = input_tmp; } else { - input.CopyFrom(input_tmp, *place, *context); + CopyFrom(input_tmp, *place, *context, &input); } col2im(*context, output_cfo, dilation, stride, padding, &input); @@ -139,7 +139,7 @@ void testIm2col() { if (paddle::platform::is_cpu_place(*place)) { in_ptr = input.data(); } else { - input_tmp.CopyFrom(input, paddle::platform::CPUPlace(), *context); + CopyFrom(input, paddle::platform::CPUPlace(), *context, &input_tmp); in_ptr = input_tmp.data(); } for (int i = 0; i < 6; ++i) { @@ -151,7 +151,7 @@ void testIm2col() { if (paddle::platform::is_cpu_place(*place)) { input = input_tmp; } else { - input.CopyFrom(input_tmp, *place, *context); + CopyFrom(input_tmp, *place, *context, &input); } col2im_ocf(*context, output_ocf, dilation, stride, padding, &input); @@ -159,7 +159,7 @@ void testIm2col() { if (paddle::platform::is_cpu_place(*place)) { in_ptr = input.data(); } else { - input_tmp.CopyFrom(input, paddle::platform::CPUPlace(), *context); + CopyFrom(input, paddle::platform::CPUPlace(), *context, &input_tmp); in_ptr = input_tmp.data(); } for (int i = 0; i < 6; ++i) { diff --git a/paddle/operators/math/math_function.cu b/paddle/operators/math/math_function.cu index 58356a4b7783241ca0292829bf05dc1a8ed80c6c..3018e50a4f54592123df6b9cadd45ce525d7b3e1 100644 --- a/paddle/operators/math/math_function.cu +++ b/paddle/operators/math/math_function.cu @@ -297,7 +297,25 @@ void set_constant_with_place( template struct RowwiseAdd; template struct RowwiseAdd; template struct ColwiseSum; -template struct ColwiseSum; +// template struct ColwiseSum; +// The ColwiseSum failed in debug mode, +// and only failed for this case. So reimplemented it. 
+template <> +void ColwiseSum::operator()( + const platform::DeviceContext& context, const framework::Tensor& input, + framework::Tensor* vector) { + auto in_dims = input.dims(); + auto size = input.numel() / in_dims[0]; + PADDLE_ENFORCE_EQ(vector->numel(), size); + framework::Tensor one; + one.mutable_data({in_dims[0]}, context.GetPlace()); + SetConstant set; + set(context, &one, static_cast(1.0)); + gemv(context, true, static_cast(in_dims[0]), + static_cast(in_dims[1]), 1.0, + input.data(), one.data(), + 0.0, vector->data()); +} } // namespace math } // namespace operators diff --git a/paddle/operators/math/math_function.h b/paddle/operators/math/math_function.h index ffb99f53808c4316ede96b04e57aec4dae4134de..5a42854f22234629b3405ec2397143ef761a9d08 100644 --- a/paddle/operators/math/math_function.h +++ b/paddle/operators/math/math_function.h @@ -49,6 +49,7 @@ int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda, #include "paddle/framework/eigen.h" #include "paddle/framework/tensor.h" +#include "paddle/framework/tensor_util.h" #include "paddle/platform/device_context.h" #include "paddle/platform/enforce.h" diff --git a/paddle/operators/math/math_function_test.cu b/paddle/operators/math/math_function_test.cu index 780d17ffc6539c5f4d67ebab5476d6f646840b41..d5d6f0c73bc6bce7a74db2c98fa9f884a0bcd9a2 100644 --- a/paddle/operators/math/math_function_test.cu +++ b/paddle/operators/math/math_function_test.cu @@ -16,15 +16,15 @@ TEST(math_function, notrans_mul_trans) { auto* gpu_place = new paddle::platform::GPUPlace(0); paddle::platform::CUDADeviceContext context(*gpu_place); - input1_gpu.CopyFrom(input1, *gpu_place, context); - input2_gpu.CopyFrom(input1, *gpu_place, context); + paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu); + paddle::framework::CopyFrom(input1, *gpu_place, context, &input2_gpu); out_gpu.mutable_data({2, 2}, *gpu_place); paddle::operators::math::matmul( context, input1_gpu, false, input2_gpu, true, 1, &out_gpu, 0); - 
out.CopyFrom(out_gpu, *cpu_place, context); + paddle::framework::CopyFrom(out_gpu, *cpu_place, context, &out); float* out_ptr = out.data(); context.Wait(); @@ -50,15 +50,15 @@ TEST(math_function, trans_mul_notrans) { auto* gpu_place = new paddle::platform::GPUPlace(0); paddle::platform::CUDADeviceContext context(*gpu_place); - input1_gpu.CopyFrom(input1, *gpu_place, context); - input2_gpu.CopyFrom(input1, *gpu_place, context); + paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu); + paddle::framework::CopyFrom(input1, *gpu_place, context, &input2_gpu); out_gpu.mutable_data({3, 3}, *gpu_place); paddle::operators::math::matmul( context, input1_gpu, true, input2_gpu, false, 1, &out_gpu, 0); - out.CopyFrom(out_gpu, *cpu_place, context); + paddle::framework::CopyFrom(out_gpu, *cpu_place, context, &out); float* out_ptr = out.data(); context.Wait(); @@ -99,9 +99,9 @@ TEST(math_function, gemm_notrans_cublas) { auto* gpu_place = new paddle::platform::GPUPlace(0); paddle::platform::CUDADeviceContext context(*gpu_place); - input1_gpu.CopyFrom(input1, *gpu_place, context); - input2_gpu.CopyFrom(input2, *gpu_place, context); - input3_gpu.CopyFrom(input3, *gpu_place, context); + paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu); + paddle::framework::CopyFrom(input2, *gpu_place, context, &input2_gpu); + paddle::framework::CopyFrom(input3, *gpu_place, context, &input3_gpu); float* a = input1_gpu.data(); float* b = input2_gpu.data(); float* c = input3_gpu.mutable_data(*gpu_place); @@ -109,7 +109,7 @@ TEST(math_function, gemm_notrans_cublas) { paddle::operators::math::gemm( context, false, false, m, n, k, 1, a, 3, b + 1, 4, 1, c + 1, 4); - input3.CopyFrom(input3_gpu, *cpu_place, context); + paddle::framework::CopyFrom(input3_gpu, *cpu_place, context, &input3); // numpy code: // a = np.arange(6).reshape(2, 3) @@ -154,9 +154,9 @@ TEST(math_function, gemm_trans_cublas) { auto* gpu_place = new paddle::platform::GPUPlace(0); 
paddle::platform::CUDADeviceContext context(*gpu_place); - input1_gpu.CopyFrom(input1, *gpu_place, context); - input2_gpu.CopyFrom(input2, *gpu_place, context); - input3_gpu.CopyFrom(input3, *gpu_place, context); + paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu); + paddle::framework::CopyFrom(input2, *gpu_place, context, &input2_gpu); + paddle::framework::CopyFrom(input3, *gpu_place, context, &input3_gpu); float* a = input1_gpu.data(); float* b = input2_gpu.data(); float* c = input3_gpu.mutable_data(*gpu_place); @@ -164,7 +164,7 @@ TEST(math_function, gemm_trans_cublas) { paddle::operators::math::gemm( context, false, true, m, n, k, 1, a, 3, b + 3, 3, 1, c + 1, 4); - input3.CopyFrom(input3_gpu, *cpu_place, context); + paddle::framework::CopyFrom(input3_gpu, *cpu_place, context, &input3); context.Wait(); EXPECT_EQ(input3_ptr[0], 0); @@ -205,14 +205,15 @@ void GemvTest(int m, int n, bool trans) { } paddle::platform::CUDADeviceContext context(*gpu_place); - g_mat_a.CopyFrom(mat_a, *gpu_place, context); - g_vec_b.CopyFrom(vec_b, *gpu_place, context); + paddle::framework::CopyFrom(mat_a, *gpu_place, context, &g_mat_a); + paddle::framework::CopyFrom(vec_b, *gpu_place, context, &g_vec_b); paddle::operators::math::gemv( context, trans, static_cast(m), static_cast(n), 1., g_data_a, g_data_b, 0., g_data_c); - vec_c.CopyFrom(g_vec_c, paddle::platform::CPUPlace(), context); + paddle::framework::CopyFrom(g_vec_c, paddle::platform::CPUPlace(), context, + &vec_c); if (!trans) { for (int i = 0; i < m; ++i) { diff --git a/paddle/operators/math/selected_rows_functor.cc b/paddle/operators/math/selected_rows_functor.cc index 075196b47eeaf118a588b96532d87a05e4e600c6..514f2adef284c8877e2e74b943b4e6419c6ae721 100644 --- a/paddle/operators/math/selected_rows_functor.cc +++ b/paddle/operators/math/selected_rows_functor.cc @@ -145,6 +145,8 @@ struct SelectedRowsAddTo { template struct SelectedRowsAddTo; template struct SelectedRowsAddTo; +template struct 
SelectedRowsAddTo; +template struct SelectedRowsAddTo; template struct SelectedRowsAddToTensor { @@ -175,6 +177,8 @@ struct SelectedRowsAddToTensor { template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; +template struct SelectedRowsAddToTensor; +template struct SelectedRowsAddToTensor; } // namespace math } // namespace operators diff --git a/paddle/operators/math/selected_rows_functor.cu b/paddle/operators/math/selected_rows_functor.cu index 47fe3b44a50fee9f41ae807793187258159b9f29..c40649e55ef93dec852ff6949b5cb134495e4ebf 100644 --- a/paddle/operators/math/selected_rows_functor.cu +++ b/paddle/operators/math/selected_rows_functor.cu @@ -173,6 +173,8 @@ struct SelectedRowsAddTo { template struct SelectedRowsAddTo; template struct SelectedRowsAddTo; +template struct SelectedRowsAddTo; +template struct SelectedRowsAddTo; namespace { template @@ -223,6 +225,8 @@ struct SelectedRowsAddToTensor { template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; +template struct SelectedRowsAddToTensor; +template struct SelectedRowsAddToTensor; } // namespace math } // namespace operators diff --git a/paddle/operators/math/selected_rows_functor_test.cu b/paddle/operators/math/selected_rows_functor_test.cu index 09de9dc53a1de9537b5109b3cc7cf9744f9c7908..7de9291c17d3f09a3c6076f00f2457f240e6f0af 100644 --- a/paddle/operators/math/selected_rows_functor_test.cu +++ b/paddle/operators/math/selected_rows_functor_test.cu @@ -67,7 +67,7 @@ TEST(selected_rows_functor, gpu_add) { EXPECT_EQ(out_rows[6], 9); Tensor out_cpu; - out_cpu.CopyFrom(*out_value, cpu_place, ctx); + CopyFrom(*out_value, cpu_place, ctx, &out_cpu); ctx.Wait(); auto* out_cpu_data = out_cpu.data(); @@ -94,7 +94,7 @@ TEST(selected_rows_functor, gpu_add) { add_tensor_functor(ctx, *output, *tensor1, tensor2.get()); Tensor tensor2_cpu; - tensor2_cpu.CopyFrom(*tensor2, cpu_place, ctx); + CopyFrom(*tensor2, cpu_place, ctx, &tensor2_cpu); ctx.Wait(); auto* tensor2_cpu_data = 
tensor2_cpu.data(); @@ -167,7 +167,7 @@ TEST(selected_rows_functor, gpu_add_to) { EXPECT_EQ(out_rows[6], 9); Tensor out_cpu; - out_cpu.CopyFrom(*out_value, cpu_place, ctx); + CopyFrom(*out_value, cpu_place, ctx, &out_cpu); ctx.Wait(); auto* out_cpu_data = out_cpu.data(); @@ -191,7 +191,7 @@ TEST(selected_rows_functor, gpu_add_to) { add_to_tensor_functor(ctx, *output, tensor1.get()); Tensor tensor1_cpu; - tensor1_cpu.CopyFrom(*tensor1, cpu_place, ctx); + CopyFrom(*tensor1, cpu_place, ctx, &tensor1_cpu); ctx.Wait(); auto* tensor1_cpu_data = tensor1_cpu.data(); diff --git a/paddle/operators/math/vol2col.h b/paddle/operators/math/vol2col.h index cbc30bd754608dd6e6def1a4097d69bdf0c942c3..dc64d1d9776261541a380ed15207904d6b4e641c 100644 --- a/paddle/operators/math/vol2col.h +++ b/paddle/operators/math/vol2col.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/framework/tensor.h" +#include "paddle/framework/tensor_util.h" #include "paddle/platform/device_context.h" namespace paddle { diff --git a/paddle/operators/math/vol2col_test.cc b/paddle/operators/math/vol2col_test.cc index c31c716842f30de67c29b803866b8c82ddcf4a41..62c3152304ad7fe946c996be413e102f3dd92bb2 100644 --- a/paddle/operators/math/vol2col_test.cc +++ b/paddle/operators/math/vol2col_test.cc @@ -82,7 +82,7 @@ void testVol2col() { if (paddle::platform::is_cpu_place(*place)) { input = input_tmp; } else { - input.CopyFrom(input_tmp, *place, *context); + CopyFrom(input_tmp, *place, *context, &input); } output.mutable_data({1, filter_size, filter_size, filter_size, output_depth, output_height, output_width}, @@ -96,7 +96,7 @@ void testVol2col() { if (paddle::platform::is_cpu_place(*place)) { out_cfo_ptr = output.data(); } else { - output_tmp.CopyFrom(output, paddle::platform::CPUPlace(), *context); + CopyFrom(output, paddle::platform::CPUPlace(), *context, &output_tmp); out_cfo_ptr = output_tmp.data(); } @@ -110,7 +110,7 @@ void testVol2col() { if 
(paddle::platform::is_cpu_place(*place)) { input = input_tmp; } else { - input.CopyFrom(input_tmp, *place, *context); + CopyFrom(input_tmp, *place, *context, &input); } paddle::operators::math::Col2VolFunctor col2vol; @@ -120,7 +120,7 @@ void testVol2col() { if (paddle::platform::is_cpu_place(*place)) { in_ptr = input.data(); } else { - input_tmp.CopyFrom(input, paddle::platform::CPUPlace(), *context); + CopyFrom(input, paddle::platform::CPUPlace(), *context, &input_tmp); in_ptr = input_tmp.data(); } diff --git a/paddle/operators/merge_lod_tensor_op.cc b/paddle/operators/merge_lod_tensor_op.cc index 80460c476921b63ec5228a9780880c7db3c85217..adc688dbd5e13a2203d6842a12acdb8625288275 100644 --- a/paddle/operators/merge_lod_tensor_op.cc +++ b/paddle/operators/merge_lod_tensor_op.cc @@ -45,7 +45,7 @@ class MergeLoDTensorOp : public framework::OperatorBase { cpu_mask->ShareDataWith(mask); } else if (platform::is_gpu_place(mask.place())) { #ifdef PADDLE_WITH_CUDA - cpu_mask->CopyFrom(mask, platform::CPUPlace(), dev_ctx); + framework::CopyFrom(mask, platform::CPUPlace(), dev_ctx, cpu_mask.get()); #else PADDLE_THROW("Not supported GPU, Please compile WITH_GPU option"); #endif @@ -99,8 +99,9 @@ class MergeLoDTensorOp : public framework::OperatorBase { if (len == 0) { continue; } - out->Slice(out_offset, out_offset + len) - .CopyFrom(input->Slice(start_offset, end_offset), place, dev_ctx); + auto slice = out->Slice(out_offset, out_offset + len); + framework::CopyFrom(input->Slice(start_offset, end_offset), place, + dev_ctx, &slice); out_offset += len; (*in_idx) += 1; } diff --git a/paddle/operators/multiplex_op.cu b/paddle/operators/multiplex_op.cu index 49ed8a8879527fd32dd8b001ea256e46a0353487..10dff8d021d0394702cc8b92e779c012a4cf3eb2 100644 --- a/paddle/operators/multiplex_op.cu +++ b/paddle/operators/multiplex_op.cu @@ -33,7 +33,7 @@ class MultiplexGPUKernel : public framework::OpKernel { auto cols = ins[0]->numel() / rows; // copy index to cpu Tensor index_t_cpu; - 
index_t_cpu.CopyFrom(*ids, platform::CPUPlace(), ctx.device_context()); + CopyFrom(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu); auto* index = index_t_cpu.data(); auto stream = ctx.cuda_device_context().stream(); Place place = boost::get(ctx.GetPlace()); @@ -68,7 +68,7 @@ class MultiplexGradGPUKernel : public framework::OpKernel { auto cols = ins[0]->numel() / rows; // copy index to cpu Tensor index_t_cpu; - index_t_cpu.CopyFrom(*ids, platform::CPUPlace(), ctx.device_context()); + CopyFrom(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu); auto* index = index_t_cpu.data(); auto stream = ctx.cuda_device_context().stream(); diff --git a/paddle/operators/nccl_op.cc b/paddle/operators/nccl_op.cc index 66fcc09bc877867e66a37adc73230d8dabf4cbed..22a37ff1bbf6b8cfb2cbc3c3dbbb20a87c5ea4e7 100644 --- a/paddle/operators/nccl_op.cc +++ b/paddle/operators/nccl_op.cc @@ -49,7 +49,7 @@ class NCCLInitOpMaker : public framework::OpProtoAndCheckerMaker { AddOutput("Communicator", "Create Communicator for communicating between gpus"); AddAttr>("gpus", "(vector) GPU id lists"); - AddAttr("data_type", + AddAttr("dtype", "(int, default 5 (FP32)) " "Output data type") .SetDefault(framework::DataType::FP32); diff --git a/paddle/operators/nccl_op_test.cu.cc b/paddle/operators/nccl_op_test.cu.cc index 56ba57854955c08031214d1f751c17fbb8bb882c..bb7ae20286dd8e52f72b79cbf353bd812a2cc092 100644 --- a/paddle/operators/nccl_op_test.cu.cc +++ b/paddle/operators/nccl_op_test.cu.cc @@ -97,7 +97,7 @@ class NCCLTester : public ::testing::Test { send_tensor->mutable_data(kDims, place); std::vector send_vector(f::product(kDims), gpu_id); - send_tensor->CopyFromVector(send_vector, *ctx); + paddle::framework::CopyFromVector(send_vector, *ctx, send_tensor); ctx->Wait(); VLOG(1) << "Send Tensor filled with elements " << send_tensor->numel(); } diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 
0075ccd24271bf83f139e121efad00c2316cc11b..c976e22c7740ad11279ab5ee75e4d130be8fa0c5 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -284,7 +284,8 @@ class RecurrentOp : public RecurrentBase { auto dst_out = dst_tensor->Slice(seq_offset, seq_offset + 1); // Explicit copy output since the local RNN scope can be destroyed // early. - dst_out.CopyFrom(src_tensor, dev_ctx.GetPlace(), dev_ctx); + framework::CopyFrom(src_tensor, dev_ctx.GetPlace(), dev_ctx, + &dst_out); }); scopes.Next(); @@ -365,7 +366,8 @@ class RecurrentGradOp : public RecurrentBase { auto *cur_grad_var = cur_scope.Var(cur_grad); auto cur_grad_tensor = cur_grad_var->GetMutable(); - cur_grad_tensor->CopyFrom(ex_tensor, dev_ctx.GetPlace(), dev_ctx); + framework::CopyFrom(ex_tensor, dev_ctx.GetPlace(), dev_ctx, + cur_grad_tensor); } } @@ -401,7 +403,7 @@ class RecurrentGradOp : public RecurrentBase { auto &inside_tensor = cur_scope.FindVar(inside_grad_name) ->Get(); framework::AttributeMap attrs; - attrs["data_type"] = framework::ToDataType(inside_tensor.type()); + attrs["dtype"] = framework::ToDataType(inside_tensor.type()); attrs["shape"] = framework::vectorize2int(inside_tensor.dims()); attrs["value"] = 0.0f; @@ -438,7 +440,7 @@ class RecurrentGradOp : public RecurrentBase { } auto dst = outside->Slice(seq_offset, seq_offset + 1); - dst.CopyFrom(inside, dev_ctx.GetPlace(), dev_ctx); + framework::CopyFrom(inside, dev_ctx.GetPlace(), dev_ctx, &dst); }); VLOG(5) << "Link outside gradient finished "; @@ -451,7 +453,7 @@ class RecurrentGradOp : public RecurrentBase { framework::LoDTensor *outside) { outside->Resize(inside.dims()); outside->mutable_data(dev_ctx.GetPlace(), inside.type()); - outside->CopyFrom(inside, dev_ctx.GetPlace(), dev_ctx); + framework::CopyFrom(inside, dev_ctx.GetPlace(), dev_ctx, outside); }); VLOG(5) << "Link initialize state gradient finished "; } diff --git a/paddle/operators/reshape_op.h b/paddle/operators/reshape_op.h index 
beb951713ae2a9fd83fe7c1a5e97ee8c642158a8..0e98c8b4f443f88ecba044f2f79228227695e182 100644 --- a/paddle/operators/reshape_op.h +++ b/paddle/operators/reshape_op.h @@ -28,7 +28,7 @@ class ReshapeKernel : public framework::OpKernel { auto* in = ctx.Input("X"); auto out_dims = out->dims(); out->mutable_data(ctx.GetPlace()); - out->CopyFrom(*in, ctx.GetPlace(), ctx.device_context()); + framework::CopyFrom(*in, ctx.GetPlace(), ctx.device_context(), out); out->Resize(out_dims); } }; @@ -42,7 +42,7 @@ class ReshapeGradKernel : public framework::OpKernel { d_x->mutable_data(ctx.GetPlace()); auto in_dims = d_x->dims(); - d_x->CopyFrom(*d_out, ctx.GetPlace(), ctx.device_context()); + framework::CopyFrom(*d_out, ctx.GetPlace(), ctx.device_context(), d_x); d_x->Resize(in_dims); } }; diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc deleted file mode 100644 index ee61ea300c33722471189d06eb09f67a083d2a4d..0000000000000000000000000000000000000000 --- a/paddle/operators/rnn/recurrent_op_utils.cc +++ /dev/null @@ -1,134 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include "paddle/operators/rnn/recurrent_op_utils.h" - -namespace paddle { -namespace operators { -namespace rnn { - -namespace f = paddle::framework; - -using Tensor = framework::Tensor; -using LoDTensor = framework::LoDTensor; - -void SegmentInputs(const std::vector& step_scopes, - const std::vector& inlinks, - const size_t seq_len) { - PADDLE_ENFORCE(!inlinks.empty(), "no in links are provided."); - for (size_t i = 0; i < inlinks.size(); ++i) { - // global inputs - auto input_var = step_scopes[0]->parent().FindVar(inlinks[i]); - PADDLE_ENFORCE_NOT_NULL(input_var, "input link [%s] is not in scope.", - inlinks[i]); - - LoDTensor* input = input_var->GetMutable(); - f::DDim dims = input->dims(); - PADDLE_ENFORCE_EQ(static_cast(dims[0]), seq_len, - "all the inputs be the same length"); - f::DDim step_dims = slice_ddim(dims, 1, dims.size()); - for (size_t j = 0; j < seq_len; j++) { - Tensor* step_input = - step_scopes[j]->Var(inlinks[i])->GetMutable(); - // The input of operators of each step is Tensor here. - // Maybe need to modify Slice function. 
- *step_input = input->Slice(j, j + 1); - step_input->Resize(step_dims); - } - } -} - -void ConcatOutputs(const std::vector& step_scopes, - const std::vector& outlinks, - const size_t seq_len, const platform::DeviceContext& ctx) { - for (size_t i = 0; i < outlinks.size(); i++) { - auto* output_var = step_scopes[0]->parent().FindVar(outlinks[i]); - PADDLE_ENFORCE_NOT_NULL(output_var, "output link [%s] is not in scope.", - outlinks[i]); - LoDTensor* output = output_var->GetMutable(); - - auto* step_scope_var = step_scopes[0]->FindVar(outlinks[i]); - PADDLE_ENFORCE_NOT_NULL(step_scope_var, "%s not in scope", outlinks[i]); - f::DDim step_dims = - step_scope_var->template GetMutable()->dims(); - std::vector dims_vec = vectorize(step_dims); - dims_vec.insert(dims_vec.begin(), seq_len); - output->Resize(f::make_ddim(dims_vec)); - output->mutable_data(platform::CPUPlace()); - for (size_t j = 0; j < seq_len; j++) { - LoDTensor* step_output = - step_scopes[j]->FindVar(outlinks[i])->GetMutable(); - // TODO(luotao02) data type and platform::DeviceContext() should set - // correctly - (output->Slice(j, j + 1)) - .CopyFrom(*step_output, platform::CPUPlace(), ctx); - } - } -} - -void LinkMemories(const std::vector& scopes, - const std::vector& memories, - const size_t step_id, const int offset) { - PADDLE_ENFORCE_LT(step_id, scopes.size(), - "step [%d] is out of range of step scopes' size [%d]", - step_id, scopes.size()); - PADDLE_ENFORCE_GE(static_cast(step_id) + offset, 0, - "offset [%d] must be large than -[%d]", offset, step_id); - PADDLE_ENFORCE_LT( - step_id + offset, scopes.size(), - "offset [%d] is out of range, it must be less than (%d - %d)", offset, - scopes.size(), step_id); - auto* scope = scopes[step_id]; - auto* linked_scope = scopes[step_id + offset]; - for (auto& attr : memories) { - auto* mem = scope->FindVar(attr.pre_var)->GetMutable(); - auto* linked_mem = linked_scope->FindVar(attr.var)->GetMutable(); - mem->Resize(linked_mem->dims()); - 
mem->ShareDataWith(*linked_mem); - } -} - -void InitArgument(const ArgumentName& name, Argument* arg, - const framework::OperatorBase& op, bool is_grad) { - arg->step_scopes = - is_grad ? op.Input(name.step_scopes) : op.Output(name.step_scopes); - arg->inlinks = op.Inputs(name.inlinks); - arg->outlinks = op.Outputs(name.outlinks); - - auto& boot_memories = is_grad ? op.Outputs(name.initial_states) - : op.Inputs(name.initial_states); - // attributes - auto& memories = op.Attr>(name.states); - auto& pre_memories = op.Attr>(name.ex_states); - - PADDLE_ENFORCE(memories.size() == boot_memories.size(), - "the size of states, initial_states don't match:%d,%d", - memories.size(), boot_memories.size()); - PADDLE_ENFORCE(pre_memories.size() == boot_memories.size(), - "the size of ex_states, initial_states don't match:%d,%d", - pre_memories.size(), boot_memories.size()); - PADDLE_ENFORCE(memories.size() > 0, "more than 1 states should be set"); - - for (size_t i = 0; i < memories.size(); ++i) { - rnn::StateAttr mem_attr; - mem_attr.var = memories[i]; - mem_attr.pre_var = pre_memories[i]; - mem_attr.boot_var = boot_memories[i]; - (arg->states).push_back(mem_attr); - } -} - -} // namespace rnn -} // namespace operators -} // namespace paddle diff --git a/paddle/operators/rnn/recurrent_op_utils.h b/paddle/operators/rnn/recurrent_op_utils.h deleted file mode 100644 index fb0e158e07745d58c6211d33e385b324e492b95e..0000000000000000000000000000000000000000 --- a/paddle/operators/rnn/recurrent_op_utils.h +++ /dev/null @@ -1,85 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once - -#include - -#include "paddle/framework/operator.h" - -namespace paddle { -namespace operators { -namespace rnn { - -using Scope = framework::Scope; - -/** - * Memory of a RNN (same as the role of `Momory` in PaddlePaddle). - * - * Memory attributes cached by this op, dims will be infered from - * boot memories in father scope. Other attributes are copied from Op's proto - * attributes. - */ -struct StateAttr { - // name of current state variable - std::string var; - // name of previous step's state variable - std::string pre_var; - // name of the variables to init this memory (same role of `boot_layer` in - // PaddlePaddle), which is store in father's scope. - std::string boot_var; -}; - -struct Argument { - std::string step_net; - std::string step_scopes; - std::vector inlinks; - std::vector outlinks; - std::vector states; -}; - -struct ArgumentName { - std::string step_net; - std::string step_scopes; - std::string inlinks; - std::string outlinks; - std::string states; // the memory name - std::string ex_states; // the previous memory name - std::string initial_states; // the boot memory name -}; - -/** - * Prepare inputs for each step net. - */ -void SegmentInputs(const std::vector& step_scopes, - const std::vector& inlinks, - const size_t seq_len); - -/** - * Process outputs of step nets and merge to variables. 
- */ -void ConcatOutputs(const std::vector& step_scopes, - const std::vector& outlinks, - const size_t seq_len, const platform::DeviceContext& ctx); - -void LinkMemories(const std::vector& step_scopes, - const std::vector& memories, const size_t step_id, - const int offset); - -void InitArgument(const ArgumentName& name, Argument* arg, - const framework::OperatorBase& op, bool is_grad = false); - -} // namespace rnn -} // namespace operators -} // namespace paddle diff --git a/paddle/operators/rnn_memory_helper_op.cc b/paddle/operators/rnn_memory_helper_op.cc index b621c7f1ba3f9e9613dea5bc98ef74c7c6dae9a0..3a035f0b9acb94bab60659938e11b4996b8eaa0f 100644 --- a/paddle/operators/rnn_memory_helper_op.cc +++ b/paddle/operators/rnn_memory_helper_op.cc @@ -62,7 +62,7 @@ class RNNMemoryHelperOpInfoMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", ""); AddOutput("Out", ""); - AddAttr("data_type", + AddAttr("dtype", "(int, default 5 (FP32)) " "Output data type") .SetDefault(framework::DataType::FP32); @@ -95,7 +95,7 @@ class RNNMemoryHelperGradOp : public framework::OperatorBase { auto &in_var_tensor = in_var->Get(); framework::AttributeMap attrs; - attrs["data_type"] = framework::ToDataType(in_var_tensor.type()); + attrs["dtype"] = framework::ToDataType(in_var_tensor.type()); attrs["shape"] = framework::vectorize2int(in_var_tensor.dims()); attrs["value"] = 0.0f; @@ -121,7 +121,7 @@ class RNNMemoryHelperGradOpInfoMaker AddInput("X", ""); AddInput("Out", ""); AddOutput(framework::GradVarName("X"), ""); - AddAttr("data_type", + AddAttr("dtype", "(int, default 5 (FP32)) " "Output data type") .SetDefault(framework::DataType::FP32); diff --git a/paddle/operators/roi_pool_op.cc b/paddle/operators/roi_pool_op.cc new file mode 100755 index 0000000000000000000000000000000000000000..156db9358689c90293311b8f08a7576b680c9472 --- /dev/null +++ b/paddle/operators/roi_pool_op.cc @@ -0,0 +1,165 @@ +/* Copyright (c) 2016 
PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/roi_pool_op.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +static constexpr int kROISize = 5; + +class ROIPoolOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of ROIPoolOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("ROIs"), + "Input(ROIs) of ROIPoolOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of ROIPoolOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Argmax"), + "Output(Argmax) of ROIPoolOp should not be null."); + auto input_dims = ctx->GetInputDim("X"); + auto rois_dims = ctx->GetInputDim("ROIs"); + + PADDLE_ENFORCE(input_dims.size() == 4, + "The format of input tensor is NCHW."); + PADDLE_ENFORCE(rois_dims.size() == 2, + "ROIs should be a 2-D tensor of shape (num_rois, 5)" + "given as [[batch_id, x1, y1, x2, y2], …]."); + PADDLE_ENFORCE(rois_dims[1] == kROISize, + "ROIs should be a 2-D tensor of shape (num_rois, 5)" + "given as [[batch_id, x1, y1, x2, y2], …]."); + + int pooled_height = ctx->Attrs().Get("pooled_height"); + int pooled_width = ctx->Attrs().Get("pooled_width"); + float spatial_scale = ctx->Attrs().Get("spatial_scale"); + + PADDLE_ENFORCE_GT(pooled_height, 0, + "The pooled 
output height must greater than 0"); + PADDLE_ENFORCE_GT(pooled_width, 0, + "The pooled output width must greater than 0"); + PADDLE_ENFORCE_GT(spatial_scale, 0.0f, + "The spatial scale must greater than 0"); + + auto out_dims = input_dims; + out_dims[0] = rois_dims[0]; + out_dims[1] = input_dims[1]; + out_dims[2] = pooled_height; + out_dims[3] = pooled_width; + + ctx->SetOutputDim("Out", out_dims); + ctx->SetOutputDim("Argmax", out_dims); + } + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } +}; + +class ROIPoolGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "The gradient of Out should not be null."); + PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName("X")), + "The gradient of X should not be null."); + ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X")); + } + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } +}; + +class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker { + public: + ROIPoolOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "(Tensor), " + "the input of ROIPoolOp. " + "The format of input tensor is NCHW. Where N is batch size, " + "C is the number of input channels, " + "H is the height of the feature, and " + "W is the width of the feature."); + AddInput("ROIs", + "(Tensor), " + "ROIs (Regions of Interest) to pool over. 
" + "should be a 2-D tensor of shape (num_rois, 5)" + "given as [[batch_id, x1, y1, x2, y2], …]. " + "Where batch_id is the id of the data, " + "(x1, y1) is the top left coordinates, and " + "(x2, y2) is the bottom right coordinates."); + AddOutput("Out", + "(Tensor), " + "The output of ROIPoolOp is a 4-D tensor with shape " + "(num_rois, channels, pooled_h, pooled_w)."); + AddOutput("Argmax", + "(Tensor), " + "Argmaxes corresponding to indices in X used " + "for gradient computation. Only output " + "if arg “is_test” is false.").AsIntermediate(); + AddAttr("spatial_scale", + "(float, default 1.0), " + "Multiplicative spatial scale factor " + "to translate ROI coords from their input scale " + "to the scale used when pooling.") + .SetDefault(1.0); + AddAttr("pooled_height", + "(int, default 1), " + "The pooled output height.") + .SetDefault(1); + AddAttr("pooled_width", + "(int, default 1), " + "The pooled output width.") + .SetDefault(1); + AddComment(R"DOC( +ROIPool operator + +ROI Pooling for Faster-RCNN. The link below is a further introduction: +https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn + )DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(roi_pool, ops::ROIPoolOp, ops::ROIPoolOpMaker, + roi_pool_grad, ops::ROIPoolGradOp); +REGISTER_OP_CPU_KERNEL( + roi_pool, + ops::CPUROIPoolOpKernel, + ops::CPUROIPoolOpKernel); +REGISTER_OP_CPU_KERNEL( + roi_pool_grad, + ops::CPUROIPoolGradOpKernel, + ops::CPUROIPoolOpKernel); diff --git a/paddle/operators/roi_pool_op.cu b/paddle/operators/roi_pool_op.cu new file mode 100755 index 0000000000000000000000000000000000000000..97df45f1b5779d5e28e36814450a9577edf85135 --- /dev/null +++ b/paddle/operators/roi_pool_op.cu @@ -0,0 +1,232 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/roi_pool_op.h" +#include "paddle/platform/cuda_helper.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +static constexpr int kNumCUDAThreads = 512; +static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kROISize = 5; + +static inline int NumBlocks(const int N) { + return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, + kNumMaxinumNumBlocks); +} + + template + __global__ void GPUROIPoolForward( + const int nthreads, const T* input_data, const int64_t* input_rois, + const float spatial_scale, const int channels, const int height, + const int width, const int pooled_height, const int pooled_width, + T* output_data, int64_t* argmax_data) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int offset = blockDim.x * gridDim.x; + for (size_t i = index; i < nthreads; i += offset) { + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const int64_t* offset_input_rois = input_rois + n * kROISize; + int roi_batch_ind = offset_input_rois[0]; + int roi_start_w = round(offset_input_rois[1] * spatial_scale); + int roi_start_h = round(offset_input_rois[2] * spatial_scale); + int roi_end_w = round(offset_input_rois[3] * spatial_scale); + int roi_end_h = round(offset_input_rois[4] * spatial_scale); + + int roi_width = max(roi_end_w - roi_start_w + 1, 1); + int roi_height = max(roi_end_h - roi_start_h + 1, 1); + T bin_size_h = 
static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + int hstart = static_cast(floor(static_cast(ph) * bin_size_h)); + int wstart = static_cast(floor(static_cast(pw) * bin_size_w)); + int hend = static_cast(ceil(static_cast(ph + 1) * bin_size_h)); + int wend = static_cast(ceil(static_cast(pw + 1) * bin_size_w)); + + hstart = min(max(hstart + roi_start_h, 0), height); + hend = min(max(hend + roi_start_h, 0), height); + wstart = min(max(wstart + roi_start_w, 0), width); + wend = min(max(wend + roi_start_w, 0), width); + bool is_empty = (hend <= hstart) || (wend <= wstart); + + T maxval = is_empty ? 0 : -std::numeric_limits::max(); + int maxidx = -1; + const T* offset_input_data = + input_data + (roi_batch_ind * channels + c) * height * width; + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + int input_data_index = h * width + w; + if (offset_input_data[input_data_index] > maxval) { + maxval = offset_input_data[input_data_index]; + maxidx = input_data_index; + } + } + } + output_data[index] = maxval; + if (argmax_data) { + argmax_data[index] = maxidx; + } + } + } + +template +__global__ void GPUROIPoolBackward( + const int nthreads, + const int64_t* input_rois, + const T* output_grad, + const int64_t* argmax_data, + const int num_rois, + const float spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + T* input_grad) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int offset = blockDim.x * gridDim.x; + for (int i = index; i < nthreads; i += offset) { + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const int64_t* offset_input_rois = input_rois + n * kROISize; + int roi_batch_ind = offset_input_rois[0]; + int input_offset = 
(roi_batch_ind * channels + c) * height * width; + int output_offset = (n * channels + c) * pooled_height * pooled_width; + const T* offset_output_grad = output_grad + output_offset; + T* offset_input_grad = input_grad + input_offset; + const int64_t* offset_argmax_data = argmax_data + output_offset; + + int argmax = offset_argmax_data[ph * pooled_width + pw]; + if (argmax != -1) { + platform::CudaAtomicAdd(offset_input_grad + argmax, + static_cast(offset_output_grad[ph * pooled_width + pw])); + } + } + } + + +template +class GPUROIPoolOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* in = ctx.Input("X"); + auto* rois = ctx.Input("ROIs"); + auto* out = ctx.Output("Out"); + auto* argmax = ctx.Output("Argmax"); + + auto pooled_height = ctx.Attr("pooled_height"); + auto pooled_width = ctx.Attr("pooled_width"); + auto spatial_scale = ctx.Attr("spatial_scale"); + + auto in_dims = in->dims(); + auto in_stride = framework::stride(in_dims); + int channels = in_dims[1]; + int height = in_dims[2]; + int width = in_dims[3]; + + size_t rois_num = rois->dims()[0]; + if (rois_num== 0) return; + + int output_size = out->numel(); + int blocks = NumBlocks(output_size); + int threads = kNumCUDAThreads; + + GPUROIPoolForward + <<>>( + output_size, + in->data(), + rois->data(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + out->mutable_data(ctx.GetPlace()), + argmax->mutable_data(ctx.GetPlace())); + } +}; + +template +class GPUROIPoolGradOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* in = ctx.Input("X"); + auto* rois = ctx.Input("ROIs"); + auto* argmax = ctx.Input("Argmax"); + + auto* out_grad = + ctx.Input(framework::GradVarName("Out")); + auto* x_grad = + ctx.Output(framework::GradVarName("X")); + + auto pooled_height = ctx.Attr("pooled_height"); + auto pooled_width = 
ctx.Attr("pooled_width"); + auto spatial_scale = ctx.Attr("spatial_scale"); + + size_t rois_num = rois->dims()[0]; + int channels = in->dims()[1]; + int height = in->dims()[2]; + int width = in->dims()[3]; + + if (x_grad) { + x_grad->mutable_data(ctx.GetPlace()); + math::SetConstant set_zero; + set_zero(ctx.device_context(), x_grad, static_cast(0)); + + int output_grad_size = out_grad->numel(); + int blocks = NumBlocks(output_grad_size); + int threads = kNumCUDAThreads; + + if (output_grad_size > 0) { + GPUROIPoolBackward + <<>>( + output_grad_size, + rois->data(), + out_grad->data(), + argmax->data(), + rois_num, + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + x_grad->mutable_data(ctx.GetPlace())); + } + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL( + roi_pool, + ops::GPUROIPoolOpKernel, + ops::GPUROIPoolOpKernel); +REGISTER_OP_GPU_KERNEL( + roi_pool_grad, + ops::GPUROIPoolGradOpKernel, + ops::GPUROIPoolOpKernel); diff --git a/paddle/operators/roi_pool_op.h b/paddle/operators/roi_pool_op.h new file mode 100755 index 0000000000000000000000000000000000000000..bd7736d63125f1be57c8af5141208f66d0592adb --- /dev/null +++ b/paddle/operators/roi_pool_op.h @@ -0,0 +1,190 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include "paddle/framework/op_registry.h" +#include "paddle/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +template +class CPUROIPoolOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* in = ctx.Input("X"); + auto* rois = ctx.Input("ROIs"); + auto* out = ctx.Output("Out"); + auto* argmax = ctx.Output("Argmax"); + + auto pooled_height = ctx.Attr("pooled_height"); + auto pooled_width = ctx.Attr("pooled_width"); + auto spatial_scale = ctx.Attr("spatial_scale"); + + auto in_dims = in->dims(); + int batch_size = in_dims[0]; + int channels = in_dims[1]; + int height = in_dims[2]; + int width = in_dims[3]; + int rois_num = rois->dims()[0]; + + auto in_stride = framework::stride(in_dims); + auto argmax_stride = framework::stride(argmax->dims()); + auto roi_stride = framework::stride(rois->dims()); + auto out_stride = framework::stride(out->dims()); + + const T* input_data = in->data(); + const int64_t* rois_data = rois->data(); + T* output_data = out->mutable_data(ctx.GetPlace()); + int64_t* argmax_data = argmax->mutable_data(ctx.GetPlace()); + + for (int n = 0; n < rois_num; ++n) { + int roi_batch_id = rois_data[0]; + PADDLE_ENFORCE_GE(roi_batch_id, 0); + PADDLE_ENFORCE_LT(roi_batch_id, batch_size); + rois_data += roi_stride[0]; + } + + rois_data = rois->data(); + for (int n = 0; n < rois_num; ++n) { + int roi_batch_id = rois_data[0]; + int roi_start_w = round(rois_data[1] * spatial_scale); + int roi_start_h = round(rois_data[2] * spatial_scale); + int roi_end_w = round(rois_data[3] * spatial_scale); + int roi_end_h = round(rois_data[4] * spatial_scale); + + // Force malformed ROIs to be 1x1 + int roi_height = std::max(roi_end_h - roi_start_h + 1, 1); + int roi_width = std::max(roi_end_w - roi_start_w + 1, 1); + + const float bin_size_h = + static_cast(roi_height) / static_cast(pooled_height); + const float bin_size_w = + static_cast(roi_width) 
/ static_cast(pooled_width); + + const T* batch_data = input_data + roi_batch_id * in_stride[0]; + + for (int c = 0; c < channels; ++c) { + for (int ph = 0; ph < pooled_height; ++ph) { + for (int pw = 0; pw < pooled_width; ++pw) { + // Compute pooling region for this output unit: + // start (included) = floor(ph * roi_height / pooled_height_) + // end (excluded) = ceil((ph + 1) * roi_height / pooled_height_) + int hstart = + static_cast(floor(static_cast(ph) * bin_size_h)); + int wstart = + static_cast(floor(static_cast(pw) * bin_size_w)); + int hend = + static_cast(ceil(static_cast(ph + 1) * bin_size_h)); + int wend = + static_cast(ceil(static_cast(pw + 1) * bin_size_w)); + + hstart = std::min(std::max(hstart + roi_start_h, 0), height); + hend = std::min(std::max(hend + roi_start_h, 0), height); + wstart = std::min(std::max(wstart + roi_start_w, 0), width); + wend = std::min(std::max(wend + roi_start_w, 0), width); + + const int pool_index = ph * pooled_width + pw; + + // Define an empty pooling region to be zero + bool is_empty = (hend <= hstart) || (wend <= wstart); + output_data[pool_index] = + is_empty ? 
0 : -std::numeric_limits::max(); + argmax_data[pool_index] = -1; + + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + const int index = h * width + w; + if (batch_data[index] > output_data[pool_index]) { + output_data[pool_index] = batch_data[index]; + argmax_data[pool_index] = index; + } + } + } + } + } + + batch_data += in_stride[1]; + output_data += out_stride[1]; + argmax_data += argmax_stride[1]; + } + // Increment ROI data pointer + rois_data += roi_stride[0]; + } + return; + } +}; + +template +class CPUROIPoolGradOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* in = ctx.Input("X"); + auto* rois = ctx.Input("ROIs"); + auto* argmax = ctx.Input("Argmax"); + + auto* out_grad = + ctx.Input(framework::GradVarName("Out")); + auto* x_grad = + ctx.Output(framework::GradVarName("X")); + + auto pooled_height = ctx.Attr("pooled_height"); + auto pooled_width = ctx.Attr("pooled_width"); + + if (x_grad) { + int channels = in->dims()[1]; + auto in_stride = framework::stride(in->dims()); + auto roi_stride = framework::stride(rois->dims()); + + const int64_t* rois_data = rois->data(); + int rois_num = rois->dims()[0]; + + T* x_grad_data = x_grad->mutable_data(ctx.GetPlace()); + math::SetConstant set_zero; + set_zero(ctx.device_context(), x_grad, static_cast(0)); + + size_t roi_offset = roi_stride[0]; + size_t batch_offset = in_stride[0]; + size_t channel_offset = in_stride[1]; + + const T* out_grad_data = out_grad->data(); + size_t pool_channel_offset = pooled_height * pooled_width; + const int64_t* argmax_data = argmax->data(); + + for (size_t n = 0; n < rois_num; ++n) { + size_t roi_batch_idx = rois_data[0]; + T* batch_grad_data = x_grad_data + batch_offset * roi_batch_idx; + for (int c = 0; c < channels; ++c) { + for (int ph = 0; ph < pooled_height; ++ph) { + for (int pw = 0; pw < pooled_width; ++pw) { + size_t pool_index = ph * pooled_width + pw; + + if 
(argmax_data[pool_index] >= 0) { + size_t index = static_cast(argmax_data[pool_index]); + batch_grad_data[index] += out_grad_data[pool_index]; + } + } + } + batch_grad_data += channel_offset; + out_grad_data += pool_channel_offset; + argmax_data += pool_channel_offset; + } + rois_data += roi_offset; + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/sequence_slice_op.h b/paddle/operators/sequence_slice_op.h old mode 100755 new mode 100644 index 2c9b8464a1236a054cf1a38b9dc1d73588f8dd38..6411e0a46630beb0a9abb6aa5e517978b25a5254 --- a/paddle/operators/sequence_slice_op.h +++ b/paddle/operators/sequence_slice_op.h @@ -26,7 +26,7 @@ using LoD = framework::LoD; template inline LoD SequenceSliceLoD(const T& in, const int64_t* offset_data, - const int64_t* length_data) { + const int64_t* length_data) { auto out_lod = in.lod(); size_t lod_offset = 0; @@ -34,7 +34,7 @@ inline LoD SequenceSliceLoD(const T& in, const int64_t* offset_data, out_lod[0][0] = 0; for (size_t i = 0; i < n; ++i) { lod_offset += length_data[i]; - out_lod[0][i+1] = lod_offset; + out_lod[0][i + 1] = lod_offset; } return out_lod; } @@ -51,8 +51,7 @@ class SequenceSliceOpKernel : public framework::OpKernel { auto lod = in->lod(); auto n = lod[0].size() - 1; - PADDLE_ENFORCE_EQ(lod.size(), 1UL, - "Only support one level sequence now."); + PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now."); PADDLE_ENFORCE_EQ( n, static_cast(length->dims()[0]), "The size of input-sequence and length-array should be the same") @@ -67,23 +66,23 @@ class SequenceSliceOpKernel : public framework::OpKernel { if (platform::is_gpu_place(ctx.GetPlace())) { offset_cpu.mutable_data(offset->dims(), platform::CPUPlace()); - offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context()); + framework::CopyFrom(*offset, platform::CPUPlace(), ctx.device_context(), + &offset_cpu); offset_data = offset_cpu.data(); length_cpu.mutable_data(length->dims(), 
platform::CPUPlace()); - length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context()); + framework::CopyFrom(*length, platform::CPUPlace(), ctx.device_context(), + &length_cpu); length_data = length_cpu.data(); } for (size_t i = 0; i < n; ++i) { PADDLE_ENFORCE_LT(0, offset_data[i], - "The offset[%d] must greater than zero.", i) + "The offset[%d] must greater than zero.", i) PADDLE_ENFORCE_LT(0, length_data[i], - "The length[%d] must greater than zero.", i) - PADDLE_ENFORCE_LT( - lod[0][i] + offset_data[i] + length_data[i], - lod[0][i + 1], - "The target tensor's length overflow.") + "The length[%d] must greater than zero.", i) + PADDLE_ENFORCE_LT(lod[0][i] + offset_data[i] + length_data[i], + lod[0][i + 1], "The target tensor's length overflow.") } out->mutable_data(ctx.GetPlace()); @@ -98,14 +97,12 @@ class SequenceSliceOpKernel : public framework::OpKernel { size_t out_offset = 0; for (size_t i = 0; i < n; ++i) { - Tensor in_t = - in->Slice(static_cast(lod[0][i] + offset_data[i]), - static_cast(lod[0][i] + offset_data[i] + - length_data[i])); - - StridedMemcpy(ctx.device_context(), in_t.data(), - in_stride, in_t.dims(), out_stride, - out->data() + out_offset); + Tensor in_t = in->Slice( + static_cast(lod[0][i] + offset_data[i]), + static_cast(lod[0][i] + offset_data[i] + length_data[i])); + + StridedMemcpy(ctx.device_context(), in_t.data(), in_stride, + in_t.dims(), out_stride, out->data() + out_offset); out_offset += length_data[i] * in_stride[0]; } } @@ -130,11 +127,13 @@ class SequenceSliceGradOpKernel : public framework::OpKernel { if (platform::is_gpu_place(ctx.GetPlace())) { offset_cpu.mutable_data(offset->dims(), platform::CPUPlace()); - offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context()); + framework::CopyFrom(*offset, platform::CPUPlace(), ctx.device_context(), + &offset_cpu); offset_data = offset_cpu.data(); length_cpu.mutable_data(length->dims(), platform::CPUPlace()); - length_cpu.CopyFrom(*length, 
platform::CPUPlace(), ctx.device_context()); + framework::CopyFrom(*length, platform::CPUPlace(), ctx.device_context(), + &length_cpu); length_data = length_cpu.data(); } @@ -162,8 +161,8 @@ class SequenceSliceGradOpKernel : public framework::OpKernel { static_cast(lod[0][i] + offset_data[i] + length_data[i])); StridedMemcpy(ctx.device_context(), out_grad_t.data(), - out_grad_stride, out_grad_t.dims(), x_grad_stride, - x_grad_t.data()); + out_grad_stride, out_grad_t.dims(), x_grad_stride, + x_grad_t.data()); } } } diff --git a/paddle/operators/shrink_rnn_memory_op.cc b/paddle/operators/shrink_rnn_memory_op.cc index 65bccc0c81d0ad9674649933a20ec7b09fec5b37..48597c1d2ace9cb5fe36ba237f70cab8b280a836 100644 --- a/paddle/operators/shrink_rnn_memory_op.cc +++ b/paddle/operators/shrink_rnn_memory_op.cc @@ -101,8 +101,8 @@ class ShrinkRNNMemoryGradOp : public ArrayOp { } else { auto &dout_tensor = dout_var->Get(); auto height = dout_tensor.dims()[0]; - dx_tensor.Slice(0, static_cast(height)) - .CopyFrom(dout_tensor, dout_tensor.place(), dev_ctx); + auto slice = dx_tensor.Slice(0, static_cast(height)); + framework::CopyFrom(dout_tensor, dout_tensor.place(), dev_ctx, &slice); if (dx_tensor.dims()[0] < height) { auto rest_tensor = dx_tensor.Slice( static_cast(height), static_cast(dout_tensor.dims()[0])); diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 93f89e33a73c5f4c6c0e5a8793a0abe7c692b656..93e0525badc26808f0dca70cc1153ac728f1fe9c 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -59,7 +59,7 @@ Then the ratio of the exponential of the given dimension and the sum of exponential values of all the other dimensions is the output of the softmax operator. 
-For each row `i` and each column `j` in input X, we have: +For each row $i$ and each column $j$ in Input(X), we have: $$Y[i, j] = \frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])}$$ )DOC"); diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc index 3dbb62d2e571eb92025c1b3fc0a6653c7cda007a..fc027d6f95cdbc24af59ef1188b6f16f6a93e85c 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -67,15 +67,15 @@ The equation is as follows: 1) Hard label (one-hot label, so every sample has exactly one class) -$$Loss_j = \f$ -\text{Logit}_{Label_j} + +$$Loss_j = -\text{Logit}_{Label_j} + \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right), -j = 1, ..., K $\f$$ +j = 1,..., K$$ 2) Soft label (each sample can have a distribution over all classes) -$$Loss_j = \f$ -\sum_{i=0}^{K}\text{Label}_i\left(\text{Logit}_i - +$$Loss_j = -\sum_{i=0}^{K}\text{Label}_i \left(\text{Logit}_i - \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right)\right), -j = 1,...,K $\f$$ +j = 1,...,K$$ )DOC"); } diff --git a/paddle/operators/split_lod_tensor_op.cc b/paddle/operators/split_lod_tensor_op.cc index db635f2ba0804143c9a2e04ff006dfbc8744f3fc..f164a4771186635232fea46327ca1fb8b86f2852 100644 --- a/paddle/operators/split_lod_tensor_op.cc +++ b/paddle/operators/split_lod_tensor_op.cc @@ -49,7 +49,7 @@ class SplitLoDTensorOp : public framework::OperatorBase { cpu_mask->ShareDataWith(mask); } else if (platform::is_gpu_place(mask.place())) { #ifdef PADDLE_WITH_CUDA - cpu_mask->CopyFrom(mask, platform::CPUPlace(), dev_ctx); + framework::CopyFrom(mask, platform::CPUPlace(), dev_ctx, cpu_mask.get()); #else PADDLE_THROW("Not supported GPU, Please compile WITH_GPU option"); #endif @@ -105,10 +105,11 @@ class SplitLoDTensorOp : public framework::OperatorBase { continue; } // out[offset: offset+len] = x[each_range.begin: each_range.end] - out->Slice(static_cast(offset), static_cast(offset + len)) - 
.CopyFrom(x.Slice(static_cast(each_range.begin), - static_cast(each_range.end)), - x.place(), dev_ctx); + auto slice = out->Slice(static_cast(offset), + static_cast(offset + len)); + framework::CopyFrom(x.Slice(static_cast(each_range.begin), + static_cast(each_range.end)), + x.place(), dev_ctx, &slice); offset += len; } } diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index c2b7632b2865a3ef66051d815d7722a08c6a8cbd..ddc210c26e69566fef9baa20f49ba1052e993b3f 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -176,4 +176,6 @@ namespace ops = paddle::operators; REGISTER_OPERATOR(sum, ops::SumOp, ops::SumOpMaker, ops::SumGradMaker, ops::SumOpVarTypeInference); REGISTER_OP_CPU_KERNEL(sum, ops::SumKernel, - ops::SumKernel); + ops::SumKernel, + ops::SumKernel, + ops::SumKernel); diff --git a/paddle/operators/sum_op.cu b/paddle/operators/sum_op.cu index 5cf05b876b6d6a2ce61d9e10b7ec52ed3cef57d7..5c30dd4d470c2e0acecef18524a4a81f9eb786a9 100644 --- a/paddle/operators/sum_op.cu +++ b/paddle/operators/sum_op.cu @@ -14,4 +14,6 @@ limitations under the License. 
*/ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(sum, ops::SumKernel, - ops::SumKernel); + ops::SumKernel, + ops::SumKernel, + ops::SumKernel); diff --git a/paddle/operators/sum_op.h b/paddle/operators/sum_op.h index 4ca15611392b3117aa6c92cba95911eb8bebeb15..4afec03ecef168077c9964f5cb1da7cd61861f40 100644 --- a/paddle/operators/sum_op.h +++ b/paddle/operators/sum_op.h @@ -102,8 +102,8 @@ class SumKernel : public framework::OpKernel { out_array.resize(i + 1); } if (out_array[i].numel() == 0) { - out_array[i].CopyFrom(in_array[i], in_array[i].place(), - context.device_context()); + framework::CopyFrom(in_array[i], in_array[i].place(), + context.device_context(), &out_array[i]); out_array[i].set_lod(in_array[i].lod()); } else { PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod()); diff --git a/paddle/operators/tensor.save b/paddle/operators/tensor.save new file mode 100644 index 0000000000000000000000000000000000000000..c24308a7d0131b84c28c0a9857cce4949afb2091 Binary files /dev/null and b/paddle/operators/tensor.save differ diff --git a/paddle/operators/tensor_array_read_write_op.cc b/paddle/operators/tensor_array_read_write_op.cc index ae1b48d7a8e3d573a5134a822a2ed5ef70511077..ad09fb53ce8c9bf0187e595fe3cdcb6685ab9889 100644 --- a/paddle/operators/tensor_array_read_write_op.cc +++ b/paddle/operators/tensor_array_read_write_op.cc @@ -38,7 +38,7 @@ class WriteToArrayOp : public ArrayOp { out->resize(offset + 1); } auto *out_tensor = &out->at(offset); - out_tensor->CopyFrom(x_tensor, dev_ctx.GetPlace(), dev_ctx); + CopyFrom(x_tensor, dev_ctx.GetPlace(), dev_ctx, out_tensor); out_tensor->set_lod(x_tensor.lod()); } }; @@ -116,7 +116,8 @@ class ReadFromArrayOp : public ArrayOp { auto *out_tensor = out->GetMutable(); size_t offset = GetOffset(scope, dev_ctx); PADDLE_ENFORCE_LT(offset, x_array.size()); - out_tensor->CopyFrom(x_array[offset], dev_ctx.GetPlace(), dev_ctx); + framework::CopyFrom(x_array[offset], dev_ctx.GetPlace(), dev_ctx, + out_tensor); 
out_tensor->set_lod(x_array[offset].lod()); } }; diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index 7975efc7cf134aaf591385a6866254a9c5f2a0bb..fff1dc7ccddf1d8cee0c8311828fd38888283cd1 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -66,7 +66,7 @@ class UniformRandomOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( - static_cast(ctx.Attr("data_type")), + static_cast(ctx.Attr("dtype")), ctx.device_context()); } }; @@ -99,7 +99,7 @@ uniform distribution. "Random seed used for generating samples. " "0 means use a seed generated by the system.") .SetDefault(0); - AddAttr("data_type", "(int, default 5(FP32)) Output tensor data type") + AddAttr("dtype", "(int, default 5(FP32)) Output tensor data type") .SetDefault(framework::DataType::FP32); } }; diff --git a/paddle/operators/while_op.cc b/paddle/operators/while_op.cc index dcc59f5ff2ae3a8ca999d72a20cfd5c759987d89..68b4f7705995e5ecb6c9b8216db7373c1777a31e 100644 --- a/paddle/operators/while_op.cc +++ b/paddle/operators/while_op.cc @@ -180,7 +180,7 @@ class WhileGradOp : public framework::OperatorBase { if (var->IsType()) { auto &inside_tensor = var->Get(); framework::AttributeMap attrs; - attrs["data_type"] = framework::ToDataType(inside_tensor.type()); + attrs["dtype"] = framework::ToDataType(inside_tensor.type()); attrs["shape"] = framework::vectorize2int(inside_tensor.dims()); attrs["value"] = 0.0f; diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index bd86a9fe268c277065cd450f91b544def6c4d32f..88df28a9668e5f354d115ff8ab32cb21e03aefb5 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -1,15 +1,20 @@ -cc_library(cpu_info SRCS cpu_info.cc DEPS gflags glog) +if(WITH_GPU) + cc_library(enforce SRCS enforce.cc DEPS nccl) +else() + cc_library(enforce 
SRCS enforce.cc) +endif() +cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece enforce) + +cc_library(cpu_info SRCS cpu_info.cc DEPS gflags glog enforce) cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info) -nv_library(gpu_info SRCS gpu_info.cc DEPS gflags glog) +nv_library(gpu_info SRCS gpu_info.cc DEPS gflags glog enforce) -cc_library(place SRCS place.cc) +cc_library(place SRCS place.cc DEPS enforce) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) add_subdirectory(dynload) -cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece) - IF(WITH_GPU) set(GPU_CTX_DEPS dynload_cuda dynamic_loader) ELSE() diff --git a/paddle/platform/cuda_helper.h b/paddle/platform/cuda_helper.h index a7d99cde106a0a66f122a8c43f49717c03e60dec..376bb0e6887c797c3c1019e92f738a62d01a9c51 100644 --- a/paddle/platform/cuda_helper.h +++ b/paddle/platform/cuda_helper.h @@ -31,6 +31,16 @@ constexpr int PADDLE_CUDA_NUM_THREADS = 512; // For atomicAdd. USE_CUDA_ATOMIC(Add, float); +USE_CUDA_ATOMIC(Add, int); +USE_CUDA_ATOMIC(Add, unsigned int); +USE_CUDA_ATOMIC(Add, unsigned long long int); + +CUDA_ATOMIC_WRAPPER(Add, int64_t) { + static_assert(sizeof(int64_t) == sizeof(long long int), + "long long should be int64"); + return CudaAtomicAdd(reinterpret_cast(address), + static_cast(val)); +} #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 USE_CUDA_ATOMIC(Add, double); diff --git a/paddle/platform/dynload/CMakeLists.txt b/paddle/platform/dynload/CMakeLists.txt index bb3fec1be9e811c26cc6851314e960e96fc366b3..f4fda65907dc26e9edb91ee46f3b8bd2de7b3f3a 100644 --- a/paddle/platform/dynload/CMakeLists.txt +++ b/paddle/platform/dynload/CMakeLists.txt @@ -1,3 +1,3 @@ -cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags) +cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce) nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc nccl.cc DEPS dynamic_loader nccl) diff --git a/paddle/platform/enforce.cc b/paddle/platform/enforce.cc new file mode 
100644 index 0000000000000000000000000000000000000000..e8d31bc782ec3cddd18ceaedf88fe5e7b4aed2cc --- /dev/null +++ b/paddle/platform/enforce.cc @@ -0,0 +1,19 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/platform/enforce.h" + +namespace paddle { +namespace platform {} // namespace platform +} // namespace paddle diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index bfe708748a62ff9ac5d151bc652142e1f4925c83..415020ab965fa976c37870b7ad5794aab947fb4e 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -49,7 +49,6 @@ limitations under the License. 
*/ namespace paddle { namespace platform { -namespace { #ifdef __GNUC__ inline std::string demangle(std::string name) { int status = -4; // some arbitrary value to eliminate the compiler warning @@ -60,7 +59,6 @@ inline std::string demangle(std::string name) { #else inline std::string demangle(std::string name) { return name; } #endif -} struct EnforceNotMet : public std::exception { std::exception_ptr exp_; diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index a9bcc474387513a8ca019bc9382b88c93e08ff8d..a54dc0d9fdb3c30391b01966ad493540c8ad1375 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -1,8 +1,8 @@ if(WITH_PYTHON) cc_library(paddle_pybind SHARED SRCS pybind.cc exception.cc protobuf.cc - DEPS pybind python backward proto_desc tensor_array paddle_memory executor prune + DEPS pybind python backward proto_desc paddle_memory executor prune ${GLOB_OP_LIB}) endif(WITH_PYTHON) -cc_binary(print_operators_doc SRCS print_operators_doc.cc DEPS ${GLOB_OP_LIB} tensor_array) +cc_binary(print_operators_doc SRCS print_operators_doc.cc DEPS ${GLOB_OP_LIB}) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 5a1ff9b7976abbe4a37f8366181d9d1ae78ea4a0..6c8f06cccb92fa9cd22fdb89a9d410e6853895cc 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -202,9 +202,9 @@ void BindVarDsec(py::module &m) { }, py::return_value_policy::reference) .def("set_shape", &VarDescBind::SetShape) - .def("set_data_type", &VarDescBind::SetDataType) + .def("set_dtype", &VarDescBind::SetDataType) .def("shape", &VarDescBind::Shape, py::return_value_policy::reference) - .def("data_type", &VarDescBind::GetDataType) + .def("dtype", &VarDescBind::GetDataType) .def("lod_level", &VarDescBind::GetLodLevel) .def("set_lod_level", &VarDescBind::SetLoDLevel) .def("type", &VarDescBind::GetType) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 
3d8d3f1d2fd3977f945928c723db5fcafffeae85..f55a1edce31ccf2498dcfcf0b30ba1012d7a7d1a 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -26,9 +26,7 @@ limitations under the License. */ #include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/prune.h" #include "paddle/framework/selected_rows.h" -#include "paddle/framework/tensor_array.h" #include "paddle/operators/cond_op.h" -#include "paddle/operators/dynamic_recurrent_op.h" #include "paddle/operators/net_op.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" @@ -293,6 +291,11 @@ All parameter, weight, gradient are variables in Paddle. Prune(*prog_with_targets.Proto(), &pruned_desc); return new ProgramDescBind(pruned_desc); }); + m.def("inference_optimize", [](ProgramDescBind &origin) { + ProgramDesc pruned_desc; + InferenceOptimize(*(origin.Proto()), &pruned_desc); + return new ProgramDescBind(pruned_desc); + }); m.def_submodule( "var_names", "The module will return special predefined variable name in Paddle") @@ -390,83 +393,6 @@ All parameter, weight, gradient are variables in Paddle. 
self->CompleteAddOp(); }); - py::class_(m, "TensorArray") - .def("__init__", - [](TensorArray &instance) { new (&instance) TensorArray(); }) - .def("read", - [](TensorArray &self, size_t index) { return self.Read(index); }) - .def("write", [](TensorArray &self, size_t index, - LoDTensor &value) { self.Write(index, value); }) - .def("write_shared", - [](TensorArray &self, size_t index, const LoDTensor &value) { - self.WriteShared(index, value); - }) - .def("size", [](TensorArray &self) { return self.size(); }) - .def("pack", - [](TensorArray &self, size_t level, - const std::vector> &meta_info, - const std::vector> &lod) { - std::vector meta; - for (auto &info : meta_info) { - PADDLE_ENFORCE_EQ(info.size(), 3UL); - meta.emplace_back(info[0], info[1], info[2]); - } -#ifndef PADDLE_WITH_CUDA - return self.Pack(level, meta, lod); -#else - LoD new_lod; - new_lod.reserve(lod.size()); - std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); - return self.Pack(level, meta, new_lod); -#endif - }) - .def("unpack", - [](TensorArray &self, const LoDTensor &source, int level, - bool length_descend) { - auto metas = self.Unpack(source, level, length_descend); - std::vector> meta_info; - for (auto meta : metas) { - meta_info.emplace_back( - std::vector({meta.begin, meta.end, meta.ori_idx})); - } - return meta_info; - }) - .def("stack", [](TensorArray &self) { return self.Stack(); }) - .def("unstack", - [](TensorArray &self, const LoDTensor &source) { - return self.Unstack(source); - }) - .def("unstack_shared", [](TensorArray &self, const LoDTensor &source) { - return self.UnstackShared(source); - }); - - py::class_(m, - "DynamicRecurrentOp") - .def_static("create", - [](py::bytes protobin) -> operators::DynamicRecurrentOp * { - OpDesc desc; - PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), - "Cannot parse user input to OpDesc"); - PADDLE_ENFORCE(desc.IsInitialized(), - "User OpDesc is not initialized, reason %s", - desc.InitializationErrorString()); - auto rnn_op = 
OpRegistry::CreateOp(desc); - return static_cast( - rnn_op.release()); - }) - .def("set_step_unit", - [](operators::DynamicRecurrentOp &self, const operators::NetOp &net) - -> void { self.rnn.SetStepUnit(net.Clone()); }) - .def("get_state", - [](operators::DynamicRecurrentOp &self, const std::string &name) - -> const TensorArray & { return self.rnn.state(name); }) - .def("get_step_input", - [](operators::DynamicRecurrentOp &self, const std::string &name) - -> const TensorArray & { return self.rnn.step_input(name); }) - .def("get_step_output", - [](operators::DynamicRecurrentOp &self, const std::string &name) - -> const TensorArray & { return self.rnn.step_output(name); }); - // cond_op py::class_(m, "CondOp") .def_static("create", diff --git a/python/paddle/v2/fluid/evaluator.py b/python/paddle/v2/fluid/evaluator.py index 3a8f1831cf2c44c81aee62c6ee172942db188217..f78d2f814c89aa6b5ee8387f2558a97c754e655c 100644 --- a/python/paddle/v2/fluid/evaluator.py +++ b/python/paddle/v2/fluid/evaluator.py @@ -8,7 +8,7 @@ def _clone_var_in_block_(block, var): return block.create_var( name=var.name, shape=var.shape, - dtype=var.data_type, + dtype=var.dtype, type=var.type, lod_level=var.lod_level, persistable=True) @@ -33,6 +33,9 @@ class Evaluator(object): else: self._main_program = g_main_program + def states(self): + return self._states + def _update_ops(self, *args, **kwargs): """ append update ops to the global states @@ -57,7 +60,7 @@ class Evaluator(object): attrs={ "shape": g_var.shape, "value": .0, - "data_type": 5, + "dtype": 5, }) block.append_op( type="scale", inputs={"X": zeros}, outputs={"Out": g_var}) @@ -93,7 +96,7 @@ class Accuracy(Evaluator): def _update_ops(self, input, label, k=1, **kwargs): block = self._main_program.global_block() - topk_out = block.create_var(dtype=input.data_type) + topk_out = block.create_var(dtype=input.dtype) topk_indices = block.create_var(dtype="int64") block.append_op( type="top_k", @@ -122,16 +125,16 @@ class Accuracy(Evaluator): 
inputs={"X": [self._states["Total"]]}, outputs={"Out": [self._states["Total"]]}, attrs={ - "in_data_type": 5, # float32 - "out_data_type": 2, #int32 + "in_dtype": 5, # float32 + "out_dtype": 2, # int32 }) block.append_op( type="cast", inputs={"X": [self._states["Correct"]]}, outputs={"Out": [self._states["Correct"]]}, attrs={ - "in_data_type": 5, - "out_data_type": 2, + "in_dtype": 5, + "out_dtype": 2, }) block.append_op( @@ -153,7 +156,7 @@ class Accuracy(Evaluator): else: eval_program = Program() block = eval_program.global_block() - eval_out = block.create_var(dtype=self._states["Total"].data_type) + eval_out = block.create_var(dtype=self._states["Total"].dtype) e_total = _clone_var_in_block_(block, self._states["Total"]) e_correct = _clone_var_in_block_(block, self._states["Correct"]) block.append_op( @@ -161,16 +164,16 @@ class Accuracy(Evaluator): inputs={"X": [e_total]}, outputs={"Out": [e_total]}, attrs={ - "in_data_type": 2, #int32 - "out_data_type": 5, #float32 + "in_dtype": 2, # int32 + "out_dtype": 5, # float32 }) block.append_op( type="cast", inputs={"X": [e_correct]}, outputs={"Out": [e_correct]}, attrs={ - "in_data_type": 2, - "out_data_type": 5, + "in_dtype": 2, + "out_dtype": 5, }) block.append_op( type="elementwise_div", diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index 7f7c310ad87f64e5d047ecfc2876d516914c75c8..872c19c2f6f4afbd25a5f7a9df38bd3dd0b61d5f 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -99,9 +99,9 @@ class Variable(object): if not isinstance(dtype, core.DataType): dtype = convert_np_dtype_to_dtype_(dtype) if is_new_var: - self.desc.set_data_type(dtype) + self.desc.set_dtype(dtype) else: - old_dtype = self.data_type + old_dtype = self.dtype if dtype != old_dtype: raise ValueError("Variable {0} has been created before. 
" "The previous data type is {1}; the new " @@ -162,8 +162,8 @@ class Variable(object): return tuple(self.desc.shape()) @property - def data_type(self): - return self.desc.data_type() + def dtype(self): + return self.desc.dtype() @property def lod_level(self): @@ -511,6 +511,13 @@ class Program(object): res.sync_with_cpp() return res + def inference_optimize(self): + res = Program() + res.desc = core.inference_optimize(self.desc) + res.blocks = [Block(res, i) for i in xrange(res.desc.num_blocks())] + res.sync_with_cpp() + return res + @staticmethod def parse_from_string(binary_str): p = Program() diff --git a/python/paddle/v2/fluid/initializer.py b/python/paddle/v2/fluid/initializer.py index 1a9d804ee7ee8e6463d42fefb809fb45888fd064..9f23e68a7635b6e6ae927603dbcc47d63f9c7f3d 100644 --- a/python/paddle/v2/fluid/initializer.py +++ b/python/paddle/v2/fluid/initializer.py @@ -93,7 +93,7 @@ class ConstantInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "value": self._value }) var.op = op @@ -140,7 +140,7 @@ class UniformInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "min": self._low, "max": self._high, "seed": self._seed @@ -188,7 +188,7 @@ class NormalInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "mean": self._mean, "std": self._std_dev, "seed": self._seed @@ -265,7 +265,7 @@ class XavierInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "min": -limit, "max": limit, "seed": self._seed @@ -278,7 +278,7 @@ class XavierInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "mean": 0.0, "std": std, "seed": self._seed @@ -348,7 +348,7 @@ 
class MSRAInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "min": -limit, "max": limit, "seed": self._seed @@ -361,7 +361,7 @@ class MSRAInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "mean": 0.0, "std": std, "seed": self._seed diff --git a/python/paddle/v2/fluid/io.py b/python/paddle/v2/fluid/io.py index 2d070814eef0b099ba71bef223596e30388ac48a..e5b2aa3b919df4cec1091c0bbd39b7e400cc6867 100644 --- a/python/paddle/v2/fluid/io.py +++ b/python/paddle/v2/fluid/io.py @@ -6,7 +6,8 @@ from paddle.v2.fluid.framework import Program, Parameter, g_main_program, \ __all__ = [ 'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params', - 'load_persistables', "save_inference_model", "load_inference_model" + 'load_persistables', "save_inference_model", "load_inference_model", + "get_inference_program" ] @@ -23,7 +24,7 @@ def _clone_var_in_block_(block, var): return block.create_var( name=var.name, shape=var.shape, - dtype=var.data_type, + dtype=var.dtype, type=var.type, lod_level=var.lod_level, persistable=True) @@ -151,6 +152,17 @@ def load_persistables(executor, dirname, main_program=None): predicate=is_persistable) +def get_inference_program(target_vars, main_program=None): + if main_program is None: + main_program = g_main_program + if not isinstance(target_vars, list): + target_vars = [target_vars] + + pruned_program = main_program.prune(targets=target_vars) + inference_program = pruned_program.inference_optimize() + return inference_program + + def save_inference_model(dirname, feeded_var_names, target_vars, @@ -177,13 +189,14 @@ def save_inference_model(dirname, if not os.path.isdir(dirname): os.makedirs(dirname) - pruned_program = main_program.prune(target_vars) + pruned_program = main_program.prune(targets=target_vars) + inference_program = pruned_program.inference_optimize() 
fetch_var_names = [v.name for v in target_vars] model_file_name = dirname + "/__model__" with open(model_file_name, "w") as f: pickle.dump({ - "program_desc_str": pruned_program.desc.serialize_to_string(), + "program_desc_str": inference_program.desc.serialize_to_string(), "feed_var_names": feeded_var_names, "fetch_var_names": fetch_var_names }, f, -1) diff --git a/python/paddle/v2/fluid/layer_helper.py b/python/paddle/v2/fluid/layer_helper.py index e40551ca73e991edd8e1d1df5b103c36367b7050..e0880354fbc5a09bd49de7ec9c5dffc1e3c6259e 100644 --- a/python/paddle/v2/fluid/layer_helper.py +++ b/python/paddle/v2/fluid/layer_helper.py @@ -108,8 +108,8 @@ class LayerHelper(object): dtype = None for each in inputs: if dtype is None: - dtype = each.data_type - elif dtype != each.data_type: + dtype = each.dtype + elif dtype != each.dtype: raise ValueError("Data Type mismatch") return dtype @@ -149,7 +149,7 @@ class LayerHelper(object): self.startup_program.global_block().create_var( name=var.name, type=var.type, - dtype=var.data_type, + dtype=var.dtype, shape=var.shape, persistable=True, initializer=initializer) @@ -180,10 +180,10 @@ class LayerHelper(object): b = self.create_parameter( attr=bias_attr, shape=size, - dtype=input_var.data_type, + dtype=input_var.dtype, suffix='b', initializer=bias_initializer) - tmp = self.create_tmp_variable(dtype=input_var.data_type) + tmp = self.create_tmp_variable(dtype=input_var.dtype) self.append_op( type='elementwise_add', inputs={'X': [input_var], @@ -198,7 +198,7 @@ class LayerHelper(object): return input_var if isinstance(act, basestring): act = {'type': act} - tmp = self.create_tmp_variable(dtype=input_var.data_type) + tmp = self.create_tmp_variable(dtype=input_var.dtype) act_type = act.pop('type') self.append_op( type=act_type, diff --git a/python/paddle/v2/fluid/layers.py b/python/paddle/v2/fluid/layers.py index fac91aac97267b1ecc867bb9b0b1f8fd40f2f299..d094035fe5cae2e77fc2364e8ccb03c350f1301a 100644 --- 
a/python/paddle/v2/fluid/layers.py +++ b/python/paddle/v2/fluid/layers.py @@ -114,7 +114,7 @@ def embedding(input, is_sparse=False, param_initializer=None, param_attr=None, - data_type='float32', + dtype='float32', main_program=None, startup_program=None): """ @@ -125,7 +125,7 @@ def embedding(input, size: The size of the layer is_sparse: A flag that decleares whether the input is sparse param_attr: Parameters for this layer - data_type: The type of data : float32, float_16, int etc + dtype: The type of data : float32, float_16, int etc main_program: Name of the main program that calls this startup_program: Name of the startup program @@ -145,9 +145,9 @@ def embedding(input, w = helper.create_parameter( attr=helper.param_attr, shape=size, - dtype=data_type, + dtype=dtype, initializer=param_initializer or _get_default_param_initializer()) - tmp = helper.create_tmp_variable(data_type) + tmp = helper.create_tmp_variable(dtype) helper.append_op( type='lookup_table', inputs={'Ids': input, @@ -167,23 +167,23 @@ def dynamic_lstm(input, gate_activation='sigmoid', cell_activation='tanh', candidate_activation='tanh', - data_type='float32', + dtype='float32', main_program=None, startup_program=None): helper = LayerHelper('lstm', **locals()) size = size / 4 weight = helper.create_parameter( - attr=helper.param_attr, shape=[size, 4 * size], dtype=data_type) + attr=helper.param_attr, shape=[size, 4 * size], dtype=dtype) bias_size = [1, 7 * size] if not use_peepholes: bias_size[1] = 4 * size bias = helper.create_parameter( - attr=helper.bias_attr, shape=bias_size, dtype=data_type, suffix='b') + attr=helper.bias_attr, shape=bias_size, dtype=dtype, suffix='b') - hidden = helper.create_tmp_variable(data_type) - cell = helper.create_tmp_variable(data_type) - batch_gate = helper.create_tmp_variable(data_type) - batch_cell_pre_act = helper.create_tmp_variable(data_type) + hidden = helper.create_tmp_variable(dtype) + cell = helper.create_tmp_variable(dtype) + batch_gate = 
helper.create_tmp_variable(dtype) + batch_cell_pre_act = helper.create_tmp_variable(dtype) helper.append_op( type='lstm', @@ -209,7 +209,7 @@ def dynamic_lstm(input, def data(name, shape, append_batch_size=True, - data_type='float32', + dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, main_program=None, startup_program=None, @@ -221,7 +221,7 @@ def data(name, name: The name/alias of the function shape: Tuple declaring the shape. append_batch_size: Whether or not to append the data as a batch. - data_type: The type of data : float32, float_16, int etc + dtype: The type of data : float32, float_16, int etc type: The output type. By default it is LOD_TENSOR. main_program: Name of the main program that calls this startup_program: Name of the startup program @@ -251,7 +251,7 @@ def data(name, return helper.create_global_variable( name=name, shape=shape, - dtype=data_type, + dtype=dtype, type=type, stop_gradient=stop_gradient) @@ -362,9 +362,9 @@ def _create_op_func_(op_type): o_name = not_intermediate_outputs[0].name intermediate_output_names = [output.name for output in intermediate_outputs] - def infer_and_check_data_type(op_proto, **kwargs): + def infer_and_check_dtype(op_proto, **kwargs): """ - This function performs the sanity check for data_type and + This function performs the sanity check for dtype and instance type. 
""" dtype = None @@ -379,8 +379,8 @@ def _create_op_func_(op_type): op_type)) if dtype is None: - dtype = each.data_type - elif dtype != each.data_type: + dtype = each.dtype + elif dtype != each.dtype: raise ValueError( "operator {0} must input same dtype".format(op_type)) @@ -389,7 +389,7 @@ def _create_op_func_(op_type): def func(**kwargs): helper = LayerHelper(op_type, **kwargs) - dtype = infer_and_check_data_type(op_proto, **kwargs) + dtype = infer_and_check_dtype(op_proto, **kwargs) inputs = dict() for ipt in op_proto.inputs: @@ -426,19 +426,19 @@ _create_op_func_('reshape') _create_op_func_('transpose') -def cast(x, data_type, main_program=None): +def cast(x, dtype, main_program=None): """ - This function takes in the input with input_data_type - and casts it to the output_data_type as the output. + This function takes in the input with input_dtype + and casts it to the output_dtype as the output. """ helper = LayerHelper('cast', **locals()) - out = helper.create_tmp_variable(dtype=data_type) + out = helper.create_tmp_variable(dtype=dtype) helper.append_op( type='cast', inputs={'X': [x]}, outputs={'Out': [out]}, - attrs={'in_data_type': x.data_type, - 'out_data_type': out.data_type}) + attrs={'in_dtype': x.dtype, + 'out_dtype': out.dtype}) return out @@ -519,8 +519,8 @@ def split_lod_tensor(input, main_program=None, startup_program=None): helper = LayerHelper('split_lod_tensor', **locals()) - out_true = helper.create_tmp_variable(dtype=input.data_type) - out_false = helper.create_tmp_variable(dtype=input.data_type) + out_true = helper.create_tmp_variable(dtype=input.dtype) + out_false = helper.create_tmp_variable(dtype=input.dtype) helper.append_op( type='split_lod_tensor', inputs={ @@ -541,7 +541,7 @@ def merge_lod_tensor(in_true, main_program=None, startup_program=None): helper = LayerHelper('merge_lod_tensor', **locals()) - out = helper.create_tmp_variable(dtype=in_true.data_type) + out = helper.create_tmp_variable(dtype=in_true.dtype) helper.append_op( 
type='merge_lod_tensor', inputs={'X': x, @@ -559,9 +559,9 @@ def cos_sim(X, Y, **kwargs): X and Y and returns that as the output. """ helper = LayerHelper('cos_sim', **kwargs) - out = helper.create_tmp_variable(dtype=X.data_type) - xnorm = helper.create_tmp_variable(dtype=X.data_type) - ynorm = helper.create_tmp_variable(dtype=X.data_type) + out = helper.create_tmp_variable(dtype=X.dtype) + xnorm = helper.create_tmp_variable(dtype=X.dtype) + ynorm = helper.create_tmp_variable(dtype=X.dtype) helper.append_op( type='cos_sim', inputs={'X': [X], @@ -577,7 +577,7 @@ def cross_entropy(input, label, **kwargs): This function computes cross_entropy using the input and label. """ helper = LayerHelper('cross_entropy', **kwargs) - out = helper.create_tmp_variable(dtype=input.data_type) + out = helper.create_tmp_variable(dtype=input.dtype) helper.append_op( type='cross_entropy', inputs={'X': [input], @@ -593,14 +593,14 @@ def square_error_cost(input, label, **kwargs): The output is appending the op to do the above. """ helper = LayerHelper('square_error_cost', **kwargs) - minus_out = helper.create_tmp_variable(dtype=input.data_type) + minus_out = helper.create_tmp_variable(dtype=input.dtype) helper.append_op( type='elementwise_sub', inputs={'X': [input], 'Y': [label]}, outputs={'Out': [minus_out]}) - square_out = helper.create_tmp_variable(dtype=input.data_type) + square_out = helper.create_tmp_variable(dtype=input.dtype) helper.append_op( type='square', inputs={'X': [minus_out]}, outputs={'Y': [square_out]}) return square_out @@ -612,7 +612,7 @@ def accuracy(input, label, k=1, **kwargs): The output is the top_k inputs and their indices. 
""" helper = LayerHelper("accuracy", **kwargs) - topk_out = helper.create_tmp_variable(dtype=input.data_type) + topk_out = helper.create_tmp_variable(dtype=input.dtype) topk_indices = helper.create_tmp_variable(dtype="int64") helper.append_op( type="top_k", @@ -883,12 +883,12 @@ def batch_norm(input, initializer=ConstantInitializer(0.0)) mean = helper.create_global_variable( - dtype=input.data_type, shape=param_shape, persistable=True) + dtype=input.dtype, shape=param_shape, persistable=True) helper.set_variable_initializer( var=mean, initializer=ConstantInitializer(0.0)) variance = helper.create_global_variable( - dtype=input.data_type, shape=param_shape, persistable=True) + dtype=input.dtype, shape=param_shape, persistable=True) helper.set_variable_initializer( var=variance, initializer=ConstantInitializer(1.0)) @@ -927,8 +927,8 @@ def batch_norm(input, def beam_search_decode(ids, scores, main_program=None, startup_program=None): helper = LayerHelper('beam_search_decode', **locals()) - sentence_ids = helper.create_tmp_variable(dtype=ids.data_type) - sentence_scores = helper.create_tmp_variable(dtype=ids.data_type) + sentence_ids = helper.create_tmp_variable(dtype=ids.dtype) + sentence_scores = helper.create_tmp_variable(dtype=ids.dtype) helper.append_op( type="beam_search_decode", @@ -1066,7 +1066,7 @@ class StaticRNN(object): boot_var = parent_block.create_var( name=var_name, shape=shape, - dtype=batch_ref.data_type, + dtype=batch_ref.dtype, persistable=False) parent_block.append_op( @@ -1076,7 +1076,7 @@ class StaticRNN(object): attrs={ 'value': init_value, 'shape': boot_var.shape, - 'data_type': boot_var.data_type, + 'dtype': boot_var.dtype, 'input_dim_idx': ref_batch_dim_idx, 'output_dim_idx': init_batch_dim_idx }) @@ -1085,7 +1085,7 @@ class StaticRNN(object): else: pre_mem = self.helper.create_variable( name=unique_name("@".join([self.helper.name, "mem"])), - dtype=init.data_type, + dtype=init.dtype, shape=init.shape) self.memories[pre_mem.name] = 
StaticRNNMemoryLink( init=init, pre_mem=pre_mem) @@ -1101,10 +1101,7 @@ class StaticRNN(object): raise ValueError("Static RNN only take fix seq_len input") ipt = self.helper.create_variable( - name=x.name, - dtype=x.data_type, - shape=list(x.shape[1:]), - type=x.type) + name=x.name, dtype=x.dtype, shape=list(x.shape[1:]), type=x.type) self.inputs.append(ipt) return ipt @@ -1113,17 +1110,17 @@ class StaticRNN(object): if not isinstance(o, Variable): raise TypeError("step output takes a Variable") - tmp_o = self.helper.create_tmp_variable(dtype=o.data_type) + tmp_o = self.helper.create_tmp_variable(dtype=o.dtype) self.helper.append_op( type='rnn_memory_helper', inputs={'X': [o]}, outputs={'Out': tmp_o}, - attrs={'data_type': o.data_type}) + attrs={'dtype': o.dtype}) out_var = self.parent_block().create_var( name=tmp_o.name, shape=[self.seq_len] + list(tmp_o.shape), - dtype=tmp_o.data_type) + dtype=tmp_o.dtype) self.outputs.append(out_var) @@ -1195,13 +1192,13 @@ class StaticRNN(object): pre_memories.append(mem.pre_mem.name) mem_var = rnn_block.var(mem.mem.name) assert isinstance(mem_var, Variable) - new_mem = self.helper.create_tmp_variable(dtype=mem_var.data_type) + new_mem = self.helper.create_tmp_variable(dtype=mem_var.dtype) rnn_block.append_op( type='rnn_memory_helper', inputs={'X': [mem_var]}, outputs={'Out': [new_mem]}, - attrs={'data_type': mem_var.data_type}) + attrs={'dtype': mem_var.dtype}) memories.append(new_mem.name) @@ -1251,7 +1248,7 @@ class While(object): if not isinstance(cond, Variable): raise TypeError("condition should be a variable") assert isinstance(cond, Variable) - if cond.data_type != core.DataType.BOOL: + if cond.dtype != core.DataType.BOOL: raise TypeError("condition should be a bool variable") if reduce(lambda a, b: a * b, cond.shape, 1) != 1: raise TypeError("condition should be a bool scalar") @@ -1323,9 +1320,9 @@ def lstm(x, main_program=main_program, startup_program=startup_program) - data_type = x.data_type - c = 
helper.create_tmp_variable(data_type) - h = helper.create_tmp_variable(data_type) + dtype = x.dtype + c = helper.create_tmp_variable(dtype) + h = helper.create_tmp_variable(dtype) helper.append_op( type='lstm_unit', @@ -1367,7 +1364,7 @@ def lod_tensor_to_array(x, table, main_program=None): array = helper.create_variable( name=unique_name("lod_tensor_to_array"), type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, - dtype=x.data_type) + dtype=x.dtype) helper.append_op( type='lod_tensor_to_array', inputs={'X': x, @@ -1382,7 +1379,7 @@ def array_to_lod_tensor(x, table, main_program=None): LOD_Tensor. """ helper = LayerHelper("array_to_lod_tensor", **locals()) - tmp = helper.create_tmp_variable(dtype=x.data_type) + tmp = helper.create_tmp_variable(dtype=x.dtype) helper.append_op( type="array_to_lod_tensor", inputs={'X': x, @@ -1394,7 +1391,7 @@ def array_to_lod_tensor(x, table, main_program=None): def fill_constant(shape, dtype, value, main_program=None, startup_program=None): """ This function creates a tensor , with shape as mentioned in the input and - specified data_type and fills this up with a constant value that + specified dtype and fills this up with a constant value that comes in the input. It also sets the stop_gradient to be True. 
""" helper = LayerHelper("fill_constant", **locals()) @@ -1403,11 +1400,9 @@ def fill_constant(shape, dtype, value, main_program=None, startup_program=None): type='fill_constant', inputs={}, outputs={'Out': [out]}, - attrs={ - 'shape': shape, - 'data_type': out.data_type, - 'value': float(value) - }) + attrs={'shape': shape, + 'dtype': out.dtype, + 'value': float(value)}) out.stop_gradient = True return out @@ -1428,7 +1423,7 @@ def fill_constant_batch_size_like(input, outputs={'Out': [out]}, attrs={ 'shape': shape, - 'data_type': out.data_type, + 'dtype': out.dtype, 'value': float(value), 'input_dim_idx': input_dim_idx, 'output_dim_idx': output_dim_idx @@ -1461,7 +1456,7 @@ def increment(x, value=1.0, in_place=True, main_program=None): """ helper = LayerHelper("increment", **locals()) if not in_place: - out = helper.create_tmp_variable(dtype=x.data_type) + out = helper.create_tmp_variable(dtype=x.dtype) else: out = x helper.append_op( @@ -1482,7 +1477,7 @@ def array_write(x, i, array=None, main_program=None): array = helper.create_variable( name="{0}.out".format(helper.name), type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, - dtype=x.data_type) + dtype=x.dtype) helper.append_op( type='write_to_array', inputs={'X': [x], @@ -1521,7 +1516,7 @@ def array_read(array, i, main_program=None): array, Variable) or array.type != core.VarDesc.VarType.LOD_TENSOR_ARRAY: raise TypeError("array should be tensor array vairable") - out = helper.create_tmp_variable(dtype=array.data_type) + out = helper.create_tmp_variable(dtype=array.dtype) helper.append_op( type='read_from_array', inputs={'X': [array], @@ -1536,7 +1531,7 @@ def shrink_memory(x, i, table, main_program=None): as mentioned in the input parameter. 
""" helper = LayerHelper('shrink_memory', **locals()) - out = helper.create_tmp_variable(dtype=x.data_type) + out = helper.create_tmp_variable(dtype=x.dtype) helper.append_op( type='shrink_rnn_memory', inputs={'X': [x], @@ -1698,11 +1693,11 @@ class IfElse(object): parent_block = self.parent_block() out_true = parent_block.create_var( name=unique_name('ifelse_input' + self.helper.name), - dtype=x.data_type) + dtype=x.dtype) out_false = parent_block.create_var( name=unique_name('ifelse_input' + self.helper.name), - dtype=x.data_type) + dtype=x.dtype) parent_block.append_op( type='split_lod_tensor', inputs={ @@ -1744,7 +1739,7 @@ class IfElse(object): # create outside tensor outside_out = parent_block.create_var( name=unique_name("_".join([self.helper.name, 'output'])), - dtype=each_out.data_type) + dtype=each_out.dtype) out_table.append(outside_out) # assign local var to outside diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py index 87a478c2903b77d955ebde49a4a0e507c9e9ffd3..e82f0f060de6af63f63d5601ae94059192076e6f 100644 --- a/python/paddle/v2/fluid/optimizer.py +++ b/python/paddle/v2/fluid/optimizer.py @@ -92,7 +92,7 @@ class Optimizer(object): var = self.helper.create_global_variable( name=unique_name(name), persistable=True, - dtype=dtype or param.data_type, + dtype=dtype or param.dtype, type=param.type, shape=param.shape) self.helper.set_variable_initializer( @@ -202,7 +202,7 @@ class Optimizer(object): """ params_grads = append_backward_ops(loss, parameter_list, no_grad_set or set()) - # Add regularization if any + # Add regularization if any params_grads = append_regularization_ops(params_grads) optimize_ops = self.create_optimization_pass(params_grads, loss, startup_program) diff --git a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py index a7f3bfc0caf76302674a00c80c2bd9ebf834f872..a899f1088d77c4ca6462cf5306393444ea114e6c 100644 --- 
a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py @@ -7,11 +7,11 @@ from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.io import save_persistables, load_persistables from paddle.v2.fluid.optimizer import SGDOptimizer -x = layers.data(name='x', shape=[13], data_type='float32') +x = layers.data(name='x', shape=[13], dtype='float32') y_predict = layers.fc(input=x, size=1, act=None) -y = layers.data(name='y', shape=[1], data_type='float32') +y = layers.data(name='y', shape=[1], dtype='float32') cost = layers.square_error_cost(input=y_predict, label=y) avg_cost = layers.mean(x=cost) diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py index efe63a68f0745eb728b569a03d0344877c1484f7..76cbd410f94a4be04ba71d1e3175eaed590ac80a 100644 --- a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py +++ b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py @@ -5,6 +5,7 @@ import paddle.v2.fluid.framework as framework import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets import paddle.v2.fluid.evaluator as evaluator +from paddle.v2.fluid.io import get_inference_program from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.initializer import XavierInitializer from paddle.v2.fluid.optimizer import AdamOptimizer @@ -90,8 +91,8 @@ def vgg16_bn_drop(input): classdim = 10 data_shape = [3, 32, 32] -images = layers.data(name='pixel', shape=data_shape, data_type='float32') -label = layers.data(name='label', shape=[1], data_type='int64') +images = layers.data(name='pixel', shape=data_shape, dtype='float32') +label = layers.data(name='label', shape=[1], dtype='int64') # Add neural network config # option 1. 
resnet @@ -116,9 +117,11 @@ PASS_NUM = 1 train_reader = paddle.batch( paddle.reader.shuffle( - paddle.dataset.cifar.train10(), buf_size=128 * 10), + paddle.dataset.cifar.train10(), buf_size=BATCH_SIZE * 10), batch_size=BATCH_SIZE) +test_reader = paddle.batch(paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE) + place = core.CPUPlace() exe = Executor(place) @@ -149,10 +152,41 @@ for pass_id in range(PASS_NUM): loss = np.array(outs[0]) acc = np.array(outs[1]) pass_acc = accuracy.eval(exe) + + batch_id = batch_id + 1 + + test_accuracy, test_acc_out = evaluator.accuracy( + input=predict, label=label) + + test_target = [avg_cost, test_acc_out] + test_accuracy.states().values() + inference_program = get_inference_program(test_target) + + test_accuracy.reset(exe) + + for data in test_reader(): + x_data = np.array(map(lambda x: x[0].reshape(data_shape), + data)).astype("float32") + y_data = np.array(map(lambda x: x[1], data)).astype("int64") + y_data = np.expand_dims(y_data, axis=1) + + tensor_x = core.LoDTensor() + tensor_x.set(x_data, place) + + tensor_y = core.LoDTensor() + tensor_y.set(y_data, place) + + outs = exe.run(inference_program, + feed={'pixel': tensor_x, + 'label': tensor_y}, + fetch_list=[avg_cost, test_acc_out]) + out = np.array(outs[0]) + acc = np.array(outs[1]) + + test_pass_acc = test_accuracy.eval(exe) + print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) + " loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str( - pass_acc)) - batch_id = batch_id + 1 + pass_acc) + " test_pass_acc:" + str(test_pass_acc)) if batch_id > 1: # this model is slow, so if we can train two mini batch, we think it works properly. 
diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py index f66e6e748b76dec53a9e24b5b352d31395ce6bde..9c9064ba9639829ef3afd8111278b17035bee84a 100644 --- a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py @@ -34,26 +34,26 @@ def load_parameter(file_name, h, w): def db_lstm(): # 8 features - word = layers.data(name='word_data', shape=[1], data_type='int64') - predicate = layers.data(name='verb_data', shape=[1], data_type='int64') - ctx_n2 = layers.data(name='ctx_n2_data', shape=[1], data_type='int64') - ctx_n1 = layers.data(name='ctx_n1_data', shape=[1], data_type='int64') - ctx_0 = layers.data(name='ctx_0_data', shape=[1], data_type='int64') - ctx_p1 = layers.data(name='ctx_p1_data', shape=[1], data_type='int64') - ctx_p2 = layers.data(name='ctx_p2_data', shape=[1], data_type='int64') - mark = layers.data(name='mark_data', shape=[1], data_type='int64') + word = layers.data(name='word_data', shape=[1], dtype='int64') + predicate = layers.data(name='verb_data', shape=[1], dtype='int64') + ctx_n2 = layers.data(name='ctx_n2_data', shape=[1], dtype='int64') + ctx_n1 = layers.data(name='ctx_n1_data', shape=[1], dtype='int64') + ctx_0 = layers.data(name='ctx_0_data', shape=[1], dtype='int64') + ctx_p1 = layers.data(name='ctx_p1_data', shape=[1], dtype='int64') + ctx_p2 = layers.data(name='ctx_p2_data', shape=[1], dtype='int64') + mark = layers.data(name='mark_data', shape=[1], dtype='int64') predicate_embedding = layers.embedding( input=predicate, size=[pred_len, word_dim], - data_type='float32', + dtype='float32', is_sparse=IS_SPARSE, param_attr={'name': 'vemb'}) mark_embedding = layers.embedding( input=mark, size=[mark_dict_len, mark_dim], - data_type='float32', + dtype='float32', is_sparse=IS_SPARSE) word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] @@ -125,7 +125,7 @@ def to_lodtensor(data, 
place): def main(): # define network topology feature_out = db_lstm() - target = layers.data(name='target', shape=[1], data_type='int64') + target = layers.data(name='target', shape=[1], dtype='int64') crf_cost = layers.linear_chain_crf( input=feature_out, label=target, diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py index 8f737689609fec4d1819ae58b9665298547a3716..0bea5f95c895b278db86f25f54e2795d3ec0af69 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py @@ -8,8 +8,8 @@ import paddle.v2.fluid.nets as nets from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.optimizer import AdamOptimizer -images = layers.data(name='pixel', shape=[1, 28, 28], data_type='float32') -label = layers.data(name='label', shape=[1], data_type='int64') +images = layers.data(name='pixel', shape=[1, 28, 28], dtype='float32') +label = layers.data(name='label', shape=[1], dtype='int64') conv_pool_1 = nets.simple_img_conv_pool( input=images, filter_size=5, diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py index e42e4c9cc0024e193b0732df6d9ca3200df5f0b9..f57a5c8d98cd8b89e1d300b4d1fe00d6b24b0d68 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py @@ -4,13 +4,14 @@ import paddle.v2.fluid.core as core import paddle.v2.fluid.framework as framework import paddle.v2.fluid.layers as layers import paddle.v2.fluid.evaluator as evaluator +from paddle.v2.fluid.io import get_inference_program from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.initializer import UniformInitializer from paddle.v2.fluid.optimizer import MomentumOptimizer from paddle.v2.fluid.regularizer import L2DecayRegularizer BATCH_SIZE = 128 
-image = layers.data(name='x', shape=[784], data_type='float32') +image = layers.data(name='x', shape=[784], dtype='float32') param_attr = { 'name': None, @@ -27,7 +28,7 @@ predict = layers.fc(input=hidden2, act='softmax', param_attr=param_attr) -label = layers.data(name='y', shape=[1], data_type='int64') +label = layers.data(name='y', shape=[1], dtype='int64') cost = layers.cross_entropy(input=predict, label=label) avg_cost = layers.mean(x=cost) @@ -42,6 +43,8 @@ train_reader = paddle.batch( paddle.dataset.mnist.train(), buf_size=8192), batch_size=BATCH_SIZE) +test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128) + place = core.CPUPlace() exe = Executor(place) @@ -69,8 +72,36 @@ for pass_id in range(PASS_NUM): acc = np.array(outs[1]) pass_acc = accuracy.eval(exe) - if pass_acc > 0.7: + test_accuracy, test_acc_out = evaluator.accuracy( + input=predict, label=label) + + test_target = [avg_cost, test_acc_out] + test_accuracy.states().values() + inference_program = get_inference_program(test_target) + + test_accuracy.reset(exe) + for data in test_reader(): + x_data = np.array(map(lambda x: x[0], data)).astype("float32") + y_data = np.array(map(lambda x: x[1], data)).astype("int64") + y_data = np.expand_dims(y_data, axis=1) + + tensor_x = core.LoDTensor() + tensor_x.set(x_data, place) + + tensor_y = core.LoDTensor() + tensor_y.set(y_data, place) + + outs = exe.run(inference_program, + feed={'x': tensor_x, + 'y': tensor_y}, + fetch_list=[avg_cost, test_acc_out]) + out = np.array(outs[0]) + acc = np.array(outs[1]) + + test_pass_acc = test_accuracy.eval(exe) + print("pass_id=" + str(pass_id) + " train_cost=" + str( + out) + " train_acc=" + str(acc) + " train_pass_acc=" + str(pass_acc) + + " test_acc=" + str(test_pass_acc)) + + if test_pass_acc > 0.7: exit(0) - # print("pass_id=" + str(pass_id) + " auc=" + - # str(acc) + " pass_acc=" + str(pass_acc)) exit(1) diff --git a/python/paddle/v2/fluid/tests/book/test_recommender_system.py 
b/python/paddle/v2/fluid/tests/book/test_recommender_system.py index 55ded3aed3a23c8cd7795f915dc1cbd512c6d945..f8dc1518579d5a9d7a8d0498dcc5fd8a6d1692c4 100644 --- a/python/paddle/v2/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/v2/fluid/tests/book/test_recommender_system.py @@ -18,11 +18,11 @@ def get_usr_combined_features(): USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1 - uid = layers.data(name='user_id', shape=[1], data_type='int64') + uid = layers.data(name='user_id', shape=[1], dtype='int64') usr_emb = layers.embedding( input=uid, - data_type='float32', + dtype='float32', size=[USR_DICT_SIZE, 32], param_attr={'name': 'user_table'}, is_sparse=IS_SPARSE) @@ -31,7 +31,7 @@ def get_usr_combined_features(): USR_GENDER_DICT_SIZE = 2 - usr_gender_id = layers.data(name='gender_id', shape=[1], data_type='int64') + usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64') usr_gender_emb = layers.embedding( input=usr_gender_id, @@ -42,7 +42,7 @@ def get_usr_combined_features(): usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) - usr_age_id = layers.data(name='age_id', shape=[1], data_type="int64") + usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64") usr_age_emb = layers.embedding( input=usr_age_id, @@ -53,7 +53,7 @@ def get_usr_combined_features(): usr_age_fc = layers.fc(input=usr_age_emb, size=16) USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 - usr_job_id = layers.data(name='job_id', shape=[1], data_type="int64") + usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64") usr_job_emb = layers.embedding( input=usr_job_id, @@ -75,11 +75,11 @@ def get_mov_combined_features(): MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1 - mov_id = layers.data(name='movie_id', shape=[1], data_type='int64') + mov_id = layers.data(name='movie_id', shape=[1], dtype='int64') mov_emb = layers.embedding( input=mov_id, - 
data_type='float32', + dtype='float32', size=[MOV_DICT_SIZE, 32], param_attr={'name': 'movie_table'}, is_sparse=IS_SPARSE) @@ -88,7 +88,7 @@ def get_mov_combined_features(): CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) - category_id = layers.data(name='category_id', shape=[1], data_type='int64') + category_id = layers.data(name='category_id', shape=[1], dtype='int64') mov_categories_emb = layers.embedding( input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE) @@ -98,7 +98,7 @@ def get_mov_combined_features(): MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) - mov_title_id = layers.data(name='movie_title', shape=[1], data_type='int64') + mov_title_id = layers.data(name='movie_title', shape=[1], dtype='int64') mov_title_emb = layers.embedding( input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE) @@ -126,7 +126,7 @@ def model(): # need cos sim inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features) - label = layers.data(name='score', shape=[1], data_type='float32') + label = layers.data(name='score', shape=[1], dtype='float32') square_cost = layers.square_error_cost(input=inference, label=label) diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py index 4929f7cf615e61de5c4f61ef44c5340e9ac4492a..3103be83a63d64fcba87132ddc5d830b92047b27 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py @@ -10,8 +10,8 @@ from paddle.v2.fluid.optimizer import AdamOptimizer def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32): - data = layers.data(name="words", shape=[1], data_type="int64") - label = layers.data(name="label", shape=[1], data_type="int64") + data = layers.data(name="words", shape=[1], dtype="int64") + label = layers.data(name="label", shape=[1], 
dtype="int64") emb = layers.embedding(input=data, size=[input_dim, emb_dim]) conv_3 = nets.sequence_conv_pool( diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py index b3ee91938865afb929670a388a561b156aec1fe9..208978224f4e83a23efadae37fbe51d0d59dafe8 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py @@ -14,8 +14,8 @@ def stacked_lstm_net(input_dim, hid_dim=512, stacked_num=3): assert stacked_num % 2 == 1 - data = layers.data(name="words", shape=[1], data_type="int64") - label = layers.data(name="label", shape=[1], data_type="int64") + data = layers.data(name="words", shape=[1], dtype="int64") + label = layers.data(name="label", shape=[1], dtype="int64") emb = layers.embedding(input=data, size=[input_dim, emb_dim]) # add bias attr diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py index 9a51a2f207ebed340b8e5c60e7ebeb82a611dbc5..8aebeba653cf49438929fa51312b5af33c3b438d 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py @@ -12,19 +12,19 @@ def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50): name="words", shape=[seq_len * batch_size, 1], append_batch_size=False, - data_type="int64") + dtype="int64") label = layers.data( name="label", shape=[batch_size, 1], append_batch_size=False, - data_type="int64") + dtype="int64") emb = layers.embedding(input=data, size=[dict_dim, emb_dim]) emb = layers.reshape(x=emb, shape=[batch_size, seq_len, emb_dim]) emb = layers.transpose(x=emb, axis=[1, 0, 2]) c_pre_init = layers.fill_constant( - dtype=emb.data_type, shape=[batch_size, emb_dim], value=0.0) + dtype=emb.dtype, 
shape=[batch_size, emb_dim], value=0.0) layer_1_out = layers.lstm(emb, c_pre_init=c_pre_init, hidden_dim=emb_dim) layer_1_out = layers.transpose(x=layer_1_out, axis=[1, 0, 2]) diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py index afa7b285198e0349317e123e4bd98e8336217afa..0629e1cab7fd7e501d9cbf3ae8ee22fe9383ad2b 100644 --- a/python/paddle/v2/fluid/tests/book/test_word2vec.py +++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py @@ -16,34 +16,34 @@ IS_SPARSE = True word_dict = paddle.dataset.imikolov.build_dict() dict_size = len(word_dict) -first_word = layers.data(name='firstw', shape=[1], data_type='int64') -second_word = layers.data(name='secondw', shape=[1], data_type='int64') -third_word = layers.data(name='thirdw', shape=[1], data_type='int64') -forth_word = layers.data(name='forthw', shape=[1], data_type='int64') -next_word = layers.data(name='nextw', shape=[1], data_type='int64') +first_word = layers.data(name='firstw', shape=[1], dtype='int64') +second_word = layers.data(name='secondw', shape=[1], dtype='int64') +third_word = layers.data(name='thirdw', shape=[1], dtype='int64') +forth_word = layers.data(name='forthw', shape=[1], dtype='int64') +next_word = layers.data(name='nextw', shape=[1], dtype='int64') embed_first = layers.embedding( input=first_word, size=[dict_size, EMBED_SIZE], - data_type='float32', + dtype='float32', is_sparse=IS_SPARSE, param_attr={'name': 'shared_w'}) embed_second = layers.embedding( input=second_word, size=[dict_size, EMBED_SIZE], - data_type='float32', + dtype='float32', is_sparse=IS_SPARSE, param_attr={'name': 'shared_w'}) embed_third = layers.embedding( input=third_word, size=[dict_size, EMBED_SIZE], - data_type='float32', + dtype='float32', is_sparse=IS_SPARSE, param_attr={'name': 'shared_w'}) embed_forth = layers.embedding( input=forth_word, size=[dict_size, EMBED_SIZE], - data_type='float32', + dtype='float32', is_sparse=IS_SPARSE, param_attr={'name': 
'shared_w'}) diff --git a/python/paddle/v2/fluid/tests/op_test.py b/python/paddle/v2/fluid/tests/op_test.py index 90269e308a31d2606b23d741ce0d0fa91a0a6aeb..51023bd19a8326152335eabc9e96600427527f26 100644 --- a/python/paddle/v2/fluid/tests/op_test.py +++ b/python/paddle/v2/fluid/tests/op_test.py @@ -458,7 +458,7 @@ class OpTest(unittest.TestCase): mean_inputs = map(block.var, output_names) if len(mean_inputs) == 1: - loss = block.create_var(dtype=mean_inputs[0].data_type, shape=[1]) + loss = block.create_var(dtype=mean_inputs[0].dtype, shape=[1]) op = block.append_op( inputs={"X": mean_inputs}, outputs={"Out": loss}, type='mean') op.desc.infer_var_type(block.desc) @@ -466,8 +466,7 @@ class OpTest(unittest.TestCase): else: avg_sum = [] for cur_loss in mean_inputs: - cur_avg_loss = block.create_var( - dtype=cur_loss.data_type, shape=[1]) + cur_avg_loss = block.create_var(dtype=cur_loss.dtype, shape=[1]) op = block.append_op( inputs={"X": [cur_loss]}, outputs={"Out": [cur_avg_loss]}, @@ -476,13 +475,13 @@ class OpTest(unittest.TestCase): op.desc.infer_shape(block.desc) avg_sum.append(cur_avg_loss) - loss_sum = block.create_var(dtype=avg_sum[0].data_type, shape=[1]) + loss_sum = block.create_var(dtype=avg_sum[0].dtype, shape=[1]) op_sum = block.append_op( inputs={"X": avg_sum}, outputs={"Out": loss_sum}, type='sum') op_sum.desc.infer_var_type(block.desc) op_sum.desc.infer_shape(block.desc) - loss = block.create_var(dtype=loss_sum.data_type, shape=[1]) + loss = block.create_var(dtype=loss_sum.dtype, shape=[1]) op_loss = block.append_op( inputs={"X": loss_sum}, outputs={"Out": loss}, diff --git a/python/paddle/v2/fluid/tests/test_activation_op.py b/python/paddle/v2/fluid/tests/test_activation_op.py index 7649e60a3833e34523d87cb963af3888c3cef65d..bd52bef2605874d26e880fb09e589891fc1934d5 100644 --- a/python/paddle/v2/fluid/tests/test_activation_op.py +++ b/python/paddle/v2/fluid/tests/test_activation_op.py @@ -152,6 +152,49 @@ class TestAbs(OpTest): self.check_grad(['X'], 
'Y', max_relative_error=0.007) +class TestCeil(OpTest): + def setUp(self): + self.op_type = "ceil" + x = np.random.uniform(-1, 1, [4, 4]).astype("float32") + self.inputs = {'X': x} + self.outputs = {'Y': np.ceil(self.inputs['X'])} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Y', max_relative_error=0.007) + + +class TestFloor(OpTest): + def setUp(self): + self.op_type = "floor" + x = np.random.uniform(-1, 1, [4, 4]).astype("float32") + self.inputs = {'X': x} + # numpy floor need +1 + self.outputs = {'Y': np.floor(self.inputs['X']) + 1.0} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Y', max_relative_error=0.007) + + +class TestRound(OpTest): + def setUp(self): + self.op_type = "round" + x = np.random.uniform(-1, 1, [4, 4]).astype("float32") + self.inputs = {'X': x} + self.outputs = {'Y': np.round(self.inputs['X'])} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Y', max_relative_error=0.007) + + class TestRelu(OpTest): def setUp(self): self.op_type = "relu" diff --git a/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py b/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py index 8a11820d2aba2dd4d17d925f0e0fe9f324100418..5fad7d8cce5af3677aa77dc0abb64f1ecd380419 100644 --- a/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py +++ b/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py @@ -35,15 +35,15 @@ class TestBeamSearchDecodeOp(unittest.TestCase): self.append_lod_tensor( scores, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]], np.array( - [1, 2, 3, 4, 5, 6], dtype="float32")) + [1, 2, 3, 4, 5, 6], dtype="float64")) self.append_lod_tensor( scores, [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]], np.array( - [0, 1, 2, 3, 4, 5], dtype="float32")) + [0, 1, 2, 3, 4, 5], dtype="float64")) self.append_lod_tensor( scores, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]], np.array( - [0, 1, 
2, 3, 4], dtype="float32")) + [0, 1, 2, 3, 4], dtype="float64")) sentence_ids = self.scope.var("sentence_ids").get_tensor() sentence_scores = self.scope.var("sentence_scores").get_tensor() diff --git a/python/paddle/v2/fluid/tests/test_cast_op.py b/python/paddle/v2/fluid/tests/test_cast_op.py index 0c4b6310652e84d3dd7f281a8b98ae0435072afb..4e431bb88da6070718d64a68467be20ca87f8fb9 100644 --- a/python/paddle/v2/fluid/tests/test_cast_op.py +++ b/python/paddle/v2/fluid/tests/test_cast_op.py @@ -10,8 +10,8 @@ class TestCastOp(op_test.OpTest): self.inputs = {'X': ipt.astype('float32')} self.outputs = {'Out': ipt.astype('float64')} self.attrs = { - 'in_data_type': int(core.DataType.FP32), - 'out_data_type': int(core.DataType.FP64) + 'in_dtype': int(core.DataType.FP32), + 'out_dtype': int(core.DataType.FP64) } self.op_type = 'cast' diff --git a/python/paddle/v2/fluid/tests/test_conditional_block.py b/python/paddle/v2/fluid/tests/test_conditional_block.py index 293803f004a1513611fba30634d5552e1da84fef..2a30fd107968ce0fa188bda44e731ad760dce1f5 100644 --- a/python/paddle/v2/fluid/tests/test_conditional_block.py +++ b/python/paddle/v2/fluid/tests/test_conditional_block.py @@ -9,7 +9,7 @@ import numpy class ConditionalBlock(unittest.TestCase): def test_forward(self): - data = layers.data(name='X', shape=[1], data_type='float32') + data = layers.data(name='X', shape=[1], dtype='float32') data.stop_gradient = False cond = layers.ConditionalBlock(inputs=[data]) out = layers.create_tensor(dtype='float32') diff --git a/python/paddle/v2/fluid/tests/test_dropout_op.py b/python/paddle/v2/fluid/tests/test_dropout_op.py index b14a366fcad7f4bf6968b6013c6cfbb57090071d..4f5ea836b44102e5599a2302efd669291ebe920b 100644 --- a/python/paddle/v2/fluid/tests/test_dropout_op.py +++ b/python/paddle/v2/fluid/tests/test_dropout_op.py @@ -7,7 +7,7 @@ class TestDropoutOp(OpTest): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} - self.attrs = 
{'dropout_prob': 0.0, 'is_training': True} + self.attrs = {'dropout_prob': 0.0, 'is_test': False} self.outputs = { 'Out': self.inputs['X'], 'Mask': np.ones((32, 64)).astype('float32') @@ -24,7 +24,7 @@ class TestDropoutOp2(TestDropoutOp): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} - self.attrs = {'dropout_prob': 1.0, 'is_training': True} + self.attrs = {'dropout_prob': 1.0, 'is_test': False} self.outputs = { 'Out': np.zeros((32, 64)).astype('float32'), 'Mask': np.zeros((32, 64)).astype('float32') @@ -35,7 +35,7 @@ class TestDropoutOp3(TestDropoutOp): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64, 2)).astype("float32")} - self.attrs = {'dropout_prob': 0.0, 'is_training': True} + self.attrs = {'dropout_prob': 0.0, 'is_test': False} self.outputs = { 'Out': self.inputs['X'], 'Mask': np.ones((32, 64, 2)).astype('float32') @@ -46,7 +46,7 @@ class TestDropoutOp4(OpTest): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} - self.attrs = {'dropout_prob': 0.35, 'is_training': False} + self.attrs = {'dropout_prob': 0.35, 'is_test': True} self.outputs = {'Out': self.inputs['X'] * self.attrs['dropout_prob']} def test_check_output(self): @@ -57,7 +57,7 @@ class TestDropoutOp5(OpTest): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64, 3)).astype("float32")} - self.attrs = {'dropout_prob': 0.75, 'is_training': False} + self.attrs = {'dropout_prob': 0.75, 'is_test': True} self.outputs = {'Out': self.inputs['X'] * self.attrs['dropout_prob']} def test_check_output(self): diff --git a/python/paddle/v2/fluid/tests/test_dynamic_recurrent_op.py b/python/paddle/v2/fluid/tests/test_dynamic_recurrent_op.py deleted file mode 100644 index c2d8b48ea944ae40a451492b8e9fad38dda0835c..0000000000000000000000000000000000000000 --- a/python/paddle/v2/fluid/tests/test_dynamic_recurrent_op.py +++ 
/dev/null @@ -1,171 +0,0 @@ -import logging -import paddle.v2.fluid.core as core -import unittest -from paddle.v2.fluid.op import Operator, DynamicRecurrentOp -import numpy as np - -# for siplicity, just one level LoD -lod_py = [[0, 4, 7, 9, 10]] -input_dim = 30 -num_sents = len(lod_py[0]) - 1 -weight_dim = 15 - - -def create_tensor(scope, name, shape, np_data): - tensor = scope.var(name).get_tensor() - tensor.set_dims(shape) - tensor.set(np_data, core.CPUPlace()) - return tensor - - -class PyRNNStep(object): - def __init__(self): - - self.x = np.random.normal(size=(lod_py[0][-1], - input_dim)).astype("float32") - self.W = np.random.normal(size=(input_dim, input_dim)).astype("float32") - self.U = np.random.normal(size=(input_dim, input_dim)).astype("float32") - self.h_boot = np.random.normal(size=(num_sents, - input_dim)).astype("float32") - - -class DynamicRecurrentOpTest(unittest.TestCase): - ''' - Test RNNOp - - equation: - h_t = \sigma (W x_t + U h_{t-1}) - weights: - - W - - U - vars: - - x - states: - - h - outputs: - - h - ''' - - py = PyRNNStep() - - def forward(self): - self.scope = core.Scope() - self.create_global_variables() - self.create_rnn_op() - self.create_step_net() - ctx = core.DeviceContext.create(core.CPUPlace()) - self.rnnop.run(self.scope, ctx) - state = self.rnnop.get_state("h@state") - print 'state size: ', state.size() - - step_inputs = self.rnnop.get_step_input("x") - print "x size ", step_inputs.size() - for i in range(step_inputs.size()): - print "x %d" % i, np.array(step_inputs.read(i).get_dims()) - step_outputs = self.rnnop.get_step_output('h@state') - print 'step_outputs.size ', step_outputs.size() - output = self.scope.find_var("h@state").get_tensor() - print 'output', np.array(output).shape - - def create_global_variables(self): - # create inlink - x_tensor = create_tensor(self.scope, "x", [num_sents, input_dim], - self.py.x) - x_tensor.set_lod(lod_py) - create_tensor(self.scope, "W", [input_dim, input_dim], self.py.W) - 
create_tensor(self.scope, "U", [input_dim, input_dim], self.py.U) - create_tensor(self.scope, "h_boot", [num_sents, input_dim], - self.py.h_boot) - self.scope.var("step_scopes") - self.scope.var("h@state") - - def create_rnn_op(self): - # create RNNOp - self.rnnop = DynamicRecurrentOp( - # inputs - inputs=["x"], - initial_states=["h_boot"], - step_net="step_unit", - # outputs - outputs=["h@state"], - step_scopes="step_scopes", - # attributes - ex_states=["h@pre"], - states=["h@state"]) - - def create_step_net(self): - step_unit = core.Net.create() - x_fc_op = Operator("mul", X="x", Y="W", Out="Wx") - h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh") - sum_op = Operator("sum", X=["Wx", "Uh"], Out="sum") - sig_op = Operator("sigmoid", X="sum", Y="h@state") - - for op in [x_fc_op, h_fc_op, sum_op, sig_op]: - step_unit.append_op(op) - step_unit.complete_add_op(True) - self.rnnop.set_step_unit(step_unit) - - def test_forward(self): - print 'test recurrent op forward' - pd_output = self.forward() - print 'pd_output', pd_output - - -class RecurrentGradientOpTest(unittest.TestCase): - py = PyRNNStep() - - def create_forward_op(self): - # create RNNOp - self.forward_op = DynamicRecurrentOp( - # inputs - inputs=["x"], - initial_states=["h_boot"], - step_net="step_unit", - # outputs - outputs=["h@state"], - step_scopes="step_scopes", - # attributes - ex_states=["h@pre"], - states=["h@state"]) - - def create_gradient_op(self): - a = set() - backward_op = core.DynamicRecurrentOp.backward(self.forward_op, a) - - def create_step_net(self): - step_unit = core.Net.create() - x_fc_op = Operator("mul", X="x", Y="W", Out="Wx") - h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh") - sum_op = Operator("sum", X=["Wx", "Uh"], Out="sum") - sig_op = Operator("sigmoid", X="sum", Y="h@state") - - for op in [x_fc_op, h_fc_op, sum_op, sig_op]: - step_unit.append_op(op) - step_unit.complete_add_op(True) - self.forward_op.set_step_unit(step_unit) - - def create_global_variables(self): - # 
create inlink - x_tensor = create_tensor(self.scope, "x", [num_sents, input_dim], - self.py.x) - x_tensor.set_lod(lod_py) - create_tensor(self.scope, "W", [input_dim, input_dim], self.py.W) - create_tensor(self.scope, "U", [input_dim, input_dim], self.py.U) - create_tensor(self.scope, "h_boot", [num_sents, input_dim], - self.py.h_boot) - self.scope.var("step_scopes") - self.scope.var("h@state") - - def test_grad(self): - self.scope = core.Scope() - self.create_forward_op() - self.create_global_variables() - self.create_step_net() - self.create_gradient_op() - - -if __name__ == '__main__': - exit( - 0 - ) # FIXME(qijun): https://github.com/PaddlePaddle/Paddle/issues/5101#issuecomment-339814957 - unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_executor_and_mul.py b/python/paddle/v2/fluid/tests/test_executor_and_mul.py index 709250d0c86dde84ac22c37d8e2385ca4a80a40a..da64739de5eb4eca8db8ac8370276c41692a7242 100644 --- a/python/paddle/v2/fluid/tests/test_executor_and_mul.py +++ b/python/paddle/v2/fluid/tests/test_executor_and_mul.py @@ -8,11 +8,11 @@ import numpy class TestExecutor(unittest.TestCase): def test_mul(self): - a = data(name='a', shape=[784], data_type='float32') + a = data(name='a', shape=[784], dtype='float32') b = data( name='b', shape=[784, 100], - data_type='float32', + dtype='float32', append_batch_size=False) out = mul(x=a, y=b) place = core.CPUPlace() diff --git a/python/paddle/v2/fluid/tests/test_image_classification_layer.py b/python/paddle/v2/fluid/tests/test_image_classification_layer.py index bf5444107fa1609e67b09823b82e5fb92234b0a4..8e8e1b0a8c07a60cb1404462f976d10fe26e87f6 100644 --- a/python/paddle/v2/fluid/tests/test_image_classification_layer.py +++ b/python/paddle/v2/fluid/tests/test_image_classification_layer.py @@ -32,7 +32,7 @@ class TestLayer(unittest.TestCase): images = layers.data( name='pixel', shape=[3, 48, 48], - data_type='float32', + dtype='float32', main_program=main_program) layers.batch_norm( input=images, @@ 
-47,7 +47,7 @@ class TestLayer(unittest.TestCase): images = layers.data( name='pixel', shape=[3, 48, 48], - data_type='float32', + dtype='float32', main_program=main_program) layers.dropout( x=images, @@ -64,7 +64,7 @@ class TestLayer(unittest.TestCase): images = layers.data( name='pixel', shape=[3, 48, 48], - data_type='float32', + dtype='float32', main_program=main_program, startup_program=startup_program) conv1 = conv_block(images, 64, 2, [0.3, 0], main_program, @@ -80,13 +80,13 @@ class TestLayer(unittest.TestCase): image1 = layers.data( name='pixel1', shape=[3, 48, 48], - data_type='float32', + dtype='float32', main_program=main_program, startup_program=startup_program) image2 = layers.data( name='pixel2', shape=[3, 48, 48], - data_type='float32', + dtype='float32', main_program=main_program, startup_program=startup_program) out = layers.elementwise_add( diff --git a/python/paddle/v2/fluid/tests/test_inference_model_io.py b/python/paddle/v2/fluid/tests/test_inference_model_io.py index 98b95713b73e8eba93bd6a58eaaed603cfae7952..74f1ce23262bbc969f9544885a7390534c76cdf6 100644 --- a/python/paddle/v2/fluid/tests/test_inference_model_io.py +++ b/python/paddle/v2/fluid/tests/test_inference_model_io.py @@ -19,13 +19,13 @@ class TestBook(unittest.TestCase): x = layers.data( name='x', shape=[2], - data_type='float32', + dtype='float32', main_program=program, startup_program=init_program) y = layers.data( name='y', shape=[1], - data_type='float32', + dtype='float32', main_program=program, startup_program=init_program) diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py index d3dc45742d92dc61b81d9cdc04056c5d5bdc2b63..87dc6d1a6270e0f8425b56601d04049450c73380 100644 --- a/python/paddle/v2/fluid/tests/test_layers.py +++ b/python/paddle/v2/fluid/tests/test_layers.py @@ -9,11 +9,11 @@ class TestBook(unittest.TestCase): def test_fit_a_line(self): program = Program() x = layers.data( - name='x', shape=[13], data_type='float32', 
main_program=program) + name='x', shape=[13], dtype='float32', main_program=program) y_predict = layers.fc(input=x, size=1, act=None, main_program=program) y = layers.data( - name='y', shape=[1], data_type='float32', main_program=program) + name='y', shape=[1], dtype='float32', main_program=program) cost = layers.square_error_cost( input=y_predict, label=y, main_program=program) @@ -21,19 +21,16 @@ class TestBook(unittest.TestCase): self.assertIsNotNone(avg_cost) program.append_backward(avg_cost) - # print str(program) + print str(program) def test_recognize_digits_mlp(self): program = Program() # Change g_program, so the rest layers use `g_program` images = layers.data( - name='pixel', - shape=[784], - data_type='float32', - main_program=program) + name='pixel', shape=[784], dtype='float32', main_program=program) label = layers.data( - name='label', shape=[1], data_type='int32', main_program=program) + name='label', shape=[1], dtype='int32', main_program=program) hidden1 = layers.fc(input=images, size=128, act='relu', @@ -50,14 +47,15 @@ class TestBook(unittest.TestCase): input=predict, label=label, main_program=program) avg_cost = layers.mean(x=cost, main_program=program) self.assertIsNotNone(avg_cost) - # print str(program) + + print str(program) def test_simple_conv2d(self): program = Program() images = layers.data( name='pixel', shape=[3, 48, 48], - data_type='int32', + dtype='int32', main_program=program) layers.conv2d( input=images, @@ -65,7 +63,7 @@ class TestBook(unittest.TestCase): filter_size=[4, 4], main_program=program) - # print str(program) + print str(program) def test_recognize_digits_conv(self): program = Program() @@ -73,10 +71,10 @@ class TestBook(unittest.TestCase): images = layers.data( name='pixel', shape=[1, 28, 28], - data_type='float32', + dtype='float32', main_program=program) label = layers.data( - name='label', shape=[1], data_type='int32', main_program=program) + name='label', shape=[1], dtype='int32', main_program=program) conv_pool_1 
= nets.simple_img_conv_pool( input=images, filter_size=5, @@ -104,46 +102,46 @@ class TestBook(unittest.TestCase): program.append_backward(avg_cost) - # print str(program) + print str(program) def test_word_embedding(self): program = Program() dict_size = 10000 embed_size = 32 first_word = layers.data( - name='firstw', shape=[1], data_type='int64', main_program=program) + name='firstw', shape=[1], dtype='int64', main_program=program) second_word = layers.data( - name='secondw', shape=[1], data_type='int64', main_program=program) + name='secondw', shape=[1], dtype='int64', main_program=program) third_word = layers.data( - name='thirdw', shape=[1], data_type='int64', main_program=program) + name='thirdw', shape=[1], dtype='int64', main_program=program) forth_word = layers.data( - name='forthw', shape=[1], data_type='int64', main_program=program) + name='forthw', shape=[1], dtype='int64', main_program=program) next_word = layers.data( - name='nextw', shape=[1], data_type='int64', main_program=program) + name='nextw', shape=[1], dtype='int64', main_program=program) embed_first = layers.embedding( input=first_word, size=[dict_size, embed_size], - data_type='float32', + dtype='float32', param_attr={'name': 'shared_w'}, main_program=program) embed_second = layers.embedding( input=second_word, size=[dict_size, embed_size], - data_type='float32', + dtype='float32', param_attr={'name': 'shared_w'}, main_program=program) embed_third = layers.embedding( input=third_word, size=[dict_size, embed_size], - data_type='float32', + dtype='float32', param_attr={'name': 'shared_w'}, main_program=program) embed_forth = layers.embedding( input=forth_word, size=[dict_size, embed_size], - data_type='float32', + dtype='float32', param_attr={'name': 'shared_w'}, main_program=program) @@ -165,24 +163,21 @@ class TestBook(unittest.TestCase): avg_cost = layers.mean(x=cost, main_program=program) self.assertIsNotNone(avg_cost) - # print str(program) + print str(program) def 
test_linear_chain_crf(self): program = Program() # Change g_program, so the rest layers use `g_program` images = layers.data( - name='pixel', - shape=[784], - data_type='float32', - main_program=program) + name='pixel', shape=[784], dtype='float32', main_program=program) label = layers.data( - name='label', shape=[1], data_type='int32', main_program=program) + name='label', shape=[1], dtype='int32', main_program=program) hidden = layers.fc(input=images, size=128, main_program=program) crf = layers.linear_chain_crf( input=hidden, label=label, main_program=program) - # print str(program) + print str(program) if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py b/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py index b18cb6b49fa41f26e1b6de1128690507c5a2f099..16e64b8cd52d72a3bbc84e43d772b843dad0129a 100644 --- a/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py +++ b/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py @@ -132,7 +132,7 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase): x = layers.data( name='x', shape=[1], - data_type='float32', + dtype='float32', main_program=program, stop_gradient=False) table = layers.lod_rank_table(x, level=0, main_program=program) diff --git a/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py index 8af99005dc0b5d50de60ca89c2ddf870b1537edb..e76357a5be07d79eafee4c3a27911efe8a3eaef4 100644 --- a/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py +++ b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py @@ -11,10 +11,9 @@ import numpy as np class TestMNISTIfElseOp(unittest.TestCase): def test_raw_api(self): kwargs = {'startup_program': Program(), 'main_program': Program()} - image = layers.data( - name='x', shape=[784], data_type='float32', **kwargs) + image = layers.data(name='x', shape=[784], dtype='float32', **kwargs) - label = layers.data(name='y', shape=[1], data_type='int64', **kwargs) + label 
= layers.data(name='y', shape=[1], dtype='int64', **kwargs) limit = layers.fill_constant_batch_size_like( input=label, dtype='int64', shape=[1], value=5.0, **kwargs) @@ -84,10 +83,9 @@ class TestMNISTIfElseOp(unittest.TestCase): def test_ifelse(self): kwargs = {'startup_program': Program(), 'main_program': Program()} - image = layers.data( - name='x', shape=[784], data_type='float32', **kwargs) + image = layers.data(name='x', shape=[784], dtype='float32', **kwargs) - label = layers.data(name='y', shape=[1], data_type='int64', **kwargs) + label = layers.data(name='y', shape=[1], dtype='int64', **kwargs) limit = layers.fill_constant_batch_size_like( input=label, dtype='int64', shape=[1], value=5.0, **kwargs) diff --git a/python/paddle/v2/fluid/tests/test_nccl_init_op.py b/python/paddle/v2/fluid/tests/test_nccl_init_op.py deleted file mode 100644 index a536800ccd81fdc2f3b7c8320cede4f8ecf3a8cb..0000000000000000000000000000000000000000 --- a/python/paddle/v2/fluid/tests/test_nccl_init_op.py +++ /dev/null @@ -1,39 +0,0 @@ -import unittest, os -import numpy as np -import paddle.v2 as paddle -from paddle.v2.fluid.op import Operator -import paddle.v2.fluid.core as core -from op_test import OpTest, create_op, set_input - -if not core.is_compile_gpu(): - exit(0) - -gpu_count = core.get_cuda_device_count() - -if gpu_count <= 1: - exit(0) - -g_scope = core.Scope() -g_ctx = core.DeviceContext.create(core.CPUPlace()) - - -class TestNCCLInit(unittest.TestCase): - def test_init(self): - self.op_type = "ncclInit" - self.gpus = range(gpu_count) - - self.inputs = {} - self.attrs = {"gpus": self.gpus} - g_scope.var("Communicator").get_communicator() - self.outputs = {"Communicator": g_scope.find_var("Communicator")} - nccl_init = create_op( - g_scope, - op_type=self.op_type, - inputs=self.inputs, - outputs=self.outputs, - attrs=self.attrs) - nccl_init.run(g_scope, g_ctx) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_parameter.py 
b/python/paddle/v2/fluid/tests/test_parameter.py index a633d22c2b1db2728b6eb767078ce4aec6cce163..d467e4bbb79b291c442c643158ef6c0d630920dd 100644 --- a/python/paddle/v2/fluid/tests/test_parameter.py +++ b/python/paddle/v2/fluid/tests/test_parameter.py @@ -20,7 +20,7 @@ class TestParameter(unittest.TestCase): self.assertIsNotNone(param) self.assertEqual('fc.w', param.name) self.assertEqual((784, 100), param.shape) - self.assertEqual(core.DataType.FP32, param.data_type) + self.assertEqual(core.DataType.FP32, param.dtype) self.assertEqual(0, param.block.idx) exe = Executor(core.CPUPlace()) p = exe.run(g_main_program, fetch_list=[param])[0] diff --git a/python/paddle/v2/fluid/tests/test_protobuf_descs.py b/python/paddle/v2/fluid/tests/test_protobuf_descs.py index 098a9802dfc6763ce2a2356b7267a439145b7939..d8abe17606c4ddb2ff51d5f918b1e5d7e110f7fa 100644 --- a/python/paddle/v2/fluid/tests/test_protobuf_descs.py +++ b/python/paddle/v2/fluid/tests/test_protobuf_descs.py @@ -101,13 +101,13 @@ class TestVarDesc(unittest.TestCase): self.assertEqual(src_shape, res_shape) self.assertEqual(core.VarDesc.VarType.SELECTED_ROWS, var.type()) - def test_data_type(self): + def test_dtype(self): program_desc = core.ProgramDesc() block = program_desc.block(0) var = block.var('my_var') var.set_type(core.VarDesc.VarType.LOD_TENSOR) - var.set_data_type(core.DataType.INT32) - self.assertEqual(core.DataType.INT32, var.data_type()) + var.set_dtype(core.DataType.INT32) + self.assertEqual(core.DataType.INT32, var.dtype()) self.assertEqual(core.VarDesc.VarType.LOD_TENSOR, var.type()) diff --git a/python/paddle/v2/fluid/tests/test_recurrent_op.py b/python/paddle/v2/fluid/tests/test_recurrent_op.py index b623d1231838faff9e91c9234befb1f647fe8ec2..88bcdc3e6a21881ace2be53c22a62d78df30a974 100644 --- a/python/paddle/v2/fluid/tests/test_recurrent_op.py +++ b/python/paddle/v2/fluid/tests/test_recurrent_op.py @@ -118,14 +118,14 @@ class RecurrentOpTest1(unittest.TestCase): def create_rnn_op(self): x = 
layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False h_boot = layers.data( shape=[self.input_dim], - data_type='float32', + dtype='float32', name='h_boot', **self.p_info) h_boot.stop_gradient = False @@ -251,14 +251,14 @@ class RecurrentOpTest2(RecurrentOpTest1): def create_rnn_op(self): x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False h_boot = layers.data( shape=[self.input_dim], - data_type='float32', + dtype='float32', name='h_boot', **self.p_info) h_boot.stop_gradient = False @@ -350,21 +350,21 @@ class RecurrentOpMultipleMemoryTest(RecurrentOpTest1): def create_rnn_op(self): x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False h_boot1 = layers.data( shape=[self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='h_boot1', append_batch_size=False, **self.p_info) h_boot1.stop_gradient = False h_boot2 = layers.data( shape=[self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='h_boot2', append_batch_size=False, **self.p_info) @@ -435,7 +435,7 @@ class RecurrentOpNoMemBootTest(RecurrentOpTest1): def create_rnn_op(self): x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='x', append_batch_size=False, **self.p_info) diff --git a/python/paddle/v2/fluid/tests/test_roi_pool_op.py b/python/paddle/v2/fluid/tests/test_roi_pool_op.py new file mode 100644 index 0000000000000000000000000000000000000000..7cedb930ca861aed95c355931d80cb4d265c8235 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_roi_pool_op.py @@ -0,0 +1,127 @@ +import unittest +import 
numpy as np +import math +import sys +from op_test import OpTest + +class TestROIPoolOp(OpTest): + def set_data(self): + self.init_test_case() + self.make_rois() + self.calc_roi_pool() + + self.inputs = { + 'X': self.x, + 'ROIs': self.rois} + + self.attrs = { + 'spatial_scale': self.spatial_scale, + 'pooled_height': self.pooled_height, + 'pooled_width': self.pooled_width} + + self.outputs = { + 'Out': self.outs, + 'Argmax': self.argmaxes} + + def init_test_case(self): + self.batch_size = 5 + self.channels = 3 + self.height = 6 + self.width = 4 + + # n, c, h, w + self.x_dim = (self.batch_size, self.channels, + self.height, self.width) + + self.spatial_scale = 1.0/4.0 + self.pooled_height = 2 + self.pooled_width = 2 + self.rois_num = 2 + + self.x = np.random.random(self.x_dim).astype('float32') + + def calc_roi_pool(self): + out_data = np.zeros( + (self.rois_num, self.channels, + self.pooled_height, self.pooled_width)) + argmax_data = np.zeros( + (self.rois_num, self.channels, + self.pooled_height, self.pooled_width)) + + for i in range(self.rois_num): + roi = self.rois[i] + roi_batch_id = roi[0] + roi_start_w = int(round(roi[1] * self.spatial_scale)) + roi_start_h = int(round(roi[2] * self.spatial_scale)) + roi_end_w = int(round(roi[3] * self.spatial_scale)) + roi_end_h = int(round(roi[4] * self.spatial_scale)) + + roi_height = int(max(roi_end_h - roi_start_h + 1, 1)); + roi_width = int(max(roi_end_w - roi_start_w + 1, 1)); + + x_i = self.x[roi_batch_id] + + bin_size_h = float(roi_height) / float(self.pooled_height) + bin_size_w = float(roi_width) / float(self.pooled_width) + + for c in range(self.channels): + for ph in range(self.pooled_height): + for pw in range(self.pooled_width): + hstart = int(math.floor(ph * bin_size_h)) + wstart = int(math.floor(pw * bin_size_w)) + hend = int(math.ceil((ph + 1) * bin_size_h)) + wend = int(math.ceil((pw + 1) * bin_size_w)) + + hstart = min(max(hstart + roi_start_h, 0), self.height) + hend = min(max(hend + roi_start_h, 0), 
self.height) + wstart = min(max(wstart + roi_start_w, 0), self.width) + wend = min(max(wend + roi_start_w, 0), self.width) + + is_empty = (hend <= hstart) or (wend <= wstart) + if is_empty: + out_data[i, c, ph, pw] = 0 + else: + out_data[i, c, ph, pw] = -sys.float_info.max + + argmax_data[i, c, ph, pw] = -1 + + for h in range(hstart, hend): + for w in range(wstart, wend): + if x_i[c, h, w] > out_data[i, c, ph, pw]: + out_data[i, c, ph, pw] = x_i[c, h, w] + argmax_data[i, c, ph, pw] = h * \ + self.width + w + + self.outs = out_data.astype('float32') + self.argmaxes = argmax_data.astype('int64') + + def make_rois(self): + rois = [] + batch_ids = np.random.randint(0, self.batch_size, size=self.rois_num) + for i in range(self.rois_num): + x1 = np.random.random_integers( + 0, self.width / self.spatial_scale - self.pooled_width) + y1 = np.random.random_integers( + 0, self.height / self.spatial_scale - self.pooled_height) + + x2 = np.random.random_integers( + x1 + self.pooled_width, self.width / self.spatial_scale) + y2 = np.random.random_integers( + y1 + self.pooled_height, self.height / self.spatial_scale) + + roi = [batch_ids[i], x1, y1, x2, y2] + rois.append(roi) + self.rois = np.array(rois).astype("int64") + + def setUp(self): + self.op_type = "roi_pool" + self.set_data() + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py index 1a3b88e18e38b88d75ad17a0bb6a2965d1e60406..953629d610e183cdddf97081f94a77951fe979d8 100644 --- a/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py +++ b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py @@ -9,7 +9,7 @@ import numpy class TestShrinkRNNMemory(unittest.TestCase): def test_shrink_rnn_memory(self): - x = layers.data('x', shape=[100], data_type='float32') + x = layers.data('x', 
shape=[100], dtype='float32') x.stop_gradient = False table = layers.lod_rank_table(x=x) i = layers.zeros(dtype='int64', shape=[1]) diff --git a/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py b/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py index 3aed83b2ea3418c54f9540279ae6e2e0045421fa..a98cb3bbab8442886206b59a2b591fee96deeb9f 100644 --- a/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py +++ b/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py @@ -123,13 +123,13 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): x = layers.data( name='x', shape=[1], - data_type='float32', + dtype='float32', main_program=program, stop_gradient=False) y = layers.data( name='y', shape=[1], - data_type='bool', + dtype='bool', main_program=program, stop_gradient=False) diff --git a/python/paddle/v2/fluid/tests/test_tensor_array.py b/python/paddle/v2/fluid/tests/test_tensor_array.py deleted file mode 100644 index d6929ba16e4dae0c57adcceb4f0e78c094eee55c..0000000000000000000000000000000000000000 --- a/python/paddle/v2/fluid/tests/test_tensor_array.py +++ /dev/null @@ -1,106 +0,0 @@ -import logging -import paddle.v2.fluid.core as core -import unittest -import numpy as np - - -class TestTensorArray(unittest.TestCase): - def setUp(self): - self.ta = core.TensorArray() - - self.batch_size = 10 - self.dim = 2 - - # create a LoDTensor - self.scope = core.Scope() - var = self.scope.var("test_tensor") - self.place = core.CPUPlace() - tensor = var.get_tensor() - tensor.set_dims([self.batch_size, self.dim]) - tensor.alloc_float(self.place) - tensor_array = np.array(tensor) - tensor_array[0, 0] = 0 - tensor_array[1, 0] = 1 - tensor_array[2, 0] = 2 - tensor_array[3, 0] = 3 - tensor_array[4, 0] = 4 - tensor_array[5, 0] = 5 - tensor_array[6, 0] = 6 - tensor_array[7, 0] = 7 - tensor_array[8, 0] = 8 - tensor_array[9, 0] = 9 - - lod_py = [[0, 2, 5, 10]] - lod_tensor = core.LoDTensor(lod_py) - lod_tensor.set(tensor_array, 
self.place) - - self.py_seq_meta = [[5, 10, 2], [2, 5, 1], [0, 2, 0]] - - self.tensor = lod_tensor - - def test_unstack(self): - self.ta.unstack(self.tensor) - self.assertEqual(self.tensor.get_dims()[0], self.ta.size()) - - def test_read(self): - self.ta.unstack(self.tensor) - for i in range(self.batch_size): - tensor = self.ta.read(i) - - def test_write(self): - self.ta.unstack(self.tensor) - - # create a tensor with shape of [1, self.dim] - var = self.scope.var("hell") - tensor = var.get_tensor() - tensor.set_dims([1, self.dim]) - tensor.alloc_float(self.place) - tensor_array = np.array(tensor) - for i in range(self.dim): - tensor_array[0, i] = i - tensor.set(tensor_array, self.place) - - self.ta.write(2, tensor) - - ta_tensor = self.ta.read(2) - ta_tensor_array = np.array(ta_tensor) - self.assertEqual(ta_tensor.get_dims(), [1, self.dim]) - self.assertTrue((tensor_array == ta_tensor_array).all()) - - def test_write_shared(self): - self.ta.unstack(self.tensor) - - # create a tensor with shape of [1, self.dim] - var = self.scope.var("hell") - tensor = var.get_tensor() - tensor.set_dims([1, self.dim]) - tensor.alloc_float(self.place) - tensor_array = np.array(tensor) - for i in range(self.dim): - tensor_array[0, i] = i - tensor.set(tensor_array, self.place) - - self.ta.write_shared(2, tensor) - - ta_tensor = self.ta.read(2) - ta_tensor_array = np.array(ta_tensor) - self.assertEqual(ta_tensor.get_dims(), [1, self.dim]) - self.assertTrue((tensor_array == ta_tensor_array).all()) - - def test_unpack(self): - meta = self.ta.unpack(self.tensor, 0, True) - self.assertEqual(self.ta.size(), 5) - self.assertEqual(meta, self.py_seq_meta) - - def test_pack(self): - meta = self.ta.unpack(self.tensor, 0, True) - print "meta", meta - tensor = self.ta.pack(0, meta, self.tensor.lod()) - print np.array(self.tensor) - print np.array(tensor) - self.assertTrue((np.array(self.tensor) == np.array(tensor)).all()) - self.assertTrue(tensor.lod(), self.tensor.lod()) - - -if __name__ == 
'__main__': - unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_variable.py b/python/paddle/v2/fluid/tests/test_variable.py index c3e1f9ac0a70e7448fd8d1983b1c04d27af9771c..92ffdceb6c84fb2669f8c1bb556c46fb1c03c411 100644 --- a/python/paddle/v2/fluid/tests/test_variable.py +++ b/python/paddle/v2/fluid/tests/test_variable.py @@ -22,13 +22,13 @@ class TestVariable(unittest.TestCase): w = b.create_var( dtype="float64", shape=[784, 100], lod_level=0, name="fc.w") self.assertNotEqual(str(w), "") - self.assertEqual(core.DataType.FP64, w.data_type) + self.assertEqual(core.DataType.FP64, w.dtype) self.assertEqual((784, 100), w.shape) self.assertEqual("fc.w", w.name) self.assertEqual(0, w.lod_level) w = b.create_var(name='fc.w') - self.assertEqual(core.DataType.FP64, w.data_type) + self.assertEqual(core.DataType.FP64, w.dtype) self.assertEqual((784, 100), w.shape) self.assertEqual("fc.w", w.name) self.assertEqual(0, w.lod_level) diff --git a/python/paddle/v2/fluid/tests/test_while_op.py b/python/paddle/v2/fluid/tests/test_while_op.py index 84b432333f950f754a97bc1a051b59c16fb22aed..fca0cdcc319ff661ced33b6bcd242c894941576c 100644 --- a/python/paddle/v2/fluid/tests/test_while_op.py +++ b/python/paddle/v2/fluid/tests/test_while_op.py @@ -9,11 +9,11 @@ import numpy class TestWhileOp(unittest.TestCase): def test_simple_forward(self): d0 = layers.data( - "d0", shape=[10], append_batch_size=False, data_type='float32') + "d0", shape=[10], append_batch_size=False, dtype='float32') d1 = layers.data( - "d1", shape=[10], append_batch_size=False, data_type='float32') + "d1", shape=[10], append_batch_size=False, dtype='float32') d2 = layers.data( - "d2", shape=[10], append_batch_size=False, data_type='float32') + "d2", shape=[10], append_batch_size=False, dtype='float32') i = layers.zeros(shape=[1], dtype='int64') i.stop_gradient = True init = layers.zeros(shape=[10], dtype='float32') diff --git a/python/paddle/v2/fluid/tests/tmp/inference_model/__model__ 
b/python/paddle/v2/fluid/tests/tmp/inference_model/__model__ new file mode 100644 index 0000000000000000000000000000000000000000..e333d10da94943372b0fe4dedd9d857817ec9ca6 Binary files /dev/null and b/python/paddle/v2/fluid/tests/tmp/inference_model/__model__ differ diff --git a/python/paddle/v2/fluid/tests/tmp/inference_model/fc_0.b_0 b/python/paddle/v2/fluid/tests/tmp/inference_model/fc_0.b_0 new file mode 100644 index 0000000000000000000000000000000000000000..b1e5fad056e58f23c2cf917a3f4c4d4632ae7d58 Binary files /dev/null and b/python/paddle/v2/fluid/tests/tmp/inference_model/fc_0.b_0 differ diff --git a/python/paddle/v2/fluid/tests/tmp/inference_model/fc_0.w_0 b/python/paddle/v2/fluid/tests/tmp/inference_model/fc_0.w_0 new file mode 100644 index 0000000000000000000000000000000000000000..2f41796c0495570941c236c8c3f422b3cbd5edd2 Binary files /dev/null and b/python/paddle/v2/fluid/tests/tmp/inference_model/fc_0.w_0 differ diff --git a/python/paddle/v2/framework/tests/test_elementwise_mod_op.py b/python/paddle/v2/framework/tests/test_elementwise_mod_op.py new file mode 100644 index 0000000000000000000000000000000000000000..35c38147a24fb237b4607836a86cffa81b2d8904 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_elementwise_mod_op.py @@ -0,0 +1,36 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class ElementwiseModOp(OpTest): + def setUp(self): + self.op_type = "elementwise_mod" + """ Warning + CPU gradient check error! 
+ 'X': np.random.randint((32,84)).astype("int32"), + 'Y': np.random.randint((32,84)).astype("int32") + """ + self.inputs = { + 'X': np.random.randint(1, 10, [13, 17]).astype("int32"), + 'Y': np.random.randint(1, 10, [13, 17]).astype("int32") + } + self.outputs = {'Out': np.mod(self.inputs['X'], self.inputs['Y'])} + + def test_check_output(self): + self.check_output() + + def test_check_grad_normal(self): + self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.05) + + def test_check_grad_ingore_x(self): + self.check_grad( + ['Y'], 'Out', max_relative_error=0.05, no_grad_set=set("X")) + + def test_check_grad_ingore_y(self): + self.check_grad( + ['X'], 'Out', max_relative_error=0.05, no_grad_set=set('Y')) + + +if __name__ == '__main__': + unittest.main()