diff --git a/paddle/fluid/operators/detail/sendrecvop_utils.cc b/paddle/fluid/operators/detail/sendrecvop_utils.cc
index 16c612c45a37dd2ffd17f8d5f5946df30e9b3fe6..69fcffe9bc34006aef2e5a39227cf6d947e4615f 100644
--- a/paddle/fluid/operators/detail/sendrecvop_utils.cc
+++ b/paddle/fluid/operators/detail/sendrecvop_utils.cc
@@ -82,7 +82,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
       platform::CPUPlace cpu;
       auto& gpu_dev_ctx =
           static_cast<const platform::CUDADeviceContext&>(ctx);
-      auto copy_size = tensor.memory_size();
+      auto copy_size = tensor.numel() * framework::SizeOfType(tensor.type());
       payload = memory::Alloc(cpu, copy_size);

       memory::Copy(cpu, payload,
@@ -99,7 +99,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
       } else {
         payload = tensor.data<void>();
       }
-      payload_size = tensor.memory_size();
+      payload_size = tensor.numel() * framework::SizeOfType(tensor.type());
       e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber, payload_size);
     } break;
     case framework::proto::VarType_Type_SELECTED_ROWS: {
@@ -118,7 +118,8 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
         platform::CPUPlace cpu;
         auto& gpu_dev_ctx =
             static_cast<const platform::CUDADeviceContext&>(ctx);
-        auto copy_size = tensor->memory_size();
+        auto copy_size =
+            tensor->numel() * framework::SizeOfType(tensor->type());
         payload = memory::Alloc(cpu, copy_size);
         memory::Copy(cpu, payload,
                      boost::get<platform::CUDAPlace>(tensor->place()),
@@ -133,7 +134,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
       } else {
         payload = slr->mutable_value()->data<void>();
       }
-      payload_size = tensor->memory_size();
+      payload_size = tensor->numel() * framework::SizeOfType(tensor->type());
       e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber, payload_size);
     } break;
     default:
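The four hunks above make the same correction: the serialized payload size for a tensor is computed from its element count and element size (numel() * SizeOfType(type())) rather than from memory_size(), which reports the size of the underlying allocation and can exceed the bytes described by the current shape, for example when the tensor is a Slice view of a larger buffer, as the outputs of the split_byref operator introduced below are. A minimal sketch of the size rule, using NumPy only for illustration (not the Paddle API):

```python
import numpy as np


def payload_size(shape, dtype):
    """Bytes to serialize: element count times element size (numel * SizeOfType)."""
    numel = int(np.prod(shape))
    return numel * np.dtype(dtype).itemsize


# A 4 x 5 float32 tensor serializes 80 bytes, even if the backing
# allocation it shares (what memory_size() reflects) is larger.
assert payload_size((4, 5), 'float32') == 80
```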
diff --git a/paddle/fluid/operators/split_byref_op.cc b/paddle/fluid/operators/split_byref_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7413ce3e9ce60ed733bb4d27e9ec205e5f0a7e1b
--- /dev/null
+++ b/paddle/fluid/operators/split_byref_op.cc
@@ -0,0 +1,101 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/split_byref_op.h"
+#include "paddle/fluid/operators/split_op.h"
+
+namespace paddle {
+namespace operators {
+using framework::Tensor;
+
+class SplitByrefOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of SplitOp should not be null.");
+    PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(), 1UL,
+                      "Outputs(Out) of SplitOp should not be empty.");
+    auto in_dims = ctx->GetInputDim("X");
+    auto outs_names = ctx->Outputs("Out");
+    size_t num = static_cast<size_t>(ctx->Attrs().Get<int>("num"));
+    std::vector<int> sections = static_cast<std::vector<int>>(
+        ctx->Attrs().Get<std::vector<int>>("sections"));
+    const size_t outs_number = outs_names.size();
+    std::vector<framework::DDim> outs_dims;
+    outs_dims.reserve(outs_number);
+
+    if (num > 0) {
+      int64_t in_axis_dim = in_dims[0];
+      PADDLE_ENFORCE_EQ(in_axis_dim % num, 0,
+                        "tensor split does not result"
+                        " in an equal division");
+      size_t out_axis_dim = in_axis_dim / num;
+      for (size_t i = 0; i < outs_number; ++i) {
+        auto dim = in_dims;
+        dim[0] = out_axis_dim;
+        outs_dims.push_back(dim);
+      }
+    } else if (sections.size() > 0) {
+      PADDLE_ENFORCE_EQ(sections.size(), outs_number,
+                        "tensor split sections size "
+                        "should be equal to output size.");
+      for (size_t i = 0; i < outs_number; ++i) {
+        auto dim = in_dims;
+        dim[0] = sections[i];
+        outs_dims.push_back(dim);
+      }
+    }
+    ctx->SetOutputsDim("Out", outs_dims);
+  }
+};
+
+class SplitByrefOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  SplitByrefOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "(Tensor) Input tensor of the split operator.");
+    AddOutput("Out", "(Tensor) Output tensors of the split operator.")
+        .AsDuplicable();
+    AddComment(R"DOC(
+SplitByref operator
+
+Split the source tensor into several tensors along axis 0. No copy is
+performed; the output tensors share the same blocks of memory.
+)DOC");
+    AddAttr<std::vector<int>>("sections",
+                              "(vector<int>) "
+                              "the length of each output along the "
+                              "specified axis.")
+        .SetDefault(std::vector<int>{});
+    AddAttr<int>("num",
+                 "(int, default 0) "
+                 "Number of sub-tensors. This must evenly divide "
+                 "Input.dims()[axis]")
+        .SetDefault(0);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+// NOTE: concat op default axis must be 0!
+USE_CPU_ONLY_OP(concat);
+
+REGISTER_OPERATOR(split_byref, ops::SplitByrefOp, ops::SplitByrefOpMaker,
+                  ops::SplitGradMaker);
+REGISTER_OP_CPU_KERNEL(
+    split_byref, ops::SplitByrefOpKernel<paddle::platform::CPUPlace, float>);
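Shape inference for split_byref follows the same rule as split: either `num` evenly divides dimension 0 of X, or `sections` lists each output's size along dimension 0. A small standalone sketch of that rule (plain Python, not the Paddle InferShape API; the function name is illustrative):

```python
def infer_split_dim0(in_dim0, num_outputs, num=0, sections=None):
    """Mirror SplitByrefOp::InferShape: dimension 0 of each output along axis 0."""
    if num > 0:
        # equal division along axis 0
        assert in_dim0 % num == 0, "tensor split does not result in an equal division"
        return [in_dim0 // num] * num_outputs
    sections = sections or []
    assert len(sections) == num_outputs, "sections size should be equal to output size"
    return list(sections)


print(infer_split_dim0(20, 4, num=4))                # [5, 5, 5, 5]
print(infer_split_dim0(20, 3, sections=[4, 6, 10]))  # [4, 6, 10]
```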
diff --git a/paddle/fluid/operators/split_byref_op.cu.cc b/paddle/fluid/operators/split_byref_op.cu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5ee6186f3541b7dcb845ce0c6d28081685925da0
--- /dev/null
+++ b/paddle/fluid/operators/split_byref_op.cu.cc
@@ -0,0 +1,19 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/split_byref_op.h"
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(
+    split_byref,
+    ops::SplitByrefOpKernel<paddle::platform::CUDADeviceContext, float>);
diff --git a/paddle/fluid/operators/split_byref_op.h b/paddle/fluid/operators/split_byref_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..a3aad68ea736e223d3917607cca17f5cccfef630
--- /dev/null
+++ b/paddle/fluid/operators/split_byref_op.h
@@ -0,0 +1,43 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <vector>
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename DeviceContext, typename T>
+class SplitByrefOpKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* in = ctx.Input<framework::Tensor>("X");
+    auto outs = ctx.MultiOutput<framework::LoDTensor>("Out");
+    auto place = ctx.GetPlace();
+
+    size_t row_offset = 0;
+    for (size_t i = 0; i < outs.size(); ++i) {
+      // NOTE: no need to call mutable_data here to allocate memory.
+      auto* out = outs[i];
+      VLOG(3) << "splitting by ref: " << row_offset << " " << out->dims()[0];
+      *out = std::move(in->Slice(row_offset, row_offset + out->dims()[0]));
+      row_offset += out->dims()[0];
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/split_op.cc b/paddle/fluid/operators/split_op.cc
index e745509ec8c1f2ec305d7d4aabfdd43d847124b5..a4398df36bcc2d3b8bbe8949f27f5d6508861d95 100644
--- a/paddle/fluid/operators/split_op.cc
+++ b/paddle/fluid/operators/split_op.cc
@@ -108,21 +108,6 @@ Example:
   }
 };
 
-class SplitGradMaker : public framework::SingleGradOpDescMaker {
- public:
-  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
-
- protected:
-  std::unique_ptr<framework::OpDesc> Apply() const override {
-    auto op = new framework::OpDesc();
-    op->SetType("concat");
-    op->SetInput("X", OutputGrad("Out"));
-    op->SetOutput("Out", InputGrad("X"));
-    op->SetAttrMap(Attrs());
-    return std::unique_ptr<framework::OpDesc>(op);
-  }
-};
-
 }  // namespace operators
 }  // namespace paddle
 
diff --git a/paddle/fluid/operators/split_op.h b/paddle/fluid/operators/split_op.h
index e2c41f44ab3ea3c42837974dae749278c9356ba5..f0c417c70521b1bb3816f884d6ab7393473999e4 100644
--- a/paddle/fluid/operators/split_op.h
+++ b/paddle/fluid/operators/split_op.h
@@ -44,5 +44,20 @@ class SplitOpKernel : public framework::OpKernel<T> {
   }
 };
 
+class SplitGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    auto op = new framework::OpDesc();
+    op->SetType("concat");
+    op->SetInput("X", OutputGrad("Out"));
+    op->SetOutput("Out", InputGrad("X"));
+    op->SetAttrMap(Attrs());
+    return std::unique_ptr<framework::OpDesc>(op);
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
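SplitByrefOpKernel never calls mutable_data or copies element data: each output is rebound to in->Slice(row_offset, row_offset + rows), so every output aliases a contiguous run of the input's rows, and SplitGradMaker (moved to split_op.h so both operators can register it) still maps the gradient back to a concat. A rough NumPy analogy of the by-reference split (illustrative only; NumPy basic slicing happens to return views in the same way):

```python
import numpy as np

x = np.arange(20, dtype=np.float32).reshape(4, 5)  # input with 4 rows
sections = [1, 3]                                  # rows per output

outs, row_offset = [], 0
for n_rows in sections:
    out = x[row_offset:row_offset + n_rows]        # a view of the rows, no copy
    outs.append(out)
    row_offset += n_rows

# every output shares memory with the input, mirroring Tensor::Slice
assert all(np.shares_memory(o, x) for o in outs)
```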
diff --git a/python/paddle/fluid/distribute_transpiler.py b/python/paddle/fluid/distribute_transpiler.py
index 591c22d9bd8e5846a99a971256a64e2750541383..50be386c51f89cb5fb50f4e5e3cb8bd84ccf267b 100644
--- a/python/paddle/fluid/distribute_transpiler.py
+++ b/python/paddle/fluid/distribute_transpiler.py
@@ -825,7 +825,7 @@ class DistributeTranspiler:
             for v in splited_vars:
                 sections.append(v.shape[0])
             program.global_block().append_op(
-                type="split",
+                type="split_byref",
                 inputs={"X": orig_var},
                 outputs={"Out": splited_vars},
                 attrs={"sections": sections}  # assume split evenly
diff --git a/python/paddle/fluid/tests/unittests/test_split_op.py b/python/paddle/fluid/tests/unittests/test_split_op.py
index 887bdfe8b3608878bace5b857a71ada123b74b2f..eb49a53e54f4bdb6bcd6cb1991423970f29997bb 100644
--- a/python/paddle/fluid/tests/unittests/test_split_op.py
+++ b/python/paddle/fluid/tests/unittests/test_split_op.py
@@ -19,7 +19,7 @@ from op_test import OpTest
 
 class TestSplitOp(OpTest):
     def setUp(self):
-        self.op_type = "split"
+        self._set_op_type()
         axis = 1
         x = np.random.random((4, 5, 6)).astype('float32')
         out = np.split(x, [2, 3], axis)
@@ -28,6 +28,9 @@ class TestSplitOp(OpTest):
         self.outputs = {'Out': [('out%d' % i, out[i]) \
                         for i in xrange(len(out))]}
 
+    def _set_op_type(self):
+        self.op_type = "split"
+
     def test_check_output(self):
         self.check_output()
 
@@ -35,5 +38,10 @@ class TestSplitOp(OpTest):
         self.check_grad(['X'], ['out0', 'out1', 'out2'])
 
 
+class TestSplitByrefOp(OpTest):
+    def _set_op_type(self):
+        self.op_type = "split_byref"
+
+
 if __name__ == '__main__':
     unittest.main()
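TestSplitOp now routes the operator name through a `_set_op_type` hook so the same fixture can target either kernel, and the transpiler emits `split_byref` when it splits a variable into per-server sections. As written in the diff, `TestSplitByrefOp` derives from `OpTest` and only overrides the hook; a sketch of how the shared fixture could be reused to run the full output and gradient checks against split_byref, assuming a subclass of TestSplitOp (hypothetical, not part of this diff):

```python
from test_split_op import TestSplitOp  # the fixture defined in the file above


class TestSplitByrefOpShared(TestSplitOp):  # hypothetical subclass name
    def _set_op_type(self):
        # setUp() calls this hook, so the inherited test_check_output and
        # test_check_grad run against the split_byref kernel.
        self.op_type = "split_byref"


if __name__ == '__main__':
    import unittest
    unittest.main()
```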