From b3f44ad761ae5e3d9afa5bbedecdb10d3926c8cd Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 13 Sep 2017 17:16:33 +0800 Subject: [PATCH] add multiplex operator --- paddle/operators/multiplex_op.cc | 107 ++++++++++++++++++ paddle/operators/multiplex_op.cu | 76 +++++++++++++ paddle/operators/multiplex_op.h | 68 +++++++++++ paddle/pybind/pybind.cc | 1 + .../paddle/v2/framework/tests/CMakeLists.txt | 1 + .../v2/framework/tests/test_multiplex_op.py | 34 ++++++ 6 files changed, 287 insertions(+) create mode 100644 paddle/operators/multiplex_op.cc create mode 100644 paddle/operators/multiplex_op.cu create mode 100644 paddle/operators/multiplex_op.h create mode 100644 python/paddle/v2/framework/tests/test_multiplex_op.py diff --git a/paddle/operators/multiplex_op.cc b/paddle/operators/multiplex_op.cc new file mode 100644 index 00000000000..67e8e5f5d79 --- /dev/null +++ b/paddle/operators/multiplex_op.cc @@ -0,0 +1,107 @@ + +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/operators/multiplex_op.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +class MultiplexOp : public framework::OperatorWithKernel { + public: + MultiplexOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto ins = ctx.MultiInput("X"); + auto *out = ctx.Output("Out"); + auto num_ins = ins.size(); + PADDLE_ENFORCE(num_ins > 2, + "multiplex operator should have more than 2 inputs."); + PADDLE_ENFORCE_EQ(ins[0]->dims().size(), 1, + "The first input must be an index vector."); + auto in_dim = ins[1]->dims(); + + for (size_t i = 2; i < num_ins; i++) { + auto dim = ins[i]->dims(); + PADDLE_ENFORCE( + in_dim == dim, + "All the input tensors except the first one must have the same size"); + } + out->Resize(in_dim); + } +}; + +class MultiplexOpMaker : public framework::OpProtoAndCheckerMaker { + public: + MultiplexOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The input tensor of multiplex operator.").AsDuplicable(); + AddOutput("Out", "The output tensor of multiplex operator."); + AddComment(R"DOC(Multiplex operator + +Multiplex multiple tensors according to the index provided by the first +input tensor. + +ins[0]: the index of the tensor to output of size batchSize. +ins[1:N]: the candidate output tensors. +For each index i from 0 to batchSize - 1, the output is the i-th row of +the (index[i] + 1)-th tensor. + +For each i-th row of output: + +y[i][j] = x_{k}[i][j], j = 0,1, ... , (x_{1}.width - 1) + +where y is the output tensor. `x_{k}` is the k-th input layer +and `k = x{0}[i] + 1`. 
+ +)DOC"); + } +}; + +class MultiplexGradOp : public framework::OperatorWithKernel { + public: + MultiplexGradOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), + "Input(Out@GRAD) shouldn't be null."); + auto d_ins = ctx.MultiOutput(framework::GradVarName("X")); + auto ins = ctx.MultiInput("X"); + for (size_t i = 0; i < ins.size(); i++) { + auto dims = ins[i]->dims(); + d_ins[i]->Resize(dims); + } + } +}; + +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; + +REGISTER_OP(multiplex, ops::MultiplexOp, ops::MultiplexOpMaker, multiplex_grad, + ops::MultiplexGradOp); +REGISTER_OP_CPU_KERNEL(multiplex, ops::MultiplexCPUKernel); +REGISTER_OP_CPU_KERNEL(multiplex_grad, ops::MultiplexGradCPUKernel); diff --git a/paddle/operators/multiplex_op.cu b/paddle/operators/multiplex_op.cu new file mode 100644 index 00000000000..81d637686b2 --- /dev/null +++ b/paddle/operators/multiplex_op.cu @@ -0,0 +1,76 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class MultiplexGPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const { + auto ins = ctx.MultiInput("X"); + auto* out = ctx.Output("Out"); + out->mutable_data(ctx.GetPlace()); + + auto rows = ins[1]->dims()[0]; + auto cols = ins[1]->dims()[1]; + // copy index to cpu + Tensor index_t_cpu; + index_t_cpu.CopyFrom(*(ins[0]), paddle::platform::CPUPlace()); + auto index = index_t_cpu.data(); + for (auto i = 0; i < rows; i++) { + int k = (int)index[i] + 1; + cudaMemcpy(out->data() + i * cols, ins[k]->data() + i * cols, + cols * sizeof(T), cudaMemcpyDeviceToDevice); + } + } +}; + +template +class MultiplexGradGPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const { + auto* d_out = ctx.Input(framework::GradVarName("Out")); + auto ins = ctx.MultiInput("X"); + auto d_ins = ctx.MultiOutput(framework::GradVarName("X")); + for (auto d_in : d_ins) { + d_in->mutable_data(ctx.GetPlace()); + auto dims = d_in->dims(); + cudaMemset(d_in->data(), 0, framework::product(dims) * sizeof(T)); + } + + auto rows = ins[1]->dims()[0]; + auto cols = ins[1]->dims()[1]; + // copy index to cpu + Tensor index_t_cpu; + index_t_cpu.CopyFrom(*(ins[0]), paddle::platform::CPUPlace()); + auto index = index_t_cpu.data(); + for (auto i = 0; i < rows; i++) { + int k = (int)index[i] + 1; + cudaMemcpy(d_ins[k]->data() + i * cols, d_out->data() + i * cols, + cols * sizeof(T), cudaMemcpyDeviceToDevice); + } + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP_GPU_KERNEL(multiplex, ops::MultiplexGPUKernel); +REGISTER_OP_GPU_KERNEL(multiplex_grad, ops::MultiplexGradGPUKernel); diff --git a/paddle/operators/multiplex_op.h b/paddle/operators/multiplex_op.h new file mode 100644 index 00000000000..7b627a83b3a 
--- /dev/null +++ b/paddle/operators/multiplex_op.h @@ -0,0 +1,68 @@ + +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class MultiplexCPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const { + auto ins = ctx.MultiInput("X"); + auto* out = ctx.Output("Out"); + out->mutable_data(ctx.GetPlace()); + + auto index = ins[0]->data(); + auto rows = ins[1]->dims()[0]; + auto cols = ins[1]->dims()[1]; + for (auto i = 0; i < rows; i++) { + int k = (int)index[i] + 1; + memcpy(out->data() + i * cols, ins[k]->data() + i * cols, + cols * sizeof(T)); + } + } +}; + +template +class MultiplexGradCPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const { + auto* d_out = ctx.Input(framework::GradVarName("Out")); + auto ins = ctx.MultiInput("X"); + auto d_ins = + ctx.MultiOutput(framework::GradVarName("X")); + for (auto d_in : d_ins) { + d_in->mutable_data(ctx.GetPlace()); + auto dims = d_in->dims(); + memset(d_in->data(), 0, framework::product(dims) * sizeof(T)); + } + + auto index = ins[0]->data(); + auto rows = ins[1]->dims()[0]; + auto cols = ins[1]->dims()[1]; + for (auto i = 0; i < rows; i++) { + int k = (int)index[i] + 1; + memcpy(d_ins[k]->data() + i * cols, 
d_out->data() + i * cols, + cols * sizeof(T)); + } + } +}; +} +} diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 16a2368aae5..f0ac1f7f389 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -55,6 +55,7 @@ USE_OP(top_k); USE_OP(squared_l2_distance); USE_OP(sum); USE_OP(reshape); +USE_OP(multiplex); namespace paddle { namespace framework { diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 6b22c000821..752c5a5265e 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -36,3 +36,4 @@ py_test(mnist SRCS mnist.py) py_test(test_concat_op SRCS test_concat_op.py) py_test(test_squared_l2_distance_op SRCS test_squared_l2_distance_op.py) py_test(test_reshape_op SRCS test_reshape_op.py) +py_test(test_multiplex_op SRCS test_multiplex_op.py) diff --git a/python/paddle/v2/framework/tests/test_multiplex_op.py b/python/paddle/v2/framework/tests/test_multiplex_op.py new file mode 100644 index 00000000000..c42cb6f0fe2 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_multiplex_op.py @@ -0,0 +1,34 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestMultiplexOp(OpTest): + def setUp(self): + self.op_type = "multiplex" + rows = 3 + index = np.array([3, 1, 0]) + ins1 = np.random.random((rows, 10)).astype("float32") + ins2 = np.random.random((rows, 10)).astype("float32") + ins3 = np.random.random((rows, 10)).astype("float32") + ins4 = np.random.random((rows, 10)).astype("float32") + self.inputs = { + 'X': [('index', index), ('x1', ins1), ('x2', ins2), ('x3', ins3), + ('x4', ins4)] + } + # multiplex output + output = np.zeros_like(ins1) + for i in range(0, rows): + k = index[i] + 1 + output[i] = self.inputs['X'][k][1][i] + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["x1"], "Out") + + +if __name__ 
== '__main__': + unittest.main() -- GitLab