diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index f466dbc79a2059faa1e3d4ad6ede3f2394580842..f0fd12f1b5276d033ea086c60c80616fb1be7585 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -47,6 +47,7 @@ cc_test(gather_test SRCS gather_test.cc DEPS tensor)
 op_library(gather_op SRCS gather_op.cc gather_op.cu)
 
 cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
+op_library(scatter_op SRCS scatter_op.cc scatter_op.cu)
 
 cc_library(net_op SRCS net_op.cc DEPS op_registry)
 cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f901edefa22dc9a252e87116df756d04767a7162
--- /dev/null
+++ b/paddle/operators/scatter_op.cc
@@ -0,0 +1,86 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/scatter_op.h"
+#include "paddle/framework/ddim.h"
+
+namespace paddle {
+namespace operators {
+
+class ScatterOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("Index")->dims().size(), 1,
+                      "Update Index should be 1-D.");
+    PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("Ref")->dims().size(),
+                      ctx.Input<Tensor>("Updates")->dims().size(),
+                      "Ref and Updates should have the same rank.");
+    PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("Updates")->dims()[0],
+                      ctx.Input<Tensor>("Index")->dims()[0],
+                      "Updates and Index should have the same batch-size.");
+    // All dimensions but the first must match between Ref and Updates.
+    framework::DDim data_dim(ctx.Input<Tensor>("Ref")->dims());
+    for (int i = 1; i < data_dim.size(); ++i)
+      PADDLE_ENFORCE_EQ(data_dim[i], ctx.Input<Tensor>("Updates")->dims()[i]);
+    ctx.Output<Tensor>("Out")->Resize(ctx.Input<Tensor>("Ref")->dims());
+  }
+};
+
+class ScatterGradOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    auto *dUpdates = ctx.Output<Tensor>(framework::GradVarName("Updates"));
+    auto *Updates = ctx.Input<Tensor>("Updates");
+    auto *dRef = ctx.Output<Tensor>(framework::GradVarName("Ref"));
+    auto *Ref = ctx.Input<Tensor>("Ref");
+
+    dRef->Resize(Ref->dims());
+    dUpdates->Resize(Updates->dims());
+  }
+};
+
+class ScatterOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  ScatterOpMaker(framework::OpProto *proto,
+                 framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("Ref", "The source input of scatter op");
+    AddInput("Index",
+             "The index input of scatter op where Ref will be updated");
+    AddInput("Updates", "The updated value of scatter op");
+    AddOutput("Out", "The output of scatter op");
+    AddComment(R"DOC(
+Scatter Operator updates Ref along the first axis:
+
+Out = Ref
+Out[Index] = Ref[Index] + Updates
+)DOC");
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP(scatter, ops::ScatterOp, ops::ScatterOpMaker, scatter_grad,
+            ops::ScatterGradOp);
+REGISTER_OP_CPU_KERNEL(
+    scatter, ops::ScatterOpKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(
+    scatter_grad,
+    ops::ScatterGradientOpKernel<paddle::platform::CPUPlace, float>);
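Reviewer note: the DOC block above defines the scatter semantics this patch implements. As a cross-check, here is a minimal NumPy sketch of the forward pass (illustrative only, not part of the patch; it assumes the indices are unique, as they are in the tests below):

    import numpy as np

    def scatter_forward(ref, index, updates):
        # Out = Ref, then Out[Index] += Updates along the first axis.
        out = np.copy(ref)
        out[index] += updates  # valid only for unique indices
        return out

    ref = np.ones((3, 3), dtype=np.float32)
    index = np.array([1, 2], dtype=np.int32)
    updates = np.random.random((2, 3)).astype(np.float32)
    print(scatter_forward(ref, index, updates))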
diff --git a/paddle/operators/scatter_op.cu b/paddle/operators/scatter_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..6716b478833ff3adb6112cdb1ee25b7f1744ea1f
--- /dev/null
+++ b/paddle/operators/scatter_op.cu
@@ -0,0 +1,20 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#define EIGEN_USE_GPU
+#include "paddle/operators/scatter_op.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_GPU_KERNEL(
+    scatter, ops::ScatterOpKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/scatter_op.h b/paddle/operators/scatter_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..e9595638a86a4a4536ddad4e6f20fd80a54b1608
--- /dev/null
+++ b/paddle/operators/scatter_op.h
@@ -0,0 +1,60 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "gather.h"
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+#include "scatter.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+template <typename Place, typename T>
+class ScatterOpKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext &ctx) const override {
+    auto *Ref = ctx.Input<Tensor>("Ref");
+    auto *Index = ctx.Input<Tensor>("Index");
+    auto *Updates = ctx.Input<Tensor>("Updates");
+    auto *Out = ctx.Output<Tensor>("Out");
+
+    // In place output: Out = Ref, Out[Index] += Updates
+    Out->ShareDataWith<T>(*Ref);
+    // Apply ScatterUpdate: Out[Index] += Updates[:]
+    ScatterUpdate<T>(ctx.GetPlace(), Updates, Index, Out);
+  }
+};
+
+template <typename Place, typename T>
+class ScatterGradientOpKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext &ctx) const override {
+    auto *dRef = ctx.Output<Tensor>(framework::GradVarName("Ref"));
+    auto *dUpdates = ctx.Output<Tensor>(framework::GradVarName("Updates"));
+    auto *Index = ctx.Input<Tensor>("Index");
+    auto *dOut = ctx.Input<Tensor>(framework::GradVarName("Out"));
+
+    // In place gradient: dRef = dOut
+    dRef->ShareDataWith<T>(*dOut);
+    dUpdates->mutable_data<T>(ctx.GetPlace());
+    // Gradient by Gather: dUpdates = dOut[Index]
+    Gather<T>(ctx.GetPlace(), dOut, Index, dUpdates);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
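Reviewer note: the two kernels above are terse because ShareDataWith aliases buffers instead of copying. A NumPy sketch of what the backward pass computes (illustrative only, not part of the patch):

    import numpy as np

    def scatter_backward(d_out, index):
        # Out = Ref + scatter(Updates), so Ref's gradient is dOut unchanged;
        # the kernel expresses this by sharing dOut's buffer.
        d_ref = d_out
        # Updates' gradient is a gather of dOut's rows along the first axis.
        d_updates = d_out[index]
        return d_ref, d_updates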
diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt
index abb9c248eee9c59e8e6b9fa9d1878fec5dd67569..37e186a408ff5f560b5878e3e51ea81ca5810bc7 100644
--- a/paddle/pybind/CMakeLists.txt
+++ b/paddle/pybind/CMakeLists.txt
@@ -4,6 +4,7 @@ cc_library(paddle_pybind SHARED
     DEPS pybind python backward
         sgd_op
         gather_op
+        scatter_op
         add_op
         mul_op
         rowwise_add_op
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 8fa8be2cef5fff04ed61ac726e5d8111e30c8a09..3bc150ccb7af2885439cc2344aa0db9ba3b1ca03 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -47,6 +47,7 @@ USE_OP(scale);
 USE_OP_ITSELF(identity);
 USE_OP(minus);
 USE_CPU_ONLY_OP(gather);
+USE_CPU_ONLY_OP(scatter);
 
 namespace paddle {
 namespace framework {
diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt
index fb4686889a644753afdeb748b444e757ed016eda..661ebd89648feec77367c278e5f045b8238e1dc1 100644
--- a/python/paddle/v2/framework/tests/CMakeLists.txt
+++ b/python/paddle/v2/framework/tests/CMakeLists.txt
@@ -14,6 +14,7 @@ py_test(test_sigmoid_op SRCS test_sigmoid_op.py)
 py_test(test_softmax_op SRCS test_softmax_op.py)
 py_test(test_cross_entropy_op SRCS test_cross_entropy_op.py)
 py_test(test_gather_op SRCS test_gather_op.py)
+py_test(test_scatter_op SRCS test_scatter_op.py)
 py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py)
 
 py_test(gradient_checker SRCS gradient_checker.py)
diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py
index d452197ce94eff4dbd773c403007af91ff88c002..9a7a7fbf5e63d4e433576f8e980c41c72fa26cab 100644
--- a/python/paddle/v2/framework/tests/gradient_checker.py
+++ b/python/paddle/v2/framework/tests/gradient_checker.py
@@ -32,7 +32,8 @@ def get_numeric_gradient(op,
                          output_name,
                          input_to_check,
                          delta=0.005,
-                         local_scope=None):
+                         local_scope=None,
+                         in_place=False):
     """
     Get Numeric Gradient for an operator's input.
@@ -81,6 +82,11 @@ def get_numeric_gradient(op,
     def product(dim):
         return reduce(lambda a, b: a * b, dim, 1)
 
+    def restore_inputs():
+        for var_name in input_values:
+            tensor_ = local_scope.find_var(var_name).get_tensor()
+            tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace())
+
     # get the input tensor that we want to get its numeric gradient.
     tensor_to_check = local_scope.find_var(input_to_check).get_tensor()
     tensor_size = product(tensor_to_check.get_dims())
@@ -90,6 +96,8 @@ def get_numeric_gradient(op,
     # we only compute gradient of one element each time.
     # we use a for loop to compute the gradient of every element.
     for i in xrange(tensor_size):
+        if in_place:
+            restore_inputs()
         # get one input element by its index i.
         origin = tensor_to_check.get_float_element(i)
 
@@ -99,6 +107,8 @@ def get_numeric_gradient(op,
         y_pos = get_output()
 
         # minus delta to this element, run op and get the sum of the result tensor.
+        if in_place:
+            restore_inputs()
         x_neg = origin - delta
         tensor_to_check.set_float_element(i, x_neg)
         y_neg = get_output()
@@ -251,6 +261,7 @@ class GradientChecker(unittest.TestCase):
                    output_name,
                    no_grad_set=None,
                    only_cpu=False,
+                   in_place=False,
                    max_relative_error=0.005):
         """
         :param forward_op: used to create backward_op
@@ -283,7 +294,8 @@ class GradientChecker(unittest.TestCase):
 
         # get numerical gradients
         numeric_grads = [
-            get_numeric_gradient(forward_op, input_vars, output_name, name)
+            get_numeric_gradient(
+                forward_op, input_vars, output_name, name, in_place=in_place)
             for name in inputs_to_check
         ]
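Reviewer note on the in_place flag: the scatter kernel makes Out share Ref's buffer, so running the forward op mutates its own input. Without resetting the inputs before every evaluation, each finite-difference step would perturb an already-perturbed tensor and the numeric gradient would drift. A self-contained sketch of the idea (hypothetical helper, not the checker's actual API; f maps the input array to a scalar output and may mutate its argument):

    import numpy as np

    def numeric_gradient_in_place(f, x, delta=0.005):
        x0 = np.copy(x)               # snapshot, like input_values above
        grad = np.zeros_like(x0)
        flat_x, flat_x0 = x.reshape(-1), x0.reshape(-1)
        for i in range(flat_x.size):
            x[...] = x0               # restore before the +delta run
            flat_x[i] = flat_x0[i] + delta
            y_pos = f(x)
            x[...] = x0               # restore before the -delta run
            flat_x[i] = flat_x0[i] - delta
            y_neg = f(x)
            grad.reshape(-1)[i] = (y_pos - y_neg) / (2.0 * delta)
        x[...] = x0
        return grad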
diff --git a/python/paddle/v2/framework/tests/test_gather_op.py b/python/paddle/v2/framework/tests/test_gather_op.py
index e86898304252d08be718e40fed46c5e921596af7..e3de3fd0a1dddb3edb0de5987bd71d8a176d97ef 100644
--- a/python/paddle/v2/framework/tests/test_gather_op.py
+++ b/python/paddle/v2/framework/tests/test_gather_op.py
@@ -21,12 +21,9 @@ class TestGatherOp(unittest.TestCase):
 
 class TestGatherGradOp(GradientChecker):
     def test_gather_grad(self):
-        print 'creating op'
         op = create_op("gather")
-        print 'creating op done'
         xnp = numpy.random.random((10, 20)).astype("float32")
         inputs = {'X': xnp, 'Index': numpy.array([1, 3, 5]).astype("int32")}
-        print 'correct before check gradient'
         self.check_grad(op, inputs, set("X"), "Out")
diff --git a/python/paddle/v2/framework/tests/test_scatter_op.py b/python/paddle/v2/framework/tests/test_scatter_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1f9444889372104e39ded78fc7207a59b80a293
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_scatter_op.py
@@ -0,0 +1,38 @@
+import unittest
+from op_test_util import OpTestMeta
+from gradient_checker import GradientChecker, create_op
+import numpy
+import paddle.v2.framework.core as core
+from paddle.v2.framework.op import Operator
+
+
+class TestScatterOp(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+
+    def setUp(self):
+        self.type = "scatter"
+        ref_np = numpy.ones((3, 3)).astype("float32")
+        index_np = numpy.array([1, 2]).astype("int32")
+        updates_np = numpy.random.random((2, 3)).astype("float32")
+        output_np = numpy.copy(ref_np)
+        output_np[index_np] += updates_np
+        self.inputs = {'Ref': ref_np, 'Index': index_np, 'Updates': updates_np}
+        self.outputs = {'Out': output_np}
+
+
+class TestScatterGradOp(GradientChecker):
+    def test_scatter_grad(self):
+        op = create_op("scatter")
+        # test data setup
+        ref_np = numpy.ones((3, 10)).astype("float32")
+        index_np = numpy.array([1, 2]).astype("int32")
+        updates_np = numpy.random.random((2, 10)).astype("float32")
+        output_np = numpy.copy(ref_np)
+        output_np[index_np] += updates_np
+        inputs = {'Ref': ref_np, 'Index': index_np, 'Updates': updates_np}
+        self.check_grad(
+            op, inputs, set(["Updates", "Ref"]), "Out", in_place=True)
+
+
+if __name__ == "__main__":
+    unittest.main()
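A note on the test data: the reference output is computed with NumPy fancy indexing (output_np[index_np] += updates_np), which does not accumulate repeated indices, so the indices [1, 2] must stay distinct for the expectation to match a true scatter-add. A quick demonstration of the pitfall:

    import numpy as np

    out = np.zeros(3)
    out[np.array([0, 0])] += np.array([1.0, 2.0])
    print(out)  # [2. 0. 0.] -- buffered +=, the duplicate write wins

    out = np.zeros(3)
    np.add.at(out, np.array([0, 0]), np.array([1.0, 2.0]))
    print(out)  # [3. 0. 0.] -- np.add.at accumulates duplicates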