From ed72af48cecb1462b14c2105e9451052fa583cde Mon Sep 17 00:00:00 2001
From: Xinghai Sun
Date: Fri, 1 Sep 2017 19:01:12 +0800
Subject: [PATCH] Add cos_sim op.

---
 paddle/operators/CMakeLists.txt               |  2 +-
 paddle/operators/cos_sim_op.cc                | 91 ++++++++++++++++++
 paddle/operators/cos_sim_op.cu                | 22 +++++
 paddle/operators/cos_sim_op.h                 | 93 +++++++++++++++++++
 paddle/pybind/pybind.cc                       |  1 +
 .../paddle/v2/framework/tests/CMakeLists.txt  |  1 +
 .../v2/framework/tests/gradient_checker.py    | 21 +++--
 .../paddle/v2/framework/tests/op_test_util.py |  8 +-
 .../v2/framework/tests/test_cos_sim_op.py     | 40 ++++++++
 9 files changed, 267 insertions(+), 12 deletions(-)
 create mode 100644 paddle/operators/cos_sim_op.cc
 create mode 100644 paddle/operators/cos_sim_op.cu
 create mode 100644 paddle/operators/cos_sim_op.h
 create mode 100644 python/paddle/v2/framework/tests/test_cos_sim_op.py

diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index e5efcccb0..25dbd236e 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -56,7 +56,7 @@ list(REMOVE_ITEM GENERAL_OPS
 op_library(net_op SRCS net_op.cc)
 op_library(minus_op SRCS minus_op.cc minus_op.cu DEPS scale_op)
 op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function)
-op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc 
+op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc
     DEPS framework_proto tensor operator net_op)
 op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op)
diff --git a/paddle/operators/cos_sim_op.cc b/paddle/operators/cos_sim_op.cc
new file mode 100644
index 000000000..21a616522
--- /dev/null
+++ b/paddle/operators/cos_sim_op.cc
@@ -0,0 +1,91 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/operators/cos_sim_op.h"
+
+namespace paddle {
+namespace operators {
+
+using framework::Tensor;
+
+class CosSimOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null.");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should not be null.");
+    PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("X")->dims(),
+                      ctx.Input<Tensor>("Y")->dims(),
+                      "Dimensions of Input(X) and Input(Y) must be the same.");
+
+    auto dims = ctx.Input<Tensor>("X")->dims();
+    ctx.Output<Tensor>("Out")->Resize({dims[0], 1});
+  }
+};
+
+class CosSimOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  CosSimOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "The first input of cos_sim op.");
+    AddInput("Y", "The second input of cos_sim op.");
+    AddOutput("Out", "The output of cos_sim op.");
+    AddComment(R"DOC(
+Cosine Similarity Operator.
+
+The equation is: Out = X^T * Y / (sqrt(X^T * X) * sqrt(Y^T * Y))
+)DOC");
+  }
+};
+
+class CosSimOpGrad : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null.");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should not be null.");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
+                            "Input(Out@GRAD) should not be null.");
+
+    auto x_dims = ctx.Input<Tensor>("X")->dims();
+    auto y_dims = ctx.Input<Tensor>("Y")->dims();
+    auto out_dims = ctx.Input<Tensor>(framework::GradVarName("Out"))->dims();
+    PADDLE_ENFORCE_EQ(x_dims, y_dims,
+                      "Dimensions of Input(X) and Input(Y) must be the same.");
+    PADDLE_ENFORCE_EQ(out_dims[0], x_dims[0],
+                      "1st dimension of Out@GRAD must equal that of Input(X).");
+    PADDLE_ENFORCE_EQ(out_dims[1], 1,
+                      "2nd dimension of Out@GRAD must be 1.");
+
+    auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto *y_grad = ctx.Output<Tensor>(framework::GradVarName("Y"));
+    x_grad->Resize(x_dims);
+    y_grad->Resize(y_dims);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP(cos_sim, ops::CosSimOp, ops::CosSimOpMaker, cos_sim_grad,
+            ops::CosSimOpGrad);
+REGISTER_OP_CPU_KERNEL(cos_sim,
+                       ops::CosSimKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(
+    cos_sim_grad, ops::CosSimGradKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/cos_sim_op.cu b/paddle/operators/cos_sim_op.cu
new file mode 100644
index 000000000..0cb8fd26d
--- /dev/null
+++ b/paddle/operators/cos_sim_op.cu
@@ -0,0 +1,22 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#define EIGEN_USE_GPU
+#include "paddle/operators/cos_sim_op.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_GPU_KERNEL(cos_sim,
+                       ops::CosSimKernel<paddle::platform::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(
+    cos_sim_grad, ops::CosSimGradKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h
new file mode 100644
index 000000000..5247087cc
--- /dev/null
+++ b/paddle/operators/cos_sim_op.h
@@ -0,0 +1,93 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+template <typename T, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
+
+template <typename Place, typename T>
+class CosSimKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* x = context.Input<Tensor>("X");
+    auto* y = context.Input<Tensor>("Y");
+    auto* z = context.Output<Tensor>("Out");
+
+    z->mutable_data<T>(context.GetPlace());
+
+    auto dims = x->dims();
+    int size = static_cast<int>(framework::product(dims));
+    auto new_dims = framework::make_ddim({dims[0], size / dims[0]});
+    auto X = EigenMatrix<T>::From(*x, new_dims);
+    auto Y = EigenMatrix<T>::From(*y, new_dims);
+    auto Z = EigenMatrix<T>::From(*z, new_dims);
+
+    auto XY = (X * Y).sum(Eigen::array<int, 1>({1}));
+    auto XX = (X * X).sum(Eigen::array<int, 1>({1}));
+    auto YY = (Y * Y).sum(Eigen::array<int, 1>({1}));
+    auto place = context.GetEigenDevice<Place>();
+    Z.device(place) = XY / XX.sqrt() / YY.sqrt();
+  }
+};
+
+template <typename Place, typename T>
+class CosSimGradKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* x = context.Input<Tensor>("X");
+    auto* y = context.Input<Tensor>("Y");
+    auto* z = context.Input<Tensor>("Out");
+    auto* grad_x = context.Output<Tensor>(framework::GradVarName("X"));
+    auto* grad_y = context.Output<Tensor>(framework::GradVarName("Y"));
+    auto* grad_z = context.Input<Tensor>(framework::GradVarName("Out"));
+
+    grad_x->mutable_data<T>(context.GetPlace());
+    grad_y->mutable_data<T>(context.GetPlace());
+
+    auto dims = x->dims();
+    int size = static_cast<int>(framework::product(dims));
+    auto new_dims = framework::make_ddim({dims[0], size / dims[0]});
+    auto X = EigenMatrix<T>::From(*x, new_dims);
+    auto Y = EigenMatrix<T>::From(*y, new_dims);
+    auto Z = EigenMatrix<T>::From(*z);
+    auto dX = EigenMatrix<T>::From(*grad_x, new_dims);
+    auto dY = EigenMatrix<T>::From(*grad_y, new_dims);
+    auto dZ = EigenMatrix<T>::From(*grad_z);
+
+    auto XX = (X * X).sum(Eigen::array<int, 1>({1}));
+    auto YY = (Y * Y).sum(Eigen::array<int, 1>({1}));
+    Eigen::DSizes<int, 2> bcast(1, dims[1]);
+    auto denominator_bcast = (XX.sqrt() * YY.sqrt()).broadcast(bcast);
+    auto Z_bcast = Z.broadcast(bcast);
+    auto dZ_bcast = dZ.broadcast(bcast);
+    auto place = context.GetEigenDevice<Place>();
+    dX.device(place) =
+        dZ_bcast * (Y / denominator_bcast - Z_bcast * X / XX.broadcast(bcast));
+    dY.device(place) =
+        dZ_bcast * (X / denominator_bcast - Z_bcast * Y / YY.broadcast(bcast));
+    // dX.device(place) = X;
+    // Y.device(place) = Y;
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 3bc150ccb..a8ec35dfa 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -46,6 +46,7 @@ USE_OP(lookup_table);
 USE_OP(scale);
 USE_OP_ITSELF(identity);
 USE_OP(minus);
+USE_OP(cos_sim);
 USE_CPU_ONLY_OP(gather);
 USE_CPU_ONLY_OP(scatter);
diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt
index 661ebd896..e0f77d797 100644
--- a/python/paddle/v2/framework/tests/CMakeLists.txt
+++ b/python/paddle/v2/framework/tests/CMakeLists.txt
@@ -4,6 +4,7 @@ py_test(test_scope SRCS test_scope.py)
 py_test(test_tensor SRCS test_tensor.py)
 py_test(test_mul_op SRCS test_mul_op.py)
+py_test(test_cos_sim_op SRCS test_cos_sim_op.py)
 py_test(test_mean_op SRCS test_mean_op.py)
diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py
index 518f828ba..bf01ea487 100644
--- a/python/paddle/v2/framework/tests/gradient_checker.py
+++ b/python/paddle/v2/framework/tests/gradient_checker.py
@@ -36,13 +36,13 @@ def get_numeric_gradient(op,
                          in_place=False):
     """
     Get Numeric Gradient for an operator's input.
-
-    :param op: C++ operator instance, could be an network
-    :param input_values: The input variables. Should be an dictionary, key is
+
+    :param op: C++ operator instance, could be an network
+    :param input_values: The input variables. Should be an dictionary, key is
     variable name. Value is numpy array.
-    :param output_name: The final output variable name.
+    :param output_name: The final output variable name.
     :param input_to_check: The input variable need to get gradient.
-    :param delta: The perturbation value for numeric gradient method. The
+    :param delta: The perturbation value for numeric gradient method. The
     smaller delta is, the more accurate result will get. But if that delta is
     too small, it could occur numerical stability problem.
     :param local_scope: The local scope used for get_numeric_gradient.
@@ -229,9 +229,9 @@ class GradientChecker(unittest.TestCase):
         """Use relative error for the comparison.

         :param numeric_grads: the numerical graidents.
-        :type numeric_grads: a list of numpy.array
+        :type numeric_grads: a list of numpy.array
         :param analytic_grads: the analytical graidents.
-        :type analytic_grads: a list of numpy.array
+        :type analytic_grads: a list of numpy.array
         :param name: the names of gradients, used to print for debug.
         :type names: a list of string
         :param msg_prefix: string info, used to print for debug.
@@ -304,6 +304,13 @@ class GradientChecker(unittest.TestCase):
         # get analytical gradients according to different device
         analytic_grads = self.__get_gradient(forward_op, backward_op,
                                              input_vars, check_names, place)
+        #print(numeric_grads[0], numeric_grads[0].shape)
+        print("dim0: ", numeric_grads[0], numeric_grads[0].shape)
+        print("dim0: ", analytic_grads[0], analytic_grads[0].shape)
+        print("---------------------")
+        print("dim1: ", numeric_grads[1], numeric_grads[1].shape)
+        print("dim1: ", analytic_grads[1], analytic_grads[1].shape)
+        assert False
         self.__assert_is_close(numeric_grads, analytic_grads, check_names,
                                max_relative_error,
                                "Gradient Check On %s" % str(place))
diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py
index 3bc05a0fe..a4899355b 100644
--- a/python/paddle/v2/framework/tests/op_test_util.py
+++ b/python/paddle/v2/framework/tests/op_test_util.py
@@ -6,13 +6,13 @@ from paddle.v2.framework.op import Operator
 class OpTestMeta(type):
     """
     Operator Test ClassMeta.
-
-    It injects `test_all` method into user's OperatorTest class, to make Python
+
+    It injects `test_all` method into user's OperatorTest class, to make Python
     unittest module run that method.
-
+
     The `test_all` read what value is stored in `self`. It use self's values to
     create and run a operator, and check whether that op is OK or not.
-
+
     See `test_add_two_op` for example usage.
     """
diff --git a/python/paddle/v2/framework/tests/test_cos_sim_op.py b/python/paddle/v2/framework/tests/test_cos_sim_op.py
new file mode 100644
index 000000000..f3b04d25f
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_cos_sim_op.py
@@ -0,0 +1,40 @@
+import unittest
+import numpy as np
+from gradient_checker import GradientChecker, create_op
+from op_test_util import OpTestMeta
+
+
+class TestCosSimOp(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+
+    def setUp(self):
+        self.type = "cos_sim"
+        self.inputs = {
+            'X': np.random.random((32, 84)).astype("float32"),
+            'Y': np.random.random((32, 84)).astype("float32")
+        }
+        expect = (self.inputs['X'] * self.inputs['Y']).sum(axis=1) / \
+            np.linalg.norm(self.inputs['X'], axis=1) / \
+            np.linalg.norm(self.inputs['Y'], axis=1)
+        expect = np.expand_dims(expect, 1)
+        self.outputs = {'Out': expect}
+
+
+class CosSimGradOpTest(GradientChecker):
+    def test_cos_sim(self):
+        op = create_op("cos_sim")
+        #inputs = {
+        #    'X': np.random.random((2, 2)).astype("float32"),
+        #    'Y': np.random.random((2, 2)).astype("float32")
+        #}
+        inputs = {
+            'X': np.array([[0.9, 0.6], [1.9, 1.6]]).astype("float32"),
+            'Y': np.array([[0.7, 0.8], [1.7, 1.8]]).astype("float32")
+        }
+        print(inputs)
+        self.check_grad(
+            op, inputs, set(["X", "Y"]), "Out", max_relative_error=0.5)
+
+
+if __name__ == '__main__':
+    unittest.main()
--
GitLab