diff --git a/paddle/function/CosSimOp.cpp b/paddle/function/CosSimOp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c1473a19ede5c438de479cbf4109c0379cb32393 --- /dev/null +++ b/paddle/function/CosSimOp.cpp @@ -0,0 +1,93 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "CosSimOp.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/Vector.h" + +namespace paddle { +template <> +void CosSimForward(CpuMatrix* out_mat, + const CpuMatrix* in1_mat, + const CpuMatrix* in2_mat, + real scale) { + CHECK(out_mat && in1_mat && in2_mat); + size_t num_samples = out_mat->getHeight(); + size_t dim = in1_mat->getWidth(); + /// column vector [nSamples, 1] + real* out = out_mat->getData(); + const real* x = in1_mat->getData(); + const real* y = in2_mat->getData(); + + /// in2 might only have one row or full rows + CHECK(in2_mat->getHeight() == 1LU || in2_mat->getHeight() == num_samples); + size_t inc = (in2_mat->getHeight() == 1LU) ? 0 : dim; + for (size_t i = 0; i < num_samples; ++i, x += dim, y += inc) { + /// for each row, todo(tianbing), use TensorExpression square2 ? + real square_sum_x = 0; + real square_sum_y = 0; + real xy = 0; + for (size_t j = 0; j < dim; ++j) { + square_sum_x += x[j] * x[j]; + square_sum_y += y[j] * y[j]; + xy += x[j] * y[j]; + } + CHECK(square_sum_x > 0 && square_sum_y > 0); + out[i] = scale * xy / (std::sqrt(square_sum_x) * std::sqrt(square_sum_y)); + } +} + +/** + * \param inputs[0] input matrix 1, size: nSamples * dim. + * \param inputs[1] input matrix 2, size: n2 * dim (n2 == 1 or n2 == nSamples). + * \param outputs[0] output matrix, size : nSamples * 1. + */ + +template +class CosSimForwardFunc : public FunctionBase { + void init(const FuncConfig& config) override { + scale_ = config.get("scale"); + } + + void calc(const Arguments& inputs, + const Arguments& outputs, + const Arguments& inouts) override { + CHECK_EQ(inputs.size(), 2); + CHECK_EQ(outputs.size(), 1); + CHECK_EQ(inouts.size(), 0); + + CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]); + CHECK_EQ(inputs[0].dims_[1], inputs[1].dims_[1]); + CHECK_EQ(outputs[0].dims_[1], 1UL); + + CHECK(outputs[0].getData() && inputs[0].getData() && inputs[1].getData()); + auto out_mat = std::make_shared::type>( + outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]); + const auto in1_mat = std::make_shared::type>( + inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]); + const auto in2_mat = std::make_shared::type>( + inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]); + + CosSimForward(out_mat.get(), in1_mat.get(), in2_mat.get(), scale_); + } + +private: + real scale_; +}; + +REGISTER_TYPED_FUNC(CosSimForward, CPU, CosSimForwardFunc); +#ifndef PADDLE_ONLY_CPU +REGISTER_TYPED_FUNC(CosSimForward, GPU, CosSimForwardFunc); +#endif +} // namespace paddle diff --git a/paddle/function/CosSimOp.h b/paddle/function/CosSimOp.h new file mode 100644 index 0000000000000000000000000000000000000000..02250d6db9c644549589c0cf8f2cc110d5f740c3 --- /dev/null +++ b/paddle/function/CosSimOp.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Function.h" + +namespace paddle { + +/** + * \brief Cosine Similarity Forward. + * for each row i, + * out[i] = scale * cos(in1[i], in2[i]) + * = scale * \sum_j (in1[i][j] * in2[i][j]) / + * sqrt(sum_j (in1[i][j]^2) * sum_j (in2[i][j])^2) + * + * \param[out] output output data. + * \param[in] intput1 input data. + * \param[in] intput2 input data. + * \param[in] scale default 1.0. + * + */ +template +void CosSimForward(typename MatrixT::type* output, + const typename MatrixT::type* input1, + const typename MatrixT::type* input2, + real scale); + +} // namespace paddle diff --git a/paddle/function/CosSimOpGpu.cu b/paddle/function/CosSimOpGpu.cu new file mode 100644 index 0000000000000000000000000000000000000000..34835fa5d87c398faa3f52769d317995bbc44bfa --- /dev/null +++ b/paddle/function/CosSimOpGpu.cu @@ -0,0 +1,102 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "hl_base.h" +#include "CosSimOp.h" + +namespace paddle { + +template +__global__ void KeCosSim(real* output, + const real* input1, + const real* input2, + int width, + int input1_height, + int input2_height, + real scale) { + const int ty = blockIdx.y; + int tid = threadIdx.x; + + __shared__ real xx[block_size]; + __shared__ real yy[block_size]; + __shared__ real xy[block_size]; + + xx[tid] = 0.0; + yy[tid] = 0.0; + xy[tid] = 0.0; + __syncthreads(); + + input1 += ty * width; + if (input2_height > 1) { + input2 += ty * width; + } + for (int index = tid; index < width; index += block_size) { + real x = input1[index]; + real y = input2[index]; + xx[tid] += x * x; + yy[tid] += y * y; + xy[tid] += x * y; + } + __syncthreads(); + + for (int s = block_size / 2; s > 0; s >>= 1) { + if (tid < s) { + xx[tid] += xx[tid + s]; + yy[tid] += yy[tid + s]; + xy[tid] += xy[tid + s]; + } + __syncthreads(); + } + if (tid == 0) { + output[ty] = scale * xy[0] / (sqrt(xx[0]) * sqrt(yy[0])); + } +} + +void hlCossim(real* output, + const real* input1, + const real* input2, + size_t width, + size_t input1_height, + size_t input2_height, + real scale) { + CHECK_NOTNULL(output); + CHECK_NOTNULL(input1); + CHECK_NOTNULL(input2); + const int block_size = 256; + dim3 threads(block_size, 1); + dim3 grid(1, input1_height); + + KeCosSim<<>> + (output, input1, input2, width, input1_height, input2_height, scale); + CHECK_SYNC("hl_cossim failed"); +} + +template <> +void CosSimForward(GpuMatrix* out_mat, + const GpuMatrix* in1_mat, + const GpuMatrix* in2_mat, + real scale) { + CHECK(out_mat && in1_mat && in2_mat); + CHECK(in1_mat->useGpu_ == true && in2_mat->useGpu_ == true) + << "Matrix type are not GPU"; + + size_t numSamples = out_mat->getHeight(); + size_t dim = in1_mat->getWidth(); + real* out = out_mat->getData(); + const real* x = in1_mat->getData(); + const real* y = in2_mat->getData(); + hlCossim(out, x, y, dim, in1_mat->getHeight(), in2_mat->getHeight(), scale); +} + +} // namespace paddle diff --git a/paddle/function/CosSimOpTest.cpp b/paddle/function/CosSimOpTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..49c54620feb3557ae60c2ea624e5f0f2e5934149 --- /dev/null +++ b/paddle/function/CosSimOpTest.cpp @@ -0,0 +1,63 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "FunctionTest.h" +#include "paddle/math/Matrix.h" + +using namespace paddle; // NOLINT + +void testCosSimForward(size_t height_x, + size_t height_y, + size_t width, + real scale) { + FunctionCompare compare("CosSimForward", FuncConfig().set("scale", scale)); + + CpuMatrix cpu_arg1(height_x, width); + CpuMatrix gpu_arg1(height_x, width); + CpuMatrix cpu_arg2(height_y, width); + CpuMatrix gpu_arg2(height_y, width); + cpu_arg1.randomizeUniform(); + gpu_arg1.copyFrom(cpu_arg1); + cpu_arg2.randomizeUniform(); + cpu_arg2.add(-0.5); + gpu_arg2.copyFrom(cpu_arg2); + CpuMatrix cpu_out(height_x, 1); + GpuMatrix gpu_out(height_x, 1); + + compare.getCpuFunction()->calc( + {Tensor(cpu_arg1.getData(), Dims{height_x, width}), + Tensor(cpu_arg2.getData(), Dims{height_y, width})}, + {Tensor(cpu_out.getData(), Dims{height_x, 1})}, + {}); + compare.getGpuFunction()->calc( + {Tensor(gpu_arg1.getData(), Dims{height_x, width}), + Tensor(gpu_arg2.getData(), Dims{height_y, width})}, + {Tensor(gpu_out.getData(), Dims{height_x, 1})}, + {}); + + autotest::TensorCheckErr(cpu_out, gpu_out); +} + +TEST(Matrix, cosSim) { + for (auto height_x : {10, 100, 1000}) { + for (auto height_y : {1, height_x}) { + for (auto width : {10, 100, 1000}) { + for (auto scale : {1.0, 2.0}) { + testCosSimForward(height_x, height_y, width, scale); + } + } + } + } +}