Commit 9ee72367 authored by xutianbing

Cosine Similarity Forward Paddle Function.

Parent 823a3f02
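The commit adds a CosSimForward Paddle Function in four parts: the CPU implementation of the device-templated kernel, the shared header CosSimOp.h that declares it, a CUDA implementation built on a block-per-row reduction kernel, and a gtest case that checks the CPU and GPU outputs against each other.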
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "CosSimOp.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/Vector.h"
namespace paddle {
template <>
void CosSimForward<DEVICE_TYPE_CPU>(CpuMatrix* out_mat,
                                    const CpuMatrix* in1_mat,
                                    const CpuMatrix* in2_mat,
                                    real scale) {
  CHECK(out_mat && in1_mat && in2_mat);
  size_t num_samples = out_mat->getHeight();
  size_t dim = in1_mat->getWidth();
  /// column vector [nSamples, 1]
  real* out = out_mat->getData();
  const real* x = in1_mat->getData();
  const real* y = in2_mat->getData();

  /// in2 might only have one row or full rows
  CHECK(in2_mat->getHeight() == 1LU || in2_mat->getHeight() == num_samples);
  size_t inc = (in2_mat->getHeight() == 1LU) ? 0 : dim;
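  /// inc == 0 keeps y on the same (broadcast) row; inc == dim advances per sample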
  for (size_t i = 0; i < num_samples; ++i, x += dim, y += inc) {
    /// for each row; TODO(tianbing): use TensorExpression square2?
    real square_sum_x = 0;
    real square_sum_y = 0;
    real xy = 0;
    for (size_t j = 0; j < dim; ++j) {
      square_sum_x += x[j] * x[j];
      square_sum_y += y[j] * y[j];
      xy += x[j] * y[j];
    }
    CHECK(square_sum_x > 0 && square_sum_y > 0);
    out[i] = scale * xy / (std::sqrt(square_sum_x) * std::sqrt(square_sum_y));
  }
}
/**
 * \param inputs[0]  input matrix 1, size: nSamples * dim.
 * \param inputs[1]  input matrix 2, size: n2 * dim (n2 == 1 or n2 == nSamples).
 * \param outputs[0] output matrix, size: nSamples * 1.
 */
template <DeviceType Device>
class CosSimForwardFunc : public FunctionBase {
  void init(const FuncConfig& config) override {
    scale_ = config.get<real>("scale");
  }

  void calc(const Arguments& inputs,
            const Arguments& outputs,
            const Arguments& inouts) override {
    CHECK_EQ(inputs.size(), 2UL);
    CHECK_EQ(outputs.size(), 1UL);
    CHECK_EQ(inouts.size(), 0UL);
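    /// shape contract: in1 is [nSamples x dim], in2 shares dim (its height,
    /// 1 or nSamples, is validated in the kernel), out is [nSamples x 1]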
    CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]);
    CHECK_EQ(inputs[0].dims_[1], inputs[1].dims_[1]);
    CHECK_EQ(outputs[0].dims_[1], 1UL);

    CHECK(outputs[0].getData() && inputs[0].getData() && inputs[1].getData());
    auto out_mat = std::make_shared<typename MatrixT<Device>::type>(
        outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
    const auto in1_mat = std::make_shared<typename MatrixT<Device>::type>(
        inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
    const auto in2_mat = std::make_shared<typename MatrixT<Device>::type>(
        inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);

    CosSimForward<Device>(out_mat.get(), in1_mat.get(), in2_mat.get(), scale_);
  }

private:
  real scale_;
};
REGISTER_TYPED_FUNC(CosSimForward, CPU, CosSimForwardFunc);
#ifndef PADDLE_ONLY_CPU
REGISTER_TYPED_FUNC(CosSimForward, GPU, CosSimForwardFunc);
#endif
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Function.h"
namespace paddle {
/**
 * \brief Cosine Similarity Forward.
 *        for each row i,
 *        out[i] = scale * cos(in1[i], in2[i])
 *               = scale * \sum_j (in1[i][j] * in2[i][j]) /
 *                 sqrt(\sum_j (in1[i][j]^2) * \sum_j (in2[i][j]^2))
 *
 * \param[out]  output  output data.
 * \param[in]   input1  input data.
 * \param[in]   input2  input data.
 * \param[in]   scale   default 1.0.
 *
 */
template <DeviceType Device>
void CosSimForward(typename MatrixT<Device>::type* output,
                   const typename MatrixT<Device>::type* input1,
                   const typename MatrixT<Device>::type* input2,
                   real scale);
} // namespace paddle
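Below is a minimal usage sketch of the kernel declared above, calling the CPU specialization directly on CpuMatrix buffers (the same matrix API the unit test uses). The function name cosSimExample is illustrative and not part of this commit:

#include "CosSimOp.h"
#include "paddle/math/Matrix.h"

// Hypothetical example: row-wise cosine similarity of two 4 x 8 matrices.
void cosSimExample() {
  const size_t num_samples = 4;
  const size_t dim = 8;
  paddle::CpuMatrix in1(num_samples, dim);
  paddle::CpuMatrix in2(num_samples, dim);
  paddle::CpuMatrix out(num_samples, 1);
  in1.randomizeUniform();  // uniform random values keep both row norms non-zero
  in2.randomizeUniform();
  // out[i] = scale * cos(in1[i], in2[i]) for each of the num_samples rows.
  paddle::CosSimForward<paddle::DEVICE_TYPE_CPU>(&out, &in1, &in2, /*scale=*/1.0);
}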
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_base.h"
#include "CosSimOp.h"
namespace paddle {
template <int block_size>
__global__ void KeCosSim(real* output,
                         const real* input1,
                         const real* input2,
                         int width,
                         int input1_height,
                         int input2_height,
                         real scale) {
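  // One thread block per output row: blockIdx.y selects the row, and
  // block_size threads cooperate on that row's three partial sums.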
  const int ty = blockIdx.y;
  int tid = threadIdx.x;

  __shared__ real xx[block_size];
  __shared__ real yy[block_size];
  __shared__ real xy[block_size];

  xx[tid] = 0.0;
  yy[tid] = 0.0;
  xy[tid] = 0.0;
  __syncthreads();

  input1 += ty * width;
  if (input2_height > 1) {
    input2 += ty * width;
  }
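  // Each thread strides across the row, accumulating x*x, y*y and x*y
  // into its own shared-memory slot.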
  for (int index = tid; index < width; index += block_size) {
    real x = input1[index];
    real y = input2[index];
    xx[tid] += x * x;
    yy[tid] += y * y;
    xy[tid] += x * y;
  }
  __syncthreads();
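  // Tree reduction in shared memory: halve the number of active threads
  // each step until the row totals end up in slot 0.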
  for (int s = block_size / 2; s > 0; s >>= 1) {
    if (tid < s) {
      xx[tid] += xx[tid + s];
      yy[tid] += yy[tid + s];
      xy[tid] += xy[tid + s];
    }
    __syncthreads();
  }

  if (tid == 0) {
    output[ty] = scale * xy[0] / (sqrt(xx[0]) * sqrt(yy[0]));
  }
}
void hlCossim(real* output,
              const real* input1,
              const real* input2,
              size_t width,
              size_t input1_height,
              size_t input2_height,
              real scale) {
  CHECK_NOTNULL(output);
  CHECK_NOTNULL(input1);
  CHECK_NOTNULL(input2);
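  // Launch one 256-thread block per row of input1; grid.y indexes the rows.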
  const int block_size = 256;
  dim3 threads(block_size, 1);
  dim3 grid(1, input1_height);

  KeCosSim<block_size><<<grid, threads, 0, STREAM_DEFAULT>>>(
      output, input1, input2, width, input1_height, input2_height, scale);
  CHECK_SYNC("hlCossim failed");
}
template <>
void CosSimForward<DEVICE_TYPE_GPU>(GpuMatrix* out_mat,
                                    const GpuMatrix* in1_mat,
                                    const GpuMatrix* in2_mat,
                                    real scale) {
  CHECK(out_mat && in1_mat && in2_mat);
  CHECK(in1_mat->useGpu_ == true && in2_mat->useGpu_ == true)
      << "Matrix types are not GPU";
  size_t dim = in1_mat->getWidth();
  real* out = out_mat->getData();
  const real* x = in1_mat->getData();
  const real* y = in2_mat->getData();
  hlCossim(out, x, y, dim, in1_mat->getHeight(), in2_mat->getHeight(), scale);
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "FunctionTest.h"
#include "paddle/math/Matrix.h"
using namespace paddle; // NOLINT
void testCosSimForward(size_t height_x,
                       size_t height_y,
                       size_t width,
                       real scale) {
  FunctionCompare compare("CosSimForward", FuncConfig().set("scale", scale));

  CpuMatrix cpu_arg1(height_x, width);
  GpuMatrix gpu_arg1(height_x, width);
  CpuMatrix cpu_arg2(height_y, width);
  GpuMatrix gpu_arg2(height_y, width);
  cpu_arg1.randomizeUniform();
  gpu_arg1.copyFrom(cpu_arg1);
  cpu_arg2.randomizeUniform();
  cpu_arg2.add(-0.5);
  gpu_arg2.copyFrom(cpu_arg2);

  CpuMatrix cpu_out(height_x, 1);
  GpuMatrix gpu_out(height_x, 1);

  compare.getCpuFunction()->calc(
      {Tensor(cpu_arg1.getData(), Dims{height_x, width}),
       Tensor(cpu_arg2.getData(), Dims{height_y, width})},
      {Tensor(cpu_out.getData(), Dims{height_x, 1})},
      {});
  compare.getGpuFunction()->calc(
      {Tensor(gpu_arg1.getData(), Dims{height_x, width}),
       Tensor(gpu_arg2.getData(), Dims{height_y, width})},
      {Tensor(gpu_out.getData(), Dims{height_x, 1})},
      {});

  autotest::TensorCheckErr(cpu_out, gpu_out);
}
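// Sweep both the broadcast case (height_y == 1) and the row-matched case
// (height_y == height_x) over several sizes and scales.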
TEST(Matrix, cosSim) {
  for (auto height_x : {10, 100, 1000}) {
    for (auto height_y : {1, height_x}) {
      for (auto width : {10, 100, 1000}) {
        for (auto scale : {1.0, 2.0}) {
          testCosSimForward(height_x, height_y, width, scale);
        }
      }
    }
  }
}