/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include "paddle/framework/op_registry.h"
17 18
#include "paddle/operators/math/math_function.h"
#include "paddle/platform/for_range.h"
X
Xinghai Sun 已提交
19 20 21 22 23 24

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

C
chengduoZH 已提交
25 26 27 28 29 30 31 32 33 34
/**
 * Per-row cosine-similarity functor.
 *
 * Calling the functor with a row index `offset` accumulates sum(x*x),
 * sum(y*y) and sum(x*y) over `cols` consecutive elements, takes the square
 * root of the two squared sums, stores them as norms, and writes
 * xy / (x_norm * y_norm) into z[offset].
 *
 * @tparam T         element type of every buffer.
 * @tparam same_row  true:  row `offset` of x is paired with row `offset` of y
 *                          and y_norm[offset] is written.
 *                   false: every row of x is paired with the first `cols`
 *                          elements of y, and only the call with offset == 0
 *                          writes y_norm[0].
 *
 * NOTE: there is no guard against a zero norm; the final division is
 * performed unconditionally.
 */
template <typename T, bool same_row>
struct CosSimFunctor {
  /**
   * @param x       input rows, `cols` elements per row.
   * @param y       second input; indexed per row or as a single row,
   *                depending on `same_row`.
   * @param x_norm  output, one norm per processed row of x.
   * @param y_norm  output, norm(s) of y (see `same_row`).
   * @param z       output, one similarity value per processed row.
   * @param cols    number of elements per row.
   */
  CosSimFunctor(const T* x, const T* y, T* x_norm, T* y_norm, T* z, int cols)
      : x_norm_(x_norm),
        y_norm_(y_norm),
        x_(x),
        y_(y),
        z_(z),
        cols_(static_cast<size_t>(cols)) {}

  /** Processes the row with index `offset`. */
  inline HOSTDEVICE void operator()(size_t offset) const {
    const T* x = x_ + cols_ * offset;
    // Either the matching row of y, or the one shared row.
    const T* y = same_row ? y_ + cols_ * offset : y_;

    T xx = 0, xy = 0, yy = 0;
    for (size_t i = 0; i < cols_; ++i) {
      const T tep_x = x[i];
      const T tep_y = y[i];
      xx += tep_x * tep_x;
      yy += tep_y * tep_y;
      xy += tep_x * tep_y;
    }
    xx = sqrt(xx);
    yy = sqrt(yy);

    if (same_row) {
      y_norm_[offset] = yy;
    } else if (offset == 0) {
      // Every row computes the same value for the shared y row; let exactly
      // one invocation store it.
      y_norm_[0] = yy;
    }
    x_norm_[offset] = xx;
    z_[offset] = xy / (xx * yy);
  }

  T* x_norm_;
  T* y_norm_;
  const T* x_;
  const T* y_;
  T* z_;
  const size_t cols_;
};
C
chengduoZH 已提交
77

Q
QI JUN 已提交
78
template <typename DeviceContext, typename T>
Y
Yu Yang 已提交
79
class CosSimKernel : public framework::OpKernel<T> {
X
Xinghai Sun 已提交
80 81
 public:
  void Compute(const framework::ExecutionContext& context) const override {
82 83 84 85 86 87 88 89 90
    // get Tensor
    auto* in_x = context.Input<Tensor>("X");
    auto* in_y = context.Input<Tensor>("Y");
    auto* out_z = context.Output<Tensor>("Out");
    auto* out_x_norm = context.Output<Tensor>("XNorm");
    auto* out_y_norm = context.Output<Tensor>("YNorm");
    out_z->mutable_data<T>(context.GetPlace());
    out_x_norm->mutable_data<T>(context.GetPlace());
    out_y_norm->mutable_data<T>(context.GetPlace());
X
Xinghai Sun 已提交
91

92 93
    int rows_x = in_x->dims()[0];
    int rows_y = in_y->dims()[0];
C
chengduoZH 已提交
94 95

    int cols = framework::product(in_x->dims()) / rows_x;
C
chengduoZH 已提交
96 97 98 99 100

    if (rows_x == rows_y) {
      CosSimFunctor<T, true> functor(
          in_x->data<T>(), in_y->data<T>(), out_x_norm->data<T>(),
          out_y_norm->data<T>(), out_z->data<T>(), cols);
101 102 103
      platform::ForRange<DeviceContext> for_range(
          static_cast<const DeviceContext&>(context.device_context()), rows_x);
      for_range(functor);
C
chengduoZH 已提交
104 105 106 107
    } else {
      CosSimFunctor<T, false> functor(
          in_x->data<T>(), in_y->data<T>(), out_x_norm->data<T>(),
          out_y_norm->data<T>(), out_z->data<T>(), cols);
108 109 110
      platform::ForRange<DeviceContext> for_range(
          static_cast<const DeviceContext&>(context.device_context()), rows_x);
      for_range(functor);
C
chengduoZH 已提交
111
    }
X
Xinghai Sun 已提交
112 113 114
  }
};

C
chengduoZH 已提交
115 116 117 118 119 120 121 122 123 124 125 126 127
/**
 * Per-row gradient functor for the case where x and y have matching rows.
 *
 * For row `offset` and every column i it writes
 *   dx[i] = dz * (y[i] / (x_norm * y_norm) - z * x[i] / (x_norm * x_norm))
 * where x_norm, y_norm, z and dz are the entries at index `offset`.
 *
 * The kernel in this file also uses it for the gradient with respect to y by
 * passing the x/y buffers (and their norms) in swapped order.
 *
 * NOTE: the reciprocals are taken unconditionally; zero norms are not
 * guarded against.
 */
template <typename T>
struct CosSimGradFunctor {
  /**
   * @param x_norm  norm of each row of `x`.
   * @param y_norm  norm of each row of `y`.
   * @param x       rows being differentiated, `cols` elements per row.
   * @param y       the other operand, same layout as `x`.
   * @param z       forward output, one value per row.
   * @param dz      upstream gradient, one value per row.
   * @param dx      output gradient, same layout as `x`.
   * @param cols    number of elements per row.
   */
  CosSimGradFunctor(const T* x_norm, const T* y_norm, const T* x, const T* y,
                    const T* z, const T* dz, T* dx, int cols)
      : x_norm_(x_norm),
        y_norm_(y_norm),
        x_(x),
        y_(y),
        z_(z),
        dz_(dz),
        dx_(dx),
        cols_(static_cast<size_t>(cols)) {}

  /** Writes the gradient for the row with index `offset`. */
  inline HOSTDEVICE void operator()(size_t offset) const {
    // Per-row scalars.
    const auto x_norm_square = x_norm_[offset] * x_norm_[offset];
    const auto xy_norm_prod = x_norm_[offset] * y_norm_[offset];
    const auto dz = dz_[offset];
    const auto z = z_[offset];

    // Start of this row in each buffer.
    auto* dx = dx_ + cols_ * offset;
    auto* x = x_ + cols_ * offset;
    auto* y = y_ + cols_ * offset;

    // Divide once per row instead of once per element.
    const auto reciprocal_xy_norm_prod = 1 / xy_norm_prod;
    const auto reciprocal_x_norm_square = 1 / x_norm_square;
    for (size_t i = 0; i < cols_; ++i) {
      dx[i] = dz * (y[i] * reciprocal_xy_norm_prod -
                    z * x[i] * reciprocal_x_norm_square);
    }
  }

  const T* x_norm_;
  const T* y_norm_;
  const T* x_;
  const T* y_;
  const T* z_;
  const T* dz_;
  T* dx_;
  const size_t cols_;
};

C
refine  
chengduoZH 已提交
156
/**
 * Per-row gradient functor with respect to x for the case where one row of y
 * is shared by every row of x.
 *
 * For row `offset` and every column i it writes
 *   dx[i] = dz * (y[i] / (x_norm * y_norm[0]) - z * x[i] / (x_norm * x_norm))
 * where x_norm, z and dz are the entries at index `offset`, and `y` is always
 * read from its first `cols` elements.
 *
 * NOTE: the reciprocals are taken unconditionally; zero norms are not
 * guarded against.
 */
template <typename T>
struct CosSimDxFunctor {
  /**
   * @param x_norm  norm of each row of `x`.
   * @param y_norm  norm of y; only element 0 is read.
   * @param x       rows being differentiated, `cols` elements per row.
   * @param y       shared row; only the first `cols` elements are read.
   * @param z       forward output, one value per row.
   * @param dz      upstream gradient, one value per row.
   * @param dx      output gradient, same layout as `x`.
   * @param cols    number of elements per row.
   */
  CosSimDxFunctor(const T* x_norm, const T* y_norm, const T* x, const T* y,
                  const T* z, const T* dz, T* dx, int cols)
      : x_norm_(x_norm),
        y_norm_(y_norm),
        x_(x),
        y_(y),
        z_(z),
        dz_(dz),
        dx_(dx),
        cols_(static_cast<size_t>(cols)) {}

  /** Writes the gradient for the row with index `offset`. */
  inline HOSTDEVICE void operator()(size_t offset) const {
    // Per-row scalars; y contributes a single norm shared by all rows.
    const auto xy_norm_prod = x_norm_[offset] * y_norm_[0];
    const auto x_norm_square = x_norm_[offset] * x_norm_[offset];
    const auto dz = dz_[offset];
    const auto z = z_[offset];

    // Start of this row in x and dx; y_ is used directly.
    auto* x = x_ + cols_ * offset;
    auto* dx = dx_ + cols_ * offset;

    // Divide once per row instead of once per element.
    const auto reciprocal_xy_norm_prod = 1 / xy_norm_prod;
    const auto reciprocal_x_norm_square = 1 / x_norm_square;

    for (size_t i = 0; i < cols_; ++i) {
      dx[i] = dz * (y_[i] * reciprocal_xy_norm_prod -
                    z * x[i] * reciprocal_x_norm_square);
    }
  }

  const T* x_norm_;
  const T* y_norm_;
  const T* x_;
  const T* y_;
  const T* z_;
  const T* dz_;
  T* dx_;
  const size_t cols_;
};
C
chengduoZH 已提交
193

194 195 196 197 198
/**
 * Gradient functor with respect to y for the case where the rows of X and Y
 * differ in count (the kernel below uses it only in that branch).
 *
 * Only the interface is declared in this header: the constructor and the
 * call operator have no definition here, so they are presumably supplied per
 * DeviceContext in another translation unit -- confirm against the matching
 * source files.
 *
 * The constructor takes the same buffers as CosSimDxFunctor, with `dy` as the
 * output. The kernel zero-fills `dy` before launching this functor.
 */
template <typename DeviceContext, typename T>
struct CosSimDyFunctor {
  CosSimDyFunctor(const T* x_norm, const T* y_norm, const T* x, const T* y,
                  const T* z, const T* dz, T* dy, int cols);
  inline HOSTDEVICE void operator()(size_t) const;
};

Q
QI JUN 已提交
201
template <typename DeviceContext, typename T>
Y
Yu Yang 已提交
202
class CosSimGradKernel : public framework::OpKernel<T> {
X
Xinghai Sun 已提交
203 204
 public:
  void Compute(const framework::ExecutionContext& context) const override {
205 206 207 208 209 210 211 212 213
    // get Tensor
    auto* in_x = context.Input<Tensor>("X");
    auto* in_y = context.Input<Tensor>("Y");
    auto* in_z = context.Input<Tensor>("Out");
    auto* in_x_norm = context.Input<Tensor>("XNorm");
    auto* in_y_norm = context.Input<Tensor>("YNorm");
    auto* out_grad_x = context.Output<Tensor>(framework::GradVarName("X"));
    auto* out_grad_y = context.Output<Tensor>(framework::GradVarName("Y"));
    auto* in_grad_z = context.Input<Tensor>(framework::GradVarName("Out"));
X
Xinghai Sun 已提交
214

215
    // compute gradident
216 217 218
    int rows_x = in_x->dims()[0];
    int rows_y = in_y->dims()[0];
    int cols = framework::product(in_x->dims()) / rows_x;
C
chengduoZH 已提交
219

C
chengduoZH 已提交
220 221 222 223 224 225
    if (rows_x == rows_y) {
      if (out_grad_x) {
        CosSimGradFunctor<T> functor(
            in_x_norm->data<T>(), in_y_norm->data<T>(), in_x->data<T>(),
            in_y->data<T>(), in_z->data<T>(), in_grad_z->data<T>(),
            out_grad_x->mutable_data<T>(context.GetPlace()), cols);
226 227 228 229
        platform::ForRange<DeviceContext> for_range(
            static_cast<const DeviceContext&>(context.device_context()),
            rows_x);
        for_range(functor);
C
chengduoZH 已提交
230 231 232 233 234 235
      }
      if (out_grad_y) {
        CosSimGradFunctor<T> functor(
            in_y_norm->data<T>(), in_x_norm->data<T>(), in_y->data<T>(),
            in_x->data<T>(), in_z->data<T>(), in_grad_z->data<T>(),
            out_grad_y->mutable_data<T>(context.GetPlace()), cols);
236 237 238 239
        platform::ForRange<DeviceContext> for_range(
            static_cast<const DeviceContext&>(context.device_context()),
            rows_x);
        for_range(functor);
C
chengduoZH 已提交
240 241 242
      }
    } else {
      if (out_grad_x) {
C
refine  
chengduoZH 已提交
243
        CosSimDxFunctor<T> functor(
C
chengduoZH 已提交
244 245
            in_x_norm->data<T>(), in_y_norm->data<T>(), in_x->data<T>(),
            in_y->data<T>(), in_z->data<T>(), in_grad_z->data<T>(),
C
refine  
chengduoZH 已提交
246
            out_grad_x->mutable_data<T>(context.GetPlace()), cols);
247 248 249 250
        platform::ForRange<DeviceContext> for_range(
            static_cast<const DeviceContext&>(context.device_context()),
            rows_x);
        for_range(functor);
C
chengduoZH 已提交
251 252
      }
      if (out_grad_y) {
C
refine  
chengduoZH 已提交
253 254 255 256 257 258
        out_grad_y->mutable_data<T>(context.GetPlace());
        math::SetConstant<DeviceContext, T> set_zero;
        auto& dev_ctx = context.template device_context<DeviceContext>();
        set_zero(dev_ctx, out_grad_y, static_cast<T>(0));

        CosSimDyFunctor<DeviceContext, T> functor(
C
chengduoZH 已提交
259
            in_x_norm->data<T>(), in_y_norm->data<T>(), in_x->data<T>(),
C
refine  
chengduoZH 已提交
260 261
            in_y->data<T>(), in_z->data<T>(), in_grad_z->data<T>(),
            out_grad_y->data<T>(), cols);
262 263 264 265
        platform::ForRange<DeviceContext> for_range(
            static_cast<const DeviceContext&>(context.device_context()),
            rows_x);
        for_range(functor);
C
chengduoZH 已提交
266
      }
267
    }
X
Xinghai Sun 已提交
268 269 270 271 272
  }
};

}  // namespace operators
}  // namespace paddle