cos_sim_op.h 8.8 KB
Newer Older
X
Xinghai Sun 已提交
1 2
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

L
Luo Tao 已提交
3 4 5
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
X
Xinghai Sun 已提交
6

L
Luo Tao 已提交
7
    http://www.apache.org/licenses/LICENSE-2.0
X
Xinghai Sun 已提交
8

L
Luo Tao 已提交
9 10 11 12 13
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
X
Xinghai Sun 已提交
14 15 16

#pragma once
#include "paddle/framework/op_registry.h"
17 18
#include "paddle/operators/math/math_function.h"
#include "paddle/platform/for_range.h"
X
Xinghai Sun 已提交
19 20 21 22 23 24

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

C
chengduoZH 已提交
25 26 27 28 29 30 31 32 33 34
template <typename T, bool same_row>
struct CosSimFunctor {
  CosSimFunctor(const T* x, const T* y, T* x_norm, T* y_norm, T* z, int cols)
      : x_norm_(x_norm),
        y_norm_(y_norm),
        x_(x),
        y_(y),
        z_(z),
        cols_(static_cast<size_t>(cols)) {}

C
refine  
chengduoZH 已提交
35 36 37
  inline HOSTDEVICE void operator()(size_t offset) const {
    auto* x = x_ + cols_ * offset;
    T xx = 0, xy = 0, yy = 0;
C
chengduoZH 已提交
38
    if (same_row) {
C
refine  
chengduoZH 已提交
39
      auto* y = y_ + cols_ * offset;
40
      T tep_x, tep_y;
C
chengduoZH 已提交
41
      for (size_t i = 0; i < cols_; ++i) {
42 43 44 45 46
        tep_x = x[i];
        tep_y = y[i];
        xx += tep_x * tep_x;
        yy += tep_y * tep_y;
        xy += tep_x * tep_y;
C
chengduoZH 已提交
47 48 49
      }
      xx = sqrt(xx);
      yy = sqrt(yy);
C
refine  
chengduoZH 已提交
50 51 52
      y_norm_[offset] = yy;
      x_norm_[offset] = xx;
      z_[offset] = xy / (xx * yy);
C
chengduoZH 已提交
53
    } else {  // This can be wrote in a better way.
54
      T tep_x, tep_y;
C
chengduoZH 已提交
55
      for (size_t i = 0; i < cols_; ++i) {
56 57 58 59 60
        tep_x = x[i];
        tep_y = y_[i];
        xx += tep_x * tep_x;
        yy += tep_y * tep_y;  // only need
        xy += tep_x * tep_y;
C
chengduoZH 已提交
61 62 63
      }
      xx = sqrt(xx);
      yy = sqrt(yy);
64
      if (offset == 0) y_norm_[0] = yy;
C
refine  
chengduoZH 已提交
65 66
      x_norm_[offset] = xx;
      z_[offset] = xy / (xx * yy);
C
chengduoZH 已提交
67 68
    }
  }
C
chengduoZH 已提交
69 70 71 72 73 74 75 76

  T* x_norm_;
  T* y_norm_;
  const T* x_;
  const T* y_;
  T* z_;
  const size_t cols_;
};
C
chengduoZH 已提交
77

Q
QI JUN 已提交
78
template <typename DeviceContext, typename T>
Y
Yu Yang 已提交
79
class CosSimKernel : public framework::OpKernel<T> {
X
Xinghai Sun 已提交
80 81
 public:
  void Compute(const framework::ExecutionContext& context) const override {
82 83 84 85 86 87 88 89 90
    // get Tensor
    auto* in_x = context.Input<Tensor>("X");
    auto* in_y = context.Input<Tensor>("Y");
    auto* out_z = context.Output<Tensor>("Out");
    auto* out_x_norm = context.Output<Tensor>("XNorm");
    auto* out_y_norm = context.Output<Tensor>("YNorm");
    out_z->mutable_data<T>(context.GetPlace());
    out_x_norm->mutable_data<T>(context.GetPlace());
    out_y_norm->mutable_data<T>(context.GetPlace());
X
Xinghai Sun 已提交
91

92 93
    int rows_x = in_x->dims()[0];
    int rows_y = in_y->dims()[0];
C
chengduoZH 已提交
94 95

    int cols = framework::product(in_x->dims()) / rows_x;
C
chengduoZH 已提交
96 97 98 99 100

    if (rows_x == rows_y) {
      CosSimFunctor<T, true> functor(
          in_x->data<T>(), in_y->data<T>(), out_x_norm->data<T>(),
          out_y_norm->data<T>(), out_z->data<T>(), cols);
101 102 103
      platform::ForRange<DeviceContext> for_range(
          static_cast<const DeviceContext&>(context.device_context()), rows_x);
      for_range(functor);
C
chengduoZH 已提交
104 105 106 107
    } else {
      CosSimFunctor<T, false> functor(
          in_x->data<T>(), in_y->data<T>(), out_x_norm->data<T>(),
          out_y_norm->data<T>(), out_z->data<T>(), cols);
108 109 110
      platform::ForRange<DeviceContext> for_range(
          static_cast<const DeviceContext&>(context.device_context()), rows_x);
      for_range(functor);
C
chengduoZH 已提交
111
    }
X
Xinghai Sun 已提交
112 113 114
  }
};

C
chengduoZH 已提交
115 116 117 118 119 120 121 122 123 124 125 126 127
template <typename T>
struct CosSimGradFunctor {
  CosSimGradFunctor(const T* x_norm, const T* y_norm, const T* x, const T* y,
                    const T* z, const T* dz, T* dx, int cols)
      : x_norm_(x_norm),
        y_norm_(y_norm),
        x_(x),
        y_(y),
        z_(z),
        dz_(dz),
        dx_(dx),
        cols_(static_cast<size_t>(cols)) {}

C
refine  
chengduoZH 已提交
128 129 130 131 132
  inline HOSTDEVICE void operator()(size_t offset) const {
    auto x_norm_square = x_norm_[offset] * x_norm_[offset];
    auto xy_norm_prod = x_norm_[offset] * y_norm_[offset];
    auto dz = dz_[offset];
    auto z = z_[offset];
C
chengduoZH 已提交
133

C
refine  
chengduoZH 已提交
134 135 136
    auto* dx = dx_ + cols_ * offset;
    auto* x = x_ + cols_ * offset;
    auto* y = y_ + cols_ * offset;
C
chengduoZH 已提交
137

C
chengduoZH 已提交
138 139
    auto reciprocal_xy_norm_prod = 1 / xy_norm_prod;
    auto reciprocal_x_norm_square = 1 / x_norm_square;
C
chengduoZH 已提交
140
    for (size_t i = 0; i < cols_; ++i) {
C
chengduoZH 已提交
141 142
      dx[i] = dz * (y[i] * reciprocal_xy_norm_prod -
                    z * x[i] * reciprocal_x_norm_square);
C
chengduoZH 已提交
143
    }
C
chengduoZH 已提交
144
  }
C
chengduoZH 已提交
145 146 147 148 149 150 151 152 153 154 155

  const T* x_norm_;
  const T* y_norm_;
  const T* x_;
  const T* y_;
  const T* z_;
  const T* dz_;
  T* dx_;
  const size_t cols_;
};

C
refine  
chengduoZH 已提交
156
template <typename T>
C
chengduoZH 已提交
157 158
struct CosSimDxFunctor {
  CosSimDxFunctor(const T* x_norm, const T* y_norm, const T* x, const T* y,
C
refine  
chengduoZH 已提交
159
                  const T* z, const T* dz, T* dx, int cols)
C
chengduoZH 已提交
160 161 162 163 164 165 166 167 168
      : x_norm_(x_norm),
        y_norm_(y_norm),
        x_(x),
        y_(y),
        z_(z),
        dz_(dz),
        dx_(dx),
        cols_(static_cast<size_t>(cols)) {}

C
refine  
chengduoZH 已提交
169 170 171 172 173
  inline HOSTDEVICE void operator()(size_t offset) const {
    auto xy_norm_prod = x_norm_[offset] * y_norm_[0];
    auto dz = dz_[offset];
    auto z = z_[offset];
    auto* x = x_ + cols_ * offset;
C
chengduoZH 已提交
174
    auto reciprocal_xy_norm_prod = 1 / xy_norm_prod;
C
refine  
chengduoZH 已提交
175 176 177
    auto x_norm_square = x_norm_[offset] * x_norm_[offset];
    auto* dx = dx_ + cols_ * offset;
    auto reciprocal_x_norm_square = 1 / x_norm_square;
C
chengduoZH 已提交
178

C
refine  
chengduoZH 已提交
179 180 181
    for (size_t i = 0; i < cols_; ++i) {
      dx[i] = dz * (y_[i] * reciprocal_xy_norm_prod -
                    z * x[i] * reciprocal_x_norm_square);
C
chengduoZH 已提交
182 183 184 185 186 187 188 189
    }
  }
  const T* x_norm_;
  const T* y_norm_;
  const T* x_;
  const T* y_;
  const T* z_;
  const T* dz_;
C
chengduoZH 已提交
190
  T* dx_;
C
chengduoZH 已提交
191 192
  const size_t cols_;
};
C
chengduoZH 已提交
193

194 195
// Computes the gradient with respect to Y when X and Y have different row
// counts.
//
// Only the call operator is declared here; no definition is visible in this
// header (presumably it is provided per device type elsewhere - TODO confirm).
//
// CosSimGradKernel in this header calls it with:
//   ctx            - the device context of the running kernel
//   x_norm, y_norm - data of the "XNorm" / "YNorm" tensors
//   x, y, z        - data of "X", "Y" and "Out"
//   dz             - data of the gradient of "Out"
//   rows, cols     - first dimension of X and element count of X per row
//   dy             - data of the gradient of "Y", zero-filled by the caller
//                    immediately before the call
template <typename DeviceContext, typename T>
struct CosSimDyFunctor {
  inline void operator()(const DeviceContext& ctx, const T* x_norm,
                         const T* y_norm, const T* x, const T* y, const T* z,
                         const T* dz, const size_t rows, const size_t cols,
                         T* dy) const;
};

Q
QI JUN 已提交
202
template <typename DeviceContext, typename T>
Y
Yu Yang 已提交
203
class CosSimGradKernel : public framework::OpKernel<T> {
X
Xinghai Sun 已提交
204 205
 public:
  void Compute(const framework::ExecutionContext& context) const override {
206 207 208 209 210 211 212 213 214
    // get Tensor
    auto* in_x = context.Input<Tensor>("X");
    auto* in_y = context.Input<Tensor>("Y");
    auto* in_z = context.Input<Tensor>("Out");
    auto* in_x_norm = context.Input<Tensor>("XNorm");
    auto* in_y_norm = context.Input<Tensor>("YNorm");
    auto* out_grad_x = context.Output<Tensor>(framework::GradVarName("X"));
    auto* out_grad_y = context.Output<Tensor>(framework::GradVarName("Y"));
    auto* in_grad_z = context.Input<Tensor>(framework::GradVarName("Out"));
X
Xinghai Sun 已提交
215

216
    // compute gradident
217 218 219
    int rows_x = in_x->dims()[0];
    int rows_y = in_y->dims()[0];
    int cols = framework::product(in_x->dims()) / rows_x;
C
chengduoZH 已提交
220

C
chengduoZH 已提交
221 222 223 224 225 226
    if (rows_x == rows_y) {
      if (out_grad_x) {
        CosSimGradFunctor<T> functor(
            in_x_norm->data<T>(), in_y_norm->data<T>(), in_x->data<T>(),
            in_y->data<T>(), in_z->data<T>(), in_grad_z->data<T>(),
            out_grad_x->mutable_data<T>(context.GetPlace()), cols);
227 228 229 230
        platform::ForRange<DeviceContext> for_range(
            static_cast<const DeviceContext&>(context.device_context()),
            rows_x);
        for_range(functor);
C
chengduoZH 已提交
231 232 233 234 235 236
      }
      if (out_grad_y) {
        CosSimGradFunctor<T> functor(
            in_y_norm->data<T>(), in_x_norm->data<T>(), in_y->data<T>(),
            in_x->data<T>(), in_z->data<T>(), in_grad_z->data<T>(),
            out_grad_y->mutable_data<T>(context.GetPlace()), cols);
237 238 239 240
        platform::ForRange<DeviceContext> for_range(
            static_cast<const DeviceContext&>(context.device_context()),
            rows_x);
        for_range(functor);
C
chengduoZH 已提交
241 242 243
      }
    } else {
      if (out_grad_x) {
C
refine  
chengduoZH 已提交
244
        CosSimDxFunctor<T> functor(
C
chengduoZH 已提交
245 246
            in_x_norm->data<T>(), in_y_norm->data<T>(), in_x->data<T>(),
            in_y->data<T>(), in_z->data<T>(), in_grad_z->data<T>(),
C
refine  
chengduoZH 已提交
247
            out_grad_x->mutable_data<T>(context.GetPlace()), cols);
248 249 250 251
        platform::ForRange<DeviceContext> for_range(
            static_cast<const DeviceContext&>(context.device_context()),
            rows_x);
        for_range(functor);
C
chengduoZH 已提交
252 253
      }
      if (out_grad_y) {
C
refine  
chengduoZH 已提交
254 255 256 257 258
        out_grad_y->mutable_data<T>(context.GetPlace());
        math::SetConstant<DeviceContext, T> set_zero;
        auto& dev_ctx = context.template device_context<DeviceContext>();
        set_zero(dev_ctx, out_grad_y, static_cast<T>(0));

C
chengduoZH 已提交
259 260 261 262 263
        CosSimDyFunctor<DeviceContext, T> functor;
        functor(dev_ctx, in_x_norm->data<T>(), in_y_norm->data<T>(),
                in_x->data<T>(), in_y->data<T>(), in_z->data<T>(),
                in_grad_z->data<T>(), static_cast<size_t>(rows_x),
                static_cast<size_t>(cols), out_grad_y->data<T>());
C
chengduoZH 已提交
264
      }
265
    }
X
Xinghai Sun 已提交
266 267 268 269 270
  }
};

}  // namespace operators
}  // namespace paddle