/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */

#pragma once
#include "paddle/framework/op_registry.h"
#include "paddle/operators/elementwise_op_function.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

// Walk two ranges in lockstep and invoke `callback(a, b)` on each pair.
// The second range must provide at least as many elements as the first;
// elements are passed by reference so the callback may mutate them.
template <typename IT1, typename IT2, typename Callback>
static void ForEachZip(IT1 begin1, IT1 last1, IT2 begin2, Callback callback) {
  while (begin1 < last1) {
    callback(*begin1, *begin2);
    ++begin1;
    ++begin2;
  }
}

// Per-row cosine similarity between X and Y.
//
// same_row == true : row i of X is compared against row i of Y.
// same_row == false: Y holds a single row that is broadcast over every row
//                    of X, and its norm is written to y_norm_[0].
//
// The functor is driven by ForEachZip over the x_norm / y_norm buffers; the
// addresses of the elements handed in are used to recover the row index.
template <typename T, bool same_row>
struct CosSimFunctor {
  CosSimFunctor(const T* x, const T* y, T* x_norm, T* y_norm, T* z, int cols)
      : x_norm_(x_norm),
        y_norm_(y_norm),
        x_(x),
        y_(y),
        z_(z),
        cols_(static_cast<size_t>(cols)) {}

  inline void operator()(T& x_norm, T& y_norm) const {
    // Recover row indices from the addresses of the norm slots.
    const size_t row_x = &x_norm - x_norm_;
    const size_t row_y = &y_norm - y_norm_;

    const T* x_row = x_ + cols_ * row_x;
    // In the broadcast case the single row of Y is shared by every row of X.
    const T* y_row = same_row ? y_ + cols_ * row_y : y_;

    T xx = 0, yy = 0, xy = 0;
    for (size_t i = 0; i < cols_; ++i) {
      xx += x_row[i] * x_row[i];
      yy += y_row[i] * y_row[i];
      xy += x_row[i] * y_row[i];
    }
    xx = std::sqrt(xx);
    yy = std::sqrt(yy);
    x_norm_[row_x] = xx;
    y_norm_[same_row ? row_y : 0] = yy;
    z_[row_x] = xy / (xx * yy);
  }

  T* x_norm_;
  T* y_norm_;
  const T* x_;
  const T* y_;
  T* z_;
  const size_t cols_;
};
template <typename DeviceContext, typename T>
Y
Yu Yang 已提交
85
class CosSimKernel : public framework::OpKernel<T> {
X
Xinghai Sun 已提交
86 87
 public:
  void Compute(const framework::ExecutionContext& context) const override {
88 89 90 91 92 93 94 95 96
    // get Tensor
    auto* in_x = context.Input<Tensor>("X");
    auto* in_y = context.Input<Tensor>("Y");
    auto* out_z = context.Output<Tensor>("Out");
    auto* out_x_norm = context.Output<Tensor>("XNorm");
    auto* out_y_norm = context.Output<Tensor>("YNorm");
    out_z->mutable_data<T>(context.GetPlace());
    out_x_norm->mutable_data<T>(context.GetPlace());
    out_y_norm->mutable_data<T>(context.GetPlace());
X
Xinghai Sun 已提交
97

98 99
    int rows_x = in_x->dims()[0];
    int rows_y = in_y->dims()[0];
C
chengduoZH 已提交
100 101

    int cols = framework::product(in_x->dims()) / rows_x;
C
chengduoZH 已提交
102 103 104 105 106 107 108 109 110 111 112 113 114 115

    if (rows_x == rows_y) {
      CosSimFunctor<T, true> functor(
          in_x->data<T>(), in_y->data<T>(), out_x_norm->data<T>(),
          out_y_norm->data<T>(), out_z->data<T>(), cols);
      ForEachZip(out_x_norm->data<T>(), out_x_norm->data<T>() + rows_x,
                 out_y_norm->data<T>(), functor);
    } else {
      CosSimFunctor<T, false> functor(
          in_x->data<T>(), in_y->data<T>(), out_x_norm->data<T>(),
          out_y_norm->data<T>(), out_z->data<T>(), cols);
      ForEachZip(out_x_norm->data<T>(), out_x_norm->data<T>() + rows_x,
                 out_y_norm->data<T>(), functor);
    }
X
Xinghai Sun 已提交
116 117 118
  }
};

// Gradient of the cosine similarity for the row-wise case
// (rows_x == rows_y), per row:
//   dX = dz * (Y / (|X||Y|) - z * X / |X|^2)
// The gradient w.r.t. Y is obtained by constructing the functor with the
// X/Y argument pairs swapped.
template <typename T>
struct CosSimGradFunctor {
  CosSimGradFunctor(const T* x_norm, const T* y_norm, const T* x, const T* y,
                    const T* z, const T* dz, T* dx, int cols)
      : x_norm_(x_norm),
        y_norm_(y_norm),
        x_(x),
        y_(y),
        z_(z),
        dz_(dz),
        dx_(dx),
        cols_(static_cast<size_t>(cols)) {}

  inline void operator()(const T& x_norm, const T& y_norm) const {
    // Row indices are recovered from the addresses of the norm elements.
    const size_t row_x = &x_norm - x_norm_;
    const size_t row_y = &y_norm - y_norm_;

    const T* x = x_ + cols_ * row_x;
    const T* y = y_ + cols_ * row_y;
    T* dx = dx_ + cols_ * row_x;

    const T dz = dz_[row_x];
    const T z = z_[row_x];
    // Hoist the reciprocals so the inner loop is multiply-only.
    const T reciprocal_xy_norm_prod = 1 / (x_norm_[row_x] * y_norm_[row_y]);
    const T reciprocal_x_norm_square = 1 / (x_norm_[row_x] * x_norm_[row_x]);
    for (size_t i = 0; i < cols_; ++i) {
      dx[i] = dz * (y[i] * reciprocal_xy_norm_prod -
                    z * x[i] * reciprocal_x_norm_square);
    }
  }

  const T* x_norm_;
  const T* y_norm_;
  const T* x_;
  const T* y_;
  const T* z_;
  const T* dz_;
  T* dx_;
  const size_t cols_;
};

// Gradients for the broadcast case, where Y holds a single row shared by all
// rows of X (y_norm_[0] is the only valid Y norm).
//
// Dx == true : writes dX row by row.
// Dx == false: accumulates dY across every row of X with `+=`; the caller
//              must zero-initialize the dy buffer beforehand.
template <typename T, bool Dx>
struct CosSimDxFunctor {
  CosSimDxFunctor(const T* x_norm, const T* y_norm, const T* x, const T* y,
                  const T* z, const T* dz, T* dx, T* dy, int cols)
      : x_norm_(x_norm),
        y_norm_(y_norm),
        x_(x),
        y_(y),
        z_(z),
        dz_(dz),
        dx_(dx),
        dy_(dy),
        cols_(static_cast<size_t>(cols)) {}

  inline void operator()(const T& x_norm, const T& y_norm) const {
    // The X row index comes from the address of the x_norm element.
    const size_t row = &x_norm - x_norm_;

    const T* x = x_ + cols_ * row;
    const T dz = dz_[row];
    const T z = z_[row];
    const T reciprocal_xy_norm_prod = 1 / (x_norm_[row] * y_norm_[0]);

    if (Dx) {
      T* dx = dx_ + cols_ * row;
      const T reciprocal_x_norm_square = 1 / (x_norm_[row] * x_norm_[row]);
      for (size_t i = 0; i < cols_; ++i) {
        dx[i] = dz * (y_[i] * reciprocal_xy_norm_prod -
                      z * x[i] * reciprocal_x_norm_square);
      }
    } else {
      const T reciprocal_y_norm_square = 1 / (y_norm_[0] * y_norm_[0]);
      for (size_t i = 0; i < cols_; ++i) {
        dy_[i] += dz * (x[i] * reciprocal_xy_norm_prod -
                        z * y_[i] * reciprocal_y_norm_square);
      }
    }
  }

  const T* x_norm_;
  const T* y_norm_;
  const T* x_;
  const T* y_;
  const T* z_;
  const T* dz_;
  T* dx_;
  T* dy_;
  const size_t cols_;
};
template <typename DeviceContext, typename T>
Y
Yu Yang 已提交
218
class CosSimGradKernel : public framework::OpKernel<T> {
X
Xinghai Sun 已提交
219 220
 public:
  void Compute(const framework::ExecutionContext& context) const override {
221 222 223 224 225 226 227 228 229
    // get Tensor
    auto* in_x = context.Input<Tensor>("X");
    auto* in_y = context.Input<Tensor>("Y");
    auto* in_z = context.Input<Tensor>("Out");
    auto* in_x_norm = context.Input<Tensor>("XNorm");
    auto* in_y_norm = context.Input<Tensor>("YNorm");
    auto* out_grad_x = context.Output<Tensor>(framework::GradVarName("X"));
    auto* out_grad_y = context.Output<Tensor>(framework::GradVarName("Y"));
    auto* in_grad_z = context.Input<Tensor>(framework::GradVarName("Out"));
X
Xinghai Sun 已提交
230

231
    // compute gradident
232 233 234
    int rows_x = in_x->dims()[0];
    int rows_y = in_y->dims()[0];
    int cols = framework::product(in_x->dims()) / rows_x;
C
chengduoZH 已提交
235

C
chengduoZH 已提交
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
    if (rows_x == rows_y) {
      if (out_grad_x) {
        CosSimGradFunctor<T> functor(
            in_x_norm->data<T>(), in_y_norm->data<T>(), in_x->data<T>(),
            in_y->data<T>(), in_z->data<T>(), in_grad_z->data<T>(),
            out_grad_x->mutable_data<T>(context.GetPlace()), cols);
        ForEachZip(in_x_norm->data<T>(), in_x_norm->data<T>() + rows_x,
                   in_y_norm->data<T>(), functor);
      }
      if (out_grad_y) {
        CosSimGradFunctor<T> functor(
            in_y_norm->data<T>(), in_x_norm->data<T>(), in_y->data<T>(),
            in_x->data<T>(), in_z->data<T>(), in_grad_z->data<T>(),
            out_grad_y->mutable_data<T>(context.GetPlace()), cols);
        ForEachZip(in_y_norm->data<T>(), in_y_norm->data<T>() + rows_x,
                   in_x_norm->data<T>(), functor);
      }
    } else {
      if (out_grad_x) {
C
chengduoZH 已提交
255
        CosSimDxFunctor<T, true> functor(
C
chengduoZH 已提交
256 257
            in_x_norm->data<T>(), in_y_norm->data<T>(), in_x->data<T>(),
            in_y->data<T>(), in_z->data<T>(), in_grad_z->data<T>(),
C
chengduoZH 已提交
258
            out_grad_x->mutable_data<T>(context.GetPlace()), nullptr, cols);
C
chengduoZH 已提交
259 260 261 262
        ForEachZip(in_x_norm->data<T>(), in_x_norm->data<T>() + rows_x,
                   in_y_norm->data<T>(), functor);
      }
      if (out_grad_y) {
C
chengduoZH 已提交
263
        CosSimDxFunctor<T, false> functor(
C
chengduoZH 已提交
264
            in_x_norm->data<T>(), in_y_norm->data<T>(), in_x->data<T>(),
C
chengduoZH 已提交
265
            in_y->data<T>(), in_z->data<T>(), in_grad_z->data<T>(), nullptr,
C
chengduoZH 已提交
266 267 268 269
            out_grad_y->mutable_data<T>(context.GetPlace()), cols);
        ForEachZip(in_x_norm->data<T>(), in_x_norm->data<T>() + rows_x,
                   in_y_norm->data<T>(), functor);
      }
270
    }
X
Xinghai Sun 已提交
271 272 273 274 275
  }
};

}  // namespace operators
}  // namespace paddle