math_function.cc 11.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

15
#include "paddle/phi/kernels/funcs/math_function.h"
16
#include "paddle/phi/core/utils/visit_place.h"
17 18

#ifdef PADDLE_WITH_MKLML
19
#include "paddle/phi/backends/dynload/mklml.h"
20 21 22 23 24 25 26 27 28
#endif

#ifdef PADDLE_USE_OPENBLAS
#include <cblas.h>
#endif

#include <memory>
#include <utility>
#include <vector>
29

30
#include "paddle/phi/backends/cpu/cpu_context.h"
31
#include "paddle/phi/common/bfloat16.h"
32
#include "paddle/phi/common/data_type.h"
33
#include "paddle/phi/common/float16.h"
34 35
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function_impl.h"
36
#include "unsupported/Eigen/CXX11/Tensor"
37 38 39 40
#ifdef PADDLE_WITH_CUSTOM_DEVICE
#include "paddle/phi/api/lib/kernel_dispatch.h"
#include "paddle/phi/core/kernel_factory.h"
#endif
41

42
namespace phi {
43 44
namespace funcs {

45
using float16 = phi::dtype::float16;

// Explicit instantiations of SetConstant on phi::CPUContext, grouped by
// element-type family.

// Floating-point element types.
template struct SetConstant<phi::CPUContext, float>;
template struct SetConstant<phi::CPUContext, double>;
template struct SetConstant<phi::CPUContext, phi::dtype::float16>;
template struct SetConstant<phi::CPUContext, phi::dtype::bfloat16>;

// Integer and boolean element types.
template struct SetConstant<phi::CPUContext, bool>;
template struct SetConstant<phi::CPUContext, int8_t>;
template struct SetConstant<phi::CPUContext, uint8_t>;
template struct SetConstant<phi::CPUContext, int16_t>;
template struct SetConstant<phi::CPUContext, int>;
template struct SetConstant<phi::CPUContext, int64_t>;

// Complex element types.
template struct SetConstant<phi::CPUContext, phi::dtype::complex<float>>;
template struct SetConstant<phi::CPUContext, phi::dtype::complex<double>>;
59 60

#ifdef PADDLE_WITH_XPU
// Explicit instantiations of SetConstant on phi::XPUContext; only compiled
// when PADDLE_WITH_XPU is defined.

// Floating-point element types.
template struct SetConstant<phi::XPUContext, float>;
template struct SetConstant<phi::XPUContext, double>;
template struct SetConstant<phi::XPUContext, phi::dtype::float16>;
template struct SetConstant<phi::XPUContext, phi::dtype::bfloat16>;

// Integer and boolean element types.
template struct SetConstant<phi::XPUContext, bool>;
template struct SetConstant<phi::XPUContext, int8_t>;
template struct SetConstant<phi::XPUContext, uint8_t>;
template struct SetConstant<phi::XPUContext, int16_t>;
template struct SetConstant<phi::XPUContext, int>;
template struct SetConstant<phi::XPUContext, int64_t>;

// Complex element types.
template struct SetConstant<phi::XPUContext, phi::dtype::complex<float>>;
template struct SetConstant<phi::XPUContext, phi::dtype::complex<double>>;

#endif

L
Leo Chen 已提交
76 77 78 79 80 81 82 83 84 85 86 87 88 89
#define DEFINE_CPU_TRANS(RANK)                                            \
  template struct Transpose<phi::CPUContext, phi::dtype::float16, RANK>;  \
  template struct Transpose<phi::CPUContext, phi::dtype::bfloat16, RANK>; \
  template struct Transpose<phi::CPUContext, float, RANK>;                \
  template struct Transpose<phi::CPUContext, double, RANK>;               \
  template struct Transpose<phi::CPUContext, int, RANK>;                  \
  template struct Transpose<phi::CPUContext, int64_t, RANK>;              \
  template struct Transpose<phi::CPUContext, bool, RANK>;                 \
  template struct Transpose<phi::CPUContext, int16_t, RANK>;              \
  template struct Transpose<phi::CPUContext, uint8_t, RANK>;              \
  template struct Transpose<phi::CPUContext, int8_t, RANK>;               \
  template struct Transpose<phi::CPUContext,                              \
                            phi::dtype::complex<float>,                   \
                            RANK>;                                        \
90
  template struct Transpose<phi::CPUContext, phi::dtype::complex<double>, RANK>;
91 92 93 94 95 96 97 98

DEFINE_CPU_TRANS(1);
DEFINE_CPU_TRANS(2);
DEFINE_CPU_TRANS(3);
DEFINE_CPU_TRANS(4);
DEFINE_CPU_TRANS(5);
DEFINE_CPU_TRANS(6);

99 100
// Generic element-by-element transpose.
//
// For every linear offset of `out`, the offset is decomposed into per-dimension
// coordinates using the strides of `out`; those coordinates are then recombined
// with the strides of `in`, where output dimension i reads input dimension
// axis[i]. The element found at that input offset is copied to the output.
//
// `context` is not used by this implementation.
template <typename DeviceContext, typename T>
void TransposeNormal<DeviceContext, T>::operator()(
    const DeviceContext& context UNUSED,
    const phi::DenseTensor& in,
    phi::DenseTensor* out,
    const std::vector<int>& axis) {
  const int rank = axis.size();
  auto src_strides = phi::stride(in.dims());
  auto dst_strides = phi::stride(out->dims());
  const T* src = in.data<T>();
  T* dst = out->data<T>();

  const int64_t total = out->numel();
  for (int64_t dst_offset = 0; dst_offset < total; ++dst_offset) {
    int64_t src_offset = 0;
    int64_t remainder = dst_offset;
    // Peel off one coordinate per dimension and map it onto the input layout.
    for (int dim = 0; dim < rank; ++dim) {
      const int64_t coord = remainder / dst_strides[dim];
      remainder %= dst_strides[dim];
      src_offset += coord * src_strides[axis[dim]];
    }
    dst[dst_offset] = src[src_offset];
  }
}
126 127

// define transpose normal
L
Leo Chen 已提交
128
#define DEFINE_CPU_TRANS_NORMAL(TYPE) \
129
  template struct TransposeNormal<phi::CPUContext, TYPE>
130

131 132
DEFINE_CPU_TRANS_NORMAL(phi::dtype::float16);
DEFINE_CPU_TRANS_NORMAL(phi::dtype::bfloat16);
133 134 135 136 137 138 139 140
DEFINE_CPU_TRANS_NORMAL(float);
DEFINE_CPU_TRANS_NORMAL(double);
DEFINE_CPU_TRANS_NORMAL(int);
DEFINE_CPU_TRANS_NORMAL(int64_t);
DEFINE_CPU_TRANS_NORMAL(bool);
DEFINE_CPU_TRANS_NORMAL(int16_t);
DEFINE_CPU_TRANS_NORMAL(uint8_t);
DEFINE_CPU_TRANS_NORMAL(int8_t);
141 142
DEFINE_CPU_TRANS_NORMAL(phi::dtype::complex<float>);
DEFINE_CPU_TRANS_NORMAL(phi::dtype::complex<double>);
143 144

struct TensorSetConstantCPU {
145
  TensorSetConstantCPU(phi::DenseTensor* tensor, float value)
146 147 148
      : tensor_(tensor), value_(value) {}
  template <typename T>
  void apply() const {
149
    auto cpu = phi::CPUPlace();
150 151 152
    auto* begin = tensor_->mutable_data<T>(cpu);
    std::fill(begin, begin + tensor_->numel(), static_cast<T>(value_));
  }
153
  phi::DenseTensor* tensor_;
154 155 156 157
  float value_;
};

template <>
158 159 160
void set_constant_with_place<phi::XPUPlace>(const phi::DeviceContext& context,
                                            phi::DenseTensor* tensor,
                                            float value) {
J
james 已提交
161 162 163 164 165 166 167
#ifdef PADDLE_WITH_XPU
  phi::VisitDataType(
      tensor->dtype(),
      TensorSetConstantXPU<float>(tensor, value, tensor->place()));
#else
  PADDLE_THROW(phi::errors::PreconditionNotMet("Not compiled with XPU!"));
#endif
168 169 170
}

template <>
171 172 173
void set_constant_with_place<phi::IPUPlace>(const phi::DeviceContext& context,
                                            phi::DenseTensor* tensor,
                                            float value) {
174
  PADDLE_THROW(phi::errors::Unimplemented("IPUPlace is not supported"));
175 176
}

177
template <>
178 179
void set_constant_with_place<phi::CustomPlace>(
    const phi::DeviceContext& context, phi::DenseTensor* tensor, float value) {
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
#ifdef PADDLE_WITH_CUSTOM_DEVICE
  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
      "full",
      {paddle::experimental::ParseBackend(tensor->place()),
       phi::DataLayout::ALL_LAYOUT,
       paddle::experimental::ParseDataType(tensor->dtype())});
  const auto& kernel = kernel_result.kernel;
  using kernel_signature = void (*)(const phi::DeviceContext&,
                                    const phi::IntArray&,
                                    const phi::Scalar&,
                                    DataType,
                                    phi::DenseTensor*);
  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
  (*kernel_fn)(context,
               phi::IntArray(phi::vectorize(tensor->dims())),
               phi::Scalar(value),
               tensor->dtype(),
               tensor);
#else
  PADDLE_THROW(phi::errors::Unimplemented("CustomPlace is not supported"));
#endif
201 202
}

203
template <>
204 205 206
void set_constant_with_place<phi::CPUPlace>(const phi::DeviceContext& context,
                                            phi::DenseTensor* tensor,
                                            float value) {
207
  phi::VisitDataType(tensor->dtype(), TensorSetConstantCPU(tensor, value));
208 209 210
}

template <>
211 212
void set_constant_with_place<phi::GPUPinnedPlace>(
    const phi::DeviceContext& context, phi::DenseTensor* tensor, float value) {
213
  phi::VisitDataType(tensor->dtype(), TensorSetConstantCPU(tensor, value));
214 215
}

216 217 218
struct TensorSetConstantWithPlace {
  using argument_type = phi::Place;
  using result_type = void;
219
  TensorSetConstantWithPlace(const phi::DeviceContext& context,
220
                             phi::DenseTensor* tensor,
221 222 223 224
                             float value)
      : context_(context), tensor_(tensor), value_(value) {}

  template <typename Place>
G
Galaxy1458 已提交
225
  void operator()(Place place UNUSED) const {
226 227 228
    set_constant_with_place<Place>(context_, tensor_, value_);
  }

229
  const phi::DeviceContext& context_;
230
  phi::DenseTensor* tensor_;
231 232 233
  float value_;
};

234
// Fills `tensor` with `value` by routing to a set_constant_with_place
// specialization. Which specialization runs depends on the build flags and,
// where checked, on the place of the context or of the tensor.
void set_constant(const phi::DeviceContext& context,
                  phi::DenseTensor* tensor,
                  float value) {
  TensorSetConstantWithPlace visitor(context, tensor, value);

#ifdef PADDLE_WITH_CUSTOM_DEVICE
  // A context living on a custom device always takes the CustomPlace path.
  const bool on_custom_device =
      context.GetPlace().GetType() == phi::AllocationType::CUSTOM;
  if (on_custom_device) {
    visitor(phi::CustomPlace());
    return;
  }
#endif

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  // CUDA/HIP builds dispatch on the place recorded in the tensor itself.
  phi::VisitPlace(tensor->place(), visitor);
#elif defined(PADDLE_WITH_XPU)
  visitor(phi::XPUPlace());
#else
  visitor(phi::CPUPlace());
#endif
}

L
Leo Chen 已提交
254 255 256 257 258 259 260 261
// Explicit instantiations of RowwiseMean on phi::CPUContext.
template struct RowwiseMean<phi::CPUContext, float>;
template struct RowwiseMean<phi::CPUContext, double>;

// Explicit instantiations of ColwiseSum on phi::CPUContext.
template struct ColwiseSum<phi::CPUContext, float>;
template struct ColwiseSum<phi::CPUContext, double>;
template struct ColwiseSum<phi::CPUContext, int>;
template struct ColwiseSum<phi::CPUContext, int64_t>;

262
template <typename T>
L
Leo Chen 已提交
263
struct RowwiseAdd<phi::CPUContext, T> {
G
Galaxy1458 已提交
264
  void operator()(const phi::CPUContext& context UNUSED,
265 266 267
                  const phi::DenseTensor& input,
                  const phi::DenseTensor& vector,
                  phi::DenseTensor* output) {
268 269 270 271 272 273
    auto in_dims = input.dims();
    auto out_dims = output->dims();
    auto size = input.numel() / in_dims[0];
    PADDLE_ENFORCE_EQ(
        vector.numel(),
        size,
274
        phi::errors::InvalidArgument(
275 276 277 278 279 280 281
            "The input vector size"
            " should be equal to the size of each row of input tensor."
            " Expected vector size=%d, but received %d",
            size,
            vector.numel()));
    PADDLE_ENFORCE_EQ(out_dims,
                      in_dims,
282
                      phi::errors::InvalidArgument(
283 284 285
                          "The output tensor shape should be same as the input"
                          " tensor shape. Expected output tensor shape: %s,"
                          " but received %s",
286 287
                          in_dims.to_str().c_str(),
                          out_dims.to_str().c_str()));
288

289 290 291
    auto in = phi::EigenMatrix<T>::From(input);
    auto vec = phi::EigenVector<T>::Flatten(vector);
    auto out = phi::EigenMatrix<T>::From(*output);
292 293 294 295 296 297 298

    for (int64_t i = 0; i < in_dims[0]; ++i) {
      out.chip(i, 0) = in.chip(i, 0) + vec;
    }
  }
};

L
Leo Chen 已提交
299 300
// Explicit instantiations of the CPU RowwiseAdd for double and float.
template struct RowwiseAdd<phi::CPUContext, double>;
template struct RowwiseAdd<phi::CPUContext, float>;
301 302

}  // namespace funcs
303
}  // namespace phi