/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "operators/math/math_function.h"
#include <algorithm>
#include <cstring>
#include <string>
#include "common/enforce.h"
#include "framework/data_type.h"
#include "framework/tensor.h"
#include "operators/math/gemm.h"

namespace paddle_mobile {
namespace operators {
namespace math {

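// Type-dispatch functor for SetConstant: VisitDataType instantiates apply<T>()
// for the tensor's element type, which fills the buffer with `value`.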
struct TensorSetConstant {
  TensorSetConstant(framework::Tensor *tensor, float value)
      : tensor_(tensor), value_(value) {}
  template <typename T>
  void apply() const {
    auto *begin = tensor_->mutable_data<T>();
    std::fill(begin, begin + tensor_->numel(), static_cast<T>(value_));
  }
  framework::Tensor *tensor_;
  float value_;
};

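// Fills `tensor` with `value`, dispatching on the tensor's runtime data type.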
void SetConstant(framework::Tensor *tensor, float value) {
  framework::VisitDataType(framework::ToDataType(tensor->type()),
                           TensorSetConstant(tensor, value));
}

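// Single-precision MatMul: matrix_out = alpha * A * B + beta * matrix_out,
// dispatched to Gemm::Sgemm (OpenMP variant when available). trans_b is
// accepted for interface symmetry but not honored here; when trans_a is set,
// A is first transposed into a temporary buffer, since Sgemm expects
// row-major, non-transposed inputs.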
template <>
void MatMul<float, float>(const framework::Tensor &matrix_a, bool trans_a,
                          const framework::Tensor &matrix_b, bool trans_b,
                          float alpha, framework::Tensor *matrix_out,
                          float beta, bool relu, float *bias) {
  auto dim_a = matrix_a.dims();
  auto dim_b = matrix_b.dims();
  auto dim_out = matrix_out->dims();
  PADDLE_MOBILE_ENFORCE(
      dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
      "The input and output of MatMul be matrix");

  int M = dim_out[0];
  int N = dim_out[1];
  int K = (!trans_a) ? dim_a[1] : dim_a[0];
  Gemm gemm;

  if (trans_a) {
    int numel = matrix_a.numel();
    int m = matrix_a.dims()[0];
    int n = matrix_a.dims()[1];
    const float *tmp = matrix_a.data<float>();
    float *a = static_cast<float *>(
        paddle_mobile::memory::Alloc(sizeof(float) * numel));
    int index = 0;
    for (int j = 0; j < n; j++) {
      for (int i = 0; i < m; i++) {
        a[index++] = tmp[i * n + j];
      }
    }

#ifdef _OPENMP
    gemm.Sgemm_omp(M, N, K, alpha, a, K, matrix_b.data<float>(), N, beta,
                   matrix_out->data<float>(), N, relu, bias);
#else
    gemm.Sgemm(M, N, K, alpha, a, K, matrix_b.data<float>(), N, beta,
               matrix_out->data<float>(), N, relu, bias);
#endif
    // Free the transposed copy of matrix_a; the original code leaked this
    // allocation (assumes memory::Free is the counterpart of memory::Alloc).
    paddle_mobile::memory::Free(a);
  } else {
#ifdef _OPENMP
    gemm.Sgemm_omp(M, N, K, alpha, matrix_a.data<float>(), K,
                   matrix_b.data<float>(), N, beta, matrix_out->data<float>(),
                   N, relu, bias);
#else
    gemm.Sgemm(M, N, K, alpha, matrix_a.data<float>(), K,
               matrix_b.data<float>(), N, beta, matrix_out->data<float>(), N,
               relu, bias);
#endif
  }
}

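// MatMul with a fused batch-norm epilogue: SgemmWithBn applies the folded
// scale/bias from new_scale/new_bias (offset by `group` to select the
// current group's parameters) to the GEMM result.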
void MatMulWithBn(const framework::Tensor &matrix_a, bool trans_a,
                  const framework::Tensor &matrix_b, bool trans_b, float alpha,
                  framework::Tensor *matrix_out, float beta, bool relu,
                  framework::Tensor *new_scale, framework::Tensor *new_bias,
                  int group, float *bias) {
  Gemm gemm;
  auto dim_a = matrix_a.dims();
  auto dim_b = matrix_b.dims();
  auto dim_out = matrix_out->dims();
  PADDLE_MOBILE_ENFORCE(
      dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
      "The input and output of MatMul be matrix");

  int M = dim_out[0];
  int N = dim_out[1];
  int K = (!trans_a) ? dim_a[1] : dim_a[0];

#ifdef _OPENMP
  gemm.SgemmWithBn_omp(
      M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(), N,
      beta, matrix_out->data<float>(), N, relu,
      new_scale->data<float>() + group, new_bias->data<float>() + group, bias);
#else
  gemm.SgemmWithBn(M, N, K, alpha, matrix_a.data<float>(), K,
                   matrix_b.data<float>(), N, beta, matrix_out->data<float>(),
                   N, relu, new_scale->data<float>() + group,
                   new_bias->data<float>() + group, bias);
#endif
}
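
// MatMul with a fused PReLU epilogue: the slope parameters `p`, the broadcast
// `mode`, and the optional bias buffers are forwarded to SgemmWithPRelu.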
void MatMulWithPRelu(const framework::Tensor &matrix_a, bool trans_a,
                     const framework::Tensor &matrix_b, bool trans_b,
                     framework::Tensor *matrix_out, float *p, std::string mode,
                     float *bias, float *bias1) {
  Gemm gemm;
  auto dim_a = matrix_a.dims();
  auto dim_b = matrix_b.dims();
  auto dim_out = matrix_out->dims();
  PADDLE_MOBILE_ENFORCE(
      dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
      "The input and output of MatMul be matrix");

  int M = dim_out[0];
  int N = dim_out[1];
  int K = (!trans_a) ? dim_a[1] : dim_a[0];

#ifdef _OPENMP
  gemm.SgemmWithPRelu_omp(M, N, K, matrix_a.data<float>(), K,
                          matrix_b.data<float>(), N, matrix_out->data<float>(),
                          N, p, mode, bias, bias1);
#else
  gemm.SgemmWithPRelu(M, N, K, matrix_a.data<float>(), K,
                      matrix_b.data<float>(), N, matrix_out->data<float>(), N,
                      p, mode, bias, bias1);
#endif
}

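// Zeroes a tensor's buffer in place.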
template <typename T>
struct ClearTensor<CPU, T> {
  void operator()(framework::Tensor *tensor) {
    auto size = tensor->numel();
    auto *tensor_data = tensor->data<T>();
    memset(static_cast<void *>(tensor_data), 0, sizeof(T) * size);
  }
};

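// Adds `vector` (length = input.numel() / rows) to every row of `input`,
// writing the result into `output`.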
template <typename T>
struct RowwiseAdd<CPU, T> {
  void operator()(const framework::Tensor &input,
                  const framework::Tensor &vector, framework::Tensor *output) {
    auto in_dims = input.dims();
    auto size = input.numel() / in_dims[0];
    PADDLE_MOBILE_ENFORCE((vector.numel() == size),
                          "vector.numel() must be equal to size.");
    PADDLE_MOBILE_ENFORCE((output->dims() == in_dims),
                          "output->dims() must be equal to in_dims.");

    auto *input_data = input.data<T>();
    auto *out_data = output->data<T>();
    auto *vec_data = vector.data<T>();
    for (int64_t i = 0; i < in_dims[0]; ++i) {
      for (int64_t j = 0; j < size; ++j) {
        out_data[i * size + j] = input_data[i * size + j] + vec_data[j];
      }
    }
  }
};

template struct RowwiseAdd<CPU, float>;
template struct ClearTensor<CPU, float>;

}  // namespace math
}  // namespace operators
}  // namespace paddle_mobile