math_function.cpp
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "operators/math/math_function.h"
#include <cstring>
#include <string>
#include "operators/math/gemm.h"

namespace paddle_mobile {
namespace operators {
namespace math {

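// matmul<float>: matrix_out = alpha * op(matrix_a) * matrix_b + beta *
// matrix_out, with an optional fused ReLU and bias add. Only trans_a is
// handled below; trans_b is accepted for interface symmetry but not applied.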
template <>
void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
                   const framework::Tensor &matrix_b, bool trans_b, float alpha,
                   framework::Tensor *matrix_out, float beta, bool relu,
                   float *bias) {
  auto dim_a = matrix_a.dims();
  auto dim_b = matrix_b.dims();
  auto dim_out = matrix_out->dims();
  PADDLE_MOBILE_ENFORCE(
      dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
      "The input and output of matmul must be matrices");

  int M = dim_out[0];
  int N = dim_out[1];
  int K = (!trans_a) ? dim_a[1] : dim_a[0];
  Gemm gemm;

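  // When trans_a is set, materialize a contiguous row-major copy of A^T so
  // the Sgemm kernels below always consume plain row-major input.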
  if (trans_a) {
    int numel = matrix_a.numel();
    int m = matrix_a.dims()[0];
    int n = matrix_a.dims()[1];
    const float *tmp = matrix_a.data<float>();
    float *a = static_cast<float *>(
        paddle_mobile::memory::Alloc(sizeof(float) * numel));
    int index = 0;
    for (int j = 0; j < n; j++) {
      for (int i = 0; i < m; i++) {
        a[index++] = tmp[i * n + j];
      }
    }

#ifdef _OPENMP
    gemm.Sgemm_omp(M, N, K, alpha, a, K, matrix_b.data<float>(), N, beta,
                   matrix_out->data<float>(), N, relu, bias);
#else
    gemm.Sgemm(M, N, K, alpha, a, K, matrix_b.data<float>(), N, beta,
               matrix_out->data<float>(), N, relu, bias);
#endif
    // Release the temporary transposed copy of matrix_a to avoid leaking it
    // (pairs with the paddle_mobile::memory::Alloc call above).
    paddle_mobile::memory::Free(a);
  } else {
#ifdef _OPENMP
    gemm.Sgemm_omp(M, N, K, alpha, matrix_a.data<float>(), K,
                   matrix_b.data<float>(), N, beta, matrix_out->data<float>(),
                   N, relu, bias);
#else
    gemm.Sgemm(M, N, K, alpha, matrix_a.data<float>(), K,
               matrix_b.data<float>(), N, beta, matrix_out->data<float>(), N,
               relu, bias);
#endif
  }
}
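
// A minimal usage sketch, assuming paddle-mobile's framework::make_ddim and
// Tensor::mutable_data<T> APIs as used elsewhere in this repo:
//
//   framework::Tensor a, b, out;
//   a.mutable_data<float>(framework::make_ddim({2, 3}));
//   b.mutable_data<float>(framework::make_ddim({3, 4}));
//   out.mutable_data<float>(framework::make_ddim({2, 4}));
//   matmul<float>(a, false, b, false, 1.f, &out, 0.f, false, nullptr);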

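// matmulWithBn<float>: GEMM with a fused batch-norm affine transform; the
// scale and shift for the current group are read from new_scale and new_bias
// at offset `group`.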
template <>
void matmulWithBn<float>(const framework::Tensor &matrix_a, bool trans_a,
                         const framework::Tensor &matrix_b, bool trans_b,
                         float alpha, framework::Tensor *matrix_out, float beta,
                         bool relu, framework::Tensor *new_scale,
                         framework::Tensor *new_bias, int group, float *bias) {
  Gemm gemm;
  auto dim_a = matrix_a.dims();
  auto dim_b = matrix_b.dims();
  auto dim_out = matrix_out->dims();
  PADDLE_MOBILE_ENFORCE(
      dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
      "The input and output of matmul must be matrices");

  int M = dim_out[0];
  int N = dim_out[1];
  int K = (!trans_a) ? dim_a[1] : dim_a[0];

#ifdef _OPENMP
  gemm.SgemmWithBn_omp(
      M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(), N,
      beta, matrix_out->data<float>(), N, relu,
      new_scale->data<float>() + group, new_bias->data<float>() + group, bias);
#else
  gemm.SgemmWithBn(M, N, K, alpha, matrix_a.data<float>(), K,
                   matrix_b.data<float>(), N, beta, matrix_out->data<float>(),
                   N, relu, new_scale->data<float>() + group,
                   new_bias->data<float>() + group, bias);
#endif
}
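
// matmulWithPRelu: GEMM followed by a fused PReLU activation; `mode` selects
// how the slope parameter `p` is broadcast by the underlying kernel.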
void matmulWithPRelu(const framework::Tensor &matrix_a, bool trans_a,
                     const framework::Tensor &matrix_b, bool trans_b,
                     framework::Tensor *matrix_out, float *p, std::string mode,
                     float *bias, float *bias1) {
  Gemm gemm;
  auto dim_a = matrix_a.dims();
  auto dim_b = matrix_b.dims();
  auto dim_out = matrix_out->dims();
  PADDLE_MOBILE_ENFORCE(
      dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
      "The input and output of matmul must be matrices");

  int M = dim_out[0];
  int N = dim_out[1];
  int K = (!trans_a) ? dim_a[1] : dim_a[0];

#ifdef _OPENMP
  gemm.SgemmWithPRelu_omp(M, N, K, matrix_a.data<float>(), K,
                          matrix_b.data<float>(), N, matrix_out->data<float>(),
                          N, p, mode, bias, bias1);
#else
  gemm.SgemmWithPRelu(M, N, K, matrix_a.data<float>(), K,
                      matrix_b.data<float>(), N, matrix_out->data<float>(), N,
                      p, mode, bias, bias1);
#endif
}

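// ClearTensor<CPU, T>: zero-fills a tensor's underlying buffer in place.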
template <typename T>
struct ClearTensor<CPU, T> {
  void operator()(framework::Tensor *tensor) {
    auto size = tensor->numel();
    auto *tensor_data = tensor->data<T>();
    memset(static_cast<void *>(tensor_data), 0, sizeof(T) * size);
  }
};

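// RowwiseAdd<CPU, T>: out(i, j) = input(i, j) + vector(j), broadcasting the
// vector across every row of the input.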
template <typename T>
struct RowwiseAdd<CPU, T> {
  void operator()(const framework::Tensor &input,
                  const framework::Tensor &vector, framework::Tensor *output) {
    auto in_dims = input.dims();
    auto size = input.numel() / in_dims[0];
    PADDLE_MOBILE_ENFORCE((vector.numel() == size),
                          "vector.numel() must be equal to size.");
    PADDLE_MOBILE_ENFORCE((output->dims() == in_dims),
                          "output->dims() must be equal to in_dims.");

    auto *input_data = input.data<T>();
    auto *out_data = output->data<T>();
    auto *vec_data = vector.data<T>();
    for (int64_t i = 0; i < in_dims[0]; ++i) {
      for (int64_t j = 0; j < size; ++j) {
        out_data[i * size + j] = input_data[i * size + j] + vec_data[j];
      }
    }
  }
};

template struct RowwiseAdd<CPU, float>;
template struct ClearTensor<CPU, float>;

}  // namespace math
}  // namespace operators
}  // namespace paddle_mobile