/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "operators/math/math_function.h"
#include <cstring>
#include <string>
#include "framework/data_type.h"
#include "framework/tensor.h"
#include "operators/math/gemm.h"

namespace paddle_mobile {
namespace operators {
namespace math {

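// Fills a tensor with a single constant. TensorSetConstant is a visitor:
// VisitDataType dispatches on the tensor's runtime data type and invokes
// apply<T>() with the matching element type.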
struct TensorSetConstant {
  TensorSetConstant(framework::Tensor *tensor, float value)
      : tensor_(tensor), value_(value) {}
  template <typename T>
  void apply() const {
    auto *begin = tensor_->mutable_data<T>();
    std::fill(begin, begin + tensor_->numel(), static_cast<T>(value_));
  }
  framework::Tensor *tensor_;
  float value_;
};

void set_constant(framework::Tensor *tensor, float value) {
  framework::VisitDataType(framework::ToDataType(tensor->type()),
                           TensorSetConstant(tensor, value));
}

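// Single-precision matmul: matrix_out = alpha * op(matrix_a) * matrix_b +
// beta * matrix_out, with optional fused ReLU and bias. Note that only
// trans_a is honored below; trans_b is accepted but never applied.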
template <>
void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
                   const framework::Tensor &matrix_b, bool trans_b, float alpha,
                   framework::Tensor *matrix_out, float beta, bool relu,
                   float *bias) {
  auto dim_a = matrix_a.dims();
  auto dim_b = matrix_b.dims();
  auto dim_out = matrix_out->dims();
  PADDLE_MOBILE_ENFORCE(
      dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
      "The input and output of matmul be matrix");

  int M = dim_out[0];
  int N = dim_out[1];
  int K = (!trans_a) ? dim_a[1] : dim_a[0];
  Gemm gemm;

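  // When trans_a is set, materialize the transpose of matrix_a in a
  // temporary buffer so the plain row-major Sgemm kernels can be reused.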
  if (trans_a) {
    int numel = matrix_a.numel();
    int m = matrix_a.dims()[0];
    int n = matrix_a.dims()[1];
    const float *tmp = matrix_a.data<float>();
    float *a = static_cast<float *>(
        paddle_mobile::memory::Alloc(sizeof(float) * numel));
    int index = 0;
    for (int j = 0; j < n; j++) {
      for (int i = 0; i < m; i++) {
        a[index++] = tmp[i * n + j];
      }
    }

#ifdef _OPENMP

    gemm.Sgemm_omp(M, N, K, alpha, a, K, matrix_b.data<float>(), N, beta,
                   matrix_out->data<float>(), N, relu, bias);
#else
    gemm.Sgemm(M, N, K, alpha, a, K, matrix_b.data<float>(), N, beta,
               matrix_out->data<float>(), N, relu, bias);
#endif
    // Release the temporary transposed copy of matrix_a; memory::Free pairs
    // with the memory::Alloc above, which was otherwise leaked.
    paddle_mobile::memory::Free(a);
  } else {
#ifdef _OPENMP
    gemm.Sgemm_omp(M, N, K, alpha, matrix_a.data<float>(), K,
                   matrix_b.data<float>(), N, beta, matrix_out->data<float>(),
                   N, relu, bias);
#else
    gemm.Sgemm(M, N, K, alpha, matrix_a.data<float>(), K,
               matrix_b.data<float>(), N, beta, matrix_out->data<float>(), N,
               relu, bias);
#endif
  }
}

template <>
void matmulWithBn<float>(const framework::Tensor &matrix_a, bool trans_a,
                         const framework::Tensor &matrix_b, bool trans_b,
                         float alpha, framework::Tensor *matrix_out, float beta,
                         bool relu, framework::Tensor *new_scale,
                         framework::Tensor *new_bias, int group, float *bias) {
  Gemm gemm;
  auto dim_a = matrix_a.dims();
  auto dim_b = matrix_b.dims();
  auto dim_out = matrix_out->dims();
  PADDLE_MOBILE_ENFORCE(
      dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
      "The input and output of matmul be matrix");

  int M = dim_out[0];
  int N = dim_out[1];
  int K = (!trans_a) ? dim_a[1] : dim_a[0];

#ifdef _OPENMP
  gemm.SgemmWithBn_omp(
      M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(), N,
      beta, matrix_out->data<float>(), N, relu,
      new_scale->data<float>() + group, new_bias->data<float>() + group, bias);
#else
  gemm.SgemmWithBn(M, N, K, alpha, matrix_a.data<float>(), K,
                   matrix_b.data<float>(), N, beta, matrix_out->data<float>(),
                   N, relu, new_scale->data<float>() + group,
                   new_bias->data<float>() + group, bias);
#endif
}
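// matmul fused with a PReLU activation: the slope array `p`, the broadcast
// `mode` string, and the bias pointers are forwarded to the Sgemm kernel.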
void matmulWithPRelu(const framework::Tensor &matrix_a, bool trans_a,
                     const framework::Tensor &matrix_b, bool trans_b,
                     framework::Tensor *matrix_out, float *p, std::string mode,
                     float *bias, float *bias1) {
  Gemm gemm;
  auto dim_a = matrix_a.dims();
  auto dim_b = matrix_b.dims();
  auto dim_out = matrix_out->dims();
  PADDLE_MOBILE_ENFORCE(
      dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
      "The input and output of matmul be matrix");

  int M = dim_out[0];
  int N = dim_out[1];
  int K = (!trans_a) ? dim_a[1] : dim_a[0];

#ifdef _OPENMP
  gemm.SgemmWithPRelu_omp(M, N, K, matrix_a.data<float>(), K,
                          matrix_b.data<float>(), N, matrix_out->data<float>(),
                          N, p, mode, bias, bias1);
#else
  gemm.SgemmWithPRelu(M, N, K, matrix_a.data<float>(), K,
                      matrix_b.data<float>(), N, matrix_out->data<float>(), N,
                      p, mode, bias, bias1);

#endif
}

template <typename T>
struct ClearTensor<CPU, T> {
  void operator()(framework::Tensor *tensor) {
    auto size = tensor->numel();
    auto *tensor_data = tensor->data<T>();
    memset(tensor_data, 0, sizeof(T) * size);
  }
};

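// RowwiseAdd broadcasts `vector` along the first dimension of `input`:
// output[i][j] = input[i][j] + vector[j].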
template <typename T>
struct RowwiseAdd<CPU, T> {
  void operator()(const framework::Tensor &input,
                  const framework::Tensor &vector, framework::Tensor *output) {
    auto in_dims = input.dims();
    auto size = input.numel() / in_dims[0];
    PADDLE_MOBILE_ENFORCE((vector.numel() == size),
                          "vector.numel() must be equal to size.");
    PADDLE_MOBILE_ENFORCE((output->dims() == in_dims),
                          "output->dims() must be equal to in_dims.");

    auto *input_data = input.data<T>();
    auto *out_data = output->data<T>();
    auto *vec_data = vector.data<T>();
    for (int64_t i = 0; i < in_dims[0]; ++i) {
      for (int64_t j = 0; j < size; ++j) {
        out_data[i * size + j] = input_data[i * size + j] + vec_data[j];
      }
    }
  }
};

template struct RowwiseAdd<CPU, float>;
template struct ClearTensor<CPU, float>;

}  // namespace math
}  // namespace operators
}  // namespace paddle_mobile