/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#if defined(__ARM_NEON__) || defined(__ARM_NEON)

#include "operators/math/gemm/cblas.h"
#include "operators/math/gemm/executor.h"
#include "operators/math/gemm/strategy.h"

namespace paddle_mobile {
namespace operators {
namespace math {

void cblas_sgemm(const bool transA, const bool transB, const int M, const int N,
                 const int K, const float alpha, const float *A, const int lda,
                 const float *B, const int ldb, const float beta, float *C,
                 const int ldc) {
29 30 31 32 33 34 35 36
  if (N == 1) {
    return cblas_sgemv(transA, M, K, alpha, A, lda, B, beta, C);
  } else if (M == 1) {
    return cblas_sgemv(!transB, N, K, alpha, B, ldb, A, beta, C);
  } else {
    GemmExecutor<SgemmStrategy> exec(transA, transB, M, N, K);
    exec(alpha, A, lda, B, ldb, beta, C, ldc);
  }
H
backup  
hjchen2 已提交
37 38 39 40 41
}

void cblas_sgemv(const bool trans, const int M, const int N, const float alpha,
                 const float *A, const int lda, const float *B,
                 const float beta, float *C) {
42
  GemvExecutor<SgemvStrategy> exec(trans, M, N);
H
backup  
hjchen2 已提交
43 44 45 46 47 48
  exec(alpha, A, lda, B, beta, C);
}

}  // namespace math
}  // namespace operators
}  // namespace paddle_mobile

#endif