// Copyright (c) 2021 CINN Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once //! \file This file defines some C APIs to trigger CBLAS methods. #include "paddle/cinn/runtime/cinn_runtime.h" #ifdef CINN_WITH_MKL_CBLAS #include #include #else #include #endif // define some C APIs extern "C" { /** * \brief Do GEMM on buffer A and B and write result to buffer C. * We pass the \param M, \param N, \param K although the shape can retrieve from cinn_buffer_t because the size of a * matrix not equals the shape of a buffer it is stored. * @param alpha The scaling factor of the product of A and B * @param M Number of the rows of A * @param N the number of the columns in both B and C * @param K the number of columns of A * @param ta whether to transpose A * @param tb whether to transpose B * @param lda The size of the first dimension of A * @param ldb The size of the first dimension of B * @param ldc The size of the first dimension of C * @param beta The scaling factor of C * @param A The matrix A * @param B The matrix B * @param C The output matrix */ void cinn_cpu_mkl_gemm_fp32(float alpha, int M, int N, int K, bool ta, bool tb, int lda, int ldb, int ldc, float beta, cinn_buffer_t* A, cinn_buffer_t* B, cinn_buffer_t* C); /** * \brief Do GEMM on buffer A and B and write result to buffer C. * We pass the \param M, \param N, \param K although the shape can retrieve from cinn_buffer_t because the size of a * matrix not equals the shape of a buffer it is stored. * @param alpha The scaling factor of the product of A and B * @param batch_size the batch size of A and B * @param M Number of the rows of A * @param N the number of the columns in both B and C * @param K the number of columns of A * @param ta whether to transpose A * @param tb whether to transpose B * @param lda The size of the first dimension of A * @param ldb The size of the first dimension of B * @param ldc The size of the first dimension of C * @param a_stride The stride of A(number of elements, not bytes) between batches * @param b_stride The stride of B(number of elements, not bytes) between batches * @param c_stride The stride of C(number of elements, not bytes) between batches * @param beta The scaling factor of C * @param A The matrix A * @param B The matrix B * @param C The output matrix */ void cinn_cpu_mkl_gemm_batch_fp32(float alpha, int batch_size, int M, int N, int K, bool ta, bool tb, int lda, int ldb, int ldc, int a_stride, int b_stride, int c_stride, float beta, cinn_buffer_t* A, cinn_buffer_t* B, cinn_buffer_t* C); void cinn_call_cholesky_host(void* v_args, int num_args, int batch_size, int m, bool upper); } // extern "C"