diff --git a/paddle/operators/math/.clang-format b/paddle/operators/math/.clang-format
new file mode 100644
index 0000000000000000000000000000000000000000..47b8a85206ab457e2b3cb90a68b7a82a0753d327
--- /dev/null
+++ b/paddle/operators/math/.clang-format
@@ -0,0 +1,5 @@
+---
+Language: Cpp
+BasedOnStyle: Google
+Standard: Cpp11
+...
diff --git a/paddle/operators/math/math_function.cu b/paddle/operators/math/math_function.cu
index e1ac8560825ef68322f84a7c6627327e18725b59..3e2aeea1da490d524db7b04cce947c57af97d8ef 100644
--- a/paddle/operators/math/math_function.cu
+++ b/paddle/operators/math/math_function.cu
@@ -14,66 +14,34 @@ limitations under the License. */
 #include "paddle/operators/math/math_function.h"
-
 
 namespace paddle {
 namespace operators {
 namespace math {
 
 template <>
-void gemm<platform::GPUPlace, float>(const CBLAS_TRANSPOSE transA,
-                                     const CBLAS_TRANSPOSE transB,
-                                     const int M,
-                                     const int N,
-                                     const int K,
-                                     const float alpha,
-                                     const float* A,
-                                     const int lda,
-                                     const float* B,
-                                     const int ldb,
-                                     const float beta,
-                                     float* C,
-                                     const int ldc,
-                                     platform::DeviceContext* context) {
+void gemm<platform::GPUPlace, float>(
+    const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, const int M,
+    const int N, const int K, const float alpha, const float* A, const int lda,
+    const float* B, const int ldb, const float beta, float* C, const int ldc,
+    platform::DeviceContext* context) {
   // Note that cublas follows fortran order, so the order is different from
   // the cblas convention.
   cublasOperation_t cuTransA =
       (transA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
   cublasOperation_t cuTransB =
       (transB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
-
+
   PADDLE_ENFORCE(platform::dynload::cublasSgemm(
-      reinterpret_cast<platform::CUDADeviceContext*>(context)->
-          cublas_handle(),
-      cuTransB,
-      cuTransA,
-      N,
-      M,
-      K,
-      &alpha,
-      B,
-      ldb,
-      A,
-      lda,
-      &beta,
-      C,
-      ldc));
+      reinterpret_cast<platform::CUDADeviceContext*>(context)->cublas_handle(),
+      cuTransB, cuTransA, N, M, K, &alpha, B, ldb, A, lda, &beta, C, ldc));
 }
 
 template <>
-void gemm<platform::GPUPlace, double>(const CBLAS_TRANSPOSE transA,
-                                      const CBLAS_TRANSPOSE transB,
-                                      const int M,
-                                      const int N,
-                                      const int K,
-                                      const double alpha,
-                                      const double* A,
-                                      const int lda,
-                                      const double* B,
-                                      const int ldb,
-                                      const double beta,
-                                      double* C,
-                                      const int ldc,
-                                      platform::DeviceContext* context) {
+void gemm<platform::GPUPlace, double>(
+    const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, const int M,
+    const int N, const int K, const double alpha, const double* A,
+    const int lda, const double* B, const int ldb, const double beta, double* C,
+    const int ldc, platform::DeviceContext* context) {
   // Note that cublas follows fortran order, so the order is different from
   // the cblas convention.
   cublasOperation_t cuTransA =
@@ -81,97 +49,76 @@ void gemm<platform::GPUPlace, double>(const CBLAS_TRANSPOSE transA,
   cublasOperation_t cuTransB =
       (transB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
 
   PADDLE_ENFORCE(platform::dynload::cublasDgemm(
-      reinterpret_cast<platform::CUDADeviceContext*>(context)->
-          cublas_handle(),
-      cuTransB,
-      cuTransA,
-      N,
-      M,
-      K,
-      &alpha,
-      B,
-      ldb,
-      A,
-      lda,
-      &beta,
-      C,
-      ldc));
+      reinterpret_cast<platform::CUDADeviceContext*>(context)->cublas_handle(),
+      cuTransB, cuTransA, N, M, K, &alpha, B, ldb, A, lda, &beta, C, ldc));
 }
 
 template <>
-void matmul<platform::GPUPlace, float>(const framework::Tensor& in1, bool in1_T, const framework::Tensor& in2, bool in2_T, float alpha,
-framework::Tensor* out, float beta, platform::DeviceContext* context) {
+void matmul<platform::GPUPlace, float>(const framework::Tensor& in1, bool in1_T,
+                                       const framework::Tensor& in2, bool in2_T,
+                                       float alpha, framework::Tensor* out,
+                                       float beta,
+                                       platform::DeviceContext* context) {
   auto in1_dim = in1.dims();
   auto in2_dim = in2.dims();
   auto out_dim = out->dims();
-  PADDLE_ENFORCE(in1_dim.size() == 2 && in2_dim.size() == 2 && out_dim.size() == 2,
-                 "The input and output of matmul be matrix");
   PADDLE_ENFORCE(
-      in1_dim[1] == in2_dim[0],
-      "First matrix's width must be equal with second matrix's height.");
+      in1_dim.size() == 2 && in2_dim.size() == 2 && out_dim.size() == 2,
+      "The input and output of matmul be matrix");
+  PADDLE_ENFORCE(
+      in1_dim[1] == in2_dim[0],
+      "First matrix's width must be equal with second matrix's height.");
 
-  PADDLE_ENFORCE(platform::is_gpu_place(in1.place()) && platform::is_gpu_place(in2.place())&& platform::is_gpu_place(out->place()), "Matrix must all be in GPUPlace");
+  PADDLE_ENFORCE(platform::is_gpu_place(in1.place()) &&
+                     platform::is_gpu_place(in2.place()) &&
+                     platform::is_gpu_place(out->place()),
+                 "Matrix must all be in GPUPlace");
 
-  int M = out_dim[0]; 
+  int M = out_dim[0];
   int N = out_dim[1];
   int K = in1_dim[1];
-  CBLAS_TRANSPOSE in1_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans; 
+  CBLAS_TRANSPOSE in1_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
   CBLAS_TRANSPOSE in2_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
 
-  gemm<platform::GPUPlace, float>(in1_Trans,
-                                  in2_Trans,
-                                  M,
-                                  N,
-                                  K,
-                                  alpha,
-                                  in1.data<float>(),
-                                  K,
-                                  in2.data<float>(),
-                                  N,
-                                  beta,
-                                  out->data<float>(),
-                                  N,
-                                  context);
-
+  gemm<platform::GPUPlace, float>(in1_Trans, in2_Trans, M, N, K, alpha,
+                                  in1.data<float>(), K, in2.data<float>(), N,
+                                  beta, out->data<float>(), N, context);
 }
-
 
 template <>
-void matmul<platform::GPUPlace, double>(const framework::Tensor& in1, bool in1_T, const framework::Tensor& in2, bool in2_T, float alpha,
-framework::Tensor* out, float beta, platform::DeviceContext* context) {
+void matmul<platform::GPUPlace, double>(const framework::Tensor& in1,
+                                        bool in1_T,
+                                        const framework::Tensor& in2,
+                                        bool in2_T, float alpha,
+                                        framework::Tensor* out, float beta,
+                                        platform::DeviceContext* context) {
   auto in1_dim = in1.dims();
   auto in2_dim = in2.dims();
   auto out_dim = out->dims();
-  PADDLE_ENFORCE(in1_dim.size() == 2 && in2_dim.size() == 2 && out_dim.size() == 2,
-                 "The input and output of matmul be matrix");
   PADDLE_ENFORCE(
-      in1_dim[1] == in2_dim[0],
-      "First matrix's width must be equal with second matrix's height.");
+      in1_dim.size() == 2 && in2_dim.size() == 2 && out_dim.size() == 2,
+      "The input and output of matmul be matrix");
+  PADDLE_ENFORCE(
+      in1_dim[1] == in2_dim[0],
+      "First matrix's width must be equal with second matrix's height.");
 
-  PADDLE_ENFORCE(platform::is_gpu_place(in1.place()) && platform::is_gpu_place(in2.place())&& platform::is_gpu_place(out->place()), "Matrix must all be in GPUPlace");
+  PADDLE_ENFORCE(platform::is_gpu_place(in1.place()) &&
+                     platform::is_gpu_place(in2.place()) &&
+                     platform::is_gpu_place(out->place()),
+                 "Matrix must all be in GPUPlace");
 
-  CBLAS_TRANSPOSE in1_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
+  int M = out_dim[0];
+  int N = out_dim[1];
+  int K = in1_dim[1];
+  CBLAS_TRANSPOSE in1_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
   CBLAS_TRANSPOSE in2_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
 
-  gemm<platform::GPUPlace, double>(in1_Trans,
-                                   in2_Trans,
-                                   M,
-                                   N,
-                                   K,
-                                   alpha,
-                                   in1.data<double>(),
-                                   K,
-                                   in2.data<double>(),
-                                   N,
-                                   beta,
-                                   out->data<double>(),
-                                   N,
-                                   context);
-
+  gemm<platform::GPUPlace, double>(in1_Trans, in2_Trans, M, N, K, alpha,
+                                   in1.data<double>(), K, in2.data<double>(), N,
+                                   beta, out->data<double>(), N, context);
 }
-
 
 } // namespace math
 } // namespace operators
 } // namespace paddle
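
The in-code note that cuBLAS follows Fortran (column-major) order is why the gemm wrappers above pass (cuTransB, cuTransA, N, M, K, B, A, C) rather than (cuTransA, cuTransB, M, N, K, A, B, C). Below is a minimal host-side sketch, not part of the patch, that illustrates the trick with a toy column-major SGEMM standing in for cublasSgemm: a row-major matrix reinterpreted as column-major is its transpose, so computing C^T = B^T * A^T in column-major terms yields the row-major product C = A * B. The function name colmajor_sgemm and the sample matrices are made up for illustration.

// Toy column-major GEMM with a cuBLAS-like argument order (NoTrans/NoTrans case).
#include <cassert>
#include <vector>

void colmajor_sgemm(int m, int n, int k, float alpha, const float* A, int lda,
                    const float* B, int ldb, float beta, float* C, int ldc) {
  for (int j = 0; j < n; ++j) {
    for (int i = 0; i < m; ++i) {
      float acc = 0.f;
      for (int p = 0; p < k; ++p) {
        acc += A[i + p * lda] * B[p + j * ldb];  // column-major indexing
      }
      C[i + j * ldc] = alpha * acc + beta * C[i + j * ldc];
    }
  }
}

int main() {
  // Row-major A (2x3), B (3x2) and C (2x2), as the wrappers above assume.
  const int M = 2, N = 2, K = 3;
  std::vector<float> A = {1, 2, 3, 4, 5, 6};
  std::vector<float> B = {7, 8, 9, 10, 11, 12};
  std::vector<float> C(M * N, 0.f);

  // Same call pattern as the wrapper: B and A swapped, M and N swapped, and
  // leading dimensions N, K, N taken from the row-major widths.
  colmajor_sgemm(N, M, K, 1.f, B.data(), N, A.data(), K, 0.f, C.data(), N);

  // C now holds the row-major product A * B.
  assert(C[0] == 58 && C[1] == 64 && C[2] == 139 && C[3] == 154);
  return 0;
}

The same reasoning explains why the matmul wrappers pass K, N, N as lda, ldb, ldc: those are the row-major widths of in1, in2, and out.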
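The matmul wrappers reduce a 2-D tensor product to one gemm call by reading M and N from the output shape, K from the first input's width, and mapping the boolean transpose flags to CBLAS_TRANSPOSE values. A standalone sketch of that derivation follows; the helper InferGemmShape and its types are hypothetical, not Paddle API. One detail worth noting: in the patch both in1_Trans and in2_Trans are derived from in1_T, which looks like a copy-paste slip, so the sketch reads in2_T for the second flag.

#include <array>
#include <cassert>

enum Transpose { kNoTrans, kTrans };

struct GemmShape {
  int M, N, K;
  Transpose transA, transB;
};

// dims are {height, width} of the row-major inputs and output.
GemmShape InferGemmShape(const std::array<int, 2>& in1_dim,
                         const std::array<int, 2>& in2_dim,
                         const std::array<int, 2>& out_dim, bool in1_T,
                         bool in2_T) {
  GemmShape s;
  s.M = out_dim[0];  // rows of the output
  s.N = out_dim[1];  // cols of the output
  s.K = in1_dim[1];  // inner dimension, taken from in1's width as in the patch
  s.transA = in1_T ? kTrans : kNoTrans;
  s.transB = in2_T ? kTrans : kNoTrans;  // from in2_T, not in1_T
  return s;
}

int main() {
  // (2x3) * (3x4) -> (2x4), no transposes.
  GemmShape s = InferGemmShape({2, 3}, {3, 4}, {2, 4}, false, false);
  assert(s.M == 2 && s.N == 4 && s.K == 3);
  assert(s.transA == kNoTrans && s.transB == kNoTrans);
  return 0;
}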