diff --git a/paddle/operators/math/.clang-format b/paddle/operators/math/.clang-format
deleted file mode 100644
index 47b8a85206ab457e2b3cb90a68b7a82a0753d327..0000000000000000000000000000000000000000
--- a/paddle/operators/math/.clang-format
+++ /dev/null
@@ -1,5 +0,0 @@
----
-Language: Cpp
-BasedOnStyle: Google
-Standard: Cpp11
-...
diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt
index b1d0bc8f8753957a089509ca9176b2791217db0e..84fffe684379a2e4a29006fbe0373292ce9f1362 100644
--- a/paddle/operators/math/CMakeLists.txt
+++ b/paddle/operators/math/CMakeLists.txt
@@ -1,16 +1,13 @@
-if (WITH_GPU)
-    if (WITH_MKLML)
-        nv_library(math_function SRCS math_function.cc math_function.cu DEPS mklml device_context)
-    else()
-        nv_library(math_function SRCS math_function.cc math_function.cu DEPS cblas device_context)
-    endif()
+if(WITH_MKLML)
+    set(BLAS_LIB mklml)
 else()
-    if (WITH_MKLML)
-        cc_library(math_function SRCS math_function.cc DEPS mklml device_context)
-    else()
-        cc_library(math_function SRCS math_function.cc DEPS cblas device_context)
-    endif()
-endif()
+    set(BLAS_LIB cblas)
+endif()
+
+if(WITH_GPU)
+    nv_library(math_function SRCS math_function.cc math_function.cu DEPS ${BLAS_LIB} device_context)
+else()
+    cc_library(math_function SRCS math_function.cc DEPS ${BLAS_LIB} device_context)
+endif()
 
 nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor)
diff --git a/paddle/operators/math/math_function.cc b/paddle/operators/math/math_function.cc
index e5eefedde05d3276a8613570a1b402be996566bf..03a63d063f832f529405f0d080d27a122a1d32da 100644
--- a/paddle/operators/math/math_function.cc
+++ b/paddle/operators/math/math_function.cc
@@ -12,6 +12,44 @@
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef PADDLE_USE_MKLML
+#include <mkl_cblas.h>
+#include <mkl_lapacke.h>
+#include <mkl_vml_functions.h>
+#endif
+
+#ifdef PADDLE_USE_MKL
+#include <mkl.h>
+#include <mkl_lapacke.h>
+#endif
+
+#ifdef PADDLE_USE_ATLAS
+extern "C" {
+#include <cblas.h>
+#include <clapack.h>
+}
+#endif
+
+#ifdef PADDLE_USE_OPENBLAS
+#include <cblas.h>
+#include <lapacke.h>
+#endif
+
+#ifndef LAPACK_FOUND
+extern "C" {
+#include <cblas.h>
+int LAPACKE_sgetrf(int matrix_layout, int m, int n, float* a, int lda,
+                   int* ipiv);
+int LAPACKE_dgetrf(int matrix_layout, int m, int n, double* a, int lda,
+                   int* ipiv);
+int LAPACKE_sgetri(int matrix_layout, int n, float* a, int lda,
+                   const int* ipiv);
+int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda,
+                   const int* ipiv);
+}
+#endif
+
+#include <cmath>
 #include "paddle/operators/math/math_function.h"
 
 namespace paddle {
@@ -48,62 +86,65 @@ void gemm<platform::CPUPlace, double>(const CBLAS_TRANSPOSE transA,
 }
 
 template <>
-void matmul<platform::CPUPlace, float>(const framework::Tensor& in1, bool in1_T,
-            const framework::Tensor& in2, bool in2_T,
-            float alpha, framework::Tensor* out,
-            float beta, platform::DeviceContext* context) {
+void matmul<platform::CPUPlace, float>(const framework::Tensor& matrix_a,
+                                       bool trans_a,
+                                       const framework::Tensor& matrix_b,
+                                       bool trans_b, float alpha,
+                                       framework::Tensor* matrix_out,
+                                       float beta,
+                                       platform::DeviceContext* context) {
-  auto in1_dim = in1.dims();
-  auto in2_dim = in2.dims();
-  auto out_dim = out->dims();
-  PADDLE_ENFORCE(
-      in1_dim.size() == 2 && in2_dim.size() == 2 && out_dim.size() == 2,
-      "The input and output of matmul be matrix");
-
-  PADDLE_ENFORCE(platform::is_cpu_place(in1.place()) &&
-                     platform::is_cpu_place(in2.place()) &&
-                     platform::is_cpu_place(out->place()),
+  auto dim_a = matrix_a.dims();
+  auto dim_b = matrix_b.dims();
+  auto dim_out = matrix_out->dims();
+  PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
+                 "The input and output of matmul must be matrices");
+
+  PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
+                     platform::is_cpu_place(matrix_b.place()) &&
+                     platform::is_cpu_place(matrix_out->place()),
                  "Matrix must all be in CPUPlace");
 
-  int M = out_dim[0];
-  int N = out_dim[1];
-  int K = (in1_T == false) ? in1_dim[1] : in1_dim[0];
+  int M = dim_out[0];
+  int N = dim_out[1];
+  int K = (trans_a == false) ? dim_a[1] : dim_a[0];
 
-  CBLAS_TRANSPOSE in1_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
-  CBLAS_TRANSPOSE in2_Trans = (in2_T == false) ? CblasNoTrans : CblasTrans;
+  CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans;
+  CBLAS_TRANSPOSE transB = (trans_b == false) ? CblasNoTrans : CblasTrans;
 
-  gemm<platform::CPUPlace, float>(in1_Trans, in2_Trans, M, N, K, alpha,
-                                  in1.data<float>(), in2.data<float>(), beta,
-                                  out->data<float>(), context);
+  gemm<platform::CPUPlace, float>(
+      transA, transB, M, N, K, alpha, matrix_a.data<float>(),
+      matrix_b.data<float>(), beta, matrix_out->data<float>(), context);
 }
 
 template <>
-void matmul<platform::CPUPlace, double>(const framework::Tensor& in1,
-                                        bool in1_T,
-                                        const framework::Tensor& in2,
-                                        bool in2_T, float alpha,
-                                        framework::Tensor* out, float beta,
+void matmul<platform::CPUPlace, double>(const framework::Tensor& matrix_a,
+                                        bool trans_a,
+                                        const framework::Tensor& matrix_b,
+                                        bool trans_b, double alpha,
+                                        framework::Tensor* matrix_out,
+                                        double beta,
                                         platform::DeviceContext* context) {
-  auto in1_dim = in1.dims();
-  auto in2_dim = in2.dims();
-  auto out_dim = out->dims();
-  PADDLE_ENFORCE(
-      in1_dim.size() == 2 && in2_dim.size() == 2 && out_dim.size() == 2,
-      "The input and output of matmul be matrix");
-  PADDLE_ENFORCE(platform::is_cpu_place(in1.place()) &&
-                     platform::is_cpu_place(in2.place()) &&
-                     platform::is_cpu_place(out->place()),
+  auto dim_a = matrix_a.dims();
+  auto dim_b = matrix_b.dims();
+  auto dim_out = matrix_out->dims();
+  PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
+                 "The input and output of matmul must be matrices");
+
+  PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
+                     platform::is_cpu_place(matrix_b.place()) &&
+                     platform::is_cpu_place(matrix_out->place()),
                  "Matrix must all be in CPUPlace");
 
-  int M = out_dim[0];
-  int N = out_dim[1];
-  int K = (in1_T == false) ? in1_dim[1] : in1_dim[0];
-  CBLAS_TRANSPOSE in1_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
-  CBLAS_TRANSPOSE in2_Trans = (in2_T == false) ? CblasNoTrans : CblasTrans;
+  int M = dim_out[0];
+  int N = dim_out[1];
+  int K = (trans_a == false) ? dim_a[1] : dim_a[0];
+
+  CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans;
+  CBLAS_TRANSPOSE transB = (trans_b == false) ? CblasNoTrans : CblasTrans;
 
-  gemm<platform::CPUPlace, double>(in1_Trans, in2_Trans, M, N, K, alpha,
-                                   in1.data<double>(), in2.data<double>(), beta,
-                                   out->data<double>(), context);
+  gemm<platform::CPUPlace, double>(
+      transA, transB, M, N, K, alpha, matrix_a.data<double>(),
+      matrix_b.data<double>(), beta, matrix_out->data<double>(), context);
 }
 
 }  // namespace math
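The CPU changes above rename `in1`/`in2`/`out` to `matrix_a`/`matrix_b`/`matrix_out` and give the `double` specialization properly typed `alpha`/`beta`. For orientation, here is a minimal sketch (not part of this diff) of how the refactored entry point might be driven; the tensor setup (`make_ddim`, `mutable_data`) and the `CPUDeviceContext(CPUPlace)` constructor are assumed to behave as in the accompanying `math_function_test.cc`:

```cpp
#include "paddle/framework/tensor.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/platform/device_context.h"

void MatmulCpuExample() {
  using namespace paddle;

  framework::Tensor a, b, out;
  platform::CPUPlace place;

  // C = A * B with A: 2 x 3 and B: 3 x 4, so C must be allocated as 2 x 4.
  float* a_data = a.mutable_data<float>(framework::make_ddim({2, 3}), place);
  float* b_data = b.mutable_data<float>(framework::make_ddim({3, 4}), place);
  out.mutable_data<float>(framework::make_ddim({2, 4}), place);
  for (int i = 0; i < 6; ++i) a_data[i] = static_cast<float>(i);
  for (int i = 0; i < 12; ++i) b_data[i] = static_cast<float>(i);

  platform::CPUDeviceContext context(place);
  // trans_a = trans_b = false, alpha = 1, beta = 0: plain C = A * B.
  operators::math::matmul<platform::CPUPlace, float>(a, false, b, false, 1.0f,
                                                     &out, 0.0f, &context);
}
```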
diff --git a/paddle/operators/math/math_function.cu b/paddle/operators/math/math_function.cu
index ff02c6ad7e0101d10111b2009c44ed587645358e..c1ec2d93eda3d736d334bd55940d8f963a299618 100644
--- a/paddle/operators/math/math_function.cu
+++ b/paddle/operators/math/math_function.cu
@@ -12,7 +12,46 @@
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef PADDLE_USE_MKLML
+#include <mkl_cblas.h>
+#include <mkl_lapacke.h>
+#include <mkl_vml_functions.h>
+#endif
+
+#ifdef PADDLE_USE_MKL
+#include <mkl.h>
+#include <mkl_lapacke.h>
+#endif
+
+#ifdef PADDLE_USE_ATLAS
+extern "C" {
+#include <cblas.h>
+#include <clapack.h>
+}
+#endif
+
+#ifdef PADDLE_USE_OPENBLAS
+#include <cblas.h>
+#include <lapacke.h>
+#endif
+
+#ifndef LAPACK_FOUND
+extern "C" {
+#include <cblas.h>
+int LAPACKE_sgetrf(int matrix_layout, int m, int n, float* a, int lda,
+                   int* ipiv);
+int LAPACKE_dgetrf(int matrix_layout, int m, int n, double* a, int lda,
+                   int* ipiv);
+int LAPACKE_sgetri(int matrix_layout, int n, float* a, int lda,
+                   const int* ipiv);
+int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda,
+                   const int* ipiv);
+}
+#endif
+
+#include <cmath>
 #include "paddle/operators/math/math_function.h"
+
 namespace paddle {
 namespace operators {
 namespace math {
@@ -60,63 +99,67 @@ void gemm<platform::GPUPlace, double>(const CBLAS_TRANSPOSE transA,
 }
 
 template <>
-void matmul<platform::GPUPlace, float>(const framework::Tensor& in1, bool in1_T,
-            const framework::Tensor& in2, bool in2_T,
-            float alpha, framework::Tensor* out,
-            float beta, platform::DeviceContext* context) {
+void matmul<platform::GPUPlace, float>(const framework::Tensor& matrix_a,
+                                       bool trans_a,
+                                       const framework::Tensor& matrix_b,
+                                       bool trans_b, float alpha,
+                                       framework::Tensor* matrix_out,
+                                       float beta,
+                                       platform::DeviceContext* context) {
-  auto in1_dim = in1.dims();
-  auto in2_dim = in2.dims();
-  auto out_dim = out->dims();
-  PADDLE_ENFORCE(
-      in1_dim.size() == 2 && in2_dim.size() == 2 && out_dim.size() == 2,
-      "The input and output of matmul be matrix");
-
-  PADDLE_ENFORCE(platform::is_gpu_place(in1.place()) &&
-                     platform::is_gpu_place(in2.place()) &&
-                     platform::is_gpu_place(out->place()),
+  auto dim_a = matrix_a.dims();
+  auto dim_b = matrix_b.dims();
+  auto dim_out = matrix_out->dims();
+  PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
+                 "The input and output of matmul must be matrices");
+
+  PADDLE_ENFORCE(platform::is_gpu_place(matrix_a.place()) &&
+                     platform::is_gpu_place(matrix_b.place()) &&
+                     platform::is_gpu_place(matrix_out->place()),
                  "Matrix must all be in GPUPlace");
 
-  int M = out_dim[0];
-  int N = out_dim[1];
-  int K = (in1_T == false) ? in1_dim[1] : in1_dim[0];
+  int M = dim_out[0];
+  int N = dim_out[1];
+  int K = (trans_a == false) ? dim_a[1] : dim_a[0];
 
-  CBLAS_TRANSPOSE in1_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
-  CBLAS_TRANSPOSE in2_Trans = (in2_T == false) ? CblasNoTrans : CblasTrans;
+  CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans;
+  CBLAS_TRANSPOSE transB = (trans_b == false) ? CblasNoTrans : CblasTrans;
 
-  gemm<platform::GPUPlace, float>(in1_Trans, in2_Trans, M, N, K, alpha,
-                                  in1.data<float>(), in2.data<float>(), beta,
-                                  out->data<float>(), context);
+  gemm<platform::GPUPlace, float>(
+      transA, transB, M, N, K, alpha, matrix_a.data<float>(),
+      matrix_b.data<float>(), beta, matrix_out->data<float>(), context);
 }
 
 template <>
-void matmul<platform::GPUPlace, double>(const framework::Tensor& in1,
-                                        bool in1_T,
-                                        const framework::Tensor& in2,
-                                        bool in2_T, float alpha,
-                                        framework::Tensor* out, float beta,
+void matmul<platform::GPUPlace, double>(const framework::Tensor& matrix_a,
+                                        bool trans_a,
+                                        const framework::Tensor& matrix_b,
+                                        bool trans_b, double alpha,
+                                        framework::Tensor* matrix_out,
+                                        double beta,
                                         platform::DeviceContext* context) {
-  auto in1_dim = in1.dims();
-  auto in2_dim = in2.dims();
-  auto out_dim = out->dims();
-  PADDLE_ENFORCE(
-      in1_dim.size() == 2 && in2_dim.size() == 2 && out_dim.size() == 2,
-      "The input and output of matmul be matrix");
-  PADDLE_ENFORCE(platform::is_gpu_place(in1.place()) &&
-                     platform::is_gpu_place(in2.place()) &&
-                     platform::is_gpu_place(out->place()),
+  auto dim_a = matrix_a.dims();
+  auto dim_b = matrix_b.dims();
+  auto dim_out = matrix_out->dims();
+  PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
+                 "The input and output of matmul must be matrices");
+
+  PADDLE_ENFORCE(platform::is_gpu_place(matrix_a.place()) &&
+                     platform::is_gpu_place(matrix_b.place()) &&
+                     platform::is_gpu_place(matrix_out->place()),
                  "Matrix must all be in GPUPlace");
 
-  int M = out_dim[0];
-  int N = out_dim[1];
-  int K = (in1_T == false) ? in1_dim[1] : in1_dim[0];
-  CBLAS_TRANSPOSE in1_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
-  CBLAS_TRANSPOSE in2_Trans = (in2_T == false) ? CblasNoTrans : CblasTrans;
+  int M = dim_out[0];
+  int N = dim_out[1];
+  int K = (trans_a == false) ? dim_a[1] : dim_a[0];
+
+  CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans;
+  CBLAS_TRANSPOSE transB = (trans_b == false) ? CblasNoTrans : CblasTrans;
 
-  gemm<platform::GPUPlace, double>(in1_Trans, in2_Trans, M, N, K, alpha,
-                                   in1.data<double>(), in2.data<double>(),
-                                   beta, out->data<double>(), context);
+  gemm<platform::GPUPlace, double>(
+      transA, transB, M, N, K, alpha, matrix_a.data<double>(),
+      matrix_b.data<double>(), beta, matrix_out->data<double>(), context);
 }
+
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle
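The same `M`/`N`/`K` bookkeeping appears in all four specializations: `M` and `N` always come from the output tensor, while `K` is read from `matrix_a`, with the transpose flag deciding which axis is the inner dimension. A small self-contained sketch of that shape logic (the helper name is invented for illustration):

```cpp
#include <cassert>
#include <utility>

// Effective (rows, cols) of a 2-D operand once its transpose flag is applied.
std::pair<int, int> EffectiveShape(int rows, int cols, bool trans) {
  return trans ? std::make_pair(cols, rows) : std::make_pair(rows, cols);
}

int main() {
  // matrix_a is stored 3 x 2 but used transposed, so it acts as 2 x 3;
  // matrix_b is stored and used as 3 x 4.
  auto a = EffectiveShape(3, 2, /*trans=*/true);   // 2 x 3
  auto b = EffectiveShape(3, 4, /*trans=*/false);  // 3 x 4

  // M and N are taken from the output; K is the shared inner dimension,
  // which matmul reads from matrix_a as dim_a[1] (no transpose) or dim_a[0].
  int M = a.first, K = a.second, N = b.second;
  assert(K == b.first);  // inner dimensions must agree
  assert(M == 2 && N == 4 && K == 3);
  return 0;
}
```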
diff --git a/paddle/operators/math/math_function.h b/paddle/operators/math/math_function.h
index 12d1706afb82d37f0381facb7d448251372497d9..c20e6a3b39fb57aad88d35131e35cf43e3cd38b1 100644
--- a/paddle/operators/math/math_function.h
+++ b/paddle/operators/math/math_function.h
@@ -14,44 +14,6 @@
 limitations under the License. */
 
 #pragma once
-#ifdef PADDLE_USE_MKLML
-#include <mkl_cblas.h>
-#include <mkl_lapacke.h>
-#include <mkl_vml_functions.h>
-#endif
-
-#ifdef PADDLE_USE_MKL
-#include <mkl.h>
-#include <mkl_lapacke.h>
-#endif
-
-#ifdef PADDLE_USE_ATLAS
-extern "C" {
-#include <cblas.h>
-#include <clapack.h>
-}
-#endif
-
-#ifdef PADDLE_USE_OPENBLAS
-#include <cblas.h>
-#include <lapacke.h>
-#endif
-
-#ifndef LAPACK_FOUND
-extern "C" {
-#include <cblas.h>
-int LAPACKE_sgetrf(int matrix_layout, int m, int n, float* a, int lda,
-                   int* ipiv);
-int LAPACKE_dgetrf(int matrix_layout, int m, int n, double* a, int lda,
-                   int* ipiv);
-int LAPACKE_sgetri(int matrix_layout, int n, float* a, int lda,
-                   const int* ipiv);
-int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda,
-                   const int* ipiv);
-}
-#endif
-
-#include <cmath>
 #include "paddle/framework/tensor.h"
 #include "paddle/platform/device_context.h"
 #include "paddle/platform/enforce.h"
@@ -60,17 +22,21 @@ namespace paddle {
 namespace operators {
 namespace math {
 
-// support continuous memory now
-template <typename Place, typename T>
+// Supports continuous memory only for now
+// If transA = N and transB = N,
+// then matrixA: M * K, matrixB: K * N, matrixC: M * N
+// For more detailed info, please refer to
+// http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
+template <typename Place, typename T>
 void gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB,
           const int M, const int N, const int K, const T alpha, const T* A,
           const T* B, const T beta, T* C, platform::DeviceContext* context);
 
 // matrix multiply with continuous memory
 template <typename Place, typename T>
-void matmul(const framework::Tensor& in1, bool in1_T,
-            const framework::Tensor& in2, bool in2_T, float alpha,
-            framework::Tensor* out, float beta,
+void matmul(const framework::Tensor& matrix_a, bool trans_a,
+            const framework::Tensor& matrix_b, bool trans_b, T alpha,
+            framework::Tensor* matrix_out, T beta,
             platform::DeviceContext* context);
 
 }  // namespace math
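The new header comment documents the gemm contract: with both transpose flags set to `N`, `A` is M x K, `B` is K x N, and `C` is M x N, all in row-major dense storage. Below is a hedged sketch of a direct call under that contract; it assumes the CPU specialization is linked in, that a CBLAS header supplying `CblasNoTrans` is visible at the call site, and that `CPUDeviceContext` is constructible from a `CPUPlace`:

```cpp
#include "paddle/operators/math/math_function.h"
#include "paddle/platform/device_context.h"

void GemmCpuExample() {
  using namespace paddle;

  const int M = 2, N = 2, K = 3;
  float A[M * K] = {1, 2, 3, 4, 5, 6};  // 2 x 3, row-major
  float B[K * N] = {1, 0, 0, 1, 1, 1};  // 3 x 2, row-major
  float C[M * N] = {0, 0, 0, 0};        // 2 x 2 output

  platform::CPUPlace place;
  platform::CPUDeviceContext context(place);

  // C = 1.0 * A * B + 0.0 * C
  operators::math::gemm<platform::CPUPlace, float>(
      CblasNoTrans, CblasNoTrans, M, N, K, 1.0f, A, B, 0.0f, C, &context);
  // Expected C (row-major): {4, 5, 10, 11}.
}
```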