Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
2ec8dab4
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
2ec8dab4
编写于
8月 14, 2017
作者:
Q
qijun
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
follow comments
上级
37aa4b98
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
187 addition
and
146 deletion
+187
-146
paddle/operators/math/.clang-format
paddle/operators/math/.clang-format
+0
-5
paddle/operators/math/CMakeLists.txt
paddle/operators/math/CMakeLists.txt
+9
-12
paddle/operators/math/math_function.cc
paddle/operators/math/math_function.cc
+84
-43
paddle/operators/math/math_function.cu
paddle/operators/math/math_function.cu
+86
-43
paddle/operators/math/math_function.h
paddle/operators/math/math_function.h
+8
-43
未找到文件。
paddle/operators/math/.clang-format
已删除
100644 → 0
浏览文件 @
37aa4b98
---
Language: Cpp
BasedOnStyle: Google
Standard: Cpp11
...
paddle/operators/math/CMakeLists.txt
浏览文件 @
2ec8dab4
if
(
WITH_GPU
)
if
(
WITH_MKLML
)
nv_library
(
math_function SRCS math_function.cc math_function.cu DEPS mklml device_context
)
else
()
nv_library
(
math_function SRCS math_function.cc math_function.cu DEPS cblas device_context
)
endif
()
if
(
WITH_MKLML
)
set
(
BLAS_LIB mklml
)
else
()
if
(
WITH_MKLML
)
cc_library
(
math_function SRCS math_function.cc DEPS mklml device_context
)
else
()
cc_library
(
math_function SRCS math_function.cc DEPS cblas device_context
)
endif
()
set
(
BLAS_LIB cblas
)
endif
()
if
(
WITH_GPU
)
nv_library
(
math_function SRCS math_function.cc math_function.cu DEPS
${
BLAS_LIB
}
device_context
)
else
()
cc_library
(
math_function SRCS math_function.cc math_function.cu DEPS
${
BLAS_LIB
}
device_context
)
endif
()
nv_test
(
math_function_test SRCS math_function_test.cc DEPS math_function tensor
)
paddle/operators/math/math_function.cc
浏览文件 @
2ec8dab4
...
...
@@ -12,6 +12,44 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_USE_MKLML
#include <mkl_cblas.h>
#include <mkl_lapacke.h>
#include <mkl_vml_functions.h>
#endif
#ifdef PADDLE_USE_MKL
#include <mkl.h>
#include <mkl_lapacke.h>
#endif
#ifdef PADDLE_USE_ATLAS
extern
"C"
{
#include <cblas.h>
#include <clapack.h>
}
#endif
#ifdef PADDLE_USE_OPENBLAS
#include <cblas.h>
#include <lapacke.h>
#endif
#ifndef LAPACK_FOUND
extern
"C"
{
#include <cblas.h>
int
LAPACKE_sgetrf
(
int
matrix_layout
,
int
m
,
int
n
,
float
*
a
,
int
lda
,
int
*
ipiv
);
int
LAPACKE_dgetrf
(
int
matrix_layout
,
int
m
,
int
n
,
double
*
a
,
int
lda
,
int
*
ipiv
);
int
LAPACKE_sgetri
(
int
matrix_layout
,
int
n
,
float
*
a
,
int
lda
,
const
int
*
ipiv
);
int
LAPACKE_dgetri
(
int
matrix_layout
,
int
n
,
double
*
a
,
int
lda
,
const
int
*
ipiv
);
}
#endif
#include <cmath>
#include "paddle/operators/math/math_function.h"
namespace
paddle
{
...
...
@@ -48,62 +86,65 @@ void gemm<platform::CPUPlace, double>(const CBLAS_TRANSPOSE transA,
}
template
<
>
void
matmul
<
platform
::
CPUPlace
,
float
>
(
const
framework
::
Tensor
&
in1
,
bool
in1_T
,
const
framework
::
Tensor
&
in2
,
bool
in2_T
,
float
alpha
,
framework
::
Tensor
*
out
,
void
matmul
<
platform
::
CPUPlace
,
float
>
(
const
framework
::
Tensor
&
matrix_a
,
bool
trans_a
,
const
framework
::
Tensor
&
matrix_b
,
bool
trans_b
,
float
alpha
,
framework
::
Tensor
*
matrix_out
,
float
beta
,
platform
::
DeviceContext
*
context
)
{
auto
in1_dim
=
in1
.
dims
();
auto
in2_dim
=
in2
.
dims
();
auto
out_dim
=
out
->
dims
();
PADDLE_ENFORCE
(
in1_dim
.
size
()
==
2
&&
in2_dim
.
size
()
==
2
&&
out_dim
.
size
()
==
2
,
auto
dim_a
=
matrix_a
.
dims
();
auto
dim_b
=
matrix_b
.
dims
();
auto
dim_out
=
matrix_out
->
dims
();
PADDLE_ENFORCE
(
dim_a
.
size
()
==
2
&&
dim_b
.
size
()
==
2
&&
dim_out
.
size
()
==
2
,
"The input and output of matmul be matrix"
);
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
in1
.
place
())
&&
platform
::
is_cpu_place
(
in2
.
place
())
&&
platform
::
is_cpu_place
(
out
->
place
()),
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
matrix_a
.
place
())
&&
platform
::
is_cpu_place
(
matrix_b
.
place
())
&&
platform
::
is_cpu_place
(
matrix_
out
->
place
()),
"Matrix must all be in CPUPlace"
);
int
M
=
out_dim
[
0
];
int
N
=
out_dim
[
1
];
int
K
=
(
in1_T
==
false
)
?
in1_dim
[
1
]
:
in1_dim
[
0
];
int
M
=
dim_out
[
0
];
int
N
=
dim_out
[
1
];
int
K
=
(
trans_a
==
false
)
?
dim_a
[
1
]
:
dim_a
[
0
];
CBLAS_TRANSPOSE
in1_Trans
=
(
in1_T
==
false
)
?
CblasNoTrans
:
CblasTrans
;
CBLAS_TRANSPOSE
in2_Trans
=
(
in2_T
==
false
)
?
CblasNoTrans
:
CblasTrans
;
CBLAS_TRANSPOSE
transA
=
(
trans_a
==
false
)
?
CblasNoTrans
:
CblasTrans
;
CBLAS_TRANSPOSE
transB
=
(
trans_b
==
false
)
?
CblasNoTrans
:
CblasTrans
;
gemm
<
platform
::
CPUPlace
,
float
>
(
in1_Trans
,
in2_Trans
,
M
,
N
,
K
,
alpha
,
in1
.
data
<
float
>
(),
in2
.
data
<
float
>
(),
beta
,
out
->
data
<
float
>
(),
context
);
gemm
<
platform
::
CPUPlace
,
float
>
(
transA
,
transB
,
M
,
N
,
K
,
alpha
,
matrix_a
.
data
<
float
>
()
,
matrix_b
.
data
<
float
>
(),
beta
,
matrix_
out
->
data
<
float
>
(),
context
);
}
template
<
>
void
matmul
<
platform
::
CPUPlace
,
double
>
(
const
framework
::
Tensor
&
in1
,
bool
in1_T
,
const
framework
::
Tensor
&
in2
,
bool
in2_T
,
float
alpha
,
framework
::
Tensor
*
out
,
float
beta
,
void
matmul
<
platform
::
CPUPlace
,
double
>
(
const
framework
::
Tensor
&
matrix_a
,
bool
trans_a
,
const
framework
::
Tensor
&
matrix_b
,
bool
trans_b
,
double
alpha
,
framework
::
Tensor
*
matrix_out
,
double
beta
,
platform
::
DeviceContext
*
context
)
{
auto
in1_dim
=
in1
.
dims
();
auto
in2_dim
=
in2
.
dims
();
auto
out_dim
=
out
->
dims
();
PADDLE_ENFORCE
(
in1_dim
.
size
()
==
2
&&
in2_dim
.
size
()
==
2
&&
out_dim
.
size
()
==
2
,
auto
dim_a
=
matrix_a
.
dims
();
auto
dim_b
=
matrix_b
.
dims
();
auto
dim_out
=
matrix_out
->
dims
();
PADDLE_ENFORCE
(
dim_a
.
size
()
==
2
&&
dim_b
.
size
()
==
2
&&
dim_out
.
size
()
==
2
,
"The input and output of matmul be matrix"
);
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
in1
.
place
())
&&
platform
::
is_cpu_place
(
in2
.
place
())
&&
platform
::
is_cpu_place
(
out
->
place
()),
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
matrix_a
.
place
())
&&
platform
::
is_cpu_place
(
matrix_b
.
place
())
&&
platform
::
is_cpu_place
(
matrix_out
->
place
()),
"Matrix must all be in CPUPlace"
);
int
M
=
out_dim
[
0
];
int
N
=
out_dim
[
1
];
int
K
=
(
in1_T
==
false
)
?
in1_dim
[
1
]
:
in1_dim
[
0
];
CBLAS_TRANSPOSE
in1_Trans
=
(
in1_T
==
false
)
?
CblasNoTrans
:
CblasTrans
;
CBLAS_TRANSPOSE
in2_Trans
=
(
in2_T
==
false
)
?
CblasNoTrans
:
CblasTrans
;
int
M
=
dim_out
[
0
];
int
N
=
dim_out
[
1
];
int
K
=
(
trans_a
==
false
)
?
dim_a
[
1
]
:
dim_a
[
0
];
CBLAS_TRANSPOSE
transA
=
(
trans_a
==
false
)
?
CblasNoTrans
:
CblasTrans
;
CBLAS_TRANSPOSE
transB
=
(
trans_b
==
false
)
?
CblasNoTrans
:
CblasTrans
;
gemm
<
platform
::
CPUPlace
,
double
>
(
in1_Trans
,
in2_Trans
,
M
,
N
,
K
,
alpha
,
in1
.
data
<
double
>
(),
in2
.
data
<
double
>
(),
beta
,
out
->
data
<
double
>
(),
context
);
gemm
<
platform
::
CPUPlace
,
double
>
(
transA
,
transB
,
M
,
N
,
K
,
alpha
,
matrix_a
.
data
<
double
>
()
,
matrix_b
.
data
<
double
>
(),
beta
,
matrix_
out
->
data
<
double
>
(),
context
);
}
}
// namespace math
...
...
paddle/operators/math/math_function.cu
浏览文件 @
2ec8dab4
...
...
@@ -12,7 +12,46 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_USE_MKLML
#include <mkl_cblas.h>
#include <mkl_lapacke.h>
#include <mkl_vml_functions.h>
#endif
#ifdef PADDLE_USE_MKL
#include <mkl.h>
#include <mkl_lapacke.h>
#endif
#ifdef PADDLE_USE_ATLAS
extern
"C"
{
#include <cblas.h>
#include <clapack.h>
}
#endif
#ifdef PADDLE_USE_OPENBLAS
#include <cblas.h>
#include <lapacke.h>
#endif
#ifndef LAPACK_FOUND
extern
"C"
{
#include <cblas.h>
int
LAPACKE_sgetrf
(
int
matrix_layout
,
int
m
,
int
n
,
float
*
a
,
int
lda
,
int
*
ipiv
);
int
LAPACKE_dgetrf
(
int
matrix_layout
,
int
m
,
int
n
,
double
*
a
,
int
lda
,
int
*
ipiv
);
int
LAPACKE_sgetri
(
int
matrix_layout
,
int
n
,
float
*
a
,
int
lda
,
const
int
*
ipiv
);
int
LAPACKE_dgetri
(
int
matrix_layout
,
int
n
,
double
*
a
,
int
lda
,
const
int
*
ipiv
);
}
#endif
#include <cmath>
#include "paddle/operators/math/math_function.h"
namespace
paddle
{
namespace
operators
{
namespace
math
{
...
...
@@ -60,63 +99,67 @@ void gemm<platform::GPUPlace, double>(const CBLAS_TRANSPOSE transA,
}
template
<
>
void
matmul
<
platform
::
GPUPlace
,
float
>
(
const
framework
::
Tensor
&
in1
,
bool
in1_T
,
const
framework
::
Tensor
&
in2
,
bool
in2_T
,
float
alpha
,
framework
::
Tensor
*
out
,
void
matmul
<
platform
::
GPUPlace
,
float
>
(
const
framework
::
Tensor
&
matrix_a
,
bool
trans_a
,
const
framework
::
Tensor
&
matrix_b
,
bool
trans_b
,
float
alpha
,
framework
::
Tensor
*
matrix_out
,
float
beta
,
platform
::
DeviceContext
*
context
)
{
auto
in1_dim
=
in1
.
dims
();
auto
in2_dim
=
in2
.
dims
();
auto
out_dim
=
out
->
dims
();
PADDLE_ENFORCE
(
in1_dim
.
size
()
==
2
&&
in2_dim
.
size
()
==
2
&&
out_dim
.
size
()
==
2
,
auto
dim_a
=
matrix_a
.
dims
();
auto
dim_b
=
matrix_b
.
dims
();
auto
dim_out
=
matrix_out
->
dims
();
PADDLE_ENFORCE
(
dim_a
.
size
()
==
2
&&
dim_b
.
size
()
==
2
&&
dim_out
.
size
()
==
2
,
"The input and output of matmul be matrix"
);
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
in1
.
place
())
&&
platform
::
is_gpu_place
(
in2
.
place
())
&&
platform
::
is_gpu_place
(
out
->
place
()),
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
matrix_a
.
place
())
&&
platform
::
is_gpu_place
(
matrix_b
.
place
())
&&
platform
::
is_gpu_place
(
matrix_
out
->
place
()),
"Matrix must all be in GPUPlace"
);
int
M
=
out_dim
[
0
];
int
N
=
out_dim
[
1
];
int
K
=
(
in1_T
==
false
)
?
in1_dim
[
1
]
:
in1_dim
[
0
];
int
M
=
dim_out
[
0
];
int
N
=
dim_out
[
1
];
int
K
=
(
trans_a
==
false
)
?
dim_a
[
1
]
:
dim_a
[
0
];
CBLAS_TRANSPOSE
in1_Trans
=
(
in1_T
==
false
)
?
CblasNoTrans
:
CblasTrans
;
CBLAS_TRANSPOSE
in2_Trans
=
(
in2_T
==
false
)
?
CblasNoTrans
:
CblasTrans
;
CBLAS_TRANSPOSE
transA
=
(
trans_a
==
false
)
?
CblasNoTrans
:
CblasTrans
;
CBLAS_TRANSPOSE
transB
=
(
trans_b
==
false
)
?
CblasNoTrans
:
CblasTrans
;
gemm
<
platform
::
GPUPlace
,
float
>
(
in1_Trans
,
in2_Trans
,
M
,
N
,
K
,
alpha
,
in1
.
data
<
float
>
(),
in2
.
data
<
float
>
(),
beta
,
out
->
data
<
float
>
(),
context
);
gemm
<
platform
::
GPUPlace
,
float
>
(
transA
,
transB
,
M
,
N
,
K
,
alpha
,
matrix_a
.
data
<
float
>
()
,
matrix_b
.
data
<
float
>
(),
beta
,
matrix_
out
->
data
<
float
>
(),
context
);
}
template
<
>
void
matmul
<
platform
::
GPUPlace
,
double
>
(
const
framework
::
Tensor
&
in1
,
bool
in1_T
,
const
framework
::
Tensor
&
in2
,
bool
in2_T
,
float
alpha
,
framework
::
Tensor
*
out
,
float
beta
,
void
matmul
<
platform
::
GPUPlace
,
double
>
(
const
framework
::
Tensor
&
matrix_a
,
bool
trans_a
,
const
framework
::
Tensor
&
matrix_b
,
bool
trans_b
,
double
alpha
,
framework
::
Tensor
*
matrix_out
,
double
beta
,
platform
::
DeviceContext
*
context
)
{
auto
in1_dim
=
in1
.
dims
();
auto
in2_dim
=
in2
.
dims
();
auto
out_dim
=
out
->
dims
();
PADDLE_ENFORCE
(
in1_dim
.
size
()
==
2
&&
in2_dim
.
size
()
==
2
&&
out_dim
.
size
()
==
2
,
auto
dim_a
=
matrix_a
.
dims
();
auto
dim_b
=
matrix_b
.
dims
();
auto
dim_out
=
matrix_out
->
dims
();
PADDLE_ENFORCE
(
dim_a
.
size
()
==
2
&&
dim_b
.
size
()
==
2
&&
dim_out
.
size
()
==
2
,
"The input and output of matmul be matrix"
);
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
in1
.
place
())
&&
platform
::
is_gpu_place
(
in2
.
place
())
&&
platform
::
is_gpu_place
(
out
->
place
()),
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
matrix_a
.
place
())
&&
platform
::
is_gpu_place
(
matrix_b
.
place
())
&&
platform
::
is_gpu_place
(
matrix_out
->
place
()),
"Matrix must all be in GPUPlace"
);
int
M
=
out_dim
[
0
];
int
N
=
out_dim
[
1
];
int
K
=
(
in1_T
==
false
)
?
in1_dim
[
1
]
:
in1_dim
[
0
];
CBLAS_TRANSPOSE
in1_Trans
=
(
in1_T
==
false
)
?
CblasNoTrans
:
CblasTrans
;
CBLAS_TRANSPOSE
in2_Trans
=
(
in2_T
==
false
)
?
CblasNoTrans
:
CblasTrans
;
int
M
=
dim_out
[
0
];
int
N
=
dim_out
[
1
];
int
K
=
(
trans_a
==
false
)
?
dim_a
[
1
]
:
dim_a
[
0
];
gemm
<
platform
::
GPUPlace
,
double
>
(
in1_Trans
,
in2_Trans
,
M
,
N
,
K
,
alpha
,
in1
.
data
<
double
>
(),
in2
.
data
<
double
>
(),
beta
,
out
->
data
<
double
>
(),
context
);
CBLAS_TRANSPOSE
transA
=
(
trans_a
==
false
)
?
CblasNoTrans
:
CblasTrans
;
CBLAS_TRANSPOSE
transB
=
(
trans_b
==
false
)
?
CblasNoTrans
:
CblasTrans
;
gemm
<
platform
::
GPUPlace
,
double
>
(
transA
,
transB
,
M
,
N
,
K
,
alpha
,
matrix_a
.
data
<
double
>
(),
matrix_b
.
data
<
double
>
(),
beta
,
matrix_out
->
data
<
double
>
(),
context
);
}
}
// namespace math
}
// namespace operators
}
// namespace paddle
paddle/operators/math/math_function.h
浏览文件 @
2ec8dab4
...
...
@@ -14,44 +14,6 @@ limitations under the License. */
#pragma once
#ifdef PADDLE_USE_MKLML
#include <mkl_cblas.h>
#include <mkl_lapacke.h>
#include <mkl_vml_functions.h>
#endif
#ifdef PADDLE_USE_MKL
#include <mkl.h>
#include <mkl_lapacke.h>
#endif
#ifdef PADDLE_USE_ATLAS
extern
"C"
{
#include <cblas.h>
#include <clapack.h>
}
#endif
#ifdef PADDLE_USE_OPENBLAS
#include <cblas.h>
#include <lapacke.h>
#endif
#ifndef LAPACK_FOUND
extern
"C"
{
#include <cblas.h>
int
LAPACKE_sgetrf
(
int
matrix_layout
,
int
m
,
int
n
,
float
*
a
,
int
lda
,
int
*
ipiv
);
int
LAPACKE_dgetrf
(
int
matrix_layout
,
int
m
,
int
n
,
double
*
a
,
int
lda
,
int
*
ipiv
);
int
LAPACKE_sgetri
(
int
matrix_layout
,
int
n
,
float
*
a
,
int
lda
,
const
int
*
ipiv
);
int
LAPACKE_dgetri
(
int
matrix_layout
,
int
n
,
double
*
a
,
int
lda
,
const
int
*
ipiv
);
}
#endif
#include <cmath>
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/enforce.h"
...
...
@@ -60,17 +22,20 @@ namespace paddle {
namespace
operators
{
namespace
math
{
// support continuous memory now
template
<
typename
Place
,
typename
T
>
// Support continuous memory now
// If transA = N, and transB = N
// Then matrixA: M * K, matrixB: K * N matrixC : M * N
// For more detailed info, please refer to
// http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
void
gemm
(
const
CBLAS_TRANSPOSE
transA
,
const
CBLAS_TRANSPOSE
transB
,
const
int
M
,
const
int
N
,
const
int
K
,
const
T
alpha
,
const
T
*
A
,
const
T
*
B
,
const
T
beta
,
T
*
C
,
platform
::
DeviceContext
*
context
);
// matrix multiply with continuous memory
template
<
typename
Place
,
typename
T
>
void
matmul
(
const
framework
::
Tensor
&
in1
,
bool
in1_T
,
const
framework
::
Tensor
&
in2
,
bool
in2_T
,
float
alpha
,
framework
::
Tensor
*
out
,
float
beta
,
void
matmul
(
const
framework
::
Tensor
&
matrix_a
,
bool
trans_a
,
const
framework
::
Tensor
&
matrix_b
,
bool
trans_b
,
float
alpha
,
framework
::
Tensor
*
matrix_
out
,
float
beta
,
platform
::
DeviceContext
*
context
);
}
// namespace math
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录