Commit 8a4fad42
Authored Aug 23, 2017 by Liu Yiqun
Parent: 5ca41184

Support to use clang for Android cross-compiling.

Showing 8 changed files with 155 additions and 122 deletions (+155 −122)
cmake/cblas.cmake                       +4   −0
cmake/external/warpctc.cmake            +1   −0
paddle/cuda/include/hl_cpu_gru.cuh      +83  −83
paddle/function/MulOp.cpp               +15  −22
paddle/math/MathFunctions.cpp           +4   −0
paddle/math/MathFunctions.h             +21  −2
paddle/math/Matrix.cpp                  +12  −6
paddle/scripts/docker/build_android.sh  +15  −9
cmake/cblas.cmake
@@ -13,6 +13,10 @@
 # system paths.
 #

+if(USE_EIGEN_FOR_BLAS)
+  return()
+endif(USE_EIGEN_FOR_BLAS)
+
 set(CBLAS_FOUND OFF)

 ## Find MKLML First.
cmake/external/warpctc.cmake
@@ -41,6 +41,7 @@ IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "App
 ELSE()
     SET(USE_OMP ON)
 ENDIF()
+SET(USE_OMP OFF FORCE)

 ExternalProject_Add(
     extern_warpctc
paddle/cuda/include/hl_cpu_gru.cuh
@@ -20,11 +20,11 @@ limitations under the License. */
 #include "paddle/math/MathFunctions.h"

-#ifndef PADDLE_TYPE_DOUBLE
-#define CBLAS_GEMM paddle::gemm<float>
-#else
-#define CBLAS_GEMM paddle::gemm<double>
-#endif
+// #ifndef PADDLE_TYPE_DOUBLE
+// #define CBLAS_GEMM paddle::gemm<float>
+// #else
+// #define CBLAS_GEMM paddle::gemm<double>
+// #endif

 template <class OpResetOutput>
 void hl_naive_gru_forward_reset_output(OpResetOutput opResetOutput,
@@ -219,37 +219,37 @@ void hl_cpu_gru_forward(OpResetOutput opResetOutput,
                         hl_activation_mode_t active_node,
                         hl_activation_mode_t active_gate) {
   if (value.prevOutValue) {
-    CBLAS_GEMM(CblasNoTrans,
-               CblasNoTrans,
-               batchSize,
-               2 * frameSize,
-               frameSize,
-               1,
-               value.prevOutValue,
-               frameSize,
-               value.gateWeight,
-               frameSize * 2,
-               1,
-               value.gateValue,
-               frameSize * 3);
+    // CBLAS_GEMM(CblasNoTrans,
+    //            CblasNoTrans,
+    //            batchSize,
+    //            2 * frameSize,
+    //            frameSize,
+    //            1,
+    //            value.prevOutValue,
+    //            frameSize,
+    //            value.gateWeight,
+    //            frameSize * 2,
+    //            1,
+    //            value.gateValue,
+    //            frameSize * 3);
   }

   forward_reset_output(opResetOutput, value, frameSize, batchSize, active_gate);

   if (value.prevOutValue) {
-    CBLAS_GEMM(CblasNoTrans,
-               CblasNoTrans,
-               batchSize,
-               frameSize,
-               frameSize,
-               1,
-               value.resetOutputValue,
-               frameSize,
-               value.stateWeight,
-               frameSize,
-               1,
-               value.gateValue + frameSize * 2,
-               frameSize * 3);
+    // CBLAS_GEMM(CblasNoTrans,
+    //            CblasNoTrans,
+    //            batchSize,
+    //            frameSize,
+    //            frameSize,
+    //            1,
+    //            value.resetOutputValue,
+    //            frameSize,
+    //            value.stateWeight,
+    //            frameSize,
+    //            1,
+    //            value.gateValue + frameSize * 2,
+    //            frameSize * 3);
   }

   forward_final_output(opFinalOutput, value, frameSize, batchSize, active_node);
@@ -538,34 +538,34 @@ void hl_cpu_gru_backward(OpStateGrad opStateGrad,
                        frameSize, batchSize, active_node);

   if (value.prevOutValue && grad.prevOutGrad) {
-    CBLAS_GEMM(CblasNoTrans,
-               CblasTrans,
-               batchSize,
-               frameSize,
-               frameSize,
-               1,
-               grad.gateGrad + frameSize * 2,
-               frameSize * 3,
-               value.stateWeight,
-               frameSize,
-               0,
-               grad.resetOutputGrad,
-               frameSize);
+    // CBLAS_GEMM(CblasNoTrans,
+    //            CblasTrans,
+    //            batchSize,
+    //            frameSize,
+    //            frameSize,
+    //            1,
+    //            grad.gateGrad + frameSize * 2,
+    //            frameSize * 3,
+    //            value.stateWeight,
+    //            frameSize,
+    //            0,
+    //            grad.resetOutputGrad,
+    //            frameSize);

     if (grad.stateWeightGrad) {
-      CBLAS_GEMM(CblasTrans,
-                 CblasNoTrans,
-                 frameSize,
-                 frameSize,
-                 batchSize,
-                 1,
-                 value.resetOutputValue,
-                 frameSize,
-                 grad.gateGrad + frameSize * 2,
-                 frameSize * 3,
-                 1,
-                 grad.stateWeightGrad,
-                 frameSize);
+      // CBLAS_GEMM(CblasTrans,
+      //            CblasNoTrans,
+      //            frameSize,
+      //            frameSize,
+      //            batchSize,
+      //            1,
+      //            value.resetOutputValue,
+      //            frameSize,
+      //            grad.gateGrad + frameSize * 2,
+      //            frameSize * 3,
+      //            1,
+      //            grad.stateWeightGrad,
+      //            frameSize);
     }
   }
@@ -573,34 +573,34 @@ void hl_cpu_gru_backward(OpStateGrad opStateGrad,
                       frameSize, batchSize, active_gate);

   if (grad.prevOutGrad && value.prevOutValue) {
-    CBLAS_GEMM(CblasNoTrans,
-               CblasTrans,
-               batchSize,
-               frameSize,
-               frameSize * 2,
-               1,
-               grad.gateGrad,
-               frameSize * 3,
-               value.gateWeight,
-               frameSize * 2,
-               1,
-               grad.prevOutGrad,
-               frameSize);
+    // CBLAS_GEMM(CblasNoTrans,
+    //            CblasTrans,
+    //            batchSize,
+    //            frameSize,
+    //            frameSize * 2,
+    //            1,
+    //            grad.gateGrad,
+    //            frameSize * 3,
+    //            value.gateWeight,
+    //            frameSize * 2,
+    //            1,
+    //            grad.prevOutGrad,
+    //            frameSize);

     if (grad.gateWeightGrad) {
-      CBLAS_GEMM(CblasTrans,
-                 CblasNoTrans,
-                 frameSize,
-                 frameSize * 2,
-                 batchSize,
-                 1,
-                 value.prevOutValue,
-                 frameSize,
-                 grad.gateGrad,
-                 frameSize * 3,
-                 1,
-                 grad.gateWeightGrad,
-                 frameSize * 2);
+      // CBLAS_GEMM(CblasTrans,
+      //            CblasNoTrans,
+      //            frameSize,
+      //            frameSize * 2,
+      //            batchSize,
+      //            1,
+      //            value.prevOutValue,
+      //            frameSize,
+      //            grad.gateGrad,
+      //            frameSize * 3,
+      //            1,
+      //            grad.gateWeightGrad,
+      //            frameSize * 2);
     }
   }
 }
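These GEMM calls are disabled rather than ported because paddle::gemm<T> becomes conditional on the CBLAS backend in this commit (see MathFunctions.cpp and MathFunctions.h below). For orientation, here is a self-contained sketch — not Paddle code — of what the first commented-out call computed, with the CBLAS arguments (M = batchSize, N = 2 * frameSize, K = frameSize, alpha = beta = 1) unrolled into loops, assuming row-major cblas_sgemm semantics:

    #include <vector>

    // Hypothetical helper, for illustration only: the recurrent contribution
    // prevOut * gateWeight that the first CBLAS_GEMM added into the
    // update/reset gate pre-activations.
    void gruGatePreactivationSketch(
        const std::vector<float>& prevOutValue,  // batchSize x frameSize
        const std::vector<float>& gateWeight,    // frameSize x (2 * frameSize)
        std::vector<float>& gateValue,           // batchSize x (3 * frameSize)
        int batchSize,
        int frameSize) {
      for (int i = 0; i < batchSize; ++i) {
        for (int j = 0; j < 2 * frameSize; ++j) {
          float sum = 0.0f;
          for (int k = 0; k < frameSize; ++k) {
            sum += prevOutValue[i * frameSize + k] *
                   gateWeight[k * 2 * frameSize + j];
          }
          // ldc = 3 * frameSize: each gateValue row also holds the
          // candidate-state block, which this product leaves untouched;
          // beta = 1 means the product accumulates into gateValue.
          gateValue[i * 3 * frameSize + j] += sum;
        }
      }
    }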
paddle/function/MulOp.cpp
@@ -13,18 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "MulOp.h"
-/// todo(tianbing), delete it
-#include <iostream>
-#include "paddle/math/MathFunctions.h"
+#include "GemmFunctor.h"
 #include "paddle/math/SIMDFunctions.h"
 #include "paddle/utils/ThreadLocal.h"

-#ifndef PADDLE_TYPE_DOUBLE
-#define GEMM paddle::gemm<float>
-#else
-#define GEMM paddle::gemm<double>
-#endif
-
 namespace {
 inline void vecAddTo(real* a, const real* b, real scaleB, size_t len) {
   for (unsigned int i = 0; i < len; ++i) {
@@ -114,19 +106,20 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
                             real scaleT,
                             bool aTrans,
                             bool bTrans) {
-  GEMM(aTrans ? CblasTrans : CblasNoTrans,
-       bTrans ? CblasTrans : CblasNoTrans,
-       out.getHeight(),
-       out.getWidth(),
-       !aTrans ? a.getWidth() : a.getHeight(),
-       scaleAB,
-       a.getData(),
-       a.getStride(),
-       b.getData(),
-       b.getStride(),
-       scaleT,
-       out.getData(),
-       out.getStride());
+  BlasGemm<DEVICE_TYPE_CPU, real>::compute(aTrans,
+                                           bTrans,
+                                           out.getHeight(),
+                                           out.getWidth(),
+                                           !aTrans ? a.getWidth() : a.getHeight(),
+                                           scaleAB,
+                                           a.getData(),
+                                           a.getStride(),
+                                           b.getData(),
+                                           b.getStride(),
+                                           scaleT,
+                                           out.getData(),
+                                           out.getStride());
 }

 /// dense matrix (+)= sparse matrix * dense matrix
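The retired GEMM macro bound every call site to paddle::gemm<float> or paddle::gemm<double> at preprocessing time. BlasGemm<DEVICE_TYPE_CPU, real>::compute comes from the newly included GemmFunctor.h, whose body is not part of this diff; what it enables is selecting the kernel by device and element type through templates, so an Eigen-backed kernel can be substituted when USE_EIGEN_FOR_BLAS=ON. A minimal, hypothetical sketch of that pattern (names are illustrative, not Paddle's):

    #include <cstddef>

    // Hypothetical stand-ins for Paddle's device tags.
    enum class Device { CPU, GPU };

    template <Device D, class T>
    struct GemmSketch;  // primary template intentionally left undefined

    // CPU specialization: a real implementation would forward to
    // cblas_sgemm/cblas_dgemm, or to an Eigen kernel in Eigen-only builds.
    template <class T>
    struct GemmSketch<Device::CPU, T> {
      static void compute(bool transA, bool transB,
                          std::size_t M, std::size_t N, std::size_t K,
                          T alpha, const T* A, std::size_t lda,
                          const T* B, std::size_t ldb,
                          T beta, T* C, std::size_t ldc) {
        // Naive reference kernel, no transpose support; illustration only.
        if (transA || transB) return;
        for (std::size_t i = 0; i < M; ++i) {
          for (std::size_t j = 0; j < N; ++j) {
            T sum = T(0);
            for (std::size_t k = 0; k < K; ++k) {
              sum += A[i * lda + k] * B[k * ldb + j];
            }
            C[i * ldc + j] = beta * C[i * ldc + j] + alpha * sum;
          }
        }
      }
    };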
paddle/math/MathFunctions.cpp
@@ -84,6 +84,7 @@ LAPACK_ROUTINE_EACH(DYNAMIC_LOAD_LAPACK_WRAP)
 namespace paddle {

+#ifndef PADDLE_USE_EIGEN_FOR_BLAS
 template <>
 void gemm<float>(const CBLAS_TRANSPOSE transA,
                  const CBLAS_TRANSPOSE transB,
@@ -143,6 +144,7 @@ void gemm<double>(const CBLAS_TRANSPOSE transA,
               C,
               ldc);
 }
+#endif

 template <>
 int getrf<float>(const CBLAS_ORDER order,
@@ -182,6 +184,7 @@ int getri<double>(const CBLAS_ORDER order,
   return dynload::PADDLE_DGETRI(order, N, A, lda, ipiv);
 }

+#ifndef PADDLE_USE_EIGEN_FOR_BLAS
 template <>
 void axpy<float>(const int n, const float alpha, const float* x, float* y) {
   cblas_saxpy(n, alpha, x, 1, y, 1);
@@ -201,6 +204,7 @@ template <>
 double dotProduct<double>(const int n, const double* x, const double* y) {
   return cblas_ddot(n, x, 1, y, 1);
 }
+#endif

 #if defined(PADDLE_USE_MKL) || defined(PADDLE_USE_MKLML)
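These guards are safe because callers keep a BLAS-free path: when PADDLE_USE_EIGEN_FOR_BLAS is defined, the CBLAS-backed specializations above are compiled out and calls resolve to the generic loop templates that MathFunctions.h now defines inline (next file), so no cblas_* symbol is referenced at link time. A compilable demo of the mechanism, using a stand-in macro name rather than Paddle's:

    #include <cstdio>

    // Generic fallback: always available, needs no BLAS at link time.
    template <class T>
    T dotProduct(const int n, const T* x, const T* y) {
      T result = T(0);
      for (int i = 0; i < n; i++) result += x[i] * y[i];
      return result;
    }

    #ifndef DEMO_USE_EIGEN_FOR_BLAS  // stand-in for PADDLE_USE_EIGEN_FOR_BLAS
    // Specialization that would call cblas_ddot(n, x, 1, y, 1) in a real
    // CBLAS build; compiled out entirely when the macro is defined, so no
    // cblas_* symbol is ever referenced.
    template <>
    double dotProduct<double>(const int n, const double* x, const double* y) {
      double result = 0.0;
      for (int i = 0; i < n; i++) result += x[i] * y[i];  // placeholder body
      return result;
    }
    #endif

    int main() {
      double x[3] = {1, 2, 3}, y[3] = {4, 5, 6};
      std::printf("%g\n", dotProduct(3, x, y));  // 32, via either path
      return 0;
    }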
paddle/math/MathFunctions.h
@@ -40,7 +40,14 @@ extern "C" {
 #ifndef LAPACK_FOUND
 extern "C" {
+#ifndef PADDLE_USE_EIGEN_FOR_BLAS
 #include <cblas.h>
+#else
+typedef enum CBLAS_ORDER {
+  CblasRowMajor = 101,
+  CblasColMajor = 102
+} CBLAS_ORDER;
+#endif
 int LAPACKE_sgetrf(
     int matrix_layout, int m, int n, float* a, int lda, int* ipiv);
 int LAPACKE_dgetrf(
@@ -56,6 +63,7 @@ int LAPACKE_dgetri(
 namespace paddle {

+#ifndef PADDLE_USE_EIGEN_FOR_BLAS
 template <class T>
 void gemm(const CBLAS_TRANSPOSE transA,
           const CBLAS_TRANSPOSE transB,
@@ -70,6 +78,7 @@ void gemm(const CBLAS_TRANSPOSE transA,
           const T beta,
           T* C,
           const int ldc);
+#endif

 template <class T>
 int getrf(const CBLAS_ORDER Order,
@@ -84,10 +93,20 @@ int getri(
     const CBLAS_ORDER Order, const int N, T* A, const int lda, const int* ipiv);

 template <class T>
-void axpy(const int n, const T alpha, const T* x, T* y);
+void axpy(const int n, const T alpha, const T* x, T* y) {
+  /// y = y + alpha * x
+  for (int i = 0; i < n; i++) {
+    y[i] = y[i] + alpha * x[i];
+  }
+}

 template <class T>
-T dotProduct(const int n, const T* x, const T* y);
+T dotProduct(const int n, const T* x, const T* y) {
+  T result = static_cast<T>(0);
+  for (int i = 0; i < n; i++) {
+    result += x[i] * y[i];
+  }
+  return result;
+}

 template <class T>
 void vExp(const int n, const T* a, T* r);
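A minimal usage sketch of the header-inline fallbacks above, assuming MathFunctions.h is on the include path; whichever implementation wins in a given build (generic loop or a CBLAS specialization from MathFunctions.cpp), the arithmetic is identical:

    #include <cassert>
    #include "paddle/math/MathFunctions.h"

    int main() {
      double x[4] = {1, 2, 3, 4};
      double y[4] = {10, 20, 30, 40};
      paddle::axpy<double>(4, 0.5, x, y);  // y[i] += 0.5 * x[i]
      assert(y[3] == 42.0);
      // dot = 1*10.5 + 2*21 + 3*31.5 + 4*42 = 315 (exact in binary FP)
      assert(paddle::dotProduct<double>(4, x, y) == 315.0);
      return 0;
    }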
paddle/math/Matrix.cpp
@@ -28,6 +28,7 @@ limitations under the License. */
 #include "hl_top_k.h"
 #include "paddle/utils/Logging.h"

+#include "paddle/function/GemmFunctor.h"
 #include "paddle/utils/ThreadLocal.h"

 #include "SIMDFunctions.h"
@@ -2222,24 +2223,29 @@ void CpuMatrix::mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT) {
   CHECK(!isTransposed()) << "Not supported";

   size_t a_col, b_col, a_row, b_row;
-  CBLAS_TRANSPOSE a_trans, b_trans;
+  // CBLAS_TRANSPOSE a_trans, b_trans;
+  bool a_trans, b_trans;
   if (!a->isTransposed()) {
     a_col = a->getWidth();
     a_row = a->getHeight();
-    a_trans = CblasNoTrans;
+    // a_trans = CblasNoTrans;
+    a_trans = false;
   } else {
     a_col = a->getHeight();
     a_row = a->getWidth();
-    a_trans = CblasTrans;
+    // a_trans = CblasTrans;
+    a_trans = true;
   }
   if (!b->isTransposed()) {
     b_col = b->getWidth();
     b_row = b->getHeight();
-    b_trans = CblasNoTrans;
+    // b_trans = CblasNoTrans;
+    b_trans = false;
   } else {
     b_col = b->getHeight();
     b_row = b->getWidth();
-    b_trans = CblasTrans;
+    // b_trans = CblasTrans;
+    b_trans = true;
   }

   CHECK_EQ(a_col, b_row);
@@ -2256,7 +2262,7 @@ void CpuMatrix::mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT) {
   int lda = a->getStride();
   int ldb = b->getStride();
   int ldc = getStride();
-  gemm<real>(
+  BlasGemm<DEVICE_TYPE_CPU, real>::compute(
       a_trans, b_trans, M, N, K, scaleAB, A, lda, B, ldb, scaleT, C, ldc);
 }
paddle/scripts/docker/build_android.sh
@@ -2,9 +2,9 @@
 set -xe

-mkdir -p /paddle/build_android/$ANDROID_ABI
-cd /paddle/build_android/$ANDROID_ABI
-rm -rf /paddle/install 2>/dev/null || true
+rm -rf /paddle/build_android 2>/dev/null || true
+mkdir -p /paddle/build_android
+cd /paddle/build_android

 THIRD_PARTY_PATH=/paddle/third_party_android/$ANDROID_ABI
@@ -14,19 +14,25 @@ if [ $ANDROID_ABI == "armeabi-v7a" ]; then
       -DANDROID_ABI=$ANDROID_ABI \
       -DANDROID_ARM_NEON=ON \
       -DANDROID_ARM_MODE=ON \
+      -DCMAKE_C_COMPILER=$ANDROID_ARM_STANDALONE_TOOLCHAIN/bin/arm-linux-androideabi-clang \
+      -DCMAKE_CXX_COMPILER=$ANDROID_ARM_STANDALONE_TOOLCHAIN/bin/arm-linux-androideabi-clang++ \
       -DHOST_C_COMPILER=/usr/bin/gcc \
       -DHOST_CXX_COMPILER=/usr/bin/g++ \
       -DCMAKE_INSTALL_PREFIX=/paddle/install \
       -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \
       -DCMAKE_BUILD_TYPE=Release \
+      -DUSE_EIGEN_FOR_BLAS=ON \
       -DWITH_C_API=ON \
       -DWITH_SWIG_PY=OFF \
-      /paddle
-elif [ $ANDROID_ABI == "arm64-v7a" ]; then
+      -DWITH_STYLE_CHECK=OFF \
+      ..
+elif [ $ANDROID_ABI == "arm64-v8a" ]; then
   cmake -DCMAKE_SYSTEM_NAME=Android \
         -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_ARM64_STANDALONE_TOOLCHAIN \
         -DANDROID_ABI=$ANDROID_ABI \
         -DANDROID_ARM_MODE=ON \
+        -DCMAKE_C_COMPILER=$ANDROID_ARM64_STANDALONE_TOOLCHAIN/bin/aarch64-linux-android-clang \
+        -DCMAKE_CXX_COMPILER=$ANDROID_ARM64_STANDALONE_TOOLCHAIN/bin/aarch64-linux-android-clang++ \
         -DHOST_C_COMPILER=/usr/bin/gcc \
         -DHOST_CXX_COMPILER=/usr/bin/g++ \
         -DCMAKE_INSTALL_PREFIX=/paddle/install \
@@ -34,7 +40,7 @@ elif [ $ANDROID_ABI == "arm64-v7a" ]; then
       -DCMAKE_BUILD_TYPE=Release \
       -DWITH_C_API=ON \
       -DWITH_SWIG_PY=OFF \
-      /paddle
+      ..
 elif [ $ANDROID_ABI == "armeabi" ]; then
   cmake -DCMAKE_SYSTEM_NAME=Android \
         -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_ARM_STANDALONE_TOOLCHAIN \
@@ -47,10 +53,10 @@ elif [ $ANDROID_ABI == "armeabi" ]; then
       -DCMAKE_BUILD_TYPE=Release \
       -DWITH_C_API=ON \
       -DWITH_SWIG_PY=OFF \
-      /paddle
+      ..
 else
   echo "Invalid ANDROID_ABI: $ANDROID_ABI"
 fi

-make -j `nproc`
-make install -j `nproc`
+make VERBOSE=1
+make install