Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
2df8eec5
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
2df8eec5
编写于
1月 16, 2017
作者:
X
xutianbing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Pass Unit test for GpuMatrix::mul(GpuMatrix, GpuMatrix) and CpuMatrix::mul(CpuMatrix, CpuMatrix)
上级
1f0cbcf3
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
88 addition
and
48 deletion
+88
-48
paddle/function/BufferArg.cpp
paddle/function/BufferArg.cpp
+2
-4
paddle/function/BufferArg.h
paddle/function/BufferArg.h
+7
-10
paddle/function/MulOp.cpp
paddle/function/MulOp.cpp
+6
-6
paddle/function/MulOp.h
paddle/function/MulOp.h
+7
-0
paddle/function/MulOpGpu.cu
paddle/function/MulOpGpu.cu
+9
-0
paddle/function/MulOpTest.cpp
paddle/function/MulOpTest.cpp
+57
-28
未找到文件。
paddle/function/BufferArg.cpp
浏览文件 @
2df8eec5
...
@@ -32,16 +32,14 @@ const SparseMatrixArg& BufferArg::sparse() const {
...
@@ -32,16 +32,14 @@ const SparseMatrixArg& BufferArg::sparse() const {
SparseMatrixArg
::
SparseMatrixArg
(
const
CpuSparseMatrix
&
sparse
,
ArgType
argType
)
SparseMatrixArg
::
SparseMatrixArg
(
const
CpuSparseMatrix
&
sparse
,
ArgType
argType
)
:
BufferArg
(
sparse
,
argType
),
:
BufferArg
(
sparse
,
argType
),
row_
(
reinterpret_cast
<
void
*>
(
sparse
.
getRows
()),
VALUE_TYPE_INT32
),
row_
(
reinterpret_cast
<
void
*>
(
sparse
.
getRows
()),
VALUE_TYPE_INT32
),
col_
(
reinterpret_cast
<
void
*>
(
sparse
.
getCols
()),
VALUE_TYPE_INT32
),
col_
(
reinterpret_cast
<
void
*>
(
sparse
.
getCols
()),
VALUE_TYPE_INT32
)
{
trans_
(
const_cast
<
CpuSparseMatrix
&>
(
sparse
).
getTranspose
())
{
bufferType_
=
TENSOR_SPARSE
;
bufferType_
=
TENSOR_SPARSE
;
}
}
SparseMatrixArg
::
SparseMatrixArg
(
const
GpuSparseMatrix
&
sparse
,
ArgType
argType
)
SparseMatrixArg
::
SparseMatrixArg
(
const
GpuSparseMatrix
&
sparse
,
ArgType
argType
)
:
BufferArg
(
sparse
,
argType
),
:
BufferArg
(
sparse
,
argType
),
row_
(
reinterpret_cast
<
void
*>
(
sparse
.
getRows
()),
VALUE_TYPE_INT32
),
row_
(
reinterpret_cast
<
void
*>
(
sparse
.
getRows
()),
VALUE_TYPE_INT32
),
col_
(
reinterpret_cast
<
void
*>
(
sparse
.
getCols
()),
VALUE_TYPE_INT32
),
col_
(
reinterpret_cast
<
void
*>
(
sparse
.
getCols
()),
VALUE_TYPE_INT32
)
{
trans_
(
const_cast
<
GpuSparseMatrix
&>
(
sparse
).
getTranspose
())
{
bufferType_
=
TENSOR_SPARSE
;
bufferType_
=
TENSOR_SPARSE
;
}
}
...
...
paddle/function/BufferArg.h
浏览文件 @
2df8eec5
...
@@ -98,7 +98,8 @@ public:
...
@@ -98,7 +98,8 @@ public:
const_cast
<
void
*>
(
reinterpret_cast
<
const
void
*>
(
matrix
.
getData
()))),
const_cast
<
void
*>
(
reinterpret_cast
<
const
void
*>
(
matrix
.
getData
()))),
valueType_
(
DataType
<
real
>::
value
),
valueType_
(
DataType
<
real
>::
value
),
shape_
(
2
),
shape_
(
2
),
argType_
(
argType
)
{
argType_
(
argType
),
trans_
(
matrix
.
isTransposed
())
{
bufferType_
=
TENSOR_NORMAL
;
bufferType_
=
TENSOR_NORMAL
;
shape_
.
setDim
(
0
,
matrix
.
getHeight
());
shape_
.
setDim
(
0
,
matrix
.
getHeight
());
shape_
.
setDim
(
1
,
matrix
.
getWidth
());
shape_
.
setDim
(
1
,
matrix
.
getWidth
());
...
@@ -111,7 +112,8 @@ public:
...
@@ -111,7 +112,8 @@ public:
const_cast
<
void
*>
(
reinterpret_cast
<
const
void
*>
(
matrix
.
getData
()))),
const_cast
<
void
*>
(
reinterpret_cast
<
const
void
*>
(
matrix
.
getData
()))),
valueType_
(
DataType
<
real
>::
value
),
valueType_
(
DataType
<
real
>::
value
),
shape_
(
shape
),
shape_
(
shape
),
argType_
(
argType
)
{
argType_
(
argType
),
trans_
(
matrix
.
isTransposed
())
{
bufferType_
=
TENSOR_NORMAL
;
bufferType_
=
TENSOR_NORMAL
;
CHECK_EQ
(
matrix
.
getElementCnt
(),
shape
.
getElements
());
CHECK_EQ
(
matrix
.
getElementCnt
(),
shape
.
getElements
());
}
}
...
@@ -143,7 +145,7 @@ public:
...
@@ -143,7 +145,7 @@ public:
// CHECK(deviceType_ == DType);
// CHECK(deviceType_ == DType);
CHECK_EQ
((
size_t
)
2
,
shape_
.
ndims
());
CHECK_EQ
((
size_t
)
2
,
shape_
.
ndims
());
return
typename
Tensor
<
real
,
DType
>::
Matrix
(
return
typename
Tensor
<
real
,
DType
>::
Matrix
(
reinterpret_cast
<
real
*>
(
buf_
),
shape_
[
0
],
shape_
[
1
]);
reinterpret_cast
<
real
*>
(
buf_
),
shape_
[
0
],
shape_
[
1
]
,
trans_
);
}
}
template
<
typename
VType
,
DeviceType
DType
>
template
<
typename
VType
,
DeviceType
DType
>
...
@@ -179,6 +181,7 @@ protected:
...
@@ -179,6 +181,7 @@ protected:
TensorShape
shape_
;
TensorShape
shape_
;
BufferType
bufferType_
{
TENSOR_UNKNOWN
};
BufferType
bufferType_
{
TENSOR_UNKNOWN
};
ArgType
argType_
{
UNSPECIFIED
};
ArgType
argType_
{
UNSPECIFIED
};
bool
trans_
{
false
};
// leading dimensions. The size is dims_.size()
// leading dimensions. The size is dims_.size()
// Dims lds_;
// Dims lds_;
};
};
...
@@ -271,15 +274,13 @@ public:
...
@@ -271,15 +274,13 @@ public:
size_t
nnz
,
size_t
nnz
,
SparseDataFormat
format
,
SparseDataFormat
format
,
SparseDataType
type
,
SparseDataType
type
,
bool
trans
=
false
,
ArgType
argType
=
UNSPECIFIED
)
ArgType
argType
=
UNSPECIFIED
)
:
BufferArg
(
buf
,
valueType
,
shape
,
argType
),
:
BufferArg
(
buf
,
valueType
,
shape
,
argType
),
row_
(
row
),
row_
(
row
),
col_
(
col
),
col_
(
col
),
nnz_
(
nnz
),
nnz_
(
nnz
),
format_
(
format
),
format_
(
format
),
type_
(
type
),
type_
(
type
)
{
trans_
(
trans
)
{
bufferType_
=
TENSOR_SPARSE
;
bufferType_
=
TENSOR_SPARSE
;
CHECK
((
valueType
==
VALUE_TYPE_FLOAT
)
||
(
valueType
==
VALUE_TYPE_DOUBLE
));
CHECK
((
valueType
==
VALUE_TYPE_FLOAT
)
||
(
valueType
==
VALUE_TYPE_DOUBLE
));
CHECK_EQ
(
shape_
.
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
shape_
.
ndims
(),
(
size_t
)
2
);
...
@@ -322,8 +323,6 @@ public:
...
@@ -322,8 +323,6 @@ public:
size_t
nnz
()
const
{
return
nnz_
;
}
size_t
nnz
()
const
{
return
nnz_
;
}
bool
isTranspose
()
const
{
return
trans_
;
}
SparseDataFormat
dataFormat
()
const
{
return
format_
;
}
SparseDataFormat
dataFormat
()
const
{
return
format_
;
}
SparseDataType
dataType
()
const
{
return
type_
;
}
SparseDataType
dataType
()
const
{
return
type_
;
}
...
@@ -334,8 +333,6 @@ private:
...
@@ -334,8 +333,6 @@ private:
size_t
nnz_
;
size_t
nnz_
;
SparseDataFormat
format_
;
SparseDataFormat
format_
;
SparseDataType
type_
;
SparseDataType
type_
;
/// todo(tianbing), move trans_ up to BufferArg
bool
trans_
;
};
};
}
// namespace paddle
}
// namespace paddle
paddle/function/MulOp.cpp
浏览文件 @
2df8eec5
...
@@ -483,8 +483,8 @@ template <DeviceType Device>
...
@@ -483,8 +483,8 @@ template <DeviceType Device>
class
MulFunc
:
public
FunctionBase
{
class
MulFunc
:
public
FunctionBase
{
public:
public:
void
init
(
const
FuncConfig
&
config
)
override
{
void
init
(
const
FuncConfig
&
config
)
override
{
scaleAB
_
=
config
.
get
<
real
>
(
"scaleAB"
);
alpha
_
=
config
.
get
<
real
>
(
"scaleAB"
);
scaleT
_
=
config
.
get
<
real
>
(
"scaleT"
);
beta
_
=
config
.
get
<
real
>
(
"scaleT"
);
}
}
void
calc
(
const
BufferArgs
&
inputs
,
const
BufferArgs
&
outputs
)
override
{
void
calc
(
const
BufferArgs
&
inputs
,
const
BufferArgs
&
outputs
)
override
{
...
@@ -494,7 +494,7 @@ public:
...
@@ -494,7 +494,7 @@ public:
CHECK_EQ
(
inputs
[
0
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
inputs
[
0
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
inputs
[
1
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
inputs
[
1
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
outputs
[
0
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
outputs
[
0
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
outputs
[
0
].
getArgType
(),
A
SSIGN
_TO
);
CHECK_EQ
(
outputs
[
0
].
getArgType
(),
A
DD
_TO
);
auto
in1_mat
=
inputs
[
0
].
matrix
<
Device
>
();
auto
in1_mat
=
inputs
[
0
].
matrix
<
Device
>
();
if
(
inputs
[
0
].
isSparseArg
())
{
if
(
inputs
[
0
].
isSparseArg
())
{
...
@@ -505,12 +505,12 @@ public:
...
@@ -505,12 +505,12 @@ public:
in2_mat
=
inputs
[
1
].
sparse
().
SparseMatrix
<
Device
>
();
in2_mat
=
inputs
[
1
].
sparse
().
SparseMatrix
<
Device
>
();
}
}
auto
out_mat
=
outputs
[
0
].
matrix
<
Device
>
();
auto
out_mat
=
outputs
[
0
].
matrix
<
Device
>
();
MulOp
<
Device
>
(
out_mat
,
in1_mat
,
in2_mat
,
scaleAB_
,
scaleT
_
);
MulOp
<
Device
>
(
out_mat
,
in1_mat
,
in2_mat
,
alpha_
,
beta
_
);
}
}
private:
private:
real
scaleAB
_
;
real
alpha
_
;
real
scaleT
_
;
real
beta
_
;
};
};
REGISTER_TYPED_FUNC
(
MulOp
,
CPU
,
MulFunc
);
REGISTER_TYPED_FUNC
(
MulOp
,
CPU
,
MulFunc
);
...
...
paddle/function/MulOp.h
浏览文件 @
2df8eec5
...
@@ -68,4 +68,11 @@ void MulOp(GpuMatrix& out,
...
@@ -68,4 +68,11 @@ void MulOp(GpuMatrix& out,
real
scaleAB
,
real
scaleAB
,
real
scaleT
);
real
scaleT
);
template
<
DeviceType
DType
>
void
MulOp
(
GpuSparseMatrix
&
out
,
const
GpuMatrix
&
a
,
const
GpuMatrix
&
b
,
real
scaleAB
,
real
scaleT
);
}
// namespace paddle
}
// namespace paddle
paddle/function/MulOpGpu.cu
浏览文件 @
2df8eec5
...
@@ -170,4 +170,13 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
...
@@ -170,4 +170,13 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
}
}
}
}
template
<
>
void
MulOp
<
DEVICE_TYPE_GPU
>
(
GpuSparseMatrix
&
out
,
const
GpuMatrix
&
a
,
const
GpuMatrix
&
b
,
real
scale_ab
,
real
scale_t
)
{
/// todo(tianbing), implement it
}
}
// namespace paddle
}
// namespace paddle
paddle/function/MulOpTest.cpp
浏览文件 @
2df8eec5
...
@@ -16,50 +16,79 @@ limitations under the License. */
...
@@ -16,50 +16,79 @@ limitations under the License. */
#include "FunctionTest.h"
#include "FunctionTest.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/math/tests/test_matrixUtil.h"
#include "paddle/testing/TestUtil.h"
#include "paddle/testing/TestUtil.h"
using
namespace
paddle
;
// NOLINT
using
namespace
paddle
;
// NOLINT
void
testSpMatrixMul
(
int
M
,
int
N
,
int
K
,
real
rate
,
real
scale1
,
real
scale2
)
{
/**
/// todo(tianbing) check CPU/GPU
* C = alpha * C + beta * (A * B)
*/
void
testMatrixMul
(
bool
transa
,
bool
transb
,
int
dimM
,
int
dimN
,
int
dimK
)
{
real
alpha
=
1.5
;
real
beta
=
2.0
;
const
auto
cpuFunc
=
FunctionBase
::
funcRegistrar_
.
createByType
(
"MulOp-CPU"
);
cpuFunc
->
init
(
FuncConfig
().
set
(
"scaleAB"
,
alpha
).
set
(
"scaleT"
,
beta
));
const
auto
gpuFunc
=
FunctionBase
::
funcRegistrar_
.
createByType
(
"MulOp-GPU"
);
const
auto
gpuFunc
=
FunctionBase
::
funcRegistrar_
.
createByType
(
"MulOp-GPU"
);
gpuFunc
->
init
(
FuncConfig
().
set
(
"scaleAB"
,
scale1
).
set
(
"scaleT"
,
scale2
));
gpuFunc
->
init
(
FuncConfig
().
set
(
"scaleAB"
,
alpha
).
set
(
"scaleT"
,
beta
));
int
nnz
=
M
*
N
*
rate
;
int
heightA
=
(
transa
==
false
)
?
dimM
:
dimK
;
MatrixPtr
cpuA
=
std
::
make_shared
<
CpuMatrix
>
(
M
,
K
);
int
widthA
=
(
transa
==
false
)
?
dimK
:
dimM
;
MatrixPtr
cpuB
=
std
::
make_shared
<
CpuMatrix
>
(
N
,
K
);
int
heightB
=
(
transb
==
false
)
?
dimK
:
dimN
;
MatrixPtr
cpuC
(
new
CpuSparseMatrix
(
M
,
N
,
nnz
));
int
widthB
=
(
transb
==
false
)
?
dimN
:
dimK
;
int
heightC
=
dimM
;
int
widthC
=
dimN
;
MatrixPtr
gpuA
=
std
::
make_shared
<
GpuMatrix
>
(
M
,
K
);
auto
cpuA
=
std
::
make_shared
<
CpuMatrix
>
(
heightA
,
widthA
,
transa
);
MatrixPtr
gpuB
=
std
::
make_shared
<
GpuMatrix
>
(
N
,
K
);
auto
cpuB
=
std
::
make_shared
<
CpuMatrix
>
(
heightB
,
widthB
,
transb
);
MatrixPtr
gpuC
(
new
GpuSparseMatrix
(
M
,
N
,
nnz
));
auto
cpuC
=
std
::
make_shared
<
CpuMatrix
>
(
heightC
,
widthC
);
auto
gpuA
=
std
::
make_shared
<
GpuMatrix
>
(
heightA
,
widthA
,
transa
);
auto
gpuB
=
std
::
make_shared
<
GpuMatrix
>
(
heightB
,
widthB
,
transb
);
auto
gpuC
=
std
::
make_shared
<
GpuMatrix
>
(
heightC
,
widthC
);
cpuA
->
randomizeUniform
();
cpuA
->
randomizeUniform
();
cpuB
->
randomizeUniform
();
cpuB
->
randomizeUniform
();
cpuC
->
randomizeUniform
();
cpuC
->
randomizeUniform
();
gpuA
->
copyFrom
(
*
cpuA
);
gpuB
->
copyFrom
(
*
cpuB
);
gpuC
->
copyFrom
(
*
cpuC
);
hl_stream_t
stream
(
HPPL_STREAM_3
);
BufferArgs
cpuInputs
;
gpuA
->
copyFrom
(
*
cpuA
,
stream
);
BufferArgs
cpuOutputs
;
gpuB
->
copyFrom
(
*
cpuB
,
stream
);
cpuInputs
.
addArg
(
*
cpuA
);
gpuC
->
copyFrom
(
*
cpuC
,
stream
);
cpuInputs
.
addArg
(
*
cpuB
);
hl_stream_synchronize
(
stream
);
cpuOutputs
.
addArg
(
*
cpuC
,
ADD_TO
);
cpuFunc
->
calc
(
cpuInputs
,
cpuOutputs
);
BufferArgs
inputs
;
BufferArgs
gpuInputs
;
BufferArgs
outputs
;
BufferArgs
gpuOutputs
;
inputs
.
addArg
(
*
gpuA
->
getTranspose
());
gpuInputs
.
addArg
(
*
gpuA
);
inputs
.
addArg
(
*
gpuB
->
getTranspose
());
gpuInputs
.
addArg
(
*
gpuB
);
outputs
.
addArg
(
*
gpuC
,
ASSIGN_TO
);
gpuOutputs
.
addArg
(
*
gpuC
,
ADD_TO
);
gpuFunc
->
calc
(
gpuInputs
,
gpuOutputs
);
gpuFunc
->
calc
(
inputs
,
outputs
);
autotest
::
TensorCheckErr
(
*
cpuC
,
*
gpuC
);
}
}
TEST
(
SMatrix
,
sMatrixMul
)
{
TEST
(
Matrix
,
mul
)
{
for
(
auto
M
:
{
1
,
40
,
128
,
200
})
{
for
(
auto
transa
:
{
false
,
true
})
{
for
(
auto
N
:
{
100
})
{
for
(
auto
transb
:
{
false
,
true
})
{
for
(
auto
K
:
{
100
})
{
for
(
auto
dimM
:
{
1
,
10
,
100
})
{
/// todo(tianbing), add scaleAB and scaleT
for
(
auto
dimN
:
{
1
,
10
})
{
VLOG
(
3
)
<<
" M="
<<
M
<<
" N="
<<
N
<<
" K="
<<
K
;
for
(
auto
dimK
:
{
8
})
{
testSpMatrixMul
(
M
,
N
,
K
,
0.05
,
1
,
1
);
if
(
true
==
transa
&&
true
==
transb
)
{
continue
;
}
VLOG
(
3
)
<<
setiosflags
(
std
::
ios
::
left
)
<<
std
::
setfill
(
' '
)
<<
" transa="
<<
transa
<<
" transb="
<<
transb
<<
" dimM="
<<
std
::
setw
(
5
)
<<
dimM
<<
" dimN="
<<
std
::
setw
(
5
)
<<
dimN
<<
" dimK="
<<
std
::
setw
(
5
)
<<
dimK
;
testMatrixMul
(
transa
,
transb
,
dimM
,
dimN
,
dimK
);
}
}
}
}
}
}
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录