Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
999cd14a
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
999cd14a
编写于
1月 23, 2017
作者:
X
xutianbing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Further address Daoyuan's comments, clean the code.
上级
b3be7358
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
147 addition
and
131 deletion
+147
-131
paddle/function/BufferArg.cpp
paddle/function/BufferArg.cpp
+4
-4
paddle/function/BufferArg.h
paddle/function/BufferArg.h
+37
-32
paddle/function/FunctionTest.h
paddle/function/FunctionTest.h
+36
-35
paddle/function/MulOp.cpp
paddle/function/MulOp.cpp
+37
-12
paddle/function/MulOp.h
paddle/function/MulOp.h
+8
-3
paddle/function/MulOpGpu.cu
paddle/function/MulOpGpu.cu
+19
-43
paddle/function/MulOpTest.cpp
paddle/function/MulOpTest.cpp
+0
-2
paddle/function/TensorType.h
paddle/function/TensorType.h
+4
-0
paddle/math/Matrix.h
paddle/math/Matrix.h
+2
-0
未找到文件。
paddle/function/BufferArg.cpp
浏览文件 @
999cd14a
...
...
@@ -34,8 +34,8 @@ SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
row_
(
reinterpret_cast
<
void
*>
(
sparse
.
getRows
()),
VALUE_TYPE_INT32
),
col_
(
reinterpret_cast
<
void
*>
(
sparse
.
getCols
()),
VALUE_TYPE_INT32
),
nnz_
(
sparse
.
getElementCnt
()),
format_
(
s
parse
.
getFormat
(
)),
type_
(
s
parse
.
getValueType
(
))
{
format_
(
s
tatic_cast
<
SparseDataFormat
>
(
sparse
.
getFormat
()
)),
type_
(
s
tatic_cast
<
SparseDataType
>
(
sparse
.
getValueType
()
))
{
bufferType_
=
TENSOR_SPARSE
;
}
...
...
@@ -44,8 +44,8 @@ SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
row_
(
reinterpret_cast
<
void
*>
(
sparse
.
getRows
()),
VALUE_TYPE_INT32
),
col_
(
reinterpret_cast
<
void
*>
(
sparse
.
getCols
()),
VALUE_TYPE_INT32
),
nnz_
(
sparse
.
getElementCnt
()),
format_
(
s
parse
.
getFormat
(
)),
type_
(
s
parse
.
getValueType
(
))
{
format_
(
s
tatic_cast
<
SparseDataFormat
>
(
sparse
.
getFormat
()
)),
type_
(
s
tatic_cast
<
SparseDataType
>
(
sparse
.
getValueType
()
))
{
bufferType_
=
TENSOR_SPARSE
;
}
...
...
paddle/function/BufferArg.h
浏览文件 @
999cd14a
...
...
@@ -72,19 +72,21 @@ public:
BufferArg
(
ValueType
valueType
,
const
TensorShape
&
shape
,
ArgType
argType
=
UNSPECIFIED
)
:
buf_
(
nullptr
),
valueType_
(
valueType
),
shape_
(
shape
),
argType_
(
argType
)
{}
:
buf_
(
nullptr
),
valueType_
(
valueType
),
shape_
(
shape
),
argType_
(
argType
)
{
bufferType_
=
TENSOR_NORMAL
;
}
BufferArg
(
void
*
buf
,
ValueType
valueType
,
const
TensorShape
&
shape
,
ArgType
argType
=
UNSPECIFIED
)
:
buf_
(
buf
),
valueType_
(
valueType
),
shape_
(
shape
),
argType_
(
argType
)
{}
:
buf_
(
buf
),
valueType_
(
valueType
),
shape_
(
shape
),
argType_
(
argType
)
{
bufferType_
=
TENSOR_NORMAL
;
}
BufferArg
(
void
*
buf
,
ValueType
valueType
)
:
buf_
(
buf
),
valueType_
(
valueType
)
{}
BufferArg
(
void
*
buf
,
ValueType
valueType
)
:
buf_
(
buf
),
valueType_
(
valueType
)
{
bufferType_
=
TENSOR_NORMAL
;
}
BufferArg
(
const
Matrix
&
matrix
,
ArgType
argType
=
UNSPECIFIED
)
:
buf_
(
...
...
@@ -173,7 +175,7 @@ protected:
TensorShape
shape_
;
BufferType
bufferType_
{
TENSOR_UNKNOWN
};
ArgType
argType_
{
UNSPECIFIED
};
//
todo
(tianbing), add deviceType_
//
TODO
(tianbing), add deviceType_
// leading dimensions. The size is dims_.size()
// Dims lds_;
};
...
...
@@ -186,6 +188,7 @@ class SequenceIdArg : public BufferArg {
public:
SequenceIdArg
(
const
TensorShape
&
shape
,
ArgType
argType
=
UNSPECIFIED
)
:
BufferArg
(
VALUE_TYPE_INT32
,
shape
,
argType
)
{
bufferType_
=
TENSOR_SEQUENCE_ID
;
CHECK_EQ
(
shape_
.
ndims
(),
(
size_t
)
1
);
CHECK_GT
(
shape_
[
0
],
1
);
numSeqs_
=
shape_
[
0
]
-
1
;
...
...
@@ -223,7 +226,9 @@ public:
SequenceArg
(
ValueType
valueType
,
const
TensorShape
&
shape
,
ArgType
argType
=
UNSPECIFIED
)
:
BufferArg
(
valueType
,
shape
,
argType
),
startPositions_
(
TensorShape
())
{}
:
BufferArg
(
valueType
,
shape
,
argType
),
startPositions_
(
TensorShape
())
{
bufferType_
=
TENSOR_SEQUENCE_DATA
;
}
SequenceArg
(
void
*
buf
,
ValueType
valueType
,
...
...
@@ -271,16 +276,16 @@ public:
row_
(
row
),
col_
(
col
),
nnz_
(
nnz
),
format_
(
format
),
type_
(
type
)
{
format_
(
static_cast
<
SparseDataFormat
>
(
format
)
),
type_
(
static_cast
<
SparseDataType
>
(
type
)
)
{
bufferType_
=
TENSOR_SPARSE
;
CHECK
((
valueType
==
VALUE_TYPE_FLOAT
)
||
(
valueType
==
VALUE_TYPE_DOUBLE
));
CHECK_EQ
(
shape_
.
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
row_
.
shape
().
ndims
(),
(
size_t
)
1
);
CHECK_EQ
(
col_
.
shape
().
ndims
(),
(
size_t
)
1
);
if
(
format
==
SPARSE_CSR
)
{
if
(
format
_
==
T_
SPARSE_CSR
)
{
CHECK_EQ
(
nnz
,
col
.
shape
()[
0
]);
}
else
if
(
format
==
SPARSE_CSC
)
{
}
else
if
(
format
_
==
T_
SPARSE_CSC
)
{
CHECK_EQ
(
nnz
,
row
.
shape
()[
0
]);
}
}
...
...
@@ -292,23 +297,23 @@ public:
SparseValueType
type
,
ArgType
argType
=
UNSPECIFIED
)
:
BufferArg
(
valueType
,
shape
,
argType
),
/// len of row_ : height + 1 (CSR), buf_ == nullptr
row_
(
format
==
SPARSE_CSR
?
BufferArg
(
VALUE_TYPE_INT32
,
TensorShape
{
shape
[
0
]
+
1
})
:
BufferArg
(
VALUE_TYPE_INT32
,
TensorShape
{
nnz
})),
/// len of col_ : width + 1 (CSC), buf_ == nullptr
col_
(
format
==
SPARSE_CSR
?
BufferArg
(
VALUE_TYPE_INT32
,
TensorShape
{
nnz
})
:
BufferArg
(
VALUE_TYPE_INT32
,
TensorShape
{
shape
[
1
]
+
1
})),
row_
(
BufferArg
(
nullptr
,
VALUE_TYPE_INT32
)),
col_
(
BufferArg
(
nullptr
,
VALUE_TYPE_INT32
)),
nnz_
(
nnz
),
format_
(
format
),
type_
(
type
)
{
format_
(
static_cast
<
SparseDataFormat
>
(
format
)
),
type_
(
static_cast
<
SparseDataType
>
(
type
)
)
{
bufferType_
=
TENSOR_SPARSE
;
/// todo(tianbing)
/// valueType and shape_.ndims() == 2 need to check before
/// this constructor to make sure row_ and col_ are right
CHECK
((
valueType
==
VALUE_TYPE_FLOAT
)
||
(
valueType
==
VALUE_TYPE_DOUBLE
));
CHECK_EQ
(
shape_
.
ndims
(),
(
size_t
)
2
);
/// len of row_ : height + 1 (CSR) or nnz (CSC), buf_ == nullptr
row_
=
(
format_
==
T_SPARSE_CSR
?
BufferArg
(
VALUE_TYPE_INT32
,
TensorShape
{
shape_
[
0
]
+
1
})
:
BufferArg
(
VALUE_TYPE_INT32
,
TensorShape
{
nnz
}));
/// len of col_ : width + 1 (CSC) or nnz (CSR), buf_ == nullptr
col_
=
(
format_
==
T_SPARSE_CSR
?
BufferArg
(
VALUE_TYPE_INT32
,
TensorShape
{
nnz
})
:
BufferArg
(
VALUE_TYPE_INT32
,
TensorShape
{
shape_
[
1
]
+
1
}));
}
SparseMatrixArg
(
const
CpuSparseMatrix
&
sparse
,
ArgType
argType
=
UNSPECIFIED
);
...
...
@@ -328,8 +333,8 @@ public:
shape_
[
0
],
shape_
[
1
],
nnz_
,
type_
,
format_
,
static_cast
<
SparseValueType
>
(
type_
)
,
static_cast
<
SparseFormat
>
(
format_
)
,
false
);
}
...
...
@@ -343,16 +348,16 @@ public:
size_t
numElements
()
const
override
{
return
nnz_
;
}
SparseFormat
dataFormat
()
const
{
return
format_
;
}
Sparse
Data
Format
dataFormat
()
const
{
return
format_
;
}
Sparse
Value
Type
dataType
()
const
{
return
type_
;
}
Sparse
Data
Type
dataType
()
const
{
return
type_
;
}
private:
BufferArg
row_
;
BufferArg
col_
;
size_t
nnz_
;
SparseFormat
format_
;
Sparse
Value
Type
type_
;
Sparse
Data
Format
format_
;
Sparse
Data
Type
type_
;
};
}
// namespace paddle
paddle/function/FunctionTest.h
浏览文件 @
999cd14a
...
...
@@ -15,7 +15,6 @@ limitations under the License. */
#include "Function.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/math/Vector.h"
#include "paddle/math/tests/TensorCheck.h"
#include "paddle/testing/TestUtil.h"
...
...
@@ -77,33 +76,33 @@ public:
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
cpuOutputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
cpuMemory_
.
back
()
->
getBuf
(),
output
.
valueType
(),
output
.
shape
(),
// todo(tianbing), argType = output.getArgType(), but default ADD_TO
argType
));
gpuOutputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
gpuMemory_
.
back
()
->
getBuf
(),
output
.
valueType
(),
output
.
shape
(),
// todo(tianbing), argType = output.getArgType(), but default ADD_TO
argType
));
cpuOutputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
cpuMemory_
.
back
()
->
getBuf
(),
output
.
valueType
(),
output
.
shape
(),
argType
));
gpuOutputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
gpuMemory_
.
back
()
->
getBuf
(),
output
.
valueType
(),
output
.
shape
(),
argType
));
}
/// add and init output sparse matrix
void
addOutputs
(
const
SparseMatrixArg
&
output
,
ArgType
argType
=
ASSIGN_TO
)
{
cpuSparse_
=
std
::
make_shared
<
CpuSparseMatrix
>
(
output
.
shape
()[
0
],
output
.
shape
()[
1
],
output
.
nnz
(),
output
.
dataType
(),
output
.
dataFormat
());
gpuSparse_
=
std
::
make_shared
<
GpuSparseMatrix
>
(
output
.
shape
()[
0
],
output
.
shape
()[
1
],
output
.
nnz
(),
output
.
dataType
(),
output
.
dataFormat
());
cpuSparse_
=
std
::
make_shared
<
CpuSparseMatrix
>
(
output
.
shape
()[
0
],
output
.
shape
()[
1
],
output
.
nnz
(),
static_cast
<
SparseValueType
>
(
output
.
dataType
()),
static_cast
<
SparseFormat
>
(
output
.
dataFormat
()));
gpuSparse_
=
std
::
make_shared
<
GpuSparseMatrix
>
(
output
.
shape
()[
0
],
output
.
shape
()[
1
],
output
.
nnz
(),
static_cast
<
SparseValueType
>
(
output
.
dataType
()),
static_cast
<
SparseFormat
>
(
output
.
dataFormat
()));
/// init sparse matrix
hl_stream_t
stream
(
HPPL_STREAM_1
);
...
...
@@ -138,17 +137,19 @@ public:
}
void
addInputs
(
const
SparseMatrixArg
&
input
)
{
cpuSparse_
=
std
::
make_shared
<
CpuSparseMatrix
>
(
input
.
shape
()[
0
],
input
.
shape
()[
1
],
input
.
nnz
(),
input
.
dataType
(),
input
.
dataFormat
());
gpuSparse_
=
std
::
make_shared
<
GpuSparseMatrix
>
(
input
.
shape
()[
0
],
input
.
shape
()[
1
],
input
.
nnz
(),
input
.
dataType
(),
input
.
dataFormat
());
cpuSparse_
=
std
::
make_shared
<
CpuSparseMatrix
>
(
input
.
shape
()[
0
],
input
.
shape
()[
1
],
input
.
nnz
(),
static_cast
<
SparseValueType
>
(
input
.
dataType
()),
static_cast
<
SparseFormat
>
(
input
.
dataFormat
()));
gpuSparse_
=
std
::
make_shared
<
GpuSparseMatrix
>
(
input
.
shape
()[
0
],
input
.
shape
()[
1
],
input
.
nnz
(),
static_cast
<
SparseValueType
>
(
input
.
dataType
()),
static_cast
<
SparseFormat
>
(
input
.
dataFormat
()));
/// init sparse matrix
hl_stream_t
stream
(
HPPL_STREAM_1
);
...
...
paddle/function/MulOp.cpp
浏览文件 @
999cd14a
...
...
@@ -41,6 +41,7 @@ inline void colVecAddTo(
}
// namespace
namespace
paddle
{
/// sparse matrix (+)= dense matrix * dense matrix
template
<
>
void
MulOp
<
DEVICE_TYPE_CPU
>
(
CpuSparseMatrix
&
out
,
const
CpuMatrix
&
a
,
...
...
@@ -105,6 +106,7 @@ void MulOp<DEVICE_TYPE_CPU>(CpuSparseMatrix& out,
}
}
/// dense matrix (+)= dense matrix * dense matrix
template
<
>
void
MulOp
<
DEVICE_TYPE_CPU
>
(
CpuMatrix
&
out
,
const
CpuMatrix
&
a
,
...
...
@@ -129,6 +131,7 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
out
.
getStride
());
}
/// dense matrix (+)= sparse matrix * dense matrix
template
<
>
void
MulOp
<
DEVICE_TYPE_CPU
>
(
CpuMatrix
&
out
,
const
CpuSparseMatrix
&
a
,
...
...
@@ -138,8 +141,6 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
bool
aTrans
,
bool
bTrans
,
bool
cTrans
)
{
CHECK_EQ
(
a
.
getFormat
(),
SPARSE_CSR
)
<<
"Not supported SPARSE_CSR format for a"
;
if
(
scaleT
==
0
)
{
out
.
zeroMem
();
}
...
...
@@ -165,6 +166,7 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
}
}
/// dense matrix (+)= dense matrix * sparse matrix
template
<
>
void
MulOp
<
DEVICE_TYPE_CPU
>
(
CpuMatrix
&
out
,
const
CpuMatrix
&
a
,
...
...
@@ -183,7 +185,7 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
int
*
rows
=
b
.
getRows
();
int
*
cols
=
b
.
getCols
();
///
b.getFormat() == SPARSE_CSC
///
SPARSE_CSC format
if
(
b
.
getFormat
()
==
SPARSE_CSC
)
{
for
(
size_t
j
=
0
;
j
<
b
.
getWidth
();
++
j
)
{
int
start
=
b
.
getColStartIdx
(
j
);
...
...
@@ -200,7 +202,7 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
return
;
}
///
b.getFormat() == SPARSE_CSR
///
SPARSE_CSR format
if
(
b
.
getFormat
()
==
SPARSE_CSR
)
{
for
(
size_t
j
=
0
;
j
<
b
.
getHeight
();
++
j
)
{
int
start
=
b
.
getRowStartIdx
(
j
);
...
...
@@ -220,11 +222,32 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
/**
* mul operator
* out = scaleT * out + scaleAB*(in1 * in2)
* out = scaleT * out + scaleAB * (in1 * in2)
* here, scaleT in {0, 1}, scaleAB == 1,
* out = in1 (A) * in2 (B), ASSIGN_TO
* out += in1 (A) * in2 (B), ADD_TO
*
*
* \param outputs[0] output matrix (out), M * N,
* could be either Sparse or Dense Matrix
* M is num of rows, N is num of columns
* \param inputs[0] first input matrix (A), M * K (if non-trans)
* could be either Sparse or Dense Matrix
* M is num of rows, K is num of columns
* \param inputs[1] second input matrix (B), K * N (if non-trans)
* could be either Sparse or Dense Matrix
* K is num of rows, N is num of columns
*
* Support eight Mul operators, with both GPU and CPU devices
* For each device, four Mul operators are supported:
* 1. dense (out) = dense (A) * dense (B)
* 2. dense (out) = sparse (A) * dense (B)
* sparse matrix only support SPARSE_CSR format
* 3. dense (out) = dense (A) * sparse (B)
* sparse matrix support SPARSE_CSC and SPARSE_CSR formats
* 4. sparse (out) = dense (A) * dense (B)
* sparse matrix support SPARSE_CSC and SPARSE_CSR formats
*
* \param outputs[0] output matrix, M * N
* \param inputs[0] first input (sparse) matrix, M * K (if non-trans)
* \param inputs[1] second input matrix, K * N (if non-trans)
*/
template
<
DeviceType
Device
>
class
MulFunc
:
public
FunctionBase
{
...
...
@@ -271,7 +294,7 @@ public:
!
inputs
[
1
].
isSparseArg
()));
auto
outMat
=
outputs
[
0
].
matrix
<
Device
>
();
///
matrix = matrix *
matrix
///
dense matrix = dense matrix * dense
matrix
if
(
!
inputs
[
0
].
isSparseArg
()
&&
!
inputs
[
1
].
isSparseArg
()
&&
!
outputs
[
0
].
isSparseArg
())
{
MulOp
<
Device
>
(
outMat
,
...
...
@@ -285,7 +308,7 @@ public:
return
;
}
///
matrix =
matrix * sparse matrix
///
dense matrix = dense
matrix * sparse matrix
if
(
!
inputs
[
0
].
isSparseArg
()
&&
inputs
[
1
].
isSparseArg
()
&&
!
outputs
[
0
].
isSparseArg
())
{
CHECK
(
!
aTrans_
)
<<
"Not supported a transpose"
;
...
...
@@ -300,10 +323,12 @@ public:
return
;
}
///
matrix = sparse matrix *
matrix
///
dense matrix = sparse matrix * dense
matrix
if
(
inputs
[
0
].
isSparseArg
()
&&
!
inputs
[
1
].
isSparseArg
()
&&
!
outputs
[
0
].
isSparseArg
())
{
CHECK
(
!
bTrans_
)
<<
"Not supported b transpose"
;
CHECK_EQ
(
inputs
[
0
].
sparse
().
dataFormat
(),
T_SPARSE_CSR
)
<<
"Only supported SPARSE_CSR format for sparse matrix a"
;
MulOp
<
Device
>
(
outMat
,
inputs
[
0
].
sparse
().
SparseMatrix
<
Device
>
(),
inputs
[
1
].
matrix
<
Device
>
(),
...
...
@@ -315,7 +340,7 @@ public:
return
;
}
/// sparse matrix =
matrix *
matrix
/// sparse matrix =
dense matrix * dense
matrix
auto
outSparseMat
=
outputs
[
0
].
sparse
().
SparseMatrix
<
Device
>
();
if
(
!
inputs
[
0
].
isSparseArg
()
&&
!
inputs
[
1
].
isSparseArg
()
&&
outputs
[
0
].
isSparseArg
())
{
...
...
paddle/function/MulOp.h
浏览文件 @
999cd14a
...
...
@@ -15,12 +15,11 @@ limitations under the License. */
#pragma once
#include "Function.h"
/// todo(tianbing), delete it
#include <iostream>
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
namespace
paddle
{
/// CPU, dense matrix (+)= dense matrix * dense matrix
template
<
DeviceType
DType
>
void
MulOp
(
CpuMatrix
&
out
,
const
CpuMatrix
&
a
,
...
...
@@ -31,6 +30,7 @@ void MulOp(CpuMatrix& out,
bool
bTrans
,
bool
cTrans
);
/// CPU, dense matrix (+)= sparse matrix * dense matrix
template
<
DeviceType
DType
>
void
MulOp
(
CpuMatrix
&
out
,
const
CpuSparseMatrix
&
a
,
...
...
@@ -41,6 +41,7 @@ void MulOp(CpuMatrix& out,
bool
bTrans
,
bool
cTrans
);
/// CPU, dense matrix (+)= dense matrix * sparse matrix
template
<
DeviceType
DType
>
void
MulOp
(
CpuMatrix
&
out
,
const
CpuMatrix
&
a
,
...
...
@@ -51,6 +52,7 @@ void MulOp(CpuMatrix& out,
bool
bTrans
,
bool
cTrans
);
/// CPU, sparse matrix (+)= dense matrix * dense matrix
template
<
DeviceType
DType
>
void
MulOp
(
CpuSparseMatrix
&
out
,
const
CpuMatrix
&
a
,
...
...
@@ -61,6 +63,7 @@ void MulOp(CpuSparseMatrix& out,
bool
bTrans
,
bool
cTrans
);
/// GPU, dense matrix (+)= dense matrix * dense matrix
template
<
DeviceType
DType
>
void
MulOp
(
GpuMatrix
&
out
,
const
GpuMatrix
&
a
,
...
...
@@ -71,6 +74,7 @@ void MulOp(GpuMatrix& out,
bool
bTrans
,
bool
cTrans
);
/// GPU, dense matrix (+)= sparse matrix * dense matrix
template
<
DeviceType
DType
>
void
MulOp
(
GpuMatrix
&
out
,
const
GpuSparseMatrix
&
a
,
...
...
@@ -81,6 +85,7 @@ void MulOp(GpuMatrix& out,
bool
bTrans
,
bool
cTrans
);
/// GPU, dense matrix (+)= dense matrix * sparse matrix
template
<
DeviceType
DType
>
void
MulOp
(
GpuMatrix
&
out
,
const
GpuMatrix
&
a
,
...
...
@@ -90,7 +95,7 @@ void MulOp(GpuMatrix& out,
bool
aTrans
,
bool
bTrans
,
bool
cTrans
);
/// GPU, sparse matrix (+)= dense matrix * dense matrix
template
<
DeviceType
DType
>
void
MulOp
(
GpuSparseMatrix
&
out
,
const
GpuMatrix
&
a
,
...
...
paddle/function/MulOpGpu.cu
浏览文件 @
999cd14a
...
...
@@ -18,10 +18,7 @@ limitations under the License. */
#include "paddle/math/SparseMatrix.h"
namespace
paddle
{
/**
* out = scaleT * out + scaleAB * (a * b)
* out : output matrix, M * N
*/
/// dense matrix (+)= dense matrix * dense matrix
template
<
>
void
MulOp
<
DEVICE_TYPE_GPU
>
(
GpuMatrix
&
out
,
const
GpuMatrix
&
a
,
...
...
@@ -32,14 +29,11 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
bool
bTrans
,
bool
cTrans
)
{
CHECK
(
a
.
useGpu_
&&
b
.
useGpu_
)
<<
"matrix device type not match"
;
real
*
aData
=
const_cast
<
real
*>
(
a
.
getData
());
real
*
bData
=
const_cast
<
real
*>
(
b
.
getData
());
real
*
outData
=
const_cast
<
real
*>
(
out
.
getData
());
hl_matrix_mul
(
aData
,
hl_matrix_mul
(
const_cast
<
real
*>
(
a
.
getData
()),
!
aTrans
?
HPPL_OP_N
:
HPPL_OP_T
,
bData
,
const_cast
<
real
*>
(
b
.
getData
())
,
!
bTrans
?
HPPL_OP_N
:
HPPL_OP_T
,
outData
,
const_cast
<
real
*>
(
out
.
getData
())
,
out
.
getHeight
(),
out
.
getWidth
(),
!
aTrans
?
a
.
getWidth
()
:
a
.
getHeight
(),
...
...
@@ -50,10 +44,7 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
out
.
getStride
());
}
/**
* out = scaleT * out + scaleAB * (a * b)
* out : M * N
*/
/// dense matrix (+)= sparse matrix * dense matrix
template
<
>
void
MulOp
<
DEVICE_TYPE_GPU
>
(
GpuMatrix
&
out
,
const
GpuSparseMatrix
&
a
,
...
...
@@ -66,15 +57,11 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
CHECK
(
out
.
isContiguous
());
CHECK
(
b
.
isContiguous
());
CHECK
(
a
.
useGpu_
&&
b
.
useGpu_
)
<<
"matrix device type not match"
;
hl_sparse_matrix_s
aData
=
a
.
sMatrix_
.
get
();
real
*
bData
=
const_cast
<
real
*>
(
b
.
getData
());
real
*
outData
=
const_cast
<
real
*>
(
out
.
getData
());
hl_matrix_csr_mul_dense
(
aData
,
hl_matrix_csr_mul_dense
(
a
.
sMatrix_
.
get
(),
aTrans
?
HPPL_OP_T
:
HPPL_OP_N
,
bData
,
const_cast
<
real
*>
(
b
.
getData
())
,
HPPL_OP_N
,
outData
,
const_cast
<
real
*>
(
out
.
getData
())
,
out
.
getHeight
(),
out
.
getWidth
(),
b
.
getHeight
(),
...
...
@@ -82,10 +69,7 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
scaleT
);
}
/**
* out = scaleT * out + scaleAB * (a * b)
* out : M * N
*/
/// dense matrix (+)= dense matrix * sparse matrix
template
<
>
void
MulOp
<
DEVICE_TYPE_GPU
>
(
GpuMatrix
&
out
,
const
GpuMatrix
&
a
,
...
...
@@ -99,27 +83,23 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
CHECK
(
a
.
isContiguous
());
CHECK
(
a
.
useGpu_
&&
b
.
useGpu_
)
<<
"matrix device type not match"
;
hl_sparse_matrix_s
bData
=
b
.
sMatrix_
.
get
();
real
*
aData
=
const_cast
<
real
*>
(
a
.
getData
());
real
*
outData
=
const_cast
<
real
*>
(
out
.
getData
());
if
(
b
.
format_
==
SPARSE_CSC
)
{
hl_matrix_dense_mul_csc
(
aData
,
hl_matrix_dense_mul_csc
(
const_cast
<
real
*>
(
a
.
getData
())
,
HPPL_OP_N
,
b
Data
,
b
.
sMatrix_
.
get
()
,
bTrans
?
HPPL_OP_T
:
HPPL_OP_N
,
outData
,
const_cast
<
real
*>
(
out
.
getData
())
,
out
.
getHeight
(),
out
.
getWidth
(),
a
.
getWidth
(),
scaleAB
,
scaleT
);
}
else
{
hl_matrix_dense_mul_csr
(
aData
,
hl_matrix_dense_mul_csr
(
const_cast
<
real
*>
(
a
.
getData
())
,
HPPL_OP_N
,
b
Data
,
b
.
sMatrix_
.
get
()
,
bTrans
?
HPPL_OP_T
:
HPPL_OP_N
,
outData
,
const_cast
<
real
*>
(
out
.
getData
())
,
out
.
getHeight
(),
out
.
getWidth
(),
a
.
getWidth
(),
...
...
@@ -128,6 +108,7 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
}
}
/// sparse matrix (+)= dense matrix * dense matrix
template
<
>
void
MulOp
<
DEVICE_TYPE_GPU
>
(
GpuSparseMatrix
&
out
,
const
GpuMatrix
&
a
,
...
...
@@ -138,16 +119,11 @@ void MulOp<DEVICE_TYPE_GPU>(GpuSparseMatrix& out,
bool
bTrans
,
bool
cTrans
)
{
CHECK
(
a
.
useGpu_
&&
b
.
useGpu_
)
<<
"matrix device type not match"
;
real
*
aData
=
const_cast
<
real
*>
(
a
.
getData
());
real
*
bData
=
const_cast
<
real
*>
(
b
.
getData
());
hl_sparse_matrix_s
outData
=
out
.
sMatrix_
.
get
();
hl_sparse_matrix_mul
(
aData
,
hl_sparse_matrix_mul
(
const_cast
<
real
*>
(
a
.
getData
()),
aTrans
?
HPPL_OP_T
:
HPPL_OP_N
,
bData
,
const_cast
<
real
*>
(
b
.
getData
())
,
bTrans
?
HPPL_OP_T
:
HPPL_OP_N
,
out
Data
,
out
.
sMatrix_
.
get
()
,
out
.
getHeight
(),
out
.
getWidth
(),
!
bTrans
?
b
.
getHeight
()
:
b
.
getWidth
(),
...
...
paddle/function/MulOpTest.cpp
浏览文件 @
999cd14a
...
...
@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
/// todo(tianbing), delete
#include <iostream>
#include "FunctionTest.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
...
...
paddle/function/TensorType.h
浏览文件 @
999cd14a
...
...
@@ -31,6 +31,10 @@ enum DeviceType {
DEVICE_TYPE_GPU
=
2
};
enum
SparseDataType
{
T_NO_VALUE
=
0
,
T_FLOAT_VALUE
=
1
};
enum
SparseDataFormat
{
T_SPARSE_CSR
=
0
,
T_SPARSE_CSC
=
1
};
inline
int
sizeOfValuType
(
ValueType
valueType
)
{
if
(
valueType
==
VALUE_TYPE_INT32
)
{
return
4
;
...
...
paddle/math/Matrix.h
浏览文件 @
999cd14a
...
...
@@ -31,6 +31,7 @@ limitations under the License. */
namespace
paddle
{
/// TODO(tianbing), move to paddle/function/TensorType.h
enum
SparseValueType
{
NO_VALUE
=
0
,
FLOAT_VALUE
=
1
};
/**
...
...
@@ -56,6 +57,7 @@ enum SparseValueType { NO_VALUE = 0, FLOAT_VALUE = 1 };
* value [1, 1, 2, 2, 5]
* @endcode
*/
/// TODO(tianbing), move to paddle/function/TensorType.h
enum
SparseFormat
{
SPARSE_CSR
=
0
,
SPARSE_CSC
=
1
};
class
Matrix
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录