PaddlePaddle / Paddle
Commit 999cd14a
Authored on Jan 23, 2017 by xutianbing
Parent: b3be7358

    Further address Daoyuan's comments, clean the code.

Showing 9 changed files with 147 additions and 131 deletions (+147 -131):
  paddle/function/BufferArg.cpp    +4   -4
  paddle/function/BufferArg.h      +37  -32
  paddle/function/FunctionTest.h   +36  -35
  paddle/function/MulOp.cpp        +37  -12
  paddle/function/MulOp.h          +8   -3
  paddle/function/MulOpGpu.cu      +19  -43
  paddle/function/MulOpTest.cpp    +0   -2
  paddle/function/TensorType.h     +4   -0
  paddle/math/Matrix.h             +2   -0
paddle/function/BufferArg.cpp

```diff
@@ -34,8 +34,8 @@ SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
       col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
       nnz_(sparse.getElementCnt()),
-      format_(sparse.getFormat()),
-      type_(sparse.getValueType()) {
+      format_(static_cast<SparseDataFormat>(sparse.getFormat())),
+      type_(static_cast<SparseDataType>(sparse.getValueType())) {
   bufferType_ = TENSOR_SPARSE;
 }

@@ -44,8 +44,8 @@ SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
       col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
       nnz_(sparse.getElementCnt()),
-      format_(sparse.getFormat()),
-      type_(sparse.getValueType()) {
+      format_(static_cast<SparseDataFormat>(sparse.getFormat())),
+      type_(static_cast<SparseDataType>(sparse.getValueType())) {
   bufferType_ = TENSOR_SPARSE;
 }
```
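The substantive change in both constructors is that `format_` and `type_` are now stored as the function-layer `SparseDataFormat` / `SparseDataType` instead of the math-layer `SparseFormat` / `SparseValueType`, so the values read from the sparse matrix are converted with `static_cast` in the initializer list. A minimal standalone sketch of that pattern follows; the enum definitions are copied from the Matrix.h and TensorType.h hunks of this commit, while the `ToyArg` class is invented purely for illustration.

```cpp
#include <cassert>

// Math-layer enums (as declared in paddle/math/Matrix.h).
enum SparseValueType { NO_VALUE = 0, FLOAT_VALUE = 1 };
enum SparseFormat { SPARSE_CSR = 0, SPARSE_CSC = 1 };

// Function-layer enums (as added in paddle/function/TensorType.h).
enum SparseDataType { T_NO_VALUE = 0, T_FLOAT_VALUE = 1 };
enum SparseDataFormat { T_SPARSE_CSR = 0, T_SPARSE_CSC = 1 };

// Toy stand-in for SparseMatrixArg: it stores the function-layer enums but is
// constructed from math-layer values, mirroring the initializer-list casts above.
class ToyArg {
public:
  ToyArg(SparseFormat format, SparseValueType type)
      : format_(static_cast<SparseDataFormat>(format)),
        type_(static_cast<SparseDataType>(type)) {}

  SparseDataFormat dataFormat() const { return format_; }
  SparseDataType dataType() const { return type_; }

private:
  SparseDataFormat format_;
  SparseDataType type_;
};

int main() {
  ToyArg arg(SPARSE_CSR, FLOAT_VALUE);
  // The casts are value-preserving because both enum families use the same
  // integer codes (CSR = 0, CSC = 1; no-value = 0, float-value = 1).
  assert(arg.dataFormat() == T_SPARSE_CSR);
  assert(arg.dataType() == T_FLOAT_VALUE);
  return 0;
}
```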
paddle/function/BufferArg.h

```diff
@@ -72,19 +72,21 @@ public:
   BufferArg(ValueType valueType,
             const TensorShape& shape,
             ArgType argType = UNSPECIFIED)
-      : buf_(nullptr), valueType_(valueType), shape_(shape), argType_(argType) {}
+      : buf_(nullptr), valueType_(valueType), shape_(shape), argType_(argType) {
+    bufferType_ = TENSOR_NORMAL;
+  }
 
   BufferArg(void* buf,
             ValueType valueType,
             const TensorShape& shape,
             ArgType argType = UNSPECIFIED)
-      : buf_(buf), valueType_(valueType), shape_(shape), argType_(argType) {}
+      : buf_(buf), valueType_(valueType), shape_(shape), argType_(argType) {
+    bufferType_ = TENSOR_NORMAL;
+  }
 
-  BufferArg(void* buf, ValueType valueType) : buf_(buf), valueType_(valueType) {}
+  BufferArg(void* buf, ValueType valueType) : buf_(buf), valueType_(valueType) {
+    bufferType_ = TENSOR_NORMAL;
+  }
 
   BufferArg(const Matrix& matrix, ArgType argType = UNSPECIFIED)
       : buf_(

@@ -173,7 +175,7 @@ protected:
   TensorShape shape_;
   BufferType bufferType_{TENSOR_UNKNOWN};
   ArgType argType_{UNSPECIFIED};
-  // todo(tianbing), add deviceType_
+  // TODO(tianbing), add deviceType_
   // leading dimensions. The size is dims_.size()
   // Dims lds_;
 };

@@ -186,6 +188,7 @@ class SequenceIdArg : public BufferArg {
 public:
   SequenceIdArg(const TensorShape& shape, ArgType argType = UNSPECIFIED)
       : BufferArg(VALUE_TYPE_INT32, shape, argType) {
+    bufferType_ = TENSOR_SEQUENCE_ID;
     CHECK_EQ(shape_.ndims(), (size_t)1);
     CHECK_GT(shape_[0], 1);
     numSeqs_ = shape_[0] - 1;

@@ -223,7 +226,9 @@ public:
   SequenceArg(ValueType valueType,
               const TensorShape& shape,
               ArgType argType = UNSPECIFIED)
-      : BufferArg(valueType, shape, argType), startPositions_(TensorShape()) {}
+      : BufferArg(valueType, shape, argType), startPositions_(TensorShape()) {
+    bufferType_ = TENSOR_SEQUENCE_DATA;
+  }
 
   SequenceArg(void* buf,
               ValueType valueType,

@@ -271,16 +276,16 @@ public:
         row_(row),
         col_(col),
         nnz_(nnz),
-        format_(format),
-        type_(type) {
+        format_(static_cast<SparseDataFormat>(format)),
+        type_(static_cast<SparseDataType>(type)) {
     bufferType_ = TENSOR_SPARSE;
     CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
     CHECK_EQ(shape_.ndims(), (size_t)2);
     CHECK_EQ(row_.shape().ndims(), (size_t)1);
     CHECK_EQ(col_.shape().ndims(), (size_t)1);
-    if (format == SPARSE_CSR) {
+    if (format_ == T_SPARSE_CSR) {
       CHECK_EQ(nnz, col.shape()[0]);
-    } else if (format == SPARSE_CSC) {
+    } else if (format_ == T_SPARSE_CSC) {
       CHECK_EQ(nnz, row.shape()[0]);
     }
   }

@@ -292,23 +297,23 @@ public:
                   SparseValueType type,
                   ArgType argType = UNSPECIFIED)
       : BufferArg(valueType, shape, argType),
-        /// len of row_ : height + 1 (CSR), buf_ == nullptr
-        row_(format == SPARSE_CSR
-                 ? BufferArg(VALUE_TYPE_INT32, TensorShape{shape[0] + 1})
-                 : BufferArg(VALUE_TYPE_INT32, TensorShape{nnz})),
-        /// len of col_ : width + 1 (CSC), buf_ == nullptr
-        col_(format == SPARSE_CSR
-                 ? BufferArg(VALUE_TYPE_INT32, TensorShape{nnz})
-                 : BufferArg(VALUE_TYPE_INT32, TensorShape{shape[1] + 1})),
+        row_(BufferArg(nullptr, VALUE_TYPE_INT32)),
+        col_(BufferArg(nullptr, VALUE_TYPE_INT32)),
         nnz_(nnz),
-        format_(format),
-        type_(type) {
+        format_(static_cast<SparseDataFormat>(format)),
+        type_(static_cast<SparseDataType>(type)) {
     bufferType_ = TENSOR_SPARSE;
+    /// todo(tianbing)
+    /// valueType and shape_.ndims() == 2 need to check before
+    /// this constructor to make sure row_ and col_ are right
     CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
     CHECK_EQ(shape_.ndims(), (size_t)2);
+    /// len of row_ : height + 1 (CSR) or nnz (CSC), buf_ == nullptr
+    row_ = (format_ == T_SPARSE_CSR
+                ? BufferArg(VALUE_TYPE_INT32, TensorShape{shape_[0] + 1})
+                : BufferArg(VALUE_TYPE_INT32, TensorShape{nnz}));
+    /// len of col_ : width + 1 (CSC) or nnz (CSR), buf_ == nullptr
+    col_ = (format_ == T_SPARSE_CSR
+                ? BufferArg(VALUE_TYPE_INT32, TensorShape{nnz})
+                : BufferArg(VALUE_TYPE_INT32, TensorShape{shape_[1] + 1}));
   }
 
   SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED);

@@ -328,8 +333,8 @@ public:
                            shape_[0],
                            shape_[1],
                            nnz_,
-                           type_,
-                           format_,
+                           static_cast<SparseValueType>(type_),
+                           static_cast<SparseFormat>(format_),
                            false);
   }

@@ -343,16 +348,16 @@ public:
   size_t numElements() const override { return nnz_; }
 
-  SparseFormat dataFormat() const { return format_; }
+  SparseDataFormat dataFormat() const { return format_; }
 
-  SparseValueType dataType() const { return type_; }
+  SparseDataType dataType() const { return type_; }
 
 private:
   BufferArg row_;
   BufferArg col_;
   size_t nnz_;
-  SparseFormat format_;
-  SparseValueType type_;
+  SparseDataFormat format_;
+  SparseDataType type_;
 };
 
 }  // namespace paddle
```
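The reworked `SparseMatrixArg` constructor defers the `row_` / `col_` setup into the body so their shapes can be derived from `format_`: CSR keeps `height + 1` row offsets plus `nnz` column indices, while CSC keeps `nnz` row indices plus `width + 1` column offsets. A small self-contained sketch of that sizing rule follows; the helper `indexBufferLengths` is an invented illustration, not part of the commit.

```cpp
#include <cassert>
#include <cstddef>
#include <utility>

enum SparseDataFormat { T_SPARSE_CSR = 0, T_SPARSE_CSC = 1 };

// Returns {row index buffer length, col index buffer length} for a
// height x width sparse matrix with nnz non-zeros, matching the shapes chosen
// for row_ and col_ in SparseMatrixArg: CSR stores height + 1 row offsets and
// nnz column indices; CSC stores nnz row indices and width + 1 column offsets.
std::pair<size_t, size_t> indexBufferLengths(SparseDataFormat format,
                                             size_t height,
                                             size_t width,
                                             size_t nnz) {
  if (format == T_SPARSE_CSR) {
    return {height + 1, nnz};
  }
  return {nnz, width + 1};
}

int main() {
  // A 4 x 6 matrix with 9 non-zeros.
  auto csr = indexBufferLengths(T_SPARSE_CSR, 4, 6, 9);
  assert(csr.first == 5 && csr.second == 9);  // row offsets, column indices

  auto csc = indexBufferLengths(T_SPARSE_CSC, 4, 6, 9);
  assert(csc.first == 9 && csc.second == 7);  // row indices, column offsets
  return 0;
}
```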
paddle/function/FunctionTest.h

```diff
@@ -15,7 +15,6 @@ limitations under the License. */
 #include "Function.h"
 #include "paddle/math/Matrix.h"
 #include "paddle/math/SparseMatrix.h"
 #include "paddle/math/Vector.h"
 #include "paddle/math/tests/TensorCheck.h"
 #include "paddle/testing/TestUtil.h"

@@ -77,33 +76,33 @@ public:
     cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
     gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-    cpuOutputs_.emplace_back(
-        std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
-                                    output.valueType(),
-                                    output.shape(),
-                                    // todo(tianbing), argType = output.getArgType(), but default ADD_TO
-                                    argType));
-    gpuOutputs_.emplace_back(
-        std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
-                                    output.valueType(),
-                                    output.shape(),
-                                    // todo(tianbing), argType = output.getArgType(), but default ADD_TO
-                                    argType));
+    cpuOutputs_.emplace_back(
+        std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
+                                    output.valueType(),
+                                    output.shape(),
+                                    argType));
+    gpuOutputs_.emplace_back(
+        std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
+                                    output.valueType(),
+                                    output.shape(),
+                                    argType));
   }
 
   /// add and init output sparse matrix
   void addOutputs(const SparseMatrixArg& output, ArgType argType = ASSIGN_TO) {
-    cpuSparse_ = std::make_shared<CpuSparseMatrix>(output.shape()[0],
-                                                   output.shape()[1],
-                                                   output.nnz(),
-                                                   output.dataType(),
-                                                   output.dataFormat());
-    gpuSparse_ = std::make_shared<GpuSparseMatrix>(output.shape()[0],
-                                                   output.shape()[1],
-                                                   output.nnz(),
-                                                   output.dataType(),
-                                                   output.dataFormat());
+    cpuSparse_ = std::make_shared<CpuSparseMatrix>(
+        output.shape()[0],
+        output.shape()[1],
+        output.nnz(),
+        static_cast<SparseValueType>(output.dataType()),
+        static_cast<SparseFormat>(output.dataFormat()));
+    gpuSparse_ = std::make_shared<GpuSparseMatrix>(
+        output.shape()[0],
+        output.shape()[1],
+        output.nnz(),
+        static_cast<SparseValueType>(output.dataType()),
+        static_cast<SparseFormat>(output.dataFormat()));
 
     /// init sparse matrix
     hl_stream_t stream(HPPL_STREAM_1);

@@ -138,17 +137,19 @@ public:
   }
 
   void addInputs(const SparseMatrixArg& input) {
-    cpuSparse_ = std::make_shared<CpuSparseMatrix>(input.shape()[0],
-                                                   input.shape()[1],
-                                                   input.nnz(),
-                                                   input.dataType(),
-                                                   input.dataFormat());
-    gpuSparse_ = std::make_shared<GpuSparseMatrix>(input.shape()[0],
-                                                   input.shape()[1],
-                                                   input.nnz(),
-                                                   input.dataType(),
-                                                   input.dataFormat());
+    cpuSparse_ = std::make_shared<CpuSparseMatrix>(
+        input.shape()[0],
+        input.shape()[1],
+        input.nnz(),
+        static_cast<SparseValueType>(input.dataType()),
+        static_cast<SparseFormat>(input.dataFormat()));
+    gpuSparse_ = std::make_shared<GpuSparseMatrix>(
+        input.shape()[0],
+        input.shape()[1],
+        input.nnz(),
+        static_cast<SparseValueType>(input.dataType()),
+        static_cast<SparseFormat>(input.dataFormat()));
 
     /// init sparse matrix
     hl_stream_t stream(HPPL_STREAM_1);
```
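Because `dataType()` and `dataFormat()` now return the function-layer enums, `addOutputs` and `addInputs` each repeat the same pair of casts back to the math-layer types expected by `CpuSparseMatrix` / `GpuSparseMatrix`. A hypothetical pair of helpers that could centralize that conversion is sketched below; the helper names are invented and not part of the commit, and the enum definitions are copied from the two headers touched here.

```cpp
// Hypothetical helpers wrapping the casts repeated in addInputs/addOutputs.
enum SparseValueType { NO_VALUE = 0, FLOAT_VALUE = 1 };        // paddle/math/Matrix.h
enum SparseFormat { SPARSE_CSR = 0, SPARSE_CSC = 1 };          // paddle/math/Matrix.h
enum SparseDataType { T_NO_VALUE = 0, T_FLOAT_VALUE = 1 };     // TensorType.h
enum SparseDataFormat { T_SPARSE_CSR = 0, T_SPARSE_CSC = 1 };  // TensorType.h

inline SparseValueType toMathValueType(SparseDataType type) {
  return static_cast<SparseValueType>(type);
}

inline SparseFormat toMathFormat(SparseDataFormat format) {
  return static_cast<SparseFormat>(format);
}

// addInputs could then read, for example:
//   cpuSparse_ = std::make_shared<CpuSparseMatrix>(
//       input.shape()[0], input.shape()[1], input.nnz(),
//       toMathValueType(input.dataType()), toMathFormat(input.dataFormat()));
```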
paddle/function/MulOp.cpp

```diff
@@ -41,6 +41,7 @@ inline void colVecAddTo(
 }  // namespace
 
 namespace paddle {
+/// sparse matrix (+)= dense matrix * dense matrix
 template <>
 void MulOp<DEVICE_TYPE_CPU>(CpuSparseMatrix& out,
                             const CpuMatrix& a,

@@ -105,6 +106,7 @@ void MulOp<DEVICE_TYPE_CPU>(CpuSparseMatrix& out,
   }
 }
 
+/// dense matrix (+)= dense matrix * dense matrix
 template <>
 void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
                             const CpuMatrix& a,

@@ -129,6 +131,7 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
               out.getStride());
 }
 
+/// dense matrix (+)= sparse matrix * dense matrix
 template <>
 void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
                             const CpuSparseMatrix& a,

@@ -138,8 +141,6 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
                             bool aTrans,
                             bool bTrans,
                             bool cTrans) {
-  CHECK_EQ(a.getFormat(), SPARSE_CSR)
-      << "Not supported SPARSE_CSR format for a";
   if (scaleT == 0) {
     out.zeroMem();
   }

@@ -165,6 +166,7 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
   }
 }
 
+/// dense matrix (+)= dense matrix * sparse matrix
 template <>
 void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
                             const CpuMatrix& a,

@@ -183,7 +185,7 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
   int* rows = b.getRows();
   int* cols = b.getCols();
-  /// b.getFormat() == SPARSE_CSC
+  /// SPARSE_CSC format
   if (b.getFormat() == SPARSE_CSC) {
     for (size_t j = 0; j < b.getWidth(); ++j) {
       int start = b.getColStartIdx(j);

@@ -200,7 +202,7 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
     return;
   }
 
-  /// b.getFormat() == SPARSE_CSR
+  /// SPARSE_CSR format
   if (b.getFormat() == SPARSE_CSR) {
     for (size_t j = 0; j < b.getHeight(); ++j) {
       int start = b.getRowStartIdx(j);

@@ -220,11 +222,32 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
 /**
  * mul operator
- * out = scaleT * out + scaleAB*(in1 * in2)
+ * out = scaleT * out + scaleAB * (in1 * in2)
+ * here, scaleT in {0, 1}, scaleAB == 1,
+ * out = in1 (A) * in2 (B), ASSIGN_TO
+ * out += in1 (A) * in2 (B), ADD_TO
  *
+ *
+ * \param outputs[0] output matrix (out), M * N,
+ *                   could be either Sparse or Dense Matrix
+ *                   M is num of rows, N is num of columns
+ * \param inputs[0]  first input matrix (A), M * K (if non-trans)
+ *                   could be either Sparse or Dense Matrix
+ *                   M is num of rows, K is num of columns
+ * \param inputs[1]  second input matrix (B), K * N (if non-trans)
+ *                   could be either Sparse or Dense Matrix
+ *                   K is num of rows, N is num of columns
+ *
+ * Support eight Mul operators, with both GPU and CPU devices
+ * For each device, four Mul operators are supported:
+ * 1. dense (out) = dense (A) * dense (B)
+ * 2. dense (out) = sparse (A) * dense (B)
+ *    sparse matrix only support SPARSE_CSR format
+ * 3. dense (out) = dense (A) * sparse (B)
+ *    sparse matrix support SPARSE_CSC and SPARSE_CSR formats
+ * 4. sparse (out) = dense (A) * dense (B)
+ *    sparse matrix support SPARSE_CSC and SPARSE_CSR formats
+ *
- * \param outputs[0] output matrix, M * N
- * \param inputs[0] first input (sparse) matrix, M * K (if non-trans)
- * \param inputs[1] second input matrix, K * N (if non-trans)
  */
 template <DeviceType Device>
 class MulFunc : public FunctionBase {

@@ -271,7 +294,7 @@ public:
                        !inputs[1].isSparseArg()));
 
     auto outMat = outputs[0].matrix<Device>();
-    /// matrix = matrix * matrix
+    /// dense matrix = dense matrix * dense matrix
     if (!inputs[0].isSparseArg() && !inputs[1].isSparseArg() &&
         !outputs[0].isSparseArg()) {
       MulOp<Device>(outMat,

@@ -285,7 +308,7 @@ public:
       return;
     }
 
-    /// matrix = matrix * sparse matrix
+    /// dense matrix = dense matrix * sparse matrix
    if (!inputs[0].isSparseArg() && inputs[1].isSparseArg() &&
         !outputs[0].isSparseArg()) {
       CHECK(!aTrans_) << "Not supported a transpose";

@@ -300,10 +323,12 @@ public:
       return;
     }
 
-    /// matrix = sparse matrix * matrix
+    /// dense matrix = sparse matrix * dense matrix
     if (inputs[0].isSparseArg() && !inputs[1].isSparseArg() &&
         !outputs[0].isSparseArg()) {
       CHECK(!bTrans_) << "Not supported b transpose";
+      CHECK_EQ(inputs[0].sparse().dataFormat(), T_SPARSE_CSR)
+          << "Only supported SPARSE_CSR format for sparse matrix a";
       MulOp<Device>(outMat,
                     inputs[0].sparse().SparseMatrix<Device>(),
                     inputs[1].matrix<Device>(),

@@ -315,7 +340,7 @@ public:
       return;
     }
 
-    /// sparse matrix = matrix * matrix
+    /// sparse matrix = dense matrix * dense matrix
     auto outSparseMat = outputs[0].sparse().SparseMatrix<Device>();
     if (!inputs[0].isSparseArg() && !inputs[1].isSparseArg() &&
         outputs[0].isSparseArg()) {
```
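The expanded doc comment pins down the operator semantics: `out = scaleT * out + scaleAB * (in1 * in2)` with `scaleAB == 1` and `scaleT` in {0, 1}, so ASSIGN_TO overwrites the output and ADD_TO accumulates into it. A plain reference sketch of those semantics on row-major dense buffers (ordinary C++, not the Paddle API):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Reference semantics of the mul operator on row-major dense buffers:
// out = scaleT * out + scaleAB * (a * b), with out M x N, a M x K, b K x N.
// scaleT == 0 corresponds to ASSIGN_TO, scaleT == 1 to ADD_TO.
void mulRef(std::vector<float>& out,
            const std::vector<float>& a,
            const std::vector<float>& b,
            size_t M, size_t N, size_t K,
            float scaleAB, float scaleT) {
  for (size_t i = 0; i < M; ++i) {
    for (size_t j = 0; j < N; ++j) {
      float dot = 0;
      for (size_t k = 0; k < K; ++k) {
        dot += a[i * K + k] * b[k * N + j];
      }
      out[i * N + j] = scaleT * out[i * N + j] + scaleAB * dot;
    }
  }
}

int main() {
  // (1 x 2) * (2 x 1): dot product 1*3 + 2*4 = 11.
  std::vector<float> a = {1, 2}, b = {3, 4}, out = {100};
  mulRef(out, a, b, 1, 1, 2, /*scaleAB=*/1, /*scaleT=*/0);  // ASSIGN_TO
  assert(out[0] == 11);
  mulRef(out, a, b, 1, 1, 2, /*scaleAB=*/1, /*scaleT=*/1);  // ADD_TO
  assert(out[0] == 22);
  return 0;
}
```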
paddle/function/MulOp.h

```diff
@@ -15,12 +15,11 @@ limitations under the License. */
 #pragma once
 
 #include "Function.h"
-/// todo(tianbing), delete it
-#include <iostream>
 #include "paddle/math/Matrix.h"
 #include "paddle/math/SparseMatrix.h"
 
 namespace paddle {
+/// CPU, dense matrix (+)= dense matrix * dense matrix
 template <DeviceType DType>
 void MulOp(CpuMatrix& out,
            const CpuMatrix& a,

@@ -31,6 +30,7 @@ void MulOp(CpuMatrix& out,
            bool bTrans,
            bool cTrans);
 
+/// CPU, dense matrix (+)= sparse matrix * dense matrix
 template <DeviceType DType>
 void MulOp(CpuMatrix& out,
            const CpuSparseMatrix& a,

@@ -41,6 +41,7 @@ void MulOp(CpuMatrix& out,
            bool bTrans,
            bool cTrans);
 
+/// CPU, dense matrix (+)= dense matrix * sparse matrix
 template <DeviceType DType>
 void MulOp(CpuMatrix& out,
            const CpuMatrix& a,

@@ -51,6 +52,7 @@ void MulOp(CpuMatrix& out,
            bool bTrans,
            bool cTrans);
 
+/// CPU, sparse matrix (+)= dense matrix * dense matrix
 template <DeviceType DType>
 void MulOp(CpuSparseMatrix& out,
            const CpuMatrix& a,

@@ -61,6 +63,7 @@ void MulOp(CpuSparseMatrix& out,
            bool bTrans,
            bool cTrans);
 
+/// GPU, dense matrix (+)= dense matrix * dense matrix
 template <DeviceType DType>
 void MulOp(GpuMatrix& out,
            const GpuMatrix& a,

@@ -71,6 +74,7 @@ void MulOp(GpuMatrix& out,
            bool bTrans,
            bool cTrans);
 
+/// GPU, dense matrix (+)= sparse matrix * dense matrix
 template <DeviceType DType>
 void MulOp(GpuMatrix& out,
            const GpuSparseMatrix& a,

@@ -81,6 +85,7 @@ void MulOp(GpuMatrix& out,
            bool bTrans,
            bool cTrans);
 
+/// GPU, dense matrix (+)= dense matrix * sparse matrix
 template <DeviceType DType>
 void MulOp(GpuMatrix& out,
            const GpuMatrix& a,

@@ -90,7 +95,7 @@ void MulOp(GpuMatrix& out,
            bool aTrans,
            bool bTrans,
            bool cTrans);
 
+/// GPU, sparse matrix (+)= dense matrix * dense matrix
 template <DeviceType DType>
 void MulOp(GpuSparseMatrix& out,
            const GpuMatrix& a,
```
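Each declaration above is a `MulOp` overload templated on `DeviceType`, and the bodies live in the explicit specializations in MulOp.cpp and MulOpGpu.cu. A stripped-down sketch of that declare-then-specialize pattern, using toy matrix types and assumed enum values rather than the real Paddle classes:

```cpp
#include <iostream>

enum DeviceType { DEVICE_TYPE_CPU = 1, DEVICE_TYPE_GPU = 2 };  // values assumed

// Toy stand-ins for CpuMatrix / GpuMatrix.
struct ToyCpuMatrix {};
struct ToyGpuMatrix {};

// Overloaded primary templates are only declared, as in MulOp.h ...
template <DeviceType DType>
void ToyMulOp(ToyCpuMatrix& out, const ToyCpuMatrix& a, const ToyCpuMatrix& b);

template <DeviceType DType>
void ToyMulOp(ToyGpuMatrix& out, const ToyGpuMatrix& a, const ToyGpuMatrix& b);

// ... and each device supplies an explicit specialization, as in MulOp.cpp
// and MulOpGpu.cu.
template <>
void ToyMulOp<DEVICE_TYPE_CPU>(ToyCpuMatrix&, const ToyCpuMatrix&,
                               const ToyCpuMatrix&) {
  std::cout << "CPU dense * dense\n";
}

template <>
void ToyMulOp<DEVICE_TYPE_GPU>(ToyGpuMatrix&, const ToyGpuMatrix&,
                               const ToyGpuMatrix&) {
  std::cout << "GPU dense * dense\n";
}

int main() {
  ToyCpuMatrix cOut, cA, cB;
  ToyGpuMatrix gOut, gA, gB;
  ToyMulOp<DEVICE_TYPE_CPU>(cOut, cA, cB);
  ToyMulOp<DEVICE_TYPE_GPU>(gOut, gA, gB);
  return 0;
}
```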
paddle/function/MulOpGpu.cu

```diff
@@ -18,10 +18,7 @@ limitations under the License. */
 #include "paddle/math/SparseMatrix.h"
 
 namespace paddle {
-/**
- * out = scaleT * out + scaleAB * (a * b)
- * out : output matrix, M * N
- */
+/// dense matrix (+)= dense matrix * dense matrix
 template <>
 void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
                             const GpuMatrix& a,

@@ -32,14 +29,11 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
                             bool bTrans,
                             bool cTrans) {
   CHECK(a.useGpu_ && b.useGpu_) << "matrix device type not match";
-  real* aData = const_cast<real*>(a.getData());
-  real* bData = const_cast<real*>(b.getData());
-  real* outData = const_cast<real*>(out.getData());
-  hl_matrix_mul(aData,
+  hl_matrix_mul(const_cast<real*>(a.getData()),
                 !aTrans ? HPPL_OP_N : HPPL_OP_T,
-                bData,
+                const_cast<real*>(b.getData()),
                 !bTrans ? HPPL_OP_N : HPPL_OP_T,
-                outData,
+                const_cast<real*>(out.getData()),
                 out.getHeight(),
                 out.getWidth(),
                 !aTrans ? a.getWidth() : a.getHeight(),

@@ -50,10 +44,7 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
                 out.getStride());
 }
 
-/**
- * out = scaleT * out + scaleAB * (a * b)
- * out : M * N
- */
+/// dense matrix (+)= sparse matrix * dense matrix
 template <>
 void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
                             const GpuSparseMatrix& a,

@@ -66,15 +57,11 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
   CHECK(out.isContiguous());
   CHECK(b.isContiguous());
   CHECK(a.useGpu_ && b.useGpu_) << "matrix device type not match";
-  hl_sparse_matrix_s aData = a.sMatrix_.get();
-  real* bData = const_cast<real*>(b.getData());
-  real* outData = const_cast<real*>(out.getData());
-  hl_matrix_csr_mul_dense(aData,
+  hl_matrix_csr_mul_dense(a.sMatrix_.get(),
                           aTrans ? HPPL_OP_T : HPPL_OP_N,
-                          bData,
+                          const_cast<real*>(b.getData()),
                           HPPL_OP_N,
-                          outData,
+                          const_cast<real*>(out.getData()),
                           out.getHeight(),
                           out.getWidth(),
                           b.getHeight(),

@@ -82,10 +69,7 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
                           scaleT);
 }
 
-/**
- * out = scaleT * out + scaleAB * (a * b)
- * out : M * N
- */
+/// dense matrix (+)= dense matrix * sparse matrix
 template <>
 void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
                             const GpuMatrix& a,

@@ -99,27 +83,23 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
   CHECK(a.isContiguous());
   CHECK(a.useGpu_ && b.useGpu_) << "matrix device type not match";
-  hl_sparse_matrix_s bData = b.sMatrix_.get();
-  real* aData = const_cast<real*>(a.getData());
-  real* outData = const_cast<real*>(out.getData());
   if (b.format_ == SPARSE_CSC) {
-    hl_matrix_dense_mul_csc(aData,
+    hl_matrix_dense_mul_csc(const_cast<real*>(a.getData()),
                             HPPL_OP_N,
-                            bData,
+                            b.sMatrix_.get(),
                             bTrans ? HPPL_OP_T : HPPL_OP_N,
-                            outData,
+                            const_cast<real*>(out.getData()),
                             out.getHeight(),
                             out.getWidth(),
                             a.getWidth(),
                             scaleAB,
                             scaleT);
   } else {
-    hl_matrix_dense_mul_csr(aData,
+    hl_matrix_dense_mul_csr(const_cast<real*>(a.getData()),
                             HPPL_OP_N,
-                            bData,
+                            b.sMatrix_.get(),
                             bTrans ? HPPL_OP_T : HPPL_OP_N,
-                            outData,
+                            const_cast<real*>(out.getData()),
                             out.getHeight(),
                             out.getWidth(),
                             a.getWidth(),

@@ -128,6 +108,7 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
   }
 }
 
+/// sparse matrix (+)= dense matrix * dense matrix
 template <>
 void MulOp<DEVICE_TYPE_GPU>(GpuSparseMatrix& out,
                             const GpuMatrix& a,

@@ -138,16 +119,11 @@ void MulOp<DEVICE_TYPE_GPU>(GpuSparseMatrix& out,
                             bool bTrans,
                             bool cTrans) {
   CHECK(a.useGpu_ && b.useGpu_) << "matrix device type not match";
-  real* aData = const_cast<real*>(a.getData());
-  real* bData = const_cast<real*>(b.getData());
-  hl_sparse_matrix_s outData = out.sMatrix_.get();
-  hl_sparse_matrix_mul(aData,
+  hl_sparse_matrix_mul(const_cast<real*>(a.getData()),
                        aTrans ? HPPL_OP_T : HPPL_OP_N,
-                       bData,
+                       const_cast<real*>(b.getData()),
                        bTrans ? HPPL_OP_T : HPPL_OP_N,
-                       outData,
+                       out.sMatrix_.get(),
                        out.getHeight(),
                        out.getWidth(),
                        !bTrans ? b.getHeight() : b.getWidth(),
```
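Each GPU path now passes the raw pointers and sparse handles inline and derives both the HPPL_OP_N / HPPL_OP_T flags and the matching dimensions from the transpose arguments; for instance `!aTrans ? a.getWidth() : a.getHeight()` is the shared inner dimension K of the product. A tiny sketch of that dimension bookkeeping (plain C++, not the hl_* API; the `Dims` struct and `sharedDim` helper are invented for illustration):

```cpp
#include <cassert>
#include <cstddef>

// For out(M x N) = op(a) * op(b), the GEMM-style kernels receive M, N and the
// shared dimension K. K comes from a: its width when a is used as-is, its
// height when a is transposed, i.e. `!aTrans ? a.getWidth() : a.getHeight()`.
struct Dims {
  size_t height;
  size_t width;
};

size_t sharedDim(const Dims& a, bool aTrans) {
  return !aTrans ? a.width : a.height;
}

int main() {
  Dims a{/*height=*/3, /*width=*/5};
  assert(sharedDim(a, /*aTrans=*/false) == 5);  // out(3 x N) = a(3 x 5) * b(5 x N)
  assert(sharedDim(a, /*aTrans=*/true) == 3);   // out(5 x N) = a^T(5 x 3) * b(3 x N)
  return 0;
}
```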
paddle/function/MulOpTest.cpp

```diff
@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include <gtest/gtest.h>
-/// todo(tianbing), delete
-#include <iostream>
 #include "FunctionTest.h"
 #include "paddle/math/Matrix.h"
 #include "paddle/math/SparseMatrix.h"
```
paddle/function/TensorType.h

```diff
@@ -31,6 +31,10 @@ enum DeviceType {
   DEVICE_TYPE_GPU = 2
 };
 
+enum SparseDataType { T_NO_VALUE = 0, T_FLOAT_VALUE = 1 };
+
+enum SparseDataFormat { T_SPARSE_CSR = 0, T_SPARSE_CSC = 1 };
+
 inline int sizeOfValuType(ValueType valueType) {
   if (valueType == VALUE_TYPE_INT32) {
     return 4;
```
paddle/math/Matrix.h

```diff
@@ -31,6 +31,7 @@ limitations under the License. */
 namespace paddle {
 
+/// TODO(tianbing), move to paddle/function/TensorType.h
 enum SparseValueType { NO_VALUE = 0, FLOAT_VALUE = 1 };
 
 /**

@@ -56,6 +57,7 @@ enum SparseValueType { NO_VALUE = 0, FLOAT_VALUE = 1 };
  * value [1, 1, 2, 2, 5]
  * @endcode
  */
+/// TODO(tianbing), move to paddle/function/TensorType.h
 enum SparseFormat { SPARSE_CSR = 0, SPARSE_CSC = 1 };
 
 class Matrix;
```