Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
b3be7358
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b3be7358
编写于
1月 23, 2017
作者:
X
xutianbing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Daoyuan's comments.
上级
bc5d7bb6
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
217 addition
and
259 deletion
+217
-259
paddle/function/BufferArg.h
paddle/function/BufferArg.h
+12
-25
paddle/function/FunctionTest.h
paddle/function/FunctionTest.h
+13
-27
paddle/function/MulOp.cpp
paddle/function/MulOp.cpp
+86
-87
paddle/function/MulOp.h
paddle/function/MulOp.h
+32
-8
paddle/function/MulOpGpu.cu
paddle/function/MulOpGpu.cu
+35
-79
paddle/function/MulOpTest.cpp
paddle/function/MulOpTest.cpp
+39
-33
未找到文件。
paddle/function/BufferArg.h
浏览文件 @
b3be7358
...
@@ -71,24 +71,17 @@ public:
...
@@ -71,24 +71,17 @@ public:
public:
public:
BufferArg
(
ValueType
valueType
,
BufferArg
(
ValueType
valueType
,
const
TensorShape
&
shape
,
const
TensorShape
&
shape
,
ArgType
argType
=
UNSPECIFIED
,
ArgType
argType
=
UNSPECIFIED
)
bool
trans
=
false
)
:
buf_
(
nullptr
),
:
buf_
(
nullptr
),
valueType_
(
valueType
),
valueType_
(
valueType
),
shape_
(
shape
),
shape_
(
shape
),
argType_
(
argType
),
argType_
(
argType
)
{}
trans_
(
trans
)
{}
BufferArg
(
void
*
buf
,
BufferArg
(
void
*
buf
,
ValueType
valueType
,
ValueType
valueType
,
const
TensorShape
&
shape
,
const
TensorShape
&
shape
,
ArgType
argType
=
UNSPECIFIED
,
ArgType
argType
=
UNSPECIFIED
)
bool
trans
=
false
)
:
buf_
(
buf
),
valueType_
(
valueType
),
shape_
(
shape
),
argType_
(
argType
)
{}
:
buf_
(
buf
),
valueType_
(
valueType
),
shape_
(
shape
),
argType_
(
argType
),
trans_
(
trans
)
{}
BufferArg
(
void
*
buf
,
ValueType
valueType
)
BufferArg
(
void
*
buf
,
ValueType
valueType
)
:
buf_
(
buf
),
valueType_
(
valueType
)
{}
:
buf_
(
buf
),
valueType_
(
valueType
)
{}
...
@@ -98,8 +91,7 @@ public:
...
@@ -98,8 +91,7 @@ public:
const_cast
<
void
*>
(
reinterpret_cast
<
const
void
*>
(
matrix
.
getData
()))),
const_cast
<
void
*>
(
reinterpret_cast
<
const
void
*>
(
matrix
.
getData
()))),
valueType_
(
DataType
<
real
>::
value
),
valueType_
(
DataType
<
real
>::
value
),
shape_
(
2
),
shape_
(
2
),
argType_
(
argType
),
argType_
(
argType
)
{
trans_
(
matrix
.
isTransposed
())
{
bufferType_
=
TENSOR_NORMAL
;
bufferType_
=
TENSOR_NORMAL
;
shape_
.
setDim
(
0
,
matrix
.
getHeight
());
shape_
.
setDim
(
0
,
matrix
.
getHeight
());
shape_
.
setDim
(
1
,
matrix
.
getWidth
());
shape_
.
setDim
(
1
,
matrix
.
getWidth
());
...
@@ -112,8 +104,7 @@ public:
...
@@ -112,8 +104,7 @@ public:
const_cast
<
void
*>
(
reinterpret_cast
<
const
void
*>
(
matrix
.
getData
()))),
const_cast
<
void
*>
(
reinterpret_cast
<
const
void
*>
(
matrix
.
getData
()))),
valueType_
(
DataType
<
real
>::
value
),
valueType_
(
DataType
<
real
>::
value
),
shape_
(
shape
),
shape_
(
shape
),
argType_
(
argType
),
argType_
(
argType
)
{
trans_
(
matrix
.
isTransposed
())
{
bufferType_
=
TENSOR_NORMAL
;
bufferType_
=
TENSOR_NORMAL
;
CHECK_EQ
(
matrix
.
getElementCnt
(),
shape
.
getElements
());
CHECK_EQ
(
matrix
.
getElementCnt
(),
shape
.
getElements
());
}
}
...
@@ -145,7 +136,7 @@ public:
...
@@ -145,7 +136,7 @@ public:
// CHECK(deviceType_ == DType);
// CHECK(deviceType_ == DType);
CHECK_EQ
((
size_t
)
2
,
shape_
.
ndims
());
CHECK_EQ
((
size_t
)
2
,
shape_
.
ndims
());
return
typename
Tensor
<
real
,
DType
>::
Matrix
(
return
typename
Tensor
<
real
,
DType
>::
Matrix
(
reinterpret_cast
<
real
*>
(
buf_
),
shape_
[
0
],
shape_
[
1
]
,
trans_
);
reinterpret_cast
<
real
*>
(
buf_
),
shape_
[
0
],
shape_
[
1
]);
}
}
template
<
typename
VType
,
DeviceType
DType
>
template
<
typename
VType
,
DeviceType
DType
>
...
@@ -169,7 +160,6 @@ public:
...
@@ -169,7 +160,6 @@ public:
ValueType
valueType
()
const
{
return
valueType_
;
}
ValueType
valueType
()
const
{
return
valueType_
;
}
BufferType
bufferType
()
const
{
return
bufferType_
;
}
BufferType
bufferType
()
const
{
return
bufferType_
;
}
const
TensorShape
&
shape
()
const
{
return
shape_
;
}
const
TensorShape
&
shape
()
const
{
return
shape_
;
}
bool
isTransposed
()
const
{
return
trans_
;
}
bool
isSparseArg
()
const
{
return
TENSOR_SPARSE
==
bufferType_
;
}
bool
isSparseArg
()
const
{
return
TENSOR_SPARSE
==
bufferType_
;
}
bool
isSequenceArg
()
const
{
return
TENSOR_SEQUENCE_DATA
==
bufferType_
;
}
bool
isSequenceArg
()
const
{
return
TENSOR_SEQUENCE_DATA
==
bufferType_
;
}
virtual
size_t
numElements
()
const
{
return
shape_
.
getElements
();
}
virtual
size_t
numElements
()
const
{
return
shape_
.
getElements
();
}
...
@@ -183,7 +173,6 @@ protected:
...
@@ -183,7 +173,6 @@ protected:
TensorShape
shape_
;
TensorShape
shape_
;
BufferType
bufferType_
{
TENSOR_UNKNOWN
};
BufferType
bufferType_
{
TENSOR_UNKNOWN
};
ArgType
argType_
{
UNSPECIFIED
};
ArgType
argType_
{
UNSPECIFIED
};
bool
trans_
{
false
};
// todo(tianbing), add deviceType_
// todo(tianbing), add deviceType_
// leading dimensions. The size is dims_.size()
// leading dimensions. The size is dims_.size()
// Dims lds_;
// Dims lds_;
...
@@ -277,9 +266,8 @@ public:
...
@@ -277,9 +266,8 @@ public:
size_t
nnz
,
size_t
nnz
,
SparseFormat
format
,
SparseFormat
format
,
SparseValueType
type
,
SparseValueType
type
,
ArgType
argType
=
UNSPECIFIED
,
ArgType
argType
=
UNSPECIFIED
)
bool
trans
=
false
)
:
BufferArg
(
buf
,
valueType
,
shape
,
argType
),
:
BufferArg
(
buf
,
valueType
,
shape
,
argType
,
trans
),
row_
(
row
),
row_
(
row
),
col_
(
col
),
col_
(
col
),
nnz_
(
nnz
),
nnz_
(
nnz
),
...
@@ -302,9 +290,8 @@ public:
...
@@ -302,9 +290,8 @@ public:
size_t
nnz
,
size_t
nnz
,
SparseFormat
format
,
SparseFormat
format
,
SparseValueType
type
,
SparseValueType
type
,
ArgType
argType
=
UNSPECIFIED
,
ArgType
argType
=
UNSPECIFIED
)
bool
trans
=
false
)
:
BufferArg
(
valueType
,
shape
,
argType
),
:
BufferArg
(
valueType
,
shape
,
argType
,
trans
),
/// len of row_ : height + 1 (CSR), buf_ == nullptr
/// len of row_ : height + 1 (CSR), buf_ == nullptr
row_
(
format
==
SPARSE_CSR
row_
(
format
==
SPARSE_CSR
?
BufferArg
(
VALUE_TYPE_INT32
,
TensorShape
{
shape
[
0
]
+
1
})
?
BufferArg
(
VALUE_TYPE_INT32
,
TensorShape
{
shape
[
0
]
+
1
})
...
@@ -343,7 +330,7 @@ public:
...
@@ -343,7 +330,7 @@ public:
nnz_
,
nnz_
,
type_
,
type_
,
format_
,
format_
,
trans_
);
false
);
}
}
~
SparseMatrixArg
()
{}
~
SparseMatrixArg
()
{}
...
...
paddle/function/FunctionTest.h
浏览文件 @
b3be7358
...
@@ -64,22 +64,14 @@ public:
...
@@ -64,22 +64,14 @@ public:
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
cpuInputs_
.
emplace_back
(
cpuInputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
std
::
make_shared
<
BufferArg
>
(
cpuMemory_
.
back
()
->
getBuf
(),
cpuMemory_
.
back
()
->
getBuf
(),
input
.
valueType
(),
input
.
shape
()));
input
.
valueType
(),
gpuInputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
input
.
shape
(),
gpuMemory_
.
back
()
->
getBuf
(),
input
.
valueType
(),
input
.
shape
()));
UNSPECIFIED
,
input
.
isTransposed
()));
gpuInputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
gpuMemory_
.
back
()
->
getBuf
(),
input
.
valueType
(),
input
.
shape
(),
UNSPECIFIED
,
input
.
isTransposed
()));
}
}
// output need only contains shape, do not contains data.
// output need only contains shape, do not contains data.
void
addOutputs
(
const
BufferArg
&
output
,
ArgType
argType
=
A
SSIGN
_TO
)
{
void
addOutputs
(
const
BufferArg
&
output
,
ArgType
argType
=
A
DD
_TO
)
{
size_t
size
=
size_t
size
=
output
.
shape
().
getElements
()
*
sizeOfValuType
(
output
.
valueType
());
output
.
shape
().
getElements
()
*
sizeOfValuType
(
output
.
valueType
());
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
...
@@ -89,16 +81,14 @@ public:
...
@@ -89,16 +81,14 @@ public:
cpuMemory_
.
back
()
->
getBuf
(),
cpuMemory_
.
back
()
->
getBuf
(),
output
.
valueType
(),
output
.
valueType
(),
output
.
shape
(),
output
.
shape
(),
// todo(tianbing), argType = output.getArgType(), but default ASSIGN_TO
// todo(tianbing), argType = output.getArgType(), but default ADD_TO
argType
,
argType
));
output
.
isTransposed
()));
gpuOutputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
gpuOutputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
gpuMemory_
.
back
()
->
getBuf
(),
gpuMemory_
.
back
()
->
getBuf
(),
output
.
valueType
(),
output
.
valueType
(),
output
.
shape
(),
output
.
shape
(),
// todo(tianbing), argType = output.getArgType(), but default ASSIGN_TO
// todo(tianbing), argType = output.getArgType(), but default ADD_TO
argType
,
argType
));
output
.
isTransposed
()));
}
}
/// add and init output sparse matrix
/// add and init output sparse matrix
...
@@ -107,15 +97,13 @@ public:
...
@@ -107,15 +97,13 @@ public:
output
.
shape
()[
1
],
output
.
shape
()[
1
],
output
.
nnz
(),
output
.
nnz
(),
output
.
dataType
(),
output
.
dataType
(),
output
.
dataFormat
(),
output
.
dataFormat
());
output
.
isTransposed
());
gpuSparse_
=
std
::
make_shared
<
GpuSparseMatrix
>
(
output
.
shape
()[
0
],
gpuSparse_
=
std
::
make_shared
<
GpuSparseMatrix
>
(
output
.
shape
()[
0
],
output
.
shape
()[
1
],
output
.
shape
()[
1
],
output
.
nnz
(),
output
.
nnz
(),
output
.
dataType
(),
output
.
dataType
(),
output
.
dataFormat
(),
output
.
dataFormat
());
output
.
isTransposed
());
/// init sparse matrix
/// init sparse matrix
hl_stream_t
stream
(
HPPL_STREAM_1
);
hl_stream_t
stream
(
HPPL_STREAM_1
);
...
@@ -154,15 +142,13 @@ public:
...
@@ -154,15 +142,13 @@ public:
input
.
shape
()[
1
],
input
.
shape
()[
1
],
input
.
nnz
(),
input
.
nnz
(),
input
.
dataType
(),
input
.
dataType
(),
input
.
dataFormat
(),
input
.
dataFormat
());
input
.
isTransposed
());
gpuSparse_
=
std
::
make_shared
<
GpuSparseMatrix
>
(
input
.
shape
()[
0
],
gpuSparse_
=
std
::
make_shared
<
GpuSparseMatrix
>
(
input
.
shape
()[
0
],
input
.
shape
()[
1
],
input
.
shape
()[
1
],
input
.
nnz
(),
input
.
nnz
(),
input
.
dataType
(),
input
.
dataType
(),
input
.
dataFormat
(),
input
.
dataFormat
());
input
.
isTransposed
());
/// init sparse matrix
/// init sparse matrix
hl_stream_t
stream
(
HPPL_STREAM_1
);
hl_stream_t
stream
(
HPPL_STREAM_1
);
...
...
paddle/function/MulOp.cpp
浏览文件 @
b3be7358
...
@@ -46,21 +46,11 @@ void MulOp<DEVICE_TYPE_CPU>(CpuSparseMatrix& out,
...
@@ -46,21 +46,11 @@ void MulOp<DEVICE_TYPE_CPU>(CpuSparseMatrix& out,
const
CpuMatrix
&
a
,
const
CpuMatrix
&
a
,
const
CpuMatrix
&
b
,
const
CpuMatrix
&
b
,
real
scaleAB
,
real
scaleAB
,
real
scaleT
)
{
real
scaleT
,
CHECK
(
!
out
.
isTransposed
())
<<
"Not supported"
;
bool
aTrans
,
bool
bTrans
,
bool
cTrans
)
{
CHECK_EQ
(
out
.
getValueType
(),
FLOAT_VALUE
);
CHECK_EQ
(
out
.
getValueType
(),
FLOAT_VALUE
);
CHECK
(
!
a
.
isTransposed
()
||
!
b
.
isTransposed
())
<<
"Not support both a and b are transpose matrices"
;
size_t
height
=
out
.
getHeight
();
size_t
width
=
out
.
getWidth
();
size_t
aRow
=
!
a
.
isTransposed
()
?
a
.
getHeight
()
:
a
.
getWidth
();
size_t
aCol
=
!
a
.
isTransposed
()
?
a
.
getWidth
()
:
a
.
getHeight
();
size_t
bRow
=
!
b
.
isTransposed
()
?
b
.
getHeight
()
:
b
.
getWidth
();
size_t
bCol
=
!
b
.
isTransposed
()
?
b
.
getWidth
()
:
b
.
getHeight
();
/// C = A * B, for matrix format
CHECK
(
aCol
==
bRow
&&
aRow
==
height
&&
bCol
==
width
);
if
(
scaleT
==
0
)
{
if
(
scaleT
==
0
)
{
out
.
zeroMem
();
out
.
zeroMem
();
}
}
...
@@ -69,12 +59,14 @@ void MulOp<DEVICE_TYPE_CPU>(CpuSparseMatrix& out,
...
@@ -69,12 +59,14 @@ void MulOp<DEVICE_TYPE_CPU>(CpuSparseMatrix& out,
real
*
C
=
out
.
getValue
();
real
*
C
=
out
.
getValue
();
int
*
rows
=
out
.
getRows
();
int
*
rows
=
out
.
getRows
();
int
*
cols
=
out
.
getCols
();
int
*
cols
=
out
.
getCols
();
size_t
width
=
out
.
getWidth
();
size_t
height
=
out
.
getHeight
();
/// SPARSE_CSC, {a any, b not trans}
/// SPARSE_CSC, {a any, b not trans}
if
(
out
.
getFormat
()
==
SPARSE_CSC
)
{
if
(
out
.
getFormat
()
==
SPARSE_CSC
)
{
/// b not trans and a any
/// b not trans and a any
CHECK
(
!
b
.
isTransposed
()
);
CHECK
(
!
b
Trans
);
size_t
m
=
!
a
.
isTransposed
()
?
a
.
getWidth
()
:
a
.
getHeight
();
size_t
m
=
!
a
Trans
?
a
.
getWidth
()
:
a
.
getHeight
();
for
(
size_t
i
=
0
;
i
<
width
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
width
;
i
++
)
{
size_t
start
=
out
.
getColStartIdx
(
i
);
size_t
start
=
out
.
getColStartIdx
(
i
);
size_t
end
=
out
.
getColStartIdx
(
i
+
1
);
size_t
end
=
out
.
getColStartIdx
(
i
+
1
);
...
@@ -82,9 +74,8 @@ void MulOp<DEVICE_TYPE_CPU>(CpuSparseMatrix& out,
...
@@ -82,9 +74,8 @@ void MulOp<DEVICE_TYPE_CPU>(CpuSparseMatrix& out,
real
sum
=
0
;
real
sum
=
0
;
size_t
rowIdx
=
rows
[
j
];
size_t
rowIdx
=
rows
[
j
];
for
(
size_t
k
=
0
;
k
<
m
;
k
++
)
{
for
(
size_t
k
=
0
;
k
<
m
;
k
++
)
{
sum
+=
sum
+=
(
!
aTrans
?
A
[
rowIdx
*
m
+
k
]
:
A
[
k
*
height
+
rowIdx
])
*
(
!
a
.
isTransposed
()
?
A
[
rowIdx
*
m
+
k
]
:
A
[
k
*
height
+
rowIdx
])
*
B
[
k
*
width
+
i
];
B
[
k
*
width
+
i
];
}
}
C
[
j
]
=
scaleAB
*
sum
+
scaleT
*
C
[
j
];
C
[
j
]
=
scaleAB
*
sum
+
scaleT
*
C
[
j
];
}
}
...
@@ -95,7 +86,7 @@ void MulOp<DEVICE_TYPE_CPU>(CpuSparseMatrix& out,
...
@@ -95,7 +86,7 @@ void MulOp<DEVICE_TYPE_CPU>(CpuSparseMatrix& out,
/// SPARSE_CSR, {a any, b not trans} or {a not trans, b trans}
/// SPARSE_CSR, {a any, b not trans} or {a not trans, b trans}
if
(
out
.
getFormat
()
==
SPARSE_CSR
)
{
if
(
out
.
getFormat
()
==
SPARSE_CSR
)
{
/// a and b can not both transpose
/// a and b can not both transpose
CHECK
(
!
(
a
.
isTransposed
()
&&
b
.
isTransposed
()
));
CHECK
(
!
(
a
Trans
&&
bTrans
));
size_t
m
=
a
.
getWidth
();
size_t
m
=
a
.
getWidth
();
for
(
size_t
i
=
0
;
i
<
height
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
height
;
i
++
)
{
size_t
start
=
out
.
getRowStartIdx
(
i
);
size_t
start
=
out
.
getRowStartIdx
(
i
);
...
@@ -104,9 +95,8 @@ void MulOp<DEVICE_TYPE_CPU>(CpuSparseMatrix& out,
...
@@ -104,9 +95,8 @@ void MulOp<DEVICE_TYPE_CPU>(CpuSparseMatrix& out,
real
sum
=
0
;
real
sum
=
0
;
size_t
colIdx
=
cols
[
j
];
size_t
colIdx
=
cols
[
j
];
for
(
size_t
k
=
0
;
k
<
m
;
k
++
)
{
for
(
size_t
k
=
0
;
k
<
m
;
k
++
)
{
sum
+=
sum
+=
(
!
aTrans
?
A
[
i
*
m
+
k
]
:
A
[
k
*
height
+
i
])
*
(
!
a
.
isTransposed
()
?
A
[
i
*
m
+
k
]
:
A
[
k
*
height
+
i
])
*
(
!
bTrans
?
B
[
k
*
width
+
colIdx
]
:
B
[
colIdx
*
m
+
k
]);
(
!
b
.
isTransposed
()
?
B
[
k
*
width
+
colIdx
]
:
B
[
colIdx
*
m
+
k
]);
}
}
C
[
j
]
=
scaleAB
*
sum
+
scaleT
*
C
[
j
];
C
[
j
]
=
scaleAB
*
sum
+
scaleT
*
C
[
j
];
}
}
...
@@ -120,25 +110,15 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
...
@@ -120,25 +110,15 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
const
CpuMatrix
&
a
,
const
CpuMatrix
&
a
,
const
CpuMatrix
&
b
,
const
CpuMatrix
&
b
,
real
scaleAB
,
real
scaleAB
,
real
scaleT
)
{
real
scaleT
,
CHECK
(
!
out
.
isTransposed
())
<<
"out matrix transpose not supported"
;
bool
aTrans
,
CBLAS_TRANSPOSE
aTrans
=
a
.
isTransposed
()
?
CblasTrans
:
CblasNoTrans
;
bool
bTrans
,
size_t
aRow
=
a
.
isTransposed
()
?
a
.
getWidth
()
:
a
.
getHeight
();
bool
cTrans
)
{
size_t
aCol
=
a
.
isTransposed
()
?
a
.
getHeight
()
:
a
.
getWidth
();
GEMM
(
aTrans
?
CblasTrans
:
CblasNoTrans
,
CBLAS_TRANSPOSE
bTrans
=
b
.
isTransposed
()
?
CblasTrans
:
CblasNoTrans
;
bTrans
?
CblasTrans
:
CblasNoTrans
,
size_t
bRow
=
b
.
isTransposed
()
?
b
.
getWidth
()
:
b
.
getHeight
();
size_t
bCol
=
b
.
isTransposed
()
?
b
.
getHeight
()
:
b
.
getWidth
();
/// C = A * B, for matrix format
CHECK_EQ
(
aCol
,
bRow
);
CHECK_EQ
(
aRow
,
out
.
getHeight
());
CHECK_EQ
(
bCol
,
out
.
getWidth
());
GEMM
(
aTrans
,
bTrans
,
out
.
getHeight
(),
out
.
getHeight
(),
out
.
getWidth
(),
out
.
getWidth
(),
aCol
,
!
aTrans
?
a
.
getWidth
()
:
a
.
getHeight
()
,
scaleAB
,
scaleAB
,
a
.
getData
(),
a
.
getData
(),
a
.
getStride
(),
a
.
getStride
(),
...
@@ -154,21 +134,12 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
...
@@ -154,21 +134,12 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
const
CpuSparseMatrix
&
a
,
const
CpuSparseMatrix
&
a
,
const
CpuMatrix
&
b
,
const
CpuMatrix
&
b
,
real
scaleAB
,
real
scaleAB
,
real
scaleT
)
{
real
scaleT
,
CHECK
(
!
out
.
isTransposed
())
<<
"Not supported"
;
bool
aTrans
,
CHECK
(
!
b
.
isTransposed
())
<<
"Not supported"
;
bool
bTrans
,
CHECK
(
scaleT
==
0
||
scaleT
==
1
)
<<
"Not support"
;
bool
cTrans
)
{
CHECK_EQ
(
scaleAB
,
static_cast
<
real
>
(
1.0
))
<<
"Not supported"
;
CHECK_EQ
(
a
.
getFormat
(),
SPARSE_CSR
)
CHECK_EQ
(
a
.
getFormat
(),
SPARSE_CSR
)
<<
"Not supported"
;
<<
"Not supported SPARSE_CSR format for a"
;
if
(
!
a
.
isTransposed
())
{
CHECK
(
b
.
getHeight
()
==
a
.
getWidth
()
&&
a
.
getHeight
()
==
out
.
getHeight
()
&&
b
.
getWidth
()
==
out
.
getWidth
());
}
else
{
CHECK
(
b
.
getHeight
()
==
a
.
getHeight
()
&&
a
.
getWidth
()
==
out
.
getHeight
()
&&
b
.
getWidth
()
==
out
.
getWidth
());
}
if
(
scaleT
==
0
)
{
if
(
scaleT
==
0
)
{
out
.
zeroMem
();
out
.
zeroMem
();
}
}
...
@@ -185,9 +156,9 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
...
@@ -185,9 +156,9 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
const
int
start
=
a
.
getRowStartIdx
(
i
);
const
int
start
=
a
.
getRowStartIdx
(
i
);
const
int
end
=
a
.
getRowStartIdx
(
i
+
1
);
const
int
end
=
a
.
getRowStartIdx
(
i
+
1
);
for
(
int
j
=
start
;
j
<
end
;
++
j
)
{
for
(
int
j
=
start
;
j
<
end
;
++
j
)
{
vecAddTo
(
!
a
.
isTransposed
()
?
out
.
getRow
(
i
)
:
out
.
getRow
(
cols
[
j
]),
vecAddTo
(
!
a
Trans
?
out
.
getRow
(
i
)
:
out
.
getRow
(
cols
[
j
]),
!
a
.
isTransposed
()
?
const_cast
<
CpuMatrix
&>
(
b
).
getRow
(
cols
[
j
])
!
a
Trans
?
const_cast
<
CpuMatrix
&>
(
b
).
getRow
(
cols
[
j
])
:
const_cast
<
CpuMatrix
&>
(
b
).
getRow
(
i
),
:
const_cast
<
CpuMatrix
&>
(
b
).
getRow
(
i
),
(
a
.
getValueType
()
==
FLOAT_VALUE
)
?
values
[
j
]
:
(
real
)
1.0
,
(
a
.
getValueType
()
==
FLOAT_VALUE
)
?
values
[
j
]
:
(
real
)
1.0
,
out
.
getWidth
());
out
.
getWidth
());
}
}
...
@@ -199,19 +170,10 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
...
@@ -199,19 +170,10 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
const
CpuMatrix
&
a
,
const
CpuMatrix
&
a
,
const
CpuSparseMatrix
&
b
,
const
CpuSparseMatrix
&
b
,
real
scaleAB
,
real
scaleAB
,
real
scaleT
)
{
real
scaleT
,
CHECK
(
!
out
.
trans_
)
<<
"Not supported"
;
bool
aTrans
,
CHECK
(
!
a
.
isTransposed
())
<<
"Not supported"
;
bool
bTrans
,
CHECK
(
scaleT
==
0
||
scaleT
==
1
);
bool
cTrans
)
{
CHECK_EQ
(
scaleAB
,
static_cast
<
real
>
(
1.0
));
if
(
!
b
.
isTransposed
())
{
/// b is not Transpose
CHECK
(
b
.
getHeight
()
==
a
.
getWidth
()
&&
a
.
getHeight
()
==
out
.
getHeight
()
&&
b
.
getWidth
()
==
out
.
getWidth
());
}
else
{
CHECK
(
b
.
getHeight
()
==
out
.
getWidth
()
&&
a
.
getHeight
()
==
out
.
getHeight
()
&&
b
.
getWidth
()
==
a
.
getWidth
());
}
if
(
scaleT
==
0
)
{
if
(
scaleT
==
0
)
{
out
.
zeroMem
();
out
.
zeroMem
();
}
}
...
@@ -227,8 +189,8 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
...
@@ -227,8 +189,8 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
int
start
=
b
.
getColStartIdx
(
j
);
int
start
=
b
.
getColStartIdx
(
j
);
int
end
=
b
.
getColStartIdx
(
j
+
1
);
int
end
=
b
.
getColStartIdx
(
j
+
1
);
for
(
int
i
=
start
;
i
<
end
;
++
i
)
{
for
(
int
i
=
start
;
i
<
end
;
++
i
)
{
colVecAddTo
(
!
b
.
isTransposed
()
?
C
+
j
:
C
+
rows
[
i
],
colVecAddTo
(
!
b
Trans
?
C
+
j
:
C
+
rows
[
i
],
!
b
.
isTransposed
()
?
A
+
rows
[
i
]
:
A
+
j
,
!
b
Trans
?
A
+
rows
[
i
]
:
A
+
j
,
(
b
.
getValueType
()
==
NO_VALUE
)
?
(
real
)
1.0
:
B
[
i
],
(
b
.
getValueType
()
==
NO_VALUE
)
?
(
real
)
1.0
:
B
[
i
],
out
.
getHeight
(),
out
.
getHeight
(),
out
.
getWidth
(),
out
.
getWidth
(),
...
@@ -244,8 +206,8 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
...
@@ -244,8 +206,8 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
int
start
=
b
.
getRowStartIdx
(
j
);
int
start
=
b
.
getRowStartIdx
(
j
);
int
end
=
b
.
getRowStartIdx
(
j
+
1
);
int
end
=
b
.
getRowStartIdx
(
j
+
1
);
for
(
int
i
=
start
;
i
<
end
;
++
i
)
{
for
(
int
i
=
start
;
i
<
end
;
++
i
)
{
colVecAddTo
(
!
b
.
isTransposed
()
?
C
+
cols
[
i
]
:
C
+
j
,
colVecAddTo
(
!
b
Trans
?
C
+
cols
[
i
]
:
C
+
j
,
!
b
.
isTransposed
()
?
A
+
j
:
A
+
cols
[
i
],
!
b
Trans
?
A
+
j
:
A
+
cols
[
i
],
(
b
.
getValueType
()
==
NO_VALUE
)
?
(
real
)
1.0
:
B
[
i
],
(
b
.
getValueType
()
==
NO_VALUE
)
?
(
real
)
1.0
:
B
[
i
],
out
.
getHeight
(),
out
.
getHeight
(),
out
.
getWidth
(),
out
.
getWidth
(),
...
@@ -270,16 +232,43 @@ public:
...
@@ -270,16 +232,43 @@ public:
void
init
(
const
FuncConfig
&
config
)
override
{
void
init
(
const
FuncConfig
&
config
)
override
{
alpha_
=
config
.
get
<
real
>
(
"scaleAB"
);
alpha_
=
config
.
get
<
real
>
(
"scaleAB"
);
beta_
=
config
.
get
<
real
>
(
"scaleT"
);
beta_
=
config
.
get
<
real
>
(
"scaleT"
);
aTrans_
=
config
.
get
<
bool
>
(
"aTrans"
);
bTrans_
=
config
.
get
<
bool
>
(
"bTrans"
);
cTrans_
=
config
.
get
<
bool
>
(
"cTrans"
);
}
}
void
calc
(
const
BufferArgs
&
inputs
,
const
BufferArgs
&
outputs
)
override
{
void
calc
(
const
BufferArgs
&
inputs
,
const
BufferArgs
&
outputs
)
override
{
CHECK
(
!
cTrans_
)
<<
"output matrix should not be transposed"
;
CHECK
(
!
aTrans_
||
!
bTrans_
)
<<
"Not support both a and b are transpose matrices"
;
CHECK_EQ
((
size_t
)
2
,
inputs
.
size
());
CHECK_EQ
((
size_t
)
2
,
inputs
.
size
());
CHECK_EQ
((
size_t
)
1
,
outputs
.
size
());
CHECK_EQ
((
size_t
)
1
,
outputs
.
size
());
CHECK
(
inputs
[
0
].
data
()
&&
inputs
[
1
].
data
()
&&
outputs
[
0
].
data
());
CHECK
(
inputs
[
0
].
data
()
&&
inputs
[
1
].
data
()
&&
outputs
[
0
].
data
());
CHECK_EQ
(
inputs
[
0
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
inputs
[
0
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
inputs
[
1
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
inputs
[
1
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
outputs
[
0
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
outputs
[
0
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
outputs
[
0
].
getArgType
(),
ADD_TO
);
size_t
aRow
=
!
aTrans_
?
inputs
[
0
].
shape
()[
0
]
:
inputs
[
0
].
shape
()[
1
];
size_t
aCol
=
!
aTrans_
?
inputs
[
0
].
shape
()[
1
]
:
inputs
[
0
].
shape
()[
0
];
size_t
bRow
=
!
bTrans_
?
inputs
[
1
].
shape
()[
0
]
:
inputs
[
1
].
shape
()[
1
];
size_t
bCol
=
!
bTrans_
?
inputs
[
1
].
shape
()[
1
]
:
inputs
[
1
].
shape
()[
0
];
/// C = A * B, or C += A * B, for matrix format
CHECK_EQ
(
aCol
,
bRow
);
CHECK_EQ
(
aRow
,
outputs
[
0
].
shape
()[
0
]);
CHECK_EQ
(
bCol
,
outputs
[
0
].
shape
()[
1
]);
/// only support C = A * B or C += A * B
CHECK_EQ
(
alpha_
,
static_cast
<
real
>
(
1.0
));
CHECK
((
beta_
==
0
&&
outputs
[
0
].
getArgType
()
==
ASSIGN_TO
)
||
(
beta_
==
1
&&
outputs
[
0
].
getArgType
()
==
ADD_TO
));
/// support dense = not both sparse * sparse
/// or sparse = dense * dense
CHECK
((
!
outputs
[
0
].
isSparseArg
()
&&
!
(
inputs
[
0
].
isSparseArg
()
&&
inputs
[
1
].
isSparseArg
()))
||
(
outputs
[
0
].
isSparseArg
()
&&
!
inputs
[
0
].
isSparseArg
()
&&
!
inputs
[
1
].
isSparseArg
()));
auto
outMat
=
outputs
[
0
].
matrix
<
Device
>
();
auto
outMat
=
outputs
[
0
].
matrix
<
Device
>
();
/// matrix = matrix * matrix
/// matrix = matrix * matrix
...
@@ -289,29 +278,40 @@ public:
...
@@ -289,29 +278,40 @@ public:
inputs
[
0
].
matrix
<
Device
>
(),
inputs
[
0
].
matrix
<
Device
>
(),
inputs
[
1
].
matrix
<
Device
>
(),
inputs
[
1
].
matrix
<
Device
>
(),
alpha_
,
alpha_
,
beta_
);
beta_
,
aTrans_
,
bTrans_
,
cTrans_
);
return
;
return
;
}
}
/// matrix = matrix * sparse matrix
/// matrix = matrix * sparse matrix
if
(
!
inputs
[
0
].
isSparseArg
()
&&
inputs
[
1
].
isSparseArg
()
&&
if
(
!
inputs
[
0
].
isSparseArg
()
&&
inputs
[
1
].
isSparseArg
()
&&
!
outputs
[
0
].
isSparseArg
())
{
!
outputs
[
0
].
isSparseArg
())
{
CHECK
(
!
aTrans_
)
<<
"Not supported a transpose"
;
MulOp
<
Device
>
(
outMat
,
MulOp
<
Device
>
(
outMat
,
inputs
[
0
].
matrix
<
Device
>
(),
inputs
[
0
].
matrix
<
Device
>
(),
inputs
[
1
].
sparse
().
SparseMatrix
<
Device
>
(),
inputs
[
1
].
sparse
().
SparseMatrix
<
Device
>
(),
alpha_
,
alpha_
,
beta_
);
beta_
,
aTrans_
,
bTrans_
,
cTrans_
);
return
;
return
;
}
}
/// matrix = sparse matrix * matrix
/// matrix = sparse matrix * matrix
if
(
inputs
[
0
].
isSparseArg
()
&&
!
inputs
[
1
].
isSparseArg
()
&&
if
(
inputs
[
0
].
isSparseArg
()
&&
!
inputs
[
1
].
isSparseArg
()
&&
!
outputs
[
0
].
isSparseArg
())
{
!
outputs
[
0
].
isSparseArg
())
{
CHECK
(
!
bTrans_
)
<<
"Not supported b transpose"
;
MulOp
<
Device
>
(
outMat
,
MulOp
<
Device
>
(
outMat
,
inputs
[
0
].
sparse
().
SparseMatrix
<
Device
>
(),
inputs
[
0
].
sparse
().
SparseMatrix
<
Device
>
(),
inputs
[
1
].
matrix
<
Device
>
(),
inputs
[
1
].
matrix
<
Device
>
(),
alpha_
,
alpha_
,
beta_
);
beta_
,
aTrans_
,
bTrans_
,
cTrans_
);
return
;
return
;
}
}
...
@@ -319,18 +319,14 @@ public:
...
@@ -319,18 +319,14 @@ public:
auto
outSparseMat
=
outputs
[
0
].
sparse
().
SparseMatrix
<
Device
>
();
auto
outSparseMat
=
outputs
[
0
].
sparse
().
SparseMatrix
<
Device
>
();
if
(
!
inputs
[
0
].
isSparseArg
()
&&
!
inputs
[
1
].
isSparseArg
()
&&
if
(
!
inputs
[
0
].
isSparseArg
()
&&
!
inputs
[
1
].
isSparseArg
()
&&
outputs
[
0
].
isSparseArg
())
{
outputs
[
0
].
isSparseArg
())
{
/*
LOG(INFO) << "input0";
inputs[0].matrix<Device>().print(std::cout);
LOG(INFO) << "input1";
inputs[1].matrix<Device>().print(std::cout);
LOG(INFO) << "output sparse matrix";
outSparseMat.print(std::cout); */
MulOp
<
Device
>
(
outSparseMat
,
MulOp
<
Device
>
(
outSparseMat
,
inputs
[
0
].
matrix
<
Device
>
(),
inputs
[
0
].
matrix
<
Device
>
(),
inputs
[
1
].
matrix
<
Device
>
(),
inputs
[
1
].
matrix
<
Device
>
(),
alpha_
,
alpha_
,
beta_
);
beta_
,
aTrans_
,
bTrans_
,
cTrans_
);
return
;
return
;
}
}
}
}
...
@@ -338,6 +334,9 @@ public:
...
@@ -338,6 +334,9 @@ public:
private:
private:
real
alpha_
;
real
alpha_
;
real
beta_
;
real
beta_
;
bool
aTrans_
;
bool
bTrans_
;
bool
cTrans_
;
};
};
REGISTER_TYPED_FUNC
(
MulOp
,
CPU
,
MulFunc
);
REGISTER_TYPED_FUNC
(
MulOp
,
CPU
,
MulFunc
);
...
...
paddle/function/MulOp.h
浏览文件 @
b3be7358
...
@@ -26,55 +26,79 @@ void MulOp(CpuMatrix& out,
...
@@ -26,55 +26,79 @@ void MulOp(CpuMatrix& out,
const
CpuMatrix
&
a
,
const
CpuMatrix
&
a
,
const
CpuMatrix
&
b
,
const
CpuMatrix
&
b
,
real
scaleAB
,
real
scaleAB
,
real
scaleT
);
real
scaleT
,
bool
aTrans
,
bool
bTrans
,
bool
cTrans
);
template
<
DeviceType
DType
>
template
<
DeviceType
DType
>
void
MulOp
(
CpuMatrix
&
out
,
void
MulOp
(
CpuMatrix
&
out
,
const
CpuSparseMatrix
&
a
,
const
CpuSparseMatrix
&
a
,
const
CpuMatrix
&
b
,
const
CpuMatrix
&
b
,
real
scaleAB
,
real
scaleAB
,
real
scaleT
);
real
scaleT
,
bool
aTrans
,
bool
bTrans
,
bool
cTrans
);
template
<
DeviceType
DType
>
template
<
DeviceType
DType
>
void
MulOp
(
CpuMatrix
&
out
,
void
MulOp
(
CpuMatrix
&
out
,
const
CpuMatrix
&
a
,
const
CpuMatrix
&
a
,
const
CpuSparseMatrix
&
b
,
const
CpuSparseMatrix
&
b
,
real
scaleAB
,
real
scaleAB
,
real
scaleT
);
real
scaleT
,
bool
aTrans
,
bool
bTrans
,
bool
cTrans
);
template
<
DeviceType
DType
>
template
<
DeviceType
DType
>
void
MulOp
(
CpuSparseMatrix
&
out
,
void
MulOp
(
CpuSparseMatrix
&
out
,
const
CpuMatrix
&
a
,
const
CpuMatrix
&
a
,
const
CpuMatrix
&
b
,
const
CpuMatrix
&
b
,
real
scaleAB
,
real
scaleAB
,
real
scaleT
);
real
scaleT
,
bool
aTrans
,
bool
bTrans
,
bool
cTrans
);
template
<
DeviceType
DType
>
template
<
DeviceType
DType
>
void
MulOp
(
GpuMatrix
&
out
,
void
MulOp
(
GpuMatrix
&
out
,
const
GpuMatrix
&
a
,
const
GpuMatrix
&
a
,
const
GpuMatrix
&
b
,
const
GpuMatrix
&
b
,
real
scaleAB
,
real
scaleAB
,
real
scaleT
);
real
scaleT
,
bool
aTrans
,
bool
bTrans
,
bool
cTrans
);
template
<
DeviceType
DType
>
template
<
DeviceType
DType
>
void
MulOp
(
GpuMatrix
&
out
,
void
MulOp
(
GpuMatrix
&
out
,
const
GpuSparseMatrix
&
a
,
const
GpuSparseMatrix
&
a
,
const
GpuMatrix
&
b
,
const
GpuMatrix
&
b
,
real
scaleAB
,
real
scaleAB
,
real
scaleT
);
real
scaleT
,
bool
aTrans
,
bool
bTrans
,
bool
cTrans
);
template
<
DeviceType
DType
>
template
<
DeviceType
DType
>
void
MulOp
(
GpuMatrix
&
out
,
void
MulOp
(
GpuMatrix
&
out
,
const
GpuMatrix
&
a
,
const
GpuMatrix
&
a
,
const
GpuSparseMatrix
&
b
,
const
GpuSparseMatrix
&
b
,
real
scaleAB
,
real
scaleAB
,
real
scaleT
);
real
scaleT
,
bool
aTrans
,
bool
bTrans
,
bool
cTrans
);
template
<
DeviceType
DType
>
template
<
DeviceType
DType
>
void
MulOp
(
GpuSparseMatrix
&
out
,
void
MulOp
(
GpuSparseMatrix
&
out
,
const
GpuMatrix
&
a
,
const
GpuMatrix
&
a
,
const
GpuMatrix
&
b
,
const
GpuMatrix
&
b
,
real
scaleAB
,
real
scaleAB
,
real
scaleT
);
real
scaleT
,
bool
aTrans
,
bool
bTrans
,
bool
cTrans
);
}
// namespace paddle
}
// namespace paddle
paddle/function/MulOpGpu.cu
浏览文件 @
b3be7358
...
@@ -27,38 +27,22 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
...
@@ -27,38 +27,22 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
const
GpuMatrix
&
a
,
const
GpuMatrix
&
a
,
const
GpuMatrix
&
b
,
const
GpuMatrix
&
b
,
real
scaleAB
,
real
scaleAB
,
real
scaleT
)
{
real
scaleT
,
CHECK
(
!
out
.
isTransposed
())
<<
"Transpose not supported for out matrix"
;
bool
aTrans
,
if
(
!
a
.
isTransposed
()
&&
!
b
.
isTransposed
())
{
bool
bTrans
,
/// a : M * K, b: K * N
bool
cTrans
)
{
CHECK
(
out
.
getWidth
()
==
b
.
getWidth
()
&&
CHECK
(
a
.
useGpu_
&&
b
.
useGpu_
)
<<
"matrix device type not match"
;
out
.
getHeight
()
==
a
.
getHeight
()
&&
a
.
getWidth
()
==
b
.
getHeight
());
}
else
if
(
a
.
isTransposed
()
&&
!
b
.
isTransposed
())
{
/// a : K * M, b : K * N
CHECK
(
out
.
getWidth
()
==
b
.
getWidth
()
&&
out
.
getHeight
()
==
a
.
getWidth
()
&&
a
.
getHeight
()
==
b
.
getHeight
());
}
else
if
(
!
a
.
isTransposed
()
&&
b
.
isTransposed
())
{
/// a: M * K, b : N * K
CHECK
(
out
.
getWidth
()
==
b
.
getHeight
()
&&
out
.
getHeight
()
==
a
.
getHeight
()
&&
a
.
getWidth
()
==
b
.
getWidth
());
}
else
{
LOG
(
FATAL
)
<<
"Not support for both a and b are Transposed Matrices"
;
}
real
*
aData
=
const_cast
<
real
*>
(
a
.
getData
());
real
*
aData
=
const_cast
<
real
*>
(
a
.
getData
());
real
*
bData
=
const_cast
<
real
*>
(
b
.
getData
());
real
*
bData
=
const_cast
<
real
*>
(
b
.
getData
());
real
*
outData
=
const_cast
<
real
*>
(
out
.
getData
());
real
*
outData
=
const_cast
<
real
*>
(
out
.
getData
());
hl_matrix_mul
(
aData
,
hl_matrix_mul
(
aData
,
!
a
.
isTransposed
()
?
HPPL_OP_N
:
HPPL_OP_T
,
!
a
Trans
?
HPPL_OP_N
:
HPPL_OP_T
,
bData
,
bData
,
!
b
.
isTransposed
()
?
HPPL_OP_N
:
HPPL_OP_T
,
!
b
Trans
?
HPPL_OP_N
:
HPPL_OP_T
,
outData
,
outData
,
out
.
getHeight
(),
out
.
getHeight
(),
out
.
getWidth
(),
out
.
getWidth
(),
!
a
.
isTransposed
()
?
a
.
getWidth
()
:
a
.
getHeight
(),
!
a
Trans
?
a
.
getWidth
()
:
a
.
getHeight
(),
scaleAB
,
scaleAB
,
scaleT
,
scaleT
,
a
.
getStride
(),
a
.
getStride
(),
...
@@ -75,27 +59,19 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
...
@@ -75,27 +59,19 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
const
GpuSparseMatrix
&
a
,
const
GpuSparseMatrix
&
a
,
const
GpuMatrix
&
b
,
const
GpuMatrix
&
b
,
real
scaleAB
,
real
scaleAB
,
real
scaleT
)
{
real
scaleT
,
bool
aTrans
,
bool
bTrans
,
bool
cTrans
)
{
CHECK
(
out
.
isContiguous
());
CHECK
(
out
.
isContiguous
());
CHECK
(
b
.
isContiguous
());
CHECK
(
b
.
isContiguous
());
CHECK
(
b
.
useGpu_
)
<<
"Matrix type are not equal"
;
CHECK
(
a
.
useGpu_
&&
b
.
useGpu_
)
<<
"matrix device type not match"
;
CHECK
(
!
out
.
isTransposed
()
&&
!
b
.
isTransposed
())
<<
"not supported"
;
if
(
!
a
.
isTransposed
())
{
/// a: M * K, b: K * N
CHECK
(
out
.
getWidth
()
==
b
.
getWidth
()
&&
out
.
getHeight
()
==
a
.
getHeight
()
&&
a
.
getWidth
()
==
b
.
getHeight
())
<<
"Matrix dimensions are not equal"
;
}
else
{
/// a: K * M, transpose, b: K * N
CHECK
(
out
.
getWidth
()
==
b
.
getWidth
()
&&
out
.
getHeight
()
==
a
.
getWidth
()
&&
a
.
getHeight
()
==
b
.
getHeight
())
<<
"Matrix dimensions are not equal"
;
}
hl_trans_op_t
aTrans
=
a
.
isTransposed
()
?
HPPL_OP_T
:
HPPL_OP_N
;
hl_sparse_matrix_s
aData
=
a
.
sMatrix_
.
get
();
hl_sparse_matrix_s
aData
=
a
.
sMatrix_
.
get
();
real
*
bData
=
const_cast
<
real
*>
(
b
.
getData
());
real
*
bData
=
const_cast
<
real
*>
(
b
.
getData
());
real
*
outData
=
const_cast
<
real
*>
(
out
.
getData
());
real
*
outData
=
const_cast
<
real
*>
(
out
.
getData
());
hl_matrix_csr_mul_dense
(
aData
,
hl_matrix_csr_mul_dense
(
aData
,
aTrans
,
aTrans
?
HPPL_OP_T
:
HPPL_OP_N
,
bData
,
bData
,
HPPL_OP_N
,
HPPL_OP_N
,
outData
,
outData
,
...
@@ -115,25 +91,14 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
...
@@ -115,25 +91,14 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
const
GpuMatrix
&
a
,
const
GpuMatrix
&
a
,
const
GpuSparseMatrix
&
b
,
const
GpuSparseMatrix
&
b
,
real
scaleAB
,
real
scaleAB
,
real
scaleT
)
{
real
scaleT
,
bool
aTrans
,
bool
bTrans
,
bool
cTrans
)
{
CHECK
(
out
.
isContiguous
());
CHECK
(
out
.
isContiguous
());
CHECK
(
a
.
isContiguous
());
CHECK
(
a
.
isContiguous
());
CHECK
(
a
.
useGpu_
)
<<
"Matrix type are not equal"
;
CHECK
(
a
.
useGpu_
&&
b
.
useGpu_
)
<<
"matrix device type not match"
;
if
(
!
b
.
isTransposed
())
{
/// a : M * K, b : K * N
CHECK
(
out
.
getWidth
()
==
b
.
getWidth
()
&&
out
.
getHeight
()
==
a
.
getHeight
()
&&
a
.
getWidth
()
==
b
.
getHeight
())
<<
"Matrix dimensions are not equal"
;
}
else
{
/// a : M * K, b : N * K, transpose
CHECK
(
out
.
getWidth
()
==
b
.
getHeight
()
&&
out
.
getHeight
()
==
a
.
getHeight
()
&&
a
.
getWidth
()
==
b
.
getWidth
())
<<
"Matrix dimensions are not equal"
;
}
hl_trans_op_t
bTrans
=
b
.
isTransposed
()
?
HPPL_OP_T
:
HPPL_OP_N
;
hl_sparse_matrix_s
bData
=
b
.
sMatrix_
.
get
();
hl_sparse_matrix_s
bData
=
b
.
sMatrix_
.
get
();
real
*
aData
=
const_cast
<
real
*>
(
a
.
getData
());
real
*
aData
=
const_cast
<
real
*>
(
a
.
getData
());
real
*
outData
=
const_cast
<
real
*>
(
out
.
getData
());
real
*
outData
=
const_cast
<
real
*>
(
out
.
getData
());
...
@@ -142,7 +107,7 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
...
@@ -142,7 +107,7 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
hl_matrix_dense_mul_csc
(
aData
,
hl_matrix_dense_mul_csc
(
aData
,
HPPL_OP_N
,
HPPL_OP_N
,
bData
,
bData
,
bTrans
,
bTrans
?
HPPL_OP_T
:
HPPL_OP_N
,
outData
,
outData
,
out
.
getHeight
(),
out
.
getHeight
(),
out
.
getWidth
(),
out
.
getWidth
(),
...
@@ -153,7 +118,7 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
...
@@ -153,7 +118,7 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
hl_matrix_dense_mul_csr
(
aData
,
hl_matrix_dense_mul_csr
(
aData
,
HPPL_OP_N
,
HPPL_OP_N
,
bData
,
bData
,
bTrans
,
bTrans
?
HPPL_OP_T
:
HPPL_OP_N
,
outData
,
outData
,
out
.
getHeight
(),
out
.
getHeight
(),
out
.
getWidth
(),
out
.
getWidth
(),
...
@@ -168,35 +133,26 @@ void MulOp<DEVICE_TYPE_GPU>(GpuSparseMatrix& out,
...
@@ -168,35 +133,26 @@ void MulOp<DEVICE_TYPE_GPU>(GpuSparseMatrix& out,
const
GpuMatrix
&
a
,
const
GpuMatrix
&
a
,
const
GpuMatrix
&
b
,
const
GpuMatrix
&
b
,
real
scaleAB
,
real
scaleAB
,
real
scaleT
)
{
real
scaleT
,
bool
aTrans
,
bool
bTrans
,
bool
cTrans
)
{
CHECK
(
a
.
useGpu_
&&
b
.
useGpu_
)
<<
"matrix device type not match"
;
CHECK
(
a
.
useGpu_
&&
b
.
useGpu_
)
<<
"matrix device type not match"
;
CHECK
(
!
out
.
isTransposed
())
<<
"Transpose is not supported for out matrix"
;
if
(
!
a
.
isTransposed
()
&&
!
b
.
isTransposed
())
{
CHECK
(
out
.
getHeight
()
==
a
.
getHeight
()
&&
out
.
getWidth
()
==
b
.
getWidth
()
&&
a
.
getWidth
()
==
b
.
getHeight
());
}
else
if
(
a
.
isTransposed
()
&&
!
b
.
isTransposed
())
{
CHECK
(
out
.
getHeight
()
==
a
.
getWidth
()
&&
out
.
getWidth
()
==
b
.
getWidth
()
&&
a
.
getHeight
()
==
b
.
getHeight
());
}
else
if
(
!
a
.
isTransposed
()
&&
b
.
isTransposed
())
{
CHECK
(
out
.
getHeight
()
==
a
.
getHeight
()
&&
out
.
getWidth
()
==
b
.
getHeight
()
&&
a
.
getWidth
()
==
b
.
getWidth
());
}
else
{
LOG
(
FATAL
)
<<
"Not support for both a and b are Transposed Matrices"
;
}
hl_trans_op_t
aTrans
=
a
.
isTransposed
()
?
HPPL_OP_T
:
HPPL_OP_N
;
hl_trans_op_t
bTrans
=
b
.
isTransposed
()
?
HPPL_OP_T
:
HPPL_OP_N
;
int
dimK
=
!
b
.
isTransposed
()
?
b
.
getHeight
()
:
b
.
getWidth
();
real
*
aData
=
const_cast
<
real
*>
(
a
.
getData
());
real
*
aData
=
const_cast
<
real
*>
(
a
.
getData
());
real
*
bData
=
const_cast
<
real
*>
(
b
.
getData
());
real
*
bData
=
const_cast
<
real
*>
(
b
.
getData
());
hl_sparse_matrix_s
outData
=
out
.
sMatrix_
.
get
();
hl_sparse_matrix_s
outData
=
out
.
sMatrix_
.
get
();
hl_sparse_matrix_mul
(
aData
,
aTrans
,
bData
,
bTrans
,
outData
,
hl_sparse_matrix_mul
(
aData
,
out
.
getHeight
(),
out
.
getWidth
(),
dimK
,
scaleAB
,
scaleT
);
aTrans
?
HPPL_OP_T
:
HPPL_OP_N
,
bData
,
bTrans
?
HPPL_OP_T
:
HPPL_OP_N
,
outData
,
out
.
getHeight
(),
out
.
getWidth
(),
!
bTrans
?
b
.
getHeight
()
:
b
.
getWidth
(),
scaleAB
,
scaleT
);
}
}
}
// namespace paddle
}
// namespace paddle
paddle/function/MulOpTest.cpp
浏览文件 @
b3be7358
...
@@ -39,18 +39,21 @@ void testFuncDDDMatrix(
...
@@ -39,18 +39,21 @@ void testFuncDDDMatrix(
size_t
widthC
=
dimN
;
size_t
widthC
=
dimN
;
// init Test object
// init Test object
FunctionCompare
test
(
"MulOp"
,
FunctionCompare
test
(
"MulOp"
,
FuncConfig
().
set
(
"scaleAB"
,
alpha
).
set
(
"scaleT"
,
beta
));
FuncConfig
()
.
set
(
"scaleAB"
,
alpha
)
.
set
(
"scaleT"
,
beta
)
.
set
(
"aTrans"
,
transa
)
.
set
(
"bTrans"
,
transb
)
.
set
(
"cTrans"
,
false
));
// prepare input arguments
// prepare input arguments
/// matrix A : HA * WA
/// matrix A : HA * WA
test
.
addInputs
(
BufferArg
(
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
heightA
,
widthA
}));
VALUE_TYPE_FLOAT
,
TensorShape
{
heightA
,
widthA
},
UNSPECIFIED
,
transa
));
/// matrix B: HB * WB
/// matrix B: HB * WB
test
.
addInputs
(
BufferArg
(
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
heightB
,
widthB
}));
VALUE_TYPE_FLOAT
,
TensorShape
{
heightB
,
widthB
},
UNSPECIFIED
,
transb
));
/// output matrix C: HC * WC
/// output matrix C: HC * WC
test
.
addOutputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
heightC
,
widthC
}),
test
.
addOutputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
heightC
,
widthC
}),
ADD
_TO
);
beta
==
1.0
?
ADD_TO
:
ASSIGN
_TO
);
// run Function
// run Function
test
.
run
();
test
.
run
();
}
}
...
@@ -88,21 +91,22 @@ void testFuncDSparseDMatrix(
...
@@ -88,21 +91,22 @@ void testFuncDSparseDMatrix(
real
beta
=
1.0
;
real
beta
=
1.0
;
// init Test object
// init Test object
FunctionCompare
test
(
"MulOp"
,
FunctionCompare
test
(
"MulOp"
,
FuncConfig
().
set
(
"scaleAB"
,
alpha
).
set
(
"scaleT"
,
beta
));
FuncConfig
()
.
set
(
"scaleAB"
,
alpha
)
.
set
(
"scaleT"
,
beta
)
.
set
(
"aTrans"
,
false
)
.
set
(
"bTrans"
,
false
)
.
set
(
"cTrans"
,
false
));
// prepare input arguments
// prepare input arguments
/// sparse matrix A : M * K
/// sparse matrix A : M * K
test
.
addInputs
(
SparseMatrixArg
(
VALUE_TYPE_FLOAT
,
test
.
addInputs
(
SparseMatrixArg
(
TensorShape
{
dimM
,
dimK
},
VALUE_TYPE_FLOAT
,
TensorShape
{
dimM
,
dimK
},
nnz
,
FORMAT
,
FLOAT_VALUE
));
nnz
,
FORMAT
,
FLOAT_VALUE
,
UNSPECIFIED
,
false
));
/// matrix B: K * N
/// matrix B: K * N
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
dimK
,
dimN
}));
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
dimK
,
dimN
}));
/// output matrix C: M * N
/// output matrix C: M * N
test
.
addOutputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
dimM
,
dimN
}),
ADD_TO
);
test
.
addOutputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
dimM
,
dimN
}),
beta
==
1.0
?
ADD_TO
:
ASSIGN_TO
);
// run Function
// run Function
test
.
run
();
test
.
run
();
}
}
...
@@ -138,22 +142,23 @@ void testFuncDDSparseMatrix(
...
@@ -138,22 +142,23 @@ void testFuncDDSparseMatrix(
real
beta
=
1.0
;
real
beta
=
1.0
;
// init Test object
// init Test object
FunctionCompare
test
(
"MulOp"
,
FunctionCompare
test
(
"MulOp"
,
FuncConfig
().
set
(
"scaleAB"
,
alpha
).
set
(
"scaleT"
,
beta
));
FuncConfig
()
.
set
(
"scaleAB"
,
alpha
)
.
set
(
"scaleT"
,
beta
)
.
set
(
"aTrans"
,
false
)
.
set
(
"bTrans"
,
false
)
.
set
(
"cTrans"
,
false
));
// prepare input arguments
// prepare input arguments
/// matrix A : M * K
/// matrix A : M * K
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
dimM
,
dimK
}));
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
dimM
,
dimK
}));
/// matrix B: K * N
/// matrix B: K * N
test
.
addInputs
(
SparseMatrixArg
(
VALUE_TYPE_FLOAT
,
test
.
addInputs
(
SparseMatrixArg
(
TensorShape
{
dimK
,
dimN
},
VALUE_TYPE_FLOAT
,
TensorShape
{
dimK
,
dimN
},
nnz
,
FORMAT
,
FLOAT_VALUE
));
nnz
,
FORMAT
,
FLOAT_VALUE
,
UNSPECIFIED
,
false
));
/// output matrix C: M * N
/// output matrix C: M * N
test
.
addOutputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
dimM
,
dimN
}),
ADD_TO
);
test
.
addOutputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
dimM
,
dimN
}),
beta
==
1.0
?
ADD_TO
:
ASSIGN_TO
);
// run Function
// run Function
test
.
run
();
test
.
run
();
}
}
...
@@ -189,7 +194,12 @@ void testFuncSparseDDMatrix(
...
@@ -189,7 +194,12 @@ void testFuncSparseDDMatrix(
real
beta
=
1.0
;
real
beta
=
1.0
;
// init Test object
// init Test object
FunctionCompare
test
(
"MulOp"
,
FunctionCompare
test
(
"MulOp"
,
FuncConfig
().
set
(
"scaleAB"
,
alpha
).
set
(
"scaleT"
,
beta
));
FuncConfig
()
.
set
(
"scaleAB"
,
alpha
)
.
set
(
"scaleT"
,
beta
)
.
set
(
"aTrans"
,
false
)
.
set
(
"bTrans"
,
false
)
.
set
(
"cTrans"
,
false
));
// prepare input arguments
// prepare input arguments
/// matrix A : M * K
/// matrix A : M * K
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
dimM
,
dimK
}));
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
dimM
,
dimK
}));
...
@@ -198,14 +208,10 @@ void testFuncSparseDDMatrix(
...
@@ -198,14 +208,10 @@ void testFuncSparseDDMatrix(
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
dimK
,
dimN
}));
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
dimK
,
dimN
}));
/// output sparse matrix C: M * N
/// output sparse matrix C: M * N
test
.
addOutputs
(
SparseMatrixArg
(
VALUE_TYPE_FLOAT
,
test
.
addOutputs
(
TensorShape
{
dimM
,
dimN
},
SparseMatrixArg
(
nnz
,
VALUE_TYPE_FLOAT
,
TensorShape
{
dimM
,
dimN
},
nnz
,
FORMAT
,
FLOAT_VALUE
),
FORMAT
,
beta
==
1.0
?
ADD_TO
:
ASSIGN_TO
);
FLOAT_VALUE
,
UNSPECIFIED
,
false
),
ADD_TO
);
// run Function
// run Function
test
.
run
();
test
.
run
();
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录