Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
c4dd596d
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c4dd596d
编写于
5月 15, 2020
作者:
W
wangchaochaohu
提交者:
GitHub
5月 15, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
cusum op optimization for GPU kernel (#24321)
上级
d43e4047
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
419 addition
and
12 deletion
+419
-12
paddle/fluid/operators/cumsum_op.cu
paddle/fluid/operators/cumsum_op.cu
+329
-6
python/paddle/fluid/tests/unittests/test_cumsum_op.py
python/paddle/fluid/tests/unittests/test_cumsum_op.py
+90
-6
未找到文件。
paddle/fluid/operators/cumsum_op.cu
浏览文件 @
c4dd596d
...
...
@@ -13,11 +13,334 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/cum_op.h"
#include "paddle/fluid/platform/gpu_launch_param_config.h"
namespace
ops
=
paddle
::
operators
;
using
CUDA
=
paddle
::
platform
::
CUDADeviceContext
;
using
Tensor
=
paddle
::
framework
::
Tensor
;
using
LoDTensor
=
paddle
::
framework
::
LoDTensor
;
namespace
paddle
{
namespace
operators
{
template
<
typename
T
>
__global__
void
OuterScan
(
const
T
*
in
,
T
*
out
,
int
inner_dim_size
,
int
outer_dim_size
,
int
scan_dim_size
,
bool
exclusive
,
bool
reverse
)
{
int
id
=
blockIdx
.
y
*
blockDim
.
x
+
threadIdx
.
x
;
for
(
int
outer_index
=
blockIdx
.
x
;
outer_index
<
outer_dim_size
;
outer_index
+=
gridDim
.
x
)
{
for
(
int
inner_index
=
blockIdx
.
y
*
blockDim
.
x
+
threadIdx
.
x
;
inner_index
<
inner_dim_size
;
inner_index
+=
gridDim
.
y
*
blockDim
.
x
)
{
int
scan_index_init
=
0
;
int
forward_direction
=
1
;
int
src_index
=
outer_index
*
scan_dim_size
*
inner_dim_size
+
inner_index
;
int
dst_index
=
outer_index
*
scan_dim_size
*
inner_dim_size
+
inner_index
;
if
(
reverse
)
{
src_index
=
src_index
+
(
scan_dim_size
-
1
)
*
inner_dim_size
;
dst_index
=
dst_index
+
(
scan_dim_size
-
1
)
*
inner_dim_size
;
forward_direction
=
-
1
;
}
if
(
exclusive
)
{
scan_index_init
=
1
;
out
[
dst_index
]
=
0
;
dst_index
=
dst_index
+
(
forward_direction
*
inner_dim_size
);
}
T
acc
=
0
;
for
(
int
scan_index
=
scan_index_init
;
scan_index
<
scan_dim_size
;
++
scan_index
)
{
acc
=
in
[
src_index
]
+
acc
;
out
[
dst_index
]
=
acc
;
src_index
+=
(
forward_direction
*
inner_dim_size
);
dst_index
+=
(
forward_direction
*
inner_dim_size
);
}
}
}
}
// inclusive scan
template
<
typename
T
,
int
num_threads_x
,
int
num_threads_y
>
__global__
void
InnerMostDimInclusiveScan
(
const
T
*
in
,
T
*
out
,
int
inner_dim_size
,
int
outer_dim_size
,
int
scan_dim_size
,
bool
reverse
)
{
__shared__
T
share_data
[
num_threads_y
][
num_threads_x
*
2
];
T
*
share_row
=
share_data
[
threadIdx
.
y
];
int
forward_direction
=
1
;
if
(
reverse
)
forward_direction
=
-
1
;
for
(
int
block_row
=
blockIdx
.
x
*
blockDim
.
y
;
block_row
<
outer_dim_size
;
block_row
+=
blockDim
.
y
*
gridDim
.
x
)
{
int
row
=
block_row
+
threadIdx
.
y
;
T
acc
=
0
;
const
T
*
row_src
=
in
+
row
*
scan_dim_size
;
T
*
row_dst
=
out
+
row
*
scan_dim_size
;
int
block_col
=
0
;
bool
loop_condition
=
(
block_col
<
scan_dim_size
);
if
(
reverse
)
{
loop_condition
=
(
block_col
>=
0
);
block_col
=
scan_dim_size
-
1
;
}
while
(
loop_condition
)
{
// Load data into share memory(two value per thread)
int
col1
=
block_col
+
threadIdx
.
x
*
forward_direction
;
int
col2
=
block_col
+
(
num_threads_x
+
threadIdx
.
x
)
*
forward_direction
;
if
(
row
<
outer_dim_size
)
{
if
(
col1
<
scan_dim_size
&&
col1
>=
0
)
{
share_row
[
threadIdx
.
x
]
=
row_src
[
col1
];
}
else
{
share_row
[
threadIdx
.
x
]
=
0
;
}
if
(
col2
<
scan_dim_size
&&
col2
>=
0
)
{
share_row
[
num_threads_x
+
threadIdx
.
x
]
=
row_src
[
col2
];
}
else
{
share_row
[
num_threads_x
+
threadIdx
.
x
]
=
0
;
}
// Add the previous block acc to the result
if
(
threadIdx
.
x
==
0
)
{
share_row
[
0
]
=
share_row
[
0
]
+
acc
;
}
}
__syncthreads
();
// Up-Sweep
for
(
unsigned
s
=
num_threads_x
,
d
=
1
;
s
>=
1
;
s
>>=
1
,
d
<<=
1
)
{
if
(
row
<
outer_dim_size
&&
threadIdx
.
x
<
s
)
{
unsigned
offset
=
(
2
*
threadIdx
.
x
+
1
)
*
d
-
1
;
share_row
[
offset
+
d
]
=
share_row
[
offset
]
+
share_row
[
offset
+
d
];
}
__syncthreads
();
}
// Down-Sweep
for
(
unsigned
s
=
2
,
d
=
blockDim
.
x
/
2
;
d
>=
1
;
s
<<=
1
,
d
>>=
1
)
{
if
(
row
<
outer_dim_size
&&
threadIdx
.
x
<
s
-
1
)
{
unsigned
offset
=
2
*
(
threadIdx
.
x
+
1
)
*
d
-
1
;
share_row
[
offset
+
d
]
=
share_row
[
offset
]
+
share_row
[
offset
+
d
];
}
__syncthreads
();
}
// Write to the output
if
(
row
<
outer_dim_size
)
{
if
(
col1
<
scan_dim_size
&&
col1
>=
0
)
row_dst
[
col1
]
=
share_row
[
threadIdx
.
x
];
if
(
col2
<
scan_dim_size
&&
col2
>=
0
)
row_dst
[
col2
]
=
share_row
[
num_threads_x
+
threadIdx
.
x
];
}
acc
=
share_row
[
2
*
num_threads_x
-
1
];
__syncthreads
();
block_col
+=
2
*
num_threads_x
*
forward_direction
;
if
(
reverse
)
loop_condition
=
(
block_col
>=
0
);
else
loop_condition
=
(
block_col
<
scan_dim_size
);
}
}
}
// exclusive block scan and store block sum for large scan
template
<
typename
T
>
__global__
void
InnerMostDimExclusiveScan
(
const
T
*
in
,
T
*
out
,
T
*
sum_data
,
int
inner_dim_size
,
int
outer_dim_size
,
int
scan_dim_size
,
int
two_power
,
bool
reverse
)
{
// https://stackoverflow.com/questions/27570552/templated-cuda-kernel-with-dynamic-shared-memory
extern
__shared__
__align__
(
sizeof
(
T
))
unsigned
char
raw_tmp
[];
T
*
share_tmp
=
reinterpret_cast
<
T
*>
(
raw_tmp
);
int
thread_id
=
threadIdx
.
x
;
int
block_id
=
blockIdx
.
x
;
int
block_scan_size
=
blockDim
.
x
*
2
;
int
remain
=
scan_dim_size
%
(
2
*
blockDim
.
x
);
if
(
block_id
==
gridDim
.
x
-
1
&&
remain
!=
0
)
block_scan_size
=
remain
;
int
col1
=
thread_id
;
int
col2
=
thread_id
+
(
block_scan_size
)
/
2
;
int
index1
=
blockIdx
.
y
*
(
scan_dim_size
)
+
block_id
*
blockDim
.
x
*
2
+
col1
;
int
index2
=
blockIdx
.
y
*
(
scan_dim_size
)
+
block_id
*
blockDim
.
x
*
2
+
col2
;
if
(
reverse
)
{
index1
=
blockIdx
.
y
*
(
scan_dim_size
)
+
scan_dim_size
-
1
-
(
block_id
*
blockDim
.
x
*
2
+
col1
);
index2
=
blockIdx
.
y
*
(
scan_dim_size
)
+
scan_dim_size
-
1
-
(
block_id
*
blockDim
.
x
*
2
+
col2
);
}
int
sum_index
=
blockIdx
.
y
*
gridDim
.
x
+
block_id
;
if
(
thread_id
<
block_scan_size
)
{
share_tmp
[
col1
+
(
col1
>>
5
)]
=
in
[
index1
];
share_tmp
[
col2
+
(
col2
>>
5
)]
=
in
[
index2
];
}
else
{
share_tmp
[
col1
+
(
col1
>>
5
)]
=
0
;
share_tmp
[
col2
+
(
col2
>>
5
)]
=
0
;
}
// Up-Sweep
int
offset
=
1
;
for
(
int
d
=
(
two_power
/
2
);
d
>
0
;
d
>>=
1
)
{
__syncthreads
();
if
(
thread_id
<
d
)
{
int
tmp_index1
=
offset
*
(
2
*
thread_id
+
1
)
-
1
;
int
tmp_index2
=
offset
*
(
2
*
thread_id
+
2
)
-
1
;
tmp_index1
=
tmp_index1
+
(
tmp_index1
>>
5
);
tmp_index2
=
tmp_index2
+
(
tmp_index2
>>
5
);
share_tmp
[
tmp_index2
]
+=
share_tmp
[
tmp_index1
];
}
offset
*=
2
;
}
__syncthreads
();
if
(
thread_id
==
0
)
{
int
tmp_index
=
(
two_power
-
1
)
+
((
two_power
-
1
)
>>
5
);
sum_data
[
sum_index
]
=
share_tmp
[
tmp_index
];
share_tmp
[
tmp_index
]
=
0
;
}
REGISTER_OP_CUDA_KERNEL
(
cumsum
,
ops
::
CumKernel
<
CUDA
,
ops
::
CumsumFunctor
<
float
>>
,
ops
::
CumKernel
<
CUDA
,
ops
::
CumsumFunctor
<
double
>>
,
ops
::
CumKernel
<
CUDA
,
ops
::
CumsumFunctor
<
int
>>
,
ops
::
CumKernel
<
CUDA
,
ops
::
CumsumFunctor
<
int64_t
>>
);
// Down Sweep
for
(
int
d
=
1
;
d
<
two_power
;
d
*=
2
)
{
offset
>>=
1
;
__syncthreads
();
if
(
thread_id
<
d
)
{
int
tmp_index1
=
offset
*
(
2
*
thread_id
+
1
)
-
1
;
int
tmp_index2
=
offset
*
(
2
*
thread_id
+
2
)
-
1
;
tmp_index1
=
tmp_index1
+
(
tmp_index1
>>
5
);
tmp_index2
=
tmp_index2
+
(
tmp_index2
>>
5
);
T
tmp
=
share_tmp
[
tmp_index1
];
share_tmp
[
tmp_index1
]
=
share_tmp
[
tmp_index2
];
share_tmp
[
tmp_index2
]
+=
tmp
;
}
}
__syncthreads
();
if
(
col1
<
block_scan_size
)
out
[
index1
]
=
share_tmp
[
col1
+
(
col1
>>
5
)];
if
(
col2
<
block_scan_size
)
out
[
index2
]
=
share_tmp
[
col2
+
(
col2
>>
5
)];
}
// for large scan_dim_size array we need to add for correct result
template
<
typename
T
>
__global__
void
AddBlockScan
(
T
*
result
,
T
*
sum
,
int
size
,
int
scan_dim_size
,
int
sum_size
,
bool
reverse
)
{
int
idx
=
threadIdx
.
x
+
blockDim
.
x
*
(
blockIdx
.
x
+
blockIdx
.
y
*
gridDim
.
x
);
int
block_id_start
=
blockIdx
.
y
*
sum_size
;
int
block_id_end
=
blockIdx
.
x
+
blockIdx
.
y
*
sum_size
;
int
block_id
=
blockIdx
.
x
;
int
thread_id
=
threadIdx
.
x
;
int
col
=
block_id
*
blockDim
.
x
+
thread_id
+
size
;
int
index
=
blockIdx
.
y
*
(
scan_dim_size
)
+
col
;
if
(
reverse
)
{
index
=
blockIdx
.
y
*
(
scan_dim_size
)
+
scan_dim_size
-
1
-
col
;
}
if
(
col
>=
scan_dim_size
||
col
<
0
)
return
;
for
(
int
i
=
block_id_start
;
i
<=
block_id_end
;
i
++
)
{
result
[
index
]
+=
sum
[
i
];
}
}
template
<
typename
DeviceContext
,
typename
T
>
class
CumCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
in
=
context
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
out
=
context
.
Output
<
framework
::
Tensor
>
(
"Out"
);
int
axis
=
context
.
Attr
<
int
>
(
"axis"
);
bool
exclusive
=
context
.
Attr
<
bool
>
(
"exclusive"
);
bool
reverse
=
context
.
Attr
<
bool
>
(
"reverse"
);
auto
in_dims
=
in
->
dims
();
auto
size
=
in
->
numel
();
if
(
axis
==
-
1
)
{
axis
=
in_dims
.
size
()
-
1
;
}
PADDLE_ENFORCE_LT
(
axis
,
in_dims
.
size
(),
platform
::
errors
::
InvalidArgument
(
"axis(%d) should be less than the "
"dimension(%d) of the input tensor."
,
axis
,
in_dims
.
size
()));
int
scan_dim_size
=
in_dims
[
axis
];
bool
optimize_condition
=
(
axis
==
(
in_dims
.
size
()
-
1
))
?
true
:
false
;
int
outer_dim_size
=
1
;
int
inner_dim_size
=
1
;
// treat all dim index < axis as outer_dim_size
for
(
size_t
i
=
0
;
i
<
axis
;
i
++
)
{
outer_dim_size
*=
in_dims
[
i
];
}
// treat all dim index > axis as innner_dim_size
for
(
size_t
i
=
axis
+
1
;
i
<
in_dims
.
size
();
i
++
)
{
inner_dim_size
*=
in_dims
[
i
];
}
T
*
out_data
=
out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
const
T
*
in_data
=
in
->
data
<
T
>
();
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
if
(
optimize_condition
)
{
auto
nextPowerOfTwo
=
[](
int
x
)
->
int
{
int
ret
=
1
;
while
(
ret
<
x
)
ret
=
ret
*
2
;
return
ret
;
};
if
(
exclusive
)
{
int
element_per_block
=
nextPowerOfTwo
(
scan_dim_size
)
/
2
;
if
(
element_per_block
>
512
||
element_per_block
<
32
)
{
element_per_block
=
64
;
}
int
two_power
=
element_per_block
*
2
;
dim3
block
(
element_per_block
);
dim3
grid
(((
scan_dim_size
+
1
)
/
2
+
block
.
x
-
1
)
/
block
.
x
,
outer_dim_size
);
int
offset_size
=
(
element_per_block
*
2
)
>>
5
;
int
share_mem_size
=
(
element_per_block
*
2
+
offset_size
)
*
sizeof
(
T
);
Tensor
scan_sum
;
paddle
::
framework
::
DDim
dims
{
((
scan_dim_size
+
1
)
/
2
+
block
.
x
-
1
)
/
block
.
x
,
outer_dim_size
};
scan_sum
.
Resize
(
dims
);
T
*
sum_data
=
scan_sum
.
mutable_data
<
T
>
(
context
.
GetPlace
());
InnerMostDimExclusiveScan
<
T
><<<
grid
,
block
,
share_mem_size
,
dev_ctx
.
stream
()
>>>
(
in_data
,
out_data
,
sum_data
,
inner_dim_size
,
outer_dim_size
,
scan_dim_size
,
two_power
,
reverse
);
// for large scan array we need to do add for correct result
int
element_size
=
element_per_block
*
2
;
if
(
scan_dim_size
>
element_size
)
{
dim3
sum_block
(
element_per_block
*
2
);
dim3
sum_grid
((
scan_dim_size
-
element_size
+
block
.
x
-
1
)
/
block
.
x
,
outer_dim_size
);
int
sum_size
=
((
scan_dim_size
+
1
)
/
2
+
block
.
x
-
1
)
/
block
.
x
;
AddBlockScan
<
T
><<<
sum_grid
,
sum_block
,
0
,
dev_ctx
.
stream
()
>>>
(
out_data
,
sum_data
,
element_size
,
scan_dim_size
,
sum_size
,
reverse
);
}
}
else
{
dim3
block
(
32
,
16
);
dim3
grid
((
outer_dim_size
+
block
.
y
-
1
)
/
block
.
y
);
InnerMostDimInclusiveScan
<
T
,
32
,
16
><<<
grid
,
block
,
0
,
dev_ctx
.
stream
()
>>>
(
in_data
,
out_data
,
inner_dim_size
,
outer_dim_size
,
scan_dim_size
,
reverse
);
}
}
else
{
dim3
block
(
std
::
min
(
512
,
inner_dim_size
));
dim3
grid
(
outer_dim_size
,
(
inner_dim_size
+
block
.
x
-
1
)
/
block
.
x
);
OuterScan
<
T
><<<
grid
,
block
,
0
,
dev_ctx
.
stream
()
>>>
(
in_data
,
out_data
,
inner_dim_size
,
outer_dim_size
,
scan_dim_size
,
exclusive
,
reverse
);
}
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
cumsum
,
ops
::
CumCUDAKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
CumCUDAKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
CumCUDAKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
CumCUDAKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
python/paddle/fluid/tests/unittests/test_cumsum_op.py
浏览文件 @
c4dd596d
...
...
@@ -108,24 +108,108 @@ class TestSumOp7(OpTest):
self
.
check_grad
([
'X'
],
'Out'
)
class
TestSumOp
8
(
OpTest
):
class
TestSumOp
Exclusive1
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"cumsum"
self
.
attrs
=
{
'axis'
:
2
,
"exclusive"
:
True
}
a
=
np
.
random
.
random
((
5
,
6
,
4
)).
astype
(
"float64"
)
a
=
np
.
random
.
random
((
4
,
5
,
65
)).
astype
(
"float64"
)
self
.
inputs
=
{
'X'
:
a
}
self
.
outputs
=
{
'Out'
:
np
.
concatenate
(
(
np
.
zeros
(
(
5
,
6
,
1
),
dtype
=
np
.
float64
),
a
[:,
:,
:
-
1
].
cumsum
(
axis
=
2
)),
(
4
,
5
,
1
),
dtype
=
np
.
float64
),
a
[:,
:,
:
-
1
].
cumsum
(
axis
=
2
)),
axis
=
2
)
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'Out'
)
class
TestSumOpExclusive2
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"cumsum"
self
.
attrs
=
{
'axis'
:
2
,
"exclusive"
:
True
}
a
=
np
.
random
.
random
((
1
,
1
,
888
)).
astype
(
"float64"
)
self
.
inputs
=
{
'X'
:
a
}
self
.
outputs
=
{
'Out'
:
np
.
concatenate
(
(
np
.
zeros
(
(
1
,
1
,
1
),
dtype
=
np
.
float64
),
a
[:,
:,
:
-
1
].
cumsum
(
axis
=
2
)),
axis
=
2
)
}
def
test_check_output
(
self
):
self
.
check_output
()
class
TestSumOpExclusive3
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"cumsum"
self
.
attrs
=
{
'axis'
:
2
,
"exclusive"
:
True
}
a
=
np
.
random
.
random
((
4
,
5
,
888
)).
astype
(
"float32"
)
self
.
inputs
=
{
'X'
:
a
}
self
.
outputs
=
{
'Out'
:
np
.
concatenate
(
(
np
.
zeros
(
(
4
,
5
,
1
),
dtype
=
np
.
float64
),
a
[:,
:,
:
-
1
].
cumsum
(
axis
=
2
)),
axis
=
2
)
}
def
test_check_output
(
self
):
self
.
check_output
()
class
TestSumOpExclusive4
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"cumsum"
self
.
attrs
=
{
'axis'
:
2
,
"exclusive"
:
True
}
a
=
np
.
random
.
random
((
1
,
1
,
3049
)).
astype
(
"float64"
)
self
.
inputs
=
{
'X'
:
a
}
self
.
outputs
=
{
'Out'
:
np
.
concatenate
(
(
np
.
zeros
(
(
1
,
1
,
1
),
dtype
=
np
.
float64
),
a
[:,
:,
:
-
1
].
cumsum
(
axis
=
2
)),
axis
=
2
)
}
def
test_check_output
(
self
):
self
.
check_output
()
class
TestSumOpExclusive5
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"cumsum"
self
.
attrs
=
{
'axis'
:
2
,
"exclusive"
:
True
}
a
=
np
.
random
.
random
((
4
,
5
,
3096
)).
astype
(
"float64"
)
self
.
inputs
=
{
'X'
:
a
}
self
.
outputs
=
{
'Out'
:
np
.
concatenate
(
(
np
.
zeros
(
(
4
,
5
,
1
),
dtype
=
np
.
float64
),
a
[:,
:,
:
-
1
].
cumsum
(
axis
=
2
)),
axis
=
2
)
}
def
test_check_output
(
self
):
self
.
check_output
()
class
TestSumOpReverseExclusive
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"cumsum"
self
.
attrs
=
{
'axis'
:
2
,
'reverse'
:
True
,
"exclusive"
:
True
}
a
=
np
.
random
.
random
((
4
,
5
,
6
)).
astype
(
"float64"
)
self
.
inputs
=
{
'X'
:
a
}
a
=
np
.
flip
(
a
,
axis
=
2
)
self
.
outputs
=
{
'Out'
:
np
.
concatenate
(
(
np
.
flip
(
a
[:,
:,
:
-
1
].
cumsum
(
axis
=
2
),
axis
=
2
),
np
.
zeros
(
(
4
,
5
,
1
),
dtype
=
np
.
float64
)),
axis
=
2
)
}
def
test_check_output
(
self
):
self
.
check_output
()
class
BadInputTest
(
unittest
.
TestCase
):
...
...
@@ -133,7 +217,7 @@ class BadInputTest(unittest.TestCase):
with
fluid
.
program_guard
(
fluid
.
Program
()):
def
test_bad_x
():
data
=
[
1
,
2
,
3
]
data
=
[
1
,
2
,
4
]
result
=
fluid
.
layers
.
cumsum
(
data
,
axis
=
0
)
self
.
assertRaises
(
TypeError
,
test_bad_x
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录