Unverified commit c4dd596d — 机器未来 / Paddle (forked from PaddlePaddle / Paddle)

Authored May 15, 2020 by wangchaochaohu; committed via GitHub, May 15, 2020.
cumsum op optimization for GPU kernel (#24321)
Parent: d43e4047

Showing 2 changed files with 419 additions and 12 deletions (+419 −12):

paddle/fluid/operators/cumsum_op.cu                      +329 −6
python/paddle/fluid/tests/unittests/test_cumsum_op.py     +90 −6
paddle/fluid/operators/cumsum_op.cu (viewed @ c4dd596d)
```diff
@@ -13,11 +13,334 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/cum_op.h"
+#include "paddle/fluid/platform/gpu_launch_param_config.h"
 
-namespace ops = paddle::operators;
-using CUDA = paddle::platform::CUDADeviceContext;
+using Tensor = paddle::framework::Tensor;
+using LoDTensor = paddle::framework::LoDTensor;
+
+namespace paddle {
+namespace operators {
+
+template <typename T>
+__global__ void OuterScan(const T* in, T* out, int inner_dim_size,
+                          int outer_dim_size, int scan_dim_size,
+                          bool exclusive, bool reverse) {
+  int id = blockIdx.y * blockDim.x + threadIdx.x;
+
+  for (int outer_index = blockIdx.x; outer_index < outer_dim_size;
+       outer_index += gridDim.x) {
+    for (int inner_index = blockIdx.y * blockDim.x + threadIdx.x;
+         inner_index < inner_dim_size;
+         inner_index += gridDim.y * blockDim.x) {
+      int scan_index_init = 0;
+      int forward_direction = 1;
+      int src_index =
+          outer_index * scan_dim_size * inner_dim_size + inner_index;
+      int dst_index =
+          outer_index * scan_dim_size * inner_dim_size + inner_index;
+      if (reverse) {
+        src_index = src_index + (scan_dim_size - 1) * inner_dim_size;
+        dst_index = dst_index + (scan_dim_size - 1) * inner_dim_size;
+        forward_direction = -1;
+      }
+      if (exclusive) {
+        scan_index_init = 1;
+        out[dst_index] = 0;
+        dst_index = dst_index + (forward_direction * inner_dim_size);
+      }
+      T acc = 0;
+      for (int scan_index = scan_index_init; scan_index < scan_dim_size;
+           ++scan_index) {
+        acc = in[src_index] + acc;
+        out[dst_index] = acc;
+        src_index += (forward_direction * inner_dim_size);
+        dst_index += (forward_direction * inner_dim_size);
+      }
+    }
+  }
+}
```
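For the general (non-innermost) axis, OuterScan gives each thread one (outer_index, inner_index) pair and walks the scan axis serially in strides of inner_dim_size, so loads stay coalesced across inner_index. A minimal NumPy reference of that index arithmetic, handy for checking the kernel's semantics on the CPU (`outer_scan` is an illustrative helper, not part of the patch):

```python
import numpy as np

def outer_scan(x, axis, exclusive=False, reverse=False):
    """Mimic OuterScan: one serial pass per (outer, inner) pair over the
    flattened tensor, stepping the scan axis by inner_dim_size."""
    dims = x.shape
    outer = int(np.prod(dims[:axis]))        # all dims before the scan axis
    scan = dims[axis]
    inner = int(np.prod(dims[axis + 1:]))    # all dims after the scan axis
    flat, out = x.reshape(-1), np.zeros(x.size)
    for o in range(outer):
        for i in range(inner):
            src = dst = o * scan * inner + i
            step = inner
            if reverse:                      # start at the far end, walk back
                src = dst = src + (scan - 1) * inner
                step = -inner
            start, acc = 0, 0.0
            if exclusive:                    # first output is 0, shift by one
                out[dst] = 0
                dst += step
                start = 1
            for _ in range(start, scan):
                acc += flat[src]
                out[dst] = acc
                src += step
                dst += step
    return out.reshape(dims)

x = np.random.random((3, 4, 5))
assert np.allclose(outer_scan(x, 1), np.cumsum(x, axis=1))
ref = np.concatenate((np.zeros((3, 1, 5)), np.cumsum(x, axis=1)[:, :-1, :]),
                     axis=1)
assert np.allclose(outer_scan(x, 1, exclusive=True), ref)
```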
```diff
+// inclusive scan
+template <typename T, int num_threads_x, int num_threads_y>
+__global__ void InnerMostDimInclusiveScan(const T* in, T* out,
+                                          int inner_dim_size,
+                                          int outer_dim_size,
+                                          int scan_dim_size, bool reverse) {
+  __shared__ T share_data[num_threads_y][num_threads_x * 2];
+  T* share_row = share_data[threadIdx.y];
+  int forward_direction = 1;
+  if (reverse) forward_direction = -1;
+
+  for (int block_row = blockIdx.x * blockDim.y; block_row < outer_dim_size;
+       block_row += blockDim.y * gridDim.x) {
+    int row = block_row + threadIdx.y;
+    T acc = 0;
+    const T* row_src = in + row * scan_dim_size;
+    T* row_dst = out + row * scan_dim_size;
+    int block_col = 0;
+    bool loop_condition = (block_col < scan_dim_size);
+    if (reverse) {
+      loop_condition = (block_col >= 0);
+      block_col = scan_dim_size - 1;
+    }
+    while (loop_condition) {
+      // Load data into share memory(two value per thread)
+      int col1 = block_col + threadIdx.x * forward_direction;
+      int col2 = block_col + (num_threads_x + threadIdx.x) * forward_direction;
+      if (row < outer_dim_size) {
+        if (col1 < scan_dim_size && col1 >= 0) {
+          share_row[threadIdx.x] = row_src[col1];
+        } else {
+          share_row[threadIdx.x] = 0;
+        }
+        if (col2 < scan_dim_size && col2 >= 0) {
+          share_row[num_threads_x + threadIdx.x] = row_src[col2];
+        } else {
+          share_row[num_threads_x + threadIdx.x] = 0;
+        }
+        // Add the previous block acc to the result
+        if (threadIdx.x == 0) {
+          share_row[0] = share_row[0] + acc;
+        }
+      }
+      __syncthreads();
+
+      // Up-Sweep
+      for (unsigned s = num_threads_x, d = 1; s >= 1; s >>= 1, d <<= 1) {
+        if (row < outer_dim_size && threadIdx.x < s) {
+          unsigned offset = (2 * threadIdx.x + 1) * d - 1;
+          share_row[offset + d] = share_row[offset] + share_row[offset + d];
+        }
+        __syncthreads();
+      }
+
+      // Down-Sweep
+      for (unsigned s = 2, d = blockDim.x / 2; d >= 1; s <<= 1, d >>= 1) {
+        if (row < outer_dim_size && threadIdx.x < s - 1) {
+          unsigned offset = 2 * (threadIdx.x + 1) * d - 1;
+          share_row[offset + d] = share_row[offset] + share_row[offset + d];
+        }
+        __syncthreads();
+      }
+
+      // Write to the output
+      if (row < outer_dim_size) {
+        if (col1 < scan_dim_size && col1 >= 0)
+          row_dst[col1] = share_row[threadIdx.x];
+        if (col2 < scan_dim_size && col2 >= 0)
+          row_dst[col2] = share_row[num_threads_x + threadIdx.x];
+      }
+      acc = share_row[2 * num_threads_x - 1];
+      __syncthreads();
+      block_col += 2 * num_threads_x * forward_direction;
+      if (reverse)
+        loop_condition = (block_col >= 0);
+      else
+        loop_condition = (block_col < scan_dim_size);
+    }
+  }
+}
```
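Each block of InnerMostDimInclusiveScan owns num_threads_y rows and scans one row in tiles of 2 * num_threads_x elements: a tile is loaded into shared memory, scanned with the up-sweep/down-sweep above, and the tile's last element becomes the carry `acc` folded into the first element of the next tile. The carry logic alone can be checked against NumPy like this (`tiled_inclusive_scan` is an illustrative helper, not part of the patch):

```python
import numpy as np

def tiled_inclusive_scan(row, tile=64, reverse=False):
    """Scan one row tile by tile, carrying the running total (the kernel's
    `acc`) from each tile into the next, as InnerMostDimInclusiveScan does."""
    x = row[::-1] if reverse else row
    out = np.empty_like(x)
    acc = 0.0
    for start in range(0, len(x), tile):
        chunk = np.cumsum(x[start:start + tile]) + acc  # tile-local scan
        out[start:start + tile] = chunk
        acc = chunk[-1]                                  # carry to next tile
    return out[::-1] if reverse else out

row = np.random.random(1000)
assert np.allclose(tiled_inclusive_scan(row), np.cumsum(row))
assert np.allclose(tiled_inclusive_scan(row, reverse=True),
                   np.cumsum(row[::-1])[::-1])
```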
```diff
+// exclusive block scan and store block sum for large scan
+template <typename T>
+__global__ void InnerMostDimExclusiveScan(const T* in, T* out, T* sum_data,
+                                          int inner_dim_size,
+                                          int outer_dim_size,
+                                          int scan_dim_size, int two_power,
+                                          bool reverse) {
+  // https://stackoverflow.com/questions/27570552/templated-cuda-kernel-with-dynamic-shared-memory
+  extern __shared__ __align__(sizeof(T)) unsigned char raw_tmp[];
+  T* share_tmp = reinterpret_cast<T*>(raw_tmp);
+  int thread_id = threadIdx.x;
+  int block_id = blockIdx.x;
+  int block_scan_size = blockDim.x * 2;
+  int remain = scan_dim_size % (2 * blockDim.x);
+  if (block_id == gridDim.x - 1 && remain != 0) block_scan_size = remain;
+  int col1 = thread_id;
+  int col2 = thread_id + (block_scan_size) / 2;
+  int index1 = blockIdx.y * (scan_dim_size) + block_id * blockDim.x * 2 + col1;
+  int index2 = blockIdx.y * (scan_dim_size) + block_id * blockDim.x * 2 + col2;
+  if (reverse) {
+    index1 = blockIdx.y * (scan_dim_size) + scan_dim_size - 1 -
+             (block_id * blockDim.x * 2 + col1);
+    index2 = blockIdx.y * (scan_dim_size) + scan_dim_size - 1 -
+             (block_id * blockDim.x * 2 + col2);
+  }
+  int sum_index = blockIdx.y * gridDim.x + block_id;
+  if (thread_id < block_scan_size) {
+    share_tmp[col1 + (col1 >> 5)] = in[index1];
+    share_tmp[col2 + (col2 >> 5)] = in[index2];
+  } else {
+    share_tmp[col1 + (col1 >> 5)] = 0;
+    share_tmp[col2 + (col2 >> 5)] = 0;
+  }
+
+  // Up-Sweep
+  int offset = 1;
+  for (int d = (two_power / 2); d > 0; d >>= 1) {
+    __syncthreads();
+    if (thread_id < d) {
+      int tmp_index1 = offset * (2 * thread_id + 1) - 1;
+      int tmp_index2 = offset * (2 * thread_id + 2) - 1;
+      tmp_index1 = tmp_index1 + (tmp_index1 >> 5);
+      tmp_index2 = tmp_index2 + (tmp_index2 >> 5);
+      share_tmp[tmp_index2] += share_tmp[tmp_index1];
+    }
+    offset *= 2;
+  }
+  __syncthreads();
+
+  if (thread_id == 0) {
+    int tmp_index = (two_power - 1) + ((two_power - 1) >> 5);
+    sum_data[sum_index] = share_tmp[tmp_index];
+    share_tmp[tmp_index] = 0;
+  }
 
-REGISTER_OP_CUDA_KERNEL(cumsum, ops::CumKernel<CUDA, ops::CumsumFunctor<float>>,
-                        ops::CumKernel<CUDA, ops::CumsumFunctor<double>>,
-                        ops::CumKernel<CUDA, ops::CumsumFunctor<int>>,
-                        ops::CumKernel<CUDA, ops::CumsumFunctor<int64_t>>);
+
+  // Down Sweep
+  for (int d = 1; d < two_power; d *= 2) {
+    offset >>= 1;
+    __syncthreads();
+    if (thread_id < d) {
+      int tmp_index1 = offset * (2 * thread_id + 1) - 1;
+      int tmp_index2 = offset * (2 * thread_id + 2) - 1;
+      tmp_index1 = tmp_index1 + (tmp_index1 >> 5);
+      tmp_index2 = tmp_index2 + (tmp_index2 >> 5);
+      T tmp = share_tmp[tmp_index1];
+      share_tmp[tmp_index1] = share_tmp[tmp_index2];
+      share_tmp[tmp_index2] += tmp;
+    }
+  }
+  __syncthreads();
+
+  if (col1 < block_scan_size) out[index1] = share_tmp[col1 + (col1 >> 5)];
+  if (col2 < block_scan_size) out[index2] = share_tmp[col2 + (col2 >> 5)];
+}
```
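This is the classic work-efficient (Blelloch) scan: the up-sweep builds a reduction tree in shared memory, thread 0 saves the root as the block total in sum_data and zeroes it, and the down-sweep propagates partial sums back down to leave an exclusive prefix sum. The `>> 5` terms only pad indices to dodge shared-memory bank conflicts; they don't change the arithmetic. Stripped of the padding, the two sweeps look like this (a sketch for illustration, not part of the patch):

```python
def blelloch_exclusive_scan(a):
    """Work-efficient exclusive scan of a power-of-two-length list, mirroring
    the kernel's Up-Sweep / Down Sweep (without the bank-conflict padding)."""
    n = len(a)
    tmp = list(a)
    offset, d = 1, n // 2
    while d > 0:                      # Up-Sweep: build the reduction tree
        for t in range(d):
            i1 = offset * (2 * t + 1) - 1
            i2 = offset * (2 * t + 2) - 1
            tmp[i2] += tmp[i1]
        offset, d = offset * 2, d // 2
    total = tmp[n - 1]                # the block sum stored in sum_data
    tmp[n - 1] = 0                    # clear the root before descending
    d = 1
    while d < n:                      # Down Sweep: distribute partial sums
        offset //= 2
        for t in range(d):
            i1 = offset * (2 * t + 1) - 1
            i2 = offset * (2 * t + 2) - 1
            tmp[i1], tmp[i2] = tmp[i2], tmp[i1] + tmp[i2]
        d *= 2
    return tmp, total

scanned, total = blelloch_exclusive_scan([1, 2, 3, 4, 5, 6, 7, 8])
assert scanned == [0, 1, 3, 6, 10, 15, 21, 28] and total == 36
```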
```diff
+// for large scan_dim_size array we need to add for correct result
+template <typename T>
+__global__ void AddBlockScan(T* result, T* sum, int size, int scan_dim_size,
+                             int sum_size, bool reverse) {
+  int idx = threadIdx.x + blockDim.x * (blockIdx.x + blockIdx.y * gridDim.x);
+  int block_id_start = blockIdx.y * sum_size;
+  int block_id_end = blockIdx.x + blockIdx.y * sum_size;
+  int block_id = blockIdx.x;
+  int thread_id = threadIdx.x;
+
+  int col = block_id * blockDim.x + thread_id + size;
+  int index = blockIdx.y * (scan_dim_size) + col;
+  if (reverse) {
+    index = blockIdx.y * (scan_dim_size) + scan_dim_size - 1 - col;
+  }
+
+  if (col >= scan_dim_size || col < 0) return;
+  for (int i = block_id_start; i <= block_id_end; i++) {
+    result[index] += sum[i];
+  }
+}
```
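A single launch of InnerMostDimExclusiveScan scans each block's slice independently, so elements in later blocks still lack the totals of every earlier block; AddBlockScan runs afterwards and adds the stored per-block sums back in. Condensed to NumPy, the two-pass scheme is (`two_pass_exclusive_scan` is an illustrative helper; the real AddBlockScan only touches elements past the first block, but the effect is the same):

```python
import numpy as np

def two_pass_exclusive_scan(row, block=8):
    """Pass 1: an independent exclusive scan per block, recording each block's
    total. Pass 2: add the prefix of earlier block totals to every block."""
    n = len(row)
    out = np.empty_like(row)
    sums = []
    for start in range(0, n, block):                 # pass 1 (block scans)
        chunk = row[start:start + block]
        out[start:start + block] = np.cumsum(chunk) - chunk  # exclusive scan
        sums.append(chunk.sum())
    carry = np.cumsum([0.0] + sums[:-1])             # earlier block totals
    for b, start in enumerate(range(0, n, block)):   # pass 2 (AddBlockScan)
        out[start:start + block] += carry[b]
    return out

row = np.random.random(50)
ref = np.concatenate(([0.0], np.cumsum(row)[:-1]))
assert np.allclose(two_pass_exclusive_scan(row), ref)
```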
```diff
+template <typename DeviceContext, typename T>
+class CumCUDAKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* in = context.Input<framework::Tensor>("X");
+    auto* out = context.Output<framework::Tensor>("Out");
+
+    int axis = context.Attr<int>("axis");
+    bool exclusive = context.Attr<bool>("exclusive");
+    bool reverse = context.Attr<bool>("reverse");
+    auto in_dims = in->dims();
+    auto size = in->numel();
+
+    if (axis == -1) {
+      axis = in_dims.size() - 1;
+    }
+    PADDLE_ENFORCE_LT(
+        axis, in_dims.size(),
+        platform::errors::InvalidArgument("axis(%d) should be less than the "
+                                          "dimension(%d) of the input tensor.",
+                                          axis, in_dims.size()));
+
+    int scan_dim_size = in_dims[axis];
+    bool optimize_condition = (axis == (in_dims.size() - 1)) ? true : false;
+    int outer_dim_size = 1;
+    int inner_dim_size = 1;
+    // treat all dim index < axis as outer_dim_size
+    for (size_t i = 0; i < axis; i++) {
+      outer_dim_size *= in_dims[i];
+    }
+    // treat all dim index > axis as innner_dim_size
+    for (size_t i = axis + 1; i < in_dims.size(); i++) {
+      inner_dim_size *= in_dims[i];
+    }
+
+    T* out_data = out->mutable_data<T>(context.GetPlace());
+    const T* in_data = in->data<T>();
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+    if (optimize_condition) {
+      auto nextPowerOfTwo = [](int x) -> int {
+        int ret = 1;
+        while (ret < x) ret = ret * 2;
+        return ret;
+      };
+      if (exclusive) {
+        int element_per_block = nextPowerOfTwo(scan_dim_size) / 2;
+        if (element_per_block > 512 || element_per_block < 32) {
+          element_per_block = 64;
+        }
+        int two_power = element_per_block * 2;
+        dim3 block(element_per_block);
+        dim3 grid(((scan_dim_size + 1) / 2 + block.x - 1) / block.x,
+                  outer_dim_size);
+        int offset_size = (element_per_block * 2) >> 5;
+        int share_mem_size = (element_per_block * 2 + offset_size) * sizeof(T);
+        Tensor scan_sum;
+        paddle::framework::DDim dims{
+            ((scan_dim_size + 1) / 2 + block.x - 1) / block.x, outer_dim_size};
+        scan_sum.Resize(dims);
+        T* sum_data = scan_sum.mutable_data<T>(context.GetPlace());
+        InnerMostDimExclusiveScan<
+            T><<<grid, block, share_mem_size, dev_ctx.stream()>>>(
+            in_data, out_data, sum_data, inner_dim_size, outer_dim_size,
+            scan_dim_size, two_power, reverse);
+        // for large scan array we need to do add for correct result
+        int element_size = element_per_block * 2;
+        if (scan_dim_size > element_size) {
+          dim3 sum_block(element_per_block * 2);
+          dim3 sum_grid((scan_dim_size - element_size + block.x - 1) / block.x,
+                        outer_dim_size);
+          int sum_size = ((scan_dim_size + 1) / 2 + block.x - 1) / block.x;
+          AddBlockScan<T><<<sum_grid, sum_block, 0, dev_ctx.stream()>>>(
+              out_data, sum_data, element_size, scan_dim_size, sum_size,
+              reverse);
+        }
+      } else {
+        dim3 block(32, 16);
+        dim3 grid((outer_dim_size + block.y - 1) / block.y);
+        InnerMostDimInclusiveScan<T, 32,
+                                  16><<<grid, block, 0, dev_ctx.stream()>>>(
+            in_data, out_data, inner_dim_size, outer_dim_size, scan_dim_size,
+            reverse);
+      }
+    } else {
+      dim3 block(std::min(512, inner_dim_size));
+      dim3 grid(outer_dim_size, (inner_dim_size + block.x - 1) / block.x);
+      OuterScan<T><<<grid, block, 0, dev_ctx.stream()>>>(
+          in_data, out_data, inner_dim_size, outer_dim_size, scan_dim_size,
+          exclusive, reverse);
+    }
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(
+    cumsum, ops::CumCUDAKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::CumCUDAKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::CumCUDAKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::CumCUDAKernel<paddle::platform::CUDADeviceContext, int64_t>);
```
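Putting the pieces together, Compute() dispatches on the scan axis and the exclusive flag: OuterScan when the scan axis is not innermost, the 32x16 tiled inclusive kernel for innermost inclusive scans, and the block scan (plus AddBlockScan when the row exceeds one block) for innermost exclusive scans. A standalone sketch of that dispatch, including the nextPowerOfTwo block sizing (`chosen_path` is a hypothetical helper, not part of the patch):

```python
def next_power_of_two(x):
    """Mirror of the nextPowerOfTwo lambda in Compute()."""
    ret = 1
    while ret < x:
        ret *= 2
    return ret

def chosen_path(shape, axis, exclusive):
    """Which kernel path Compute() takes, per the dispatch above."""
    axis = axis if axis != -1 else len(shape) - 1
    if axis != len(shape) - 1:
        return "OuterScan"                       # scan axis not innermost
    if not exclusive:
        return "InnerMostDimInclusiveScan"       # 32x16 tiled row scan
    element_per_block = next_power_of_two(shape[axis]) // 2
    if element_per_block > 512 or element_per_block < 32:
        element_per_block = 64                   # capped block size
    if shape[axis] > element_per_block * 2:      # needs cross-block fix-up
        return "InnerMostDimExclusiveScan + AddBlockScan"
    return "InnerMostDimExclusiveScan"

print(chosen_path((3, 4, 5), axis=1, exclusive=False))    # OuterScan
print(chosen_path((4, 5, 888), axis=2, exclusive=True))   # single pass
print(chosen_path((4, 5, 3096), axis=2, exclusive=True))  # with AddBlockScan
```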
python/paddle/fluid/tests/unittests/test_cumsum_op.py (viewed @ c4dd596d)
```diff
@@ -108,24 +108,108 @@ class TestSumOp7(OpTest):
         self.check_grad(['X'], 'Out')
 
 
-class TestSumOp8(OpTest):
+class TestSumOpExclusive1(OpTest):
     def setUp(self):
         self.op_type = "cumsum"
         self.attrs = {'axis': 2, "exclusive": True}
-        a = np.random.random((5, 6, 4)).astype("float64")
+        a = np.random.random((4, 5, 65)).astype("float64")
         self.inputs = {'X': a}
         self.outputs = {
             'Out': np.concatenate(
-                (np.zeros(
-                    (5, 6, 1), dtype=np.float64), a[:, :, :-1].cumsum(axis=2)),
+                (np.zeros(
+                    (4, 5, 1), dtype=np.float64), a[:, :, :-1].cumsum(axis=2)),
                 axis=2)
         }
 
     def test_check_output(self):
         self.check_output()
 
-    def test_check_grad(self):
-        self.check_grad(['X'], 'Out')
```
```diff
+class TestSumOpExclusive2(OpTest):
+    def setUp(self):
+        self.op_type = "cumsum"
+        self.attrs = {'axis': 2, "exclusive": True}
+        a = np.random.random((1, 1, 888)).astype("float64")
+        self.inputs = {'X': a}
+        self.outputs = {
+            'Out': np.concatenate(
+                (np.zeros(
+                    (1, 1, 1), dtype=np.float64), a[:, :, :-1].cumsum(axis=2)),
+                axis=2)
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestSumOpExclusive3(OpTest):
+    def setUp(self):
+        self.op_type = "cumsum"
+        self.attrs = {'axis': 2, "exclusive": True}
+        a = np.random.random((4, 5, 888)).astype("float32")
+        self.inputs = {'X': a}
+        self.outputs = {
+            'Out': np.concatenate(
+                (np.zeros(
+                    (4, 5, 1), dtype=np.float64), a[:, :, :-1].cumsum(axis=2)),
+                axis=2)
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestSumOpExclusive4(OpTest):
+    def setUp(self):
+        self.op_type = "cumsum"
+        self.attrs = {'axis': 2, "exclusive": True}
+        a = np.random.random((1, 1, 3049)).astype("float64")
+        self.inputs = {'X': a}
+        self.outputs = {
+            'Out': np.concatenate(
+                (np.zeros(
+                    (1, 1, 1), dtype=np.float64), a[:, :, :-1].cumsum(axis=2)),
+                axis=2)
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestSumOpExclusive5(OpTest):
+    def setUp(self):
+        self.op_type = "cumsum"
+        self.attrs = {'axis': 2, "exclusive": True}
+        a = np.random.random((4, 5, 3096)).astype("float64")
+        self.inputs = {'X': a}
+        self.outputs = {
+            'Out': np.concatenate(
+                (np.zeros(
+                    (4, 5, 1), dtype=np.float64), a[:, :, :-1].cumsum(axis=2)),
+                axis=2)
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestSumOpReverseExclusive(OpTest):
+    def setUp(self):
+        self.op_type = "cumsum"
+        self.attrs = {'axis': 2, 'reverse': True, "exclusive": True}
+        a = np.random.random((4, 5, 6)).astype("float64")
+        self.inputs = {'X': a}
+        a = np.flip(a, axis=2)
+        self.outputs = {
+            'Out': np.concatenate(
+                (np.flip(a[:, :, :-1].cumsum(axis=2), axis=2), np.zeros(
+                    (4, 5, 1), dtype=np.float64)),
+                axis=2)
+        }
+
+    def test_check_output(self):
+        self.check_output()
```
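The new exclusive-test scan lengths look deliberately chosen: 65 and 888 fit in a single block scan, while 3049 and 3096 overflow it and exercise the AddBlockScan fix-up (the non-power-of-two lengths presumably also hit the tail-block `remain` padding). Re-deriving the block sizing from cumsum_op.cu (`exclusive_path` is an illustrative helper, not in the patch):

```python
def exclusive_path(n):
    """Map a scan length to the exclusive-scan path it exercises."""
    p = 1
    while p < n:
        p *= 2
    element_per_block = p // 2
    if element_per_block > 512 or element_per_block < 32:
        element_per_block = 64
    return "scan+AddBlockScan" if n > element_per_block * 2 else "scan only"

for n in (65, 888, 3049, 3096):
    print(n, exclusive_path(n))
# 65 scan only; 888 scan only; 3049 scan+AddBlockScan; 3096 scan+AddBlockScan
```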
```diff
 class BadInputTest(unittest.TestCase):
@@ -133,7 +217,7 @@ class BadInputTest(unittest.TestCase):
         with fluid.program_guard(fluid.Program()):
 
             def test_bad_x():
-                data = [1, 2, 3]
+                data = [1, 2, 4]
                 result = fluid.layers.cumsum(data, axis=0)
 
             self.assertRaises(TypeError, test_bad_x)
```