Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
3f5c2b5f
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
3f5c2b5f
编写于
7月 03, 2023
作者:
W
Wang Xin
提交者:
GitHub
7月 03, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[CodeStyle][CINN] fix cpplint codestyle for [readability/casting] (#55069)
上级
0fd50551
变更
24
隐藏空白更改
内联
并排
Showing
24 changed file
with
111 addition
and
106 deletion
+111
-106
paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind_test.cc
...uto_schedule/search_space/auto_gen_rule/auto_bind_test.cc
+6
-5
paddle/cinn/backends/codegen_c.cc
paddle/cinn/backends/codegen_c.cc
+1
-1
paddle/cinn/backends/ir_schedule_test.cc
paddle/cinn/backends/ir_schedule_test.cc
+6
-6
paddle/cinn/common/float16.h
paddle/cinn/common/float16.h
+2
-2
paddle/cinn/common/float16_bfloat16_cuda_test.cu
paddle/cinn/common/float16_bfloat16_cuda_test.cu
+2
-2
paddle/cinn/frontend/net_builder_test.cc
paddle/cinn/frontend/net_builder_test.cc
+5
-3
paddle/cinn/hlir/framework/graph_compiler.cc
paddle/cinn/hlir/framework/graph_compiler.cc
+2
-2
paddle/cinn/hlir/framework/op_lowering_util.cc
paddle/cinn/hlir/framework/op_lowering_util.cc
+5
-3
paddle/cinn/hlir/pass/alterlayout_test.cc
paddle/cinn/hlir/pass/alterlayout_test.cc
+3
-3
paddle/cinn/hlir/pe/nn.cc
paddle/cinn/hlir/pe/nn.cc
+7
-7
paddle/cinn/hlir/pe/reduction.cc
paddle/cinn/hlir/pe/reduction.cc
+8
-6
paddle/cinn/hlir/pe/schedule.cc
paddle/cinn/hlir/pe/schedule.cc
+1
-1
paddle/cinn/ir/ir_schedule.cc
paddle/cinn/ir/ir_schedule.cc
+1
-1
paddle/cinn/ir/ir_schedule_util.cc
paddle/cinn/ir/ir_schedule_util.cc
+9
-7
paddle/cinn/ir/schedule_desc_test.cc
paddle/cinn/ir/schedule_desc_test.cc
+25
-30
paddle/cinn/lang/lower_impl.cc
paddle/cinn/lang/lower_impl.cc
+3
-2
paddle/cinn/optim/compute_inline_expand.cc
paddle/cinn/optim/compute_inline_expand.cc
+1
-1
paddle/cinn/optim/unroll_loops_test.cc
paddle/cinn/optim/unroll_loops_test.cc
+1
-1
paddle/cinn/optim/vectorize_loops_test.cc
paddle/cinn/optim/vectorize_loops_test.cc
+1
-1
paddle/cinn/poly/stage.cc
paddle/cinn/poly/stage.cc
+6
-6
paddle/cinn/runtime/cinn_runtime.cc
paddle/cinn/runtime/cinn_runtime.cc
+1
-1
paddle/cinn/runtime/cpu/host_intrinsics.cc
paddle/cinn/runtime/cpu/host_intrinsics.cc
+2
-2
paddle/cinn/runtime/cuda/float16.h
paddle/cinn/runtime/cuda/float16.h
+1
-1
paddle/cinn/runtime/tiny_runtime.cc
paddle/cinn/runtime/tiny_runtime.cc
+12
-12
未找到文件。
paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind_test.cc
浏览文件 @
3f5c2b5f
...
...
@@ -68,8 +68,8 @@ class TestAutoBind : public TestAutoGenRuleBase {
}
else
if
(
total_num
<=
kMaxBlocks
*
kMaxThreadsPerBlock
)
{
ASSERT_EQ
(
all_loops
.
size
(),
2
);
EXPECT_EQ
(
all_loops
[
0
].
As
<
ir
::
For
>
()
->
extent
.
as_int32
(),
static_cast
<
int32_t
>
(
std
::
ceil
(
double
(
total_num
)
/
kMaxThreadsPerBlock
)));
static_cast
<
int32_t
>
(
std
::
ceil
(
static_cast
<
double
>
(
total_num
)
/
kMaxThreadsPerBlock
)));
EXPECT_TRUE
(
all_loops
[
0
].
As
<
ir
::
For
>
()
->
is_gpu_block_binded
());
EXPECT_EQ
(
all_loops
[
1
].
As
<
ir
::
For
>
()
->
extent
.
as_int32
(),
kMaxThreadsPerBlock
);
...
...
@@ -81,9 +81,10 @@ class TestAutoBind : public TestAutoGenRuleBase {
EXPECT_EQ
(
all_loops
[
1
].
As
<
ir
::
For
>
()
->
extent
.
as_int32
(),
kMaxThreadsPerBlock
);
EXPECT_TRUE
(
all_loops
[
1
].
As
<
ir
::
For
>
()
->
is_gpu_thread_binded
());
EXPECT_EQ
(
all_loops
[
2
].
As
<
ir
::
For
>
()
->
extent
.
as_int32
(),
static_cast
<
int32_t
>
(
std
::
ceil
(
double
(
total_num
)
/
(
kMaxBlocks
*
kMaxThreadsPerBlock
))));
EXPECT_EQ
(
all_loops
[
2
].
As
<
ir
::
For
>
()
->
extent
.
as_int32
(),
static_cast
<
int32_t
>
(
std
::
ceil
(
static_cast
<
double
>
(
total_num
)
/
(
kMaxBlocks
*
kMaxThreadsPerBlock
))));
EXPECT_FALSE
(
all_loops
[
2
].
As
<
ir
::
For
>
()
->
is_binded
());
}
...
...
paddle/cinn/backends/codegen_c.cc
浏览文件 @
3f5c2b5f
...
...
@@ -162,7 +162,7 @@ void CodeGenC::Visit(const ir::Mod *op) {
auto
copied
=
op
->
b
();
optim
::
Simplify
(
&
copied
);
if
(
copied
.
is_constant
())
{
int
temp
=
(
int
)
(
copied
.
get_constant
());
int
temp
=
static_cast
<
int
>
(
copied
.
get_constant
());
if
((
temp
&
(
temp
-
1
))
==
0
)
{
os
()
<<
"("
;
Print
(
op
->
a
());
...
...
paddle/cinn/backends/ir_schedule_test.cc
浏览文件 @
3f5c2b5f
...
...
@@ -2871,11 +2871,11 @@ TEST(IrSchedule, Annotate) {
ir
::
IRSchedule
ir_sch
(
ir
::
ModuleExpr
({
funcs
[
0
]
->
body
}));
auto
fused
=
ir_sch
.
Fuse
(
"B"
,
{
0
,
1
});
auto
block_b
=
ir_sch
.
GetBlock
(
"B"
);
ir_sch
.
Annotate
(
block_b
,
"k1"
,
int
(
64
)
);
ir_sch
.
Annotate
(
block_b
,
"k1"
,
64
);
block_b
=
ir_sch
.
GetBlock
(
"B"
);
ir_sch
.
Annotate
(
block_b
,
"k2"
,
bool
(
true
)
);
ir_sch
.
Annotate
(
block_b
,
"k2"
,
true
);
block_b
=
ir_sch
.
GetBlock
(
"B"
);
ir_sch
.
Annotate
(
block_b
,
"k3"
,
float
(
2.0
)
);
ir_sch
.
Annotate
(
block_b
,
"k3"
,
2.0
f
);
block_b
=
ir_sch
.
GetBlock
(
"B"
);
ir_sch
.
Annotate
(
block_b
,
"k4"
,
std
::
string
(
"v4"
));
std
::
string
expected_expr
=
R"ROC({
...
...
@@ -2915,11 +2915,11 @@ TEST(IrSchedule, Unannotate) {
ir
::
IRSchedule
ir_sch
(
ir
::
ModuleExpr
({
funcs
[
0
]
->
body
}));
auto
fused
=
ir_sch
.
Fuse
(
"B"
,
{
0
,
1
});
auto
block_b
=
ir_sch
.
GetBlock
(
"B"
);
ir_sch
.
Annotate
(
block_b
,
"k1"
,
int
(
64
)
);
ir_sch
.
Annotate
(
block_b
,
"k1"
,
64
);
block_b
=
ir_sch
.
GetBlock
(
"B"
);
ir_sch
.
Annotate
(
block_b
,
"k2"
,
bool
(
true
)
);
ir_sch
.
Annotate
(
block_b
,
"k2"
,
true
);
block_b
=
ir_sch
.
GetBlock
(
"B"
);
ir_sch
.
Annotate
(
block_b
,
"k3"
,
float
(
2.0
)
);
ir_sch
.
Annotate
(
block_b
,
"k3"
,
2.0
f
);
block_b
=
ir_sch
.
GetBlock
(
"B"
);
ir_sch
.
Annotate
(
block_b
,
"k4"
,
std
::
string
(
"v4"
));
block_b
=
ir_sch
.
GetBlock
(
"B"
);
...
...
paddle/cinn/common/float16.h
浏览文件 @
3f5c2b5f
...
...
@@ -597,9 +597,9 @@ __host__ __device__ inline bool(isfinite)(const float16& a) {
__host__
__device__
inline
float16
(
abs
)(
const
float16
&
a
)
{
#if defined(CINN_CUDA_FP16) && (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530)
return
float16
(
__habs
(
a
.
to_half
()));
return
static_cast
<
float16
>
(
__habs
(
a
.
to_half
()));
#else
return
float16
(
fabsf
(
float
(
a
)));
return
static_cast
<
float16
>
(
fabsf
(
static_cast
<
float
>
(
a
)));
#endif
}
...
...
paddle/cinn/common/float16_bfloat16_cuda_test.cu
浏览文件 @
3f5c2b5f
...
...
@@ -100,7 +100,7 @@ __global__ void cast_fp16_to_fp32_cuda_kernel(const float16* input,
float
*
out
)
{
int
idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
idx
<
num
)
{
out
[
idx
]
=
float
(
input
[
idx
]);
out
[
idx
]
=
static_cast
<
float
>
(
input
[
idx
]);
}
}
...
...
@@ -131,7 +131,7 @@ __global__ void cast_bf16_to_fp32_cuda_kernel(const bfloat16* input,
float
*
out
)
{
int
idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
idx
<
num
)
{
out
[
idx
]
=
float
(
input
[
idx
]);
out
[
idx
]
=
static_cast
<
float
>
(
input
[
idx
]);
}
}
...
...
paddle/cinn/frontend/net_builder_test.cc
浏览文件 @
3f5c2b5f
...
...
@@ -484,7 +484,7 @@ TEST(net_build, program_execute_cast) {
for
(
int
h
=
0
;
h
<
H
;
++
h
)
{
std
::
string
line
;
int
index
=
h
+
H
*
b
;
float
in_data
=
(
float
)
input_data
[
index
]
;
float
in_data
=
static_cast
<
float
>
(
input_data
[
index
])
;
float
out_data
=
output_data
[
index
];
line
+=
(
std
::
to_string
(
out_data
)
+
", "
);
EXPECT_EQ
(
in_data
,
out_data
);
...
...
@@ -1339,7 +1339,8 @@ TEST(net_build, program_execute_repeat_axis_0) {
std
::
vector
<
float
>
output_data
=
GetTensorData
<
float
>
(
output_tensor
,
target
);
for
(
int
m
=
0
;
m
<
new_M
;
++
m
)
{
for
(
int
n
=
0
;
n
<
new_N
;
++
n
)
{
int
in_index
=
n
+
N
*
static_cast
<
int
>
(
std
::
floor
((
float
)
m
/
repeats
));
int
in_index
=
n
+
N
*
static_cast
<
int
>
(
std
::
floor
(
static_cast
<
float
>
(
m
)
/
repeats
));
int
out_index
=
n
+
new_N
*
m
;
float
in_data
=
input_data
[
in_index
];
float
out_data
=
output_data
[
out_index
];
...
...
@@ -1393,7 +1394,8 @@ TEST(net_build, program_execute_repeat_axis_1) {
std
::
vector
<
float
>
output_data
=
GetTensorData
<
float
>
(
output_tensor
,
target
);
for
(
int
m
=
0
;
m
<
new_M
;
++
m
)
{
for
(
int
n
=
0
;
n
<
new_N
;
++
n
)
{
int
in_index
=
N
*
m
+
static_cast
<
int
>
(
std
::
floor
((
float
)
n
/
repeats
));
int
in_index
=
N
*
m
+
static_cast
<
int
>
(
std
::
floor
(
static_cast
<
float
>
(
n
)
/
repeats
));
int
out_index
=
n
+
new_N
*
m
;
float
in_data
=
input_data
[
in_index
];
float
out_data
=
output_data
[
out_index
];
...
...
paddle/cinn/hlir/framework/graph_compiler.cc
浏览文件 @
3f5c2b5f
...
...
@@ -155,7 +155,7 @@ void Program::Export(const std::vector<std::string>& persistent_vars,
std
::
string
name
=
(
std
::
string
)
varname
;
auto
t
=
scope_
->
GetTensor
(
name
);
cinn_buffer_t
buffer
=
*
t
->
buffer
();
buffer
.
memory
=
(
uint8_t
*
)
0
;
buffer
.
memory
=
reinterpret_cast
<
uint8_t
*>
(
0
)
;
if
(
std
::
find
(
persistent_vars
.
begin
(),
persistent_vars
.
end
(),
name
)
!=
persistent_vars
.
end
())
{
pvars
.
emplace_back
(
t
->
buffer
(),
...
...
@@ -206,7 +206,7 @@ void Program::Export(const std::vector<std::string>& persistent_vars,
tellplaceholder
(
instplaceholder
+
findex
*
12
+
8
,
f
);
for
(
auto
&
arg
:
all_args
)
{
uintptr_t
bufindex
=
varindex
[
arg
];
cinn_pod_value_t
v
(
(
cinn_buffer_t
*
)
bufindex
);
cinn_pod_value_t
v
(
reinterpret_cast
<
cinn_buffer_t
*>
(
bufindex
)
);
fwrite
(
&
v
,
sizeof
(
cinn_pod_value_t
),
1
,
f
);
}
}
...
...
paddle/cinn/hlir/framework/op_lowering_util.cc
浏览文件 @
3f5c2b5f
...
...
@@ -722,8 +722,9 @@ void LoopAssignReduceWithLast(ir::IRSchedule& ir_sch,
need_reduce_last_count
*=
inshape
[
i
];
}
}
int
warp_reduce_need_sm_count
=
ceil
((
need_reduce_last_count
*
32
)
/
float
(
target
.
get_max_threads_per_sm
()));
int
warp_reduce_need_sm_count
=
ceil
((
need_reduce_last_count
*
32
)
/
static_cast
<
float
>
(
target
.
get_max_threads_per_sm
()));
// Set Num_max_threads to 32 is Warp Reduce
if
(
target
.
get_multi_processor_count
()
<
warp_reduce_need_sm_count
)
{
max_num_threads
=
32
;
...
...
@@ -805,7 +806,8 @@ void LoopAssignReduceWithLast(ir::IRSchedule& ir_sch,
}
LoopOrderAssignReduce
(
ir_sch
,
block_name
,
first_axes
,
target
,
true
);
// fuse axis before reduce to bind blockidx.
for
(
int
idx
=
0
;
idx
<
int
(
inshape
.
size
()
-
axes
.
size
())
-
1
;
++
idx
)
{
for
(
int
idx
=
0
;
idx
<
static_cast
<
int
>
(
inshape
.
size
()
-
axes
.
size
())
-
1
;
++
idx
)
{
ir_sch
.
Fuse
(
block_name
,
{
0
,
1
});
}
}
...
...
paddle/cinn/hlir/pass/alterlayout_test.cc
浏览文件 @
3f5c2b5f
...
...
@@ -210,7 +210,7 @@ TEST(conv_bn_conv, conv_bn_conv) {
attrs
[
"data_format"
]
=
src_layout
;
absl
::
flat_hash_map
<
std
::
string
,
Program
::
attr_t
>
attrs1
;
attrs1
[
"epsilon"
]
=
(
float
)
0.001
;
attrs1
[
"epsilon"
]
=
0.001
f
;
auto
c
=
program
.
conv2d
(
A
,
B
,
attrs
);
auto
d
=
program
.
batchnorm
(
c
,
Scale
,
Bias
,
Mean
,
Variance
,
attrs1
);
...
...
@@ -317,7 +317,7 @@ TEST(conv_softmax_conv, conv_softmax_conv) {
attrs
[
"data_format"
]
=
src_layout
;
absl
::
flat_hash_map
<
std
::
string
,
Program
::
attr_t
>
attrs1
;
attrs1
[
"axis"
]
=
(
int
)
-
1
;
attrs1
[
"axis"
]
=
static_cast
<
int
>
(
-
1
)
;
auto
c
=
program
.
conv2d
(
A
,
B
,
attrs
);
auto
d
=
program
.
softmax
(
c
,
attrs1
);
...
...
@@ -417,7 +417,7 @@ TEST(conv_mul_conv, conv_mul_conv) {
attrs
[
"data_format"
]
=
src_layout
;
absl
::
flat_hash_map
<
std
::
string
,
Program
::
attr_t
>
attrs1
;
attrs1
[
"axis"
]
=
(
int
)
-
1
;
attrs1
[
"axis"
]
=
static_cast
<
int
>
(
-
1
)
;
auto
c
=
program
.
conv2d
(
A
,
B
,
attrs
);
auto
d
=
program
.
mul
(
c
,
C
,
1
,
1
);
...
...
paddle/cinn/hlir/pe/nn.cc
浏览文件 @
3f5c2b5f
...
...
@@ -806,15 +806,15 @@ std::vector<Tensor> Depthwise_Conv2d_NCHW(const Tensor &input,
CHECK
(
weight
->
shape
[
1
].
is_constant
());
CHECK
(
weight
->
shape
[
2
].
is_constant
());
CHECK
(
weight
->
shape
[
3
].
is_constant
());
int
B
=
(
int
)
input
->
shape
[
0
].
get_constant
(
);
int
O
=
(
int
)
weight
->
shape
[
1
].
get_constant
(
)
*
(
int
)
input
->
shape
[
1
].
get_constant
(
);
int
H
=
(
(
int
)
input
->
shape
[
2
].
get_constant
(
)
-
(
int
)
weight
->
shape
[
2
].
get_constant
(
)
+
2
*
pad_h
)
/
int
B
=
static_cast
<
int
>
(
input
->
shape
[
0
].
get_constant
()
);
int
O
=
static_cast
<
int
>
(
weight
->
shape
[
1
].
get_constant
()
)
*
static_cast
<
int
>
(
input
->
shape
[
1
].
get_constant
()
);
int
H
=
(
static_cast
<
int
>
(
input
->
shape
[
2
].
get_constant
()
)
-
static_cast
<
int
>
(
weight
->
shape
[
2
].
get_constant
()
)
+
2
*
pad_h
)
/
stride_h
+
1
;
int
W
=
(
(
int
)
input
->
shape
[
3
].
get_constant
(
)
-
(
int
)
weight
->
shape
[
3
].
get_constant
(
)
+
2
*
pad_w
)
/
int
W
=
(
static_cast
<
int
>
(
input
->
shape
[
3
].
get_constant
()
)
-
static_cast
<
int
>
(
weight
->
shape
[
3
].
get_constant
()
)
+
2
*
pad_w
)
/
stride_w
+
1
;
output_shape
=
{
...
...
paddle/cinn/hlir/pe/reduction.cc
浏览文件 @
3f5c2b5f
...
...
@@ -380,7 +380,8 @@ std::vector<ir::Tensor> BlockReduceInternal(const ir::Tensor& A,
// compute the reduce dimension stride.
std
::
vector
<
Expr
>
last_reduce_stride
(
A
->
shape
.
size
()
-
axes
.
front
(),
Expr
(
1
));
for
(
int
idx
=
A
->
shape
.
size
(),
index
=
int
(
last_reduce_stride
.
size
())
-
2
;
for
(
int
idx
=
A
->
shape
.
size
(),
index
=
static_cast
<
int
>
(
last_reduce_stride
.
size
())
-
2
;
index
>=
0
;
--
index
)
{
last_reduce_stride
[
index
]
=
last_reduce_stride
[
index
+
1
]
*
A
->
shape
[
--
idx
];
...
...
@@ -407,8 +408,8 @@ std::vector<ir::Tensor> BlockReduceInternal(const ir::Tensor& A,
// compute output shape.
std
::
vector
<
Expr
>
out_shape
(
A
->
shape
.
begin
(),
A
->
shape
.
begin
()
+
axes
.
front
());
int
tailf
=
keep_dim
?
(
int
(
A
->
shape
.
size
())
-
axes
.
front
())
:
(
int
(
A
->
shape
.
size
())
-
axes
.
back
()
-
1
);
int
tailf
=
keep_dim
?
(
static_cast
<
int
>
(
A
->
shape
.
size
())
-
axes
.
front
())
:
(
static_cast
<
int
>
(
A
->
shape
.
size
())
-
axes
.
back
()
-
1
);
for
(
int
idx
=
0
;
idx
<
tailf
;
++
idx
)
{
out_shape
.
push_back
(
Expr
(
1
));
}
...
...
@@ -538,8 +539,8 @@ std::vector<ir::Tensor> BlockReduce(const ir::Tensor& A,
// compute output tensor shape.
std
::
vector
<
Expr
>
out_shape
(
A
->
shape
.
begin
(),
A
->
shape
.
begin
()
+
axes
.
front
());
int
tailf
=
keep_dim
?
(
int
(
A
->
shape
.
size
())
-
axes
.
front
())
:
(
int
(
A
->
shape
.
size
())
-
axes
.
back
()
-
1
);
int
tailf
=
keep_dim
?
(
static_cast
<
int
>
(
A
->
shape
.
size
())
-
axes
.
front
())
:
(
static_cast
<
int
>
(
A
->
shape
.
size
())
-
axes
.
back
()
-
1
);
for
(
int
idx
=
0
;
idx
<
tailf
;
++
idx
)
{
out_shape
.
push_back
(
Expr
(
1
));
}
...
...
@@ -832,7 +833,8 @@ std::vector<ir::Tensor> TwoStepBlockReduceInternal(
}
int
warp_reduce_need_sm_count
=
ceil
((
need_reduce_last_count
*
32
)
/
float
(
common
::
DefaultNVGPUTarget
().
get_max_threads_per_sm
()));
static_cast
<
float
>
(
common
::
DefaultNVGPUTarget
().
get_max_threads_per_sm
()));
// Set Num_max_threads to 32 is Warp Reduce
if
(
common
::
DefaultNVGPUTarget
().
get_multi_processor_count
()
<
warp_reduce_need_sm_count
)
{
...
...
paddle/cinn/hlir/pe/schedule.cc
浏览文件 @
3f5c2b5f
...
...
@@ -2662,7 +2662,7 @@ int gcd(int a, int b) {
int
MaxFactorLessThan
(
int
a
,
int
b
)
{
CHECK_GT
(
a
,
b
);
int
res
=
1
;
for
(
int
i
=
2
;
i
<=
(
int
)
sqrt
((
double
)
a
);
i
++
)
{
for
(
int
i
=
2
;
i
<=
static_cast
<
int
>
(
sqrt
(
static_cast
<
double
>
(
a
))
);
i
++
)
{
if
(
a
%
i
==
0
)
{
if
(
i
<=
b
)
res
=
std
::
max
(
res
,
i
);
if
(
a
/
i
<=
b
)
res
=
std
::
max
(
res
,
a
/
i
);
...
...
paddle/cinn/ir/ir_schedule.cc
浏览文件 @
3f5c2b5f
...
...
@@ -2189,7 +2189,7 @@ void ScheduleImpl::CopyTransformAndLoopInfo(const Expr& block,
Expr
new_loop
;
VLOG
(
3
)
<<
"changed_loop_num is : "
<<
changed_loop_num
;
VLOG
(
3
)
<<
"old_iter_values.size() is : "
<<
old_iter_values
.
size
();
if
(
changed_loop_num
>=
(
int
)
old_iter_values
.
size
(
))
{
if
(
changed_loop_num
>=
static_cast
<
int
>
(
old_iter_values
.
size
()
))
{
new_loop
=
optim
::
IRCopy
(
block
);
new_loop
.
As
<
ir
::
ScheduleBlockRealize
>
()
->
iter_values
=
new_iter_values
;
}
else
{
...
...
paddle/cinn/ir/ir_schedule_util.cc
浏览文件 @
3f5c2b5f
...
...
@@ -74,7 +74,7 @@ int GetLoopExtent(const Expr& loop) {
CHECK
(
loop
.
As
<
ir
::
For
>
());
CHECK
(
common
::
is_zero
(
loop
.
As
<
ir
::
For
>
()
->
min
));
CHECK
(
loop
.
As
<
ir
::
For
>
()
->
extent
.
is_constant
());
return
(
int
)
loop
.
As
<
ir
::
For
>
()
->
extent
.
get_constant
(
);
return
static_cast
<
int
>
(
loop
.
As
<
ir
::
For
>
()
->
extent
.
get_constant
()
);
}
void
SetCudaAxisInfo
(
Expr
*
lowered_func
)
{
...
...
@@ -249,7 +249,8 @@ std::vector<int> ValidateFactors(const std::vector<int>& factors,
<<
"In Split, when there is -1 in factors, the other factors' product "
"should be <= "
"original loop's extent! Please check."
;
int
minus_one_candidate
=
(
int
)
ceil
((
double
)
total_extent
/
(
double
)
product
);
int
minus_one_candidate
=
static_cast
<
int
>
(
ceil
(
static_cast
<
double
>
(
total_extent
)
/
static_cast
<
double
>
(
product
)));
for
(
int
i
=
0
;
i
<
validated_factors
.
size
();
++
i
)
{
if
(
validated_factors
[
i
]
==
-
1
)
{
validated_factors
[
i
]
=
minus_one_candidate
;
...
...
@@ -490,7 +491,7 @@ Expr MakeCacheBlock(const std::vector<IterRange>& buffer_ranges,
ir
::
ScheduleBlock
::
Make
(
block_vars
,
{},
{},
new_tensor
->
name
,
Block
::
Make
({
body
})));
Expr
new_body
=
block
;
for
(
int
i
=
(
int
)
loop_vars
.
size
(
)
-
1
;
i
>=
0
;
i
--
)
{
for
(
int
i
=
static_cast
<
int
>
(
loop_vars
.
size
()
)
-
1
;
i
>=
0
;
i
--
)
{
new_body
=
For
::
Make
(
loop_vars
[
i
],
Expr
(
0
),
common
::
AutoSimplify
(
buffer_ranges
[
i
].
extent
),
...
...
@@ -531,7 +532,9 @@ void FindInsertionPoint(Expr& root, CacheBlockInfo* info, bool is_write) {
->
body
.
As
<
Block
>
());
info
->
loc_block
=
root
.
As
<
ScheduleBlockRealize
>
()
->
schedule_block
.
As
<
ScheduleBlock
>
()
->
body
;
for
(
int
i
=
0
;
i
<
(
int
)
info
->
loc_block
.
As
<
Block
>
()
->
stmts
.
size
();
++
i
)
{
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
info
->
loc_block
.
As
<
Block
>
()
->
stmts
.
size
());
++
i
)
{
if
(
Contains
(
info
->
loc_block
.
As
<
Block
>
()
->
stmts
[
i
],
producer
))
{
info
->
loc_pos
=
i
+
1
;
break
;
...
...
@@ -1075,9 +1078,8 @@ std::vector<IterRange> CalculateRequiredRegions(
(
*
find_for_loops
.
begin
()).
As
<
ir
::
For
>
()
->
min
,
(
*
find_for_loops
.
begin
()).
As
<
ir
::
For
>
()
->
extent
);
}
else
{
int
cons
=
(
int
)
block
.
As
<
ir
::
ScheduleBlockRealize
>
()
->
iter_values
[
i
]
.
is_constant
();
int
cons
=
static_cast
<
int
>
(
block
.
As
<
ir
::
ScheduleBlockRealize
>
()
->
iter_values
[
i
].
is_constant
());
required_buffer_range
.
emplace_back
(
Expr
(
cons
),
Expr
(
1
));
}
}
...
...
paddle/cinn/ir/schedule_desc_test.cc
浏览文件 @
3f5c2b5f
...
...
@@ -863,32 +863,29 @@ TEST_F(TestScheduleDesc, StepKind_Annotate) {
auto
block_b
=
ir_sch
.
GetBlock
(
"B"
);
trace
.
Append
(
ScheduleDesc
::
Step
(
"GetBlock"
,
{},
{{
"block_name"
,
std
::
string
(
"B"
)}},
{
block_b
}));
ir_sch
.
Annotate
(
block_b
,
"k1"
,
int
(
64
));
trace
.
Append
(
ScheduleDesc
::
Step
(
"AnnotateIntAttr"
,
{{
"block"
,
std
::
vector
<
Expr
>
({
block_b
})}},
{{
"key"
,
std
::
string
(
"k1"
)},
{
"value"
,
int
(
64
)}},
{}));
ir_sch
.
Annotate
(
block_b
,
"k1"
,
64
);
trace
.
Append
(
ScheduleDesc
::
Step
(
"AnnotateIntAttr"
,
{{
"block"
,
std
::
vector
<
Expr
>
({
block_b
})}},
{{
"key"
,
std
::
string
(
"k1"
)},
{
"value"
,
64
}},
{}));
block_b
=
ir_sch
.
GetBlock
(
"B"
);
trace
.
Append
(
ScheduleDesc
::
Step
(
"GetBlock"
,
{},
{{
"block_name"
,
std
::
string
(
"B"
)}},
{
block_b
}));
ir_sch
.
Annotate
(
block_b
,
"k2"
,
bool
(
true
));
trace
.
Append
(
ScheduleDesc
::
Step
(
"AnnotateBoolAttr"
,
{{
"block"
,
std
::
vector
<
Expr
>
({
block_b
})}},
{{
"key"
,
std
::
string
(
"k2"
)},
{
"value"
,
bool
(
true
)}},
{}));
ir_sch
.
Annotate
(
block_b
,
"k2"
,
true
);
trace
.
Append
(
ScheduleDesc
::
Step
(
"AnnotateBoolAttr"
,
{{
"block"
,
std
::
vector
<
Expr
>
({
block_b
})}},
{{
"key"
,
std
::
string
(
"k2"
)},
{
"value"
,
true
}},
{}));
block_b
=
ir_sch
.
GetBlock
(
"B"
);
trace
.
Append
(
ScheduleDesc
::
Step
(
"GetBlock"
,
{},
{{
"block_name"
,
std
::
string
(
"B"
)}},
{
block_b
}));
ir_sch
.
Annotate
(
block_b
,
"k3"
,
float
(
2.0
));
trace
.
Append
(
ScheduleDesc
::
Step
(
"AnnotateFloatAttr"
,
{{
"block"
,
std
::
vector
<
Expr
>
({
block_b
})}},
{{
"key"
,
std
::
string
(
"k3"
)},
{
"value"
,
float
(
2.0
)}},
{}));
ir_sch
.
Annotate
(
block_b
,
"k3"
,
2.0
f
);
trace
.
Append
(
ScheduleDesc
::
Step
(
"AnnotateFloatAttr"
,
{{
"block"
,
std
::
vector
<
Expr
>
({
block_b
})}},
{{
"key"
,
std
::
string
(
"k3"
)},
{
"value"
,
2.0
f
}},
{}));
block_b
=
ir_sch
.
GetBlock
(
"B"
);
trace
.
Append
(
ScheduleDesc
::
Step
(
...
...
@@ -911,22 +908,20 @@ TEST_F(TestScheduleDesc, StepKind_Unannotate) {
auto
block_b
=
ir_sch
.
GetBlock
(
"B"
);
trace
.
Append
(
ScheduleDesc
::
Step
(
"GetBlock"
,
{},
{{
"block_name"
,
std
::
string
(
"B"
)}},
{
block_b
}));
ir_sch
.
Annotate
(
block_b
,
"k1"
,
int
(
64
));
trace
.
Append
(
ScheduleDesc
::
Step
(
"AnnotateIntAttr"
,
{{
"block"
,
std
::
vector
<
Expr
>
({
block_b
})}},
{{
"key"
,
std
::
string
(
"k1"
)},
{
"value"
,
int
(
64
)}},
{}));
ir_sch
.
Annotate
(
block_b
,
"k1"
,
64
);
trace
.
Append
(
ScheduleDesc
::
Step
(
"AnnotateIntAttr"
,
{{
"block"
,
std
::
vector
<
Expr
>
({
block_b
})}},
{{
"key"
,
std
::
string
(
"k1"
)},
{
"value"
,
64
}},
{}));
block_b
=
ir_sch
.
GetBlock
(
"B"
);
trace
.
Append
(
ScheduleDesc
::
Step
(
"GetBlock"
,
{},
{{
"block_name"
,
std
::
string
(
"B"
)}},
{
block_b
}));
ir_sch
.
Annotate
(
block_b
,
"k2"
,
bool
(
true
));
trace
.
Append
(
ScheduleDesc
::
Step
(
"AnnotateBoolAttr"
,
{{
"block"
,
std
::
vector
<
Expr
>
({
block_b
})}},
{{
"key"
,
std
::
string
(
"k2"
)},
{
"value"
,
bool
(
true
)}},
{}));
ir_sch
.
Annotate
(
block_b
,
"k2"
,
true
);
trace
.
Append
(
ScheduleDesc
::
Step
(
"AnnotateBoolAttr"
,
{{
"block"
,
std
::
vector
<
Expr
>
({
block_b
})}},
{{
"key"
,
std
::
string
(
"k2"
)},
{
"value"
,
true
}},
{}));
block_b
=
ir_sch
.
GetBlock
(
"B"
);
trace
.
Append
(
ScheduleDesc
::
Step
(
...
...
paddle/cinn/lang/lower_impl.cc
浏览文件 @
3f5c2b5f
...
...
@@ -741,14 +741,15 @@ std::vector<Expr> LowerImpl::GenerateFunctionBody(
<<
"'s shape is : "
<<
utils
::
Join
(
tensor
->
shape
,
","
);
for
(
auto
&
expr
:
tensor
->
shape
)
{
CHECK
(
expr
.
is_constant
());
int_shape
.
push_back
(
(
int
)
expr
.
get_constant
(
));
int_shape
.
push_back
(
static_cast
<
int
>
(
expr
.
get_constant
()
));
}
for
(
auto
&
var
:
tensor
->
reduce_axis
)
{
CHECK
(
var
->
lower_bound
.
defined
());
CHECK
(
var
->
upper_bound
.
defined
());
CHECK
(
common
::
is_zero
(
var
->
lower_bound
));
CHECK
(
var
->
upper_bound
.
is_constant
());
int_shape
.
push_back
((
int
)
var
->
upper_bound
.
get_constant
());
int_shape
.
push_back
(
static_cast
<
int
>
(
var
->
upper_bound
.
get_constant
()));
}
// create block itervars, i0,i1...
std
::
vector
<
Var
>
block_vars
;
...
...
paddle/cinn/optim/compute_inline_expand.cc
浏览文件 @
3f5c2b5f
...
...
@@ -79,7 +79,7 @@ struct TensorInlineExpandMutator : public ir::IRMutator<> {
void
Visit
(
const
ir
::
For
*
op
,
Expr
*
expr
)
override
{
CHECK
(
op
->
extent
.
is_constant
());
int
cons_extent
=
(
int
)
op
->
extent
.
get_constant
(
);
int
cons_extent
=
static_cast
<
int
>
(
op
->
extent
.
get_constant
()
);
var_to_extent
[
op
->
loop_var
->
name
]
=
op
->
extent
;
ir
::
IRMutator
<>::
Visit
(
op
,
expr
);
}
...
...
paddle/cinn/optim/unroll_loops_test.cc
浏览文件 @
3f5c2b5f
...
...
@@ -69,7 +69,7 @@ TEST(UnrollLoops, auto_unroll) {
Expr
M
(
100
);
Expr
N
(
4
);
Expr
O
(
5
);
Expr
const_value
(
float
(
2.11
)
);
Expr
const_value
(
2.11
f
);
Placeholder
<
float
>
A
(
"A"
,
{
M
,
N
,
O
});
...
...
paddle/cinn/optim/vectorize_loops_test.cc
浏览文件 @
3f5c2b5f
...
...
@@ -274,7 +274,7 @@ TEST(Vectorize, cuda_vectorize_with_constant) {
Expr
M
(
100
);
Expr
N
(
500
);
Placeholder
<
float
>
A
(
"A"
,
{
M
,
N
});
Expr
const_value
(
float
(
2.11
)
);
Expr
const_value
(
2.11
f
);
Tensor
C
=
Compute
(
{
M
,
N
},
[
&
](
Var
i
,
Var
j
)
{
return
const_value
*
A
(
i
,
j
);
},
"C"
);
...
...
paddle/cinn/poly/stage.cc
浏览文件 @
3f5c2b5f
...
...
@@ -923,8 +923,8 @@ Iterator Stage::Fuse(const std::vector<Iterator> &levels) {
"%s = floor(%s / %d)"
,
levels
.
front
().
id
.
c_str
(),
new_iter_name
.
c_str
(),
(
int
)
std
::
accumulate
(
iterator_max_val
.
begin
()
+
1
,
iterator_max_val
.
end
(),
1
,
my_prod
)));
static_cast
<
int
>
(
std
::
accumulate
(
iterator_max_val
.
begin
()
+
1
,
iterator_max_val
.
end
(),
1
,
my_prod
)))
)
;
conds
.
emplace_back
(
utils
::
StringFormat
(
"%s = %s mod %d"
,
levels
.
back
().
id
.
c_str
(),
new_iter_name
.
c_str
(),
...
...
@@ -935,10 +935,10 @@ Iterator Stage::Fuse(const std::vector<Iterator> &levels) {
"%s = floor(%s / %d) mod %d"
,
levels
[
i
].
id
.
c_str
(),
new_iter_name
.
c_str
(),
(
int
)
std
::
accumulate
(
iterator_max_val
.
begin
()
+
i
+
1
,
iterator_max_val
.
end
(),
1
,
my_prod
),
static_cast
<
int
>
(
std
::
accumulate
(
iterator_max_val
.
begin
()
+
i
+
1
,
iterator_max_val
.
end
(),
1
,
my_prod
)
),
iterator_max_val
[
i
]));
}
...
...
paddle/cinn/runtime/cinn_runtime.cc
浏览文件 @
3f5c2b5f
...
...
@@ -122,7 +122,7 @@ cinn_buffer_t* cinn_buffer_new_default(int target,
fprintf
(
stderr
,
"Not supported device type"
);
abort
();
}
cinn_buffer_malloc
(
(
void
*
)
(
0
),
buf
);
cinn_buffer_malloc
(
reinterpret_cast
<
void
*>
(
0
),
buf
);
return
buf
;
}
...
...
paddle/cinn/runtime/cpu/host_intrinsics.cc
浏览文件 @
3f5c2b5f
...
...
@@ -31,8 +31,8 @@ extern "C" {
void
__cinn_host_tanh_v
(
const
cinn_buffer_t
*
x
,
cinn_buffer_t
*
out
)
{
CINN_CHECK_EQ
(
x
->
num_elements
(),
out
->
num_elements
());
int
xn
=
x
->
num_elements
();
auto
*
x_data
=
(
float
*
)
(
x
->
memory
);
auto
*
out_data
=
(
float
*
)
(
out
->
memory
);
auto
*
x_data
=
reinterpret_cast
<
float
*>
(
x
->
memory
);
auto
*
out_data
=
reinterpret_cast
<
float
*>
(
out
->
memory
);
for
(
int
i
=
0
;
i
<
x
->
num_elements
();
i
++
)
{
out_data
[
i
]
=
tanhf
(
x_data
[
i
]);
}
...
...
paddle/cinn/runtime/cuda/float16.h
浏览文件 @
3f5c2b5f
...
...
@@ -599,7 +599,7 @@ __host__ __device__ inline float16(abs)(const float16& a) {
#if defined(CINN_CUDA_FP16) && (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530)
return
float16
(
__habs
(
a
.
to_half
()));
#else
return
float16
(
fabsf
(
float
(
a
)));
return
float16
(
fabsf
(
static_cast
<
float
>
(
a
)));
#endif
}
...
...
paddle/cinn/runtime/tiny_runtime.cc
浏览文件 @
3f5c2b5f
...
...
@@ -62,13 +62,13 @@ void *load_program(const char *paramfile) {
return
nullptr
;
}
// TODO(hp03): check param file version
ctx
->
major_v
=
*
(
int
*
)
(
buf
+
4
);
ctx
->
minor_v
=
*
(
int
*
)
(
buf
+
8
);
ctx
->
major_v
=
*
reinterpret_cast
<
int
*>
(
buf
+
4
);
ctx
->
minor_v
=
*
reinterpret_cast
<
int
*>
(
buf
+
8
);
int
*
namelist_pos
=
(
int
*
)
(
buf
+
16
);
int
*
podvalue_pos
=
(
int
*
)
(
buf
+
*
namelist_pos
);
int
*
persistent_pos
=
(
int
*
)
(
buf
+
*
podvalue_pos
);
int
*
inst_pos
=
(
int
*
)
(
buf
+
*
persistent_pos
);
int
*
namelist_pos
=
reinterpret_cast
<
int
*>
(
buf
+
16
);
int
*
podvalue_pos
=
reinterpret_cast
<
int
*>
(
buf
+
*
namelist_pos
);
int
*
persistent_pos
=
reinterpret_cast
<
int
*>
(
buf
+
*
podvalue_pos
);
int
*
inst_pos
=
reinterpret_cast
<
int
*>
(
buf
+
*
persistent_pos
);
if
(
fsize
<
*
inst_pos
)
{
return
nullptr
;
}
...
...
@@ -78,11 +78,11 @@ void *load_program(const char *paramfile) {
std
::
map
<
std
::
string
,
int
>
name2index
;
for
(
int
i
=
0
;
i
<
namelen
;
i
++
)
{
int
offset
=
(
namelist_pos
+
2
)[
i
];
namev
[
i
]
=
(
char
*
)
(
buf
+
offset
);
namev
[
i
]
=
reinterpret_cast
<
char
*>
(
buf
+
offset
);
name2index
[
namev
[
i
]]
=
i
;
}
cinn_buffer_t
*
cb
=
(
cinn_buffer_t
*
)
(
buf
+
podvalue_pos
[
1
]);
cinn_buffer_t
*
cb
=
reinterpret_cast
<
cinn_buffer_t
*>
(
buf
+
podvalue_pos
[
1
]);
for
(
int
i
=
0
;
i
<
namelen
;
i
++
)
{
// currently only CPU device is supported, so just use malloc
if
(
cb
[
i
].
memory
)
{
...
...
@@ -107,9 +107,9 @@ void *load_program(const char *paramfile) {
int
instargc
=
inst_pos
[
2
+
i
*
3
+
1
];
ctx
->
inst_argc
.
push_back
(
instargc
);
cinn_pod_value_t
*
argv
=
(
cinn_pod_value_t
*
)
(
buf
+
inst_pos
[
2
+
i
*
3
+
2
]);
reinterpret_cast
<
cinn_pod_value_t
*>
(
buf
+
inst_pos
[
2
+
i
*
3
+
2
]);
for
(
int
i
=
0
;
i
<
instargc
;
i
++
)
{
int
idx
=
(
uintptr_t
)((
cinn_buffer_t
*
)
argv
[
i
]);
int
idx
=
(
uintptr_t
)((
cinn_buffer_t
*
)
(
argv
[
i
]));
// NOLINT
cinn_value_t
tmp_v
;
tmp_v
.
v_handle
=
&
cb
[
idx
];
argv
[
i
].
set_value
(
tmp_v
);
...
...
@@ -127,7 +127,7 @@ int set_maxconcurrency(int c) {
typedef
void
(
*
func_t
)(
cinn_pod_value_t
*
,
int
);
void
run_program
(
void
*
ctx
)
{
param_context_t
*
pc
=
(
param_context_t
*
)
ctx
;
param_context_t
*
pc
=
reinterpret_cast
<
param_context_t
*>
(
ctx
)
;
for
(
int
i
=
0
;
i
<
pc
->
instructions
.
size
();
i
++
)
{
const
char
*
sym
=
pc
->
instructions
[
i
].
c_str
();
void
*
p
=
dlsym
(
RTLD_DEFAULT
,
sym
);
...
...
@@ -137,7 +137,7 @@ void run_program(void *ctx) {
}
cinn_pod_value_t
*
get_pod_value
(
void
*
ctx
,
const
char
*
tname
)
{
param_context_t
*
pc
=
(
param_context_t
*
)
ctx
;
param_context_t
*
pc
=
reinterpret_cast
<
param_context_t
*>
(
ctx
)
;
if
(
pc
->
name2podvalue
.
find
(
tname
)
!=
pc
->
name2podvalue
.
end
())
{
return
&
pc
->
name2podvalue
[
tname
];
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录