PaddlePaddle / Paddle
Commit b702d2ae (unverified), authored Aug 29, 2023 by gouzil; committed via GitHub on Aug 29, 2023.
[clang-tidy] NO.8 enable `cppcoreguidelines-narrowing-conversions`. step:1 (#56218)
Parent: 0236771e
Showing 113 changed files with 903 additions and 751 deletions (+903 -751).
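The recurring pattern in this commit is to make each implicit narrowing conversion explicit with static_cast, which is what clang-tidy's cppcoreguidelines-narrowing-conversions check expects. Below is a minimal standalone sketch of that pattern; it is not Paddle code, and the function and variable names are illustrative only:

#include <cstddef>
#include <vector>

// Before: returning size_t from a function declared int narrows implicitly
// and is flagged by cppcoreguidelines-narrowing-conversions:
//   int Depth(const std::vector<int>& stack) { return stack.size(); }

// After: the cast documents that the narrowing is intentional.
int Depth(const std::vector<int>& stack) {
  return static_cast<int>(stack.size());
}

// The same idea for floating-point -> integer narrowing, mirroring the
// CpuMaxAllocSize() change in this commit: do the multiply in double, then
// narrow the result explicitly.
std::size_t ScaledBudget(double fraction, std::size_t total_bytes) {
  return static_cast<std::size_t>(fraction * static_cast<double>(total_bytes));
}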
paddle/phi/api/profiler/device_tracer.cc  +3 -3
paddle/phi/api/profiler/event.h  +1 -1
paddle/phi/api/profiler/profiler.cc  +1 -1
paddle/phi/api/profiler/profiler.proto  +1 -1
paddle/phi/backends/cpu/cpu_info.cc  +4 -2
paddle/phi/backends/device_manager.cc  +1 -1
paddle/phi/core/ddim.cc  +8 -8
paddle/phi/core/dense_tensor_impl.cc  +2 -2
paddle/phi/core/distributed/auto_parallel/device_mesh.cc  +5 -5
paddle/phi/core/distributed/auto_parallel/device_mesh.h  +2 -2
paddle/phi/core/distributed/auto_parallel/dist_attr.cc  +3 -3
paddle/phi/core/distributed/auto_parallel/dist_mapper.cc  +4 -4
paddle/phi/core/distributed/auto_parallel/process_mesh.cc  +3 -3
paddle/phi/core/distributed/auto_parallel/reshard_utils.cc  +2 -2
paddle/phi/core/distributed/store/tcp_store.cc  +5 -3
paddle/phi/core/generator.cc  +1 -1
paddle/phi/core/infermeta_utils.cc  +4 -4
paddle/phi/core/kernel_context.cc  +4 -4
paddle/phi/core/selected_rows_impl.cc  +2 -2
paddle/phi/core/sparse_coo_tensor.cc  +1 -1
paddle/phi/core/threadpool.cc  +1 -1
paddle/phi/kernels/coalesce_tensor_kernel.cc  +2 -2
paddle/phi/kernels/cpu/accuracy_kernel.cc  +1 -1
paddle/phi/kernels/cpu/adagrad_kernel.cc  +1 -1
paddle/phi/kernels/cpu/affine_grid_grad_kernel.cc  +7 -7
paddle/phi/kernels/cpu/affine_grid_kernel.cc  +7 -7
paddle/phi/kernels/cpu/argsort_grad_kernel.cc  +1 -1
paddle/phi/kernels/cpu/argsort_kernel.cc  +1 -1
paddle/phi/kernels/cpu/auc_kernel.cc  +2 -2
paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc  +7 -7
paddle/phi/kernels/cpu/batch_norm_kernel.cc  +4 -4
paddle/phi/kernels/cpu/bce_loss_grad_kernel.cc  +1 -1
paddle/phi/kernels/cpu/box_coder_kernel.cc  +1 -1
paddle/phi/kernels/cpu/broadcast_tensors_grad_kernel.cc  +1 -1
paddle/phi/kernels/cpu/cholesky_kernel.cc  +1 -1
paddle/phi/kernels/cpu/class_center_sample_kernel.cc  +1 -1
paddle/phi/kernels/cpu/concat_kernel.cc  +2 -2
paddle/phi/kernels/cpu/cross_entropy_grad_kernel.cc  +1 -1
paddle/phi/kernels/cpu/cross_entropy_kernel.cc  +1 -1
paddle/phi/kernels/cpu/cross_grad_kernel.cc  +2 -2
paddle/phi/kernels/cpu/cross_kernel.cc  +2 -2
paddle/phi/kernels/cpu/cum_kernel.cc  +3 -3
paddle/phi/kernels/cpu/deformable_conv_grad_kernel.cc  +9 -5
paddle/phi/kernels/cpu/diagonal_grad_kernel.cc  +4 -2
paddle/phi/kernels/cpu/diagonal_kernel.cc  +4 -2
paddle/phi/kernels/cpu/distribute_fpn_proposals_kernel.cc  +10 -7
paddle/phi/kernels/cpu/eig_grad_kernel.cc  +1 -1
paddle/phi/kernels/cpu/eig_kernel.cc  +2 -2
paddle/phi/kernels/cpu/eigvals_kernel.cc  +5 -5
paddle/phi/kernels/cpu/fill_diagonal_tensor_grad_kernel.cc  +2 -2
paddle/phi/kernels/cpu/fill_diagonal_tensor_kernel.cc  +1 -1
paddle/phi/kernels/cpu/gather_tree_kernel.cc  +2 -2
paddle/phi/kernels/cpu/generate_proposals_kernel.cc  +2 -2
paddle/phi/kernels/cpu/graph_reindex_kernel.cc  +3 -3
paddle/phi/kernels/cpu/graph_sample_neighbors_kernel.cc  +1 -1
paddle/phi/kernels/cpu/grid_sample_grad_kernel.cc  +58 -58
paddle/phi/kernels/cpu/grid_sample_kernel.cc  +30 -30
paddle/phi/kernels/cpu/group_norm_grad_kernel.cc  +4 -4
paddle/phi/kernels/cpu/group_norm_kernel.cc  +4 -4
paddle/phi/kernels/cpu/gumbel_softmax_kernel.cc  +1 -1
paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc  +4 -3
paddle/phi/kernels/cpu/index_put_grad_kernel.cc  +2 -1
paddle/phi/kernels/cpu/index_put_kernel.cc  +2 -1
paddle/phi/kernels/cpu/index_sample_grad_kernel.cc  +1 -1
paddle/phi/kernels/cpu/index_sample_kernel.cc  +2 -2
paddle/phi/kernels/cpu/instance_norm_grad_kernel.cc  +4 -4
paddle/phi/kernels/cpu/instance_norm_kernel.cc  +3 -3
paddle/phi/kernels/cpu/interpolate_grad_kernel.cc  +107 -66
paddle/phi/kernels/cpu/interpolate_kernel.cc  +93 -63
paddle/phi/kernels/cpu/kthvalue_grad_kernel.cc  +2 -2
paddle/phi/kernels/cpu/kthvalue_kernel.cc  +2 -2
paddle/phi/kernels/cpu/label_smooth_kernel.cc  +5 -3
paddle/phi/kernels/cpu/log_softmax_grad_kernel.cc  +1 -1
paddle/phi/kernels/cpu/log_softmax_kernel.cc  +1 -1
paddle/phi/kernels/cpu/lstsq_kernel.cc  +4 -4
paddle/phi/kernels/cpu/masked_select_grad_kernel.cc  +1 -1
paddle/phi/kernels/cpu/matrix_nms_kernel.cc  +2 -2
paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc  +3 -4
paddle/phi/kernels/cpu/mode_grad_kernel.cc  +2 -2
paddle/phi/kernels/cpu/mode_kernel.cc  +2 -2
paddle/phi/kernels/cpu/multiclass_nms3_kernel.cc  +21 -18
paddle/phi/kernels/cpu/multinomial_kernel.cc  +1 -1
paddle/phi/kernels/cpu/mv_grad_kernel.cc  +2 -2
paddle/phi/kernels/cpu/nanmedian_grad_kernel.cc  +1 -1
paddle/phi/kernels/cpu/nanmedian_kernel.cc  +2 -2
paddle/phi/kernels/cpu/overlap_add_grad_kernel.cc  +13 -8
paddle/phi/kernels/cpu/overlap_add_kernel.cc  +12 -7
paddle/phi/kernels/cpu/p_norm_grad_kernel.cc  +4 -4
paddle/phi/kernels/cpu/p_norm_kernel.cc  +4 -4
paddle/phi/kernels/cpu/pad3d_grad_kernel.cc  +18 -18
paddle/phi/kernels/cpu/pad3d_kernel.cc  +18 -18
paddle/phi/kernels/cpu/prelu_grad_kernel.cc  +9 -9
paddle/phi/kernels/funcs/concat_and_split_functor.cc  +3 -3
paddle/phi/kernels/funcs/cross_entropy.cc  +2 -2
paddle/phi/kernels/funcs/deformable_conv_functor.cc  +4 -2
paddle/phi/kernels/funcs/gather_scatter_functor.cc  +5 -5
paddle/phi/kernels/funcs/gpc.cc  +38 -30
paddle/phi/kernels/funcs/gpc.h  +3 -2
paddle/phi/kernels/funcs/im2col.cc  +24 -24
paddle/phi/kernels/funcs/jit/gen_base.cc  +2 -1
paddle/phi/kernels/funcs/jit/helper.cc  +1 -1
paddle/phi/kernels/funcs/jit/kernel_key.cc  +4 -4
paddle/phi/kernels/funcs/jit/more/intrinsic/layer_norm.cc  +13 -12
paddle/phi/kernels/funcs/math_function.cc  +1 -1
paddle/phi/kernels/funcs/matrix_reduce.cc  +1 -1
paddle/phi/kernels/funcs/maxouting.cc  +12 -8
paddle/phi/kernels/funcs/pooling.cc  +151 -124
paddle/phi/kernels/funcs/segment_pooling.cc  +1 -1
paddle/phi/kernels/funcs/selected_rows_functor.cc  +7 -4
paddle/phi/kernels/funcs/sequence_padding.cc  +8 -7
paddle/phi/kernels/funcs/sequence_pooling.cc  +4 -4
paddle/phi/kernels/funcs/vol2col.cc  +19 -19
paddle/utils/string/string_helper.cc  +2 -1
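A second pattern that recurs in the diffs below is hoisting the cast into loop bounds and subscripts so the loop index keeps a signed type (as in the ddim.cc changes). A minimal sketch with illustrative names, not Paddle code:

#include <vector>

void FillSquares(std::vector<int>& v) {
  // Before: `for (uint64_t i = 0; i < v.size(); ++i)` mixed an unsigned
  // index into code that also uses signed arithmetic.
  // After: keep an int index and narrow the container size explicitly,
  // once, in the loop bound.
  for (int i = 0; i < static_cast<int>(v.size()); ++i) {
    v[i] = i * i;  // subscript and value stay in signed arithmetic
  }
}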
paddle/phi/api/profiler/device_tracer.cc

@@ -797,8 +797,8 @@ void ClearCurAnnotation() {
   if (!main_thread_annotation_stack.empty()) {
     std::string name = annotation_stack.back()->name();
     std::string main_name = main_thread_annotation_stack.back()->name();
-    int main_name_len = main_name.length();
-    int name_len = name.length();
+    int main_name_len = static_cast<int>(main_name.length());
+    int name_len = static_cast<int>(name.length());
     int prefix_len = main_name_len - name_len;
     if ((prefix_len > 0 && main_name.at(prefix_len - 1) == '/' &&
@@ -825,7 +825,7 @@ void SetCurBlock(int block_id) { block_id_stack.push_back(block_id); }
 void ClearCurBlock() { block_id_stack.pop_back(); }
-int BlockDepth() { return block_id_stack.size(); }
+int BlockDepth() { return static_cast<int>(block_id_stack.size()); }
 uint32_t GetCurSystemThreadId() {
   std::stringstream ss;
paddle/phi/api/profiler/event.h

@@ -78,7 +78,7 @@ class Event {
   Event* parent_{nullptr};
   uint64_t thread_id_;
   EventRole role_{};
-  int64_t cpu_ns_;
+  uint64_t cpu_ns_;
   bool visited_status_{false};
   std::string attr_;
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
paddle/phi/api/profiler/profiler.cc

@@ -72,7 +72,7 @@ Event::Event(EventType type,
 const EventType& Event::type() const { return type_; }

 double Event::CpuElapsedMs(const Event& e) const {
-  return (e.cpu_ns_ - cpu_ns_) / (1000000.0);
+  return (static_cast<double>(e.cpu_ns_ - cpu_ns_)) / (1000000.0);
 }

 double Event::CudaElapsedMs(const Event& e) const {
paddle/phi/api/profiler/profiler.proto

@@ -29,7 +29,7 @@ message Event {
   optional uint64 end_ns = 3;
   // When positive, it represents gpu id. When -1, it represents CPU.
   optional int64 device_id = 5;
-  optional int64 sub_device_id = 6;
+  optional uint64 sub_device_id = 6;
   optional MemCopy memcopy = 7;
   optional string detail_info = 9;
paddle/phi/backends/cpu/cpu_info.cc

@@ -78,7 +78,8 @@ size_t CpuTotalPhysicalMemory() {
 size_t CpuMaxAllocSize() {
   // For distributed systems, it requires configuring and limiting
   // the fraction of memory to use.
-  return FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory();
+  return static_cast<size_t>(FLAGS_fraction_of_cpu_memory_to_use *
+                             static_cast<double>(CpuTotalPhysicalMemory()));
 }

 size_t CpuMaxChunkSize() {
@@ -97,7 +98,8 @@ size_t CpuMinChunkSize() {
 size_t CUDAPinnedMaxAllocSize() {
   // For distributed systems, it requires configuring and limiting
   // the fraction of memory to use.
-  return FLAGS_fraction_of_cuda_pinned_memory_to_use * CpuTotalPhysicalMemory();
+  return static_cast<size_t>(FLAGS_fraction_of_cuda_pinned_memory_to_use *
+                             static_cast<double>(CpuTotalPhysicalMemory()));
 }

 size_t CUDAPinnedMinChunkSize() {
paddle/phi/backends/device_manager.cc

@@ -491,7 +491,7 @@ std::vector<size_t> DeviceManager::GetSelectedDeviceList(
       device_list.push_back(atoi(id.c_str()));
     }
   } else {
-    int count = DeviceManager::GetDeviceCount(device_type);
+    int count = static_cast<int>(DeviceManager::GetDeviceCount(device_type));
     for (int i = 0; i < count; ++i) {
       device_list.push_back(i);
     }
paddle/phi/core/ddim.cc

@@ -19,15 +19,15 @@
 namespace phi {

 DDim make_ddim(std::initializer_list<int64_t> dims) {
-  return DDim(dims.begin(), dims.size());
+  return DDim(dims.begin(), static_cast<int>(dims.size()));
 }

 DDim make_ddim(const std::vector<int64_t>& dims) {
-  return DDim(dims.data(), dims.size());
+  return DDim(dims.data(), static_cast<int>(dims.size()));
 }

 DDim make_ddim(const std::vector<int>& dims) {
-  return DDim(dims.data(), dims.size());
+  return DDim(dims.data(), static_cast<int>(dims.size()));
 }

 struct DDimEqualityVisitor {
@@ -186,19 +186,19 @@ DDim stride_numel(const DDim& ddim) {
 DDim DDim::reshape(std::vector<int>& shape) const {
   const DDim& in_dims = *this;

-  for (uint64_t i = 0; i < shape.size(); ++i) {
+  for (int i = 0; i < static_cast<int>(shape.size()); ++i) {
     if (shape[i] == 0) {
-      shape[i] = in_dims.at(i);
+      shape[i] = static_cast<int>(in_dims.at(i));
     }
   }

   // Dim marked as "-1" must be inferred
   auto it = std::find(shape.begin(), shape.end(), -1);
   if (it != shape.end()) {
-    int index = std::distance(shape.begin(), it);
+    int index = static_cast<int>(std::distance(shape.begin(), it));
     int reshape_out_product = std::accumulate(
         shape.begin(), shape.end(), -1, std::multiplies<int>());
-    shape[index] = product(in_dims) / reshape_out_product;
+    shape[index] = static_cast<int>(product(in_dims)) / reshape_out_product;
   }

   return phi::make_ddim(shape);
@@ -208,7 +208,7 @@ DDim DDim::transpose(const std::vector<int>& axis) const {
   const DDim& in_dims = *this;
   DDim out_dims(in_dims);
-  for (size_t i = 0; i < axis.size(); i++) {
+  for (int i = 0; i < static_cast<int>(axis.size()); i++) {
     out_dims[i] = in_dims[axis[i]];
   }
   return out_dims;
paddle/phi/core/dense_tensor_impl.cc

@@ -340,7 +340,7 @@ std::vector<DenseTensor> DenseTensor::Split(int64_t split_size,
           "split expects split_size be non-negative, but got split_size is %d",
           split_size));

-  int64_t numel_size = meta_.dims[axis];
+  int64_t numel_size = meta_.dims[static_cast<int>(axis)];

   int64_t num_splits = 1;
   if (split_size != 0) {
@@ -371,7 +371,7 @@ std::vector<DenseTensor> DenseTensor::Chunk(int64_t chunks,
       phi::errors::OutOfRange(
           "chunks expects to be greater than 0, but got chunks is %d", chunks));

-  int64_t numel_size = meta_.dims[axis];
+  int64_t numel_size = meta_.dims[static_cast<int>(axis)];
   int64_t split_size = (numel_size + chunks - 1) / chunks;
   return Split(split_size, axis);
 }
paddle/phi/core/distributed/auto_parallel/device_mesh.cc

@@ -330,25 +330,25 @@ DeviceMesh DeviceMesh::from_proto(const DeviceMeshProto &proto) {
   mesh.name_ = proto.name();

   mesh.shape_.resize(proto.shape_size());
-  for (int64_t i = 0; i < proto.shape_size(); ++i) {
+  for (int i = 0; i < proto.shape_size(); ++i) {
     mesh.shape_[i] = proto.shape(i);
   }

   mesh.device_ids_.resize(proto.device_ids_size());
-  for (int64_t i = 0; i < proto.device_ids_size(); ++i) {
+  for (int i = 0; i < proto.device_ids_size(); ++i) {
     mesh.device_ids_[i] = proto.device_ids(i);
   }

   mesh.dim_names_.resize(proto.dim_names_size());
-  for (int64_t i = 0; i < proto.dim_names_size(); ++i) {
+  for (int i = 0; i < proto.dim_names_size(); ++i) {
     mesh.dim_names_[i] = proto.dim_names(i);
   }

-  for (int64_t i = 0; i < proto.devices_size(); ++i) {
+  for (int i = 0; i < proto.devices_size(); ++i) {
     mesh.add_device(Device::from_proto(proto.devices(i)));
   }

-  for (int64_t i = 0; i < proto.links_size(); ++i) {
+  for (int i = 0; i < proto.links_size(); ++i) {
     mesh.add_link(Link::from_proto(proto.links(i)));
   }
paddle/phi/core/distributed/auto_parallel/device_mesh.h

@@ -96,8 +96,8 @@ inline bool operator!=(const Device& lhs, const Device& rhs) {
 }

 struct LinkCapability {
-  double bandwidth = 0.0;  // Bytes/s
-  double latency = 0.0;
+  int64_t bandwidth = 0.0;  // Bytes/s
+  int64_t latency = 0.0;

   // LinkCapability from_string(const std::string& str);
   std::string to_string() const;
paddle/phi/core/distributed/auto_parallel/dist_attr.cc

@@ -186,7 +186,7 @@ bool TensorDistAttr::verify_dims_mapping(
 bool TensorDistAttr::verify_batch_dim(
     int64_t dim, const std::vector<int64_t>& tensor_shape) const {
   VLOG(4) << "[TensorDistAttr verify_batch_dim] " << dim;
-  int64_t ndim = tensor_shape.size();
+  int64_t ndim = static_cast<int64_t>(tensor_shape.size());
   if (ndim > 0) {
     if (dim < 0) {
       dim = dim + ndim;
@@ -270,12 +270,12 @@ std::string TensorDistAttr::to_string() const {
 void TensorDistAttr::from_proto(const TensorDistAttrProto& proto) {
   process_mesh_ = ProcessMesh::from_proto(proto.process_mesh());
   dims_mapping_.resize(proto.dims_mapping_size());
-  for (int64_t i = 0; i < proto.dims_mapping_size(); ++i) {
+  for (int i = 0; i < proto.dims_mapping_size(); ++i) {
     dims_mapping_[i] = proto.dims_mapping(i);
   }
   batch_dim_ = proto.batch_dim();
   dynamic_dims_.resize(proto.dynamic_dims_size());
-  for (int64_t i = 0; i < proto.dynamic_dims_size(); ++i) {
+  for (int i = 0; i < proto.dynamic_dims_size(); ++i) {
     dynamic_dims_[i] = proto.dynamic_dims(i);
   }
 }
paddle/phi/core/distributed/auto_parallel/dist_mapper.cc

@@ -72,17 +72,17 @@ void DistributedMapper::set_process_id_to_device_ids(
 DistributedMapper DistributedMapper::from_proto(
     const DistributedMapperProto& proto) {
   DistributedMapper dist_mapper;
-  for (int64_t i = 0; i < proto.device_meshes_size(); ++i) {
+  for (int i = 0; i < proto.device_meshes_size(); ++i) {
     dist_mapper.device_meshes_[proto.device_meshes(i).name()] =
         DeviceMesh::from_proto(proto.device_meshes(i));
   }
-  for (int64_t i = 0; i < proto.process_id_to_device_ids_size(); ++i) {
+  for (int i = 0; i < proto.process_id_to_device_ids_size(); ++i) {
     int64_t process_id = proto.process_id_to_device_ids(i).process_id();
     std::string device_mesh_name =
         proto.process_id_to_device_ids(i).device_mesh_name();
     std::vector<int64_t> device_ids;
-    int64_t num_devices = proto.process_id_to_device_ids(i).device_ids_size();
-    for (int64_t j = 0; j < num_devices; ++j) {
+    int num_devices = proto.process_id_to_device_ids(i).device_ids_size();
+    for (int j = 0; j < num_devices; ++j) {
       device_ids.push_back(proto.process_id_to_device_ids(i).device_ids(j));
     }
     dist_mapper.process_id_to_device_ids_[process_id].first = device_mesh_name;
paddle/phi/core/distributed/auto_parallel/process_mesh.cc

@@ -88,17 +88,17 @@ ProcessMesh ProcessMesh::from_proto(const ProcessMeshProto &proto) {
   ProcessMesh mesh;

   mesh.shape_.resize(proto.shape_size());
-  for (int64_t i = 0; i < proto.shape_size(); ++i) {
+  for (int i = 0; i < proto.shape_size(); ++i) {
     mesh.shape_[i] = proto.shape(i);
   }

   mesh.process_ids_.resize(proto.process_ids_size());
-  for (int64_t i = 0; i < proto.process_ids_size(); ++i) {
+  for (int i = 0; i < proto.process_ids_size(); ++i) {
     mesh.process_ids_[i] = proto.process_ids(i);
   }

   mesh.dim_names_.resize(proto.dim_names_size());
-  for (int64_t i = 0; i < proto.dim_names_size(); ++i) {
+  for (int i = 0; i < proto.dim_names_size(); ++i) {
     mesh.dim_names_[i] = proto.dim_names(i);
   }
paddle/phi/core/distributed/auto_parallel/reshard_utils.cc

@@ -46,7 +46,7 @@ bool IsDimsMappingReplicated(const std::vector<int64_t>& dims_mapping) {
 std::vector<int64_t> GetCurRankCoordInMesh(const ProcessMesh& process_mesh) {
   const auto& process_shape = process_mesh.shape();
   const auto& process_ids = process_mesh.process_ids();
-  int64_t ndims_mesh = process_shape.size();
+  int64_t ndims_mesh = static_cast<int64_t>(process_shape.size());
   int64_t cur_global_rank = GetCurGlobalRank();

   VLOG(3) << "Searching current global rank " << cur_global_rank
@@ -162,7 +162,7 @@ CommContext* CreateOrGetCommContext(const DeviceContext& dev_ctx,
   std::string unique_comm_key = GenUniqueCommKey(process_ids);
   if (!CommContextManager::GetInstance().Has(unique_comm_key)) {
-    int64_t world_size = process_ids.size();
+    int64_t world_size = static_cast<int64_t>(process_ids.size());
     int64_t rank = GetLocalRankInParticipate(process_ids);
     VLOG(3) << "local world size: " << world_size << " local rank: " << rank;
paddle/phi/core/distributed/store/tcp_store.cc

@@ -172,7 +172,7 @@ void MasterDaemon::ProcessCommands(std::vector<struct pollfd>* p_fds) {
   for (size_t i = 1; i < fds.size(); i++) {
 #else
   // 0: listen socket, 1:controller pipe, so loop from 2.
-  for (size_t i = 2; i < fds.size(); i++) {
+  for (uint i = 2; i < fds.size(); i++) {
 #endif
     try {
       if (fds[i].revents == 0) {
@@ -345,14 +345,16 @@ TCPStore::TCPStore(std::string host,
                    bool is_master,
                    size_t num_workers,
                    int timeout)
-    : Store(timeout), _is_master(is_master), _num_workers(num_workers) {
+    : Store(timeout),
+      _is_master(is_master),
+      _num_workers(static_cast<int>(num_workers)) {
   _timeout = timeout;
   PADDLE_ENFORCE_GT(
       timeout, 0, phi::errors::InvalidArgument("timeout must >= %d", timeout));
   VLOG(3) << "input timeout" << timeout << ", member timeout:" << _timeout;
   if (_is_master) {
-    _server = detail::TCPServer::create(port, num_workers, timeout);
+    _server = detail::TCPServer::create(port, this->_num_workers, timeout);
   }
   _client = detail::TCPClient::connect(host, port);
paddle/phi/core/generator.cc

@@ -205,7 +205,7 @@ Generator::Generator(uint64_t seed, uint64_t device_id) {
   std::seed_seq seq({seed});
   auto engine = std::make_shared<std::mt19937_64>(seq);
   this->state_.cpu_engine = *engine;
-  this->state_.device = device_id;
+  this->state_.device = static_cast<int64_t>(device_id);
   this->state_.current_seed = seed;
   this->state_.thread_offset = 0;
   this->engine_ = engine;
paddle/phi/core/infermeta_utils.cc

@@ -21,12 +21,12 @@ void InferMetaContext::SetMetaConfig(MetaConfig config) {
 }

 void InferMetaContext::EmplaceBackInput(MetaTensor input) {
-  int index = inputs_.size();
+  int index = static_cast<int>(inputs_.size());
   inputs_.emplace_back(std::move(input));
   input_range_.emplace_back(std::pair<int, int>(index, index + 1));
 }
 void InferMetaContext::EmplaceBackOutput(MetaTensor output) {
-  int index = outputs_.size();
+  int index = static_cast<int>(outputs_.size());
   outputs_.emplace_back(std::move(output));
   output_range_.emplace_back(std::pair<int, int>(index, index + 1));
 }
@@ -36,7 +36,7 @@ void InferMetaContext::EmplaceBackAttr(Attribute attr) {
 void InferMetaContext::EmplaceBackInputs(
     paddle::small_vector<MetaTensor, phi::kInputSmallVectorSize> inputs) {
-  int index = inputs_.size();
+  int index = static_cast<int>(inputs_.size());
   input_range_.emplace_back(std::pair<int, int>(index, index + inputs.size()));
   inputs_.insert(inputs_.end(),
                  std::make_move_iterator(inputs.begin()),
@@ -44,7 +44,7 @@ void InferMetaContext::EmplaceBackInputs(
 }
 void InferMetaContext::EmplaceBackOutputs(
     paddle::small_vector<MetaTensor, phi::kOutputSmallVectorSize> outputs) {
-  int index = outputs_.size();
+  int index = static_cast<int>(outputs_.size());
   output_range_.emplace_back(
       std::pair<int, int>(index, index + outputs.size()));
   outputs_.insert(outputs_.end(),
paddle/phi/core/kernel_context.cc

@@ -17,7 +17,7 @@
 namespace phi {

 void KernelContext::EmplaceBackInput(const TensorBase* input) {
-  int index = inputs_.size();
+  int index = static_cast<int>(inputs_.size());
   inputs_.emplace_back(input);
   // Record the start and end index of the input
   input_range_.emplace_back(std::pair<int, int>(index, index + 1));
@@ -29,7 +29,7 @@ void KernelContext::EmplaceBackInputWithoutSetRange(const TensorBase* input) {
 void KernelContext::EmplaceBackInputs(
     paddle::small_vector<const TensorBase*> inputs) {
-  int index = inputs_.size();
+  int index = static_cast<int>(inputs_.size());
   // Record the start and end index of the input
   input_range_.emplace_back(std::pair<int, int>(index, index + inputs.size()));
   inputs_.insert(inputs_.end(),
@@ -45,7 +45,7 @@ void KernelContext::EmplaceBackInputsWithoutSetRange(
 }

 void KernelContext::EmplaceBackOutput(TensorBase* output) {
-  int index = outputs_.size();
+  int index = static_cast<int>(outputs_.size());
   outputs_.emplace_back(output);
   // Record the start and end index of the input
   output_range_.emplace_back(std::pair<int, int>(index, index + 1));
@@ -57,7 +57,7 @@ void KernelContext::EmplaceBackOutputWithoutSetRange(TensorBase* output) {
 void KernelContext::EmplaceBackOutputs(
     paddle::small_vector<TensorBase*> outputs) {
-  int index = outputs_.size();
+  int index = static_cast<int>(outputs_.size());
   // Record the start and end index of the input
   output_range_.emplace_back(
       std::pair<int, int>(index, index + outputs.size()));
paddle/phi/core/selected_rows_impl.cc

@@ -136,7 +136,7 @@ int64_t SelectedRowsImpl::AutoGrownIndex(int64_t key,
   }
   auto write_iter = id_to_index_.find(key);
   if (write_iter == id_to_index_.end()) {
-    int row_num = rows_.size();
+    int row_num = static_cast<int>(rows_.size());
     if (row_num == value_->dims()[0]) {
       rwlock_->UNLock();
       PADDLE_THROW(phi::errors::InvalidArgument(
@@ -165,7 +165,7 @@ int64_t SelectedRowsImpl::AutoGrownIndex(int64_t key,
 void SelectedRowsImpl::SyncIndex() {
   rwlock_->WRLock();
   id_to_index_.clear();
-  for (size_t i = 0; i < rows_.size(); ++i) {
+  for (int i = 0; i < static_cast<int>(rows_.size()); ++i) {
     id_to_index_[rows_[i]] = i;
   }
   rwlock_->UNLock();
paddle/phi/core/sparse_coo_tensor.cc

@@ -147,7 +147,7 @@ void SparseCooTensor::SetMember(const DenseTensor& non_zero_indices,
 }

 int32_t SparseCooTensor::sparse_dim() const {
-  return non_zero_indices_.dims()[0];
+  return static_cast<int32_t>(non_zero_indices_.dims()[0]);
 }

 int32_t SparseCooTensor::dense_dim() const {
paddle/phi/core/threadpool.cc

@@ -38,7 +38,7 @@ ThreadPool* ThreadPool::GetInstance() {
 void ThreadPool::Init() {
   if (threadpool_.get() == nullptr) {
     // TODO(Yancey1989): specify the max threads number
-    int num_threads = std::thread::hardware_concurrency();
+    int num_threads = static_cast<int>(std::thread::hardware_concurrency());
     if (FLAGS_dist_threadpool_size > 0) {
       num_threads = FLAGS_dist_threadpool_size;
       VLOG(1) << "set dist_threadpool_size to " << num_threads;
paddle/phi/kernels/coalesce_tensor_kernel.cc

@@ -143,7 +143,7 @@ void CoalesceTensorKernel(const Context &dev_ctx,
     int64_t accumulated_ranks = 0;
     for (size_t i = 0; i < input.size(); ++i) {
       phi::DDim dims(concated_shapes.data() + accumulated_ranks,
-                     concated_ranks[i]);
+                     static_cast<int>(concated_ranks[i]));
       if (!input[i]->initialized()) {
         PADDLE_ENFORCE_EQ(
             input[i],
@@ -187,7 +187,7 @@ void CoalesceTensorKernel(const Context &dev_ctx,
   size_t numel = 0;

   if (size_of_dtype == -1) {
-    size_of_dtype = phi::SizeOf(dtype);
+    size_of_dtype = static_cast<int>(phi::SizeOf(dtype));
   }
   GetMemSizeAndDtype(
       input, &numel, size_of_dtype, dev_ctx.GetPlace(), use_align, align_size);
paddle/phi/kernels/cpu/accuracy_kernel.cc

@@ -85,7 +85,7 @@ void AccuracyKernel(const Context& dev_ctx,
   }

   *correct_data = num_correct;
-  *total_data = num_samples;
+  *total_data = static_cast<int>(num_samples);
   *accuracy_data =
       static_cast<float>(num_correct) / static_cast<float>(num_samples);
 }
paddle/phi/kernels/cpu/adagrad_kernel.cc

@@ -57,7 +57,7 @@ struct DenseAdagradFunctor<phi::CPUContext, T> {
     auto place = *ctx.eigen_device();
     moment_out.device(place) = moment + grad * grad;
-    Eigen::DSizes<int, 1> m_dsize(moment_out_tensor->numel());
+    Eigen::DSizes<int, 1> m_dsize(static_cast<int>(moment_out_tensor->numel()));
     auto* lr = learning_rate.data<T>();
     param_out.device(place) =
         param - lr[0] * grad / (moment_out.sqrt() + epsilon);
paddle/phi/kernels/cpu/affine_grid_grad_kernel.cc

@@ -49,12 +49,12 @@ void AffineGridGrad4DKernel(const Context& dev_ctx,
                             bool align_corners,
                             DenseTensor* input_grad) {
   auto& theta_grad = input_grad;
-  int n = output_grad.dims()[0];
+  int n = static_cast<int>(output_grad.dims()[0]);
   auto& size_attr = outputShape.GetData();
   int h = 0;
   int w = 0;
-  h = size_attr[2];
-  w = size_attr[3];
+  h = static_cast<int>(size_attr[2]);
+  w = static_cast<int>(size_attr[3]);
   theta_grad->Resize(phi::make_ddim({n, 2, 3}));
   dev_ctx.template Alloc<T>(theta_grad);
   phi::funcs::SetConstant<Context, T>()(dev_ctx, theta_grad, static_cast<T>(0));
@@ -86,14 +86,14 @@ void AffineGridGrad5DKernel(const Context& dev_ctx,
                             bool align_corners,
                             DenseTensor* input_grad) {
   auto& theta_grad = input_grad;
-  int n = output_grad.dims()[0];
+  int n = static_cast<int>(output_grad.dims()[0]);
   auto& size_attr = outputShape.GetData();
   int d = 0;
   int h = 0;
   int w = 0;
-  d = size_attr[2];
-  h = size_attr[3];
-  w = size_attr[4];
+  d = static_cast<int>(size_attr[2]);
+  h = static_cast<int>(size_attr[3]);
+  w = static_cast<int>(size_attr[4]);
   theta_grad->Resize(phi::make_ddim({n, 3, 4}));
   dev_ctx.template Alloc<T>(theta_grad);
   phi::funcs::SetConstant<Context, T>()(dev_ctx, theta_grad, static_cast<T>(0));
paddle/phi/kernels/cpu/affine_grid_kernel.cc

@@ -49,12 +49,12 @@ void AffineGrid4DKernel(const Context& dev_ctx,
                         bool align_corners,
                         DenseTensor* output) {
   auto* theta = &input;
-  int n = theta->dims()[0];
+  int n = static_cast<int>(theta->dims()[0]);
   auto& size_attr = outputShape.GetData();
   int h = 0;
   int w = 0;
-  h = size_attr[2];
-  w = size_attr[3];
+  h = static_cast<int>(size_attr[2]);
+  w = static_cast<int>(size_attr[3]);
   output->Resize(phi::make_ddim({n, h, w, 2}));
   dev_ctx.template Alloc<T>(output);
   phi::funcs::SetConstant<Context, T>()(dev_ctx, output, static_cast<T>(0));
@@ -81,14 +81,14 @@ void AffineGrid5DKernel(const Context& dev_ctx,
                         bool align_corners,
                         DenseTensor* output) {
   auto* theta = &input;
-  int n = theta->dims()[0];
+  int n = static_cast<int>(theta->dims()[0]);
   auto& size_attr = outputShape.GetData();
   int d = 0;
   int h = 0;
   int w = 0;
-  d = size_attr[2];
-  h = size_attr[3];
-  w = size_attr[4];
+  d = static_cast<int>(size_attr[2]);
+  h = static_cast<int>(size_attr[3]);
+  w = static_cast<int>(size_attr[4]);
   output->Resize(phi::make_ddim({n, d, h, w, 3}));
   dev_ctx.template Alloc<T>(output);
   phi::funcs::SetConstant<Context, T>()(dev_ctx, output, static_cast<T>(0));
paddle/phi/kernels/cpu/argsort_grad_kernel.cc

@@ -96,7 +96,7 @@ void ArgsortGradKernel(const Context& dev_ctx,
   trans.push_back(axis);
   phi::DDim trans_dims(in_dims);
   for (size_t i = 0; i < trans.size(); i++) {
-    trans_dims[i] = in_dims[trans[i]];
+    trans_dims[static_cast<int>(i)] = in_dims[trans[i]];
   }
   DenseTensor trans_dO;
paddle/phi/kernels/cpu/argsort_kernel.cc

@@ -114,7 +114,7 @@ void ArgsortKernel(const Context& dev_ctx,
   trans.push_back(axis);
   phi::DDim trans_dims(in_dims);
   for (size_t i = 0; i < trans.size(); i++) {
-    trans_dims[i] = in_dims[trans[i]];
+    trans_dims[static_cast<int>(i)] = in_dims[trans[i]];
   }
   DenseTensor trans_inp;
paddle/phi/kernels/cpu/auc_kernel.cc

@@ -124,8 +124,8 @@ inline static void calcAuc(const int64_t *stat_pos,
   while (idx >= 0) {
     totPosPrev = totPos;
     totNegPrev = totNeg;
-    totPos += stat_pos[idx];
-    totNeg += stat_neg[idx];
+    totPos += static_cast<double>(stat_pos[idx]);
+    totNeg += static_cast<double>(stat_neg[idx]);
     *auc += trapezoidArea(totNeg, totNegPrev, totPos, totPosPrev);
     --idx;
   }
paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc

@@ -104,10 +104,10 @@ void BatchNormGradFunctor(const Context& ctx,
                         "The size of input X's dimensions should be less than 6."
                         "But received: the size of input X's dimensions is [%d]",
                         x_dims.size()));
-  const int N = x_dims[0];
-  const int C = (data_layout == DataLayout::kNCHW ? x_dims[1]
-                                                  : x_dims[x_dims.size() - 1]);
-  const int sample_size = x.numel() / N / C;
+  const int N = static_cast<int>(x_dims[0]);
+  const int C = static_cast<int>(
+      data_layout == DataLayout::kNCHW ? x_dims[1] : x_dims[x_dims.size() - 1]);
+  const int sample_size = static_cast<int>(x.numel() / N / C);

   // input dimension is 2 and the format is NCHW. The input can be regarded as
   // NHWC format
@@ -382,9 +382,9 @@ void BatchNormDoubleGradKernel(
   ctx.template Alloc<T>(ddY);
   const auto& x_dims = X->dims();
-  const int C = (data_layout == DataLayout::kNCHW ? x_dims[1]
-                                                  : x_dims[x_dims.size() - 1]);
-  const int sample_size = X->numel() / C;
+  const int C = static_cast<int>(
+      data_layout == DataLayout::kNCHW ? x_dims[1] : x_dims[x_dims.size() - 1]);
+  const int sample_size = static_cast<int>(X->numel() / C);
   phi::funcs::SetConstant<Context, T> set_constant;
   const T* mean_data = Saved_mean->data<T>();
paddle/phi/kernels/cpu/batch_norm_kernel.cc

@@ -72,10 +72,10 @@ void BatchNormKernel(const Context& ctx,
                         "The size of input X's dimensions should be less than 6."
                         "But received: the size of input X's dimensionss is [%d]",
                         x_dims.size()));
-  const int N = x_dims[0];
-  const int C = (data_layout == DataLayout::kNCHW ? x_dims[1]
-                                                  : x_dims[x_dims.size() - 1]);
-  const int sample_size = x.numel() / N / C;
+  const int N = static_cast<int>(x_dims[0]);
+  const int C = static_cast<int>(
+      data_layout == DataLayout::kNCHW ? x_dims[1] : x_dims[x_dims.size() - 1]);
+  const int sample_size = static_cast<int>(x.numel() / N / C);

   // alloc memory
   ctx.template Alloc<T>(y);
paddle/phi/kernels/cpu/bce_loss_grad_kernel.cc

@@ -32,7 +32,7 @@ void BCELossGradKernel(const Context& dev_ctx,
   auto x_data = input.data<T>();
   auto label_data = label.data<T>();
-  int x_numel = input.numel();
+  int x_numel = static_cast<int>(input.numel());

   // dx = dout * ((x - label)/(x - x^2))
   for (int i = 0; i < x_numel; ++i) {
paddle/phi/kernels/cpu/box_coder_kernel.cc

@@ -78,7 +78,7 @@ void EncodeCenterSize(const DenseTensor *target_box,
     for (int64_t j = 0; j < col; ++j) {
       for (int k = 0; k < 4; ++k) {
         size_t offset = i * col * len + j * len;
-        int prior_var_offset = j * len;
+        int prior_var_offset = static_cast<int>(j * len);
         output[offset + k] /= prior_box_var_data[prior_var_offset + k];
       }
     }
paddle/phi/kernels/cpu/broadcast_tensors_grad_kernel.cc

@@ -108,7 +108,7 @@ void BroadcastTensorsGradKernel(const Context& ctx,
       int out_axis = out_rank - j - 1;
       int in_axis = in_rank - j - 1;

-      reshape_dims_vec.push_back(input_dims[j]);
+      reshape_dims_vec.push_back(static_cast<int>(input_dims[j]));
       if (out_axis < 0 || output_dims[out_axis] != input_dims[in_axis]) {
         reduce_dims_vec.push_back(in_axis);
       }
paddle/phi/kernels/cpu/cholesky_kernel.cc

@@ -35,7 +35,7 @@ void CholeskyKernel(const Context& dev_ctx,
   auto& dims = x.dims();
   int batch_count = 1;
   for (int i = 0; i < dims.size() - 2; i++) {
-    batch_count *= dims[i];
+    batch_count *= static_cast<int>(dims[i]);
   }
   auto m = dims[dims.size() - 1];
paddle/phi/kernels/cpu/class_center_sample_kernel.cc

@@ -80,7 +80,7 @@ void ClassCenterSampleKernel(const Context& dev_ctx,
   if (!fix_seed) {
     std::random_device rnd;
-    seed = rnd();
+    seed = static_cast<int>(rnd());
   }
   std::uniform_int_distribution<T> dist(0, num_classes - 1);
   std::shared_ptr<std::mt19937_64> engine;
paddle/phi/kernels/cpu/concat_kernel.cc

@@ -93,8 +93,8 @@ void ConcatKernel(const Context& dev_ctx,
                           out_stride,
                           in->data<T>(),
                           in_stride,
-                          in_stride[axis]);
-      output_offset += in_stride[axis];
+                          in_stride[static_cast<int>(axis)]);
+      output_offset += in_stride[static_cast<int>(axis)];
     }
   } else {
     // TODO(chenweihang): concat functor support vector<DenseTensor*> input
paddle/phi/kernels/cpu/cross_entropy_grad_kernel.cc

@@ -43,7 +43,7 @@ void CrossEntropyWithSoftmaxGradCPUKernel(const CPUContext& dev_ctx,
   const int rank = logit_grad->dims().size();
   const int axis_v = phi::funcs::CanonicalAxis(axis, rank);
-  int axis_dim = logit_grad->dims()[axis_v];
+  int axis_dim = static_cast<int>(logit_grad->dims()[axis_v]);

   PADDLE_ENFORCE_GT(
       axis_dim,
       0,
paddle/phi/kernels/cpu/cross_entropy_kernel.cc

@@ -34,7 +34,7 @@ void CrossEntropy(const CPUContext& dev_ctx,
                   DenseTensor* out) {
   const int rank = x.dims().size();
   const int axis_v = phi::funcs::CanonicalAxis(axis, rank);
-  int axis_dim = x.dims()[axis_v];
+  int axis_dim = static_cast<int>(x.dims()[axis_v]);

   PADDLE_ENFORCE_GT(
       axis_dim,
paddle/phi/kernels/cpu/cross_grad_kernel.cc

@@ -74,11 +74,11 @@ void CrossGradKernel(const Context &dev_ctx,
   }
   auto outer_loops = 1;
   for (auto i = 0; i < dim; i++) {
-    outer_loops *= input_x_dims[i];
+    outer_loops *= static_cast<int>(input_x_dims[i]);
   }
   auto slice_size = 1;
   for (auto i = dim + 1; i < input_x_dims.size(); i++) {
-    slice_size *= input_x_dims[i];
+    slice_size *= static_cast<int>(input_x_dims[i]);
   }

   std::vector<T> input_x_vec, input_y_vec, input_dout_vec;
paddle/phi/kernels/cpu/cross_kernel.cc

@@ -72,11 +72,11 @@ void CrossKernel(const Context& dev_ctx,
   }
   auto outer_loops = 1;
   for (auto i = 0; i < dim; i++) {
-    outer_loops *= input_x_dims[i];
+    outer_loops *= static_cast<int>(input_x_dims[i]);
   }
   auto slice_size = 1;
   for (auto i = dim + 1; i < input_x_dims.size(); i++) {
-    slice_size *= input_x_dims[i];
+    slice_size *= static_cast<int>(input_x_dims[i]);
   }

   std::vector<T> input_x_vec, input_y_vec;
paddle/phi/kernels/cpu/cum_kernel.cc

@@ -82,12 +82,12 @@ void ScanKernel(const Context& dev_ctx,
   int pre = 1;
   int post = 1;
-  int mid = out_dims[axis];
+  int mid = static_cast<int>(out_dims[axis]);
   for (int i = 0; i < axis; ++i) {
-    pre *= out_dims[i];
+    pre *= static_cast<int>(out_dims[i]);
   }
   for (int i = axis + 1; i < out_dims.size(); ++i) {
-    post *= out_dims[i];
+    post *= static_cast<int>(out_dims[i]);
   }

   auto x0 = EigenVector<T>::Flatten(x);
paddle/phi/kernels/cpu/deformable_conv_grad_kernel.cc

@@ -117,8 +117,10 @@ void ModulatedDeformableCol2im(const Context& dev_ctx,
                                const std::vector<int>& dilation,
                                const int deformable_group,
                                T* grad_im) {
-  int channel_per_deformable_group = im_shape[0] / deformable_group;
-  int num_kernels = col_shape[0] * col_shape[1] * col_shape[2] * col_shape[3];
+  int channel_per_deformable_group =
+      static_cast<int>(im_shape[0] / deformable_group);
+  int num_kernels = static_cast<int>(col_shape[0] * col_shape[1] *
+                                     col_shape[2] * col_shape[3]);
   ModulatedDeformableCol2imCPUKernel(num_kernels,
                                      data_col,
@@ -275,9 +277,11 @@ void ModulatedDeformableCol2imCoord(const Context& dev_ctx,
                                     const int deformable_groups,
                                     T* grad_offset,
                                     T* grad_mask) {
-  int num_kernels = 2 * kernel_shape[2] * kernel_shape[3] * col_shape[1] *
-                    col_shape[2] * col_shape[3] * deformable_groups;
-  int channel_per_deformable_group = col_shape[0] / deformable_groups;
+  int num_kernels =
+      static_cast<int>(2 * kernel_shape[2] * kernel_shape[3] * col_shape[1] *
+                       col_shape[2] * col_shape[3] * deformable_groups);
+  int channel_per_deformable_group =
+      static_cast<int>(col_shape[0] / deformable_groups);
   ModulatedDeformableCol2imCoordCPUKernel(
       num_kernels,
paddle/phi/kernels/cpu/diagonal_grad_kernel.cc

@@ -38,8 +38,10 @@ void DiagonalGradKernel(const Context& dev_ctx,
   auto dx_dim_size = dx_dim.size();

   const int64_t offset_ = offset;
-  int64_t axis1_ = axis1 < 0 ? dx_dim_size + axis1 : axis1;
-  int64_t axis2_ = axis2 < 0 ? dx_dim_size + axis2 : axis2;
+  int64_t axis1_ =
+      static_cast<int64_t>(axis1 < 0 ? dx_dim_size + axis1 : axis1);
+  int64_t axis2_ =
+      static_cast<int64_t>(axis2 < 0 ? dx_dim_size + axis2 : axis2);

   std::vector<int64_t> dout_stride = funcs::ComputeDimStride(dout_dim);
   std::vector<int64_t> dx_stride = funcs::ComputeDimStride(dx_dim);
paddle/phi/kernels/cpu/diagonal_kernel.cc

@@ -38,8 +38,10 @@ void DiagonalKernel(const Context& dev_ctx,
   auto output_dim_size = output_dim.size();

   const int64_t offset_ = offset;
-  int64_t axis1_ = axis1 < 0 ? input_dim_size + axis1 : axis1;
-  int64_t axis2_ = axis2 < 0 ? input_dim_size + axis2 : axis2;
+  int64_t axis1_ =
+      static_cast<int64_t>(axis1 < 0 ? input_dim_size + axis1 : axis1);
+  int64_t axis2_ =
+      static_cast<int64_t>(axis2 < 0 ? input_dim_size + axis2 : axis2);

   std::vector<int64_t> input_stride = funcs::ComputeDimStride(input_dim);
   std::vector<int64_t> output_stride = funcs::ComputeDimStride(output_dim);
paddle/phi/kernels/cpu/distribute_fpn_proposals_kernel.cc

@@ -52,14 +52,15 @@ void DistributeFpnProposalsKernel(
   } else {
     fpn_rois_lod = fpn_rois.lod().back();
   }
-  fpn_rois_num = fpn_rois_lod[fpn_rois_lod.size() - 1];
+  fpn_rois_num = static_cast<int>(fpn_rois_lod[fpn_rois_lod.size() - 1]);
   std::vector<int> target_level;

   // record the number of rois in each level
   std::vector<int> num_rois_level(num_level, 0);
   std::vector<int> num_rois_level_integral(num_level + 1, 0);
   for (size_t i = 0; i < fpn_rois_lod.size() - 1; ++i) {
-    auto fpn_rois_slice = fpn_rois.Slice(fpn_rois_lod[i], fpn_rois_lod[i + 1]);
+    auto fpn_rois_slice =
+        fpn_rois.Slice(static_cast<int>(fpn_rois_lod[i]),
+                       static_cast<int>(fpn_rois_lod[i + 1]));
     const T* rois_data = fpn_rois_slice.data<T>();
     for (int j = 0; j < fpn_rois_slice.dims()[0]; ++j) {
       // get the target level of current rois
@@ -92,7 +93,8 @@ void DistributeFpnProposalsKernel(
   std::vector<int> restore_index_inter(fpn_rois_num, -1);
   // distribute the rois into different fpn level by target level
   for (size_t i = 0; i < fpn_rois_lod.size() - 1; ++i) {
-    auto fpn_rois_slice = fpn_rois.Slice(fpn_rois_lod[i], fpn_rois_lod[i + 1]);
+    auto fpn_rois_slice =
+        fpn_rois.Slice(static_cast<int>(fpn_rois_lod[i]),
+                       static_cast<int>(fpn_rois_lod[i + 1]));
     const T* rois_data = fpn_rois_slice.data<T>();
     size_t cur_offset = fpn_rois_lod[i];
@@ -105,9 +107,10 @@ void DistributeFpnProposalsKernel(
                rois_data,
                funcs::kBoxDim * sizeof(T));
         multi_fpn_rois_data[lvl - min_level] += funcs::kBoxDim;
-        int index_in_shuffle = num_rois_level_integral[lvl - min_level] +
-                               multi_fpn_rois_lod0[lvl - min_level][i + 1];
-        restore_index_inter[index_in_shuffle] = cur_offset + j;
+        int index_in_shuffle =
+            static_cast<int>(num_rois_level_integral[lvl - min_level] +
+                             multi_fpn_rois_lod0[lvl - min_level][i + 1]);
+        restore_index_inter[index_in_shuffle] =
+            static_cast<int>(cur_offset + j);
         multi_fpn_rois_lod0[lvl - min_level][i + 1]++;
         rois_data += funcs::kBoxDim;
       }
@@ -117,7 +120,7 @@ void DistributeFpnProposalsKernel(
   }
   if (!multi_level_rois_num.empty()) {
-    int batch_size = fpn_rois_lod.size() - 1;
+    int batch_size = static_cast<int>(fpn_rois_lod.size() - 1);
     for (int i = 0; i < num_level; ++i) {
       multi_level_rois_num[i]->Resize({batch_size});
       int* rois_num_data = dev_ctx.template Alloc<int>(multi_level_rois_num[i]);
paddle/phi/kernels/cpu/eig_grad_kernel.cc

@@ -32,7 +32,7 @@ void EigGradKernel(const Context& dev_ctx,
   phi::DDim dim_origin = dims;
   int num_dims = dim_origin.size();
   int batch_count = BatchCount(out_v);
-  const int order = dim_origin[num_dims - 1];
+  const int order = static_cast<int>(dim_origin[num_dims - 1]);

   ComputeBackwardForComplexInput<phi::dtype::Complex<T>, Context>(
       out_w, out_v, dout_w, dout_v, dx_data, batch_count, order, dev_ctx);
paddle/phi/kernels/cpu/eig_kernel.cc

@@ -33,7 +33,7 @@ void EigKernel(const Context& dev_ctx,
     dev_ctx.template Alloc<phi::dtype::Complex<T>>(out_v);

     int batch_count = BatchCount(x);
-    int order = x.dims()[x.dims().size() - 1];
+    int order = static_cast<int>(x.dims()[x.dims().size() - 1]);

     PADDLE_ENFORCE_LT(
         0,
         order,
@@ -69,7 +69,7 @@ void EigKernel(const Context& dev_ctx,
     // 2. construct complex values
     auto* real_part_data = real_part.data<phi::dtype::Real<T>>();
     auto* imag_part_data = imag_part.data<phi::dtype::Real<T>>();
-    int out_w_numel = out_w->numel();
+    int out_w_numel = static_cast<int>(out_w->numel());
     phi::funcs::ForRange<Context> for_range(dev_ctx, out_w_numel);
     phi::funcs::RealImagToComplexFunctor<phi::dtype::Complex<T>> functor(
paddle/phi/kernels/cpu/eigvals_kernel.cc

@@ -81,7 +81,7 @@ typename std::enable_if<std::is_floating_point<T>::value>::type LapackEigvals(
   w.Resize(make_ddim({n_dim << 1}));
   T* w_data = ctx.template Alloc<T>(&w);

-  int64_t work_mem = work->memory_size();
+  int64_t work_mem = static_cast<int64_t>(work->memory_size());
   int64_t required_work_mem = 3 * n_dim * sizeof(T);
   PADDLE_ENFORCE_GE(
       work_mem,
@@ -132,7 +132,7 @@ LapackEigvals(const Context& ctx,
   DenseTensor a;  // will be overwritten when lapackEig exit
   Copy(ctx, input, input.place(), /*blocking=*/true, &a);

-  int64_t work_mem = work->memory_size();
+  int64_t work_mem = static_cast<int64_t>(work->memory_size());
   int64_t n_dim = input.dims()[1];
   int64_t required_work_mem = 3 * n_dim * sizeof(T);
   PADDLE_ENFORCE_GE(
@@ -145,7 +145,7 @@ LapackEigvals(const Context& ctx,
           required_work_mem,
           work_mem));

-  int64_t rwork_mem = rwork->memory_size();
+  int64_t rwork_mem = static_cast<int64_t>(rwork->memory_size());
   int64_t required_rwork_mem = (n_dim << 1) * sizeof(dtype::Real<T>);
   PADDLE_ENFORCE_GE(
       rwork_mem,
@@ -185,7 +185,7 @@ void SpiltBatchSquareMatrix(const DenseTensor& input,
                             std::vector<DenseTensor>* output) {
   DDim input_dims = input.dims();
   int last_dim = input_dims.size() - 1;
-  int n_dim = input_dims[last_dim];
+  int n_dim = static_cast<int>(input_dims[last_dim]);

   DDim flattened_input_dims, flattened_output_dims;
   if (input_dims.size() > 2) {
@@ -209,7 +209,7 @@ void EigvalsKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) {
   SpiltBatchSquareMatrix(x, /*->*/ &x_matrices);

   int64_t n_dim = x_matrices[0].dims()[1];
-  int64_t n_batch = x_matrices.size();
+  int64_t n_batch = static_cast<int64_t>(x_matrices.size());
   DDim out_dims = out->dims();
   out->Resize(make_ddim({n_batch, n_dim}));
   std::vector<DenseTensor> out_vectors = out->Split(1, 0);
paddle/phi/kernels/cpu/fill_diagonal_tensor_grad_kernel.cc

@@ -27,7 +27,7 @@ void FillDiagonalTensorGradKernel(const Context& ctx,
                                   int dim1,
                                   int dim2,
                                   DenseTensor* x_grad) {
-  auto matrows = 1;
+  int matrows = 1;

   if (x_grad) {
     auto* data = ctx.template Alloc<T>(x_grad);
@@ -35,7 +35,7 @@ void FillDiagonalTensorGradKernel(const Context& ctx,
     auto dx_dims = x_grad->dims();
     for (int i = 0; i < dx_dims.size(); i++) {
       if (i != dim1 && i != dim2) {
-        matrows *= dx_dims[i];
+        matrows *= static_cast<int>(dx_dims[i]);
       }
     }
paddle/phi/kernels/cpu/fill_diagonal_tensor_kernel.cc

@@ -59,7 +59,7 @@ void CalMatDims(phi::DDim out_dims,
     dimprod *= out_dims[i];
   }

-  auto diagdim = dim1;
+  int64_t diagdim = dim1;
   if (*offset >= 0) {
     diagdim = std::min(out_dims[dim1], out_dims[dim2] - *offset);
     *offset *= strides[0];
paddle/phi/kernels/cpu/gather_tree_kernel.cc

@@ -30,7 +30,7 @@ void GatherTreeKernel(const Context &dev_ctx,
   T* out_data = dev_ctx.template Alloc<T>(out);
   auto& ids_dims = ids.dims();
-  auto max_length = ids_dims[0];
+  int64_t max_length = ids_dims[0];
   auto batch_size = ids_dims[1];
   auto beam_size = ids_dims[2];
@@ -49,7 +49,7 @@ void GatherTreeKernel(const Context &dev_ctx,
           (max_length - 1) * batch_size * beam_size + batch * beam_size + beam;
       out_data[idx] = ids_data[idx];
       auto parent = parents_data[idx];
-      for (int step = max_length - 2; step >= 0; step--) {
+      for (int64_t step = max_length - 2; step >= 0; step--) {
        PADDLE_ENFORCE_LT(
            parent,
            beam_size,
paddle/phi/kernels/cpu/generate_proposals_kernel.cc

@@ -28,7 +28,7 @@ static void AppendProposals(DenseTensor* dst,
   auto* out_data = dst->data();
   auto* to_add_data = src.data();
   size_t size_of_t = SizeOf(src.dtype());
-  offset *= size_of_t;
+  offset *= static_cast<int64_t>(size_of_t);
   std::memcpy(
       reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(out_data) + offset),
       to_add_data,
@@ -367,7 +367,7 @@ void GenerateProposalsKernel(const Context& ctx,
     AppendProposals(rpn_roi_probs, num_proposals, nscores);
     num_proposals += proposals.dims()[0];
     lod0.push_back(num_proposals);
-    tmp_num.push_back(proposals.dims()[0]);
+    tmp_num.push_back(static_cast<int>(proposals.dims()[0]));
   }
   if (rpn_rois_num != nullptr) {
     rpn_rois_num->Resize(phi::make_ddim({num}));
paddle/phi/kernels/cpu/graph_reindex_kernel.cc

@@ -35,8 +35,8 @@ void GraphReindexKernel(const Context& dev_ctx,
   const T* x_data = x.data<T>();
   const T* neighbors_data = neighbors.data<T>();
   const int* count_data = count.data<int>();
-  const int bs = x.dims()[0];
-  const int num_edges = neighbors.dims()[0];
+  const int bs = static_cast<int>(x.dims()[0]);
+  const int num_edges = static_cast<int>(neighbors.dims()[0]);

   std::unordered_map<T, T> node_map;
   std::vector<T> unique_nodes;
@@ -63,7 +63,7 @@ void GraphReindexKernel(const Context& dev_ctx,
   }
   // Reindex Dst
   // Add support for multi-type edges reindex
-  int num_edge_types = count.dims()[0] / bs;
+  int num_edge_types = static_cast<int>(count.dims()[0] / bs);
   int cnt = 0;
   for (int i = 0; i < num_edge_types; i++) {
     for (int j = 0; j < bs; j++) {
paddle/phi/kernels/cpu/graph_sample_neighbors_kernel.cc

@@ -178,7 +178,7 @@ void GraphSampleNeighborsKernel(
   const T* row_data = row.data<T>();
   const T* col_ptr_data = col_ptr.data<T>();
   const T* x_data = x.data<T>();
-  int bs = x.dims()[0];
+  int bs = static_cast<int>(x.dims()[0]);

   std::vector<T> output;
   std::vector<int> output_count;
paddle/phi/kernels/cpu/grid_sample_grad_kernel.cc View file @ b702d2ae
...
@@ -154,9 +154,9 @@ static void CalcGridLocationsWithGrad(const CPUContext& ctx,
                                       DenseTensor* grid_y,
                                       DenseTensor* grid_x_scale,
                                       DenseTensor* grid_y_scale) {
-  const int n = grid.dims()[0];
-  const int out_h = grid.dims()[1];
-  const int out_w = grid.dims()[2];
+  const int n = static_cast<int>(grid.dims()[0]);
+  const int out_h = static_cast<int>(grid.dims()[1]);
+  const int out_w = static_cast<int>(grid.dims()[2]);
   // split grid with shape (n, h, w, 2) into (x, y) by the 3rd Dim
   grid_x->Resize({n, out_h, out_w});
...
@@ -193,10 +193,10 @@ static void Calc3DGridLocationsWithGrad(const CPUContext& ctx,
                                         DenseTensor* grid_x_scale,
                                         DenseTensor* grid_y_scale,
                                         DenseTensor* grid_z_scale) {
-  const int n = grid.dims()[0];
-  const int out_d = grid.dims()[1];
-  const int out_h = grid.dims()[2];
-  const int out_w = grid.dims()[3];
+  const int n = static_cast<int>(grid.dims()[0]);
+  const int out_d = static_cast<int>(grid.dims()[1]);
+  const int out_h = static_cast<int>(grid.dims()[2]);
+  const int out_w = static_cast<int>(grid.dims()[3]);
   // split grid with shape (n, d, h, w, 3) into (x, y, z) by the 3rd Dim
   grid_x->Resize({n, out_d, out_h, out_w});
...
@@ -232,12 +232,12 @@ static void GatherOutputGradToInputGrad(const DenseTensor& output_grad,
                                         const DenseTensor& y,
                                         const DenseTensor& d1,
                                         const DenseTensor& d2) {
-  const int n = output_grad.dims()[0];
-  const int c = output_grad.dims()[1];
-  const int out_h = output_grad.dims()[2];
-  const int out_w = output_grad.dims()[3];
-  const int in_h = input_grad->dims()[2];
-  const int in_w = input_grad->dims()[3];
+  const int n = static_cast<int>(output_grad.dims()[0]);
+  const int c = static_cast<int>(output_grad.dims()[1]);
+  const int out_h = static_cast<int>(output_grad.dims()[2]);
+  const int out_w = static_cast<int>(output_grad.dims()[3]);
+  const int in_h = static_cast<int>(input_grad->dims()[2]);
+  const int in_w = static_cast<int>(input_grad->dims()[3]);
   auto x_t = EigenTensor<T, 3>::From(x);
   auto y_t = EigenTensor<T, 3>::From(y);
   auto d1_t = EigenTensor<T, 3>::From(d1);
...
@@ -272,14 +272,14 @@ static void Gather3DOutputGradToInputGrad(const DenseTensor& output_grad,
                                           const DenseTensor& d1,
                                           const DenseTensor& d2,
                                           const DenseTensor& d3) {
-  const int n = output_grad.dims()[0];
-  const int c = output_grad.dims()[1];
-  const int out_d = output_grad.dims()[2];
-  const int out_h = output_grad.dims()[3];
-  const int out_w = output_grad.dims()[4];
-  const int in_d = input_grad->dims()[2];
-  const int in_h = input_grad->dims()[3];
-  const int in_w = input_grad->dims()[4];
+  const int n = static_cast<int>(output_grad.dims()[0]);
+  const int c = static_cast<int>(output_grad.dims()[1]);
+  const int out_d = static_cast<int>(output_grad.dims()[2]);
+  const int out_h = static_cast<int>(output_grad.dims()[3]);
+  const int out_w = static_cast<int>(output_grad.dims()[4]);
+  const int in_d = static_cast<int>(input_grad->dims()[2]);
+  const int in_h = static_cast<int>(input_grad->dims()[3]);
+  const int in_w = static_cast<int>(input_grad->dims()[4]);
   auto x_t = EigenTensor<T, 4>::From(x);
   auto y_t = EigenTensor<T, 4>::From(y);
   auto z_t = EigenTensor<T, 4>::From(z);
...
@@ -325,10 +325,10 @@ static void GatherBilinearGrad(const CPUContext& ctx,
                                DenseTensor* grid_y_scale,
                                DenseTensor* input_grad,
                                DenseTensor* grid_grad) {
-  const int n = grid_x->dims()[0];
-  const int out_h = grid_x->dims()[1];
-  const int out_w = grid_x->dims()[2];
-  const int c = input.dims()[1];
+  const int n = static_cast<int>(grid_x->dims()[0]);
+  const int out_h = static_cast<int>(grid_x->dims()[1]);
+  const int out_w = static_cast<int>(grid_x->dims()[2]);
+  const int c = static_cast<int>(input.dims()[1]);
   DenseTensor x_w, x_e, y_n, y_s;
   DenseTensor d_w, d_e, d_n, d_s;
...
@@ -427,11 +427,11 @@ static void Gather3DBilinearGrad(const CPUContext& ctx,
                                  DenseTensor* grid_z_scale,
                                  DenseTensor* input_grad,
                                  DenseTensor* grid_grad) {
-  const int n = grid_x->dims()[0];
-  const int out_d = grid_x->dims()[1];
-  const int out_h = grid_x->dims()[2];
-  const int out_w = grid_x->dims()[3];
-  const int c = input.dims()[1];
+  const int n = static_cast<int>(grid_x->dims()[0]);
+  const int out_d = static_cast<int>(grid_x->dims()[1]);
+  const int out_h = static_cast<int>(grid_x->dims()[2]);
+  const int out_w = static_cast<int>(grid_x->dims()[3]);
+  const int c = static_cast<int>(input.dims()[1]);
   DenseTensor x_w, x_e, y_n, y_s, z_t, z_b;
   DenseTensor d_w, d_e, d_n, d_s, d_t, d_b;
...
@@ -577,12 +577,12 @@ static void GatherOutputGradToInputGrad(const DenseTensor& output_grad,
                                         DenseTensor* input_grad,
                                         const DenseTensor& x,
                                         const DenseTensor& y) {
-  const int n = output_grad.dims()[0];
-  const int c = output_grad.dims()[1];
-  const int out_h = output_grad.dims()[2];
-  const int out_w = output_grad.dims()[3];
-  const int in_h = input_grad->dims()[2];
-  const int in_w = input_grad->dims()[3];
+  const int n = static_cast<int>(output_grad.dims()[0]);
+  const int c = static_cast<int>(output_grad.dims()[1]);
+  const int out_h = static_cast<int>(output_grad.dims()[2]);
+  const int out_w = static_cast<int>(output_grad.dims()[3]);
+  const int in_h = static_cast<int>(input_grad->dims()[2]);
+  const int in_w = static_cast<int>(input_grad->dims()[3]);
   auto x_t = EigenTensor<T, 3>::From(x);
   auto y_t = EigenTensor<T, 3>::From(y);
   auto input_grad_t = EigenTensor<T, 4>::From(*input_grad);
...
@@ -611,14 +611,14 @@ static void Gather3DOutputGradToInputGrad(const DenseTensor& output_grad,
                                           const DenseTensor& x,
                                           const DenseTensor& y,
                                           const DenseTensor& z) {
-  const int n = output_grad.dims()[0];
-  const int c = output_grad.dims()[1];
-  const int out_d = output_grad.dims()[2];
-  const int out_h = output_grad.dims()[3];
-  const int out_w = output_grad.dims()[4];
-  const int in_d = input_grad->dims()[2];
-  const int in_h = input_grad->dims()[3];
-  const int in_w = input_grad->dims()[4];
+  const int n = static_cast<int>(output_grad.dims()[0]);
+  const int c = static_cast<int>(output_grad.dims()[1]);
+  const int out_d = static_cast<int>(output_grad.dims()[2]);
+  const int out_h = static_cast<int>(output_grad.dims()[3]);
+  const int out_w = static_cast<int>(output_grad.dims()[4]);
+  const int in_d = static_cast<int>(input_grad->dims()[2]);
+  const int in_h = static_cast<int>(input_grad->dims()[3]);
+  const int in_w = static_cast<int>(input_grad->dims()[4]);
   auto x_t = EigenTensor<T, 4>::From(x);
   auto y_t = EigenTensor<T, 4>::From(y);
   auto z_t = EigenTensor<T, 4>::From(z);
...
@@ -660,12 +660,12 @@ void GridSampleGradKernel(const Context& dev_ctx,
                           DenseTensor* x_grad,
                           DenseTensor* grid_grad) {
   if (x.dims().size() == 4) {
-    const int n = grid.dims()[0];
-    const int out_h = grid.dims()[1];
-    const int out_w = grid.dims()[2];
-    const int c = x.dims()[1];
-    const int in_h = x.dims()[2];
-    const int in_w = x.dims()[3];
+    const int n = static_cast<int>(grid.dims()[0]);
+    const int out_h = static_cast<int>(grid.dims()[1]);
+    const int out_w = static_cast<int>(grid.dims()[2]);
+    const int c = static_cast<int>(x.dims()[1]);
+    const int in_h = static_cast<int>(x.dims()[2]);
+    const int in_w = static_cast<int>(x.dims()[3]);
     x_grad->Resize({n, c, in_h, in_w});
     dev_ctx.template Alloc<T>(x_grad);
...
@@ -708,14 +708,14 @@ void GridSampleGradKernel(const Context& dev_ctx,
       GatherOutputGradToInputGrad<T>(out_grid, x_grad, grid_x, grid_y);
     }
   } else {
-    const int n = grid.dims()[0];
-    const int out_d = grid.dims()[1];
-    const int out_h = grid.dims()[2];
-    const int out_w = grid.dims()[3];
-    const int c = x.dims()[1];
-    const int in_d = x.dims()[2];
-    const int in_h = x.dims()[3];
-    const int in_w = x.dims()[4];
+    const int n = static_cast<int>(grid.dims()[0]);
+    const int out_d = static_cast<int>(grid.dims()[1]);
+    const int out_h = static_cast<int>(grid.dims()[2]);
+    const int out_w = static_cast<int>(grid.dims()[3]);
+    const int c = static_cast<int>(x.dims()[1]);
+    const int in_d = static_cast<int>(x.dims()[2]);
+    const int in_h = static_cast<int>(x.dims()[3]);
+    const int in_w = static_cast<int>(x.dims()[4]);
     x_grad->Resize({n, c, in_d, in_h, in_w});
     dev_ctx.template Alloc<T>(x_grad);
...
paddle/phi/kernels/cpu/grid_sample_kernel.cc View file @ b702d2ae
...
@@ -97,9 +97,9 @@ static void CalcGridLocations(const CPUContext& ctx,
                               std::string padding_mode,
                               DenseTensor* grid_x,
                               DenseTensor* grid_y) {
-  const int n = grid.dims()[0];
-  const int out_h = grid.dims()[1];
-  const int out_w = grid.dims()[2];
+  const int n = static_cast<int>(grid.dims()[0]);
+  const int out_h = static_cast<int>(grid.dims()[1]);
+  const int out_w = static_cast<int>(grid.dims()[2]);
   // split grid with shape (n, h, w, 2) into (x, y) by the 3rd Dim
   grid_x->Resize({n, out_h, out_w});
...
@@ -130,10 +130,10 @@ static void Calc3DGridLocations(const CPUContext& ctx,
                                 DenseTensor* grid_x,
                                 DenseTensor* grid_y,
                                 DenseTensor* grid_z) {
-  const int n = grid.dims()[0];
-  const int out_d = grid.dims()[1];
-  const int out_h = grid.dims()[2];
-  const int out_w = grid.dims()[3];
+  const int n = static_cast<int>(grid.dims()[0]);
+  const int out_d = static_cast<int>(grid.dims()[1]);
+  const int out_h = static_cast<int>(grid.dims()[2]);
+  const int out_w = static_cast<int>(grid.dims()[3]);
   // split grid with shape (n, d, h, w, 3) into (x, y, z) by the 3rd Dim
   grid_x->Resize({n, out_d, out_h, out_w});
...
@@ -165,10 +165,10 @@ static void BilinearInter(const CPUContext& ctx,
                           DenseTensor* grid_y,
                           DenseTensor* out) {
   auto& place = *ctx.eigen_device();
-  const int n = grid_x->dims()[0];
-  const int out_h = grid_x->dims()[1];
-  const int out_w = grid_x->dims()[2];
-  const int c = input.dims()[1];
+  const int n = static_cast<int>(grid_x->dims()[0]);
+  const int out_h = static_cast<int>(grid_x->dims()[1]);
+  const int out_w = static_cast<int>(grid_x->dims()[2]);
+  const int c = static_cast<int>(input.dims()[1]);
   DenseTensor x_w, x_e, y_n, y_s;
   DenseTensor d_w, d_e, d_n, d_s;
...
@@ -224,11 +224,11 @@ static void Bilinear3DInter(const CPUContext& ctx,
                             DenseTensor* grid_z,
                             DenseTensor* out) {
   auto& place = *ctx.eigen_device();
-  const int n = grid_x->dims()[0];
-  const int out_d = grid_x->dims()[1];
-  const int out_h = grid_x->dims()[2];
-  const int out_w = grid_x->dims()[3];
-  const int c = input.dims()[1];
+  const int n = static_cast<int>(grid_x->dims()[0]);
+  const int out_d = static_cast<int>(grid_x->dims()[1]);
+  const int out_h = static_cast<int>(grid_x->dims()[2]);
+  const int out_w = static_cast<int>(grid_x->dims()[3]);
+  const int c = static_cast<int>(input.dims()[1]);
   // get corner pixel values from (x, y, z)
   // for 4d, we used north-east-south-west
...
@@ -313,12 +313,12 @@ void GridSampleKernel(const Context& dev_ctx,
                       bool align_corners,
                       DenseTensor* out) {
   if (x.dims().size() == 4) {
-    const int n = grid.dims()[0];
-    const int out_h = grid.dims()[1];
-    const int out_w = grid.dims()[2];
-    const int c = x.dims()[1];
-    const int in_h = x.dims()[2];
-    const int in_w = x.dims()[3];
+    const int n = static_cast<int>(grid.dims()[0]);
+    const int out_h = static_cast<int>(grid.dims()[1]);
+    const int out_w = static_cast<int>(grid.dims()[2]);
+    const int c = static_cast<int>(x.dims()[1]);
+    const int in_h = static_cast<int>(x.dims()[2]);
+    const int in_w = static_cast<int>(x.dims()[3]);
     out->Resize(phi::make_ddim({n, c, out_h, out_w}));
     dev_ctx.template Alloc<T>(out);
...
@@ -344,14 +344,14 @@ void GridSampleKernel(const Context& dev_ctx,
       GetGridPointValue<T>(x, out, grid_x, grid_y);
     }
   } else {
-    const int n = grid.dims()[0];
-    const int out_d = grid.dims()[1];
-    const int out_h = grid.dims()[2];
-    const int out_w = grid.dims()[3];
-    const int c = x.dims()[1];
-    const int in_d = x.dims()[2];
-    const int in_h = x.dims()[3];
-    const int in_w = x.dims()[4];
+    const int n = static_cast<int>(grid.dims()[0]);
+    const int out_d = static_cast<int>(grid.dims()[1]);
+    const int out_h = static_cast<int>(grid.dims()[2]);
+    const int out_w = static_cast<int>(grid.dims()[3]);
+    const int c = static_cast<int>(x.dims()[1]);
+    const int in_d = static_cast<int>(x.dims()[2]);
+    const int in_h = static_cast<int>(x.dims()[3]);
+    const int in_w = static_cast<int>(x.dims()[4]);
     out->Resize(phi::make_ddim({n, c, out_d, out_h, out_w}));
     dev_ctx.template Alloc<T>(out);
...
paddle/phi/kernels/cpu/group_norm_grad_kernel.cc View file @ b702d2ae
...
@@ -48,8 +48,8 @@ void GroupNormGradKernel(const Context& dev_ctx,
   const auto scale_ptr = scale.get_ptr();
   const auto bias_ptr = bias.get_ptr();
   const auto& x_dims = y.dims();
-  const int C = (data_layout == DataLayout::kNCHW ? x_dims[1]
-                                                  : x_dims[x_dims.size() - 1]);
+  const int C = static_cast<int>(
+      data_layout == DataLayout::kNCHW ? x_dims[1] : x_dims[x_dims.size() - 1]);
   const int group_size = C / groups;
   dev_ctx.template Alloc<T>(d_x);
...
@@ -80,11 +80,11 @@ void GroupNormGradKernel(const Context& dev_ctx,
   int imsize = 1;
   if (data_layout == DataLayout::kNCHW) {
     for (int i = 2; i < x_dims.size(); ++i) {
-      imsize *= x_dims[i];
+      imsize *= static_cast<int>(x_dims[i]);
     }
   } else {
     for (int i = 1; i < x_dims.size() - 1; ++i) {
-      imsize *= x_dims[i];
+      imsize *= static_cast<int>(x_dims[i]);
     }
   }
   auto* iter_x_data = x_data;
...
paddle/phi/kernels/cpu/group_norm_kernel.cc View file @ b702d2ae
...
@@ -45,8 +45,8 @@ void GroupNormKernel(const Context& dev_ctx,
   const auto bias_ptr = bias.get_ptr();
   const auto x_dims = x.dims();
-  const int C = (data_layout == DataLayout::kNCHW ? x_dims[1]
-                                                  : x_dims[x_dims.size() - 1]);
+  const int C = static_cast<int>(
+      data_layout == DataLayout::kNCHW ? x_dims[1] : x_dims[x_dims.size() - 1]);
   const int group_size = C / groups;
   dev_ctx.template Alloc<T>(y);
...
@@ -66,11 +66,11 @@ void GroupNormKernel(const Context& dev_ctx,
   int imsize = 1;
   if (data_layout == DataLayout::kNCHW) {
     for (int i = 2; i < x_dims.size(); ++i) {
-      imsize *= x_dims[i];
+      imsize *= static_cast<int>(x_dims[i]);
     }
   } else {
     for (int i = 1; i < x_dims.size() - 1; ++i) {
-      imsize *= x_dims[i];
+      imsize *= static_cast<int>(x_dims[i]);
     }
   }
   auto* iter_x_data = x_data;
...
paddle/phi/kernels/cpu/gumbel_softmax_kernel.cc View file @ b702d2ae
...
@@ -66,7 +66,7 @@ struct OneHotGenerator<CPUContext, T> {
   const int size_out_axis = funcs::SizeOutAxis(axis, x.dims());
   for (int i = 0; i < x.dims().size(); i++) {
-    if (i != axis) index_dim.push_back(x.dims().Get()[i]);
+    if (i != axis) index_dim.push_back(static_cast<int>(x.dims().Get()[i]));
   }
   DDim index_ddim(index_dim.data(), rank - 1);
   index.Resize(index_ddim);
...
paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc View file @ b702d2ae
...
@@ -45,9 +45,10 @@ void HSigmoidLossKernel(const Context& ctx,
   if (path.get_ptr()) {
     is_custom = true;
   }
-  int64_t code_length = path.get_ptr()
-                            ? path.get_ptr()->dims()[1]
-                            : phi::funcs::FindLastSet(num_classes_st - 1);
+  int64_t code_length =
+      path.get_ptr() ? static_cast<int64_t>(path.get_ptr()->dims()[1])
+                     : static_cast<int64_t>(
+                           phi::funcs::FindLastSet(num_classes_st - 1));
   int64_t batch_size = x.dims()[0];
   DenseTensor sum;
   pre_out->Resize(phi::make_ddim({batch_size, code_length}));
...
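The hsigmoid hunk casts both arms of the conditional operator rather than one value: when the two arms have different integer types, the compiler picks a common type for the whole `?:` expression, and the conversion back to the declared type is what trips the check (it also flags signedness changes). A minimal sketch with hypothetical stand-ins for `dims()[1]` and `FindLastSet`:

    #include <cstddef>
    #include <cstdint>

    int64_t dim_from_path() { return 12; }          // stand-in for path->dims()[1]
    size_t find_last_set(uint64_t x) { return 5; }  // stand-in helper

    int main() {
      bool has_path = false;
      // Mixing int64_t and size_t arms would give the ?: an unsigned common
      // type, and converting that back to int64_t changes signedness, which
      // the check treats as narrowing. Casting both arms pins the type.
      int64_t code_length =
          has_path ? static_cast<int64_t>(dim_from_path())
                   : static_cast<int64_t>(find_last_set(31));
      return code_length == 5 ? 0 : 1;
    }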
paddle/phi/kernels/cpu/index_put_grad_kernel.cc View file @ b702d2ae
...
@@ -210,7 +210,8 @@ void IndexPutGradKernel(const Context& dev_ctx,
   std::vector<DenseTensor> tmp_res_indices_v;
   std::vector<DenseTensor> range_tensor_v;
-  for (int i = int_indices_v.size(); i < x.dims().size(); ++i) {
+  for (int i = static_cast<int>(int_indices_v.size()); i < x.dims().size();
+       ++i) {
     range_tensor_v.emplace_back(funcs::GetRangeTensor<int64_t, Context>(
         dev_ctx, x.dims()[i], phi::DataType::INT64));
   }
...
paddle/phi/kernels/cpu/index_put_kernel.cc View file @ b702d2ae
...
@@ -134,7 +134,8 @@ void IndexPutKernel(const Context& dev_ctx,
   std::vector<DenseTensor> range_tensor_v;
   const DenseTensor* ptr_value = nullptr;
-  for (int i = int_indices_v.size(); i < x.dims().size(); ++i) {
+  for (int i = static_cast<int>(int_indices_v.size()); i < x.dims().size();
+       ++i) {
     range_tensor_v.emplace_back(funcs::GetRangeTensor<int64_t, Context>(
         dev_ctx, x.dims()[i], phi::DataType::INT64));
   }
...
paddle/phi/kernels/cpu/index_sample_grad_kernel.cc View file @ b702d2ae
...
@@ -35,7 +35,7 @@ void IndexSampleGradInner(const Context& context,
   auto value_length = x_grad_dims[1];
   auto index_length = index_dims[1];
-  int index_ids_num = index.numel();
+  int index_ids_num = static_cast<int>(index.numel());
   std::vector<T> x_grad_vec(x_grad->numel(), 0);
...
paddle/phi/kernels/cpu/index_sample_kernel.cc View file @ b702d2ae
...
@@ -37,10 +37,10 @@ void IndexSampleInner(const Context &context,
   auto input_dims = input.dims();
   auto index_dims = index.dims();
-  int batch_size = input_dims[0];
+  int batch_size = static_cast<int>(input_dims[0]);
   auto value_length = input_dims[1];
   auto index_length = index_dims[1];
-  int index_ids_num = index.numel();
+  int index_ids_num = static_cast<int>(index.numel());
   std::vector<T> input_vec;
   std::vector<IndexT> index_vec;
...
paddle/phi/kernels/cpu/instance_norm_grad_kernel.cc View file @ b702d2ae
...
@@ -55,10 +55,10 @@ void InstanceNormGradKernel(const Context& dev_ctx,
   const auto& x_dims = x.dims();
-  const int N = x_dims[0];
-  const int C = x_dims[1];
+  const int N = static_cast<int>(x_dims[0]);
+  const int C = static_cast<int>(x_dims[1]);
   const int NxC = N * C;
-  const int sample_size = x.numel() / N / C;
+  const int sample_size = static_cast<int>(x.numel() / N / C);
   dev_ctx.template Alloc<T>(d_x);
   auto* place = dev_ctx.eigen_device();
...
@@ -172,7 +172,7 @@ void InstanceNormDoubleGradKernel(const Context& dev_ctx,
   const auto& x_dims = x.dims();
   int N, C, H, W, D;
   funcs::ExtractNCWHD(x_dims, DataLayout::kNCHW, &N, &C, &H, &W, &D);
-  const int sample_size = x.numel() / N / C;
+  const int sample_size = static_cast<int>(x.numel() / N / C);
   const int NxC = N * C;
   const T* mean_data = saved_mean.data<T>();
...
paddle/phi/kernels/cpu/instance_norm_kernel.cc View file @ b702d2ae
...
@@ -40,10 +40,10 @@ void InstanceNormKernel(const Context& dev_ctx,
                         DenseTensor* saved_variance) {
   const auto& x_dims = x.dims();
   T epsilon = static_cast<T>(epsilon_f);
-  const int N = x_dims[0];
-  const int C = x_dims[1];
+  const int N = static_cast<int>(x_dims[0]);
+  const int C = static_cast<int>(x_dims[1]);
   const int NxC = N * C;
-  const int sample_size = x.numel() / N / C;
+  const int sample_size = static_cast<int>(x.numel() / N / C);
   auto* place = dev_ctx.eigen_device();
   Eigen::DSizes<int, 2> shape(NxC, sample_size);
...
paddle/phi/kernels/cpu/interpolate_grad_kernel.cc View file @ b702d2ae
...
@@ -40,15 +40,18 @@ static void LinearInterpolationGrad(const DenseTensor& output_grad,
   bool align_flag = (align_mode == 0 && !align_corners);
   using MT = typename phi::dtype::MPTypeTrait<T>::Type;
   for (int l = 0; l < out_w; l++) {
-    int x_w = align_flag ? static_cast<int>(ratio_w * (l + 0.5) - 0.5)
-                         : static_cast<int>(ratio_w * l);
+    int x_w = static_cast<int>(align_flag
+                                   ? (ratio_w * (l + 0.5) - 0.5)
+                                   : (ratio_w * static_cast<float>(l)));
     x_w = (x_w > 0) ? x_w : 0;                       // w
     int x_e = (x_w < (in_w - 1)) ? (x_w + 1) : x_w;  // w_id
-    float idx_src_x = ratio_w * (l + 0.5) - 0.5;
+    float idx_src_x = ratio_w * (static_cast<float>(l) + 0.5f) - 0.5f;
     idx_src_x = (idx_src_x > 0) ? idx_src_x : 0;
-    float d_w = align_flag ? idx_src_x - x_w : ratio_w * l - x_w;  // w1lambda
-    float d_e = 1.f - d_w;                                         // w2lambda
+    float d_w = static_cast<float>(
+        align_flag
+            ? idx_src_x - static_cast<float>(x_w)
+            : ratio_w * static_cast<float>(l) - static_cast<float>(x_w));  // w1lambda
+    float d_e = 1.f - d_w;  // w2lambda
     for (int i = 0; i < n; i++) {    // loop for batches
       for (int j = 0; j < c; j++) {  // loop for channels
...
@@ -88,23 +91,28 @@ static void BilinearInterpolationGrad(const DenseTensor& output_grad,
   using MT = typename phi::dtype::MPTypeTrait<T>::Type;
   for (int k = 0; k < out_h; k++) {  // loop for images
-    int y_n = align_flag ? static_cast<int>(ratio_h * (k + 0.5) - 0.5)
-                         : static_cast<int>(ratio_h * k);
+    int y_n = static_cast<int>(align_flag
+                                   ? (ratio_h * (k + 0.5) - 0.5)
+                                   : (ratio_h * static_cast<float>(k)));
     y_n = (y_n > 0) ? y_n : 0;
     int y_s = (y_n + 1) < (in_h - 1) ? (y_n + 1) : (in_h - 1);
-    float idx_src_y = ratio_h * (k + 0.5) - 0.5;
+    float idx_src_y = ratio_h * (static_cast<float>(k) + 0.5f) - 0.5f;
     idx_src_y = (idx_src_y > 0) ? idx_src_y : 0;
-    float d_n = align_flag ? idx_src_y - y_n : ratio_h * k - y_n;
+    float d_n = align_flag
+                    ? idx_src_y - static_cast<float>(y_n)
+                    : ratio_h * static_cast<float>(k) - static_cast<float>(y_n);
     float d_s = 1.f - d_n;
     for (int l = 0; l < out_w; l++) {
-      int x_w = align_flag ? static_cast<int>(ratio_w * (l + 0.5) - 0.5)
-                           : static_cast<int>(ratio_w * l);
+      int x_w = static_cast<int>(
+          align_flag ? (ratio_w * (static_cast<float>(l) + 0.5f) - 0.5f)
+                     : (ratio_w * static_cast<float>(l)));
       x_w = (x_w > 0) ? x_w : 0;
       int x_e = (x_w + 1) < (in_w - 1) ? (x_w + 1) : (in_w - 1);
-      float idx_src_x = ratio_w * (l + 0.5) - 0.5;
+      float idx_src_x = ratio_w * (static_cast<float>(l) + 0.5f) - 0.5f;
       idx_src_x = (idx_src_x > 0) ? idx_src_x : 0;
-      float d_w = align_flag ? idx_src_x - x_w : ratio_w * l - x_w;
+      float d_w = align_flag ? idx_src_x - static_cast<float>(x_w)
+                             : ratio_w * static_cast<float>(l) -
+                                   static_cast<float>(x_w);
       float d_e = 1.f - d_w;
       for (int i = 0; i < n; i++) {  // loop for batches
...
@@ -144,12 +152,14 @@ static void NearestNeighborInterpolateGrad(const DenseTensor& output_grad,
   auto output_grad_t = EigenTensor<T, 4>::From(output_grad);
   for (int k = 0; k < out_h; k++) {  // loop for images
-    int in_k = (align_corners) ? static_cast<int>(ratio_h * k + 0.5)
-                               : static_cast<int>(ratio_h * k);
+    int in_k = static_cast<int>(
+        align_corners ? (ratio_h * static_cast<float>(k) + 0.5f)
+                      : (ratio_h * static_cast<float>(k)));
     for (int l = 0; l < out_w; l++) {
-      int in_l = (align_corners) ? static_cast<int>(ratio_w * l + 0.5)
-                                 : static_cast<int>(ratio_w * l);
+      int in_l = static_cast<int>(
+          align_corners ? (ratio_w * static_cast<float>(l) + 0.5f)
+                        : (ratio_w * static_cast<float>(l)));
       for (int i = 0; i < n; i++) {    // loop for batches
         for (int j = 0; j < c; j++) {  // loop for channels
...
@@ -182,12 +192,14 @@ static void BicubicInterpolationGrad(const DenseTensor& output_grad,
   using MT = typename phi::dtype::MPTypeTrait<T>::Type;
   for (int k = 0; k < out_h; k++) {  // loop for images
-    MT y_n = align_corners ? ratio_h * k : ratio_h * (k + 0.5) - 0.5;
+    MT y_n = align_corners
+                 ? ratio_h * static_cast<float>(k)
+                 : ratio_h * (static_cast<float>(k) + 0.5f) - 0.5f;
     int input_y = floorf(y_n);
     MT y_t = y_n - input_y;
     for (int l = 0; l < out_w; l++) {
-      MT x_n = align_corners ? ratio_w * l : ratio_w * (l + 0.5) - 0.5;
+      MT x_n = align_corners
+                   ? ratio_w * static_cast<float>(l)
+                   : ratio_w * (static_cast<float>(l) + 0.5f) - 0.5f;
       int input_x = floorf(x_n);
       MT x_t = x_n - input_x;
...
@@ -245,33 +257,42 @@ static void TrilinearInterpolationGrad(const DenseTensor& output_grad,
   bool align_flag = (align_mode == 0 && !align_corners);
   using MT = typename phi::dtype::MPTypeTrait<T>::Type;
   for (int j = 0; j < out_d; j++) {  // loop for D
-    int t_f = align_flag ? static_cast<int>(ratio_d * (j + 0.5) - 0.5)
-                         : static_cast<int>(ratio_d * j);
+    int t_f = static_cast<int>(
+        align_flag ? (ratio_d * (static_cast<float>(j) + 0.5f) - 0.5f)
+                   : (ratio_d * static_cast<float>(j)));
     t_f = (t_f > 0) ? t_f : 0;
     int t_b = (t_f + 1) < (in_d - 1) ? (t_f + 1) : (in_d - 1);
-    float idx_src_t = ratio_d * (j + 0.5) - 0.5;
+    float idx_src_t = ratio_d * (static_cast<float>(j) + 0.5f) - 0.5f;
     idx_src_t = (idx_src_t > 0) ? idx_src_t : 0;
-    float d_f = align_flag ? idx_src_t - t_f : ratio_d * j - t_f;
+    float d_f = align_flag
+                    ? idx_src_t - static_cast<float>(t_f)
+                    : ratio_d * static_cast<float>(j) - static_cast<float>(t_f);
     float d_b = 1.f - d_f;
     for (int k = 0; k < out_h; k++) {  // loop for H
-      int y_n = align_flag ? static_cast<int>(ratio_h * (k + 0.5) - 0.5)
-                           : static_cast<int>(ratio_h * k);
+      int y_n = static_cast<int>(
+          align_flag ? (ratio_h * (static_cast<float>(k) + 0.5f) - 0.5f)
+                     : (ratio_h * static_cast<float>(k)));
       y_n = (y_n > 0) ? y_n : 0;
       int y_s = (y_n + 1) < (in_h - 1) ? (y_n + 1) : (in_h - 1);
-      float idx_src_y = ratio_h * (k + 0.5) - 0.5;
+      float idx_src_y = ratio_h * (static_cast<float>(k) + 0.5f) - 0.5f;
       idx_src_y = (idx_src_y > 0) ? idx_src_y : 0;
-      float d_n = align_flag ? idx_src_y - y_n : ratio_h * k - y_n;
+      float d_n = align_flag ? idx_src_y - static_cast<float>(y_n)
+                             : ratio_h * static_cast<float>(k) -
+                                   static_cast<float>(y_n);
       float d_s = 1.f - d_n;
       for (int l = 0; l < out_w; l++) {  // loop for W
-        int x_w = align_flag ? static_cast<int>(ratio_w * (l + 0.5) - 0.5)
-                             : static_cast<int>(ratio_w * l);
+        int x_w = static_cast<int>(
+            align_flag ? (ratio_w * (static_cast<float>(l) + 0.5f) - 0.5f)
+                       : (ratio_w * static_cast<float>(l)));
         x_w = (x_w > 0) ? x_w : 0;
         int x_e = (x_w + 1) < (in_w - 1) ? (x_w + 1) : (in_w - 1);
-        float idx_src_x = ratio_w * (l + 0.5) - 0.5;
+        float idx_src_x = ratio_w * (static_cast<float>(l) + 0.5f) - 0.5f;
        idx_src_x = (idx_src_x > 0) ? idx_src_x : 0;
-        float d_w = align_flag ? idx_src_x - x_w : ratio_w * l - x_w;
+        float d_w = align_flag ? idx_src_x - static_cast<float>(x_w)
+                               : ratio_w * static_cast<float>(l) -
+                                     static_cast<float>(x_w);
        float d_e = 1.f - d_w;
        for (int b = 0; b < n; b++) {  // loop for batches
...
@@ -338,15 +359,18 @@ static void NearestNeighbor3DInterpolateGrad(const DenseTensor& output_grad,
   auto output_grad_t = EigenTensor<T, 5>::From(output_grad);
   for (int d = 0; d < out_d; d++) {
-    int in_d = (align_corners) ? static_cast<int>(ratio_d * d + 0.5)
-                               : static_cast<int>(ratio_d * d);
+    int in_d = static_cast<int>(
+        align_corners ? (ratio_d * static_cast<float>(d) + 0.5f)
+                      : (ratio_d * static_cast<float>(d)));
     for (int k = 0; k < out_h; k++) {  // loop for images
-      int in_k = (align_corners) ? static_cast<int>(ratio_h * k + 0.5)
-                                 : static_cast<int>(ratio_h * k);
+      int in_k = static_cast<int>(
+          align_corners ? (ratio_h * static_cast<float>(k) + 0.5f)
+                        : (ratio_h * static_cast<float>(k)));
       for (int l = 0; l < out_w; l++) {
-        int in_l = (align_corners) ? static_cast<int>(ratio_w * l + 0.5)
-                                   : static_cast<int>(ratio_w * l);
+        int in_l = static_cast<int>(
+            align_corners ? (ratio_w * static_cast<float>(l) + 0.5f)
+                          : (ratio_w * static_cast<float>(l)));
         for (int i = 0; i < n; i++) {    // loop for batches
           for (int j = 0; j < c; j++) {  // loop for channels
...
@@ -408,7 +432,7 @@ static void Interpolate1DCPUBwd(
     }
   }
   if (scale_w > 0.) {
-    out_w = static_cast<int>(in_w * scale_w);
+    out_w = static_cast<int>(static_cast<float>(in_w) * scale_w);
   }
   if (out_size) {
     auto out_size_data =
...
@@ -442,10 +466,13 @@ static void Interpolate1DCPUBwd(
   float ratio_w = 0.f;
   if (out_w > 1) {
     float new_scale_w = 0.f;
-    new_scale_w = (scale_w > 0) ? static_cast<float>(1. / scale_w)
-                                : static_cast<float>(in_w) / out_w;
-    ratio_w = (align_corners) ? static_cast<float>(in_w - 1) / (out_w - 1)
-                              : static_cast<float>(new_scale_w);
+    new_scale_w = static_cast<float>(
+        scale_w > 0 ? (1.f / scale_w)
+                    : static_cast<float>(in_w) / static_cast<float>(out_w));
+    ratio_w = static_cast<float>(
+        align_corners ? (static_cast<float>(in_w) - 1.f) /
+                            (static_cast<float>(out_w) - 1.f)
+                      : new_scale_w);
   }
   if ("linear" == interp_method) {
     LinearInterpolationGrad<T>(output_grad,
...
@@ -528,8 +555,8 @@ static void Interpolate2DCPUBwd(
     }
   }
   if (scale_h > 0. && scale_w > 0.) {
-    out_h = static_cast<int>(in_h * scale_h);
-    out_w = static_cast<int>(in_w * scale_w);
+    out_h = static_cast<int>(in_h * scale_h);  // NOLINT
+    out_w = static_cast<int>(in_w * scale_w);  // NOLINT
   }
   if (out_size) {
     auto out_size_data =
...
@@ -566,17 +593,23 @@ static void Interpolate2DCPUBwd(
   float ratio_w = 0.f;
   if (out_h > 1) {
     float new_scale_h = 0.f;
-    new_scale_h = (scale_h > 0) ? static_cast<float>(1. / scale_h)
-                                : static_cast<float>(in_h) / out_h;
-    ratio_h = (align_corners) ? static_cast<float>(in_h - 1) / (out_h - 1)
-                              : static_cast<float>(new_scale_h);
+    new_scale_h = static_cast<float>(
+        (scale_h > 0) ? (1.f / scale_h)
+                      : static_cast<float>(in_h) / static_cast<float>(out_h));
+    ratio_h = static_cast<float>(
+        align_corners ? (static_cast<float>(in_h) - 1.f) /
+                            (static_cast<float>(out_h) - 1.f)
+                      : new_scale_h);
   }
   if (out_w > 1) {
     float new_scale_w = 0.f;
-    new_scale_w = (scale_w > 0) ? static_cast<float>(1. / scale_w)
-                                : static_cast<float>(in_w) / out_w;
-    ratio_w = (align_corners) ? static_cast<float>(in_w - 1) / (out_w - 1)
-                              : static_cast<float>(new_scale_w);
+    new_scale_w = static_cast<float>(
+        (scale_w > 0) ? (1.f / scale_w)
+                      : static_cast<float>(in_w) / static_cast<float>(out_w));
+    ratio_w = static_cast<float>(
+        align_corners ? (static_cast<float>(in_w) - 1.f) /
+                            (static_cast<float>(out_w) - 1.f)
+                      : new_scale_w);
   }
   if ("bilinear" == interp_method) {
...
@@ -706,9 +739,9 @@ static void Interpolate3DCPUBwd(
     }
   }
   if (scale_d > 0. && scale_h > 0. && scale_w > 0.) {
-    out_d = static_cast<int>(in_d * scale_d);
-    out_h = static_cast<int>(in_h * scale_h);
-    out_w = static_cast<int>(in_w * scale_w);
+    out_d = static_cast<int>(in_d * scale_d);  // NOLINT
+    out_h = static_cast<int>(in_h * scale_h);  // NOLINT
+    out_w = static_cast<int>(in_w * scale_w);  // NOLINT
   }
   if (out_size) {
     auto out_size_data =
...
@@ -747,24 +780,32 @@ static void Interpolate3DCPUBwd(
   float ratio_w = 0.f;
   if (out_d > 1) {
     float new_scale_d = 0.f;
-    new_scale_d = (scale_d > 0) ? static_cast<float>(1. / scale_d)
-                                : static_cast<float>(in_d) / out_d;
-    ratio_d = (align_corners) ? static_cast<float>(in_d - 1) / (out_d - 1)
-                              : static_cast<float>(new_scale_d);
+    new_scale_d = static_cast<float>(
+        (scale_d > 0) ? (1.f / scale_d)
+                      : static_cast<float>(in_d) / static_cast<float>(out_d));
+    ratio_d = static_cast<float>(
+        align_corners ? (static_cast<float>(in_d) - 1.f) /
+                            (static_cast<float>(out_d) - 1.f)
+                      : new_scale_d);
   }
   if (out_h > 1) {
     float new_scale_h = 0.f;
-    new_scale_h = (scale_h > 0) ? static_cast<float>(1. / scale_h)
-                                : static_cast<float>(in_h) / out_h;
-    ratio_h = (align_corners) ? static_cast<float>(in_h - 1) / (out_h - 1)
-                              : static_cast<float>(new_scale_h);
+    new_scale_h = static_cast<float>(
+        (scale_h > 0) ? (1.f / scale_h)
+                      : static_cast<float>(in_h) / static_cast<float>(out_h));
+    ratio_h = (align_corners)
+                  ? static_cast<float>(in_h - 1) /
+                        (static_cast<float>(out_h) - 1)
+                  : static_cast<float>(new_scale_h);
   }
   if (out_w > 1) {
     float new_scale_w = 0.f;
-    new_scale_w = (scale_w > 0) ? static_cast<float>(1. / scale_w)
-                                : static_cast<float>(in_w) / out_w;
-    ratio_w = (align_corners) ? static_cast<float>(in_w - 1) / (out_w - 1)
-                              : static_cast<float>(new_scale_w);
+    new_scale_w = static_cast<float>(
+        (scale_w > 0) ? (1.f / scale_w)
+                      : static_cast<float>(in_w) / static_cast<float>(out_w));
+    ratio_w = static_cast<float>(
+        align_corners ? (static_cast<float>(in_w) - 1.f) /
+                            (static_cast<float>(out_w) - 1.f)
+                      : new_scale_w);
   }
   if ("trilinear" == interp_method) {
...
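The interpolation hunks above deal with a different narrowing: in an expression like `ratio_w * (l + 0.5)`, the `0.5` literal promotes the arithmetic to double, so storing the result in a `float` narrows double to float. Rewriting with `static_cast<float>(l)` and `0.5f` literals keeps the whole expression in float. A standalone sketch with made-up values:

    int main() {
      float ratio_w = 0.75f;
      int l = 3;

      // Flagged: (l + 0.5) is a double expression, so the result narrows
      // from double to float on assignment:
      //   float idx_src_x = ratio_w * (l + 0.5) - 0.5;

      // Fixed: cast the index and use float literals so no double appears.
      float idx_src_x = ratio_w * (static_cast<float>(l) + 0.5f) - 0.5f;

      return idx_src_x > 0 ? 0 : 1;
    }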
paddle/phi/kernels/cpu/interpolate_kernel.cc View file @ b702d2ae
...
@@ -57,15 +57,18 @@ static void LinearInterpolation(const DenseTensor& input,
#pragma omp parallel for
#endif
   for (int l = 0; l < out_w; l++) {
-    int x_w = align_flag ? static_cast<int>(ratio_w * (l + 0.5) - 0.5)
-                         : static_cast<int>(ratio_w * l);
+    int x_w = static_cast<int>(
+        align_flag ? (ratio_w * (static_cast<float>(l) + 0.5f) - 0.5f)
+                   : ratio_w * static_cast<float>(l));
     x_w = (x_w > 0) ? x_w : 0;                       // w
     int x_e = (x_w < (in_w - 1)) ? (x_w + 1) : x_w;  // w_id
-    float idx_src_x = ratio_w * (l + 0.5) - 0.5;
+    float idx_src_x = ratio_w * (static_cast<float>(l) + 0.5f) - 0.5f;
     idx_src_x = (idx_src_x > 0) ? idx_src_x : 0;
-    float d_w = align_flag ? idx_src_x - x_w : ratio_w * l - x_w;  // w1lambda
-    float d_e = 1.f - d_w;                                         // w2lambda
+    float d_w = align_flag ? idx_src_x - static_cast<float>(x_w)
+                           : ratio_w * static_cast<float>(l) -
+                                 static_cast<float>(x_w);  // w1lambda
+    float d_e = 1.f - d_w;                                 // w2lambda
     {
       vx_w[l] = x_w;
       vx_e[l] = x_e;
...
@@ -127,13 +130,15 @@ static void BilinearInterpolation(const DenseTensor& input,
#pragma omp parallel for
#endif
   for (int k = 0; k < out_h; k++) {
-    int y_n = align_flag ? static_cast<int>(ratio_h * (k + 0.5) - 0.5)
-                         : static_cast<int>(ratio_h * k);
+    int y_n = static_cast<int>(align_flag
+                                   ? (ratio_h * (k + 0.5) - 0.5)
+                                   : (ratio_h * static_cast<float>(k)));
     y_n = (y_n > 0) ? y_n : 0;
     int y_s = (y_n + 1) < (in_h - 1) ? (y_n + 1) : (in_h - 1);
-    float idx_src_y = ratio_h * (k + 0.5) - 0.5;
+    float idx_src_y = ratio_h * (static_cast<float>(k) + 0.5f) - 0.5f;
     idx_src_y = (idx_src_y > 0) ? idx_src_y : 0;
-    float d_n = align_flag ? idx_src_y - y_n : ratio_h * k - y_n;
+    float d_n = align_flag
+                    ? idx_src_y - static_cast<float>(y_n)
+                    : ratio_h * static_cast<float>(k) - static_cast<float>(y_n);
     float d_s = 1.f - d_n;
     {
       vy_n[k] = y_n;
...
@@ -155,12 +160,14 @@ static void BilinearInterpolation(const DenseTensor& input,
   for (int l = 0; l < out_w; l++) {
     int x_w = (align_mode == 0 && !align_corners)
                   ? static_cast<int>(ratio_w * (l + 0.5) - 0.5)
-                  : static_cast<int>(ratio_w * l);
+                  : static_cast<int>(ratio_w * static_cast<float>(l));
     x_w = (x_w > 0) ? x_w : 0;
     int x_e = (x_w + 1) < (in_w - 1) ? (x_w + 1) : (in_w - 1);
-    float idx_src_x = ratio_w * (l + 0.5) - 0.5;
+    float idx_src_x = ratio_w * (static_cast<float>(l) + 0.5f) - 0.5f;
     idx_src_x = (idx_src_x > 0) ? idx_src_x : 0;
-    float d_w = align_flag ? idx_src_x - x_w : ratio_w * l - x_w;
+    float d_w = align_flag
+                    ? idx_src_x - static_cast<float>(x_w)
+                    : ratio_w * static_cast<float>(l) - static_cast<float>(x_w);
     float d_e = 1.f - d_w;
     {
       vx_w[l] = x_w;
...
@@ -224,12 +231,14 @@ static void NearestNeighborInterpolate(const DenseTensor& input,
   auto output_t = EigenTensor<T, 4>::From(*output);
   for (int k = 0; k < out_h; k++) {  // loop for images
-    int in_k = (align_corners) ? static_cast<int>(ratio_h * k + 0.5)
-                               : static_cast<int>(ratio_h * k);
+    int in_k = (align_corners)
+                   ? static_cast<int>(ratio_h * static_cast<float>(k) + 0.5)
+                   : static_cast<int>(ratio_h * static_cast<float>(k));
     for (int l = 0; l < out_w; l++) {
-      int in_l = (align_corners) ? static_cast<int>(ratio_w * l + 0.5)
-                                 : static_cast<int>(ratio_w * l);
+      int in_l = (align_corners)
+                     ? static_cast<int>(ratio_w * static_cast<float>(l) + 0.5)
+                     : static_cast<int>(ratio_w * static_cast<float>(l));
       for (int i = 0; i < n; i++) {    // loop for batches
         for (int j = 0; j < c; j++) {  // loop for channels
...
@@ -262,13 +271,13 @@ static void BicubicInterpolation(const DenseTensor& input,
   using MT = typename phi::dtype::MPTypeTrait<T>::Type;
   for (int k = 0; k < out_h; k++) {  // loop for images
-    MT y_n = align_corners ? static_cast<MT>(ratio_h * k)
+    MT y_n = align_corners ? static_cast<MT>(ratio_h * static_cast<float>(k))
                            : static_cast<MT>(ratio_h * (k + 0.5) - 0.5);
     int input_y = floorf(y_n);
     const MT y_t = y_n - input_y;
     for (int l = 0; l < out_w; l++) {
-      MT x_n = align_corners ? static_cast<MT>(ratio_w * l)
+      MT x_n = align_corners ? static_cast<MT>(ratio_w * static_cast<float>(l))
                              : static_cast<MT>(ratio_w * (l + 0.5) - 0.5);
       int input_x = floorf(x_n);
       const MT x_t = x_n - input_x;
...
@@ -360,12 +369,14 @@ static void TrilinearInterpolation(const DenseTensor& input,
#endif
   for (int j = 0; j < out_d; j++) {
     int t_f = align_flag ? static_cast<int>(ratio_d * (j + 0.5) - 0.5)
-                         : static_cast<int>(ratio_d * j);
+                         : static_cast<int>(ratio_d * static_cast<float>(j));
     t_f = (t_f > 0) ? t_f : 0;
     int t_b = (t_f + 1) < (in_d - 1) ? (t_f + 1) : (in_d - 1);
-    float idx_src_t = ratio_d * (j + 0.5) - 0.5;
+    float idx_src_t = ratio_d * (static_cast<float>(j) + 0.5f) - 0.5f;
     idx_src_t = (idx_src_t > 0) ? idx_src_t : 0;
-    float d_f = align_flag ? idx_src_t - t_f : ratio_d * j - t_f;
+    float d_f = align_flag
                    ? idx_src_t - static_cast<float>(t_f)
                    : ratio_d * static_cast<float>(j) - static_cast<float>(t_f);
     float d_b = 1.f - d_f;
     {
       vt_f[j] = t_f;
...
@@ -386,12 +397,14 @@ static void TrilinearInterpolation(const DenseTensor& input,
#endif
   for (int k = 0; k < out_h; k++) {
     int y_n = align_flag ? static_cast<int>(ratio_h * (k + 0.5) - 0.5)
-                         : static_cast<int>(ratio_h * k);
+                         : static_cast<int>(ratio_h * static_cast<float>(k));
     y_n = (y_n > 0) ? y_n : 0;
     int y_s = (y_n + 1) < (in_h - 1) ? (y_n + 1) : (in_h - 1);
-    float idx_src_y = ratio_h * (k + 0.5) - 0.5;
+    float idx_src_y = ratio_h * (static_cast<float>(k) + 0.5f) - 0.5f;
     idx_src_y = (idx_src_y > 0) ? idx_src_y : 0;
-    float d_n = align_flag ? idx_src_y - y_n : ratio_h * k - y_n;
+    float d_n = align_flag
                    ? idx_src_y - static_cast<float>(y_n)
                    : ratio_h * static_cast<float>(k) - static_cast<float>(y_n);
     float d_s = 1.f - d_n;
     {
       vy_n[k] = y_n;
...
@@ -413,12 +426,14 @@ static void TrilinearInterpolation(const DenseTensor& input,
   for (int l = 0; l < out_w; l++) {
     int x_w = (align_mode == 0 && !align_corners)
                   ? static_cast<int>(ratio_w * (l + 0.5) - 0.5)
-                  : static_cast<int>(ratio_w * l);
+                  : static_cast<int>(ratio_w * static_cast<float>(l));
     x_w = (x_w > 0) ? x_w : 0;
     int x_e = (x_w + 1) < (in_w - 1) ? (x_w + 1) : (in_w - 1);
-    float idx_src_x = ratio_w * (l + 0.5) - 0.5;
+    float idx_src_x = ratio_w * (static_cast<float>(l) + 0.5f) - 0.5f;
     idx_src_x = (idx_src_x > 0) ? idx_src_x : 0;
-    float d_w = align_flag ? idx_src_x - x_w : ratio_w * l - x_w;
+    float d_w = align_flag
                    ? idx_src_x - static_cast<float>(x_w)
                    : ratio_w * static_cast<float>(l) - static_cast<float>(x_w);
     float d_e = 1.f - d_w;
     {
       vx_w[l] = x_w;
...
@@ -499,15 +514,18 @@ static void NearestNeighbor3DInterpolate(const DenseTensor& input,
   auto input_t = EigenTensor<T, 5>::From(input);
   auto output_t = EigenTensor<T, 5>::From(*output);
   for (int d = 0; d < out_d; d++) {  // loop for images
-    int in_d = (align_corners) ? static_cast<int>(ratio_d * d + 0.5)
-                               : static_cast<int>(ratio_d * d);
+    int in_d = (align_corners)
+                   ? static_cast<int>(ratio_d * static_cast<float>(d) + 0.5)
+                   : static_cast<int>(ratio_d * static_cast<float>(d));
     for (int k = 0; k < out_h; k++) {
-      int in_k = (align_corners) ? static_cast<int>(ratio_h * k + 0.5)
-                                 : static_cast<int>(ratio_h * k);
+      int in_k = (align_corners)
+                     ? static_cast<int>(ratio_h * static_cast<float>(k) + 0.5)
+                     : static_cast<int>(ratio_h * static_cast<float>(k));
       for (int l = 0; l < out_w; l++) {
-        int in_l = (align_corners) ? static_cast<int>(ratio_w * l + 0.5)
-                                   : static_cast<int>(ratio_w * l);
+        int in_l = (align_corners)
+                       ? static_cast<int>(ratio_w * static_cast<float>(l) + 0.5)
+                       : static_cast<int>(ratio_w * static_cast<float>(l));
         for (int i = 0; i < n; i++) {    // loop for batches
           for (int j = 0; j < c; j++) {  // loop for channels
...
@@ -572,7 +590,7 @@ static void Interpolate1DCPUFwd(
     }
   }
   if (scale_w > 0.) {
-    out_w = static_cast<int>(in_w * scale_w);
+    out_w = static_cast<int>(in_w * scale_w);  // NOLINT
   }
   if (out_size) {
     auto out_size_data =
...
@@ -602,10 +620,12 @@ static void Interpolate1DCPUFwd(
   float ratio_w = 0.f;
   if (out_w > 1) {
     float new_scale_w = 0.f;
-    new_scale_w = (scale_w > 0) ? static_cast<float>(1. / scale_w)
-                                : static_cast<float>(in_w) / out_w;
-    ratio_w = (align_corners) ? static_cast<float>(in_w - 1) / (out_w - 1)
-                              : static_cast<float>(new_scale_w);
+    new_scale_w = (scale_w > 0)
+                      ? static_cast<float>(1. / scale_w)
+                      : static_cast<float>(in_w) / static_cast<float>(out_w);
+    ratio_w = (align_corners)
+                  ? static_cast<float>(in_w - 1) / static_cast<float>(out_w - 1)
+                  : static_cast<float>(new_scale_w);
   }
   if ("linear" == interp_method) {
     LinearInterpolation<T>(x,
...
@@ -695,8 +715,8 @@ static void Interpolate2DCPUFwd(
     }
   }
   if (scale_h > 0. && scale_w > 0.) {
-    out_h = static_cast<int>(in_h * scale_h);
-    out_w = static_cast<int>(in_w * scale_w);
+    out_h = static_cast<int>(in_h * scale_h);  // NOLINT
+    out_w = static_cast<int>(in_w * scale_w);  // NOLINT
   }
   if (out_size) {
     auto out_size_data =
...
@@ -733,17 +753,21 @@ static void Interpolate2DCPUFwd(
   float ratio_w = 0.f;
   if (out_h > 1) {
     float new_scale_h = 0.f;
-    new_scale_h = (scale_h > 0) ? static_cast<float>(1. / scale_h)
-                                : static_cast<float>(in_h) / out_h;
-    ratio_h = (align_corners) ? static_cast<float>(in_h - 1) / (out_h - 1)
-                              : static_cast<float>(new_scale_h);
+    new_scale_h = (scale_h > 0)
+                      ? static_cast<float>(1. / scale_h)
+                      : static_cast<float>(in_h) / static_cast<float>(out_h);
+    ratio_h = (align_corners)
+                  ? static_cast<float>(in_h - 1) / static_cast<float>(out_h - 1)
+                  : static_cast<float>(new_scale_h);
   }
   if (out_w > 1) {
     float new_scale_w = 0.f;
-    new_scale_w = (scale_w > 0) ? static_cast<float>(1. / scale_w)
-                                : static_cast<float>(in_w) / out_w;
-    ratio_w = (align_corners) ? static_cast<float>(in_w - 1) / (out_w - 1)
-                              : static_cast<float>(new_scale_w);
+    new_scale_w = (scale_w > 0)
+                      ? static_cast<float>(1. / scale_w)
+                      : static_cast<float>(in_w) / static_cast<float>(out_w);
+    ratio_w = (align_corners)
+                  ? static_cast<float>(in_w - 1) / static_cast<float>(out_w - 1)
+                  : static_cast<float>(new_scale_w);
   }
   if ("bilinear" == interp_method) {
...
@@ -881,9 +905,9 @@ static void Interpolate3DCPUFwd(
     }
   }
   if (scale_w > 0. && scale_h > 0. && scale_d > 0.) {
-    out_d = static_cast<int>(in_d * scale_d);
-    out_h = static_cast<int>(in_h * scale_h);
-    out_w = static_cast<int>(in_w * scale_w);
+    out_d = static_cast<int>(in_d * scale_d);  // NOLINT
+    out_h = static_cast<int>(in_h * scale_h);  // NOLINT
+    out_w = static_cast<int>(in_w * scale_w);  // NOLINT
   }
   if (out_size) {
     auto out_size_data =
...
@@ -929,24 +953,30 @@ static void Interpolate3DCPUFwd(
   float ratio_w = 0.f;
   if (out_d > 1) {
     float new_scale_d = 0.f;
-    new_scale_d = (scale_d > 0) ? static_cast<float>(1. / scale_d)
-                                : static_cast<float>(in_d) / out_d;
-    ratio_d = (align_corners) ? static_cast<float>(in_d - 1) / (out_d - 1)
-                              : static_cast<float>(new_scale_d);
+    new_scale_d = (scale_d > 0)
+                      ? static_cast<float>(1. / scale_d)
+                      : static_cast<float>(in_d) / static_cast<float>(out_d);
+    ratio_d = (align_corners)
+                  ? static_cast<float>(in_d - 1) / static_cast<float>(out_d - 1)
+                  : static_cast<float>(new_scale_d);
   }
   if (out_h > 1) {
     float new_scale_h = 0.f;
-    new_scale_h = (scale_h > 0) ? static_cast<float>(1. / scale_h)
-                                : static_cast<float>(in_h) / out_h;
-    ratio_h = (align_corners) ? static_cast<float>(in_h - 1) / (out_h - 1)
-                              : static_cast<float>(new_scale_h);
+    new_scale_h = (scale_h > 0)
+                      ? static_cast<float>(1. / scale_h)
+                      : static_cast<float>(in_h) / static_cast<float>(out_h);
+    ratio_h = (align_corners)
+                  ? static_cast<float>(in_h - 1) / static_cast<float>(out_h - 1)
+                  : static_cast<float>(new_scale_h);
   }
   if (out_w > 1) {
     float new_scale_w = 0.f;
-    new_scale_w = (scale_w > 0) ? static_cast<float>(1. / scale_w)
-                                : static_cast<float>(in_w) / out_w;
-    ratio_w = (align_corners) ? static_cast<float>(in_w - 1) / (out_w - 1)
-                              : static_cast<float>(new_scale_w);
+    new_scale_w = (scale_w > 0)
+                      ? static_cast<float>(1. / scale_w)
+                      : static_cast<float>(in_w) / static_cast<float>(out_w);
+    ratio_w = (align_corners)
+                  ? static_cast<float>(in_w - 1) / static_cast<float>(out_w - 1)
+                  : static_cast<float>(new_scale_w);
  }
  if ("trilinear" == interp_method) {
...
paddle/phi/kernels/cpu/kthvalue_grad_kernel.cc View file @ b702d2ae
...
@@ -117,7 +117,7 @@ void KthvalueGradKernel(const Context& dev_ctx,
     trans.emplace_back(axis);
     DDim trans_dims(out_dims);
     DDim trans_in_dims(in_dims);
-    for (size_t i = 0; i < trans.size(); i++) {
+    for (int i = 0; i < static_cast<int>(trans.size()); i++) {
       trans_dims[i] = out_dims[trans[i]];
       trans_in_dims[i] = in_dims[trans[i]];
     }
...
@@ -126,7 +126,7 @@ void KthvalueGradKernel(const Context& dev_ctx,
     trans_ind.Resize(trans_dims);
     dev_ctx.template Alloc<T>(&trans_dO);
     dev_ctx.template Alloc<int64_t>(&trans_ind);
-    int ndims = trans.size();
+    int ndims = static_cast<int>(trans.size());
     if (keepdim) {
       funcs::TransCompute<phi::CPUContext, T>(
          ndims, dev_ctx, d_out, &trans_dO, trans);
...
paddle/phi/kernels/cpu/kthvalue_kernel.cc View file @ b702d2ae
...
@@ -136,7 +136,7 @@ void KthvalueKernel(const Context& dev_ctx,
     DDim trans_dims(in_dims);
     DDim trans_out_dims(in_dims);
-    for (size_t i = 0; i < trans.size(); i++) {
+    for (int i = 0; i < static_cast<int>(trans.size()); i++) {
       trans_dims[i] = in_dims[trans[i]];
       trans_out_dims[i] = in_dims[trans[i]];
     }
...
@@ -144,7 +144,7 @@ void KthvalueKernel(const Context& dev_ctx,
     DenseTensor trans_inp;
     trans_inp.Resize(trans_dims);
     dev_ctx.template Alloc<T>(&trans_inp);
-    int ndims = trans.size();
+    int ndims = static_cast<int>(trans.size());
     funcs::TransCompute<phi::CPUContext, T>(
        ndims, dev_ctx, x, &trans_inp, trans);
...
paddle/phi/kernels/cpu/label_smooth_kernel.cc View file @ b702d2ae
...
@@ -37,10 +37,12 @@ void LabelSmoothKernel(const Context& ctx,
     eigen_out.device(dev) =
         static_cast<T>(1 - epsilon) * eigen_in +
         static_cast<T>(epsilon) *
-            dist.broadcast(Eigen::DSizes<int, 1>(label.numel() / label_dim));
+            dist.broadcast(Eigen::DSizes<int, 1>(
+                static_cast<int>(label.numel() / label_dim)));
   } else {
-    eigen_out.device(dev) = static_cast<T>(1 - epsilon) * eigen_in +
-                            static_cast<T>(epsilon / label_dim);
+    eigen_out.device(dev) =
+        static_cast<T>(1 - epsilon) * eigen_in +
+        static_cast<T>(epsilon / static_cast<float>(label_dim));
   }
 }
}
...
paddle/phi/kernels/cpu/log_softmax_grad_kernel.cc View file @ b702d2ae
...
@@ -46,7 +46,7 @@ struct LogSoftmaxGradFunctor {
    auto dy = EigenMatrixTemplate<T>::From(*dY, dim_2d);
    auto dx = EigenMatrixTemplate<T>::From(*dX, dim_2d);
-    const int axis_dim = Y->dims()[axis];
+    const int axis_dim = static_cast<int>(Y->dims()[axis]);
    const int batch_size = y.dimension(kBatchDim);
    const int num_classes = y.dimension(kClassDim);
    const int num_remain = num_classes / axis_dim;
...
paddle/phi/kernels/cpu/log_softmax_kernel.cc View file @ b702d2ae
...
@@ -46,7 +46,7 @@ struct LogSoftmaxFunctor {
    constexpr int kClassDim = 1;
    constexpr int kAxisDim = 1;
-    int axis_dim = X->dims()[axis];
+    int axis_dim = static_cast<int>(X->dims()[axis]);
    const int n = funcs::SizeToAxis(axis, X->dims());
    const int d = funcs::SizeFromAxis(axis, X->dims());
    phi::DDim dim_2d{n, d};
...
paddle/phi/kernels/cpu/lstsq_kernel.cc View file @ b702d2ae
...
@@ -63,9 +63,9 @@ void LstsqKernel(const Context& dev_ctx,
   // lapack is a column-major storge, transpose make the input to
   // have a continuous memory layout
   int info = 0;
-  int m = x_dims[dim_size - 2];
-  int n = x_dims[dim_size - 1];
-  int nrhs = y_dims[dim_size - 1];
+  int m = static_cast<int>(x_dims[dim_size - 2]);
+  int n = static_cast<int>(x_dims[dim_size - 1]);
+  int nrhs = static_cast<int>(y_dims[dim_size - 1]);
   int lda = std::max<int>(m, 1);
   int ldb = std::max<int>(1, std::max(m, n));
...
@@ -115,7 +115,7 @@ void LstsqKernel(const Context& dev_ctx,
     s_data = dev_ctx.template Alloc<T>(singular_values);
     s_working_ptr = s_data;
     auto s_dims = singular_values->dims();
-    s_stride = s_dims[s_dims.size() - 1];
+    s_stride = static_cast<int>(s_dims[s_dims.size() - 1]);
   }
   // "jpvt" is only used for "gelsy" driver
...
paddle/phi/kernels/cpu/masked_select_grad_kernel.cc View file @ b702d2ae
...
@@ -64,7 +64,7 @@ void MaskedSelectGradKernel(const Context& dev_ctx,
   auto* mask_data = mask_expand.data<bool>();
   auto* input_data = out_grad.data<T>();
-  int mask_size = mask_expand.numel();
+  int mask_size = static_cast<int>(mask_expand.numel());
   int index = 0;
   for (int i = 0; i < mask_size; i++) {
...
paddle/phi/kernels/cpu/matrix_nms_kernel.cc View file @ b702d2ae
...
@@ -219,7 +219,7 @@ size_t MultiClassMatrixNMS(const DenseTensor& scores,
   std::iota(perm.begin(), perm.end(), 0);
   std::partial_sort(perm.begin(),
-                    perm.begin() + num_det,
+                    perm.begin() + num_det,  // NOLINT
                     perm.end(),
                     [&all_scores](int lhs, int rhs) {
                       return all_scores[lhs] > all_scores[rhs];
...
@@ -295,7 +295,7 @@ void MatrixNMSKernel(const Context& ctx,
     num_per_batch.emplace_back(num_out);
   }
-  int64_t num_kept = offsets.back();
+  int64_t num_kept = static_cast<int64_t>(offsets.back());
   if (num_kept == 0) {
     out->Resize(phi::make_ddim({0, out_dim}));
     ctx.template Alloc<T>(out);
...
paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc View file @ b702d2ae
...
@@ -87,11 +87,10 @@ void MatrixRankTolKernel(const Context& dev_ctx,
   dev_ctx.template Alloc<int64_t>(out);
   auto dim_x = x.dims();
   auto dim_out = out->dims();
-  int rows = dim_x[dim_x.size() - 2];
-  int cols = dim_x[dim_x.size() - 1];
+  int rows = static_cast<int>(dim_x[dim_x.size() - 2]);
+  int cols = static_cast<int>(dim_x[dim_x.size() - 1]);
   int k = std::min(rows, cols);
-  auto numel = x.numel();
-  int batches = numel / (rows * cols);
+  int batches = static_cast<int>(x.numel() / (rows * cols));
   T rtol_T = 0;
...
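The matrix_rank_tol hunk also folds the temporary away: the division is carried out in the wide `int64_t` domain and a single explicit cast narrows the final quotient, rather than narrowing `numel`, `rows`, and `cols` individually. A sketch with made-up values showing why this ordering is the safer one:

    #include <cstdint>

    int main() {
      int64_t numel = 6 * 4 * 5;  // stand-in for x.numel()
      int rows = 4, cols = 5;

      // Divide in int64_t, then narrow once; the quotient (the batch
      // count) stays small even when numel itself would not fit in int.
      int batches = static_cast<int>(numel / (rows * cols));
      return batches == 6 ? 0 : 1;
    }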
paddle/phi/kernels/cpu/mode_grad_kernel.cc View file @ b702d2ae
...
@@ -105,7 +105,7 @@ void ModeGradKernel(const Context& dev_ctx,
     trans_axis.emplace_back(axis);
     DDim trans_shape(out_dims);
     DDim trans_in_shape(in_dims);
-    for (size_t i = 0; i < trans_axis.size(); i++) {
+    for (int i = 0; i < static_cast<int>(trans_axis.size()); i++) {
       trans_shape[i] = out_dims[trans_axis[i]];
       trans_in_shape[i] = in_dims[trans_axis[i]];
     }
...
@@ -118,7 +118,7 @@ void ModeGradKernel(const Context& dev_ctx,
     trans_ind.Resize(trans_shape);
     dev_ctx.template Alloc<int64_t>(&trans_ind);
-    int ndims = trans_axis.size();
+    int ndims = static_cast<int>(trans_axis.size());
     if (keepdim) {
       // Do transpose
...
paddle/phi/kernels/cpu/mode_kernel.cc View file @ b702d2ae
...
@@ -89,7 +89,7 @@ void ModeKernel(const Context& dev_ctx,
   DDim trans_shape(in_dims);
   DDim trans_out_shape(in_dims);
-  for (size_t i = 0; i < trans_axis.size(); i++) {
+  for (int i = 0; i < static_cast<int>(trans_axis.size()); i++) {
     trans_shape[i] = in_dims[trans_axis[i]];
     trans_out_shape[i] = in_dims[trans_axis[i]];
   }
...
@@ -98,7 +98,7 @@ void ModeKernel(const Context& dev_ctx,
   DenseTensor trans_input;
   trans_input.Resize(trans_shape);
   dev_ctx.template Alloc<T>(&trans_input);
-  int ndims = trans_axis.size();
+  int ndims = static_cast<int>(trans_axis.size());
   // transpose the input value
   funcs::TransCompute<CPUContext, T>(
...
paddle/phi/kernels/cpu/multiclass_nms3_kernel.cc
View file @ b702d2ae
...
@@ -65,7 +65,7 @@ void Array2Poly(const T* box,
   (*poly).hole[0] = 0;
   (*poly).contour =
       (phi::funcs::gpc_vertex_list*)malloc(sizeof(phi::funcs::gpc_vertex_list));
-  (*poly).contour->num_vertices = pts_num;
+  (*poly).contour->num_vertices = static_cast<int>(pts_num);
   (*poly).contour->vertex =
       (phi::funcs::gpc_vertex*)malloc(sizeof(phi::funcs::gpc_vertex) * pts_num);
   for (size_t i = 0; i < pts_num; ++i) {
...
@@ -255,9 +255,9 @@ void SliceOneClass(const Context& ctx,
   T* item_data = ctx.template Alloc<T>(one_class_item);
   const T* items_data = items.data<T>();
   const int64_t num_item = items.dims()[0];
-  const int class_num = items.dims()[1];
+  const int class_num = static_cast<int>(items.dims()[1]);
   if (items.dims().size() == 3) {
-    int item_size = items.dims()[2];
+    int item_size = static_cast<int>(items.dims()[2]);
     for (int i = 0; i < num_item; ++i) {
       std::memcpy(item_data + i * item_size,
                   items_data + i * class_num * item_size + class_id * item_size,
...
@@ -350,9 +350,10 @@ void MultiClassNMS(const Context& ctx,
   int num_det = 0;
-  int64_t class_num = scores_size == 3 ? scores.dims()[0] : scores.dims()[1];
+  int class_num =
+      static_cast<int>(scores_size == 3 ? scores.dims()[0] : scores.dims()[1]);
   DenseTensor bbox_slice, score_slice;
-  for (int64_t c = 0; c < class_num; ++c) {
+  for (int c = 0; c < class_num; ++c) {
     if (c == background_label) continue;
     if (scores_size == 3) {
       score_slice = scores.Slice(c, c + 1);
...
@@ -374,7 +375,7 @@ void MultiClassNMS(const Context& ctx,
     if (scores_size == 2) {
       std::stable_sort((*indices)[c].begin(), (*indices)[c].end());
     }
-    num_det += (*indices)[c].size();
+    num_det += static_cast<int>((*indices)[c].size());
   }
   *num_nmsed_out = num_det;
...
@@ -466,7 +467,7 @@ void MultiClassOutput(const Context& ctx,
       bdata = bbox.data<T>() + idx * box_size;
       odata[count * out_dim + 1] = *(scores_data + idx * class_num + label);
       if (oindices != nullptr) {
-        oindices[count] = offset + idx * class_num + label;
+        oindices[count] = offset + idx * class_num + label;  // NOLINT
       }
     }
     // xmin, ymin, xmax, ymax or multi-points coordinates
...
@@ -505,9 +506,11 @@ void MultiClassNMSKernel(const Context& ctx,
   DenseTensor boxes_slice, scores_slice;
   int n = 0;
   if (has_roisnum) {
-    n = score_size == 3 ? batch_size : rois_num.get_ptr()->numel();
+    n = static_cast<int>(score_size == 3 ? batch_size
+                                         : rois_num.get_ptr()->numel());
   } else {
-    n = score_size == 3 ? batch_size : bboxes.lod().back().size() - 1;
+    n = static_cast<int>(score_size == 3 ? batch_size
+                                         : bboxes.lod().back().size() - 1);
   }
   for (int i = 0; i < n; ++i) {
     std::map<int, std::vector<int>> indices;
...
@@ -528,8 +531,8 @@ void MultiClassNMSKernel(const Context& ctx,
         batch_starts.push_back(batch_starts.back());
         continue;
       }
-      scores_slice = scores.Slice(boxes_lod[i], boxes_lod[i + 1]);
-      boxes_slice = bboxes.Slice(boxes_lod[i], boxes_lod[i + 1]);
+      scores_slice = scores.Slice(boxes_lod[i], boxes_lod[i + 1]);  // NOLINT
+      boxes_slice = bboxes.Slice(boxes_lod[i], boxes_lod[i + 1]);   // NOLINT
     }
     MultiClassNMS<T, Context>(ctx,
                               scores_slice,
...
@@ -548,7 +551,7 @@ void MultiClassNMSKernel(const Context& ctx,
     batch_starts.push_back(batch_starts.back() + num_nmsed_out);
   }
-  int num_kept = batch_starts.back();
+  int num_kept = static_cast<int>(batch_starts.back());
   if (num_kept == 0) {
     if (return_index) {
       out->Resize({0, out_dim});
...
@@ -583,15 +586,15 @@ void MultiClassNMSKernel(const Context& ctx,
         boxes_lod = bboxes.lod().back();
       }
       if (boxes_lod[i] == boxes_lod[i + 1]) continue;
-      scores_slice = scores.Slice(boxes_lod[i], boxes_lod[i + 1]);
-      boxes_slice = bboxes.Slice(boxes_lod[i], boxes_lod[i + 1]);
+      scores_slice = scores.Slice(boxes_lod[i], boxes_lod[i + 1]);  // NOLINT
+      boxes_slice = bboxes.Slice(boxes_lod[i], boxes_lod[i + 1]);   // NOLINT
       if (return_index) {
-        offset = boxes_lod[i] * score_dims[1];
+        offset = static_cast<int>(boxes_lod[i] * score_dims[1]);
       }
     }
-    int64_t s = batch_starts[i];
-    int64_t e = batch_starts[i + 1];
+    int64_t s = static_cast<int64_t>(batch_starts[i]);
+    int64_t e = static_cast<int64_t>(batch_starts[i + 1]);
     if (e > s) {
       DenseTensor nout = out->Slice(s, e);
       if (return_index) {
...
@@ -615,7 +618,7 @@ void MultiClassNMSKernel(const Context& ctx,
     ctx.template Alloc<int>(nms_rois_num);
     int* num_data = nms_rois_num->data<int>();
     for (int i = 1; i <= n; i++) {
-      num_data[i - 1] = batch_starts[i] - batch_starts[i - 1];
+      num_data[i - 1] = batch_starts[i] - batch_starts[i - 1];  // NOLINT
    }
    nms_rois_num->Resize({n});
  }
...
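The casts in this file are all of one shape: DenseTensor::dims() yields int64_t extents, and storing one in an int is an implicit narrowing. static_cast<int> makes the narrowing deliberate and silences the check; it does not add an overflow guard, which is acceptable here because class and box counts fit comfortably in int. A hedged sketch of the idea:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const int64_t dim = 81;  // hypothetical stand-in for items.dims()[1]
      // Implicit and flagged: int class_num = dim;
      const int class_num = static_cast<int>(dim);  // explicit, intent visible
      std::printf("class_num = %d\n", class_num);
      return 0;
    }

Lines that cannot reasonably be rewritten, such as the Slice() calls fed by unsigned LoD offsets, are suppressed with a trailing // NOLINT instead.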
paddle/phi/kernels/cpu/multinomial_kernel.cc
View file @ b702d2ae
...
@@ -29,7 +29,7 @@ void MultinomialKernel(const Context& dev_ctx,
   auto* in_data = x.data<T>();
   int64_t* out_data = dev_ctx.template Alloc<int64_t>(out);
   auto in_dims = x.dims();
-  int64_t in_rank = in_dims.size();
+  int in_rank = in_dims.size();
   const int64_t num_categories = in_dims[in_rank - 1];
   const int64_t num_distributions = in_rank > 1 ? in_dims[in_rank - 2] : 1;
...
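This hunk goes the other way: DDim::size() appears to return an int already, so declaring in_rank as int64_t only forced conversions at later uses. Shrinking the variable to the producer's type removes the conversion entirely. A sketch with a hypothetical DDim stand-in:

    #include <cstdio>

    struct DDim {                      // simplified stand-in for phi::DDim
      int rank;
      int size() const { return rank; }
    };

    int main() {
      DDim in_dims{3};
      int in_rank = in_dims.size();    // no narrowing or widening left
      std::printf("in_rank = %d\n", in_rank);
      return 0;
    }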
paddle/phi/kernels/cpu/mv_grad_kernel.cc
View file @ b702d2ae
...
@@ -32,8 +32,8 @@ void MvGradKernel(const Context& dev_ctx,
   auto dvec = vec_grad;
   const auto& dim_x = x.dims();
-  int m = dim_x[0];
-  int n = dim_x[1];
+  int m = static_cast<int>(dim_x[0]);
+  int n = static_cast<int>(dim_x[1]);
   // get data ptr
   const T* x_data = x.data<T>();
...
paddle/phi/kernels/cpu/nanmedian_grad_kernel.cc
View file @ b702d2ae
...
@@ -38,7 +38,7 @@ void CalcMedianGradKernel(const Context& dev_ctx,
   int64_t numel = x.numel();
   auto x_dim = x.dims();
   int64_t rank = x_dim.size();
-  int64_t stride = x_dim[rank - 1];
+  int64_t stride = x_dim[static_cast<int>(rank - 1)];
   int64_t pre_dim = numel / stride;
   int64_t i = 0;
...
paddle/phi/kernels/cpu/nanmedian_kernel.cc
View file @ b702d2ae
...
@@ -35,7 +35,7 @@ void CalcMedianFunc(const Context& dev_ctx,
   DenseTensor sort_indices;
   auto sort_dim = x.dims();
   int64_t rank = sort_dim.size();
-  sort_dim[rank - 1] = sort_k;
+  sort_dim[static_cast<int>(rank - 1)] = sort_k;
   sort_out.Resize(sort_dim);
   sort_indices.Resize(sort_dim);
...
@@ -115,7 +115,7 @@ void ProcessMedianKernel(const Context& dev_ctx,
   int64_t numel = x.numel();
   auto x_dim = x.dims();
   int64_t x_rank = x_dim.size();
-  int64_t stride = x_dim[x_rank - 1];
+  int64_t stride = x_dim[static_cast<int>(x_rank - 1)];
   PADDLE_ENFORCE_NE(stride,
...
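Here the narrowing site is the subscript itself: rank stays in int64_t, but the DDim subscript operator takes an int index, so the cast wraps the index expression rather than the result. A sketch under that assumption:

    #include <cstdint>
    #include <cstdio>

    struct Dims {                              // hypothetical stand-in
      int64_t d[4];
      int64_t operator[](int i) const { return d[i]; }
    };

    int main() {
      Dims x_dim{{2, 3, 4, 5}};
      int64_t rank = 4;
      int64_t stride = x_dim[static_cast<int>(rank - 1)];
      std::printf("stride = %lld\n", static_cast<long long>(stride));
      return 0;
    }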
paddle/phi/kernels/cpu/overlap_add_grad_kernel.cc
View file @ b702d2ae
...
@@ -30,12 +30,15 @@ void OverlapAddGradKernel(const Context& dev_ctx,
   const size_t out_grad_rank = out_grad.dims().size();
   const size_t x_grad_rank = x_grad->dims().size();
-  const int n_frames =
-      (axis == 0) ? x_grad->dims()[0] : x_grad->dims()[x_grad_rank - 1];
-  const int frame_length =
-      (axis == 0) ? x_grad->dims()[1] : x_grad->dims()[x_grad_rank - 2];
-  const int seq_length =
-      (axis == 0) ? out_grad.dims()[0] : out_grad.dims()[out_grad_rank - 1];
+  const int n_frames = static_cast<int>(
+      (axis == 0) ? x_grad->dims()[0]
+                  : x_grad->dims()[static_cast<int>(x_grad_rank) - 1]);
+  const int frame_length = static_cast<int>(
+      (axis == 0) ? x_grad->dims()[1]
+                  : x_grad->dims()[static_cast<int>(x_grad_rank) - 2]);
+  const int seq_length = static_cast<int>(
+      (axis == 0) ? out_grad.dims()[0]
+                  : out_grad.dims()[static_cast<int>(out_grad_rank) - 1]);
   // When the number of input dims is larger than 2, it needs to copy
   // from x to resize input into 2d and output into 3d. Morevoer, output
...
@@ -50,12 +53,14 @@ void OverlapAddGradKernel(const Context& dev_ctx,
   phi::DDim x_grad_resized_dims;
   phi::DDim out_grad_resized_dims;
   if (axis == 0) {
-    preserved_dims = phi::slice_ddim(out_grad_.dims(), 1, out_grad_rank);
+    preserved_dims =
+        phi::slice_ddim(out_grad_.dims(), 1, static_cast<int>(out_grad_rank));
     x_grad_resized_dims = {n_frames, frame_length, phi::product(preserved_dims)};
     out_grad_resized_dims = {seq_length, phi::product(preserved_dims)};
   } else {
-    preserved_dims = phi::slice_ddim(out_grad_.dims(), 0, out_grad_rank - 1);
+    preserved_dims = phi::slice_ddim(
+        out_grad_.dims(), 0, static_cast<int>(out_grad_rank) - 1);
     x_grad_resized_dims = {phi::product(preserved_dims), frame_length, n_frames};
     out_grad_resized_dims = {phi::product(preserved_dims), seq_length};
...
paddle/phi/kernels/cpu/overlap_add_kernel.cc
View file @ b702d2ae
...
@@ -26,13 +26,16 @@ void OverlapAddKernel(const Context& dev_ctx,
                       int axis,
                       DenseTensor* out) {
   dev_ctx.template Alloc<T>(out);
-  const size_t x_rank = x.dims().size();
+  const int x_rank = x.dims().size();
   const size_t out_rank = out->dims().size();
-  const int n_frames = (axis == 0) ? x.dims()[0] : x.dims()[x_rank - 1];
-  const int frame_length = (axis == 0) ? x.dims()[1] : x.dims()[x_rank - 2];
-  const int seq_length =
-      (axis == 0) ? out->dims()[0] : out->dims()[out_rank - 1];
+  const int n_frames =
+      static_cast<int>((axis == 0) ? x.dims()[0] : x.dims()[x_rank - 1]);
+  const int frame_length =
+      static_cast<int>((axis == 0) ? x.dims()[1] : x.dims()[x_rank - 2]);
+  const int seq_length = static_cast<int>(
+      (axis == 0) ? out->dims()[0]
+                  : out->dims()[static_cast<int>(out_rank) - 1]);
   // auto& dev_ctx = ctx.device_context<Context>();
...
@@ -46,11 +49,13 @@ void OverlapAddKernel(const Context& dev_ctx,
   phi::DDim x_resized_dims;
   phi::DDim out_resized_dims;
   if (axis == 0) {
-    preserved_dims = phi::slice_ddim(out->dims(), 1, out_rank);
+    preserved_dims =
+        phi::slice_ddim(out->dims(), 1, static_cast<int>(out_rank));
     x_resized_dims = {n_frames, frame_length, phi::product(preserved_dims)};
     out_resized_dims = {seq_length, phi::product(preserved_dims)};
   } else {
-    preserved_dims = phi::slice_ddim(out->dims(), 0, out_rank - 1);
+    preserved_dims =
+        phi::slice_ddim(out->dims(), 0, static_cast<int>(out_rank) - 1);
     x_resized_dims = {phi::product(preserved_dims), frame_length, n_frames};
     out_resized_dims = {phi::product(preserved_dims), seq_length};
   }
...
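When the value being narrowed is a conditional expression, these hunks hoist a single static_cast<int> around the whole ternary instead of casting each arm, which keeps both arms in int64_t until one final, visible narrowing. A small sketch with invented extents:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const int axis = 0;
      const int64_t dim0 = 16, dim_last = 128;  // hypothetical dim values
      // One narrowing for the whole expression, not one per arm.
      const int n_frames = static_cast<int>((axis == 0) ? dim0 : dim_last);
      std::printf("n_frames = %d\n", n_frames);
      return 0;
    }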
paddle/phi/kernels/cpu/p_norm_grad_kernel.cc
View file @ b702d2ae
...
@@ -30,15 +30,15 @@ inline void GetDims(const phi::DDim& dim,
                     bool asvector) {
   *pre = 1;
   *post = 1;
-  *n = dim[axis];
+  *n = static_cast<int>(dim[axis]);
   if (asvector) {
-    *n = product(dim);
+    *n = static_cast<int>(product(dim));
   } else {
     for (int i = 0; i < axis; ++i) {
-      (*pre) *= dim[i];
+      (*pre) *= static_cast<int>(dim[i]);
     }
     for (int i = axis + 1; i < dim.size(); ++i) {
-      (*post) *= dim[i];
+      (*post) *= static_cast<int>(dim[i]);
     }
   }
 }
...
paddle/phi/kernels/cpu/p_norm_kernel.cc
View file @ b702d2ae
...
@@ -31,15 +31,15 @@ inline void GetDims(const phi::DDim& dim,
                     bool asvector) {
   *pre = 1;
   *post = 1;
-  *n = dim[axis];
+  *n = static_cast<int>(dim[axis]);
   if (asvector) {
-    *n = product(dim);
+    *n = static_cast<int>(product(dim));
   } else {
     for (int i = 0; i < axis; ++i) {
-      (*pre) *= dim[i];
+      (*pre) *= static_cast<int>(dim[i]);
     }
     for (int i = axis + 1; i < dim.size(); ++i) {
-      (*post) *= dim[i];
+      (*post) *= static_cast<int>(dim[i]);
     }
   }
 }
...
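GetDims accumulates products of int64_t extents into int outputs, so each factor is narrowed at the compound assignment. A self-contained sketch of the same loop structure (extents invented):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const int64_t dim[4] = {2, 3, 4, 5};
      const int axis = 2;
      int pre = 1, post = 1, n = static_cast<int>(dim[axis]);
      for (int i = 0; i < axis; ++i) pre *= static_cast<int>(dim[i]);
      for (int i = axis + 1; i < 4; ++i) post *= static_cast<int>(dim[i]);
      std::printf("pre=%d n=%d post=%d\n", pre, n, post);
      return 0;
    }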
paddle/phi/kernels/cpu/pad3d_grad_kernel.cc
View file @ b702d2ae
...
@@ -377,18 +377,18 @@ void Pad3dGradKernel(const Context& dev_ctx,
   T* d_in_data = dev_ctx.template Alloc<T>(d_in);
   phi::funcs::SetConstant<Context, T>()(dev_ctx, d_in, static_cast<T>(0));
-  const int pad_left = pads[0];
-  const int pad_top = pads[2];
-  const int pad_front = pads[4];
-  const int num = d_in_dims[0];
+  const int pad_left = static_cast<int>(pads[0]);
+  const int pad_top = static_cast<int>(pads[2]);
+  const int pad_front = static_cast<int>(pads[4]);
+  const int num = static_cast<int>(d_in_dims[0]);
   if (data_format == "NCDHW") {
-    const int channels = d_in_dims[1];
-    const int in_depth = d_in_dims[2];
-    const int in_height = d_in_dims[3];
-    const int in_width = d_in_dims[4];
-    const int out_depth = d_out_dims[2];
-    const int out_height = d_out_dims[3];
-    const int out_width = d_out_dims[4];
+    const int channels = static_cast<int>(d_in_dims[1]);
+    const int in_depth = static_cast<int>(d_in_dims[2]);
+    const int in_height = static_cast<int>(d_in_dims[3]);
+    const int in_width = static_cast<int>(d_in_dims[4]);
+    const int out_depth = static_cast<int>(d_out_dims[2]);
+    const int out_height = static_cast<int>(d_out_dims[3]);
+    const int out_width = static_cast<int>(d_out_dims[4]);
     std::map<std::string, void (*)(T*,
...
@@ -427,13 +427,13 @@ void Pad3dGradKernel(const Context& dev_ctx,
                      d_out_data,
                      func_map[mode]);
   } else {
-    const int channels = d_in_dims[4];
-    const int in_depth = d_in_dims[1];
-    const int in_height = d_in_dims[2];
-    const int in_width = d_in_dims[3];
-    const int out_depth = d_out_dims[1];
-    const int out_height = d_out_dims[2];
-    const int out_width = d_out_dims[3];
+    const int channels = static_cast<int>(d_in_dims[4]);
+    const int in_depth = static_cast<int>(d_in_dims[1]);
+    const int in_height = static_cast<int>(d_in_dims[2]);
+    const int in_width = static_cast<int>(d_in_dims[3]);
+    const int out_depth = static_cast<int>(d_out_dims[1]);
+    const int out_height = static_cast<int>(d_out_dims[2]);
+    const int out_width = static_cast<int>(d_out_dims[3]);
     std::map<std::string, void (*)(T*,
...
paddle/phi/kernels/cpu/pad3d_kernel.cc
View file @ b702d2ae
...
@@ -407,21 +407,21 @@ void Pad3dKernel(const Context& dev_ctx,
   auto out_dims = out->dims();
   T* out_data = dev_ctx.template Alloc<T>(out);
-  int channels = in_dims[1];
-  int in_depth = in_dims[2];
-  int in_height = in_dims[3];
-  int in_width = in_dims[4];
-  int out_depth = out_dims[2];
-  int out_height = out_dims[3];
-  int out_width = out_dims[4];
+  int channels = static_cast<int>(in_dims[1]);
+  int in_depth = static_cast<int>(in_dims[2]);
+  int in_height = static_cast<int>(in_dims[3]);
+  int in_width = static_cast<int>(in_dims[4]);
+  int out_depth = static_cast<int>(out_dims[2]);
+  int out_height = static_cast<int>(out_dims[3]);
+  int out_width = static_cast<int>(out_dims[4]);
   if (data_format == "NDHWC") {
-    channels = in_dims[4];
-    in_depth = in_dims[1];
-    in_height = in_dims[2];
-    in_width = in_dims[3];
-    out_depth = out_dims[1];
-    out_height = out_dims[2];
-    out_width = out_dims[3];
+    channels = static_cast<int>(in_dims[4]);
+    in_depth = static_cast<int>(in_dims[1]);
+    in_height = static_cast<int>(in_dims[2]);
+    in_width = static_cast<int>(in_dims[3]);
+    out_depth = static_cast<int>(out_dims[1]);
+    out_height = static_cast<int>(out_dims[2]);
+    out_width = static_cast<int>(out_dims[3]);
   }
   if (mode == "reflect") {
...
@@ -489,10 +489,10 @@ void Pad3dKernel(const Context& dev_ctx,
                       "or replicate padding mode."));
   }
-  const int pad_left = pads[0];
-  const int pad_top = pads[2];
-  const int pad_front = pads[4];
-  const int num = in_dims[0];
+  const int pad_left = static_cast<int>(pads[0]);
+  const int pad_top = static_cast<int>(pads[2]);
+  const int pad_front = static_cast<int>(pads[4]);
+  const int num = static_cast<int>(in_dims[0]);
   if (data_format == "NCDHW") {
     std::map<std::string, void (*)(const T*,
...
paddle/phi/kernels/cpu/prelu_grad_kernel.cc
View file @ b702d2ae
...
@@ -31,7 +31,7 @@ void PReluGradKernel(const Context& dev_ctx,
   const T* alpha_ptr = alpha.data<T>();
   const T* x_ptr = x.data<T>();
   const T* out_grad_ptr = out_grad.data<T>();
-  int numel = x.numel();
+  int numel = static_cast<int>(x.numel());
   auto dim = x.dims();
   int index = 0;
   int i = 0;
...
@@ -41,16 +41,16 @@ void PReluGradKernel(const Context& dev_ctx,
     if (data_format == "NCHW") {
       int temp = 1;
       for (int j = 2; j < dim.size(); j++) {
-        temp *= dim[j];
+        temp *= static_cast<int>(dim[j]);
       }
       for (i = 0; i < numel; i++) {
-        index = (i / temp) % dim[1];
+        index = static_cast<int>((i / temp) % dim[1]);
         x_grad_ptr[i] =
             x_ptr[i] > 0 ? out_grad_ptr[i] : alpha_ptr[index] * out_grad_ptr[i];
       }
     } else {
       for (i = 0; i < numel; i++) {
-        index = i % dim[dim.size() - 1];
+        index = static_cast<int>(i % dim[dim.size() - 1]);
         x_grad_ptr[i] =
             x_ptr[i] > 0 ? out_grad_ptr[i] : alpha_ptr[index] * out_grad_ptr[i];
       }
...
@@ -58,7 +58,7 @@ void PReluGradKernel(const Context& dev_ctx,
   } else if (mode == "element") {
     int temp = 1;
     for (int j = 1; j < dim.size(); j++) {
-      temp *= dim[j];
+      temp *= static_cast<int>(dim[j]);
     }
     for (i = 0; i < numel; i++) {
       index = i % temp;
...
@@ -82,16 +82,16 @@ void PReluGradKernel(const Context& dev_ctx,
     if (data_format == "NCHW") {
       int temp = 1;
       for (int j = 2; j < dim.size(); j++) {
-        temp *= dim[j];
+        temp *= static_cast<int>(dim[j]);
       }
       for (i = 0; i < numel; i++) {
-        index = (i / temp) % dim[1];
+        index = static_cast<int>((i / temp) % dim[1]);
         alpha_grad_ptr[index] += x_ptr[i] > 0 ? 0 : x_ptr[i] * out_grad_ptr[i];
       }
     } else {
       for (i = 0; i < numel; i++) {
-        index = i % dim[dim.size() - 1];
+        index = static_cast<int>(i % dim[dim.size() - 1]);
         alpha_grad_ptr[index] += x_ptr[i] > 0 ? 0 : x_ptr[i] * out_grad_ptr[i];
       }
...
@@ -99,7 +99,7 @@ void PReluGradKernel(const Context& dev_ctx,
   } else if (mode == "element") {
     int temp = 1;
     for (int j = 1; j < dim.size(); j++) {
-      temp *= dim[j];
+      temp *= static_cast<int>(dim[j]);
     }
     for (i = 0; i < numel; i++) {
       index = i % temp;
...
paddle/phi/kernels/funcs/concat_and_split_functor.cc
View file @ b702d2ae
...
@@ -92,14 +92,14 @@ struct SplitFunctor<phi::CPUContext, T> {
     int input_rows = 1;
     auto dim_0 = ref_inputs[0]->dims();
     for (int i = 0; i < axis; ++i) {
-      input_rows *= dim_0[i];
+      input_rows *= static_cast<int>(dim_0[i]);
     }
     int input_cols = 0;
     std::vector<int64_t> output_cols(outputs->size());
     for (size_t i = 0; i < num; ++i) {
-      int t_cols = ref_inputs[i]->numel() / input_rows;
+      int t_cols = static_cast<int>(ref_inputs[i]->numel() / input_rows);
       input_cols += t_cols;
       output_cols[i] = t_cols;
     }
...
@@ -110,7 +110,7 @@ struct SplitFunctor<phi::CPUContext, T> {
       const T* src_ptr = input.data<T>() + k * input_cols;
       int col_idx = 0;
       for (size_t j = 0; j < num; ++j) {
-        int col_len = output_cols[j];
+        int col_len = static_cast<int>(output_cols[j]);
         auto* out_tensor = outputs->at(j);
         if (out_tensor != nullptr) {
           T* dst_ptr = out_tensor->data<T>() + k * col_len;
...
paddle/phi/kernels/funcs/cross_entropy.cc
View file @ b702d2ae
...
@@ -101,8 +101,8 @@ void CrossEntropyFunctor<DeviceContext, T>::operator()(
     const int ignore_index,
     const int axis_dim) {
   if (softLabel) {
-    const int batch_size = prob->dims()[0];
-    const int num_classes = prob->dims()[1];
+    const int batch_size = static_cast<const int>(prob->dims()[0]);
+    const int num_classes = static_cast<const int>(prob->dims()[1]);
     const int num_remain = num_classes / axis_dim;
     Eigen::DSizes<int, 3> batch_axis_remain(batch_size, axis_dim, num_remain);
...
paddle/phi/kernels/funcs/deformable_conv_functor.cc
View file @ b702d2ae
...
@@ -111,8 +111,10 @@ void ModulatedDeformableIm2col(const Context& dev_ctx UNUSED,
     const std::vector<int>& dilations,
     const int deformable_groups,
     T* data_col) {
-  int channel_per_deformable_group = im_shape[0] / deformable_groups;
-  int num_kernels = im_shape[0] * col_shape[1] * col_shape[2] * col_shape[3];
+  int channel_per_deformable_group =
+      static_cast<int>(im_shape[0] / deformable_groups);
+  int num_kernels = static_cast<int>(im_shape[0] * col_shape[1] *
+                                     col_shape[2] * col_shape[3]);
   // get outputs of im2col with offset by bilinear interpolation
   ModulatedDeformableIm2colCPUKernel(num_kernels,
...
paddle/phi/kernels/funcs/gather_scatter_functor.cc
View file @ b702d2ae
...
@@ -78,13 +78,13 @@ struct cpu_gather_scatter_functor {
           "self_size, src_size, index_size cannot be 0");
       return;
     }
-    int select_dim_size = index_dims[dim];
+    int64_t select_dim_size = index_dims[dim];
     // index matrix has different shape with self matrix or src matrix.
     int replaced_select_dim_size =
         is_scatter_like ? self_dims[dim] : src_dims[dim];
     int64_t inner_dim_size = 1;
     int64_t outer_dim_size = 1;
-    for (int64_t i = 0; i < dim; ++i) {
+    for (int i = 0; i < dim; ++i) {
       inner_dim_size *= index_dims[i];
     }
...
@@ -193,9 +193,9 @@ void cpu_scatter_input_grad_kernel(phi::DenseTensor self UNUSED,
   int64_t inner_dim_size = 1;
   int64_t outer_dim_size = 1;
-  int select_dim_size = index_dims[dim];
-  int output_select_dim_size = output_dims[dim];
-  for (int64_t i = 0; i < dim; ++i) {
+  int64_t select_dim_size = index_dims[dim];
+  int64_t output_select_dim_size = output_dims[dim];
+  for (int i = 0; i < dim; ++i) {
    inner_dim_size *= index_dims[i];
  }
...
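This file shows the alternative repair: widen the destination instead of narrowing the source. Extent variables become int64_t, matching what the dims arrays hold, and loop indices that only count up to a small rank drop back to int. Widening is the safer choice whenever the value could legitimately exceed what int can hold; a sketch:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const int64_t index_dims[3] = {4, 1LL << 33, 8};  // hypothetical extents
      const int dim = 1;
      int64_t select_dim_size = index_dims[dim];  // was int: would overflow here
      std::printf("select_dim_size = %lld\n",
                  static_cast<long long>(select_dim_size));
      return 0;
    }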
paddle/phi/kernels/funcs/gpc.cc
View file @ b702d2ae
...
@@ -252,7 +252,7 @@ static edge_node *build_lmt(lmt_node **lmt,
   /* Create the entire input polygon edge table in one go */
   gpc_malloc<edge_node>(edge_table,
-                        total_vertices * sizeof(edge_node),
+                        total_vertices * static_cast<int>(sizeof(edge_node)),
                         const_cast<char *>("edge table creation"));
   for (c = 0; c < p->num_contours; c++) {
...
@@ -711,7 +711,7 @@ static bbox *create_contour_bboxes(gpc_polygon *p) {
   int v = 0;
   gpc_malloc<bbox>(box,
-                   p->num_contours * sizeof(bbox),
+                   p->num_contours * static_cast<int>(sizeof(bbox)),
                    const_cast<char *>("Bounding box creation"));
   PADDLE_ENFORCE_NOT_NULL(
       box, phi::errors::ResourceExhausted("Failed to malloc box memory."));
...
@@ -754,9 +754,10 @@ static void minimax_test(gpc_polygon *subj, gpc_polygon *clip, gpc_op op) {
   s_bbox = create_contour_bboxes(subj);
   c_bbox = create_contour_bboxes(clip);
-  gpc_malloc<int>(o_table,
-                  subj->num_contours * clip->num_contours * sizeof(int),
-                  const_cast<char *>("overlap table creation"));
+  gpc_malloc<int>(
+      o_table,
+      subj->num_contours * clip->num_contours * static_cast<int>(sizeof(int)),
+      const_cast<char *>("overlap table creation"));
   /* Check all subject contour bounding boxes against clip boxes */
   for (s = 0; s < subj->num_contours; s++) {
...
@@ -877,16 +878,17 @@ void gpc_add_contour(gpc_polygon *p, gpc_vertex_list *new_contour, int hole) {
   /* Create an extended hole array */
   gpc_malloc<int>(extended_hole,
-                  (p->num_contours + 1) * sizeof(int),
+                  (p->num_contours + 1) * static_cast<int>(sizeof(int)),
                   const_cast<char *>("contour hole addition"));
   PADDLE_ENFORCE_NOT_NULL(
       extended_hole,
       phi::errors::ResourceExhausted("Failed to malloc extended hole memory."));
   /* Create an extended contour array */
-  gpc_malloc<gpc_vertex_list>(extended_contour,
-                              (p->num_contours + 1) * sizeof(gpc_vertex_list),
-                              const_cast<char *>("contour addition"));
+  gpc_malloc<gpc_vertex_list>(extended_contour,
+                              (p->num_contours + 1) *
+                                  static_cast<int>(sizeof(gpc_vertex_list)),
+                              const_cast<char *>("contour addition"));
   /* Copy the old contour and hole data into the extended arrays */
   for (c = 0; c < p->num_contours; c++) {
...
@@ -898,9 +900,10 @@ void gpc_add_contour(gpc_polygon *p, gpc_vertex_list *new_contour, int hole) {
   c = p->num_contours;
   extended_hole[c] = hole;
   extended_contour[c].num_vertices = new_contour->num_vertices;
-  gpc_malloc<gpc_vertex>(extended_contour[c].vertex,
-                         new_contour->num_vertices * sizeof(gpc_vertex),
-                         const_cast<char *>("contour addition"));
+  gpc_malloc<gpc_vertex>(extended_contour[c].vertex,
+                         new_contour->num_vertices *
+                             static_cast<int>(sizeof(gpc_vertex)),
+                         const_cast<char *>("contour addition"));
   for (v = 0; v < new_contour->num_vertices; v++) {
     extended_contour[c].vertex[v] = new_contour->vertex[v];
   }
...
@@ -999,8 +1002,9 @@ void gpc_polygon_clip(gpc_op op,
   }
   /* Build scanbeam table from scanbeam tree */
-  gpc_malloc<double>(
-      sbt, sbt_entries * sizeof(double), const_cast<char *>("sbt creation"));
+  gpc_malloc<double>(sbt,
+                     sbt_entries * static_cast<int>(sizeof(double)),
+                     const_cast<char *>("sbt creation"));
   PADDLE_ENFORCE_NOT_NULL(
       sbt,
       phi::errors::ResourceExhausted("Failed to malloc scanbeam table memory."));
...
@@ -1496,11 +1500,12 @@ void gpc_polygon_clip(gpc_op op,
   result->num_contours = count_contours(out_poly);
   if (result->num_contours > 0) {
     gpc_malloc<int>(result->hole,
-                    result->num_contours * sizeof(int),
+                    result->num_contours * static_cast<int>(sizeof(int)),
                     const_cast<char *>("hole flag table creation"));
-    gpc_malloc<gpc_vertex_list>(result->contour,
-                                result->num_contours * sizeof(gpc_vertex_list),
-                                const_cast<char *>("contour creation"));
+    gpc_malloc<gpc_vertex_list>(
+        result->contour,
+        result->num_contours * static_cast<int>(sizeof(gpc_vertex_list)),
+        const_cast<char *>("contour creation"));
     c = 0;
     for (poly = out_poly; poly; poly = npoly) {
...
@@ -1508,10 +1513,10 @@ void gpc_polygon_clip(gpc_op op,
       if (poly->active) {
         result->hole[c] = poly->proxy->hole;
         result->contour[c].num_vertices = poly->active;
-        gpc_malloc<gpc_vertex>(
-            result->contour[c].vertex,
-            result->contour[c].num_vertices * sizeof(gpc_vertex),
-            const_cast<char *>("vertex creation"));
+        gpc_malloc<gpc_vertex>(result->contour[c].vertex,
+                               result->contour[c].num_vertices *
+                                   static_cast<int>(sizeof(gpc_vertex)),
+                               const_cast<char *>("vertex creation"));
         v = result->contour[c].num_vertices - 1;
         for (vtx = poly->proxy->v[LEFT]; vtx; vtx = nv) {
...
@@ -1644,8 +1649,9 @@ void gpc_tristrip_clip(gpc_op op,
   }
   /* Build scanbeam table from scanbeam tree */
-  gpc_malloc<double>(
-      sbt, sbt_entries * sizeof(double), const_cast<char *>("sbt creation"));
+  gpc_malloc<double>(sbt,
+                     sbt_entries * static_cast<int>(sizeof(double)),
+                     const_cast<char *>("sbt creation"));
   PADDLE_ENFORCE_NOT_NULL(
       sbt,
       phi::errors::ResourceExhausted("Failed to malloc scanbeam table memory."));
...
@@ -2181,9 +2187,10 @@ void gpc_tristrip_clip(gpc_op op,
   result->strip = nullptr;
   result->num_strips = count_tristrips(tlist);
   if (result->num_strips > 0) {
-    gpc_malloc<gpc_vertex_list>(result->strip,
-                                result->num_strips * sizeof(gpc_vertex_list),
-                                const_cast<char *>("tristrip list creation"));
+    gpc_malloc<gpc_vertex_list>(
+        result->strip,
+        result->num_strips * static_cast<int>(sizeof(gpc_vertex_list)),
+        const_cast<char *>("tristrip list creation"));
     s = 0;
     for (tn = tlist; tn; tn = tnn) {
...
@@ -2191,9 +2198,10 @@ void gpc_tristrip_clip(gpc_op op,
       if (tn->active > 2) {
         /* Valid tristrip: copy the vertices and free the heap */
         result->strip[s].num_vertices = tn->active;
-        gpc_malloc<gpc_vertex>(result->strip[s].vertex,
-                               tn->active * sizeof(gpc_vertex),
-                               const_cast<char *>("tristrip creation"));
+        gpc_malloc<gpc_vertex>(
+            result->strip[s].vertex,
+            tn->active * static_cast<int>(sizeof(gpc_vertex)),
+            const_cast<char *>("tristrip creation"));
         v = 0;
         if (false) {
           lt = tn->v[RIGHT];
...
paddle/phi/kernels/funcs/gpc.h
View file @ b702d2ae
...
@@ -139,6 +139,7 @@ typedef struct edge_shape {
 } edge_node;

 inline bool gpc_eq(float a, float b) { return (fabs(a - b) <= 1e-6); }
+inline bool gpc_eq(double a, double b) { return (fabs(a - b) <= 1e-6); }

 inline bool gpc_prev_index(float a, float b) { return (fabs(a - b) <= 1e-6); }
...
@@ -189,7 +190,7 @@ inline void gpc_n_edge(edge_node *d, edge_node *e, int p) {
 }

 template <typename T>
-void gpc_malloc(T *&p, int b, char *s) {
+void gpc_malloc(T *&p, int b, char *s) {  // NOLINT
   if (b > 0) {
     p = reinterpret_cast<T *>(malloc(b));
...
@@ -202,7 +203,7 @@ void gpc_malloc(T *&p, int b, char *s) {
 }

 template <typename T>
-void gpc_free(T *&p) {
+void gpc_free(T *&p) {  // NOLINT
   if (p) {
     free(p);
     p = NULL;
...
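Two things are going on in the gpc changes. First, sizeof yields size_t, so an expression like count * sizeof(T) silently promotes the signed count to unsigned before narrowing again at gpc_malloc's int byte-count parameter; the .cc hunks cast sizeof to int so the arithmetic stays signed throughout. Second, the header gains // NOLINT suppressions on gpc_malloc and gpc_free themselves, presumably to quiet lint on their C-style signatures rather than change a long-standing interface. Below is a simplified, self-contained stand-in for the helper and its call style; the error handling is invented for the sketch:

    #include <cstdio>
    #include <cstdlib>

    template <typename T>
    void gpc_malloc(T *&p, int b, const char *s) {  // int byte count, as in gpc.h
      p = (b > 0) ? static_cast<T *>(std::malloc(b)) : nullptr;
      if (b > 0 && p == nullptr)
        std::fprintf(stderr, "gpc malloc failure: %s\n", s);
    }

    int main() {
      double *sbt = nullptr;
      const int sbt_entries = 64;
      // Casting sizeof keeps the byte count signed, matching the parameter.
      gpc_malloc<double>(sbt, sbt_entries * static_cast<int>(sizeof(double)),
                         "sbt creation");
      std::free(sbt);
      return 0;
    }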
paddle/phi/kernels/funcs/im2col.cc
View file @ b702d2ae
...
@@ -94,16 +94,16 @@ class Col2ImFunctor<phi::funcs::ColFormat::kCFO, DeviceContext, T> {
             "The dimension of tensor 'col' should be 5. But got "
             "the dims of tensor 'col' is [%s].",
             col.dims()));
-    int im_channels =
-        (data_layout != DataLayout::kNHWC ? im->dims()[0] : im->dims()[2]);
-    int im_height =
-        (data_layout != DataLayout::kNHWC ? im->dims()[1] : im->dims()[0]);
-    int im_width =
-        (data_layout != DataLayout::kNHWC ? im->dims()[2] : im->dims()[1]);
-    int filter_height = col.dims()[1];
-    int filter_width = col.dims()[2];
-    int col_height = col.dims()[3];
-    int col_width = col.dims()[4];
+    int im_channels = static_cast<int>(
+        data_layout != DataLayout::kNHWC ? im->dims()[0] : im->dims()[2]);
+    int im_height = static_cast<int>(
+        data_layout != DataLayout::kNHWC ? im->dims()[1] : im->dims()[0]);
+    int im_width = static_cast<int>(
+        data_layout != DataLayout::kNHWC ? im->dims()[2] : im->dims()[1]);
+    int filter_height = static_cast<int>(col.dims()[1]);
+    int filter_width = static_cast<int>(col.dims()[2]);
+    int col_height = static_cast<int>(col.dims()[3]);
+    int col_width = static_cast<int>(col.dims()[4]);

     PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] -
...
@@ -194,13 +194,13 @@ class Im2ColFunctor<phi::funcs::ColFormat::kOCF, DeviceContext, T> {
             "The dimension of tensor 'col' should be 5. But got "
             "the dims of tensor 'col' is [%s].",
             col->dims()));
-    int im_channels = im.dims()[0];
-    int im_height = im.dims()[1];
-    int im_width = im.dims()[2];
-    int filter_height = col->dims()[3];
-    int filter_width = col->dims()[4];
-    int col_height = col->dims()[0];
-    int col_width = col->dims()[1];
+    int im_channels = static_cast<int>(im.dims()[0]);
+    int im_height = static_cast<int>(im.dims()[1]);
+    int im_width = static_cast<int>(im.dims()[2]);
+    int filter_height = static_cast<int>(col->dims()[3]);
+    int filter_width = static_cast<int>(col->dims()[4]);
+    int col_height = static_cast<int>(col->dims()[0]);
+    int col_width = static_cast<int>(col->dims()[1]);
     const T* im_data = im.data<T>();
     T* col_data = col->data<T>();
...
@@ -267,13 +267,13 @@ class Col2ImFunctor<phi::funcs::ColFormat::kOCF, DeviceContext, T> {
             "The dimension of tensor 'col' should be 5. But got "
             "the dims of tensor 'col' is [%s].",
             col.dims()));
-    int im_channels = im->dims()[0];
-    int im_height = im->dims()[1];
-    int im_width = im->dims()[2];
-    int filter_height = col.dims()[3];
-    int filter_width = col.dims()[4];
-    int col_height = col.dims()[0];
-    int col_width = col.dims()[1];
+    int im_channels = static_cast<int>(im->dims()[0]);
+    int im_height = static_cast<int>(im->dims()[1]);
+    int im_width = static_cast<int>(im->dims()[2]);
+    int filter_height = static_cast<int>(col.dims()[3]);
+    int filter_width = static_cast<int>(col.dims()[4]);
+    int col_height = static_cast<int>(col.dims()[0]);
+    int col_width = static_cast<int>(col.dims()[1]);

     PADDLE_ENFORCE_EQ(
         (im_height + padding[0] + padding[2] - filter_height) / stride[0] + 1,
...
paddle/phi/kernels/funcs/jit/gen_base.cc
View file @ b702d2ae
...
@@ -39,7 +39,8 @@ void GenBase::dumpCode(const unsigned char* code) const {
     counter++;
     std::ofstream fout(filename.str(), std::ios::out);
     if (fout.is_open()) {
-      fout.write(reinterpret_cast<const char*>(code), this->getSize());
+      fout.write(reinterpret_cast<const char*>(code),
+                 static_cast<int>(this->getSize()));
       fout.close();
     }
   }
...
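std::ofstream::write takes std::streamsize, a signed type, while getSize() here returns a size_t-like unsigned value, so the argument is narrowed explicitly at the call. A runnable sketch of the same dump pattern (file name and bytes invented):

    #include <cstddef>
    #include <fstream>

    int main() {
      const unsigned char code[16] = {0x90};  // stand-in for JIT-generated bytes
      const std::size_t size = sizeof(code);
      std::ofstream fout("code.bin", std::ios::out | std::ios::binary);
      if (fout.is_open()) {
        fout.write(reinterpret_cast<const char *>(code),
                   static_cast<int>(size));  // signed streamsize expected
        fout.close();
      }
      return 0;
    }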
paddle/phi/kernels/funcs/jit/helper.cc
View file @ b702d2ae (diff collapsed)
paddle/phi/kernels/funcs/jit/kernel_key.cc
View file @ b702d2ae (diff collapsed)
paddle/phi/kernels/funcs/jit/more/intrinsic/layer_norm.cc
View file @ b702d2ae (diff collapsed)
paddle/phi/kernels/funcs/math_function.cc
View file @ b702d2ae (diff collapsed)
paddle/phi/kernels/funcs/matrix_reduce.cc
View file @ b702d2ae (diff collapsed)
paddle/phi/kernels/funcs/maxouting.cc
View file @ b702d2ae (diff collapsed)
paddle/phi/kernels/funcs/pooling.cc
View file @ b702d2ae (diff collapsed)
paddle/phi/kernels/funcs/segment_pooling.cc
View file @ b702d2ae (diff collapsed)
paddle/phi/kernels/funcs/selected_rows_functor.cc
View file @ b702d2ae (diff collapsed)
paddle/phi/kernels/funcs/sequence_padding.cc
View file @ b702d2ae (diff collapsed)
paddle/phi/kernels/funcs/sequence_pooling.cc
View file @ b702d2ae (diff collapsed)
paddle/phi/kernels/funcs/vol2col.cc
View file @ b702d2ae (diff collapsed)
paddle/utils/string/string_helper.cc
View file @ b702d2ae (diff collapsed)