PaddlePaddle / Paddle
Commit 9f04f2ac (unverified)
Authored Aug 09, 2023 by Ruibin Cheung
Committed by GitHub on Aug 09, 2023

[clang-tidy] fix modernize-make-unique (#55764)

Parent: 4ae9945b

Showing 51 changed files with 241 additions and 230 deletions (+241 -230)
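Every hunk in this commit applies the same clang-tidy "modernize-make-unique" fix: an assignment through std::make_unique<T>(args) replaces the ptr.reset(new T(args)) idiom. A minimal sketch of the pattern, assuming an illustrative Widget type that is not from the Paddle sources:

    #include <memory>
    #include <string>

    // Illustrative stand-in type; not part of the Paddle codebase.
    struct Widget {
      explicit Widget(std::string name) : name_(std::move(name)) {}
      std::string name_;
    };

    int main() {
      std::unique_ptr<Widget> w;
      w.reset(new Widget("before"));          // flagged by modernize-make-unique
      w = std::make_unique<Widget>("after");  // preferred: no raw `new`
      return 0;
    }

Besides removing the raw new, make_unique keeps allocation and ownership transfer in a single expression, which (before C++17 tightened evaluation order) also closed a potential leak when a new-expression was evaluated between other throwing arguments.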
paddle/fluid/distributed/collective/process_group_custom.cc  +1 -1
paddle/fluid/distributed/collective/process_group_nccl.cc  +1 -1
paddle/fluid/distributed/fleet_executor/dist_model.cc  +4 -4
paddle/fluid/distributed/ps/service/communicator/communicator.cc  +4 -4
paddle/fluid/distributed/ps/table/memory_sparse_geo_table.cc  +1 -1
paddle/fluid/distributed/ps/table/memory_sparse_table.cc  +2 -2
paddle/fluid/framework/async_executor.cc  +3 -3
paddle/fluid/framework/block_desc.cc  +1 -1
paddle/fluid/framework/custom_operator.cc  +2 -2
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc  +1 -1
paddle/fluid/framework/details/multi_devices_helper.cc  +1 -1
paddle/fluid/framework/details/parallel_ssa_graph_executor.cc  +1 -1
paddle/fluid/framework/details/reduce_op_handle_test.cc  +1 -1
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc  +1 -1
paddle/fluid/framework/executor.cc  +12 -10
paddle/fluid/framework/executor_thread_worker.cc  +1 -1
paddle/fluid/framework/garbage_collector.cc  +6 -6
paddle/fluid/framework/garbage_collector.h  +1 -1
paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc  +8 -8
paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc  +2 -2
paddle/fluid/framework/ir/pass_test.cc  +8 -8
paddle/fluid/framework/operator.cc  +31 -28
paddle/fluid/framework/parallel_executor.cc  +45 -39
paddle/fluid/framework/section_worker.cc  +4 -3
paddle/fluid/framework/selected_rows_utils_test.cc  +1 -1
paddle/fluid/imperative/basic_engine.cc  +10 -6
paddle/fluid/imperative/gloo_context.cc  +1 -2
paddle/fluid/imperative/partial_grad_engine.cc  +6 -6
paddle/fluid/imperative/tracer.cc  +10 -8
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc  +1 -2
paddle/fluid/inference/analysis/passes/passes.cc  +10 -14
paddle/fluid/inference/api/analysis_predictor.cc  +6 -6
paddle/fluid/inference/api/api_impl.cc  +2 -2
paddle/fluid/inference/api/mkldnn_quantizer.cc  +1 -1
paddle/fluid/inference/api/resource_manager.cc  +4 -4
paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu  +5 -4
paddle/fluid/memory/allocation/buffered_allocator_test.cc  +2 -2
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc  +8 -8
paddle/fluid/memory/malloc_test.cu  +2 -2
paddle/fluid/operators/nccl/nccl_gpu_common.cc  +2 -2
paddle/fluid/operators/reader/lod_tensor_blocking_queue.h  +6 -5
paddle/fluid/platform/device/xpu/xpu_info.cc  +1 -1
paddle/fluid/platform/profiler.cc  +4 -6
paddle/fluid/pybind/eager_functions.cc  +2 -2
paddle/fluid/pybind/inference_api.cc  +1 -2
paddle/fluid/pybind/pybind.cc  +6 -6
paddle/phi/backends/gpu/gpu_context.cc  +1 -1
paddle/phi/backends/gpu/gpu_context.h  +1 -1
test/cpp/fluid/benchmark/op_tester.cc  +1 -1
test/cpp/phi/core/test_selected_rows.cc  +1 -1
test/cpp/prim/init_env_utils.cc  +4 -4
paddle/fluid/distributed/collective/process_group_custom.cc
@@ -173,7 +173,7 @@ void ProcessGroupCustom::CreateCustomManagerCache(
     phi::DeviceGuard guard(places[i]);
     ccl_comms[i] = CustomCCLCommManager::Create(
         device_type, GetSize(), GetRank(), &ccl_id, new phi::ccl::CCLComm);
-    dev_ctx[i].reset(new CustomDeviceContext(places[i]));
+    dev_ctx[i] = std::make_unique<CustomDeviceContext>(places[i]);
     dev_ctx[i]->SetAllocator(
         &(phi::DeviceContextPool::Instance().Get(places[i])->GetAllocator()));
     dev_ctx[i]->SetHostAllocator(&(
paddle/fluid/distributed/collective/process_group_nccl.cc
@@ -625,7 +625,7 @@ void ProcessGroupNCCL::CreateNCCLManagerCache(
   for (size_t i = 0; i < places.size(); ++i) {
     platform::CUDADeviceGuard guard(places[i]);
-    dev_ctx[i].reset(new phi::GPUContext(places[i]));
+    dev_ctx[i] = std::make_unique<phi::GPUContext>(places[i]);
     ncclComm_t nccl_comm;
     NCCL_CHECK(phi::dynload::ncclCommInitRank(
         &nccl_comm, GetSize(), nccl_id, GetRank()));
paddle/fluid/distributed/fleet_executor/dist_model.cc
@@ -374,7 +374,7 @@ void DistModel::InsertCommOp(std::string tmp_var_name,
 }
 
 bool DistModel::PrepareScope() {
-  scope_.reset(new framework::Scope());
+  scope_ = std::make_unique<framework::Scope>();
   return true;
 }
@@ -412,7 +412,7 @@ bool DistModel::LoadProgram() {
   fin.close();
   program_proto.ParseFromString(pb_content);
   VLOG(5) << pb_content;
-  program_.reset(new framework::ProgramDesc(program_proto));
+  program_ = std::make_unique<framework::ProgramDesc>(program_proto);
   return true;
 }
@@ -469,7 +469,7 @@ bool DistModel::LoadParameters() {
 }
 
 bool DistModel::PrepareFleetExe() {
-  task_node_.reset(new TaskNode(program_.get(), config_.local_rank));
+  task_node_ = std::make_unique<TaskNode>(program_.get(), config_.local_rank);
   // With auto cut, there is no concept of pp, no need to add dependency.
   task_node_->SetType("Compute");
   task_node_->Init();
@@ -487,7 +487,7 @@ bool DistModel::PrepareFleetExe() {
     }
     id_to_rank.insert({i, i});
   }
-  fleet_exe.reset(new FleetExecutor(executor_desc_));
+  fleet_exe = std::make_unique<FleetExecutor>(executor_desc_);
   fleet_exe->Init(carrier_id_,
                   *(program_.get()),
                   scope_.get(),
paddle/fluid/distributed/ps/service/communicator/communicator.cc
@@ -796,8 +796,8 @@ void AsyncCommunicator::InitImpl(const RpcCtxMap &send_varname_to_ctx,
   send_varname_to_ctx_ = std::move(send_varname_to_ctx);
   recv_varname_to_ctx_ = std::move(recv_varname_to_ctx);
   recv_scope_ = std::move(recv_scope);
-  send_scope_.reset(new Scope());
-  xpu_temp_scope_.reset(new Scope());
+  send_scope_ = std::make_unique<Scope>();
+  xpu_temp_scope_ = std::make_unique<Scope>();
   for (auto &iter : send_varname_to_ctx_) {
     auto &ctx = iter.second;
     auto &varnames = ctx.origin_varnames;
@@ -807,7 +807,7 @@ void AsyncCommunicator::InitImpl(const RpcCtxMap &send_varname_to_ctx,
           send_queue_size_);
     }
   }
-  send_threadpool_.reset(new ::ThreadPool(thread_pool_size_));
+  send_threadpool_ = std::make_unique<::ThreadPool>(thread_pool_size_);
 }
 
 AsyncCommunicator::~AsyncCommunicator() {
@@ -1517,7 +1517,7 @@ void FLCommunicator::InitBrpcClient(
     // before, but no need for Coordinator
   }
   if (coordinator_client_ptr_ == nullptr) {
-    coordinator_client_ptr_.reset(new CoordinatorClient);
+    coordinator_client_ptr_ = std::make_unique<CoordinatorClient>();
   }
   int16_t servers = host_sign_list.size();
   coordinator_client_ptr_->_env = &ps_env_;
paddle/fluid/distributed/ps/table/memory_sparse_geo_table.cc
@@ -138,7 +138,7 @@ int32_t MemorySparseGeoTable::Initialize() {
     shards_task.reset(new ::ThreadPool(1));
   }
-  _local_shards.reset(new shard_type[_task_pool_size]);
+  _local_shards.reset(new shard_type[_task_pool_size]);  // NOLINT
   return 0;
 }
paddle/fluid/distributed/ps/table/memory_sparse_table.cc
@@ -97,7 +97,7 @@ int32_t MemorySparseTable::InitializeValue() {
     LOG(INFO) << "merged shard info: [" << _m_sparse_table_shard_num << "|"
               << _m_avg_local_shard_num << "|" << _m_real_local_shard_num
               << "]";
-    _local_shards_new.reset(new shard_type[_real_local_shard_num]);
+    _local_shards_new.reset(new shard_type[_real_local_shard_num]);  // NOLINT
   }
   return 0;
 }
@@ -322,7 +322,7 @@ int32_t MemorySparseTable::Save(const std::string &dirname,
   // patch model
   if (save_param == 5) {
     _local_shards_patch_model.reset(_local_shards_new.release());
-    _local_shards_new.reset(new shard_type[_real_local_shard_num]);
+    _local_shards_new.reset(new shard_type[_real_local_shard_num]);  // NOLINT
     _save_patch_model_thread = std::thread(std::bind(
         &MemorySparseTable::SavePatch, this, std::string(dirname), save_param));
     return 0;
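The two sparse-table files above are an exception: the array form reset(new shard_type[n]) is kept and only annotated with // NOLINT. A plausible reason (the commit itself does not state one) is that std::make_unique<T[]>(n) value-initializes every element while new T[n] default-initializes them, so the conversion is not behavior-preserving for arrays. A minimal sketch of that difference:

    #include <memory>

    int main() {
      // new int[4] default-initializes: the elements are left indeterminate.
      std::unique_ptr<int[]> a(new int[4]);

      // std::make_unique<int[]>(4) value-initializes: the elements are zeroed.
      auto b = std::make_unique<int[]>(4);
      return 0;
    }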
paddle/fluid/framework/async_executor.cc
@@ -136,12 +136,12 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program,
   for (auto& worker : workers) {
 #ifdef PADDLE_WITH_PSLIB
     if (mode == "mpi") {
-      worker.reset(new AsyncExecutorThreadWorker);
+      worker = std::make_unique<AsyncExecutorThreadWorker>();
     } else {
-      worker.reset(new ExecutorThreadWorker);
+      worker = std::make_unique<ExecutorThreadWorker>();
     }
 #else
-    worker.reset(new ExecutorThreadWorker);
+    worker = std::make_unique<ExecutorThreadWorker>();
 #endif
   }
paddle/fluid/framework/block_desc.cc
@@ -236,7 +236,7 @@ proto::BlockDesc *BlockDesc::Proto() {
 BlockDesc::BlockDesc(ProgramDesc *prog, proto::BlockDesc *desc)
     : prog_(prog), desc_(desc), need_update_(false) {
   for (const proto::VarDesc &var_desc : desc_->vars()) {
-    vars_[var_desc.name()].reset(new VarDesc(var_desc));
+    vars_[var_desc.name()] = std::make_unique<VarDesc>(var_desc);
   }
   for (const proto::OpDesc &op_desc : desc_->ops()) {
paddle/fluid/framework/custom_operator.cc
@@ -270,8 +270,8 @@ static void RunKernelFunc(
   FLAGS_tensor_operants_mode = "phi";
   if (paddle::OperantsManager::Instance().phi_operants.get() == nullptr) {
-    paddle::OperantsManager::Instance().phi_operants.reset(
-        new paddle::operants::PhiTensorOperants());
+    paddle::OperantsManager::Instance().phi_operants =
+        std::make_unique<paddle::operants::PhiTensorOperants>();
     VLOG(4) << "Initialize phi tensor operants successfully";
   }
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
@@ -59,7 +59,7 @@ FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor(
       }
     }
   }
-  pool_.reset(new ::ThreadPool(strategy.num_threads_));
+  pool_ = std::make_unique<::ThreadPool>(strategy.num_threads_);
   for (auto &op : ir::FilterByNodeWrapper<OpHandleBase>(*graph_)) {
     int dep = static_cast<int>(op->NotReadyInputSize());
     op_deps_.emplace(op, dep);
paddle/fluid/framework/details/multi_devices_helper.cc
@@ -208,7 +208,7 @@ std::vector<std::unique_ptr<ir::Graph>> TrySeparateToMultipleSingleDeviceGraphs(
   std::vector<std::unique_ptr<ir::Graph>> graphs(place_num);
   for (auto &g : graphs) {
-    g.reset(new ir::Graph(ProgramDesc()));
+    g = std::make_unique<ir::Graph>(ProgramDesc());
     g->Set(kGraphVars, new GraphVars(1UL));
     g->Set(kGraphDepVars, new GraphDepVars());
   }
paddle/fluid/framework/details/parallel_ssa_graph_executor.cc
@@ -30,7 +30,7 @@ static std::vector<std::unique_ptr<ir::Graph>> SeparateMultiDevicesGraph(
   graphs.reserve(place_num);
   for (size_t i = 0; i < place_num; ++i) {
     ProgramDesc empty;
-    graphs.emplace_back(std::unique_ptr<ir::Graph>(new ir::Graph(empty)));
+    graphs.emplace_back(std::make_unique<ir::Graph>(empty));
     auto &g = graphs.back();
     g->Set(kGraphVars, new GraphVars(1UL));
     g->Set(kGraphDepVars, new GraphDepVars);
paddle/fluid/framework/details/reduce_op_handle_test.cc
@@ -71,7 +71,7 @@ struct TestReduceOpHandle {
       gpu_list_.push_back(p);
       ctxs_.emplace_back(new phi::GPUContext(p));
     }
-    nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_));
+    nccl_ctxs_ = std::make_unique<platform::NCCLContextMap>(gpu_list_);
 #else
     PADDLE_THROW(
         platform::errors::PreconditionNotMet("Not compiled with NCLL."));
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
@@ -282,7 +282,7 @@ void ThreadedSSAGraphExecutor::InsertPendingVar(
 }
 
 void ThreadedSSAGraphExecutor::PrepareOpDeps() {
-  op_deps_.reset(new OpDependentData());
+  op_deps_ = std::make_unique<OpDependentData>();
   std::unordered_map<OpHandleBase *, size_t> &pending_ops =
       op_deps_->pending_ops_;
   std::unordered_set<VarHandleBase *> &pending_vars = op_deps_->pending_vars_;
paddle/fluid/framework/executor.cc
@@ -346,7 +346,7 @@ void Executor::Run(const ProgramDesc& program,
   ProgramDesc* copy_program = const_cast<ProgramDesc*>(&program);
   std::unique_ptr<ProgramDesc> unique_ptr_of_copy_program;
   if (!has_feed_ops || !has_fetch_ops) {
-    unique_ptr_of_copy_program.reset(new ProgramDesc(program));
+    unique_ptr_of_copy_program = std::make_unique<ProgramDesc>(program);
     copy_program = unique_ptr_of_copy_program.get();
   }
   auto* global_block = copy_program->MutableBlock(0);
@@ -494,26 +494,28 @@ void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx,
   if (platform::is_gpu_place(place_)) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
     if (IsFastEagerDeletionModeEnabled()) {
-      gc.reset(new UnsafeFastGPUGarbageCollector(place_, max_memory_size));
+      gc = std::make_unique<UnsafeFastGPUGarbageCollector>(place_,
+                                                           max_memory_size);
     } else {
-      gc.reset(new DefaultStreamGarbageCollector(place_, max_memory_size));
+      gc = std::make_unique<DefaultStreamGarbageCollector>(place_,
+                                                           max_memory_size);
    }
 #else
     PADDLE_THROW(
         platform::errors::Unimplemented("No GPU gc found in CPU/XPU paddle"));
 #endif
   } else if (platform::is_cpu_place(place_)) {
-    gc.reset(new CPUGarbageCollector(place_, max_memory_size));
+    gc = std::make_unique<CPUGarbageCollector>(place_, max_memory_size);
   } else if (platform::is_xpu_place(place_)) {
 #ifdef PADDLE_WITH_XPU
-    gc.reset(new XPUGarbageCollector(place_, max_memory_size));
+    gc = std::make_unique<XPUGarbageCollector>(place_, max_memory_size);
 #else
     PADDLE_THROW(
         platform::errors::Unimplemented("No XPU gc found in CPU/GPU paddle"));
 #endif
   } else if (platform::is_ipu_place(place_)) {
 #ifdef PADDLE_WITH_IPU
-    gc.reset(new IPUGarbageCollector(place_, max_memory_size));
+    gc = std::make_unique<IPUGarbageCollector>(place_, max_memory_size);
 #else
     PADDLE_THROW(
         platform::errors::Unimplemented("No IPU gc found in CPU/IPU paddle"));
@@ -522,12 +524,12 @@ void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx,
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
     if (IsFastEagerDeletionModeEnabled()) {
       VLOG(4) << "Use unsafe fast gc for " << place_ << ".";
-      gc.reset(new CustomDeviceUnsafeFastGarbageCollector(place_,
-                                                          max_memory_size));
+      gc = std::make_unique<CustomDeviceUnsafeFastGarbageCollector>(
+          place_, max_memory_size);
     } else {
       VLOG(4) << "Use default stream gc for " << place_ << ".";
-      gc.reset(
-          new CustomDefaultStreamGarbageCollector(place_, max_memory_size));
+      gc = std::make_unique<CustomDefaultStreamGarbageCollector>(
+          place_, max_memory_size);
     }
 #else
     PADDLE_THROW(platform::errors::Unimplemented("No CustomDevice gc found"));
paddle/fluid/framework/executor_thread_worker.cc
@@ -345,7 +345,7 @@ void ExecutorThreadWorker::SetPlace(const platform::Place& place) {
 void ExecutorThreadWorker::SetMainProgram(
     const ProgramDesc& main_program_desc) {
-  main_program_.reset(new ProgramDesc(main_program_desc));
+  main_program_ = std::make_unique<ProgramDesc>(main_program_desc);
 }
 
 void ExecutorThreadWorker::SetRootScope(Scope* g_scope) {
paddle/fluid/framework/garbage_collector.cc
@@ -31,10 +31,10 @@ namespace framework {
 GarbageCollector::GarbageCollector(const platform::Place &place,
                                    size_t max_memory_size)
     : max_memory_size_((std::max)(max_memory_size, static_cast<size_t>(1))) {
-  garbages_.reset(new GarbageQueue());
+  garbages_ = std::make_unique<GarbageQueue>();
   dev_ctx_ = platform::DeviceContextPool::Instance().Get(place);
   if (max_memory_size_ > 1) {
-    mutex_.reset(new std::mutex());
+    mutex_ = std::make_unique<std::mutex>();
   }
 }
@@ -95,8 +95,8 @@ StreamGarbageCollector::StreamGarbageCollector(const platform::CUDAPlace &place,
   PADDLE_ENFORCE_GPU_SUCCESS(hipStreamCreate(&stream_));
 #else
   PADDLE_ENFORCE_GPU_SUCCESS(cudaStreamCreate(&stream_));
-  callback_manager_.reset(
-      new platform::StreamCallbackManager<gpuStream_t>(stream_));
+  callback_manager_ =
+      std::make_unique<platform::StreamCallbackManager<gpuStream_t>>(stream_);
 #endif
 }
@@ -155,9 +155,9 @@ CustomStreamGarbageCollector::CustomStreamGarbageCollector(
     const platform::CustomPlace &place, size_t max_memory_size)
     : GarbageCollector(place, max_memory_size) {
   phi::DeviceGuard guard(place);
-  stream_.reset(new phi::stream::Stream);
+  stream_ = std::make_unique<phi::stream::Stream>();
   stream_->Init(place);
-  callback_manager_.reset(new phi::CallbackManager(stream_.get()));
+  callback_manager_ = std::make_unique<phi::CallbackManager>(stream_.get());
 }
 
 CustomStreamGarbageCollector::~CustomStreamGarbageCollector() {
paddle/fluid/framework/garbage_collector.h
@@ -204,7 +204,7 @@ void GarbageCollector::Add(Container &&objs, Callback &&callback) {
     if (cur_memory_size_ >= max_memory_size_) {
       cur_memory_size_ = 0;
       garbage_queue = garbages_.release();
-      garbages_.reset(new GarbageQueue());
+      garbages_ = std::make_unique<GarbageQueue>();
     }
   }
paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc
@@ -98,14 +98,14 @@ class ReferenceCountPassTestHelper {
     details::ExecutionStrategy exec_strategy;
     exec_strategy.use_device_ = use_cuda ? p::kCUDA : p::kCPU;
-    executor_.reset(new ParallelExecutor(CreatePlaces(1, use_cuda),
-                                         {},
-                                         "",
-                                         &scope_,
-                                         {},
-                                         exec_strategy,
-                                         build_strategy,
-                                         &graph_));
+    executor_ = std::make_unique<ParallelExecutor>(CreatePlaces(1, use_cuda),
+                                                   std::vector<std::string>(),
+                                                   "",
+                                                   &scope_,
+                                                   std::vector<Scope *>(),
+                                                   exec_strategy,
+                                                   build_strategy,
+                                                   &graph_);
     auto ref_cnt_pass =
         ir::PassRegistry::Instance().Get("reference_count_pass");
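Note that the hunk above changes more than the allocation: the braced {} arguments become explicit std::vector<std::string>() and std::vector<Scope *>(). std::make_unique perfect-forwards its arguments, and a braced-init-list carries no type for template deduction, so the empty containers must be spelled out. A minimal sketch, assuming an illustrative Demo type standing in for ParallelExecutor:

    #include <memory>
    #include <string>
    #include <vector>

    // Illustrative type taking a vector; not from the Paddle sources.
    struct Demo {
      explicit Demo(std::vector<std::string> names) : names_(std::move(names)) {}
      std::vector<std::string> names_;
    };

    int main() {
      // Direct construction accepts a braced-init-list:
      std::unique_ptr<Demo> a(new Demo({}));

      // auto bad = std::make_unique<Demo>({});  // does not compile: {} cannot be deduced

      // Through make_unique the argument type must be named:
      auto b = std::make_unique<Demo>(std::vector<std::string>());
      return 0;
    }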
paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc
@@ -56,8 +56,8 @@ static const std::initializer_list<std::string> rnn_variable_names{
 class ComputePropagateScalesMkldnnPassTest : public testing::Test {
  public:
-  ComputePropagateScalesMkldnnPassTest() {  // NOLINT
-    pass.reset(new ComputePropagateScalesMkldnnPass());
+  ComputePropagateScalesMkldnnPassTest() {
+    pass = std::make_unique<ComputePropagateScalesMkldnnPass>();
   }
 
   std::vector<float> GetScales(phi::DenseTensor* tensor, int axis) const {
paddle/fluid/framework/ir/pass_test.cc
@@ -69,7 +69,7 @@ TEST(PassTest, TestPassAttrCheck) {
              "test_pass > is not set") != exception.npos);
   int val = 1;
-  graph.reset(new Graph(prog));
+  graph = std::make_unique<Graph>(prog);
   pass->SetNotOwned<int>("test_pass_attr", &val);
 
   for (std::string try_type : {"bool", "const int", "std::string"}) {
@@ -99,7 +99,7 @@ TEST(PassTest, TestPassAttrCheck) {
              "Required atrribute test_graph_attr for graph is not set") !=
          exception.npos);
-  graph.reset(new Graph(prog));
+  graph = std::make_unique<Graph>(prog);
   graph->Set<int>("test_graph_attr", new int);
   graph->Get<int>("test_graph_attr") = 1;
   graph.reset(pass->Apply(graph.release()));
@@ -107,13 +107,13 @@ TEST(PassTest, TestPassAttrCheck) {
   ASSERT_EQ(graph->Get<int>("copy_test_graph_attr"), 2);
 
   // Allow apply more than once.
-  graph.reset(new Graph(prog));
+  graph = std::make_unique<Graph>(prog);
   graph->Set<int>("test_graph_attr", new int);
   graph.reset(pass->Apply(graph.release()));
 
   pass = PassRegistry::Instance().Get("test_pass");
   pass->SetNotOwned<int>("test_pass_attr", &val);
-  graph.reset(new Graph(prog));
+  graph = std::make_unique<Graph>(prog);
   BuildCircleGraph(graph.get());
   graph->Set<int>("test_graph_attr", new int);
   graph->Get<int>("test_graph_attr") = 2;
@@ -154,7 +154,7 @@ TEST(PassTest, TestPassAttrCheckConvertAllBlocks) {
              "test_pass > is not set") != exception.npos);
   int val = 1;
-  graph.reset(new Graph(prog));
+  graph = std::make_unique<Graph>(prog);
   pass->SetNotOwned<int>("test_pass_attr", &val);
 
   for (std::string try_type : {"bool", "const int", "std::string"}) {
@@ -184,7 +184,7 @@ TEST(PassTest, TestPassAttrCheckConvertAllBlocks) {
              "Required atrribute test_graph_attr for graph is not set") !=
          exception.npos);
-  graph.reset(new Graph(prog));
+  graph = std::make_unique<Graph>(prog);
   graph->Set<int>("test_graph_attr", new int);
   graph->Get<int>("test_graph_attr") = 1;
   graph.reset(pass->Apply(graph.release()));
@@ -192,13 +192,13 @@ TEST(PassTest, TestPassAttrCheckConvertAllBlocks) {
   ASSERT_EQ(graph->Get<int>("copy_test_graph_attr"), 2);
 
   // Allow apply more than once.
-  graph.reset(new Graph(prog));
+  graph = std::make_unique<Graph>(prog);
   graph->Set<int>("test_graph_attr", new int);
   graph.reset(pass->Apply(graph.release()));
 
   pass = PassRegistry::Instance().Get("test_pass");
   pass->SetNotOwned<int>("test_pass_attr", &val);
-  graph.reset(new Graph(prog));
+  graph = std::make_unique<Graph>(prog);
   BuildCircleGraph(graph.get());
   graph->Set<int>("test_graph_attr", new int);
   graph->Get<int>("test_graph_attr") = 2;
paddle/fluid/framework/operator.cc
@@ -1665,7 +1665,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   if (runtime_ctx_.get() == nullptr || pre_scope_ != cur_scope) {
     std::lock_guard<std::mutex> lock(cache_update_mutex_);
     if (runtime_ctx_.get() == nullptr || pre_scope_ != cur_scope) {
-      runtime_ctx_.reset(new RuntimeContext(Inputs(), Outputs(), scope));
+      runtime_ctx_ =
+          std::make_unique<RuntimeContext>(Inputs(), Outputs(), scope);
       pre_scope_ = cur_scope;
     }
   }
@@ -1702,16 +1703,17 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   if (phi::KernelFactory::Instance().HasCompatiblePhiKernel(type_)) {
     if (kernel_signature_ == nullptr || phi_kernel_ == nullptr) {
       if (phi::KernelFactory::Instance().HasStructuredKernel(type_)) {
-        kernel_signature_.reset(new phi::KernelSignature(type_.c_str()));
+        kernel_signature_ =
+            std::make_unique<phi::KernelSignature>(type_.c_str());
       } else {
-        kernel_signature_.reset(new phi::KernelSignature(
-            std::move(GetExpectedPhiKernelArgs(exe_ctx))));
+        kernel_signature_ = std::make_unique<phi::KernelSignature>(
+            std::move(GetExpectedPhiKernelArgs(exe_ctx)));
       }
       VLOG(6) << *kernel_signature_.get();
       phi_kernel_name = kernel_signature_->name;
-      kernel_type_.reset(
-          new OpKernelType(std::move(InnerGetExpectedKernelType(exe_ctx))));
+      kernel_type_ = std::make_unique<OpKernelType>(
+          std::move(InnerGetExpectedKernelType(exe_ctx)));
       dev_ctx = pool.Get(kernel_type_->place_);
 // NOTE(Liu-xiandong): The register kernel used KP have library_type[KP],
 // But the default library_type is Plain, so we need to modify the
@@ -1754,9 +1756,9 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
       }
 #endif
       phi_kernel_key = TransOpKernelTypeToPhiKernelKey(*kernel_type_.get());
-      phi_kernel_.reset(
-          new phi::Kernel(phi::KernelFactory::Instance().SelectKernel(
-              phi_kernel_name, phi_kernel_key)));
+      phi_kernel_ = std::make_unique<phi::Kernel>(
+          phi::KernelFactory::Instance().SelectKernel(phi_kernel_name,
+                                                      phi_kernel_key));
 
       if (phi_kernel_->IsValid()) {
         VLOG(6) << "Static graph mode ChoosePhiKernel - kernel name: "
@@ -1898,9 +1900,9 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
           VLOG(3) << "fluid in black list: " << phi_kernel_name;
         }
         phi_cpu_kernel_key = FallBackToCpu(phi_kernel_key, *this);
-        phi_kernel_.reset(
-            new phi::Kernel(phi::KernelFactory::Instance().SelectKernel(
-                phi_kernel_name, phi_cpu_kernel_key)));
+        phi_kernel_ = std::make_unique<phi::Kernel>(
+            phi::KernelFactory::Instance().SelectKernel(phi_kernel_name,
+                                                        phi_cpu_kernel_key));
         dev_ctx = pool.Get(platform::CPUPlace());
         if (phi_kernel_->IsValid()) {
@@ -1996,11 +1998,11 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
       }
     }
-    impl_.reset(
-        new CacheImpl(new phi::KernelContext(),
-                      new RuntimeInferShapeContext(*this, *runtime_ctx),
-                      tensors,
-                      HasAttr(CacheImpl::kNotAllowInferShapeCahce)));
+    impl_ = std::make_unique<CacheImpl>(
+        new phi::KernelContext(),
+        new RuntimeInferShapeContext(*this, *runtime_ctx),
+        tensors,
+        HasAttr(CacheImpl::kNotAllowInferShapeCahce));
     BuildPhiKernelContext(*runtime_ctx, dev_ctx, impl_->getKernelContext());
     (*phi_kernel_)(impl_->getKernelContext());
   } else {
@@ -2211,19 +2213,20 @@ phi::KernelKey OperatorWithKernel::ChoosePhiKernel(
     const ExecutionContext& ctx) const {
   std::string phi_kernel_name;
   if (phi::KernelFactory::Instance().HasStructuredKernel(type_)) {
-    kernel_signature_.reset(new phi::KernelSignature(type_.c_str()));
+    kernel_signature_ = std::make_unique<phi::KernelSignature>(type_.c_str());
   } else {
-    kernel_signature_.reset(
-        new phi::KernelSignature(std::move(GetExpectedPhiKernelArgs(ctx))));
+    kernel_signature_ = std::make_unique<phi::KernelSignature>(
+        std::move(GetExpectedPhiKernelArgs(ctx)));
   }
   VLOG(6) << *kernel_signature_.get();
   phi_kernel_name = kernel_signature_->name;
-  kernel_type_.reset(
-      new OpKernelType(std::move(InnerGetExpectedKernelType(ctx))));
+  kernel_type_ = std::make_unique<OpKernelType>(
+      std::move(InnerGetExpectedKernelType(ctx)));
   auto phi_kernel_key = TransOpKernelTypeToPhiKernelKey(*kernel_type_.get());
-  phi_kernel_.reset(new phi::Kernel(phi::KernelFactory::Instance().SelectKernel(
-      phi_kernel_name, phi_kernel_key)));
+  phi_kernel_ =
+      std::make_unique<phi::Kernel>(phi::KernelFactory::Instance().SelectKernel(
+          phi_kernel_name, phi_kernel_key));
 
   if (phi_kernel_->IsValid()) {
     VLOG(6) << "Static graph mode ChoosePhiKernel - kernel name: "
@@ -2356,8 +2359,8 @@ void OperatorWithKernel::ChooseKernel(const ExecutionContext& ctx) const {
   std::lock_guard<std::mutex> lock(cache_update_mutex_);
   if (kernel_type_.get() == nullptr || kernel_func_.get() == nullptr) {
-    kernel_type_.reset(new OpKernelType(expected_kernel_key));
-    kernel_func_.reset(new OpKernelFunc(kernel_iter->second));
+    kernel_type_ = std::make_unique<OpKernelType>(expected_kernel_key);
+    kernel_func_ = std::make_unique<OpKernelFunc>(kernel_iter->second);
   }
 }
@@ -3004,14 +3007,14 @@ phi::KernelSignature OperatorWithKernel::GetExpectedPhiKernelArgs(
   if (arg_map_fn_ == nullptr) {
     auto* arg_map_fn = phi::OpUtilsMap::Instance().GetArgumentMappingFn(type_);
     if (arg_map_fn) {
-      arg_map_fn_.reset(new phi::ArgumentMappingFn(*arg_map_fn));
+      arg_map_fn_ = std::make_unique<phi::ArgumentMappingFn>(*arg_map_fn);
    } else {
      auto func =
          [this](
              const phi::ArgumentMappingContext& ctx) -> phi::KernelSignature {
        return phi::DefaultKernelSignatureMap::Instance().Get(type_);
      };
-      arg_map_fn_.reset(new phi::ArgumentMappingFn(func));
+      arg_map_fn_ = std::make_unique<phi::ArgumentMappingFn>(func);
    }
  }
  return (*arg_map_fn_)(arg_mapping_ctx);
paddle/fluid/framework/parallel_executor.cc
@@ -514,9 +514,10 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
     if (platform::is_gpu_place(place)) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
       if (IsFastEagerDeletionModeEnabled()) {
-        gc.reset(new UnsafeFastGPUGarbageCollector(place, max_memory_size));
+        gc = std::make_unique<UnsafeFastGPUGarbageCollector>(place,
+                                                             max_memory_size);
       } else {
-        gc.reset(new StreamGarbageCollector(place, max_memory_size));
+        gc = std::make_unique<StreamGarbageCollector>(place, max_memory_size);
       }
       VLOG(10) << "Created " << i << "-th GarbageCollector at " << place;
 #else
@@ -526,7 +527,7 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
 #endif
     } else if (platform::is_xpu_place(place)) {
 #if defined(PADDLE_WITH_XPU)
-      gc.reset(new XPUGarbageCollector(place, max_memory_size));
+      gc = std::make_unique<XPUGarbageCollector>(place, max_memory_size);
       VLOG(10) << "Created " << i << "-th GarbageCollector at " << place;
 #else
       PADDLE_THROW(platform::errors::PermissionDenied(
@@ -535,7 +536,7 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
 #endif
     } else if (platform::is_ipu_place(place)) {
 #if defined(PADDLE_WITH_IPU)
-      gc.reset(new IPUGarbageCollector(place, max_memory_size));
+      gc = std::make_unique<IPUGarbageCollector>(place, max_memory_size);
       VLOG(10) << "Created " << i << "-th GarbageCollector at " << place;
 #else
       PADDLE_THROW(platform::errors::PermissionDenied(
@@ -545,10 +546,11 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
     } else if (platform::is_custom_place(place)) {
 #if defined(PADDLE_WITH_CUSTOM_DEVICE)
       if (IsFastEagerDeletionModeEnabled()) {
-        gc.reset(
-            new CustomDeviceUnsafeFastGarbageCollector(place, max_memory_size));
+        gc = std::make_unique<CustomDeviceUnsafeFastGarbageCollector>(
+            place, max_memory_size);
       } else {
-        gc.reset(new CustomStreamGarbageCollector(place, max_memory_size));
+        gc = std::make_unique<CustomStreamGarbageCollector>(place,
+                                                            max_memory_size);
       }
       VLOG(10) << "Created " << i << "-th GarbageCollector at " << place;
 #else
@@ -558,7 +560,7 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
           "Please recompile or reinstall Paddle with CustomDevice support."));
 #endif
     } else if (platform::is_cpu_place(place)) {
-      gc.reset(new CPUGarbageCollector(place, max_memory_size));
+      gc = std::make_unique<CPUGarbageCollector>(place, max_memory_size);
       VLOG(10) << "Created GarbageCollector at " << place;
     } else {
       PADDLE_THROW(platform::errors::PreconditionNotMet(
@@ -726,13 +728,14 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
   VLOG(3) << "use ScopeBufferedSSAGraphExecutor";
   if (!member_->build_strategy_.async_mode_) {
-    member_->executor_.reset(new details::ScopeBufferedSSAGraphExecutor(
-        exec_strategy,
-        member_->local_scopes_,
-        member_->local_exec_scopes_,
-        std::move(var_infos),
-        member_->places_,
-        std::move(member_->executor_)));
+    member_->executor_ =
+        std::make_unique<details::ScopeBufferedSSAGraphExecutor>(
+            exec_strategy,
+            member_->local_scopes_,
+            member_->local_exec_scopes_,
+            std::move(var_infos),
+            member_->places_,
+            std::move(member_->executor_));
   }
   ResetOpHandleScopeMapOfGraphs(final_graphs, scope_map);
@@ -1646,12 +1649,12 @@ std::vector<ir::Graph *> ParallelExecutor::CreateSSAGraphExecutor(
   if (member_->build_strategy_.async_mode_) {
     VLOG(3) << "use AsyncSSAGraphExecutor";
-    member_->executor_.reset(
-        new details::AsyncSSAGraphExecutor(exec_strategy,
-                                           member_->local_scopes_,
-                                           member_->local_exec_scopes_,
-                                           member_->places_,
-                                           *async_graphs));
+    member_->executor_ =
+        std::make_unique<details::AsyncSSAGraphExecutor>(
+            exec_strategy,
+            member_->local_scopes_,
+            member_->local_exec_scopes_,
+            member_->places_,
+            *async_graphs);
     final_graphs = *async_graphs;
   } else if (member_->build_strategy_.enable_parallel_graph_) {
     VLOG(3) << "use ParallelSSAGraphExecutor";
@@ -1713,22 +1716,24 @@ std::vector<ir::Graph *> ParallelExecutor::CreateSSAGraphExecutor(
              "network. It is automatically turned to drop_last=True.";
     if (exec_strategy.type_ == ExecutionStrategy::kDefault) {
       VLOG(3) << "use ThreadedSSAGraphExecutor";
-      member_->executor_.reset(
-          new details::ThreadedSSAGraphExecutor(exec_strategy,
-                                                member_->local_scopes_,
-                                                member_->local_exec_scopes_,
-                                                member_->places_,
-                                                graph));
+      member_->executor_ =
+          std::make_unique<details::ThreadedSSAGraphExecutor>(
+              exec_strategy,
+              member_->local_scopes_,
+              member_->local_exec_scopes_,
+              member_->places_,
+              graph);
     } else {
       if (member_->use_device_ == p::kXPU) {
 #if defined(PADDLE_WITH_XPU)
         VLOG(3) << "use BindThreadedSSAGraphExecutor";
-        member_->executor_.reset(new details::BindThreadedSSAGraphExecutor(
-            exec_strategy,
-            member_->local_scopes_,
-            member_->local_exec_scopes_,
-            member_->places_,
-            graph));
+        member_->executor_ =
+            std::make_unique<details::BindThreadedSSAGraphExecutor>(
+                exec_strategy,
+                member_->local_scopes_,
+                member_->local_exec_scopes_,
+                member_->places_,
+                graph);
 #else
         PADDLE_THROW(platform::errors::PermissionDenied(
            "Paddle can't use XPU device since it's not compiled with XPU,"
@@ -1736,12 +1741,13 @@ std::vector<ir::Graph *> ParallelExecutor::CreateSSAGraphExecutor(
 #endif
       } else {
         VLOG(3) << "use FastThreadedSSAGraphExecutor";
-        member_->executor_.reset(new details::FastThreadedSSAGraphExecutor(
-            exec_strategy,
-            member_->local_scopes_,
-            member_->local_exec_scopes_,
-            member_->places_,
-            graph));
+        member_->executor_ =
+            std::make_unique<details::FastThreadedSSAGraphExecutor>(
+                exec_strategy,
+                member_->local_scopes_,
+                member_->local_exec_scopes_,
+                member_->places_,
+                graph);
       }
     }
     final_graphs.emplace_back(graph);
paddle/fluid/framework/section_worker.cc
@@ -25,8 +25,8 @@ uint64_t SectionWorker::batch_id_(0);
 void SectionWorker::Initialize(const TrainerDesc &desc) {
   dev_ctx_ = platform::DeviceContextPool::Instance().Get(place_);
-  program_.reset(
-      new ProgramDesc(desc.section_param().section_config().program_desc()));
+  program_ = std::make_unique<ProgramDesc>(
+      desc.section_param().section_config().program_desc());
   for (auto &op_desc : program_->Block(0).AllOps()) {
     ops_.push_back(OpRegistry::CreateOp(*op_desc));
   }
@@ -231,7 +231,8 @@ void SectionWorker::TrainFiles() {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   if (platform::is_gpu_place(place_)) {
     if (IsFastEagerDeletionModeEnabled()) {
-      gc.reset(new UnsafeFastGPUGarbageCollector(place_, max_memory_size));
+      gc = std::make_unique<UnsafeFastGPUGarbageCollector>(place_,
+                                                           max_memory_size);
     }
   }
 #endif
paddle/fluid/framework/selected_rows_utils_test.cc
@@ -26,7 +26,7 @@ class SelectedRowsTester : public ::testing::Test {
     std::vector<int64_t> rows{0, 4, 7};
     int64_t height = 10;
     int64_t row_numel = 100;
-    selected_rows_.reset(new phi::SelectedRows(rows, height));
+    selected_rows_ = std::make_unique<phi::SelectedRows>(rows, height);
     phi::DenseTensor* value = selected_rows_->mutable_value();
     auto* data = value->mutable_data<float>(
paddle/fluid/imperative/basic_engine.cc
浏览文件 @
9f04f2ac
...
@@ -126,9 +126,10 @@ void BasicEngine::Init(
...
@@ -126,9 +126,10 @@ void BasicEngine::Init(
[
init_grad_var
];
[
init_grad_var
];
if
(
!
accumulator
)
{
if
(
!
accumulator
)
{
if
(
FLAGS_sort_sum_gradient
)
{
if
(
FLAGS_sort_sum_gradient
)
{
accumulator
.
reset
(
new
SortedGradientAccumulator
(
init_grad_var
));
accumulator
=
std
::
make_unique
<
SortedGradientAccumulator
>
(
init_grad_var
);
}
else
{
}
else
{
accumulator
.
reset
(
new
EagerGradientAccumulator
(
init_grad_var
)
);
accumulator
=
std
::
make_unique
<
EagerGradientAccumulator
>
(
init_grad_var
);
}
}
}
}
accumulator
->
IncreaseRefCnt
();
accumulator
->
IncreaseRefCnt
();
...
@@ -225,9 +226,11 @@ void BasicEngine::PrepareGradAccumulators(
...
@@ -225,9 +226,11 @@ void BasicEngine::PrepareGradAccumulators(
if
(
!
accumulator
)
{
if
(
!
accumulator
)
{
if
(
FLAGS_sort_sum_gradient
)
{
if
(
FLAGS_sort_sum_gradient
)
{
accumulator
.
reset
(
new
SortedGradientAccumulator
(
var
.
get
()));
accumulator
=
std
::
make_unique
<
SortedGradientAccumulator
>
(
var
.
get
());
}
else
{
}
else
{
accumulator
.
reset
(
new
EagerGradientAccumulator
(
var
.
get
()));
accumulator
=
std
::
make_unique
<
EagerGradientAccumulator
>
(
var
.
get
());
}
}
}
}
...
@@ -255,9 +258,10 @@ void BasicEngine::PrepareGradAccumulators(
...
@@ -255,9 +258,10 @@ void BasicEngine::PrepareGradAccumulators(
auto
&
accumulator
=
accumulators_
[
var
.
get
()];
auto
&
accumulator
=
accumulators_
[
var
.
get
()];
if
(
!
accumulator
)
{
if
(
!
accumulator
)
{
if
(
FLAGS_sort_sum_gradient
)
{
if
(
FLAGS_sort_sum_gradient
)
{
accumulator
.
reset
(
new
SortedGradientAccumulator
(
var
.
get
()));
accumulator
=
std
::
make_unique
<
SortedGradientAccumulator
>
(
var
.
get
());
}
else
{
}
else
{
accumulator
.
reset
(
new
EagerGradientAccumulator
(
var
.
get
()
));
accumulator
=
std
::
make_unique
<
EagerGradientAccumulator
>
(
var
.
get
(
));
}
}
}
}
...

paddle/fluid/imperative/gloo_context.cc
@@ -46,8 +46,7 @@ void GLOOParallelContext::Init() {
   int port = std::stoi(addr[1]);
   gloo_wrapper->SetHttpStore(host, port, "worker");
   gloo_wrapper->Init();
-  device_ = std::unique_ptr<phi::CPUContext>(
-      new phi::CPUContext(platform::CPUPlace()));
+  device_ = std::make_unique<phi::CPUContext>(platform::CPUPlace());
   device_->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
                             .GetAllocator(platform::CPUPlace())
                             .get());
...

paddle/fluid/imperative/partial_grad_engine.cc
@@ -367,11 +367,11 @@ class GradientAccumulationInfo {
     grad_var_ = std::make_shared<VarBase>(true, mapped_grad_var_->Name());
     grad_var_->SetOverridedStopGradient(false);
     if (sort_gradient_) {
-      accumulator_.reset(
-          new SortedGradientAccumulator(grad_var_->SharedVar().get()));
+      accumulator_ = std::make_unique<SortedGradientAccumulator>(
+          grad_var_->SharedVar().get());
     } else {
-      accumulator_.reset(
-          new EagerGradientAccumulator(grad_var_->SharedVar().get()));
+      accumulator_ = std::make_unique<EagerGradientAccumulator>(
+          grad_var_->SharedVar().get());
     }
     accumulator_->IncreaseRefCnt();
   }
...
@@ -1080,8 +1080,8 @@ void PartialGradTask::PrepareInitialGradientAccumulators(const OpBase *op) {
     auto& accumulator = grad_accumulators_[var.get()];
     if (!accumulator) {
-      accumulator.reset(new GradientAccumulationInfo(
-          var, FLAGS_sort_sum_gradient, create_graph_));
+      accumulator = std::make_unique<GradientAccumulationInfo>(
+          var, FLAGS_sort_sum_gradient, create_graph_);
     }
     accumulator->IncreaseTotalRefCnt();
...

paddle/fluid/imperative/tracer.cc
@@ -111,7 +111,7 @@ paddle::framework::GarbageCollector* Tracer::MutableGarbageCollectorIfNotExists(
   std::unique_ptr<framework::GarbageCollector> gc;
   if (platform::is_gpu_place(place)) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    gc.reset(new framework::DefaultStreamGarbageCollector(place, 0));
+    gc = std::make_unique<framework::DefaultStreamGarbageCollector>(place, 0);
     VLOG(10) << "Created GarbageCollector at " << place;
 #else
...
@@ -121,7 +121,7 @@ paddle::framework::GarbageCollector* Tracer::MutableGarbageCollectorIfNotExists(
 #endif
   } else if (platform::is_cuda_pinned_place(place)) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    gc.reset(new framework::CUDAPinnedGarbageCollector(place, 0));
+    gc = std::make_unique<framework::CUDAPinnedGarbageCollector>(place, 0);
     VLOG(10) << "Created GarbageCollector at " << place;
 #else
...
@@ -132,7 +132,7 @@ paddle::framework::GarbageCollector* Tracer::MutableGarbageCollectorIfNotExists(
 #endif
   } else if (platform::is_xpu_place(place)) {
 #if defined(PADDLE_WITH_XPU)
-    gc.reset(new framework::XPUGarbageCollector(place, 0));
+    gc = std::make_unique<framework::XPUGarbageCollector>(place, 0);
     VLOG(10) << "Created GarbageCollector at " << place;
 #else
     PADDLE_THROW(platform::errors::PermissionDenied(
...
@@ -140,11 +140,11 @@ paddle::framework::GarbageCollector* Tracer::MutableGarbageCollectorIfNotExists(
         "Please recompile or reinstall Paddle with XPU support."));
 #endif
   } else if (platform::is_cpu_place(place)) {
-    gc.reset(new framework::CPUGarbageCollector(place, 0));
+    gc = std::make_unique<framework::CPUGarbageCollector>(place, 0);
     VLOG(10) << "Created GarbageCollector at " << place;
   } else if (platform::is_ipu_place(place)) {
 #if defined(PADDLE_WITH_IPU)
-    gc.reset(new framework::IPUGarbageCollector(place, 0));
+    gc = std::make_unique<framework::IPUGarbageCollector>(place, 0);
     VLOG(10) << "Created GarbageCollector at " << place;
 #else
     PADDLE_THROW(platform::errors::PermissionDenied(
...
@@ -154,11 +154,13 @@ paddle::framework::GarbageCollector* Tracer::MutableGarbageCollectorIfNotExists(
   } else if (platform::is_custom_place(place)) {
 #if defined(PADDLE_WITH_CUSTOM_DEVICE)
     if (framework::IsFastEagerDeletionModeEnabled()) {
-      gc.reset(
-          new framework::CustomDeviceUnsafeFastGarbageCollector(place, 0));
+      gc =
+          std::make_unique<framework::CustomDeviceUnsafeFastGarbageCollector>(
+              place, 0);
       VLOG(10) << "Created UnsafeFastGarbageCollector at " << place;
     } else {
-      gc.reset(new framework::CustomDefaultStreamGarbageCollector(place, 0));
+      gc = std::make_unique<framework::CustomDefaultStreamGarbageCollector>(
+          place, 0);
       VLOG(10) << "Created GarbageCollector at " << place;
     }
 #else
...

paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc
@@ -79,8 +79,7 @@ void ConvertToMixedPrecisionPass::LoadModel() {
   bool load_params = !params_file_.empty();
   auto program_desc =
       inference::Load(&exe, &scope_, model_file_, params_file_, load_params);
-  main_graph_ = std::unique_ptr<framework::ir::Graph>(
-      new framework::ir::Graph(*program_desc));
+  main_graph_ = std::make_unique<framework::ir::Graph>(*program_desc);
   main_graph_->SetNotOwned(framework::ir::kParamScopeAttr, &scope_);
 }
...

paddle/fluid/inference/analysis/passes/passes.cc
@@ -30,24 +30,20 @@ namespace analysis {
 PassRegistry::PassRegistry() {  // NOLINT
   // Register manually to avoid the trivial `USE_OP` like macro for easier use
   // and link.
-  passes_.emplace("ir_analysis_pass",
-                  std::unique_ptr<AnalysisPass>(new IrAnalysisPass));
-  passes_.emplace("ir_graph_build_pass",
-                  std::unique_ptr<AnalysisPass>(new IrGraphBuildPass));
-  passes_.emplace("save_optimized_model_pass",
-                  std::unique_ptr<AnalysisPass>(new SaveOptimizedModelPass));
-  passes_.emplace("memory_optimize_pass",
-                  std::unique_ptr<AnalysisPass>(new MemoryOptimizePass));
-  passes_.emplace(
-      "ir_params_sync_among_devices_pass",
-      std::unique_ptr<AnalysisPass>(new IrParamsSyncAmongDevicesPass));
-  passes_.emplace("adjust_cudnn_workspace_size_pass",
-                  std::unique_ptr<AnalysisPass>(new AdjustCudnnWorkSpacePass));
-  passes_.emplace("inference_op_replace_pass",
-                  std::unique_ptr<AnalysisPass>(new InferenceOpReplacePass));
-  passes_.emplace(
-      "ir_graph_to_program_pass",
-      std::unique_ptr<IrGraphToProgramPass>(new IrGraphToProgramPass));
+  passes_.emplace("ir_analysis_pass", std::make_unique<IrAnalysisPass>());
+  passes_.emplace("ir_graph_build_pass", std::make_unique<IrGraphBuildPass>());
+  passes_.emplace("save_optimized_model_pass",
+                  std::make_unique<SaveOptimizedModelPass>());
+  passes_.emplace("memory_optimize_pass",
+                  std::make_unique<MemoryOptimizePass>());
+  passes_.emplace("ir_params_sync_among_devices_pass",
+                  std::make_unique<IrParamsSyncAmongDevicesPass>());
+  passes_.emplace("adjust_cudnn_workspace_size_pass",
+                  std::make_unique<AdjustCudnnWorkSpacePass>());
+  passes_.emplace("inference_op_replace_pass",
+                  std::make_unique<InferenceOpReplacePass>());
+  passes_.emplace("ir_graph_to_program_pass",
+                  std::make_unique<IrGraphToProgramPass>());
 }
 }  // namespace analysis
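
The registry change above also drops the explicit std::unique_ptr<AnalysisPass>(new ...) wrapper: a std::unique_ptr to a derived class converts implicitly to a std::unique_ptr of its base, so std::make_unique<Derived>() can be emplaced directly. A reduced sketch with hypothetical stand-in types:

#include <memory>
#include <string>
#include <unordered_map>

// Hypothetical stand-ins for AnalysisPass and a concrete pass.
struct Pass {
  virtual ~Pass() = default;
};
struct IrAnalysisPass : Pass {};

int main() {
  std::unordered_map<std::string, std::unique_ptr<Pass>> passes;
  // unique_ptr<IrAnalysisPass> converts to unique_ptr<Pass> on the way in,
  // so no explicit unique_ptr<Pass>(new IrAnalysisPass) wrapper is needed.
  passes.emplace("ir_analysis_pass", std::make_unique<IrAnalysisPass>());
}
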
...

paddle/fluid/inference/api/analysis_predictor.cc
@@ -600,7 +600,7 @@ bool AnalysisPredictor::PrepareScope(
     paddle::framework::InitDevices();
     paddle::framework::InitDefaultKernelSignatureMap();
     // TODO(wilber): we need to release memory occupied by weights.
-    scope_.reset(new paddle::framework::Scope());
+    scope_ = std::make_unique<paddle::framework::Scope>();
     status_is_cloned_ = false;
   }
   sub_scope_ = &scope_->NewScope();
...
@@ -722,8 +722,8 @@ bool AnalysisPredictor::PrepareFleetExecutor() {
   if (config_.dist_config().nranks() > 1 && !CommInit()) {
     return false;
   }
-  task_node_.reset(new distributed::TaskNode(inference_program_.get(),
-                                             config_.dist_config().rank()));
+  task_node_ = std::make_unique<distributed::TaskNode>(
+      inference_program_.get(), config_.dist_config().rank());
   // With auto cut, there is no concept of pp, no need to add dependency.
   task_node_->SetType("Compute");
   task_node_->Init(config_.use_feed_fetch_ops_enabled());
...
@@ -736,7 +736,7 @@ bool AnalysisPredictor::PrepareFleetExecutor() {
     rank_info->set_ip_port(config_.dist_config().trainer_endpoints()[i]);
     id_to_rank.insert({i, i});
   }
-  fleet_exe_.reset(new distributed::FleetExecutor(executor_desc_));
+  fleet_exe_ = std::make_unique<distributed::FleetExecutor>(executor_desc_);
   // NOTE: Vars of feed fetch ops are not persistable,
   // which will result in that those vars will be created in
   // the subscope (microscope) in fleet executor. This will
...
@@ -2425,7 +2425,7 @@ bool AnalysisPredictor::LoadProgramDesc() {
   } else {
     proto.ParseFromString(config_.prog_file());
   }
-  inference_program_.reset(new framework::ProgramDesc(proto));
+  inference_program_ = std::make_unique<framework::ProgramDesc>(proto);
   return true;
 }
...
@@ -3111,7 +3111,7 @@ PredictorPool::PredictorPool(const Config &config, size_t size) {
           "The predictor pool size should be greater than 1, but it's (%d)",
           size));
   Config copy_config(config);
-  main_pred_.reset(new Predictor(config));
+  main_pred_ = std::make_unique<Predictor>(config);
   for (size_t i = 0; i < size - 1; i++) {
     if (config.tensorrt_engine_enabled()) {
       Config config_tmp(copy_config);
...

paddle/fluid/inference/api/api_impl.cc
@@ -95,10 +95,10 @@ bool NativePaddlePredictor::Init(
     paddle::framework::InitMemoryMethod();
     paddle::framework::InitDevices();
     paddle::framework::InitDefaultKernelSignatureMap();
-    scope_.reset(new paddle::framework::Scope());
+    scope_ = std::make_unique<paddle::framework::Scope>();
   }
-  executor_.reset(new paddle::framework::Executor(place_));
+  executor_ = std::make_unique<paddle::framework::Executor>(place_);
   // Initialize the inference program
   if (!config_.model_dir.empty()) {
...

paddle/fluid/inference/api/mkldnn_quantizer.cc
@@ -592,7 +592,7 @@ void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const {
   auto& arg = predictor_.argument_;
   if (!arg->scope_valid()) arg->SetScope(new framework::Scope);
   arg->SetMainProgramNotOwned(predictor_.inference_program_.get());
-  auto graph = std::unique_ptr<Graph>(new Graph(arg->main_program()));
+  auto graph = std::make_unique<Graph>(arg->main_program());
   arg->SetMainGraph(graph.release());
   auto* scope_ptr = arg->scope_ptr();
   PADDLE_ENFORCE_NOT_NULL(
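
One line in this hunk is worth noting: the graph is built with make_unique but then handed off with graph.release(), because SetMainGraph takes ownership through a raw pointer. A reduced sketch of that hand-off, with hypothetical stand-in types:

#include <memory>

struct Graph {};

// Hypothetical sink that takes ownership via a raw pointer, mirroring the
// SetMainGraph call in the hunk above.
struct Argument {
  void SetMainGraph(Graph* g) { main_graph_.reset(g); }
  std::unique_ptr<Graph> main_graph_;
};

int main() {
  Argument arg;
  auto graph = std::make_unique<Graph>();
  // release() relinquishes ownership so the raw-pointer API can adopt it.
  arg.SetMainGraph(graph.release());
}
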
...

paddle/fluid/inference/api/resource_manager.cc
@@ -127,7 +127,7 @@ Eigen::DefaultDevice* CPUContextResource::GetCPUEigenDevice() const {
 }
 void CPUContextResource::InitCPUResource() {
-  cpu_eigen_device_.reset(new Eigen::DefaultDevice());
+  cpu_eigen_device_ = std::make_unique<Eigen::DefaultDevice>();
 }
 CPUContextResource::CPUContextResource() { InitCPUResource(); }
...
@@ -186,9 +186,9 @@ void GPUContextResource::InitGpuEigenDevice() {
   auto* allocator = paddle::memory::allocation::AllocatorFacade::Instance()
                         .GetAllocator(place_)
                         .get();
-  eigen_stream_.reset(new internal::EigenGpuStreamDevice());
+  eigen_stream_ = std::make_unique<internal::EigenGpuStreamDevice>();
   eigen_stream_->Reinitialize(stream_, allocator, place_);
-  gpu_eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get()));
+  gpu_eigen_device_ = std::make_unique<Eigen::GpuDevice>(eigen_stream_.get());
 }
 void GPUContextResource::InitDnnHanlde() {
...
@@ -364,7 +364,7 @@ std::array<int, 3> GPUContextResource::GetGpuMaxGridDimSize() const {
 void ResourceManager::InitCPUResource() {
   std::lock_guard<std::mutex> lock_gurad(cpu_mutex_);
   if (cpu_resource_ == nullptr) {
-    cpu_resource_.reset(new CPUContextResource());
+    cpu_resource_ = std::make_unique<CPUContextResource>();
   }
 }
...

paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu
@@ -419,13 +419,14 @@ int GenericPlugin::initialize() TRT_NOEXCEPT {
         phi::Backend::GPU, phi::DataLayout::ANY, precision_type);
     auto nv_dtype = PhiType2NvType(precision_type);
-    phi_kernels_[nv_dtype].reset(
-        new phi::Kernel(phi::KernelFactory::Instance().SelectKernel(
-            phi_kernel_signature.name, phi_kernel_key)));
+    phi_kernels_[nv_dtype] = std::make_unique<phi::Kernel>(
+        phi::KernelFactory::Instance().SelectKernel(phi_kernel_signature.name,
+                                                    phi_kernel_key));
     if (phi_kernel_contexts_.find(nv_dtype) == phi_kernel_contexts_.end() ||
         !phi_kernel_contexts_[nv_dtype]) {
-      phi_kernel_contexts_[nv_dtype].reset(new phi::KernelContext(dev_ctx));
+      phi_kernel_contexts_[nv_dtype] =
+          std::make_unique<phi::KernelContext>(dev_ctx);
       BuildPhiKernelContextAttr(op_desc_,
                                 phi_kernel_contexts_[nv_dtype].get(),
                                 phi_kernel_signature,
...

paddle/fluid/memory/allocation/buffered_allocator_test.cc
@@ -112,8 +112,8 @@ TEST(buffered_allocator, garbage_collection) {
   auto chunk = cpu_allocator->Allocate(2048);
   std::unique_ptr<Allocator> allocator(new BestFitAllocator(chunk.get()));
-  auto buffered_allocator = std::unique_ptr<BufferedAllocator>(
-      new BufferedAllocator(std::move(allocator)));
+  auto buffered_allocator =
+      std::make_unique<BufferedAllocator>(std::move(allocator));
   auto x1 = buffered_allocator->Allocate(1600);
   auto x2 = buffered_allocator->Allocate(400);
   x1 = nullptr;
...

paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
@@ -248,11 +248,11 @@ class GPUBuddyAllocatorList {
     std::call_once(*init_flags_[pos], [this, pos] {
       platform::SetDeviceId(devices_[pos]);
-      allocators_[pos].reset(
-          new BuddyAllocator(std::unique_ptr<detail::SystemAllocator>(
-                                 new detail::GPUAllocator(devices_[pos])),
-                             platform::GpuMinChunkSize(),
-                             platform::GpuMaxChunkSize()));
+      allocators_[pos] = std::make_unique<BuddyAllocator>(
+          std::unique_ptr<detail::SystemAllocator>(
+              new detail::GPUAllocator(devices_[pos])),
+          platform::GpuMinChunkSize(),
+          platform::GpuMaxChunkSize());
       VLOG(10) << "\n\nNOTE:\n"
                << "You can set GFlags environment variable "
                << "'FLAGS_fraction_of_gpu_memory_to_use' "
...
@@ -430,7 +430,7 @@ class BuddyAllocatorList {
       : device_type_(device_type) {
     auto devices = phi::DeviceManager::GetSelectedDeviceList(device_type);
     for (auto dev_id : devices) {
-      init_flags_[dev_id].reset(new std::once_flag());
+      init_flags_[dev_id] = std::make_unique<std::once_flag>();
     }
   }
...
@@ -460,13 +460,13 @@ class BuddyAllocatorList {
      phi::DeviceManager::SetDevice(device_type_, dev_id);
      platform::CustomPlace place(device_type_, dev_id);
-      allocators_[dev_id].reset(new BuddyAllocator(
-          std::unique_ptr<detail::SystemAllocator>(
-              new detail::CustomAllocator(device_type_, dev_id)),
-          phi::DeviceManager::GetMinChunkSize(place),
-          phi::DeviceManager::GetMaxChunkSize(place),
-          phi::DeviceManager::GetExtraPaddingSize(place),
-          device_type_));
+      allocators_[dev_id] = std::make_unique<BuddyAllocator>(
+          std::unique_ptr<detail::SystemAllocator>(
+              new detail::CustomAllocator(device_type_, dev_id)),
+          phi::DeviceManager::GetMinChunkSize(place),
+          phi::DeviceManager::GetMaxChunkSize(place),
+          phi::DeviceManager::GetExtraPaddingSize(place),
+          device_type_);
     });
     return allocators_[dev_id].get();
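
Note that the inner std::unique_ptr<detail::SystemAllocator>(new detail::GPUAllocator(...)) argument survives both hunks untouched, presumably because the declared element type is a base class of the allocated type. An equivalent make_unique form would allocate through the derived type and rely on the implicit derived-to-base conversion, as in this sketch with hypothetical stand-ins:

#include <memory>

// Hypothetical stand-ins for detail::SystemAllocator / detail::GPUAllocator.
struct SystemAllocator {
  virtual ~SystemAllocator() = default;
};
struct GPUAllocator : SystemAllocator {
  explicit GPUAllocator(int device) : device_(device) {}
  int device_;
};

int main() {
  // Allocate the derived type, hold it through the base interface.
  std::unique_ptr<SystemAllocator> sys =
      std::make_unique<GPUAllocator>(/*device=*/0);
}
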
...

paddle/fluid/memory/malloc_test.cu
@@ -116,7 +116,7 @@ TEST(Malloc, GPUContextMultiStream) {
   main_stream_alloc_ptr.reset();
   for (int i = 0; i < NUM_STREAMS; ++i) {
-    auto ctx = std::unique_ptr<phi::GPUContext>(new phi::GPUContext(place));
+    auto ctx = std::make_unique<phi::GPUContext>(place);
     ctx->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
                           .GetAllocator(place, ctx->stream())
                           .get());
...
@@ -171,7 +171,7 @@ TEST(Malloc, GPUContextMultiThreadMultiStream) {
   main_stream_alloc_ptr.reset();
   for (int i = 0; i < NUM_STREAMS; ++i) {
-    auto ctx = std::unique_ptr<phi::GPUContext>(new phi::GPUContext(place));
+    auto ctx = std::make_unique<phi::GPUContext>(place);
     ctx->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
                           .GetAllocator(place, ctx->stream())
                           .get());
...

paddle/fluid/operators/nccl/nccl_gpu_common.cc
@@ -44,8 +44,8 @@ void Communicator::InitAll(const std::vector<int>& gpus) {
       dynload::ncclCommDestroy((*global_comms)[i]);
     }
   }
-  global_comms.reset(new std::vector<ncclComm_t>());
-  comm_id_map.reset(new std::unordered_map<int, int>());
+  global_comms = std::make_unique<std::vector<ncclComm_t>>();
+  comm_id_map = std::make_unique<std::unordered_map<int, int>>();
   global_comms->resize(gpus.size());
   for (size_t i = 0; i < gpus.size(); ++i) {
     (*comm_id_map)[gpus[i]] = i;
...

paddle/fluid/operators/reader/lod_tensor_blocking_queue.h
@@ -108,7 +108,7 @@ class OrderedMultiDeviceLoDTensorBlockingQueue {
       queues_.resize(dev_cnt);
      for (auto& item : queues_) {
         auto cap = (capacity_ + dev_cnt - 1) / dev_cnt;
-        item.reset(new LoDTensorBlockingQueue(cap, speed_test_mode_));
+        item = std::make_unique<LoDTensorBlockingQueue>(cap, speed_test_mode_);
       }
     }
     cv_.notify_all();
...
@@ -158,7 +158,7 @@ class OrderedMultiDeviceLoDTensorBlockingQueue {
     auto dev_cnt = queues_.size();
     for (auto& item : queues_) {
       auto cap = (capacity_ + dev_cnt - 1) / dev_cnt;
-      item.reset(new LoDTensorBlockingQueue(cap, speed_test_mode_));
+      item = std::make_unique<LoDTensorBlockingQueue>(cap, speed_test_mode_);
     }
     data_index_ = 0;
   }
...
@@ -211,7 +211,8 @@ class LoDTensorBlockingQueueHolder {
         nullptr,
         platform::errors::AlreadyExists("LoDTensorBlockingQueueHolder::"
                                         "InitOnce() can only be called once"));
-    queue_.reset(new LoDTensorBlockingQueue(capacity, speed_test_mode));
+    queue_ =
+        std::make_unique<LoDTensorBlockingQueue>(capacity, speed_test_mode);
   }
   inline const std::shared_ptr<LoDTensorBlockingQueue>& GetQueue() const {
...
@@ -230,8 +231,8 @@ class OrderedMultiDeviceLoDTensorBlockingQueueHolder {
         platform::errors::AlreadyExists(
             "OrderedMultiDeviceLoDTensorBlockingQueueHolder::"
             "InitOnce() can only be called once"));
-    queue_.reset(new OrderedMultiDeviceLoDTensorBlockingQueue(capacity,
-                                                              speed_test_mode));
+    queue_ = std::make_unique<OrderedMultiDeviceLoDTensorBlockingQueue>(
+        capacity, speed_test_mode);
   }
   inline const std::shared_ptr<OrderedMultiDeviceLoDTensorBlockingQueue>&
...

paddle/fluid/platform/device/xpu/xpu_info.cc
@@ -109,7 +109,7 @@ class RecordedXPUMallocHelper {
   explicit RecordedXPUMallocHelper(int dev_id, uint64_t limit_size = 0)
       : dev_id_(dev_id), limit_size_(limit_size) {
     if (NeedRecord()) {
-      mtx_.reset(new std::mutex());
+      mtx_ = std::make_unique<std::mutex>();
     }
   }
...

paddle/fluid/platform/profiler.cc
@@ -489,9 +489,8 @@ void MemEvenRecorder::PushMemRecord(const void *ptr,
       0,
       platform::errors::InvalidArgument(
           "The Place can't exist in the stage of PushMemRecord"));
-  events.emplace(ptr,
-                 std::unique_ptr<RecordMemEvent>(
-                     new MemEvenRecorder::RecordMemEvent(place, size)));
+  events.emplace(
+      ptr, std::make_unique<MemEvenRecorder::RecordMemEvent>(place, size));
 }
 void MemEvenRecorder::PushMemRecord(const void *ptr,
...
@@ -526,9 +525,8 @@ void MemEvenRecorder::PushMemRecord(const void *ptr,
       0,
       platform::errors::InvalidArgument(
           "The Place can't exist in the stage of PushMemRecord"));
-  events.emplace(ptr,
-                 std::unique_ptr<RecordMemEvent>(
-                     new MemEvenRecorder::RecordMemEvent(place, size)));
+  events.emplace(
+      ptr, std::make_unique<MemEvenRecorder::RecordMemEvent>(place, size));
 }
 void MemEvenRecorder::PopMemRecord(const void *ptr, const Place &place) {
...

paddle/fluid/pybind/eager_functions.cc
@@ -513,8 +513,8 @@ static PyObject* eager_api_run_custom_op(PyObject* self,
   EAGER_TRY
   FLAGS_tensor_operants_mode = "phi";
   if (paddle::OperantsManager::Instance().phi_operants.get() == nullptr) {
-    paddle::OperantsManager::Instance().phi_operants.reset(
-        new paddle::operants::PhiTensorOperants());
+    paddle::OperantsManager::Instance().phi_operants =
+        std::make_unique<paddle::operants::PhiTensorOperants>();
     VLOG(4) << "Initialize phi tensor operants successfully";
   }
...

paddle/fluid/pybind/inference_api.cc
@@ -499,8 +499,7 @@ void BindInferenceApi(py::module *m) {
   m->def("create_predictor",
          [](const paddle_infer::Config &config)
              -> std::unique_ptr<paddle_infer::Predictor> {
-           auto pred = std::unique_ptr<paddle_infer::Predictor>(
-               new paddle_infer::Predictor(config));
+           auto pred = std::make_unique<paddle_infer::Predictor>(config);
            return pred;
          });
   m->def(
...

paddle/fluid/pybind/pybind.cc
@@ -1999,12 +1999,12 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("init_default_kernel_signatures",
         []() { framework::InitDefaultKernelSignatureMap(); });
   m.def("init_tensor_operants", []() {
-    paddle::OperantsManager::Instance().eager_operants.reset(
-        new paddle::prim::EagerTensorOperants());
-    paddle::OperantsManager::Instance().static_operants.reset(
-        new paddle::prim::StaticTensorOperants());
-    paddle::OperantsManager::Instance().phi_operants.reset(
-        new paddle::operants::PhiTensorOperants());
+    paddle::OperantsManager::Instance().eager_operants =
+        std::make_unique<paddle::prim::EagerTensorOperants>();
+    paddle::OperantsManager::Instance().static_operants =
+        std::make_unique<paddle::prim::StaticTensorOperants>();
+    paddle::OperantsManager::Instance().phi_operants =
+        std::make_unique<paddle::operants::PhiTensorOperants>();
     VLOG(4) << "Initialize tensor operants successfully";
   });
   m.def("is_compiled_with_avx", IsCompiledWithAVX);
...

paddle/phi/backends/gpu/gpu_context.cc
@@ -333,7 +333,7 @@ struct GPUContext::Impl {
   void InitEigenDevice() {
     PD_CHECK(allocator_ != nullptr,
              "the allocator for eigen device is nullptr.");
-    eigen_stream_.reset(new internal::EigenGpuStreamDevice());
+    eigen_stream_ = std::make_unique<internal::EigenGpuStreamDevice>();
     eigen_stream_->Reinitialize(stream(), allocator_, place_);
     eigen_device_ = new Eigen::GpuDevice(eigen_stream_.get());
   }
...

paddle/phi/backends/gpu/gpu_context.h
@@ -38,7 +38,7 @@ class DnnWorkspaceHandle {
  public:
   inline DnnWorkspaceHandle(Allocator* allocator, gpuStream_t stream)
       : allocator_(allocator), stream_(stream) {
-    mtx_.reset(new std::mutex());
+    mtx_ = std::make_unique<std::mutex>();
   }
   inline void RunFunc(const std::function<void(void*)>& cudnn_func,
...

test/cpp/fluid/benchmark/op_tester.cc
@@ -63,7 +63,7 @@ void OpTester::Init(const OpTesterConfig &config) {
   }
   framework::InitDevices();
-  scope_.reset(new paddle::framework::Scope());
+  scope_ = std::make_unique<paddle::framework::Scope>();
   op_ = framework::OpRegistry::CreateOp(op_desc_);
   CreateVariables(scope_.get());
...

test/cpp/phi/core/test_selected_rows.cc
@@ -27,7 +27,7 @@ class SelectedRowsTester : public ::testing::Test {
   std::vector<int64_t> rows{0, 4, 7};
   int64_t height = 10;
   int64_t row_numel = 100;
-  selected_rows_.reset(new SelectedRows(rows, height));
+  selected_rows_ = std::make_unique<SelectedRows>(rows, height);
   phi::DenseTensor* value = selected_rows_->mutable_value();
   auto* data = value->mutable_data<float>(
...

test/cpp/prim/init_env_utils.cc
@@ -21,10 +21,10 @@ namespace paddle {
 namespace prim {
 void InitTensorOperants() {
-  paddle::OperantsManager::Instance().eager_operants.reset(
-      new paddle::prim::EagerTensorOperants());
-  paddle::OperantsManager::Instance().static_operants.reset(
-      new paddle::prim::StaticTensorOperants());
+  paddle::OperantsManager::Instance().eager_operants =
+      std::make_unique<paddle::prim::EagerTensorOperants>();
+  paddle::OperantsManager::Instance().static_operants =
+      std::make_unique<paddle::prim::StaticTensorOperants>();
 }
 }  // namespace prim