PaddlePaddle / Paddle
Commit dfd4a111 (unverified)

Merge pull request #14592 from velconia/revert_vlog

Revert the changes of VLOG

Authored by Xin Pan on Nov 28, 2018; committed via GitHub on Nov 28, 2018.
Parents: 4ffc3764, 9d7c3b18
Showing 113 changed files with 508 additions and 525 deletions (+508, -525).
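For context on the diff below: Paddle logs through glog-style VLOG macros, where VLOG(n) emits only when the runtime verbosity level is at least n. As the hunks show, an earlier change had multiplied the verbosity levels by ten (VLOG(3) became VLOG(30), VLOG(10) became VLOG(100), and so on); this merge reverts every call site to its original level. A minimal sketch of the mechanism, assuming glog as the logging backend (which Paddle used at the time):

    #include <glog/logging.h>  // provides VLOG and VLOG_IS_ON

    int main(int argc, char* argv[]) {
      google::InitGoogleLogging(argv[0]);
      // Run with GLOG_v=3 GLOG_logtostderr=1 in the environment (or --v=3
      // when gflags is linked in): the first message prints and the second
      // stays silent, because VLOG(n) fires only when verbosity >= n.
      VLOG(3) << "visible at verbosity 3 and above";
      VLOG(30) << "visible only at verbosity 30 and above";
      return 0;
    }

Under a typical verbosity setting of 3 or 10, the tenfold levels would suppress these messages, which is consistent with the revert touching nothing but the integer inside each VLOG(...) call.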
Files changed:

paddle/fluid/framework/data_device_transform.cc  +2 -2
paddle/fluid/framework/data_device_transform_test.cu  +3 -3
paddle/fluid/framework/details/broadcast_op_handle.cc  +1 -1
paddle/fluid/framework/details/modify_op_lock_and_record_event_pass.cc  +2 -2
paddle/fluid/framework/details/multi_devices_graph_pass.cc  +6 -6
paddle/fluid/framework/details/reference_count_pass.cc  +2 -2
paddle/fluid/framework/details/scale_loss_grad_op_handle.cc  +1 -1
paddle/fluid/framework/details/sequential_execution_pass.cc  +2 -2
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc  +4 -4
paddle/fluid/framework/executor.cc  +11 -11
paddle/fluid/framework/feed_fetch_method.cc  +3 -3
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc  +14 -14
paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.cc  +2 -2
paddle/fluid/framework/ir/conv_bn_fuse_pass.cc  +3 -3
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc  +2 -2
paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass.cc  +1 -1
paddle/fluid/framework/ir/fc_fuse_pass.cc  +1 -1
paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc  +14 -14
paddle/fluid/framework/ir/graph.cc  +2 -2
paddle/fluid/framework/ir/graph.h  +1 -1
paddle/fluid/framework/ir/graph_helper.cc  +5 -6
paddle/fluid/framework/ir/graph_pattern_detector.cc  +10 -11
paddle/fluid/framework/ir/graph_viz_pass.cc  +1 -1
paddle/fluid/framework/ir/mkldnn_placement_pass.cc  +1 -1
paddle/fluid/framework/ir/multi_batch_merge_pass.cc  +4 -4
paddle/fluid/framework/ir/pass.h  +1 -1
paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc  +1 -1
paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc  +1 -1
paddle/fluid/framework/lod_rank_table.cc  +1 -1
paddle/fluid/framework/mixed_vector_test.cc  +1 -1
paddle/fluid/framework/op_desc.cc  +18 -18
paddle/fluid/framework/op_registry.cc  +3 -3
paddle/fluid/framework/operator.cc  +7 -8
paddle/fluid/framework/parallel_executor.cc  +1 -1
paddle/fluid/framework/scope.cc  +1 -1
paddle/fluid/framework/selected_rows.cc  +1 -1
paddle/fluid/framework/tensor_util.cc  +12 -12
paddle/fluid/framework/threadpool.cc  +1 -1
paddle/fluid/framework/var_desc.cc  +14 -14
paddle/fluid/inference/api/analysis_predictor.cc  +7 -7
paddle/fluid/inference/api/api_impl.cc  +1 -1
paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc  +4 -4
paddle/fluid/inference/api/demo_ci/utils.h  +5 -5
paddle/fluid/inference/api/details/reset_tensor_array.cc  +2 -2
paddle/fluid/inference/io.cc  +2 -2
paddle/fluid/inference/tensorrt/convert/pool2d_op.cc  +1 -1
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc  +3 -3
paddle/fluid/memory/allocation/allocator_facade.cc  +4 -4
paddle/fluid/memory/allocation/legacy_allocator.cc  +9 -9
paddle/fluid/memory/detail/buddy_allocator.cc  +28 -28
paddle/fluid/memory/detail/meta_cache.cc  +1 -1
paddle/fluid/operators/activation_op.h  +1 -1
paddle/fluid/operators/array_operator.h  +1 -1
paddle/fluid/operators/array_to_lod_tensor_op.cc  +2 -2
paddle/fluid/operators/batch_norm_op.cu.cc  +1 -1
paddle/fluid/operators/beam_search_op.cc  +6 -6
paddle/fluid/operators/concat_op.cc  +1 -1
paddle/fluid/operators/controlflow/feed_op.cc  +2 -2
paddle/fluid/operators/controlflow/fetch_op.cc  +1 -1
paddle/fluid/operators/controlflow/parallel_do_op.cc  +5 -5
paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc  +7 -7
paddle/fluid/operators/controlflow/while_op.cc  +9 -9
paddle/fluid/operators/conv_cudnn_op.cu.cc  +2 -2
paddle/fluid/operators/distributed/brpc_server.cc  +2 -2
paddle/fluid/operators/distributed/grpc_client.cc  +7 -7
paddle/fluid/operators/distributed/grpc_server.cc  +22 -23
paddle/fluid/operators/distributed/request_handler.h  +2 -2
paddle/fluid/operators/distributed/request_handler_impl.cc  +12 -13
paddle/fluid/operators/distributed/rpc_server.cc  +10 -10
paddle/fluid/operators/distributed/variable_response.cc  +4 -4
paddle/fluid/operators/distributed_ops/checkpoint_notify_op.cc  +2 -2
paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc  +1 -1
paddle/fluid/operators/distributed_ops/gen_nccl_id_op.cc  +5 -5
paddle/fluid/operators/distributed_ops/listen_and_serv_op.cc  +17 -17
paddle/fluid/operators/distributed_ops/prefetch_op.cc  +3 -3
paddle/fluid/operators/distributed_ops/recv_op.cc  +1 -1
paddle/fluid/operators/distributed_ops/send_barrier_op.cc  +2 -2
paddle/fluid/operators/distributed_ops/send_op.cc  +2 -2
paddle/fluid/operators/distributed_ops/send_recv_op_test.cc  +2 -2
paddle/fluid/operators/distributed_ops/split_byref_op.h  +1 -1
paddle/fluid/operators/distributed_ops/split_ids_op.h  +1 -1
paddle/fluid/operators/lod_rank_table_op.cc  +2 -2
paddle/fluid/operators/lookup_table_op.cc  +4 -4
paddle/fluid/operators/math/cpu_vec_test.cc  +2 -2
paddle/fluid/operators/math/jit_kernel_test.cc  +40 -51
paddle/fluid/operators/math/selected_rows_functor.cc  +2 -2
paddle/fluid/operators/math/selected_rows_functor.cu  +2 -2
paddle/fluid/operators/mul_op.cc  +3 -3
paddle/fluid/operators/nccl/nccl_op.cu.cc  +15 -16
paddle/fluid/operators/nccl/nccl_op_test.cu.cc  +7 -7
paddle/fluid/operators/nce_op.cc  +7 -7
paddle/fluid/operators/optimizers/adam_op.h  +1 -1
paddle/fluid/operators/optimizers/momentum_op.h  +1 -1
paddle/fluid/operators/optimizers/sgd_op.h  +4 -4
paddle/fluid/operators/random_crop_op.h  +2 -2
paddle/fluid/operators/reader/blocking_queue.h  +2 -2
paddle/fluid/operators/reader/create_shuffle_reader_op.cc  +3 -3
paddle/fluid/operators/recurrent_op.cc  +13 -13
paddle/fluid/operators/rnn_memory_helper_op.cc  +1 -1
paddle/fluid/operators/save_op.cc  +1 -1
paddle/fluid/operators/sequence_ops/sequence_mask_op.h  +1 -1
paddle/fluid/operators/sum_mkldnn_op.cc  +1 -1
paddle/fluid/operators/sum_op.cc  +3 -3
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h  +6 -6
paddle/fluid/platform/device_tracer.cc  +4 -4
paddle/fluid/platform/dynload/dynamic_loader.cc  +2 -2
paddle/fluid/platform/gpu_info.cc  +2 -2
paddle/fluid/platform/init.cc  +1 -1
paddle/fluid/platform/nccl_helper.h  +1 -1
paddle/fluid/pybind/protobuf.cc  +3 -3
paddle/fluid/train/demo/demo_trainer.cc  +1 -1
paddle/legacy/pserver/ParameterClient2.cpp  +8 -8
paddle/testing/TestUtil.cpp  +1 -1
paddle/fluid/framework/data_device_transform.cc

@@ -18,7 +18,7 @@ namespace framework {
 void TransDataDevice(const Tensor &in, const platform::Place &dst_place,
                      Tensor *out) {
-  VLOG(30) << "DeviceTransform in, src_place " << in.place()
-           << " dst_place: " << dst_place;
+  VLOG(3) << "DeviceTransform in, src_place " << in.place()
+          << " dst_place: " << dst_place;
   PADDLE_ENFORCE_NE(
paddle/fluid/framework/data_device_transform_test.cu

@@ -49,10 +49,10 @@ class TestOpWithKernel : public OperatorWithKernel {
   OpKernelType GetExpectedKernelType(
       const ExecutionContext& ctx) const override {
     if (Attr<bool>("use_gpu")) {
-      VLOG(30) << "force use gpu kernel";
+      VLOG(3) << "force use gpu kernel";
       return OpKernelType(proto::VarType::FP32, platform::CUDAPlace(0));
     } else {
-      VLOG(30) << "use default kernel";
+      VLOG(3) << "use default kernel";
       return OpKernelType(proto::VarType::FP32,
                           ctx.Input<Tensor>("input")->place());
     }
@@ -148,7 +148,7 @@ TEST(Operator, CPUtoGPU) {
   // get output
   auto* output2 = scope.Var("OUT2");
   gpu_op->Run(scope, cuda_place);
-  VLOG(30) << "after gpu_op run";
+  VLOG(3) << "after gpu_op run";
   // auto* output2_ptr = output2->Get<LoDTensor>().data<float>();
   paddle::platform::DeviceContextPool& pool =
paddle/fluid/framework/details/broadcast_op_handle.cc

@@ -60,7 +60,7 @@ void BroadcastOpHandle::BroadcastOneVar(
   PADDLE_ENFORCE_NOT_NULL(in_var);
   Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var);
   if (UNLIKELY(!in_tensor.IsInitialized())) {
-    VLOG(30) << "in var " << in_var_handle.name_ << "not inited, return!";
+    VLOG(3) << "in var " << in_var_handle.name_ << "not inited, return!";
     return;
   }
paddle/fluid/framework/details/modify_op_lock_and_record_event_pass.cc

@@ -45,7 +45,7 @@ std::unique_ptr<ir::Graph> ModifyOpLockAndRecordEventPass::ApplyImpl(
         IsLockAndRecordEventFreeComputationOpHandle(compute_op, graph_view);
     compute_op->SetLockAndRecordEventFree(is_lock_and_record_event_free);
     if (is_lock_and_record_event_free) {
-      VLOG(100) << "Set is_lock_and_record_event_free be true in op "
-                << compute_op->DebugString();
+      VLOG(10) << "Set is_lock_and_record_event_free be true in op "
+               << compute_op->DebugString();
     }
   }
paddle/fluid/framework/details/multi_devices_graph_pass.cc

@@ -399,7 +399,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
     for (size_t i = 0; i < backward_vars.size(); i += 2) {
       auto &p_name = backward_vars[i];
      auto &g_name = backward_vars[i + 1];
-      VLOG(100) << "Bcast " << g_name << " for parameter " << p_name;
+      VLOG(10) << "Bcast " << g_name << " for parameter " << p_name;
       switch (strategy_.reduce_) {
         case BuildStrategy::ReduceStrategy::kReduce:
@@ -809,7 +809,7 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(
         node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
     PADDLE_ENFORCE_EQ(send_param_grad.size(), 2U);
     op_dev_id = GetAppropriateDeviceID({send_param_grad[1]});
-    VLOG(100) << "send grad " << input_var_names[0] << " origin "
-              << send_param_grad[1] << " place: " << op_dev_id;
+    VLOG(10) << "send grad " << input_var_names[0] << " origin "
+             << send_param_grad[1] << " place: " << op_dev_id;
     for (auto &varname : input_var_names) {
       sharded_var_device->emplace(varname, op_dev_id);
@@ -826,7 +826,7 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(
     if (recv_param_grad.size() == 2U) {
       op_dev_id =
           GetVarDeviceID(*result, recv_param_grad[1], *sharded_var_device);
-      VLOG(100) << "recv param " << recv_param_grad[0]
-                << " get grad place: " << recv_param_grad[1]
-                << " place: " << op_dev_id;
+      VLOG(10) << "recv param " << recv_param_grad[0]
+               << " get grad place: " << recv_param_grad[1]
+               << " place: " << op_dev_id;
     } else {
paddle/fluid/framework/details/reference_count_pass.cc

@@ -140,7 +140,7 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
       if (next_compute_op != nullptr) {
         if (compute_ref_cnt_map.count(next_compute_op)) {
           compute_ref_cnt_map[next_compute_op]->AddVar(var_name);
-          VLOG(50) << "Add reference count of " << var_name << " to Operator "
-                   << next_compute_op->Name();
+          VLOG(5) << "Add reference count of " << var_name << " to Operator "
+                  << next_compute_op->Name();
         } else {
           // Create new reference_count_op_handle
paddle/fluid/framework/details/scale_loss_grad_op_handle.cc

@@ -51,7 +51,7 @@ void ScaleLossGradOpHandle::RunImpl() {
                       ->stream();
     memory::Copy(boost::get<platform::CUDAPlace>(place_), tmp,
                  platform::CPUPlace(), &coeff_, sizeof(float), stream);
-    VLOG(100) << place_ << "RUN Scale loss grad op";
+    VLOG(10) << place_ << "RUN Scale loss grad op";
   });
 #endif
 }
paddle/fluid/framework/details/sequential_execution_pass.cc

@@ -94,7 +94,7 @@ std::unique_ptr<ir::Graph> SequentialExecutionPass::ApplyImpl(
     op_node_list[i - 1]->outputs.push_back(dep_var);
     dep_var->outputs.push_back(op_node_list[i]);
     dep_var->inputs.push_back(op_node_list[i - 1]);
-    VLOG(100) << "Add dependencies between " << op_node_list[i - 1]->Name()
-              << " and " << op_node_list[i]->Name();
+    VLOG(10) << "Add dependencies between " << op_node_list[i - 1]->Name()
+             << " and " << op_node_list[i]->Name();
   }
   return graph;
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc

@@ -210,16 +210,16 @@ void ThreadedSSAGraphExecutor::RunOp(
     details::OpHandleBase *op) {
   auto op_run = [ready_var_q, op, this] {
     try {
-      if (VLOG_IS_ON(100)) {
-        VLOG(100) << op << " " << op->Name() << " : " << op->DebugString();
+      if (VLOG_IS_ON(10)) {
+        VLOG(10) << op << " " << op->Name() << " : " << op->DebugString();
       }
       if (LIKELY(!strategy_.dry_run_)) {
         op->Run(strategy_.use_cuda_);
       }
-      VLOG(100) << op << " " << op->Name() << " Done ";
+      VLOG(10) << op << " " << op->Name() << " Done ";
       running_ops_--;
       ready_var_q->Extend(op->Outputs());
-      VLOG(100) << op << " " << op->Name() << "Signal posted";
+      VLOG(10) << op << " " << op->Name() << "Signal posted";
     } catch (...) {
       exception_holder_.Catch(std::current_exception());
     }
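The hunk above also shows the usual companion to VLOG: the executor guards the relatively expensive op->DebugString() call behind VLOG_IS_ON, so the message is only built when that verbosity is actually enabled. A minimal sketch of the same guard, using the glog macros; the OpHandle type here is an illustrative stand-in, not Paddle's class:

    #include <glog/logging.h>
    #include <string>

    // Hypothetical stand-in for an op handle; only the logging pattern matters.
    struct OpHandle {
      std::string Name() const { return "sum"; }
      std::string DebugString() const { return "sum(inputs=2, outputs=1)"; }
    };

    void RunOp(const OpHandle* op) {
      // Skip constructing the debug string entirely unless verbosity >= 10
      // is enabled (e.g. GLOG_v=10); VLOG_IS_ON is the glog query macro.
      if (VLOG_IS_ON(10)) {
        VLOG(10) << op << " " << op->Name() << " : " << op->DebugString();
      }
    }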
paddle/fluid/framework/executor.cc

@@ -46,7 +46,7 @@ ExecutorPrepareContext::ExecutorPrepareContext(
 }

 ExecutorPrepareContext::~ExecutorPrepareContext() {
-  VLOG(50) << "destroy ExecutorPrepareContext";
+  VLOG(5) << "destroy ExecutorPrepareContext";
 }

 template <typename RefCntMap>
@@ -63,7 +63,7 @@ static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,
     if ((it->second)-- == 1) {
       auto* var = scope.FindVar(name);
       if (var != nullptr) {
-        VLOG(100) << "Erase tensor \'" << name << "\'";
+        VLOG(10) << "Erase tensor \'" << name << "\'";
         if (var->IsType<LoDTensor>()) {
           erase_tensors.insert(var->GetMutable<LoDTensor>());
         } else if (var->IsType<SelectedRows>()) {
@@ -162,12 +162,12 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope,
       if (var->Persistable()) {
         auto* ptr = const_cast<Scope*>(ancestor_scope)->Var(var->Name());
         InitializeVariable(ptr, var->GetType());
-        VLOG(30) << "Create Variable " << var->Name()
-                 << " global, which pointer is " << ptr;
+        VLOG(3) << "Create Variable " << var->Name()
+                << " global, which pointer is " << ptr;
       } else {
         auto* ptr = scope->Var(var->Name());
         InitializeVariable(ptr, var->GetType());
-        VLOG(30) << "Create Variable " << var->Name()
-                 << " locally, which pointer is " << ptr;
+        VLOG(3) << "Create Variable " << var->Name()
+                << " locally, which pointer is " << ptr;
       }
@@ -175,7 +175,7 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope,
     for (auto& var : global_block.AllVars()) {
       auto* ptr = scope->Var(var->Name());
       InitializeVariable(ptr, var->GetType());
-      VLOG(30) << "Create variable " << var->Name() << ", which pointer is "
-               << ptr;
+      VLOG(3) << "Create variable " << var->Name() << ", which pointer is "
+              << ptr;
     }
 }
@@ -307,7 +307,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
     int i = 0;
     for (auto& feed_target : (*feed_targets)) {
       std::string var_name = feed_target.first;
-      VLOG(30) << "feed target's name: " << var_name;
+      VLOG(3) << "feed target's name: " << var_name;
       // prepend feed op
       auto* op = global_block->PrependOp();
@@ -330,7 +330,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
     int i = 0;
     for (auto& fetch_target : (*fetch_targets)) {
       std::string var_name = fetch_target.first;
-      VLOG(30) << "fetch target's name: " << var_name;
+      VLOG(3) << "fetch target's name: " << var_name;
       // append fetch op
       auto* op = global_block->AppendOp();
@@ -482,7 +482,7 @@ void Executor::RunPreparedContext(
 void Executor::EnableMKLDNN(const ProgramDesc& program) {
 #ifdef PADDLE_WITH_MKLDNN
-  VLOG(30) << "use_mkldnn=True";
+  VLOG(3) << "use_mkldnn=True";
   for (size_t bid = 0; bid < program.Size(); ++bid) {
     auto* block = const_cast<ProgramDesc&>(program).MutableBlock(bid);
     for (auto* op : block->AllOps()) {
paddle/fluid/framework/feed_fetch_method.cc

@@ -25,7 +25,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input,
                      const std::string& var_name, size_t index) {
   // If var_name Variable is not found in GlobalScope, a new variable will
   // be created.
-  VLOG(30) << "SetFeedVariable name=" << var_name << " index=" << index;
+  VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
   Variable* g_feed_value = scope->Var(var_name);
   auto& feed_inputs = *(g_feed_value->GetMutable<FeedFetchList>());
   if (index >= feed_inputs.size()) {
@@ -47,7 +47,7 @@ LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name,
                typeid(FeedFetchList).name());
   auto& fetch_outputs = *g_fetch_value->GetMutable<FeedFetchList>();
   auto& tensor = fetch_outputs[index];
-  VLOG(30) << "Fetch " << var_name << " with index " << index
-           << " shape= " << tensor.dims();
+  VLOG(3) << "Fetch " << var_name << " with index " << index
+          << " shape= " << tensor.dims();
   PADDLE_ENFORCE_LT(index, fetch_outputs.size());
   return tensor;
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc

@@ -152,11 +152,11 @@ void PrepareParameters(Graph* graph, const Param& param) {
   auto* W_##name__##_w1 = scope->FindVar(#name__ ".w_1");              \
   auto* W_##name__##_b0 = scope->FindVar(#name__ ".b_0");              \
   CHECK_P3(W_##name__##_w0, W_##name__##_w1, W_##name__##_b0);         \
-  VLOG(40) << #name__ "_w0"                                            \
+  VLOG(4) << #name__ "_w0"                                             \
           << " shape: " << W_##name__##_w0->Get<LoDTensor>().dims();   \
-  VLOG(40) << #name__ "_w1"                                            \
+  VLOG(4) << #name__ "_w1"                                             \
           << " shape: " << W_##name__##_w1->Get<LoDTensor>().dims();   \
-  VLOG(40) << #name__ "_b0"                                            \
+  VLOG(4) << #name__ "_b0"                                             \
           << " shape: " << W_##name__##_b0->Get<LoDTensor>().dims();   \
   auto& W_##name__##_w0_t = W_##name__##_w0->Get<LoDTensor>();         \
   auto& W_##name__##_w1_t = W_##name__##_w1->Get<LoDTensor>();         \
@@ -208,7 +208,7 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
   int D = W_forget_w0.dims()[0];
   int M = W_forget_w1.dims()[0];
   out->Resize(make_ddim({D + M, 4 * D}));
-  VLOG(30) << "LSTMWeight resized to " << out->dims();
+  VLOG(3) << "LSTMWeight resized to " << out->dims();

   float* out_data = out->mutable_data<float>(platform::CPUPlace());
   std::array<const float*, 4> tensors{
paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.cc

@@ -57,7 +57,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl(
   int found_conv_bias_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(40) << "handle ConvBias fuse";
+    VLOG(4) << "handle ConvBias fuse";
     GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
                               conv_bias_pattern);              // Filter
     GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_bias_pattern);  // tmp
@@ -74,7 +74,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl(
     // check if fuse can be done and if MKL-DNN should be used
     FuseOptions fuse_option = FindFuseOption(*conv, *eltwise);
     if (fuse_option == DO_NOT_FUSE || fuse_option == FUSE_NATIVE) {
-      VLOG(30) << "do not perform conv+bias fuse";
+      VLOG(3) << "do not perform conv+bias fuse";
       return;
     }
paddle/fluid/framework/ir/conv_bn_fuse_pass.cc

@@ -121,7 +121,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
   int found_conv_bn_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(40) << "handle ConvBN fuse";
+    VLOG(4) << "handle ConvBN fuse";
     // conv, batch_norm,
     // conv_weight, conv_out,
@@ -133,7 +133,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
     // check if fuse can be done and if MKL-DNN should be used
     FuseOptions fuse_option = FindFuseOption(*conv, *batch_norm);
     if (fuse_option == DO_NOT_FUSE) {
-      VLOG(30) << "do not perform conv+bn fuse";
+      VLOG(3) << "do not perform conv+bn fuse";
       return;
     }
@@ -241,7 +241,7 @@ std::unique_ptr<ir::Graph> ConvEltwiseAddBNFusePass::ApplyImpl(
   int found_conv_bn_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(40) << "handle ConvBN fuse";
+    VLOG(4) << "handle ConvBN fuse";
     // conv, batch_norm,
     // conv_weight, conv_out,
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc

@@ -38,7 +38,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
   int found_conv_relu_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(40) << "handle ConvReLU fuse";
+    VLOG(4) << "handle ConvReLU fuse";
     GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
                               conv_relu_pattern);  // Filter
     GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern);  // tmp
@@ -48,7 +48,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
     FuseOptions fuse_option = FindFuseOption(*conv, *relu);
     if (fuse_option == DO_NOT_FUSE) {
-      VLOG(30) << "do not perform conv+relu fuse";
+      VLOG(3) << "do not perform conv+relu fuse";
       return;
     }
paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass.cc

@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> DepthwiseConvMKLDNNPass::ApplyImpl(
   int found_depthwise_conv_mkldnn_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(30) << "handle DepthwiseConvMKLDNN fuse";
+    VLOG(3) << "handle DepthwiseConvMKLDNN fuse";
     GET_NODE(depthwise_conv, (*pattern));
     depthwise_conv->Op()->SetType("conv2d");
     found_depthwise_conv_mkldnn_count++;
paddle/fluid/framework/ir/fc_fuse_pass.cc

@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
   int found_fc_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(40) << "handle FC fuse";
+    VLOG(4) << "handle FC fuse";
     GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern);
paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc

@@ -61,7 +61,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct(
   auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
                      Graph *g) {
-    VLOG(40) << "handle FuseElewiseAddAct fuse";
+    VLOG(4) << "handle FuseElewiseAddAct fuse";
     GET_IR_NODE_FROM_SUBGRAPH(ele_y, ele_y, elewise_add_act_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out,
                               elewise_add_act_pattern);
@@ -77,7 +77,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct(
     Node *elewise_add_act_node = CreateFuseElewiseAddActNode(
         g, act, ele_add, ele_x_n, ele_y_n, ele_out_n, act_out_n);
-    VLOG(40) << "\n\t " << ele_x_n << " and " << ele_y_n << " -> "
-             << ele_add->Name() << " -> " << ele_out_n << "\n"
-             << "\t " << ele_out_n << " -> " << act->Name() << " -> "
-             << act_out_n;
+    VLOG(4) << "\n\t " << ele_x_n << " and " << ele_y_n << " -> "
+            << ele_add->Name() << " -> " << ele_out_n << "\n"
+            << "\t " << ele_out_n << " -> " << act->Name() << " -> "
+            << act_out_n;
@@ -113,7 +113,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd(
   auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
                      Graph *g) {
-    VLOG(40) << "handle FuseElewiseAddAct fuse";
+    VLOG(4) << "handle FuseElewiseAddAct fuse";
     GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, act_elewise_add_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(ele_x, ele_x, act_elewise_add_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out,
@@ -129,7 +129,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd(
     Node *elewise_add_act_node = CreateFuseElewiseAddActNode(
         g, ele_add, act, elewise_add_x_n, act_i_n, act_o_n, elewise_add_out_n);
-    VLOG(40) << "\n\t " << act_i_n << " -> " << act->Name() << " -> " << act_o_n
-             << "\n\t " << act_o_n << " and " << elewise_add_x_n << " -> "
-             << ele_add->Name() << " -> " << elewise_add_out_n;
+    VLOG(4) << "\n\t " << act_i_n << " -> " << act->Name() << " -> " << act_o_n
+            << "\n\t " << act_o_n << " and " << elewise_add_x_n << " -> "
+            << ele_add->Name() << " -> " << elewise_add_out_n;
@@ -165,7 +165,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad(
   auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
                      Graph *g) {
-    VLOG(40) << "handle FuseElewiseAddActGrad1 fuse";
+    VLOG(4) << "handle FuseElewiseAddActGrad1 fuse";
     GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, elewise_add_act_grad_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(act_grad, act_grad, elewise_add_act_grad_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(d_itermediate_out, d_itermediate_out,
@@ -208,7 +208,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad(
     auto fused_node = g->CreateOpNode(&desc);
-    VLOG(40) << "\n\t " << d_act_out_n << " and " << act_out_n << " -> "
-             << act_grad->Name() << " -> " << d_itermediate_out_n << "\n\t "
-             << d_itermediate_out_n << " and " << act_out_n << " -> "
-             << ele_add_grad->Name() << " -> " << d_itermediate_out_n;
+    VLOG(4) << "\n\t " << d_act_out_n << " and " << act_out_n << " -> "
+            << act_grad->Name() << " -> " << d_itermediate_out_n << "\n\t "
+            << d_itermediate_out_n << " and " << act_out_n << " -> "
+            << ele_add_grad->Name() << " -> " << d_itermediate_out_n;
paddle/fluid/framework/ir/graph.cc

@@ -90,7 +90,7 @@ Graph::Graph(const ProgramDesc &program) : program_(program) {
 std::map<std::string, std::vector<ir::Node *>> Graph::InitFromProgram(
     const ProgramDesc &program) {
-  VLOG(30) << "block in program:" << program_.Size();
+  VLOG(3) << "block in program:" << program_.Size();
   std::unordered_map<std::string, VarDesc *> all_vars;
   // var nodes for each var name, will have multiple versions in SSA
   std::map<std::string, std::vector<ir::Node *>> var_nodes;
@@ -158,7 +158,7 @@ void Graph::ResolveHazard(
     auto it_old = versions.rbegin();
     ++it_old;
     for (; it_old != versions.rend(); it_new = it_old, ++it_old) {
-      VLOG(30) << "deal with var: " << (*it_new)->Name();
+      VLOG(3) << "deal with var: " << (*it_new)->Name();
       ir::Node *write_op =
           (*it_new)->inputs.empty() ? nullptr : (*it_new)->inputs[0];
       const auto &read_ops = (*it_old)->outputs;
paddle/fluid/framework/ir/graph.h

@@ -89,7 +89,7 @@ class Graph {
                       attr_name);
     attrs_[attr_name] = attr;
     attr_dels_[attr_name] = [attr, attr_name]() {
-      VLOG(30) << "deleting " << attr_name;
+      VLOG(3) << "deleting " << attr_name;
       delete attr;
     };
   }
...
paddle/fluid/framework/ir/graph_helper.cc
浏览文件 @
dfd4a111
...
@@ -40,9 +40,8 @@ void SortHelper(
...
@@ -40,9 +40,8 @@ void SortHelper(
}
}
}
}
VLOG
(
30
)
<<
"topology sort insert: "
<<
node
->
Name
()
VLOG
(
3
)
<<
"topology sort insert: "
<<
node
->
Name
()
<<
reinterpret_cast
<
void
*>
(
node
)
<<
" input "
<<
reinterpret_cast
<
void
*>
(
node
)
<<
" input "
<<
node
->
inputs
.
size
();
<<
node
->
inputs
.
size
();
ret
->
push_back
(
node
);
ret
->
push_back
(
node
);
}
}
...
@@ -111,7 +110,7 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
...
@@ -111,7 +110,7 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
for
(
auto
&
var
:
n
->
inputs
)
{
for
(
auto
&
var
:
n
->
inputs
)
{
for
(
auto
&
adj_n
:
var
->
inputs
)
{
for
(
auto
&
adj_n
:
var
->
inputs
)
{
PADDLE_ENFORCE
(
adj_n
->
NodeType
()
==
ir
::
Node
::
Type
::
kOperation
);
PADDLE_ENFORCE
(
adj_n
->
NodeType
()
==
ir
::
Node
::
Type
::
kOperation
);
VLOG
(
4
0
)
<<
"adj "
<<
adj_n
->
Name
()
<<
reinterpret_cast
<
void
*>
(
adj_n
)
VLOG
(
4
)
<<
"adj "
<<
adj_n
->
Name
()
<<
reinterpret_cast
<
void
*>
(
adj_n
)
<<
" -> "
<<
n
->
Name
()
<<
reinterpret_cast
<
void
*>
(
n
)
<<
" -> "
<<
n
->
Name
()
<<
reinterpret_cast
<
void
*>
(
n
)
<<
" via "
<<
var
->
Name
()
<<
reinterpret_cast
<
void
*>
(
var
);
<<
" via "
<<
var
->
Name
()
<<
reinterpret_cast
<
void
*>
(
var
);
adj_list
[
n
].
insert
(
adj_n
);
adj_list
[
n
].
insert
(
adj_n
);
...
...
paddle/fluid/framework/ir/graph_pattern_detector.cc

@@ -92,19 +92,19 @@ void GraphPatternDetector::operator()(Graph *graph,
   PrettyLogEndl(Style::detail(), "---  detect %d subgraphs", subgraphs.size());
   int id = 0;
   for (auto &g : subgraphs) {
-    VLOG(30) << "optimizing #" << id++ << " subgraph";
+    VLOG(3) << "optimizing #" << id++ << " subgraph";
     handler(g, graph);
   }
 }

 bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
-  VLOG(30) << "mark pdnodes in graph";
+  VLOG(3) << "mark pdnodes in graph";
   if (graph.Nodes().empty()) return false;

   for (auto &node : GraphTraits::DFS(graph)) {
     for (const auto &pdnode : pattern_.nodes()) {
       if (pdnode->Tell(&node)) {
-        VLOG(40) << "pdnode " << pdnode->name() << " marked";
+        VLOG(4) << "pdnode " << pdnode->name() << " marked";
         pdnodes2nodes_[pdnode.get()].insert(&node);
       }
     }
@@ -112,7 +112,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
   // Check to early stop if some PDNode can't find matched Node.
   for (auto &pdnode : pattern_.nodes()) {
     if (!pdnodes2nodes_.count(pdnode.get())) {
-      VLOG(40) << pdnode->name() << " can't find matched Node, early stop";
+      VLOG(4) << pdnode->name() << " can't find matched Node, early stop";
       // return false;
     }
   }
@@ -121,7 +121,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
       GetMarkedNodes(const_cast<Graph *>(&graph)).insert(n);
     }
   }
-  VLOG(30) << pdnodes2nodes_.size() << " nodes marked";
+  VLOG(3) << pdnodes2nodes_.size() << " nodes marked";

   return !pdnodes2nodes_.empty();
 }
@@ -215,7 +215,7 @@ GraphPatternDetector::DetectPatterns() {
   // Extend a PDNode to subgraphs by deducing the connection relations defined
   // in edges of PDNodes.
   for (const auto &edge : pattern_.edges()) {
-    VLOG(40) << "check " << edge.first->name() << " -> " << edge.second->name();
+    VLOG(4) << "check " << edge.first->name() << " -> " << edge.second->name();
     // TODO(Superjomn) Fix bug here, the groups might be duplicate here.
     // Each role has two PDNodes, which indicates two roles.
     // Detect two Nodes that can match these two roles and they are connected.
@@ -226,7 +226,7 @@ GraphPatternDetector::DetectPatterns() {
     // source -> target
     for (Node *source : pdnodes2nodes_[edge.first]) {
       for (Node *target : pdnodes2nodes_[edge.second]) {
-        VLOG(80) << "check " << source->id() << " -- " << target->id();
+        VLOG(8) << "check " << source->id() << " -- " << target->id();
         // TODO(Superjomn) add some prune strategies.
         for (const auto &group : pre_groups) {
           if (IsNodesLink(source, target)) {
@@ -243,13 +243,12 @@ GraphPatternDetector::DetectPatterns() {
         }
       }
     }
-    VLOG(30) << "step " << step << " get records: " << cur_groups.size();
+    VLOG(3) << "step " << step << " get records: " << cur_groups.size();
     for (auto &group : cur_groups) {
       for (auto &item : group.roles) {
-        VLOG(40) << "node " << item.second->id() << " as "
-                 << item.first->name();
+        VLOG(4) << "node " << item.second->id() << " as " << item.first->name();
       }
-      VLOG(40) << "=========================================================";
+      VLOG(4) << "=========================================================";
     }
paddle/fluid/framework/ir/graph_viz_pass.cc

@@ -41,7 +41,7 @@ std::string FormatName(const Node* node) {
 std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl(
     std::unique_ptr<ir::Graph> graph) const {
   const std::string graph_viz_path = Get<std::string>(kGraphVizPath);
-  VLOG(30) << "draw IR graph viz to " << graph_viz_path;
+  VLOG(3) << "draw IR graph viz to " << graph_viz_path;
   std::unique_ptr<std::ostream> fout(new std::ofstream(graph_viz_path));
   PADDLE_ENFORCE(fout->good());
   std::ostream& sout = *fout;
paddle/fluid/framework/ir/mkldnn_placement_pass.cc

@@ -20,7 +20,7 @@ namespace ir {
 std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
     std::unique_ptr<ir::Graph> graph) const {
-  VLOG(30) << "Aplies MKL-DNN placement strategy.";
+  VLOG(3) << "Aplies MKL-DNN placement strategy.";
   for (const Node* n : graph->Nodes()) {
     if (n->IsOp() && n->Op()->HasAttr("use_mkldnn")) {
       n->Op()->SetAttr("use_mkldnn", true);
paddle/fluid/framework/ir/multi_batch_merge_pass.cc

@@ -62,7 +62,7 @@ VarDesc UpdateGradVarDesc(
         string::Sprintf("%s.repeat.%d", var_desc->Name(), repeat);
     VarDesc repeated_var = CopyVarDesc(var_desc);
     repeated_var.SetName(new_gname);
-    VLOG(30) << "update " << var_desc->Name() << " to repeat " << repeat;
+    VLOG(3) << "update " << var_desc->Name() << " to repeat " << repeat;
     return repeated_var;
   }
   return *var_desc;
@@ -78,7 +78,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
   std::vector<ir::Node*> nodes = TopologySortOperations(*graph);
   auto origin_nodes = graph->ReleaseNodes();
-  VLOG(30) << "origin nodes count: " << origin_nodes.size();
+  VLOG(3) << "origin nodes count: " << origin_nodes.size();
   ir::Graph& result = *graph;

   // 1. record op nodes of different roles
@@ -137,7 +137,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
             "%s.repeat.%d", repeated_op.Input("Variance")[0], i);
         bn_vars_need_rename.insert(repeated_op.Input("Mean")[0]);
         bn_vars_need_rename.insert(repeated_op.Input("Variance")[0]);
-        VLOG(30) << "renaming " << repeated_op.Input("Mean")[0] << " to "
-                 << new_mean_name;
+        VLOG(3) << "renaming " << repeated_op.Input("Mean")[0] << " to "
+                << new_mean_name;
         repeated_op.RenameInput(repeated_op.Input("Mean")[0], new_mean_name);
         repeated_op.RenameInput(repeated_op.Input("Variance")[0], new_var_name);
paddle/fluid/framework/ir/pass.h

@@ -76,7 +76,7 @@ class Pass {
                       attr_name);
     attrs_[attr_name] = attr;
     attr_dels_[attr_name] = [attr, attr_name]() {
-      VLOG(30) << "deleting " << attr_name;
+      VLOG(3) << "deleting " << attr_name;
       delete attr;
     };
   }
paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc

@@ -196,7 +196,7 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
   detector(graph.get(), [&](const GraphPatternDetector::subgraph_t& subgraph,
                             Graph* graph) {
-    VLOG(40) << "get one concat pattern";
+    VLOG(4) << "get one concat pattern";
     // fc
     GET_NODE(fc_w, detector.pattern());
     GET_NODE(fc_bias, detector.pattern());
paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc
@@ -60,7 +60,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope) {
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(40) << "handle SeqConv EltAdd Relu fuse";
+    VLOG(4) << "handle SeqConv EltAdd Relu fuse";
     GET_IR_NODE_FROM_SUBGRAPH(seqconv, seqconv, fuse_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(seqconv_weight, seqconv_weight, fuse_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(seqconv_out, seqconv_out, fuse_pattern);
paddle/fluid/framework/lod_rank_table.cc
@@ -31,7 +31,7 @@ void LoDRankTable::Reset(const LoD& lod, size_t level) {
     TableItem item;
     item.index = i;
     item.length = vec[i + 1] - vec[i];
-    VLOG(100) << "Add item to rank table " << item.index << " " << item.length;
+    VLOG(10) << "Add item to rank table " << item.index << " " << item.length;
     items_.emplace_back(item);
   }
   // NOTE(yuyang18):
paddle/fluid/framework/mixed_vector_test.cc
@@ -51,7 +51,7 @@ TEST(mixed_vector, InitWithCount) {
 TEST(mixed_vector, ForEach) {
   vec<int> tmp;
   for (auto& v : tmp) {
-    VLOG(30) << v;
+    VLOG(3) << v;
   }
 }
paddle/fluid/framework/op_desc.cc
@@ -82,7 +82,7 @@ class CompileTimeInferShapeContext : public InferShapeContext {
     auto *in_var = block_.FindVarRecursive(Inputs(in)[i]);
     auto *out_var = block_.FindVarRecursive(Outputs(out)[j]);
     if (in_var->GetType() != proto::VarType::LOD_TENSOR) {
-      VLOG(30) << "input " << in << " is not LodTensor";
+      VLOG(3) << "input " << in << " is not LodTensor";
       return;
     }
     out_var->SetLoDLevel(in_var->GetLoDLevel());
@@ -241,37 +241,37 @@ void OpDesc::SetAttr(const std::string &name, const Attribute &v) {
     const proto::OpProto::Attr &attr = GetProtoAttr(name);
     switch (attr.type()) {
       case proto::AttrType::BOOLEANS: {
-        VLOG(110) << "SetAttr: " << Type() << ", " << name
-                  << " from INTS to BOOLEANS";
+        VLOG(11) << "SetAttr: " << Type() << ", " << name
+                 << " from INTS to BOOLEANS";
         this->attrs_[name] = std::vector<bool>();
         break;
       }
       case proto::AttrType::INTS: {
-        VLOG(110) << "SetAttr: " << Type() << ", " << name
-                  << " from INTS to INTS";
+        VLOG(11) << "SetAttr: " << Type() << ", " << name
+                 << " from INTS to INTS";
         this->attrs_[name] = std::vector<int>();
         break;
       }
       case proto::AttrType::LONGS: {
-        VLOG(110) << "SetAttr: " << Type() << ", " << name
-                  << " from LONGS to LONGS";
+        VLOG(11) << "SetAttr: " << Type() << ", " << name
+                 << " from LONGS to LONGS";
         this->attrs_[name] = std::vector<int64_t>();
         break;
       }
       case proto::AttrType::FLOATS: {
-        VLOG(110) << "SetAttr: " << Type() << ", " << name
-                  << " from INTS to FLOATS";
+        VLOG(11) << "SetAttr: " << Type() << ", " << name
+                 << " from INTS to FLOATS";
         this->attrs_[name] = std::vector<float>();
         break;
       }
       case proto::AttrType::STRINGS: {
-        VLOG(110) << "SetAttr: " << Type() << ", " << name
-                  << " from INTS to STRINGS";
+        VLOG(11) << "SetAttr: " << Type() << ", " << name
+                 << " from INTS to STRINGS";
         this->attrs_[name] = std::vector<std::string>();
         break;
       }
       case proto::AttrType::BLOCKS: {
-        VLOG(110) << "SetAttr: " << Type() << ", " << name
-                  << " from INTS to BLOCKS";
+        VLOG(11) << "SetAttr: " << Type() << ", " << name
+                 << " from INTS to BLOCKS";
         this->SetBlocksAttr(name, std::vector<BlockDesc *>());
         return;
@@ -505,13 +505,13 @@ void OpDesc::CheckAttrs() {
 }

 void OpDesc::InferShape(const BlockDesc &block) const {
-  VLOG(30) << "CompileTime infer shape on " << Type();
+  VLOG(3) << "CompileTime infer shape on " << Type();
   InitInferShapeFuncs();
   auto &infer_shape = OpInfoMap::Instance().Get(this->Type()).infer_shape_;
   PADDLE_ENFORCE(static_cast<bool>(infer_shape),
                  "%s's infer_shape has not been registered", this->Type());
   CompileTimeInferShapeContext ctx(*this, block);
-  if (VLOG_IS_ON(100)) {
+  if (VLOG_IS_ON(10)) {
     std::ostringstream sout;
     auto inames = this->InputArgumentNames();
     sout << " From [";
@@ -522,7 +522,7 @@ void OpDesc::InferShape(const BlockDesc &block) const {
     std::copy(onames.begin(), onames.end(),
               std::ostream_iterator<std::string>(sout, ", "));
     sout << "]";
-    VLOG(100) << sout.str();
+    VLOG(10) << sout.str();
   }
   infer_shape(&ctx);
 }
@@ -613,7 +613,7 @@ DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const {
     auto shape = var->GetShape();
     res = shape.empty() ? make_ddim({0UL}) : make_ddim(shape);
   } catch (...) {
-    VLOG(50) << "GetDim of variable " << name << " error";
+    VLOG(5) << "GetDim of variable " << name << " error";
     std::rethrow_exception(std::current_exception());
   }
   return res;
@@ -630,7 +630,7 @@ std::vector<DDim> CompileTimeInferShapeContext::GetRepeatedDims(
       res.push_back(s.empty() ? make_ddim({0UL}) : make_ddim(s));
     }
   } catch (...) {
-    VLOG(50) << "GetRepeatedDim of variable " << name << " error.";
+    VLOG(5) << "GetRepeatedDim of variable " << name << " error.";
     std::rethrow_exception(std::current_exception());
   }
   return res;
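The VLOG_IS_ON(10) guard reverted above exists so that the ostringstream listing all input/output argument names is only built when level-10 logging is actually enabled. A minimal sketch of that guard pattern (the function and variable names here are illustrative, not Paddle's):

```cpp
#include <sstream>
#include <string>
#include <vector>
#include <glog/logging.h>

// Pay for the string construction only when the process runs with --v >= 10.
void LogArgumentNames(const std::vector<std::string>& names) {
  if (VLOG_IS_ON(10)) {
    std::ostringstream sout;
    for (const auto& name : names) sout << name << ", ";
    VLOG(10) << "arguments: [" << sout.str() << "]";
  }
}
```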
paddle/fluid/framework/op_registry.cc
@@ -46,7 +46,7 @@ static VariableNameMap ConvertOpDescVarsToVarNameMap(
 std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
     const proto::OpDesc& op_desc) {
-  VLOG(10) << "CreateOp directly from OpDesc is deprecated. It should only be"
-              "used in unit tests. Use CreateOp(const OpDesc& op_desc) "
-              "instead.";
+  VLOG(1) << "CreateOp directly from OpDesc is deprecated. It should only be"
+             "used in unit tests. Use CreateOp(const OpDesc& op_desc) "
+             "instead.";
   VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
paddle/fluid/framework/operator.cc
@@ -139,7 +139,7 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
 }

 void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
-  VLOG(40) << place << " " << DebugStringEx(&scope);
+  VLOG(4) << place << " " << DebugStringEx(&scope);
   if (platform::is_gpu_place(place)) {
 #ifndef PADDLE_WITH_CUDA
     PADDLE_THROW("Cannot run operator on place %s", place);
@@ -159,7 +159,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
   } else {
     RunImpl(scope, place);
   }
-  VLOG(30) << place << " " << DebugStringEx(&scope);
+  VLOG(3) << place << " " << DebugStringEx(&scope);
 }

 bool OperatorBase::HasInputs(const std::string& name) const {
@@ -716,14 +716,14 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   auto expected_kernel_key =
       this->GetExpectedKernelType(ExecutionContext(*this, scope, *dev_ctx));
-  VLOG(30) << "expected_kernel_key:" << expected_kernel_key;
+  VLOG(3) << "expected_kernel_key:" << expected_kernel_key;

   auto kernel_iter = kernels.find(expected_kernel_key);
 #ifdef PADDLE_WITH_MKLDNN
   // workaround for missing MKLDNN kernel when FLAGS_use_mkldnn env var is set
   if (kernel_iter == kernels.end() &&
       expected_kernel_key.library_type_ == LibraryType::kMKLDNN) {
-    VLOG(30) << "missing MKLDNN kernel: fallbacking to PLAIN one";
+    VLOG(3) << "missing MKLDNN kernel: fallbacking to PLAIN one";
     expected_kernel_key.library_type_ = LibraryType::kPlain;
     expected_kernel_key.data_layout_ = DataLayout::kAnyLayout;
     kernel_iter = kernels.find(expected_kernel_key);
@@ -775,8 +775,7 @@ void OperatorWithKernel::TransferInplaceVarsBack(
     const Scope& scope, const std::vector<std::string>& inplace_vars,
     const Scope& transfer_scope) const {
   for (auto& var_name : inplace_vars) {
-    VLOG(30) << "share inplace var " + var_name +
-                    " back to it's original scope";
+    VLOG(3) << "share inplace var " + var_name +
+                   " back to it's original scope";
     auto* original_tensor =
         GetMutableLoDTensorOrSelectedRowsValueFromVar(scope.FindVar(var_name));
     auto* var = transfer_scope.FindVar(var_name);
@@ -817,7 +816,7 @@ Scope* OperatorWithKernel::TryTransferData(
         transfered_inplace_vars->emplace_back(var_name);
       }

-      VLOG(30) << "Transform Variable " << var_name << " from "
-               << kernel_type_for_var << " to " << expected_kernel_key;
+      VLOG(3) << "Transform Variable " << var_name << " from "
+              << kernel_type_for_var << " to " << expected_kernel_key;

       // In the inference scenerio, the scopes will be reused across the
paddle/fluid/framework/parallel_executor.cc
@@ -208,7 +208,7 @@ void ParallelExecutor::BCastParamsToDevices(
     auto &main_tensor = main_var->Get<LoDTensor>();
     if (!main_tensor.IsInitialized()) {
-      VLOG(30) << "one in var not inited, return!";
+      VLOG(3) << "one in var not inited, return!";
       continue;
     }
     auto &dims = main_tensor.dims();
paddle/fluid/framework/scope.cc
@@ -162,7 +162,7 @@ Variable* Scope::VarInternal(const std::string& name) {
   v = new Variable();
   vars_[name].reset(v);
-  VLOG(30) << "Create variable " << name;
+  VLOG(3) << "Create variable " << name;
   v->name_ = &(vars_.find(name)->first);
   return v;
 }
paddle/fluid/framework/selected_rows.cc
@@ -206,7 +206,7 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
   PADDLE_ENFORCE(value->IsInitialized(),
                  "The value tensor should be initialized.");
   if (ids.numel() == 0) {
-    VLOG(30) << "keys is empty, please check data!";
+    VLOG(3) << "keys is empty, please check data!";
   } else {
     int64_t value_width = value_->numel() / value_->dims()[0];
     PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0],
paddle/fluid/framework/tensor_util.cc
@@ -22,7 +22,7 @@ namespace framework {
 void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                 const platform::DeviceContext& ctx, Tensor* dst) {
-  VLOG(30) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
-           << dst_place;
+  VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
+          << dst_place;
   src.check_memory_size();
@@ -37,7 +37,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
   if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
     if (src_ptr == dst_ptr) {
-      VLOG(30) << "Skip copy the same data async from " << src_place << " to "
-               << dst_place;
+      VLOG(3) << "Skip copy the same data async from " << src_place << " to "
+              << dst_place;
       return;
     }
@@ -77,7 +77,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     if (platform::is_same_place(src_place, dst_place)) {
       if (src_ptr == dst_ptr) {
-        VLOG(30) << "Skip copy the same data async from " << src_place
-                 << " to " << dst_place;
+        VLOG(3) << "Skip copy the same data async from " << src_place
+                << " to " << dst_place;
         return;
       }
@@ -114,7 +114,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
 void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
                     Tensor* dst) {
-  VLOG(30) << "TensorCopySync " << src.dims() << " from " << src.place()
-           << " to " << dst_place;
+  VLOG(3) << "TensorCopySync " << src.dims() << " from " << src.place()
+          << " to " << dst_place;
   src.check_memory_size();
   dst->Resize(src.dims());
@@ -125,7 +125,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
   auto size = src.numel() * SizeOfType(src.type());
   if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
     if (src_ptr == dst_ptr) {
-      VLOG(30) << "Skip copy the same data from " << src_place << " to "
-               << dst_place;
+      VLOG(3) << "Skip copy the same data from " << src_place << " to "
+              << dst_place;
       return;
     }
@@ -146,7 +146,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
   } else if (platform::is_gpu_place(src_place) &&
              platform::is_gpu_place(dst_place)) {
     if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) {
-      VLOG(30) << "Skip copy the same data from " << src_place << " to "
-               << dst_place;
+      VLOG(3) << "Skip copy the same data from " << src_place << " to "
+              << dst_place;
       return;
     }
paddle/fluid/framework/threadpool.cc
@@ -39,7 +39,7 @@ void ThreadPool::Init() {
     int num_threads = std::thread::hardware_concurrency();
     if (FLAGS_dist_threadpool_size > 0) {
       num_threads = FLAGS_dist_threadpool_size;
-      VLOG(10) << "set dist_threadpool_size to " << num_threads;
+      VLOG(1) << "set dist_threadpool_size to " << num_threads;
     }
     PADDLE_ENFORCE_GT(num_threads, 0);
     threadpool_.reset(new ThreadPool(num_threads));
paddle/fluid/framework/var_desc.cc
@@ -61,7 +61,7 @@ size_t VarDesc::GetTensorDescNum() const {
 void VarDesc::SetShapes(
     const std::vector<std::vector<int64_t>> &multiple_dims) {
   if (multiple_dims.size() != GetTensorDescNum()) {
-    VLOG(30) << "WARNING: The number of given shapes(" << multiple_dims.size()
-             << ") doesn't match the existing tensor number("
-             << GetTensorDescNum()
-             << "). The Reader is going to be reinitialized.";
+    VLOG(3) << "WARNING: The number of given shapes(" << multiple_dims.size()
+            << ") doesn't match the existing tensor number("
+            << GetTensorDescNum()
+            << "). The Reader is going to be reinitialized.";
@@ -94,7 +94,7 @@ void VarDesc::SetDataType(proto::VarType::Type data_type) {
 void VarDesc::SetDataTypes(
     const std::vector<proto::VarType::Type> &multiple_data_type) {
   if (multiple_data_type.size() != GetTensorDescNum()) {
-    VLOG(30) << "WARNING: The number of given data types("
-             << multiple_data_type.size()
-             << ") doesn't match the existing tensor number("
-             << GetTensorDescNum()
+    VLOG(3) << "WARNING: The number of given data types("
+            << multiple_data_type.size()
+            << ") doesn't match the existing tensor number("
+            << GetTensorDescNum()
@@ -139,7 +139,7 @@ void VarDesc::SetLoDLevel(int32_t lod_level) {
 void VarDesc::SetLoDLevels(const std::vector<int32_t> &multiple_lod_level) {
   if (multiple_lod_level.size() != GetTensorDescNum()) {
-    VLOG(30) << "WARNING: The number of given lod_levels("
-             << multiple_lod_level.size()
-             << ") doesn't match the existing tensor number("
-             << GetTensorDescNum()
+    VLOG(3) << "WARNING: The number of given lod_levels("
+            << multiple_lod_level.size()
+            << ") doesn't match the existing tensor number("
+            << GetTensorDescNum()
paddle/fluid/inference/api/analysis_predictor.cc
@@ -55,7 +55,7 @@ bool IsPersistable(const framework::VarDesc *var) {
 bool AnalysisPredictor::Init(
     const std::shared_ptr<framework::Scope> &parent_scope,
     const std::shared_ptr<framework::ProgramDesc> &program) {
-  VLOG(30) << "Predictor::init()";
+  VLOG(3) << "Predictor::init()";
   if (FLAGS_profile) {
     LOG(WARNING) << "Profiler is actived, might affect the performance";
     LOG(INFO) << "You can turn off by set gflags '-profile false'";
@@ -169,7 +169,7 @@ void AnalysisPredictor::SetMkldnnThreadID(int tid) {
 bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
                             std::vector<PaddleTensor> *output_data,
                             int batch_size) {
-  VLOG(30) << "Predictor::predict";
+  VLOG(3) << "Predictor::predict";
   inference::Timer timer;
   timer.tic();
   // set feed variable
@@ -188,7 +188,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
     LOG(ERROR) << "fail to get fetches";
     return false;
   }
-  VLOG(30) << "predict cost: " << timer.toc() << "ms";
+  VLOG(3) << "predict cost: " << timer.toc() << "ms";

   // Fix TensorArray reuse not cleaned bug.
   tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
@@ -198,7 +198,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
 bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                                 framework::Scope *scope) {
-  VLOG(30) << "Predictor::set_feed";
+  VLOG(3) << "Predictor::set_feed";
   if (inputs.size() != feeds_.size()) {
     LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
                << inputs.size();
@@ -275,7 +275,7 @@ void AnalysisPredictor::GetFetchOne(const framework::LoDTensor &fetch,
 bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                 framework::Scope *scope) {
-  VLOG(30) << "Predictor::get_fetch";
+  VLOG(3) << "Predictor::get_fetch";
   outputs->resize(fetchs_.size());
   for (size_t i = 0; i < fetchs_.size(); ++i) {
     int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
@@ -339,7 +339,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
 template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
-  VLOG(30) << "create AnalysisConfig";
+  VLOG(3) << "create AnalysisConfig";
   if (config.use_gpu) {
     // 1. GPU memeroy
     PADDLE_ENFORCE_GT(
@@ -353,7 +353,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     std::string flag = "--fraction_of_gpu_memory_to_use=" +
                        std::to_string(config.fraction_of_gpu_memory);
     flags.push_back(flag);
-    VLOG(30) << "set flag: " << flag;
+    VLOG(3) << "set flag: " << flag;
     framework::InitGflags(flags);
   }
 }
paddle/fluid/inference/api/api_impl.cc
@@ -152,7 +152,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
     LOG(ERROR) << "fail to get fetches";
     return false;
   }
-  VLOG(30) << "predict cost: " << timer.toc() << "ms";
+  VLOG(3) << "predict cost: " << timer.toc() << "ms";

   // Fix TensorArray reuse not cleaned bug.
   tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
@@ -44,7 +44,7 @@ void Main() {
   config.fraction_of_gpu_memory = 0.1;  // set by yourself
   predictor = CreatePaddlePredictor(config);

-  VLOG(30) << "begin to process data";
+  VLOG(3) << "begin to process data";
   // Just a single batch of data.
   std::string line;
   std::ifstream file(FLAGS_data);
@@ -59,13 +59,13 @@ void Main() {
       PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
   input.dtype = PaddleDType::FLOAT32;

-  VLOG(30) << "run executor";
+  VLOG(3) << "run executor";
   std::vector<PaddleTensor> output;
   predictor->Run({input}, &output, 1);

-  VLOG(30) << "output.size " << output.size();
+  VLOG(3) << "output.size " << output.size();
   auto& tensor = output.front();
-  VLOG(30) << "output: " << SummaryTensor(tensor);
+  VLOG(3) << "output: " << SummaryTensor(tensor);

   // compare with reference result
   CheckOutput(FLAGS_refer, tensor);
paddle/fluid/inference/api/demo_ci/utils.h
@@ -47,7 +47,7 @@ static void split(const std::string& str, char sep,
 }

 Record ProcessALine(const std::string& line) {
-  VLOG(30) << "process a line";
+  VLOG(3) << "process a line";
   std::vector<std::string> columns;
   split(line, '\t', &columns);
   CHECK_EQ(columns.size(), 2UL)
@@ -65,8 +65,8 @@ Record ProcessALine(const std::string& line) {
   for (auto& s : shape_strs) {
     record.shape.push_back(std::stoi(s));
   }
-  VLOG(30) << "data size " << record.data.size();
-  VLOG(30) << "data shape size " << record.shape.size();
+  VLOG(3) << "data size " << record.data.size();
+  VLOG(3) << "data shape size " << record.shape.size();
   return record;
 }
@@ -78,8 +78,8 @@ void CheckOutput(const std::string& referfile, const PaddleTensor& output) {
   file.close();
   size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
-  VLOG(30) << "predictor output numel " << numel;
-  VLOG(30) << "reference output numel " << refer.data.size();
+  VLOG(3) << "predictor output numel " << numel;
+  VLOG(3) << "reference output numel " << refer.data.size();
   CHECK_EQ(numel, refer.data.size());
   switch (output.dtype) {
     case PaddleDType::INT64: {
paddle/fluid/inference/api/details/reset_tensor_array.cc
@@ -26,7 +26,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
       // parameter.
       if (var_name == "feed" || var_name == "fetch") continue;
       if (var->Type() == typeid(framework::LoDTensorArray)) {
-        VLOG(40) << "collect " << var_name;
+        VLOG(4) << "collect " << var_name;
        arrays_.push_back(var->GetMutable<framework::LoDTensorArray>());
       }
     }
@@ -34,7 +34,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
       CollectTensorArrays(kid);
     }

-    VLOG(30) << "Collect " << arrays_.size() << " arrays";
+    VLOG(3) << "Collect " << arrays_.size() << " arrays";
     flag_ = false;
   }
 }
paddle/fluid/inference/io.cc
@@ -78,7 +78,7 @@ void LoadPersistables(framework::Executor* executor, framework::Scope* scope,
   for (auto* var : global_block.AllVars()) {
     if (IsPersistable(var)) {
-      VLOG(30) << "persistable variable's name: " << var->Name();
+      VLOG(3) << "persistable variable's name: " << var->Name();
       framework::VarDesc* new_var = load_block->Var(var->Name());
       new_var->SetShape(var->GetShape());
@@ -121,7 +121,7 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor* executor,
                                              const std::string& dirname) {
   std::string model_filename = dirname + "/__model__";
   std::string program_desc_str;
-  VLOG(30) << "loading model from " << model_filename;
+  VLOG(3) << "loading model from " << model_filename;
   ReadBinaryFile(model_filename, &program_desc_str);
   std::unique_ptr<framework::ProgramDesc> main_program(
paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
@@ -53,7 +53,7 @@ class Pool2dOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(40)
+    VLOG(4)
         << "convert a fluid pool2d op to tensorrt pool2d layer without bias";
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
@@ -27,7 +27,7 @@ struct Record {
 };

 Record ProcessALine(const std::string &line) {
-  VLOG(30) << "process a line";
+  VLOG(3) << "process a line";
   std::vector<std::string> columns;
   split(line, '\t', &columns);
   CHECK_EQ(columns.size(), 2UL)
@@ -45,8 +45,8 @@ Record ProcessALine(const std::string &line) {
   for (auto &s : shape_strs) {
     record.shape.push_back(std::stoi(s));
   }
-  VLOG(30) << "data size " << record.data.size();
-  VLOG(30) << "data shape size " << record.shape.size();
+  VLOG(3) << "data size " << record.data.size();
+  VLOG(3) << "data shape size " << record.shape.size();
   return record;
 }
paddle/fluid/memory/allocation/allocator_facade.cc
@@ -76,11 +76,11 @@ class ChunkedAllocator : public Allocator {
       default_allocator_ = raw_allocator_;
     } else {
       if (capacity == 1) {
-        VLOG(10) << "Create BestFitAllocator with chunk_size "
-                 << max_chunk_size_;
+        VLOG(1) << "Create BestFitAllocator with chunk_size "
+                << max_chunk_size_;
         default_allocator_ = CreateAllocatorWithChunk();
       } else {
-        VLOG(10) << "Create AutoIncrementAllocator with chunk_size "
-                 << max_chunk_size_ << " and capacity " << capacity;
+        VLOG(1) << "Create AutoIncrementAllocator with chunk_size "
+                << max_chunk_size_ << " and capacity " << capacity;
         default_allocator_ = std::make_shared<AutoIncrementAllocator>(
             [this] { return std::move(CreateAllocatorWithChunk()); }, capacity);
paddle/fluid/memory/allocation/legacy_allocator.cc
@@ -86,18 +86,18 @@ struct NaiveAllocator {
 template <>
 void *Alloc<platform::CPUPlace>(const platform::CPUPlace &place, size_t size) {
-  VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
+  VLOG(1) << "Allocate " << size << " bytes on " << platform::Place(place);
   void *p = GetCPUBuddyAllocator()->Alloc(size);
   if (FLAGS_init_allocated_mem) {
     memset(p, 0xEF, size);
   }
-  VLOG(100) << " pointer=" << p;
+  VLOG(10) << " pointer=" << p;
   return p;
 }

 template <>
 void Free<platform::CPUPlace>(const platform::CPUPlace &place, void *p) {
-  VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
+  VLOG(1) << "Free pointer=" << p << " on " << platform::Place(place);
   GetCPUBuddyAllocator()->Free(p);
 }
@@ -124,7 +124,7 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
           std::unique_ptr<detail::SystemAllocator>(new detail::GPUAllocator(i)),
           platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());

-      VLOG(100) << "\n\nNOTE: each GPU device use "
-                << FLAGS_fraction_of_gpu_memory_to_use * 100
-                << "% of GPU memory.\n"
-                << "You can set GFlags environment variable '"
+      VLOG(10) << "\n\nNOTE: each GPU device use "
+               << FLAGS_fraction_of_gpu_memory_to_use * 100
+               << "% of GPU memory.\n"
+               << "You can set GFlags environment variable '"
paddle/fluid/memory/detail/buddy_allocator.cc
@@ -32,11 +32,11 @@ BuddyAllocator::BuddyAllocator(
       system_allocator_(std::move(system_allocator)) {}

 BuddyAllocator::~BuddyAllocator() {
-  VLOG(100) << "BuddyAllocator Disconstructor makes sure that all of these "
-               "have actually been freed";
+  VLOG(10) << "BuddyAllocator Disconstructor makes sure that all of these "
+              "have actually been freed";
   while (!pool_.empty()) {
     auto block = static_cast<MemoryBlock *>(std::get<2>(*pool_.begin()));
-    VLOG(100) << "Free from block (" << block << ", " << max_chunk_size_ << ")";
+    VLOG(10) << "Free from block (" << block << ", " << max_chunk_size_ << ")";

     system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
     cache_.invalidate(block);
@@ -57,12 +57,12 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
   // acquire the allocator lock
   std::lock_guard<std::mutex> lock(mutex_);

-  VLOG(100) << "Allocate " << unaligned_size << " bytes from chunk size "
-            << size;
+  VLOG(10) << "Allocate " << unaligned_size << " bytes from chunk size "
+           << size;

   // if the allocation is huge, send directly to the system allocator
   if (size > max_chunk_size_) {
-    VLOG(100) << "Allocate from system allocator.";
+    VLOG(10) << "Allocate from system allocator.";
     return SystemAlloc(size);
   }
@@ -77,7 +77,7 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
       return nullptr;
     }
   } else {
-    VLOG(100) << "Allocation from existing memory block " << std::get<2>(*it)
-              << " at address "
-              << reinterpret_cast<MemoryBlock *>(std::get<2>(*it))->data();
+    VLOG(10) << "Allocation from existing memory block " << std::get<2>(*it)
+             << " at address "
+             << reinterpret_cast<MemoryBlock *>(std::get<2>(*it))->data();
   }
@@ -96,10 +96,10 @@ void BuddyAllocator::Free(void* p) {
   // Acquire the allocator lock
   std::lock_guard<std::mutex> lock(mutex_);

-  VLOG(100) << "Free from address " << block;
+  VLOG(10) << "Free from address " << block;

   if (block->type(cache_) == MemoryBlock::HUGE_CHUNK) {
-    VLOG(100) << "Free directly from system allocator";
+    VLOG(10) << "Free directly from system allocator";
     system_allocator_->Free(block, block->total_size(cache_),
                             block->index(cache_));
@@ -116,7 +116,7 @@ void BuddyAllocator::Free(void* p) {
   // Trying to merge the right buddy
   if (block->has_right_buddy(cache_)) {
-    VLOG(100) << "Merging this block " << block << " with its right buddy "
-              << block->right_buddy(cache_);
+    VLOG(10) << "Merging this block " << block << " with its right buddy "
+             << block->right_buddy(cache_);

     auto right_buddy = block->right_buddy(cache_);
@@ -134,7 +134,7 @@ void BuddyAllocator::Free(void* p) {
   // Trying to merge the left buddy
   if (block->has_left_buddy(cache_)) {
-    VLOG(100) << "Merging this block " << block << " with its left buddy "
-              << block->left_buddy(cache_);
+    VLOG(10) << "Merging this block " << block << " with its left buddy "
+             << block->left_buddy(cache_);

     auto left_buddy = block->left_buddy(cache_);
@@ -151,7 +151,7 @@ void BuddyAllocator::Free(void* p) {
   }

   // Dumping this block into pool
-  VLOG(100) << "Inserting free block (" << block << ", "
-            << block->total_size(cache_) << ")";
+  VLOG(10) << "Inserting free block (" << block << ", "
+           << block->total_size(cache_) << ")";
   pool_.insert(
       IndexSizeAddress(block->index(cache_), block->total_size(cache_), block));
@@ -174,7 +174,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
   size_t index = 0;
   void* p = system_allocator_->Alloc(&index, size);

-  VLOG(100) << "Allocated " << p << " from system allocator.";
+  VLOG(10) << "Allocated " << p << " from system allocator.";

   if (p == nullptr) return nullptr;
@@ -200,7 +200,7 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
   if (p == nullptr) return pool_.end();

-  VLOG(100) << "Creating and inserting new block " << p
-            << " from system allocator";
+  VLOG(10) << "Creating and inserting new block " << p
+           << " from system allocator";

   static_cast<MemoryBlock *>(p)->init(&cache_, MemoryBlock::FREE_CHUNK, index,
@@ -245,18 +245,18 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it,
   auto block = static_cast<MemoryBlock *>(std::get<2>(*it));
   pool_.erase(it);

-  VLOG(100) << "Split block (" << block << ", " << block->total_size(cache_)
-            << ") into";
+  VLOG(10) << "Split block (" << block << ", " << block->total_size(cache_)
+           << ") into";
   block->split(&cache_, size);

-  VLOG(100) << "Left block (" << block << ", " << block->total_size(cache_)
-            << ")";
+  VLOG(10) << "Left block (" << block << ", " << block->total_size(cache_)
+           << ")";
   block->set_type(&cache_, MemoryBlock::ARENA_CHUNK);

   // the rest of memory if exist
   if (block->has_right_buddy(cache_)) {
     if (block->right_buddy(cache_)->type(cache_) == MemoryBlock::FREE_CHUNK) {
-      VLOG(100) << "Insert right block (" << block->right_buddy(cache_) << ", "
-                << block->right_buddy(cache_)->total_size(cache_) << ")";
+      VLOG(10) << "Insert right block (" << block->right_buddy(cache_) << ", "
+               << block->right_buddy(cache_)->total_size(cache_) << ")";

       pool_.insert(
@@ -284,7 +284,7 @@ void BuddyAllocator::CleanIdleFallBackAlloc() {
       return;
     }

-    VLOG(100) << "Return block " << block << " to fallback allocator.";
+    VLOG(10) << "Return block " << block << " to fallback allocator.";

     system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
     cache_.invalidate(block);
@@ -320,7 +320,7 @@ void BuddyAllocator::CleanIdleNormalAlloc() {
     MemoryBlock *block = static_cast<MemoryBlock *>(std::get<2>(*pool));

-    VLOG(100) << "Return block " << block << " to base allocator.";
+    VLOG(10) << "Return block " << block << " to base allocator.";

     system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
     cache_.invalidate(block);
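The VLOG(10) traces above follow the buddy allocator's free path: when a block is returned, the allocator checks whether the adjacent block of the same size (its "buddy") is also free and coalesces the two into one larger block. The classic offset computation behind that neighbor lookup, shown as an illustrative sketch rather than Paddle's actual MemoryBlock layout:

```cpp
#include <cstdint>

// For a block of size 2^k bytes at `offset` within the arena, its buddy is
// the block whose offset differs only in bit k (illustrative only; Paddle
// tracks buddies through MemoryBlock metadata instead of this XOR trick).
uint64_t BuddyOffset(uint64_t offset, uint64_t block_size) {
  return offset ^ block_size;  // block_size must be a power of two
}
```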
paddle/fluid/memory/detail/meta_cache.cc
@@ -29,7 +29,7 @@ MemoryBlock::Desc MetadataCache::load(const MemoryBlock* block) const {
     return existing_desc->second;
   } else {
     auto *desc = reinterpret_cast<const MemoryBlock::Desc *>(block);
-    VLOG(100) << "Load MemoryBlock::Desc type=" << desc->type;
+    VLOG(10) << "Load MemoryBlock::Desc type=" << desc->type;
     PADDLE_ASSERT(desc->check_guards());
     return *reinterpret_cast<const MemoryBlock::Desc *>(block);
   }
paddle/fluid/operators/activation_op.h
@@ -100,7 +100,7 @@ class ActivationGradKernel
       auto x = framework::EigenVector<T>::Flatten(*X);
       functor(*place, x, out, dout, dx);
     } else {
-      VLOG(100) << " Inplace activation ";
+      VLOG(10) << " Inplace activation ";
       auto x = framework::EigenVector<T>::Flatten(*dX);
       functor(*place, x, out, dout, dx);
     }
paddle/fluid/operators/array_operator.h
@@ -49,7 +49,7 @@ class ArrayOp : public framework::OperatorBase {
     } else {
       offset = static_cast<size_t>(*i_tensor.data<int64_t>());
     }
-    VLOG(100) << " Offset = " << offset;
+    VLOG(10) << " Offset = " << offset;
     return offset;
   }
 };
paddle/fluid/operators/array_to_lod_tensor_op.cc
@@ -148,7 +148,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
       size_t start_offset = lod_and_offset.second.first;
       size_t end_offset = lod_and_offset.second.second;
-      VLOG(100) << "idx=" << idx << " x_idx=" << x_idx << " ["
-                << ", " << end_offset << "]";
+      VLOG(10) << "idx=" << idx << " x_idx=" << x_idx << " ["
+               << ", " << end_offset << "]";
       // Copy data
       PADDLE_ENFORCE_GE(end_offset, start_offset);
paddle/fluid/operators/batch_norm_op.cu.cc
@@ -96,7 +96,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
     mode_ = CUDNN_BATCHNORM_SPATIAL;
 #endif

-    VLOG(30) << "Setting descriptors.";
+    VLOG(3) << "Setting descriptors.";
     std::vector<int> dims;
     std::vector<int> strides;
     if (data_layout == DataLayout::kNCHW) {
paddle/fluid/operators/beam_search_op.cc
@@ -33,11 +33,11 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids,
   auto items = SelectTopBeamSizeItems(pre_ids, pre_scores);
   auto selected_items = ToMap(items, high_level.back());
-  VLOG(30) << "selected_items:";
+  VLOG(3) << "selected_items:";
   for (size_t i = 0; i < selected_items.size(); ++i) {
-    VLOG(30) << "offset:" << i;
+    VLOG(3) << "offset:" << i;
     for (auto &item : selected_items[i]) {
-      VLOG(30) << ItemToString(item);
+      VLOG(3) << ItemToString(item);
     }
   }
@@ -138,11 +138,11 @@ std::vector<std::vector<BeamSearch::Item>> BeamSearch::SelectTopBeamSizeItems(
     }
     result.emplace_back(items);
   }
-  VLOG(30) << "SelectTopBeamSizeItems result size " << result.size();
+  VLOG(3) << "SelectTopBeamSizeItems result size " << result.size();
   for (auto &items : result) {
-    VLOG(30) << "item set:";
+    VLOG(3) << "item set:";
     for (auto &item : items) {
-      VLOG(30) << ItemToString(item);
+      VLOG(3) << ItemToString(item);
     }
   }
paddle/fluid/operators/concat_op.cc
@@ -37,7 +37,7 @@ class ConcatOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_GT(n, 0, "Input tensors count should > 0.");
     if (n == 1) {
-      VLOG(30) << "Warning: concat op have only one input, may waste memory";
+      VLOG(3) << "Warning: concat op have only one input, may waste memory";
     }

     auto out_dims = ins[0];
paddle/fluid/operators/controlflow/feed_op.cc
@@ -47,8 +47,8 @@ class FeedOp : public framework::OperatorBase {
     auto col = Attr<int>("col");

-    VLOG(30) << "Feed Var " << feed_var_name << "'s " << col
-             << " column to var "
-             << out_name;
+    VLOG(3) << "Feed Var " << feed_var_name << "'s " << col
+            << " column to var "
+            << out_name;

     auto &feed_list = feed_var->Get<framework::FeedFetchList>();
     auto &feed_item = feed_list.at(static_cast<size_t>(col));
paddle/fluid/operators/controlflow/fetch_op.cc
@@ -57,7 +57,7 @@ class FetchOp : public framework::OperatorBase {
     TensorCopySync(src_item, platform::CPUPlace(), &dst_item);
     dst_item.set_lod(src_item.lod());

-    VLOG(30) << "Fetch variable " << fetch_var_name << " to " << out_name;
+    VLOG(3) << "Fetch variable " << fetch_var_name << " to " << out_name;
   }
 };
paddle/fluid/operators/controlflow/parallel_do_op.cc
View file @ dfd4a111
...
@@ -48,7 +48,7 @@ static void SplitTensorAndMoveTensorToScopes(
   auto lod_tensors = tensor.SplitLoDTensor(places);
   for (auto &lod : lod_tensors) {
-    VLOG(30) << lod.dims();
+    VLOG(3) << lod.dims();
   }
   if (num_sub_scopes == 0) {
     num_sub_scopes = lod_tensors.size();
...
@@ -263,7 +263,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
       if (s == framework::kEmptyVarName) {
         continue;
       }
-      VLOG(30) << "Moving " << s;
+      VLOG(3) << "Moving " << s;
       CopyOrShare(*sub_scopes[0]->FindVar(s), place, scope.FindVar(s));
     }
     WaitOnPlaces(places);
...
@@ -277,7 +277,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
      if (s == framework::kEmptyVarName) {
        continue;
      }
-      VLOG(30) << "Accumulating " << s;
+      VLOG(3) << "Accumulating " << s;
      if (s == framework::kEmptyVarName) continue;
      std::string tmp_name;
      auto *tmp = sub_scopes[0]->Var(&tmp_name);
...
@@ -289,7 +289,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
       auto sum_op = framework::OpRegistry::CreateOp(
           "sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}},
           framework::AttributeMap{{"use_mkldnn", {false}}});
-      VLOG(100) << sum_op->DebugStringEx(sub_scopes[0]);
+      VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]);
       sum_op->Run(*sub_scopes[0], places[0]);
       WaitOnPlace(places[0]);
     }
...
@@ -316,7 +316,7 @@ class ParallelDoGradOpDescMaker : public framework::SingleGradOpDescMaker {
     auto *grad = new framework::OpDesc();
     grad->SetType("parallel_do_grad");
     for (auto &input_param : this->InputNames()) {
-      VLOG(30) << input_param;
+      VLOG(3) << input_param;
       grad->SetInput(input_param, this->Input(input_param));
       if (input_param != kPlaces) {
         grad->SetOutput(framework::GradVarName(input_param),
...
paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc
View file @ dfd4a111
...
@@ -34,7 +34,7 @@ class WriteToArrayOp : public ArrayOp {
     auto *out =
         scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensorArray>();
     if (offset >= out->size()) {
-      VLOG(100) << "Resize " << Output("Out") << " from " << out->size()
+      VLOG(10) << "Resize " << Output("Out") << " from " << out->size()
                << " to " << offset + 1;
       out->resize(offset + 1);
     }
...
@@ -47,7 +47,7 @@ class WriteToArrayOp : public ArrayOp {
       TensorCopy(x_tensor, place, dev_ctx, out_tensor);
     } else {
-      VLOG(100) << "WARNING: The input tensor 'x_tensor' holds no memory, so "
+      VLOG(10) << "WARNING: The input tensor 'x_tensor' holds no memory, so "
                   "nothing has been written to output array["
               << offset << "].";
     }
...
@@ -104,7 +104,7 @@ class WriteToArrayInferVarType : public framework::VarTypeInference {
                  framework::BlockDesc *block) const override {
     auto x_name = op_desc.Input("X")[0];
     auto out_name = op_desc.Output("Out")[0];
-    VLOG(100) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY";
+    VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY";
     auto &out = block->FindRecursiveOrCreateVar(out_name);
     out.SetType(framework::proto::VarType::LOD_TENSOR_ARRAY);
     auto *x = block->FindVarRecursive(x_name);
...
@@ -139,7 +139,7 @@ class ReadFromArrayOp : public ArrayOp {
       framework::TensorCopy(x_array[offset], place, dev_ctx, out_tensor);
       out_tensor->set_lod(x_array[offset].lod());
     } else {
-      VLOG(100) << "offset " << offset << " >= " << x_array.size();
+      VLOG(10) << "offset " << offset << " >= " << x_array.size();
     }
   }
 };
...
paddle/fluid/operators/controlflow/while_op.cc
View file @ dfd4a111
...
@@ -132,14 +132,14 @@ class WhileGradOp : public framework::OperatorBase {
     for (auto cur_scope_iter = step_scopes->rbegin();
          cur_scope_iter != step_scopes->rend(); ++cur_scope_iter) {
-      VLOG(30) << "Start backward at time_step "
+      VLOG(3) << "Start backward at time_step "
              << cur_scope_iter - step_scopes->rbegin();
       framework::Scope &cur_scope = **cur_scope_iter;
       // Link OG from outside to inside
       for (size_t i = 0; i < outside_og_names.size(); ++i) {
         auto outside_og_name = outside_og_names[i];
         auto inside_og_name = inside_og_names[i];
-        VLOG(80) << "Linking outside " << outside_og_name << " --> inside "
+        VLOG(8) << "Linking outside " << outside_og_name << " --> inside "
                << inside_og_name;
         if (scope.FindVar(outside_og_name) == nullptr) {
           continue;
...
@@ -162,11 +162,11 @@ class WhileGradOp : public framework::OperatorBase {
           auto &outside_array = og_outside.Get<framework::LoDTensorArray>();
           auto &inside_array =
               detail::Ref(og_inside.GetMutable<framework::LoDTensorArray>());
-          VLOG(80) << outside_og_name << " size = " << outside_array.size();
+          VLOG(8) << outside_og_name << " size = " << outside_array.size();
           inside_array.resize(outside_array.size());
           for (size_t j = 0; j < inside_array.size(); ++j) {
-            VLOG(80) << j << " " << outside_array[j].numel();
+            VLOG(8) << j << " " << outside_array[j].numel();
             if (outside_array[j].numel() != 0) {
               inside_array[j].set_lod(outside_array[j].lod());
               inside_array[j].ShareDataWith(outside_array[j]);
...
@@ -292,7 +292,7 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
     auto igs = InputGrad(kX, /*do not drop empty gradient*/ false);
     for (auto &each_ig : igs) {
       if (inner_op_outputs.find(each_ig) == inner_op_outputs.end()) {
-        VLOG(80) << "Ignore " << each_ig;
+        VLOG(8) << "Ignore " << each_ig;
         each_ig = framework::kEmptyVarName;
       }
     }
...
@@ -356,7 +356,7 @@ class WhileGradOpVarTypeInference : public framework::VarTypeInference {
       auto &p_var = detail::Ref(block->FindVarRecursive(p_names[i]));
       auto *g_var = block->FindVarRecursive(pg_ig_names[i]);
       if (g_var != nullptr) {  // Gradient could be @EMPTY@
-        VLOG(50) << "Setting " << pg_ig_names[i] << " following " << p_names[i]
+        VLOG(5) << "Setting " << pg_ig_names[i] << " following " << p_names[i]
               << " type: " << p_var.GetType();
         g_var->SetType(p_var.GetType());
         g_var->SetDataType(p_var.GetDataType());
...
paddle/fluid/operators/conv_cudnn_op.cu.cc
View file @ dfd4a111
...
@@ -151,11 +151,11 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
       // Currently tensor core is only enabled using this algo
       algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
       half_float = true;
-      VLOG(50) << "use cudnn_tensor_op_math";
+      VLOG(5) << "use cudnn_tensor_op_math";
     } else {
       CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
           cudnn_conv_desc, CUDNN_DEFAULT_MATH));
-      VLOG(50) << "NOT use cudnn_tensor_op_math";
+      VLOG(5) << "NOT use cudnn_tensor_op_math";
     }
 #endif
...
paddle/fluid/operators/distributed/brpc_server.cc
View file @ dfd4a111
...
@@ -133,10 +133,10 @@ void AsyncBRPCServer::StartServer() {
 void AsyncBRPCServer::ShutDownImpl() { server_.Stop(1000); }
 
 void AsyncBRPCServer::WaitServerReady() {
-  VLOG(30) << "AsyncGRPCServer is wait server ready";
+  VLOG(3) << "AsyncGRPCServer is wait server ready";
   std::unique_lock<std::mutex> lock(this->mutex_ready_);
   condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
-  VLOG(30) << "AsyncGRPCServer WaitSeverReady";
+  VLOG(3) << "AsyncGRPCServer WaitSeverReady";
 }
 
 };  // namespace distributed
...
paddle/fluid/operators/distributed/grpc_client.cc
View file @ dfd4a111
...
@@ -40,7 +40,7 @@ void GRPCClient::SendComplete() {
   std::unique_lock<std::mutex> lk(completed_mutex_);
   if (!completed_) {
     for (auto &it : channels_) {
-      VLOG(30) << "send complete message to " << it.first;
+      VLOG(3) << "send complete message to " << it.first;
       this->AsyncSendComplete(it.first);
     }
     PADDLE_ENFORCE(this->Wait(), "internal grpc error");
...
@@ -83,7 +83,7 @@ VarHandlePtr GRPCClient::AsyncSendVar(const std::string& ep,
     ::grpc::ByteBuffer req;
     SerializeToByteBuffer(var_name_val, var, *p_ctx, &req, "", trainer_id_);
-    VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
+    VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
     // stub context
     s->response_call_back_ = nullptr;
...
@@ -144,7 +144,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep,
     ::grpc::ByteBuffer buf;
     RequestToByteBuffer<sendrecv::VariableMessage>(req, &buf);
-    VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
+    VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
     // stub context
     s->response_call_back_ = ProcGetResponse;
...
@@ -192,7 +192,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep,
     ::grpc::ByteBuffer req;
     SerializeToByteBuffer(in_var_name_val, var, *p_ctx, &req, out_var_name_val);
-    VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
+    VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
     // stub context
     s->response_call_back_ = ProcGetResponse;
...
@@ -330,14 +330,14 @@ void GRPCClient::Proceed() {
   void *tag = nullptr;
   bool ok = false;
-  VLOG(30) << "GRPCClient Proceed begin";
+  VLOG(3) << "GRPCClient Proceed begin";
   while (!stopped_ && cq_.Next(&tag, &ok)) {
     BaseProcessor *c = static_cast<BaseProcessor *>(tag);
     GPR_ASSERT(ok);
     PADDLE_ENFORCE(c);
     if (c->status_.ok()) {
-      VLOG(30) << c->GetVarHandlePtr()->String() << " process";
+      VLOG(3) << c->GetVarHandlePtr()->String() << " process";
       c->Process();
     } else if (c->status_.error_code() == grpc::StatusCode::DEADLINE_EXCEEDED) {
       // FIXME(gongwb): parse error_details?
...
@@ -372,7 +372,7 @@ void GRPCClient::Proceed() {
       sync_cond_.notify_all();
     }
   }
-  VLOG(30) << "GRPCClient Proceed end";
+  VLOG(3) << "GRPCClient Proceed end";
 }
 
 std::shared_ptr<grpc::Channel> GRPCClient::GetChannel(const std::string& ep) {
...
paddle/fluid/operators/distributed/grpc_server.cc
View file @ dfd4a111
...
@@ -100,7 +100,7 @@ class RequestSend final : public RequestBase {
   void Process() override {
     std::string varname = GetReqName();
-    VLOG(40) << "RequestSend var_name:" << varname;
+    VLOG(4) << "RequestSend var_name:" << varname;
     auto scope = request_->GetMutableLocalScope();
     auto invar = request_->GetVar();
...
@@ -137,7 +137,7 @@ class RequestGet final : public RequestBase {
     // proc request.
     std::string varname = request_.varname();
     int trainer_id = request_.trainer_id();
-    VLOG(40) << "RequestGet " << varname;
+    VLOG(4) << "RequestGet " << varname;
     auto scope = request_handler_->scope();
     auto invar = scope->FindVar(varname);
...
@@ -184,7 +184,7 @@ class RequestPrefetch final : public RequestBase {
     std::string in_var_name = request_->Varname();
     std::string out_var_name = request_->OutVarname();
     int trainer_id = request_->GetTrainerId();
-    VLOG(40) << "RequestPrefetch, in_var_name: " << in_var_name
+    VLOG(4) << "RequestPrefetch, in_var_name: " << in_var_name
             << " out_var_name: " << out_var_name;
     auto scope = request_->GetMutableLocalScope();
...
@@ -233,7 +233,7 @@ class RequestCheckpointNotify final : public RequestBase {
     std::string checkpoint_dir = request_->OutVarname();
     int trainer_id = request_->GetTrainerId();
-    VLOG(40) << "RequestCheckpointNotify notify: " << checkpoint_notify
+    VLOG(4) << "RequestCheckpointNotify notify: " << checkpoint_notify
             << ", dir: " << checkpoint_dir;
     request_handler_->Handle(checkpoint_notify, scope, nullptr, nullptr,
...
@@ -248,10 +248,10 @@ class RequestCheckpointNotify final : public RequestBase {
 };
 
 void AsyncGRPCServer::WaitServerReady() {
-  VLOG(40) << "AsyncGRPCServer is wait server ready";
+  VLOG(4) << "AsyncGRPCServer is wait server ready";
   std::unique_lock<std::mutex> lock(this->mutex_ready_);
   condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
-  VLOG(40) << "AsyncGRPCServer WaitSeverReady";
+  VLOG(4) << "AsyncGRPCServer WaitSeverReady";
 }
 
 // Define an option subclass in order to disable SO_REUSEPORT for the
...
@@ -302,15 +302,14 @@ void AsyncGRPCServer::StartServer() {
     reqs.reserve(kRequestBufSize);
 
     for (int i = 0; i < kRequestBufSize; i++) {
-      VLOG(60) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
-               << " I: " << i;
+      VLOG(6) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
+              << " I: " << i;
       TryToRegisterNewOne(rpc_name, i);
     }
 
     for (int i = 0; i < threadnum; i++) {
       rpc_threads_[rpc_name].emplace_back(new std::thread(std::bind(
           &AsyncGRPCServer::HandleRequest, this, cq.get(), rpc_name, f)));
-      VLOG(40) << t.first << " creates threads!";
+      VLOG(4) << t.first << " creates threads!";
     }
   }
...
@@ -327,7 +326,7 @@ void AsyncGRPCServer::StartServer() {
     auto &threads = t.second;
     for (size_t i = 0; i < threads.size(); ++i) {
       threads[i]->join();
-      VLOG(40) << t.first << " threads ends!";
+      VLOG(4) << t.first << " threads ends!";
     }
   }
 }
...
@@ -335,7 +334,7 @@ void AsyncGRPCServer::StartServer() {
 void AsyncGRPCServer::ShutdownQueue() {
   for (auto &t : rpc_cq_) {
     t.second->Shutdown();
-    VLOG(40) << t.first << " queue shutdown!";
+    VLOG(4) << t.first << " queue shutdown!";
   }
 }
...
@@ -344,7 +343,7 @@ void AsyncGRPCServer::ShutDownImpl() {
   is_shut_down_ = true;
   ShutdownQueue();
-  VLOG(40) << "server_ shutdown!";
+  VLOG(4) << "server_ shutdown!";
   server_->Shutdown();
 }
...
@@ -352,11 +351,11 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
                                           int req_id) {
   std::unique_lock<std::mutex> lock(cq_mutex_);
   if (is_shut_down_) {
-    VLOG(40) << "shutdown, do not TryToRegisterNewSendOne";
+    VLOG(4) << "shutdown, do not TryToRegisterNewSendOne";
     return;
   }
 
-  VLOG(40) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
-           << " REQ ID: " << req_id;
+  VLOG(4) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
+          << " REQ ID: " << req_id;
 
   auto &reqs = rpc_reqs_[rpc_name];
...
@@ -378,7 +377,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
   reqs[req_id] = b;
 
-  VLOG(40) << "Create RequestSend status:" << b->Status();
+  VLOG(4) << "Create RequestSend status:" << b->Status();
 }
 
 void AsyncGRPCServer::HandleRequest(
...
@@ -388,14 +387,14 @@ void AsyncGRPCServer::HandleRequest(
   bool ok = false;
 
   while (true) {
-    VLOG(40) << "HandleRequest " << rpc_name << " wait next";
+    VLOG(4) << "HandleRequest " << rpc_name << " wait next";
     if (!cq->Next(&tag, &ok)) {
-      VLOG(30) << "CompletionQueue " << rpc_name << " shutdown!";
+      VLOG(3) << "CompletionQueue " << rpc_name << " shutdown!";
       break;
     }
 
     int req_id = static_cast<int>(reinterpret_cast<intptr_t>(tag));
-    VLOG(40) << "HandleRequest " << rpc_name << ", req_id:" << req_id
-             << " get next";
+    VLOG(4) << "HandleRequest " << rpc_name << ", req_id:" << req_id
+            << " get next";
 
     auto &reqs = rpc_reqs_[rpc_name];
...
@@ -406,7 +405,7 @@ void AsyncGRPCServer::HandleRequest(
       base = reqs[req_id];
     }
 
-    VLOG(30) << base->Status2String(rpc_name);
+    VLOG(3) << base->Status2String(rpc_name);
 
     // reference:
     // https://github.com/tensorflow/tensorflow/issues/5596
...
paddle/fluid/operators/distributed/request_handler.h
View file @ dfd4a111
...
@@ -75,7 +75,7 @@ class VarHandle {
       wait_cond_.wait(lk, [this] { return status_ != kDefaultState; });
       ret = status_;
     }
-    VLOG(70) << "VarHandle wait:" << ret;
+    VLOG(7) << "VarHandle wait:" << ret;
     return ret != kErrorState;
   }
...
@@ -84,7 +84,7 @@ class VarHandle {
       std::unique_lock<std::mutex> lk(sync_mutex_);
       status_ = ok ? kFinishState : kErrorState;
     }
-    VLOG(70) << "VarHandle finish:" << ok;
+    VLOG(7) << "VarHandle finish:" << ok;
     wait_cond_.notify_all();
   }
...
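The RPC files above layer their levels deliberately: 3 for high-level events, 4 for handler traces, 6-7 for per-request and per-VarHandle detail. Under glog this layering lets you dial in detail per run, and even per translation unit. The sketch below is a hedged illustration assuming plain glog behavior, with illustrative names that only echo the messages in this diff, not Paddle's own code:

    // Hypothetical example of layered verbosity (assumes glog; the function
    // name and messages here are illustrative, not taken from Paddle).
    #include <glog/logging.h>
    #include <string>

    void HandleRequest(const std::string& varname) {
      VLOG(4) << "RequestSendHandler:" << varname;  // handler-level trace
      if (VLOG_IS_ON(7)) {
        // Only pay for expensive detail when verbosity >= 7.
        VLOG(7) << "VarHandle state dump for " << varname;
      }
    }
    // Run-time control, e.g.:
    //   GLOG_v=4 ./server                    -> levels 1-4 everywhere
    //   GLOG_vmodule=grpc_server=7 ./server  -> level 7 for grpc_server.cc only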
paddle/fluid/operators/distributed/request_handler_impl.cc
View file @ dfd4a111
...
@@ -38,19 +38,19 @@ bool RequestSendHandler::Handle(const std::string& varname,
                                 framework::Variable** outvar,
                                 const int trainer_id,
                                 const std::string& out_var_name) {
-  VLOG(40) << "RequestSendHandler:" << varname;
+  VLOG(4) << "RequestSendHandler:" << varname;
 
   // Sync
   if (varname == BATCH_BARRIER_MESSAGE) {
-    VLOG(30) << "sync: recv BATCH_BARRIER_MESSAGE";
+    VLOG(3) << "sync: recv BATCH_BARRIER_MESSAGE";
     rpc_server_->IncreaseBatchBarrier(kRequestSend);
   } else if (varname == COMPLETE_MESSAGE) {
-    VLOG(30) << "sync: recv complete message";
+    VLOG(3) << "sync: recv complete message";
     rpc_server_->Complete();
   } else {
     // Async
     if (!sync_mode_) {
-      VLOG(30) << "async process var: " << varname;
+      VLOG(3) << "async process var: " << varname;
       try {
         executor_->RunPreparedContext((*grad_to_prepared_ctx_)[varname].get(),
                                       scope);
...
@@ -61,7 +61,7 @@ bool RequestSendHandler::Handle(const std::string& varname,
       return true;
     } else {  // sync
       rpc_server_->WaitCond(kRequestSend);
-      VLOG(30) << "sync: processing received var: " << varname;
+      VLOG(3) << "sync: processing received var: " << varname;
 
       if (invar == nullptr) {
         LOG(FATAL) << "sync: Can not find server side var: " << varname;
...
@@ -78,10 +78,10 @@ bool RequestGetHandler::Handle(const std::string& varname,
                                framework::Variable** outvar,
                                const int trainer_id,
                                const std::string& out_var_name) {
-  VLOG(40) << "RequestGetHandler:" << varname;
+  VLOG(4) << "RequestGetHandler:" << varname;
   if (sync_mode_) {
     if (varname == FETCH_BARRIER_MESSAGE) {
-      VLOG(30) << "sync: recv fetch barrier message";
+      VLOG(3) << "sync: recv fetch barrier message";
       rpc_server_->IncreaseBatchBarrier(kRequestGet);
     } else {
       rpc_server_->WaitCond(kRequestGet);
...
@@ -93,14 +93,13 @@ bool RequestGetHandler::Handle(const std::string& varname,
         // NOTE: the format is determined by distributed_transpiler.py
         std::string param_bak_name =
             string::Sprintf("%s.trainer_%d_bak", varname, trainer_id);
-        VLOG(30) << "getting " << param_bak_name << " trainer_id "
-                 << trainer_id;
+        VLOG(3) << "getting " << param_bak_name << " trainer_id "
+                << trainer_id;
         auto var = scope_->FindVar(varname);
         auto t_orig = var->Get<framework::LoDTensor>();
         auto param_bak = scope_->Var(param_bak_name);
         auto t = param_bak->GetMutable<framework::LoDTensor>();
         t->mutable_data(dev_ctx_->GetPlace(), t_orig.type());
-        VLOG(30) << "copying " << varname << " to " << param_bak_name;
+        VLOG(3) << "copying " << varname << " to " << param_bak_name;
         framework::TensorCopy(t_orig, dev_ctx_->GetPlace(), t);
       }
       *outvar = scope_->FindVar(varname);
...
@@ -115,7 +114,7 @@ bool RequestPrefetchHandler::Handle(const std::string& varname,
                                     framework::Variable** outvar,
                                     const int trainer_id,
                                     const std::string& out_var_name) {
-  VLOG(40) << "RequestPrefetchHandler " << varname;
+  VLOG(4) << "RequestPrefetchHandler " << varname;
 
   auto var_desc = program_->Block(0).FindVar(out_var_name);
   InitializeVariable(*outvar, var_desc->GetType());
...
@@ -139,7 +138,7 @@ bool RequestCheckpointHandler::Handle(const std::string& varname,
   auto *lt_var = scope_->FindVar(LOOKUP_TABLE_PATH)->GetMutable<std::string>();
   lt_var->clear();
   lt_var->append(out_var_name);
-  VLOG(40) << "RequestCheckpointHandler update var kLookupTablePath to: "
-           << out_var_name;
+  VLOG(4) << "RequestCheckpointHandler update var kLookupTablePath to: "
+          << out_var_name;
   executor_->RunPreparedContext(checkpoint_prepared_ctx_.get(), scope_);
   return true;
...
paddle/fluid/operators/distributed/rpc_server.cc
View file @ dfd4a111
...
@@ -39,7 +39,7 @@ void RPCServer::SavePort() const {
   port_file.open(file_path);
   port_file << selected_port_;
   port_file.close();
-  VLOG(40) << "selected port written to " << file_path;
+  VLOG(4) << "selected port written to " << file_path;
 }
 
 void RPCServer::WaitBarrier(const std::string& rpc_name) {
...
@@ -49,12 +49,12 @@ void RPCServer::WaitBarrier(const std::string& rpc_name) {
                 exit_flag_.load());
   });
 
-  VLOG(30) << "batch_barrier_: " << rpc_name << " "
-           << barrier_counter_[rpc_name];
+  VLOG(3) << "batch_barrier_: " << rpc_name << " "
+          << barrier_counter_[rpc_name];
 }
 
 void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) {
-  VLOG(40) << "RPCServer begin IncreaseBatchBarrier " << rpc_name;
+  VLOG(4) << "RPCServer begin IncreaseBatchBarrier " << rpc_name;
   int b = 0;
   std::unique_lock<std::mutex> lock(mutex_);
   b = ++barrier_counter_[rpc_name];
...
@@ -71,7 +71,7 @@ void RPCServer::Complete() {
     client_num_--;
     need_reset_all_vars_ = true;
 
-    VLOG(40) << "decrease client_num to: " << client_num_;
+    VLOG(4) << "decrease client_num to: " << client_num_;
     if (cur_cond_.load() == rpc_cond_map_[kRequestGet]) {
       barrier_counter_[kRequestGet]--;
     }
...
@@ -90,7 +90,7 @@ int RPCServer::GetClientNum() {
 }
 
 void RPCServer::ResetBarrierCounter() {
-  VLOG(30) << "RPCServer ResetBarrierCounter ";
+  VLOG(3) << "RPCServer ResetBarrierCounter ";
   std::unique_lock<std::mutex> lock(mutex_);
   for (auto &t : barrier_counter_) {
     t.second = 0;
...
@@ -105,12 +105,12 @@ void RPCServer::RegisterRPC(const std::string& rpc_name,
   static int cond = -1;
   rpc_cond_map_[rpc_name] = ++cond;
-  VLOG(40) << "RegisterRPC rpc_name:" << rpc_name << ", handler:" << handler
-           << ", cond:" << rpc_cond_map_[rpc_name];
+  VLOG(4) << "RegisterRPC rpc_name:" << rpc_name << ", handler:" << handler
+          << ", cond:" << rpc_cond_map_[rpc_name];
 }
 
 void RPCServer::SetCond(const std::string& rpc_name) {
-  VLOG(30) << "RPCServer SetCond " << rpc_name;
+  VLOG(3) << "RPCServer SetCond " << rpc_name;
   {
     std::unique_lock<std::mutex> lock(mutex_);
     cur_cond_ = rpc_cond_map_[rpc_name];
...
@@ -120,7 +120,7 @@ void RPCServer::SetCond(const std::string& rpc_name) {
 }
 
 void RPCServer::WaitCond(const std::string& rpc_name) {
-  VLOG(40) << "RPCServer WaitCond " << rpc_name;
+  VLOG(4) << "RPCServer WaitCond " << rpc_name;
   int cond = 0;
   {
     std::unique_lock<std::mutex> lock(mutex_);
...
paddle/fluid/operators/distributed/variable_response.cc
View file @ dfd4a111
...
@@ -50,7 +50,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input,
         size_to_write = length - total_written;
       }
       // This log is useful to see how long a internal block size is of rpc.
-      VLOG(70) << "copy " << size_to_write << " data to CUDAPlace";
+      VLOG(7) << "copy " << size_to_write << " data to CUDAPlace";
       memory::Copy(boost::get<platform::CUDAPlace>(place),
                    reinterpret_cast<void*>(p), cpu, data, size_to_write,
                    gpu_dev_ctx.stream());
...
@@ -79,7 +79,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input,
     // TODO(gongwb): can we avoid copy?
     platform::CPUPlace cpu;
     // This log is useful to see how long a internal block size is of rpc.
-    VLOG(70) << "copy " << size_to_write << " data to CPUPlace";
+    VLOG(7) << "copy " << size_to_write << " data to CPUPlace";
     memory::Copy(cpu, reinterpret_cast<void*>(p), cpu, data, size_to_write);
 
     p += size_to_write;
...
@@ -198,7 +198,7 @@ bool VariableResponse::ProcSerializedField(
 #endif
   }
 
-  VLOG(70) << "ProcSerializedField:" << meta_.varname()
-           << ", type:" << meta_.type() << std::endl;
+  VLOG(7) << "ProcSerializedField:" << meta_.varname()
+          << ", type:" << meta_.type() << std::endl;
   framework::DDim dims = GetDims(meta_.dims());
   if (meta_.type() == sendrecv::LOD_TENSOR) {
...
paddle/fluid/operators/distributed_ops/checkpoint_notify_op.cc
View file @ dfd4a111
...
@@ -46,8 +46,8 @@ class CheckpointNotifyOp : public framework::OperatorBase {
       auto lookup_table_save_dir =
           string::Sprintf("%s/%s_%d", dir, lookup_table_name, i);
       rpc_client->AsyncCheckpointNotify(epmap[i], lookup_table_save_dir);
-      VLOG(30) << "checkpoint notify sending lookup table: "
+      VLOG(3) << "checkpoint notify sending lookup table: "
              << lookup_table_name
              << " and dir:" << dir << " to " << epmap[i];
     }
     PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
...
paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc
View file @ dfd4a111
...
@@ -43,7 +43,7 @@ class FetchBarrierOp : public framework::OperatorBase {
     PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
 
     for (auto &ep : eps) {
-      VLOG(30) << "fetch barrier, ep: " << ep;
+      VLOG(3) << "fetch barrier, ep: " << ep;
       rpc_client->AsyncSendFetchBarrier(ep);
     }
     PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
...
paddle/fluid/operators/distributed_ops/gen_nccl_id_op.cc
View file @ dfd4a111
...
@@ -64,7 +64,7 @@ class GenNCCLIdOp : public framework::OperatorBase {
         distributed::RPCClient::GetInstance<RPCCLIENT_T>(0);
 
     for (auto &ep : endpoint_list) {
-      VLOG(30) << "sending nccl id to " << ep;
+      VLOG(3) << "sending nccl id to " << ep;
       client->AsyncSendVar(ep, dev_ctx, *scope, NCCL_ID_VARNAME);
     }
     client->Wait();
...
@@ -72,7 +72,7 @@ class GenNCCLIdOp : public framework::OperatorBase {
       client->AsyncSendBatchBarrier(ep);
     }
     client->Wait();
-    VLOG(30) << "sending completed...";
+    VLOG(3) << "sending completed...";
   }
 
   void GetIdByServer(framework::Scope *scope,
...
@@ -99,11 +99,11 @@ class GenNCCLIdOp : public framework::OperatorBase {
         std::bind(&distributed::RPCServer::StartServer, rpc_service.get()));
 
     rpc_service->SetCond(distributed::kRequestSend);
-    VLOG(30) << "start getting nccl id from trainer 0...";
+    VLOG(3) << "start getting nccl id from trainer 0...";
     rpc_service->WaitBarrier(distributed::kRequestSend);
-    VLOG(30) << "got nccl id and stop server...";
+    VLOG(3) << "got nccl id and stop server...";
     rpc_service->ShutDown();
-    VLOG(30) << "rpc server stopped";
+    VLOG(3) << "rpc server stopped";
     server_thread.join();
   }
 };
...
paddle/fluid/operators/distributed_ops/listen_and_serv_op.cc
View file @ dfd4a111
...
@@ -36,7 +36,7 @@ namespace operators {
 
 void RunServer(std::shared_ptr<distributed::RPCServer> service) {
   service->StartServer();
-  VLOG(40) << "RunServer thread end";
+  VLOG(4) << "RunServer thread end";
 }
 
 static void split(const std::string &str, char sep,
                   std::vector<std::string> *pieces) {
...
@@ -66,7 +66,7 @@ static void ParallelExecuteBlocks(
     fs.push_back(framework::Async([&executor, &prepared, &scope, idx]() {
       int run_block = idx;  // thread local
       try {
-        VLOG(30) << "running server block: " << run_block
-                 << "pointer: " << prepared[run_block].get();
+        VLOG(3) << "running server block: " << run_block
+                << "pointer: " << prepared[run_block].get();
         executor->RunPreparedContext(prepared[run_block].get(), scope);
       } catch (const std::exception &e) {
...
@@ -108,7 +108,7 @@ void ListenAndServOp::RunSyncLoop(
     framework::Scope *recv_scope, platform::DeviceContext *dev_ctx,
     const std::vector<int> &prefetch_block_id_list,
     const int checkpoint_point_block_id) const {
-  VLOG(20) << "RunSyncLoop";
+  VLOG(2) << "RunSyncLoop";
   size_t num_blocks = program->Size();
   auto optimize_blocks =
       Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
...
@@ -167,7 +167,7 @@ void ListenAndServOp::RunSyncLoop(
     }
     ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, program,
                           recv_scope);
-    VLOG(20) << "run all blocks spent " << GetTimestamp() - ts << "(ms)";
+    VLOG(2) << "run all blocks spent " << GetTimestamp() - ts << "(ms)";
 
     ResetReceivedVars(recv_scope, dev_ctx, rpc_service_->NeedResetAllVars());
...
@@ -183,11 +183,11 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
   for (auto &varname : sparse_vars_) {
     auto var = recv_scope->FindVar(varname);
     if (var == nullptr) {
-      VLOG(20) << "can not find var " << varname << " in received scope";
+      VLOG(2) << "can not find var " << varname << " in received scope";
       continue;
     }
     if (var->IsType<framework::SelectedRows>()) {
-      VLOG(30) << "reset sparse var: " << varname;
+      VLOG(3) << "reset sparse var: " << varname;
       var->GetMutable<framework::SelectedRows>()->mutable_rows()->clear();
     } else {
       PADDLE_THROW("The type of sparse var should be SelectedRows");
...
@@ -197,7 +197,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
   for (auto &varname : dense_vars_) {
     auto var = recv_scope->FindVar(varname);
     if (var == nullptr) {
-      VLOG(20) << "can not find var " << varname << " in received scope";
+      VLOG(2) << "can not find var " << varname << " in received scope";
       continue;
     }
     if (var->IsType<framework::LoDTensor>()) {
...
@@ -216,7 +216,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
 void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
                                    framework::ProgramDesc *program,
                                    framework::Scope *recv_scope) const {
-  VLOG(20) << "RunAsyncLoop";
+  VLOG(2) << "RunAsyncLoop";
   auto grad_to_block_id_str =
       Attr<std::vector<std::string>>("grad_to_block_id");
   DoubleFindMap<std::string, int32_t> grad_to_block_id;
...
@@ -225,7 +225,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
                                   const std::string &grad_and_id) {
     std::vector<std::string> pieces;
     split(grad_and_id, ':', &pieces);
-    VLOG(30) << "after split, key = " << pieces[0] << ", id=" << pieces[1];
+    VLOG(3) << "after split, key = " << pieces[0] << ", id=" << pieces[1];
     PADDLE_ENFORCE_EQ(pieces.size(), 2);
     PADDLE_ENFORCE_EQ(out_map->count(pieces[0]), 0);
...
@@ -270,7 +270,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
   while (true) {
     if (rpc_service_->IsExit()) {
-      VLOG(40) << "get exit!rpc_processor break!";
+      VLOG(4) << "get exit!rpc_processor break!";
       break;
     }
...
@@ -332,7 +332,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
   std::string endpoint = Attr<std::string>("endpoint");
   int checkpoint_block_id = Attr<int>(kCheckpointBlockId);
 
-  VLOG(40) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in
-           << ", end_point:" << endpoint
-           << ", checkpoint_block_id: " << checkpoint_block_id;
+  VLOG(4) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in
+          << ", end_point:" << endpoint
+          << ", checkpoint_block_id: " << checkpoint_block_id;
...
@@ -383,7 +383,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
          prefetch_var_name_to_block_id_str) {
       std::vector<std::string> pieces;
       split(prefetch_var_name_and_id, ':', &pieces);
-      VLOG(30) << "after split, prefetch_var = " << pieces[0]
-               << ", id=" << pieces[1];
+      VLOG(3) << "after split, prefetch_var = " << pieces[0]
+              << ", id=" << pieces[1];
       PADDLE_ENFORCE_EQ(pieces.size(), 2);
...
@@ -415,7 +415,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
   // start the server listening after all member initialized.
   server_thread_.reset(new std::thread(RunServer, rpc_service_));
-  VLOG(30) << "wait server thread to become ready...";
+  VLOG(3) << "wait server thread to become ready...";
   rpc_service_->WaitServerReady();
 
   // register SIGINT(from ctrl+C) and SIGTERM(from kill) signal handlers
...
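The hunks above lean on a small `split` helper (its signature appears in the first hunk of this file) to parse the `"grad_to_block_id"` attribute strings of the form name:id before logging the pieces. A hedged sketch of such a helper follows; the body is a plausible implementation written for illustration, not copied from Paddle, and the sample input string is hypothetical:

    // Hedged sketch of a `split` helper matching the signature shown above;
    // the body is illustrative, not Paddle's actual implementation.
    #include <sstream>
    #include <string>
    #include <vector>

    static void split(const std::string &str, char sep,
                      std::vector<std::string> *pieces) {
      pieces->clear();
      std::stringstream ss(str);
      std::string piece;
      while (std::getline(ss, piece, sep)) {
        pieces->push_back(piece);  // e.g. "w@GRAD:1" -> {"w@GRAD", "1"}
      }
    }

With an input like "w@GRAD:1" (a hypothetical gradient-name:block-id pair), pieces[0] is the key and pieces[1] the block id, which is exactly what the `VLOG(3) << "after split, key = ..."` lines report.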
paddle/fluid/operators/distributed_ops/prefetch_op.cc
View file @ dfd4a111
...
@@ -48,12 +48,12 @@ class PrefetchOp : public framework::OperatorBase {
     std::vector<distributed::VarHandlePtr> rets;
     for (size_t i = 0; i < ins.size(); i++) {
       if (NeedSend(scope, ins[i])) {
-        VLOG(30) << "sending " << ins[i] << " to " << epmap[i] << " to get "
-                 << outs[i] << " back";
+        VLOG(3) << "sending " << ins[i] << " to " << epmap[i] << " to get "
+                << outs[i] << " back";
         rets.push_back(rpc_client->AsyncPrefetchVar(epmap[i], ctx, scope,
                                                     ins[i], outs[i]));
       } else {
-        VLOG(30) << "don't send no-initialied variable: " << ins[i];
+        VLOG(3) << "don't send no-initialied variable: " << ins[i];
       }
     }
     for (size_t i = 0; i < rets.size(); i++) {
...
paddle/fluid/operators/distributed_ops/recv_op.cc
View file @ dfd4a111
...
@@ -47,7 +47,7 @@ class RecvOp : public framework::OperatorBase {
     std::vector<distributed::VarHandlePtr> rets;
     for (size_t i = 0; i < outs.size(); i++) {
-      VLOG(30) << "getting " << outs[i] << " from " << epmap[i];
+      VLOG(3) << "getting " << outs[i] << " from " << epmap[i];
       rets.push_back(rpc_client->AsyncGetVar(epmap[i], ctx, scope, outs[i]));
     }
     if (sync_mode) {
...
paddle/fluid/operators/distributed_ops/send_barrier_op.cc
View file @ dfd4a111
...
@@ -42,12 +42,12 @@ class SendBarrierOp : public framework::OperatorBase {
         distributed::RPCClient::GetInstance<RPCCLIENT_T>(
             Attr<int>("trainer_id"));
 
-    VLOG(30) << "SendBarrierOp sync";
+    VLOG(3) << "SendBarrierOp sync";
 
     // need to wait before sending send_barrier message
     PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
     for (auto &ep : eps) {
-      VLOG(30) << "send barrier, ep: " << ep;
+      VLOG(3) << "send barrier, ep: " << ep;
       rpc_client->AsyncSendBatchBarrier(ep);
     }
     PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
...
paddle/fluid/operators/distributed_ops/send_op.cc
View file @ dfd4a111
...
@@ -50,10 +50,10 @@ class SendOp : public framework::OperatorBase {
     std::vector<distributed::VarHandlePtr> rets;
     for (size_t i = 0; i < ins.size(); i++) {
       if (NeedSend(scope, ins[i])) {
-        VLOG(30) << "sending " << ins[i] << " to " << epmap[i];
+        VLOG(3) << "sending " << ins[i] << " to " << epmap[i];
         rets.push_back(rpc_client->AsyncSendVar(epmap[i], ctx, scope, ins[i]));
       } else {
-        VLOG(30) << "don't send no-initialied variable: " << ins[i];
+        VLOG(3) << "don't send no-initialied variable: " << ins[i];
       }
     }
     if (sync_send) {
...
paddle/fluid/operators/distributed_ops/send_recv_op_test.cc
View file @ dfd4a111
...
@@ -120,7 +120,7 @@ void AddOp(const std::string &type, const f::VariableNameMap &inputs,
 void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
   f::Scope scope;
   p::CPUPlace place;
-  VLOG(40) << "before init tensor";
+  VLOG(4) << "before init tensor";
   if (is_sparse) {
     InitSelectedRowsInScope(place, &scope);
   } else {
...
@@ -146,7 +146,7 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
   attrs.insert({"PrefetchBlock", prefetch_block});
   attrs.insert({"grad_to_block_id", std::vector<std::string>({""})});
   attrs.insert({"sync_mode", true});
-  VLOG(40) << "before init op";
+  VLOG(4) << "before init op";
   listen_and_serv_op =
       f::OpRegistry::CreateOp("listen_and_serv", {{"X", {"x1"}}}, {}, attrs);
   *initialized = true;
...
paddle/fluid/operators/distributed_ops/split_byref_op.h
@@ -32,7 +32,7 @@ class SplitByrefOpKernel : public framework::OpKernel<T> {
     for (size_t i = 0; i < outs.size(); ++i) {
       // NOTE: no need to call mutable_data here to allocate memory.
       auto* out = outs[i];
-      VLOG(30) << "spliting by ref: " << row_offset << " " << out->dims()[0];
+      VLOG(3) << "spliting by ref: " << row_offset << " " << out->dims()[0];
       *out = in->Slice(row_offset, row_offset + out->dims()[0]);
       row_offset += out->dims()[0];
     }
paddle/fluid/operators/distributed_ops/split_ids_op.h
@@ -44,7 +44,7 @@ class SplitIdsOpKernel : public framework::OpKernel<T> {
       for (size_t i = 0; i < ids_tensors.size(); ++i) {
         batch_size += ids_tensors[i]->dims()[0];
       }
-      VLOG(40) << "Get Total BatchSize is: " << batch_size;
+      VLOG(4) << "Get Total BatchSize is: " << batch_size;

       std::vector<T> all_ids(batch_size);
       int offset = 0;
paddle/fluid/operators/lod_rank_table_op.cc
@@ -30,9 +30,9 @@ class LoDRankTableOp : public framework::OperatorBase {
     auto x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
     auto *out =
         scope.FindVar(Output("Out"))->GetMutable<framework::LoDRankTable>();
-    VLOG(100) << "Level = " << static_cast<size_t>(Attr<int>("level"));
+    VLOG(10) << "Level = " << static_cast<size_t>(Attr<int>("level"));
     out->Reset(x.lod(), static_cast<size_t>(Attr<int>("level")));
-    VLOG(100) << Input("X") << "'s lod information is " << *out;
+    VLOG(10) << Input("X") << "'s lod information is " << *out;
   }
 };
paddle/fluid/operators/lookup_table_op.cc
@@ -134,12 +134,12 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference {
     auto attr = op_desc.GetAttr("is_sparse");
     bool is_sparse = boost::get<bool>(attr);
     if (is_sparse) {
-      VLOG(30) << "lookup_table_grad op " << framework::GradVarName("W")
-               << " is set to SelectedRows";
+      VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W")
+              << " is set to SelectedRows";
       block->Var(out_var_name)
           ->SetType(framework::proto::VarType::SELECTED_ROWS);
     } else {
-      VLOG(30) << "lookup_table_grad op " << framework::GradVarName("W")
-               << " is set to LoDTensor";
+      VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W")
+              << " is set to LoDTensor";
       block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR);
     }
paddle/fluid/operators/math/cpu_vec_test.cc
@@ -96,7 +96,7 @@ void TestAndBench(const int n, std::function<void(const int, const T*, T*)> tgt,
   }
   auto et = GetCurrentUS();

-  VLOG(30) << "Vec size " << n << ": refer takes: " << (et - mt) / repeat
-           << " us, tgt takes: " << (mt - st) / repeat;
+  VLOG(3) << "Vec size " << n << ": refer takes: " << (et - mt) / repeat
+          << " us, tgt takes: " << (mt - st) / repeat;
   for (int i = 0; i < n; ++i) {
     EXPECT_NEAR(ytgt_data[i], yref_data[i], 1e-3);
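The timing in this and the following benchmark hunks comes from a GetCurrentUS() helper; a sketch of the usual gettimeofday-based implementation (an assumption here, mirroring what these tests appear to use):

// Microsecond wall-clock used by the benchmark loops (sketch, POSIX only).
#include <sys/time.h>

inline double GetCurrentUS() {
  struct timeval time;
  gettimeofday(&time, nullptr);
  return 1e+6 * time.tv_sec + time.tv_usec;
}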
paddle/fluid/operators/math/jit_kernel_test.cc
@@ -86,7 +86,7 @@ TEST(JitKernel, vrelu) {
       vrelu_intri8(d, x_data, zref_data);
     }
     auto si1 = GetCurrentUS();
-    VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat << " us";
+    VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat << " us";
   }
 #endif
   auto ttgts = GetCurrentUS();
@@ -94,8 +94,7 @@ TEST(JitKernel, vrelu) {
     ker->Compute(x_data, ztgt_data, d);
   }
   auto ttgte = GetCurrentUS();
-  VLOG(30) << "Vec size " << d
-           << ": refer takes: " << (trefe - trefs) / repeat
-           << " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
+  VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
+          << " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
   for (int i = 0; i < d; ++i) {
     EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
@@ -127,8 +126,7 @@ TEST(JitKernel, vaddbias) {
   }
   auto ttgte = GetCurrentUS();
-  VLOG(30) << "Vec size " << d
-           << ": refer takes: " << (trefe - trefs) / repeat
-           << " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
+  VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
+          << " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
   for (int i = 0; i < d; ++i) {
     EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
@@ -175,13 +173,13 @@ TEST(JitKernel, vexp) {
   }
   auto ttgte = GetCurrentUS();
-  VLOG(30) << "Vec size " << d
-           << ": refer takes: " << (trefe - trefs) / repeat
+  VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
 #ifdef PADDLE_WITH_MKLML
-           << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
+          << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
 #else
-           << " us, "
+          << " us, "
 #endif
-           << "tgt takes: " << (ttgte - ttgts) / repeat << " us";
+          << "tgt takes: " << (ttgte - ttgts) / repeat << " us";
   for (int i = 0; i < d; ++i) {
     EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
@@ -235,8 +233,7 @@ TEST(JitKernel, vsigmoid) {
   }
   auto ttgte = GetCurrentUS();
-  VLOG(30) << "Vec size " << d
-           << ": refer takes: " << (trefe - trefs) / repeat
-           << " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
-           << " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
+  VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
+          << " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
+          << " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
   for (int i = 0; i < d; ++i) {
@@ -296,8 +293,7 @@ TEST(JitKernel, vtanh) {
   }
   auto ttgte = GetCurrentUS();
-  VLOG(30) << "Vec size " << d
-           << ": refer takes: " << (trefe - trefs) / repeat
-           << " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
-           << " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
+  VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
+          << " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
+          << " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
   for (int i = 0; i < d; ++i) {
@@ -397,8 +393,7 @@ TEST(JitKernel, lstm) {
     ker->ComputeCtHt(&step, &attr);
   }
   auto ttgte = GetCurrentUS();
-  VLOG(30) << "Vec size " << d
-           << ": refer takes: " << (trefe - trefs) / repeat
-           << " us, better(jit) takes: " << (tmkle - tmkls) / repeat
-           << " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
+  VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
+          << " us, better(jit) takes: " << (tmkle - tmkls) / repeat
+          << " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
 }
@@ -473,7 +468,7 @@ TEST(JitKernel, vscal) {
     vscal_inp_intri8(d, a, y_data);
   }
   auto si3 = GetCurrentUS();
-  VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat
-           << " us, inplace: " << (si3 - si2) / repeat << " us";
+  VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat
+          << " us, inplace: " << (si3 - si2) / repeat << " us";
   }
 #endif
@@ -488,18 +483,15 @@ TEST(JitKernel, vscal) {
     ker->Compute(&a, y_data, y_data, d);
   }
   auto ttgte1 = GetCurrentUS();
-  VLOG(30) << "Vec size " << d
-           << ": refer takes: " << (trefe - trefs) / repeat
-           << " us, inplace takes: " << (trefe1 - trefs1) / repeat
+  VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
+          << " us, inplace takes: " << (trefe1 - trefs1) / repeat
 #ifdef PADDLE_WITH_MKLML
-           << " us, mkl inplace takes: " << (tmkle - tmkls) / repeat << " us, "
+          << " us, mkl inplace takes: " << (tmkle - tmkls) / repeat << " us, "
 #else
-           << " us, "
+          << " us, "
 #endif
-           << "tgt takes: " << (ttgte - ttgts) / repeat
-           << "us, tgt inplace takes: " << (ttgte1 - ttgts1) / repeat << " us";
+          << "tgt takes: " << (ttgte - ttgts) / repeat
+          << "us, tgt inplace takes: " << (ttgte1 - ttgts1) / repeat << " us";
   for (int i = 0; i < d; ++i) {
     EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
   }
@@ -557,7 +549,7 @@ TEST(JitKernel, vmul) {
     vmul_intri8(d, x_data, y_data, zref_data);
   }
   auto si1 = GetCurrentUS();
-  VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
+  VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
   }
 #endif
@@ -567,8 +559,7 @@ TEST(JitKernel, vmul) {
   }
   auto ttgte = GetCurrentUS();
-  VLOG(30) << "Vec size " << d
-           << ": refer takes: " << (trefe - trefs) / repeat
+  VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
 #ifdef PADDLE_WITH_MKLML
-           << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
+          << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
 #else
@@ -632,7 +623,7 @@ TEST(JitKernel, vadd) {
     vadd_intri8(d, x_data, y_data, zref_data);
   }
   auto si1 = GetCurrentUS();
-  VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
+  VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
   }
 #endif
@@ -642,8 +633,7 @@ TEST(JitKernel, vadd) {
   }
   auto ttgte = GetCurrentUS();
-  VLOG(30) << "Vec size " << d
-           << ": refer takes: " << (trefe - trefs) / repeat
+  VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
 #ifdef PADDLE_WITH_MKLML
-           << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
+          << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
 #else
@@ -699,8 +689,7 @@ TEST(JitKernel, vaddrelu) {
     ker->Compute(x_data, y_data, ztgt_data, d);
   }
   auto ttgte = GetCurrentUS();
-  VLOG(30) << "Vec size " << d
-           << ": refer takes: " << (trefe - trefs) / repeat
-           << " us, better takes: " << (tmkle - tmkls) / repeat << " us, "
-           << "tgt takes: " << (ttgte - ttgts) / repeat << " us";
+  VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
+          << " us, better takes: " << (tmkle - tmkls) / repeat << " us, "
+          << "tgt takes: " << (ttgte - ttgts) / repeat << " us";
   for (int i = 0; i < d; ++i) {
paddle/fluid/operators/math/selected_rows_functor.cc
@@ -270,7 +270,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
                   const std::vector<const framework::SelectedRows*>& inputs,
                   framework::SelectedRows* output) {
     if (inputs.size() == 0) {
-      VLOG(30) << "no input! return";
+      VLOG(3) << "no input! return";
       return;
     }
     const framework::SelectedRows* has_value_input = nullptr;
@@ -281,7 +281,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
       }
     }
     if (has_value_input == nullptr) {
-      VLOG(30) << "no input has value! just return" << std::endl;
+      VLOG(3) << "no input has value! just return" << std::endl;
       return;
     }
     auto input_width = has_value_input->value().dims()[1];
paddle/fluid/operators/math/selected_rows_functor.cu
@@ -314,7 +314,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
                   const std::vector<const framework::SelectedRows*>& inputs,
                   framework::SelectedRows* output) {
     if (inputs.size() == 0) {
-      VLOG(30) << "no input! return";
+      VLOG(3) << "no input! return";
       return;
     }
     const framework::SelectedRows* has_value_input = nullptr;
@@ -325,7 +325,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
       }
     }
     if (has_value_input == nullptr) {
-      VLOG(30) << "no input has value! just return" << std::endl;
+      VLOG(3) << "no input has value! just return" << std::endl;
       return;
     }
     auto input_width = has_value_input->value().dims()[1];
paddle/fluid/operators/mul_op.cc
@@ -38,7 +38,7 @@ class MulOp : public framework::OperatorWithKernel {
     int x_num_col_dims = ctx->Attrs().Get<int>("x_num_col_dims");
     int y_num_col_dims = ctx->Attrs().Get<int>("y_num_col_dims");

-    VLOG(30) << "mul operator x.shape=" << x_dims << " y.shape=" << y_dims
-             << " x_num_col_dims=" << x_num_col_dims
-             << " y_num_col_dims=" << y_num_col_dims;
+    VLOG(3) << "mul operator x.shape=" << x_dims << " y.shape=" << y_dims
+            << " x_num_col_dims=" << x_num_col_dims
+            << " y_num_col_dims=" << y_num_col_dims;
paddle/fluid/operators/nccl/nccl_op.cu.cc
@@ -63,14 +63,14 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
     // device id
     int gpu_id = boost::get<platform::CUDAPlace>(ctx.GetPlace()).GetDeviceId();
     int idx = comm->GetCommId(gpu_id);
-    VLOG(30) << "gpu : "
-             << " invoke allreduce. send " << x->numel() << " recv "
-             << out->numel();
+    VLOG(3) << "gpu : "
+            << " invoke allreduce. send " << x->numel() << " recv "
+            << out->numel();
     PADDLE_ENFORCE(platform::dynload::ncclAllReduce(
         x->data<T>(), out->mutable_data<T>(ctx.GetPlace()), out->numel(),
         NCCLTypeWrapper<T>::type, reduction_op_, comm->comms().at(idx),
         ctx.cuda_device_context().stream()));
-    VLOG(30) << "gpu : "
-             << " finished allreduce. send " << x->numel() << " recv "
-             << out->numel();
+    VLOG(3) << "gpu : "
+            << " finished allreduce. send " << x->numel() << " recv "
+            << out->numel();
   }
@@ -109,13 +109,13 @@ class NCCLReduceKernel : public framework::OpKernel<T> {
     } else {
       out->Resize(framework::make_ddim({0}));
     }
-    VLOG(30) << "gpu : " << gpu_id << " invoke reduce. send " << x->numel()
-             << " recv " << out->numel();
+    VLOG(3) << "gpu : " << gpu_id << " invoke reduce. send " << x->numel()
+            << " recv " << out->numel();
     PADDLE_ENFORCE(platform::dynload::ncclReduce(
         x->data<T>(), recvbuffer, x->numel(), NCCLTypeWrapper<T>::type,
         reduction_op_, root, comm->comms().at(idx),
         ctx.cuda_device_context().stream()));
-    VLOG(30) << "gpu : " << gpu_id << " finished reduce. send " << x->numel()
-             << " recv " << out->numel();
+    VLOG(3) << "gpu : " << gpu_id << " finished reduce. send " << x->numel()
+            << " recv " << out->numel();
   }
 };
@@ -133,22 +133,21 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
     int idx = comm->GetCommId(gpu_id);
     if (idx == root) {
       auto* x = ctx.Input<LoDTensor>("X");
-      VLOG(30) << "gpu : " << gpu_id << " invoke Bcast. send " << x->numel();
+      VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. send " << x->numel();
       PADDLE_ENFORCE(platform::dynload::ncclBcast(
           reinterpret_cast<void*>(const_cast<T*>(x->data<T>())), x->numel(),
           NCCLTypeWrapper<T>::type, root, comm->comms().at(idx),
          ctx.cuda_device_context().stream()));
-      VLOG(30) << "gpu : " << gpu_id << " finished Bcast.";
+      VLOG(3) << "gpu : " << gpu_id << " finished Bcast.";
     } else {
       auto* out = ctx.Output<LoDTensor>("Out");
-      VLOG(30) << "gpu : " << gpu_id << " invoke Bcast. recv buffer "
-               << framework::product(out->dims());
+      VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. recv buffer "
+              << framework::product(out->dims());
       PADDLE_ENFORCE(platform::dynload::ncclBcast(
           out->mutable_data<T>(ctx.GetPlace()), out->numel(),
          NCCLTypeWrapper<T>::type, root, comm->comms().at(idx),
          ctx.cuda_device_context().stream()));
-      VLOG(30) << "gpu : " << gpu_id << " finished Bcast. recv "
-               << out->numel();
+      VLOG(3) << "gpu : " << gpu_id << " finished Bcast. recv " << out->numel();
     }
   }
 };
paddle/fluid/operators/nccl/nccl_op_test.cu.cc
@@ -86,9 +86,9 @@ class NCCLTester : public ::testing::Test {
     (*p_scopes).resize(gpu_list_.size());

     auto op = f::OpRegistry::CreateOp(*op1);
-    VLOG(10) << "invoke NCCLInitOp.";
+    VLOG(1) << "invoke NCCLInitOp.";
     op->Run(g_scope_, cpu_place);
-    VLOG(10) << "NCCLInitOp finished.";
+    VLOG(1) << "NCCLInitOp finished.";
   }

   int GetGPUData(int gpu_id) { return gpu_id + 42; }
@@ -109,7 +109,7 @@ class NCCLTester : public ::testing::Test {
       std::vector<T> send_vector(f::product(kDims), GetGPUData(gpu_id));
       paddle::framework::TensorFromVector<T>(send_vector, *ctx, send_tensor);
-      VLOG(10) << "Send Tensor filled with elements " << send_tensor->numel();
+      VLOG(1) << "Send Tensor filled with elements " << send_tensor->numel();
     }
     lk.unlock();
@@ -119,11 +119,11 @@ class NCCLTester : public ::testing::Test {
     auto op = f::OpRegistry::CreateOp(*op1);
-    VLOG(10) << "Device : " << gpu_id << " invoke " << op_desc.Type();
-    VLOG(10) << " send_tensor : " << send_tensor->numel()
-             << " recv_tensor : " << recv_tensor->numel();
+    VLOG(1) << "Device : " << gpu_id << " invoke " << op_desc.Type();
+    VLOG(1) << " send_tensor : " << send_tensor->numel()
+            << " recv_tensor : " << recv_tensor->numel();
     op->Run(*scope, place);
-    VLOG(10) << "Device : " << gpu_id << " finished " << op_desc.Type();
+    VLOG(1) << "Device : " << gpu_id << " finished " << op_desc.Type();
   }

  public:
paddle/fluid/operators/nce_op.cc
@@ -230,13 +230,13 @@ class NCEOpGradVarTypeInference : public framework::VarTypeInference {
     auto attr = op_desc.GetAttr("is_sparse");
     bool is_sparse = boost::get<bool>(attr);
     if (is_sparse) {
-      VLOG(30) << "nce_op_grad op " << weight_grad << " and " << bias_grad
-               << " is set to SelectedRows";
+      VLOG(3) << "nce_op_grad op " << weight_grad << " and " << bias_grad
+              << " is set to SelectedRows";
       block->Var(weight_grad)
           ->SetType(framework::proto::VarType::SELECTED_ROWS);
       block->Var(bias_grad)->SetType(framework::proto::VarType::SELECTED_ROWS);
     } else {
-      VLOG(30) << "nce_op_grad op " << weight_grad << " and " << bias_grad
-               << " is set to LoDTensor";
+      VLOG(3) << "nce_op_grad op " << weight_grad << " and " << bias_grad
+              << " is set to LoDTensor";
       block->Var(weight_grad)->SetType(framework::proto::VarType::LOD_TENSOR);
       block->Var(bias_grad)->SetType(framework::proto::VarType::LOD_TENSOR);
paddle/fluid/operators/optimizers/adam_op.h
@@ -297,7 +297,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
       auto& grad =
           Ref(ctx.Input<framework::SelectedRows>("Grad"), "Must set Grad");
       if (grad.rows().size() == 0) {
-        VLOG(30) << "grad row size is 0!!";
+        VLOG(3) << "grad row size is 0!!";
         return;
       }
paddle/fluid/operators/optimizers/momentum_op.h
@@ -346,7 +346,7 @@ class MomentumOpKernel : public framework::OpKernel<T> {
       // sparse update maybe empty.
       if (grad->rows().size() == 0) {
-        VLOG(30) << "Grad SelectedRows contains no data!";
+        VLOG(3) << "Grad SelectedRows contains no data!";
         return;
       }
       auto* merged_grad = const_cast<framework::Scope&>(ctx.scope())
paddle/fluid/operators/optimizers/sgd_op.h
@@ -98,7 +98,7 @@ class SGDOpKernel : public framework::OpKernel<T> {
       auto param_row_width = param.value().dims()[1];
       auto grad_row_width = grad.value().dims()[1];
-      VLOG(40) << " param rows: " << param.rows().size()
-               << " param memory rows: " << param.value().dims()[0]
-               << " grad rows: " << grad.rows().size()
-               << " grad memory rows: " << grad.value().dims()[0];
+      VLOG(4) << " param rows: " << param.rows().size()
+              << " param memory rows: " << param.value().dims()[0]
+              << " grad rows: " << grad.rows().size()
+              << " grad memory rows: " << grad.value().dims()[0];
paddle/fluid/operators/random_crop_op.h
@@ -155,7 +155,7 @@ class RandomCropKernel : public framework::OpKernel<T> {
         seed = *cpu_seed.data<int64_t>();
       }
     } else {
-      VLOG(50) << "WARNING: The input 'Seed' is not initialized, use attribute "
-                  "'startup_seed' instead.";
+      VLOG(5) << "WARNING: The input 'Seed' is not initialized, use attribute "
+                 "'startup_seed' instead.";
       seed = ctx.Attr<int>("startup_seed");
     }
paddle/fluid/operators/reader/blocking_queue.h
@@ -42,7 +42,7 @@ class BlockingQueue {
     std::unique_lock<std::mutex> lock(mutex_);
     send_cv_.wait(lock, [&] { return queue_.size() < capacity_ || closed_; });
     if (closed_) {
-      VLOG(50)
+      VLOG(5)
           << "WARNING: Sending an element to a closed reader::BlokcingQueue.";
       return false;
     }
@@ -56,7 +56,7 @@ class BlockingQueue {
     std::unique_lock<std::mutex> lock(mutex_);
     send_cv_.wait(lock, [&] { return queue_.size() < capacity_ || closed_; });
     if (closed_) {
-      VLOG(50)
+      VLOG(5)
          << "WARNING: Sending an element to a closed reader::BlokcingQueue.";
      return false;
    }
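The two hunks above touch the same wait-then-check-closed pattern: the sender blocks on a condition variable whose predicate also wakes on close, then returns false instead of enqueuing. A hand-written condition-variable sketch of that control flow (simplified, hypothetical MiniBlockingQueue, not the real class):

#include <condition_variable>
#include <deque>
#include <mutex>
#include <utility>

template <typename T>
class MiniBlockingQueue {
 public:
  explicit MiniBlockingQueue(size_t capacity) : capacity_(capacity) {}

  // Returns false instead of blocking forever once the queue is closed;
  // this is the branch that logs the VLOG(5) warning above.
  bool Send(T elem) {
    std::unique_lock<std::mutex> lock(mutex_);
    send_cv_.wait(lock, [&] { return queue_.size() < capacity_ || closed_; });
    if (closed_) return false;
    queue_.push_back(std::move(elem));
    return true;
  }

  void Close() {
    std::lock_guard<std::mutex> guard(mutex_);
    closed_ = true;
    send_cv_.notify_all();  // wake any sender stuck on a full queue
  }

 private:
  size_t capacity_;
  bool closed_ = false;
  std::deque<T> queue_;
  std::mutex mutex_;
  std::condition_variable send_cv_;
};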
paddle/fluid/operators/reader/create_shuffle_reader_op.cc
@@ -26,7 +26,7 @@ class ShuffleReader : public framework::DecoratedReader {
   ShuffleReader(const std::shared_ptr<ReaderBase>& reader, size_t buffer_size,
                 size_t seed = 0)
       : DecoratedReader(reader), buffer_size_(buffer_size), seed_(seed) {
-    VLOG(100) << "Create shuffle reader of " << reader_;
+    VLOG(10) << "Create shuffle reader of " << reader_;
     if (seed_ == 0) {
       std::random_device device;
       seed_ = device();
@@ -37,7 +37,7 @@ class ShuffleReader : public framework::DecoratedReader {
   void ReadNextImpl(std::vector<framework::LoDTensor>* out) override {
     out->clear();
     if (iteration_pos_ >= buffer_.size()) {
-      VLOG(100) << "Resetting shuffle buffer";
+      VLOG(10) << "Resetting shuffle buffer";
       ReloadBuffer();
       if (buffer_.empty()) {
         return;
@@ -73,7 +73,7 @@ class ShuffleReader : public framework::DecoratedReader {
     std::mt19937 g(seed_);
     std::shuffle(buffer_.begin(), buffer_.end(), g);
     seed_ = g();  // update seed_;
-    VLOG(100) << "random buffer size = " << buffer_.size();
+    VLOG(10) << "random buffer size = " << buffer_.size();
   }

   size_t buffer_size_;
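The last hunk sits inside ShuffleReader's buffer reload, which reseeds itself after every shuffle: the buffer is shuffled with a seeded engine, then a fresh seed is drawn from that engine for the next reload, so successive buffers are shuffled differently but reproducibly. A small self-contained sketch of that idiom (illustrative helper, not Paddle API):

#include <algorithm>
#include <cstddef>
#include <random>
#include <vector>

// Shuffle the buffer with the current seed, then advance the seed so the
// next call produces a different (but still reproducible) permutation.
void ShuffleAndReseed(std::vector<int>* buffer, size_t* seed) {
  std::mt19937 g(*seed);
  std::shuffle(buffer->begin(), buffer->end(), g);
  *seed = g();  // update seed for the next pass, as in ReloadBuffer()
}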
paddle/fluid/operators/recurrent_op.cc
@@ -160,7 +160,7 @@ class RecurrentBase : public framework::OperatorBase {
                                Callback callback) {
     PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size());
     for (size_t i = 0; i < dst_vars.size(); ++i) {
-      VLOG(100) << "Link " << src_vars[i] << " to " << dst_vars[i];
+      VLOG(10) << "Link " << src_vars[i] << " to " << dst_vars[i];
       AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback);
     }
   }
@@ -176,7 +176,7 @@ class RecurrentBase : public framework::OperatorBase {
                                Callback callback) {
     PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size());
     for (size_t i = 0; i < dst_vars.size(); ++i) {
-      VLOG(100) << "Link " << src_vars[i] << " to " << dst_vars[i];
+      VLOG(10) << "Link " << src_vars[i] << " to " << dst_vars[i];
       AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback);
     }
   }
@@ -230,7 +230,7 @@ class RecurrentOp : public RecurrentBase {
   void RunImpl(const framework::Scope& scope,
                const platform::Place& place) const override {
     auto seq_len = static_cast<size_t>(this->GetSequenceLength(scope));
-    VLOG(30) << "Static RNN input sequence length = " << seq_len;
+    VLOG(3) << "Static RNN input sequence length = " << seq_len;
     StepScopes scopes = CreateStepScopes(scope, seq_len);
     auto reverse = Attr<bool>(kReverse);
@@ -241,7 +241,7 @@ class RecurrentOp : public RecurrentBase {
     for (size_t i = 0; i < seq_len; ++i) {
       size_t seq_offset = reverse ? seq_len - i - 1 : i;
-      VLOG(30) << "Recurrent operate at the time step " << seq_offset;
+      VLOG(3) << "Recurrent operate at the time step " << seq_offset;
       auto& cur_scope = scopes.CurScope();
@@ -334,7 +334,7 @@ class RecurrentGradOp : public RecurrentBase {
     for (size_t step_id = 0; step_id < seq_len; ++step_id) {
       size_t seq_offset = reverse ? step_id : seq_len - step_id - 1;
-      VLOG(30) << "Recurrent backward operate at the time step " << seq_offset;
+      VLOG(3) << "Recurrent backward operate at the time step " << seq_offset;
       auto& cur_scope = scopes.CurScope();
       // Link outside::output_grads --> inside::output_grads
       //   inside::output_grad = outside::output_grad[seq_offset:seq_offset+1]
@@ -348,11 +348,11 @@ class RecurrentGradOp : public RecurrentBase {
           });
       auto og_set = List2Set(Inputs(kOutputGrads));

-      if (VLOG_IS_ON(100)) {
+      if (VLOG_IS_ON(10)) {
         std::ostringstream sout;
         std::copy(og_set.begin(), og_set.end(),
                   std::ostream_iterator<std::string>(sout, ","));
-        VLOG(100) << " RNN output gradients = [" << sout.str() << "]";
+        VLOG(10) << " RNN output gradients = [" << sout.str() << "]";
       }

       // Link states
@@ -374,7 +374,7 @@ class RecurrentGradOp : public RecurrentBase {
           auto& ex_tensor =
               ex_scope.FindVar(ex_grad)->Get<framework::LoDTensor>();

-          VLOG(100) << " RNN link " << cur_grad << " from " << ex_grad;
+          VLOG(10) << " RNN link " << cur_grad << " from " << ex_grad;
           auto* cur_grad_var = cur_scope.Var(cur_grad);
           auto cur_grad_tensor =
               cur_grad_var->GetMutable<framework::LoDTensor>();
@@ -382,12 +382,12 @@ class RecurrentGradOp : public RecurrentBase {
         }
       }

-      VLOG(50) << "Recurrent memory linking finished ";
+      VLOG(5) << "Recurrent memory linking finished ";
       // Run step block with cur_scope
       executor.Run(*program, &cur_scope, block->ID(),
                    false /*create_local_scope*/);

-      VLOG(50) << "executor.Run finished ";
+      VLOG(5) << "executor.Run finished ";

       auto local_var_names = LocalVarNames(cur_scope);
@@ -436,7 +436,7 @@ class RecurrentGradOp : public RecurrentBase {
           cur_scope.Rename(new_inside_name, inside_grad_name);
         }
       }
-      VLOG(50) << "Accumulate Parameter finished ";
+      VLOG(5) << "Accumulate Parameter finished ";

       // Copy input gradient from inside to outside
       //   outside::input_grad[seq_offset: seq_offset + 1] = inside::input_grad
@@ -455,7 +455,7 @@ class RecurrentGradOp : public RecurrentBase {
             auto dst = outside->Slice(seq_offset, seq_offset + 1);
             framework::TensorCopy(inside, place, dev_ctx, &dst);
           });
-      VLOG(50) << "Link outside gradient finished ";
+      VLOG(5) << "Link outside gradient finished ";

       if (step_id + 1 == seq_len) {  // at_end
         // copy initialize states gradient from inside to outside
@@ -468,7 +468,7 @@ class RecurrentGradOp : public RecurrentBase {
               outside->mutable_data(place, inside.type());
               framework::TensorCopy(inside, place, dev_ctx, outside);
             });
-        VLOG(50) << "Link initialize state gradient finished ";
+        VLOG(5) << "Link initialize state gradient finished ";
       }
       scopes.Next();
     }
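Note the VLOG_IS_ON(10) guard in the @@ -348,11 hunk: the ostringstream is only built when verbosity 10 is actually enabled, so the expensive string construction is skipped in normal runs. A freestanding sketch of the same guard pattern (hypothetical LogNames helper):

#include <glog/logging.h>
#include <algorithm>
#include <iterator>
#include <sstream>
#include <string>
#include <vector>

// Build the joined debug string only when verbosity 10 is live; otherwise
// the function does no work at all.
void LogNames(const std::vector<std::string>& names) {
  if (VLOG_IS_ON(10)) {
    std::ostringstream sout;
    std::copy(names.begin(), names.end(),
              std::ostream_iterator<std::string>(sout, ","));
    VLOG(10) << "names = [" << sout.str() << "]";
  }
}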
paddle/fluid/operators/rnn_memory_helper_op.cc
@@ -93,7 +93,7 @@ class RNNMemoryHelperGradOp : public framework::OperatorBase {
                    in_grad_var_name);

     if (out_grad_var == nullptr) {
-      VLOG(50) << "Using fill constant 0 as starting gradient";
+      VLOG(5) << "Using fill constant 0 as starting gradient";
       auto in_var_name = Input("X");
       auto* in_var = scope.FindVar(in_var_name);
       auto& in_var_tensor = in_var->Get<framework::LoDTensor>();
paddle/fluid/operators/save_op.cc
@@ -110,7 +110,7 @@ class SaveOp : public framework::OperatorBase {
                    lt_var != nullptr,
                    "Can not find variable kLookupTablePath for SaveSelectedRows");
     std::string filename = lt_var->data();
-    VLOG(40) << "SaveSelectedRows get File name: " << filename;
+    VLOG(4) << "SaveSelectedRows get File name: " << filename;

     MkDirRecursively(DirName(filename).c_str());
The following files are also part of this commit; their diffs are collapsed in this view:

paddle/fluid/operators/sequence_ops/sequence_mask_op.h
paddle/fluid/operators/sum_mkldnn_op.cc
paddle/fluid/operators/sum_op.cc
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
paddle/fluid/platform/device_tracer.cc
paddle/fluid/platform/dynload/dynamic_loader.cc
paddle/fluid/platform/gpu_info.cc
paddle/fluid/platform/init.cc
paddle/fluid/platform/nccl_helper.h
paddle/fluid/pybind/protobuf.cc
paddle/fluid/train/demo/demo_trainer.cc
paddle/legacy/pserver/ParameterClient2.cpp
paddle/testing/TestUtil.cpp