Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
f5532877
P
Paddle
项目概览
PaddlePaddle
/
Paddle
接近 2 年 前同步成功
通知
2323
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f5532877
编写于
12月 20, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'ups/develop' into refine/jit
上级
a369c802
3babc801
变更
45
隐藏空白更改
内联
并排
Showing
45 changed file
with
1057 addition
and
629 deletion
+1057
-629
paddle/fluid/API.spec
paddle/fluid/API.spec
+16
-0
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+1
-6
paddle/fluid/framework/details/build_strategy.h
paddle/fluid/framework/details/build_strategy.h
+7
-8
paddle/fluid/framework/details/multi_devices_graph_pass.cc
paddle/fluid/framework/details/multi_devices_graph_pass.cc
+5
-8
paddle/fluid/framework/details/multi_devices_graph_pass.h
paddle/fluid/framework/details/multi_devices_graph_pass.h
+2
-2
paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
+44
-17
paddle/fluid/framework/details/scale_loss_grad_op_handle.h
paddle/fluid/framework/details/scale_loss_grad_op_handle.h
+3
-2
paddle/fluid/framework/op_desc.cc
paddle/fluid/framework/op_desc.cc
+108
-24
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+142
-46
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+4
-5
paddle/fluid/framework/parallel_executor.h
paddle/fluid/framework/parallel_executor.h
+0
-1
paddle/fluid/framework/shape_inference.cc
paddle/fluid/framework/shape_inference.cc
+0
-98
paddle/fluid/framework/shape_inference.h
paddle/fluid/framework/shape_inference.h
+16
-27
paddle/fluid/operators/controlflow/while_op.cc
paddle/fluid/operators/controlflow/while_op.cc
+29
-14
paddle/fluid/operators/conv_mkldnn_op.cc
paddle/fluid/operators/conv_mkldnn_op.cc
+14
-8
paddle/fluid/operators/distributed/grpc_client.cc
paddle/fluid/operators/distributed/grpc_client.cc
+10
-1
paddle/fluid/operators/elementwise/elementwise_div_op.cu
paddle/fluid/operators/elementwise/elementwise_div_op.cu
+5
-0
paddle/fluid/operators/elementwise/elementwise_mul_op.cu
paddle/fluid/operators/elementwise/elementwise_mul_op.cu
+12
-10
paddle/fluid/operators/fill_zeros_like_op.cu.cc
paddle/fluid/operators/fill_zeros_like_op.cu.cc
+3
-0
paddle/fluid/operators/metrics/accuracy_op.cu
paddle/fluid/operators/metrics/accuracy_op.cu
+5
-3
paddle/fluid/operators/optimizers/momentum_op.cu
paddle/fluid/operators/optimizers/momentum_op.cu
+4
-1
paddle/fluid/operators/optimizers/momentum_op.h
paddle/fluid/operators/optimizers/momentum_op.h
+4
-2
paddle/fluid/operators/top_k_op.cu
paddle/fluid/operators/top_k_op.cu
+9
-6
paddle/fluid/platform/nccl_helper.h
paddle/fluid/platform/nccl_helper.h
+3
-0
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+0
-1
python/paddle/fluid/contrib/__init__.py
python/paddle/fluid/contrib/__init__.py
+3
-0
python/paddle/fluid/contrib/utils/__init__.py
python/paddle/fluid/contrib/utils/__init__.py
+5
-4
python/paddle/fluid/contrib/utils/hdfs_utils.py
python/paddle/fluid/contrib/utils/hdfs_utils.py
+163
-138
python/paddle/fluid/contrib/utils/lookup_table_utils.py
python/paddle/fluid/contrib/utils/lookup_table_utils.py
+125
-58
python/paddle/fluid/data_feeder.py
python/paddle/fluid/data_feeder.py
+2
-0
python/paddle/fluid/initializer.py
python/paddle/fluid/initializer.py
+50
-4
python/paddle/fluid/layers/learning_rate_scheduler.py
python/paddle/fluid/layers/learning_rate_scheduler.py
+100
-75
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+6
-2
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+13
-4
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+38
-41
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+2
-0
python/paddle/fluid/tests/unittests/test_accuracy_op.py
python/paddle/fluid/tests/unittests/test_accuracy_op.py
+15
-2
python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py
python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py
+19
-1
python/paddle/fluid/tests/unittests/test_elementwise_div_op.py
...n/paddle/fluid/tests/unittests/test_elementwise_div_op.py
+23
-2
python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py
...n/paddle/fluid/tests/unittests/test_elementwise_mul_op.py
+5
-0
python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py
...n/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py
+11
-1
python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py
...dle/fluid/tests/unittests/test_learning_rate_scheduler.py
+1
-1
python/paddle/fluid/tests/unittests/test_momentum_op.py
python/paddle/fluid/tests/unittests/test_momentum_op.py
+17
-4
python/paddle/fluid/tests/unittests/test_top_k_op.py
python/paddle/fluid/tests/unittests/test_top_k_op.py
+12
-1
python/setup.py.in
python/setup.py.in
+1
-1
未找到文件。
paddle/fluid/API.spec
浏览文件 @
f5532877
...
@@ -350,6 +350,22 @@ paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_b
...
@@ -350,6 +350,22 @@ paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_b
paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.contrib.load_persistables_for_increment ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.load_persistables_for_inference ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.convert_dist_to_sparse_program ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.__init__ ArgSpec(args=['self', 'hadoop_home', 'configs'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.delete ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.download ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'unzip'], varargs=None, keywords=None, defaults=(False, False))
paddle.fluid.contrib.HDFSClient.is_dir ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.HDFSClient.is_exist ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.HDFSClient.ls ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.lsr ArgSpec(args=['self', 'hdfs_path', 'only_file', 'sort'], varargs=None, keywords=None, defaults=(True, True))
paddle.fluid.contrib.HDFSClient.make_local_dirs ArgSpec(args=['local_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.makedirs ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.rename ArgSpec(args=['self', 'hdfs_src_path', 'hdfs_dst_path', 'overwrite'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.contrib.HDFSClient.upload ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'retry_times'], varargs=None, keywords=None, defaults=(False, 5))
paddle.fluid.contrib.multi_download ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,))
paddle.fluid.contrib.multi_upload ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True))
paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
...
...
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
f5532877
...
@@ -131,9 +131,7 @@ std::shared_ptr<ir::PassBuilder> BuildStrategy::CreatePassesFromStrategy(
...
@@ -131,9 +131,7 @@ std::shared_ptr<ir::PassBuilder> BuildStrategy::CreatePassesFromStrategy(
std
::
unique_ptr
<
ir
::
Graph
>
BuildStrategy
::
Apply
(
std
::
unique_ptr
<
ir
::
Graph
>
BuildStrategy
::
Apply
(
const
ProgramDesc
&
main_program
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
ProgramDesc
&
main_program
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
string
&
loss_var_name
,
const
std
::
string
&
loss_var_name
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
{
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
{
#else
#else
...
@@ -149,9 +147,6 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
...
@@ -149,9 +147,6 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
pass
->
SetNotOwned
<
const
std
::
vector
<
platform
::
Place
>>
(
"places"
,
&
places
);
pass
->
SetNotOwned
<
const
std
::
vector
<
platform
::
Place
>>
(
"places"
,
&
places
);
pass
->
Erase
(
"loss_var_name"
);
pass
->
Erase
(
"loss_var_name"
);
pass
->
SetNotOwned
<
const
std
::
string
>
(
"loss_var_name"
,
&
loss_var_name
);
pass
->
SetNotOwned
<
const
std
::
string
>
(
"loss_var_name"
,
&
loss_var_name
);
pass
->
Erase
(
"params"
);
pass
->
SetNotOwned
<
const
std
::
unordered_set
<
std
::
string
>>
(
"params"
,
&
param_names
);
pass
->
Erase
(
"local_scopes"
);
pass
->
Erase
(
"local_scopes"
);
pass
->
SetNotOwned
<
const
std
::
vector
<
Scope
*>>
(
"local_scopes"
,
pass
->
SetNotOwned
<
const
std
::
vector
<
Scope
*>>
(
"local_scopes"
,
&
local_scopes
);
&
local_scopes
);
...
...
paddle/fluid/framework/details/build_strategy.h
浏览文件 @
f5532877
...
@@ -106,16 +106,15 @@ struct BuildStrategy {
...
@@ -106,16 +106,15 @@ struct BuildStrategy {
// Apply the passes built by the pass_builder_. The passes will be
// Apply the passes built by the pass_builder_. The passes will be
// applied to the Program and output an ir::Graph.
// applied to the Program and output an ir::Graph.
std
::
unique_ptr
<
ir
::
Graph
>
Apply
(
std
::
unique_ptr
<
ir
::
Graph
>
Apply
(
const
ProgramDesc
&
main_program
,
const
ProgramDesc
&
main_program
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
string
&
loss_var_name
,
const
std
::
string
&
loss_var_name
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
;
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
;
#else
#else
const
bool
use_cuda
)
const
;
const
bool
use_cuda
)
const
;
#endif
#endif
private:
private:
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.cc
浏览文件 @
f5532877
...
@@ -130,7 +130,6 @@ void AddOutputToLeafOps(ir::Graph *graph) {
...
@@ -130,7 +130,6 @@ void AddOutputToLeafOps(ir::Graph *graph) {
static
const
char
kLossVarName
[]
=
"loss_var_name"
;
static
const
char
kLossVarName
[]
=
"loss_var_name"
;
static
const
char
kPlaces
[]
=
"places"
;
static
const
char
kPlaces
[]
=
"places"
;
static
const
char
kParams
[]
=
"params"
;
static
const
char
kLocalScopes
[]
=
"local_scopes"
;
static
const
char
kLocalScopes
[]
=
"local_scopes"
;
static
const
char
kStrategy
[]
=
"strategy"
;
static
const
char
kStrategy
[]
=
"strategy"
;
static
const
char
kNumTrainers
[]
=
"num_trainers"
;
static
const
char
kNumTrainers
[]
=
"num_trainers"
;
...
@@ -147,9 +146,6 @@ void MultiDevSSAGraphBuilder::Init() const {
...
@@ -147,9 +146,6 @@ void MultiDevSSAGraphBuilder::Init() const {
nccl_ctxs_
=
&
Get
<
platform
::
NCCLContextMap
>
(
"nccl_ctxs"
);
nccl_ctxs_
=
&
Get
<
platform
::
NCCLContextMap
>
(
"nccl_ctxs"
);
#endif
#endif
for
(
auto
&
p
:
Get
<
const
std
::
unordered_set
<
std
::
string
>>
(
kParams
))
{
grad_names_
.
insert
(
GradVarName
(
p
));
}
balance_vars_
.
resize
(
places_
.
size
(),
0
);
balance_vars_
.
resize
(
places_
.
size
(),
0
);
if
(
strategy_
.
enable_data_balance_
&&
places_
.
size
()
==
1
)
{
if
(
strategy_
.
enable_data_balance_
&&
places_
.
size
()
==
1
)
{
LOG
(
WARNING
)
<<
"It is no need to enable data balance when there is only "
LOG
(
WARNING
)
<<
"It is no need to enable data balance when there is only "
...
@@ -359,7 +355,9 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
...
@@ -359,7 +355,9 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
BuildStrategy
::
GradientScaleStrategy
::
kCustomized
)
{
BuildStrategy
::
GradientScaleStrategy
::
kCustomized
)
{
// TODO(paddle-dev): Why is there no input for this op_handle?
// TODO(paddle-dev): Why is there no input for this op_handle?
auto
loss_grad_name
=
node
->
Op
()
->
OutputArgumentNames
()[
0
];
auto
loss_grad_name
=
node
->
Op
()
->
OutputArgumentNames
()[
0
];
CreateScaleLossGradOp
(
&
result
,
loss_grad_name
,
node
->
outputs
[
0
]);
auto
out_dtype
=
all_vars_
.
at
(
loss_grad_name
)
->
GetDataType
();
CreateScaleLossGradOp
(
&
result
,
loss_grad_name
,
node
->
outputs
[
0
],
out_dtype
);
}
}
// This assumes the backward generating code will ensure IsScaleLossOp
// This assumes the backward generating code will ensure IsScaleLossOp
// is true only for the op that scale the final scalar loss.
// is true only for the op that scale the final scalar loss.
...
@@ -662,13 +660,13 @@ int MultiDevSSAGraphBuilder::GetVarDeviceID(
...
@@ -662,13 +660,13 @@ int MultiDevSSAGraphBuilder::GetVarDeviceID(
void
MultiDevSSAGraphBuilder
::
CreateScaleLossGradOp
(
void
MultiDevSSAGraphBuilder
::
CreateScaleLossGradOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
loss_grad_name
,
ir
::
Graph
*
result
,
const
std
::
string
&
loss_grad_name
,
ir
::
Node
*
out_var_node
)
const
{
ir
::
Node
*
out_var_node
,
proto
::
VarType
::
Type
dtype
)
const
{
for
(
size_t
i
=
0
;
i
<
places_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
places_
.
size
();
++
i
)
{
// Insert ScaleCost OpHandle
// Insert ScaleCost OpHandle
auto
*
dev_ctx
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
places_
[
i
]);
auto
*
dev_ctx
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
places_
[
i
]);
auto
*
op_handle
=
new
ScaleLossGradOpHandle
(
auto
*
op_handle
=
new
ScaleLossGradOpHandle
(
result
->
CreateEmptyNode
(
"scale_loss_grad"
,
ir
::
Node
::
Type
::
kOperation
),
result
->
CreateEmptyNode
(
"scale_loss_grad"
,
ir
::
Node
::
Type
::
kOperation
),
local_scopes_
.
size
(),
local_scopes_
[
i
],
places_
[
i
],
dev_ctx
);
local_scopes_
.
size
(),
local_scopes_
[
i
],
places_
[
i
],
dev_ctx
,
dtype
);
result
->
Get
<
GraphOps
>
(
kGraphOps
).
emplace_back
(
op_handle
);
result
->
Get
<
GraphOps
>
(
kGraphOps
).
emplace_back
(
op_handle
);
// FIXME: Currently ScaleLossGradOp only use device_count as scale
// FIXME: Currently ScaleLossGradOp only use device_count as scale
...
@@ -896,7 +894,6 @@ REGISTER_PASS(multi_devices_pass,
...
@@ -896,7 +894,6 @@ REGISTER_PASS(multi_devices_pass,
paddle
::
framework
::
details
::
MultiDevSSAGraphBuilder
)
paddle
::
framework
::
details
::
MultiDevSSAGraphBuilder
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kLossVarName
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kLossVarName
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kPlaces
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kPlaces
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kParams
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kLocalScopes
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kLocalScopes
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kStrategy
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kStrategy
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kNumTrainers
);
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kNumTrainers
);
paddle/fluid/framework/details/multi_devices_graph_pass.h
浏览文件 @
f5532877
...
@@ -68,7 +68,8 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
...
@@ -68,7 +68,8 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
void
CreateScaleLossGradOp
(
ir
::
Graph
*
result
,
void
CreateScaleLossGradOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
loss_grad_name
,
const
std
::
string
&
loss_grad_name
,
ir
::
Node
*
out_var_node
)
const
;
ir
::
Node
*
out_var_node
,
proto
::
VarType
::
Type
dtype
)
const
;
VarHandle
*
CreateReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
,
VarHandle
*
CreateReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
,
int
dst_dev_id
)
const
;
int
dst_dev_id
)
const
;
...
@@ -102,7 +103,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
...
@@ -102,7 +103,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
mutable
std
::
string
loss_var_name_
;
mutable
std
::
string
loss_var_name_
;
mutable
std
::
vector
<
platform
::
Place
>
places_
;
mutable
std
::
vector
<
platform
::
Place
>
places_
;
mutable
std
::
vector
<
Scope
*>
local_scopes_
;
mutable
std
::
vector
<
Scope
*>
local_scopes_
;
mutable
std
::
unordered_set
<
std
::
string
>
grad_names_
;
mutable
BuildStrategy
strategy_
;
mutable
BuildStrategy
strategy_
;
mutable
std
::
unordered_map
<
std
::
string
,
VarDesc
*>
all_vars_
;
mutable
std
::
unordered_map
<
std
::
string
,
VarDesc
*>
all_vars_
;
...
...
paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
浏览文件 @
f5532877
...
@@ -22,39 +22,66 @@ namespace details {
...
@@ -22,39 +22,66 @@ namespace details {
ScaleLossGradOpHandle
::
ScaleLossGradOpHandle
(
ir
::
Node
*
node
,
size_t
num_dev
,
ScaleLossGradOpHandle
::
ScaleLossGradOpHandle
(
ir
::
Node
*
node
,
size_t
num_dev
,
Scope
*
scope
,
Scope
*
scope
,
platform
::
Place
place
,
platform
::
Place
place
,
platform
::
DeviceContext
*
dev_ctx
)
platform
::
DeviceContext
*
dev_ctx
,
proto
::
VarType
::
Type
dtype
)
:
OpHandleBase
(
node
),
:
OpHandleBase
(
node
),
coeff_
(
static_cast
<
float
>
(
1.0
/
num_dev
)),
coeff_
(
static_cast
<
float
>
(
1.0
/
num_dev
)),
scope_
(
scope
),
scope_
(
scope
),
place_
(
place
)
{
place_
(
place
),
out_dtype_
(
dtype
)
{
this
->
SetDeviceContext
(
place_
,
dev_ctx
);
this
->
SetDeviceContext
(
place_
,
dev_ctx
);
}
}
ScaleLossGradOpHandle
::~
ScaleLossGradOpHandle
()
{}
ScaleLossGradOpHandle
::~
ScaleLossGradOpHandle
()
{}
struct
ScaleLossGradFunctor
{
float
coeff_
;
Tensor
*
out_
;
platform
::
Place
place_
;
OpHandleBase
*
op_handle_
;
proto
::
VarType
::
Type
out_dtype_
;
platform
::
DeviceContext
*
ctx_
;
ScaleLossGradFunctor
(
float
coeff
,
Tensor
*
out
,
platform
::
Place
place
,
OpHandleBase
*
op_handle
,
proto
::
VarType
::
Type
dtype
,
platform
::
DeviceContext
*
ctx
)
:
coeff_
(
coeff
),
out_
(
out
),
place_
(
place
),
out_dtype_
(
dtype
),
ctx_
(
ctx
)
{}
template
<
typename
OutT
>
void
apply
()
const
{
auto
*
out_data
=
out_
->
mutable_data
<
OutT
>
(
place_
);
if
(
platform
::
is_cpu_place
(
place_
))
{
*
out_data
=
static_cast
<
OutT
>
(
coeff_
);
}
else
{
#ifdef PADDLE_WITH_CUDA
OutT
cast_coeff
=
static_cast
<
OutT
>
(
coeff_
);
auto
stream
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
ctx_
)
->
stream
();
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place_
),
out_data
,
platform
::
CPUPlace
(),
&
cast_coeff
,
SizeOfType
(
out_dtype_
),
stream
);
VLOG
(
10
)
<<
place_
<<
"RUN Scale loss grad op"
;
#endif
}
}
};
void
ScaleLossGradOpHandle
::
RunImpl
()
{
void
ScaleLossGradOpHandle
::
RunImpl
()
{
// Doesn't wait any event
// Doesn't wait any event
std
::
string
var_name
=
static_cast
<
VarHandle
*>
(
this
->
outputs_
[
0
])
->
name_
;
std
::
string
var_name
=
static_cast
<
VarHandle
*>
(
this
->
outputs_
[
0
])
->
name_
;
auto
&
local_scope
=
*
scope_
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
auto
&
local_scope
=
*
scope_
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
float
*
tmp
=
local_scope
.
FindVar
(
var_name
)
auto
*
tensor
=
local_scope
.
FindVar
(
var_name
)
->
GetMutable
<
LoDTensor
>
();
->
GetMutable
<
LoDTensor
>
()
tensor
->
Resize
(
make_ddim
({
1
}));
->
mutable_data
<
float
>
(
make_ddim
({
1
}),
place_
);
if
(
platform
::
is_cpu_place
(
place_
))
{
*
tmp
=
coeff_
;
}
else
{
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
this
->
RunAndRecordEvent
([
&
]
{
ScaleLossGradFunctor
func
(
coeff_
,
tensor
,
place_
,
this
,
out_dtype_
,
auto
stream
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
this
->
dev_ctxes_
.
at
(
place_
));
this
->
dev_ctxes_
.
at
(
place_
))
this
->
RunAndRecordEvent
([
&
]
{
framework
::
VisitDataType
(
out_dtype_
,
func
);
});
->
stream
();
#else
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place_
),
tmp
,
ScaleLossGradFunctor
func
(
coeff_
,
tensor
,
place_
,
this
,
out_dtype_
,
nullptr
);
platform
::
CPUPlace
(),
&
coeff_
,
sizeof
(
float
),
stream
);
framework
::
VisitDataType
(
out_dtype_
,
func
);
VLOG
(
10
)
<<
place_
<<
"RUN Scale loss grad op"
;
});
#endif
#endif
}
}
}
std
::
string
ScaleLossGradOpHandle
::
Name
()
const
{
return
"Scale LossGrad"
;
}
std
::
string
ScaleLossGradOpHandle
::
Name
()
const
{
return
"Scale LossGrad"
;
}
...
...
paddle/fluid/framework/details/scale_loss_grad_op_handle.h
浏览文件 @
f5532877
...
@@ -26,8 +26,8 @@ namespace details {
...
@@ -26,8 +26,8 @@ namespace details {
struct
ScaleLossGradOpHandle
:
public
OpHandleBase
{
struct
ScaleLossGradOpHandle
:
public
OpHandleBase
{
ScaleLossGradOpHandle
(
ir
::
Node
*
node
,
size_t
num_dev
,
Scope
*
scope
,
ScaleLossGradOpHandle
(
ir
::
Node
*
node
,
size_t
num_dev
,
Scope
*
scope
,
platform
::
Place
place
,
platform
::
Place
place
,
platform
::
DeviceContext
*
context
,
p
latform
::
DeviceContext
*
context
);
p
roto
::
VarType
::
Type
dtype
);
~
ScaleLossGradOpHandle
()
final
;
~
ScaleLossGradOpHandle
()
final
;
...
@@ -40,6 +40,7 @@ struct ScaleLossGradOpHandle : public OpHandleBase {
...
@@ -40,6 +40,7 @@ struct ScaleLossGradOpHandle : public OpHandleBase {
float
coeff_
;
float
coeff_
;
Scope
*
scope_
;
Scope
*
scope_
;
platform
::
Place
place_
;
platform
::
Place
place_
;
proto
::
VarType
::
Type
out_dtype_
;
};
};
}
// namespace details
}
// namespace details
...
...
paddle/fluid/framework/op_desc.cc
浏览文件 @
f5532877
...
@@ -110,22 +110,125 @@ class CompileTimeInferShapeContext : public InferShapeContext {
...
@@ -110,22 +110,125 @@ class CompileTimeInferShapeContext : public InferShapeContext {
}
}
}
}
std
::
vector
<
InferShapeVarPtr
>
GetInputVarPtrs
(
const
std
::
string
&
name
)
override
{
const
std
::
vector
<
std
::
string
>
arg_names
=
Inputs
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
arg_names
.
size
());
std
::
transform
(
arg_names
.
begin
(),
arg_names
.
end
(),
std
::
back_inserter
(
res
),
[
this
](
const
std
::
string
&
name
)
{
return
block_
.
FindVarRecursive
(
name
);
});
return
res
;
}
std
::
vector
<
InferShapeVarPtr
>
GetOutputVarPtrs
(
const
std
::
string
&
name
)
override
{
const
std
::
vector
<
std
::
string
>
arg_names
=
Outputs
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
arg_names
.
size
());
std
::
transform
(
arg_names
.
begin
(),
arg_names
.
end
(),
std
::
back_inserter
(
res
),
[
this
](
const
std
::
string
&
name
)
{
return
block_
.
FindVarRecursive
(
name
);
});
return
res
;
}
DDim
GetInputDim
(
const
std
::
string
&
name
)
const
override
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
PADDLE_ENFORCE_EQ
(
arg_names
.
size
(),
1UL
,
"Input(%s) should hold one element, but now it holds %d"
,
name
,
arg_names
.
size
());
return
this
->
GetDim
(
arg_names
[
0
]);
}
std
::
vector
<
DDim
>
GetInputsDim
(
const
std
::
string
&
name
)
const
override
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
return
GetDims
(
arg_names
);
}
bool
IsRuntime
()
const
override
;
bool
IsRuntime
()
const
override
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetInputsVarType
(
const
std
::
string
&
name
)
const
override
{
return
GetVarTypes
(
Inputs
(
name
));
}
std
::
vector
<
proto
::
VarType
::
Type
>
GetOutputsVarType
(
const
std
::
string
&
name
)
const
override
{
return
GetVarTypes
(
Outputs
(
name
));
}
void
SetOutputDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
override
{
auto
&
arg_names
=
Outputs
(
name
);
PADDLE_ENFORCE_EQ
(
arg_names
.
size
(),
1UL
,
"Output(%s) should hold one element, but now it holds %d"
,
name
,
arg_names
.
size
());
SetDim
(
arg_names
[
0
],
dim
);
}
void
SetOutputsDim
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
)
override
{
auto
&
names
=
Outputs
(
name
);
SetDims
(
names
,
dims
);
}
protected:
protected:
proto
::
VarType
::
Type
GetVarType
(
const
std
::
string
&
name
)
const
override
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetVarTypes
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
{
std
::
vector
<
proto
::
VarType
::
Type
>
retv
;
retv
.
resize
(
names
.
size
());
std
::
transform
(
names
.
begin
(),
names
.
end
(),
retv
.
begin
(),
std
::
bind
(
std
::
mem_fn
(
&
CompileTimeInferShapeContext
::
GetVarType
),
this
,
std
::
placeholders
::
_1
));
return
retv
;
}
proto
::
VarType
::
Type
GetVarType
(
const
std
::
string
&
name
)
const
;
DDim
GetDim
(
const
std
::
string
&
name
)
const
{
auto
var
=
block_
.
FindVarRecursive
(
name
);
PADDLE_ENFORCE
(
var
!=
nullptr
,
"Cannot find variable %s"
,
name
);
DDim
res
;
try
{
auto
shape
=
var
->
GetShape
();
res
=
shape
.
empty
()
?
make_ddim
({
0UL
})
:
make_ddim
(
shape
);
}
catch
(...)
{
VLOG
(
5
)
<<
"GetDim of variable "
<<
name
<<
" error"
;
std
::
rethrow_exception
(
std
::
current_exception
());
}
return
res
;
}
DDim
GetDim
(
const
std
::
string
&
name
)
const
override
;
std
::
vector
<
DDim
>
GetDims
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
{
std
::
vector
<
DDim
>
ret
;
ret
.
reserve
(
names
.
size
());
std
::
transform
(
names
.
begin
(),
names
.
end
(),
std
::
back_inserter
(
ret
),
[
this
](
const
std
::
string
&
name
)
{
return
this
->
GetDim
(
name
);
});
return
ret
;
}
void
SetDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
);
void
SetDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
override
;
void
SetDims
(
const
std
::
vector
<
std
::
string
>
&
names
,
const
std
::
vector
<
DDim
>
&
dims
)
{
size_t
length
=
names
.
size
();
PADDLE_ENFORCE_EQ
(
length
,
dims
.
size
());
for
(
size_t
i
=
0
;
i
<
length
;
++
i
)
{
if
(
names
[
i
]
==
framework
::
kEmptyVarName
)
{
continue
;
}
SetDim
(
names
[
i
],
dims
[
i
]);
}
}
std
::
vector
<
DDim
>
GetRepeatedDims
(
const
std
::
string
&
name
)
const
override
;
std
::
vector
<
DDim
>
GetRepeatedDims
(
const
std
::
string
&
name
)
const
override
;
void
SetRepeatedDims
(
const
std
::
string
&
name
,
void
SetRepeatedDims
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
)
override
;
const
std
::
vector
<
DDim
>
&
dims
)
override
;
InferShapeVarPtr
GetVarPtr
(
const
std
::
string
&
name
)
override
;
const
OpDesc
&
op_
;
const
OpDesc
&
op_
;
const
BlockDesc
&
block_
;
const
BlockDesc
&
block_
;
};
};
...
@@ -644,20 +747,6 @@ const std::vector<std::string> &CompileTimeInferShapeContext::Outputs(
...
@@ -644,20 +747,6 @@ const std::vector<std::string> &CompileTimeInferShapeContext::Outputs(
return
op_
.
Output
(
name
);
return
op_
.
Output
(
name
);
}
}
DDim
CompileTimeInferShapeContext
::
GetDim
(
const
std
::
string
&
name
)
const
{
auto
var
=
block_
.
FindVarRecursive
(
name
);
PADDLE_ENFORCE
(
var
!=
nullptr
,
"Cannot find variable %s"
,
name
);
DDim
res
;
try
{
auto
shape
=
var
->
GetShape
();
res
=
shape
.
empty
()
?
make_ddim
({
0UL
})
:
make_ddim
(
shape
);
}
catch
(...)
{
VLOG
(
5
)
<<
"GetDim of variable "
<<
name
<<
" error"
;
std
::
rethrow_exception
(
std
::
current_exception
());
}
return
res
;
}
std
::
vector
<
DDim
>
CompileTimeInferShapeContext
::
GetRepeatedDims
(
std
::
vector
<
DDim
>
CompileTimeInferShapeContext
::
GetRepeatedDims
(
const
std
::
string
&
name
)
const
{
const
std
::
string
&
name
)
const
{
auto
var
=
block_
.
FindVarRecursive
(
name
);
auto
var
=
block_
.
FindVarRecursive
(
name
);
...
@@ -696,10 +785,5 @@ proto::VarType::Type CompileTimeInferShapeContext::GetVarType(
...
@@ -696,10 +785,5 @@ proto::VarType::Type CompileTimeInferShapeContext::GetVarType(
return
block_
.
FindVarRecursive
(
name
)
->
GetType
();
return
block_
.
FindVarRecursive
(
name
)
->
GetType
();
}
}
InferShapeVarPtr
CompileTimeInferShapeContext
::
GetVarPtr
(
const
std
::
string
&
name
)
{
return
block_
.
FindVarRecursive
(
name
);
}
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/operator.cc
浏览文件 @
f5532877
...
@@ -142,12 +142,14 @@ RuntimeContext::RuntimeContext(const VariableNameMap& innames,
...
@@ -142,12 +142,14 @@ RuntimeContext::RuntimeContext(const VariableNameMap& innames,
const
Scope
&
scope
)
{
const
Scope
&
scope
)
{
for
(
auto
&
var_name_item
:
innames
)
{
for
(
auto
&
var_name_item
:
innames
)
{
std
::
vector
<
Variable
*>&
input_vars
=
inputs
[
var_name_item
.
first
];
std
::
vector
<
Variable
*>&
input_vars
=
inputs
[
var_name_item
.
first
];
input_vars
.
reserve
(
var_name_item
.
second
.
size
());
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
input_vars
.
push_back
(
scope
.
FindVar
(
var_name
));
input_vars
.
push_back
(
scope
.
FindVar
(
var_name
));
}
}
}
}
for
(
auto
&
var_name_item
:
outnames
)
{
for
(
auto
&
var_name_item
:
outnames
)
{
std
::
vector
<
Variable
*>&
output_vars
=
outputs
[
var_name_item
.
first
];
std
::
vector
<
Variable
*>&
output_vars
=
outputs
[
var_name_item
.
first
];
output_vars
.
reserve
(
var_name_item
.
second
.
size
());
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
output_vars
.
push_back
(
scope
.
FindVar
(
var_name
));
output_vars
.
push_back
(
scope
.
FindVar
(
var_name
));
}
}
...
@@ -556,30 +558,28 @@ class RuntimeInferShapeContext : public InferShapeContext {
...
@@ -556,30 +558,28 @@ class RuntimeInferShapeContext : public InferShapeContext {
bool
HasOutput
(
const
std
::
string
&
name
)
const
override
{
bool
HasOutput
(
const
std
::
string
&
name
)
const
override
{
// has only one output
// has only one output
const
auto
&
outs
=
op_
.
Outputs
()
;
const
auto
&
outs
=
ctx_
.
outputs
;
auto
it
=
outs
.
find
(
name
);
auto
it
=
outs
.
find
(
name
);
if
(
it
==
outs
.
end
())
{
if
(
it
==
outs
.
end
())
{
return
false
;
return
false
;
}
}
const
auto
&
out
=
it
->
second
;
const
auto
&
out
=
it
->
second
;
if
(
out
.
size
()
==
0
||
out
[
0
]
==
kEmptyVarName
)
{
if
(
out
.
size
()
==
0
)
{
return
false
;
return
false
;
}
}
PADDLE_ENFORCE_EQ
(
out
.
size
(),
1UL
,
PADDLE_ENFORCE_EQ
(
out
.
size
(),
1UL
,
"Output %s should not have more than one outputs"
,
name
);
"Output %s should not have more than one outputs"
,
name
);
return
scope_
.
FindVar
(
out
[
0
])
!=
nullptr
;
return
out
[
0
]
!=
nullptr
;
}
}
bool
HasInputs
(
const
std
::
string
&
name
)
const
override
{
bool
HasInputs
(
const
std
::
string
&
name
)
const
override
{
if
(
!
op_
.
HasInputs
(
name
))
{
const
auto
&
ins
=
ctx_
.
inputs
;
return
false
;
auto
it
=
ins
.
find
(
name
);
}
if
(
it
==
ins
.
end
()
||
it
->
second
.
empty
())
{
auto
inputs
=
op_
.
Inputs
(
name
);
if
(
inputs
.
empty
())
{
return
false
;
return
false
;
}
}
for
(
auto
&
input
:
i
nputs
)
{
for
(
auto
&
input
:
i
t
->
second
)
{
if
(
scope_
.
FindVar
(
input
)
==
nullptr
)
{
if
(
input
==
nullptr
)
{
return
false
;
return
false
;
}
}
}
}
...
@@ -587,15 +587,13 @@ class RuntimeInferShapeContext : public InferShapeContext {
...
@@ -587,15 +587,13 @@ class RuntimeInferShapeContext : public InferShapeContext {
}
}
bool
HasOutputs
(
const
std
::
string
&
name
)
const
override
{
bool
HasOutputs
(
const
std
::
string
&
name
)
const
override
{
if
(
!
op_
.
HasOutputs
(
name
))
{
const
auto
&
outs
=
ctx_
.
outputs
;
return
false
;
auto
it
=
outs
.
find
(
name
);
}
if
(
it
==
outs
.
end
()
||
it
->
second
.
empty
())
{
auto
outputs
=
op_
.
Outputs
(
name
);
if
(
outputs
.
empty
())
{
return
false
;
return
false
;
}
}
for
(
auto
&
output
:
outputs
)
{
for
(
auto
&
output
:
it
->
second
)
{
if
(
scope_
.
FindVar
(
output
)
==
nullptr
)
{
if
(
output
==
nullptr
)
{
return
false
;
return
false
;
}
}
}
}
...
@@ -616,16 +614,18 @@ class RuntimeInferShapeContext : public InferShapeContext {
...
@@ -616,16 +614,18 @@ class RuntimeInferShapeContext : public InferShapeContext {
void
ShareDim
(
const
std
::
string
&
in
,
const
std
::
string
&
out
,
size_t
i
=
0
,
void
ShareDim
(
const
std
::
string
&
in
,
const
std
::
string
&
out
,
size_t
i
=
0
,
size_t
j
=
0
)
override
{
size_t
j
=
0
)
override
{
PADDLE_ENFORCE_LT
(
i
,
Inputs
(
in
).
size
());
auto
in_it
=
ctx_
.
inputs
.
find
(
in
);
PADDLE_ENFORCE_LT
(
j
,
Outputs
(
out
).
size
());
auto
out_it
=
ctx_
.
outputs
.
find
(
out
);
const
std
::
string
&
input_n
=
Inputs
(
in
)[
i
];
PADDLE_ENFORCE
(
in_it
!=
ctx_
.
inputs
.
end
()
&&
in_it
->
second
.
size
()
>
i
,
const
std
::
string
&
output_n
=
Outputs
(
out
)[
j
];
"Inputs %s should have %llu argument"
,
in
,
i
);
PADDLE_ENFORCE
(
out_it
!=
ctx_
.
outputs
.
end
()
&&
out_it
->
second
.
size
()
>
j
,
"Outputs %s should have %llu argument"
,
out
,
j
);
Variable
*
in_var
=
in_it
->
second
[
i
];
Variable
*
out_var
=
out_it
->
second
[
j
];
Variable
*
in_var
=
scope_
.
FindVar
(
input_n
);
Variable
*
out_var
=
scope_
.
FindVar
(
output_n
);
PADDLE_ENFORCE
(
in_var
->
Type
()
==
out_var
->
Type
(),
PADDLE_ENFORCE
(
in_var
->
Type
()
==
out_var
->
Type
(),
"The type of %s and %s is not the same."
,
output_n
,
"The type of %s and %s is not the same."
,
in
,
out
);
GetDim
(
input_n
));
if
(
in_var
->
IsType
<
framework
::
SelectedRows
>
())
{
if
(
in_var
->
IsType
<
framework
::
SelectedRows
>
())
{
auto
&
in_sele_rows
=
in_var
->
Get
<
framework
::
SelectedRows
>
();
auto
&
in_sele_rows
=
in_var
->
Get
<
framework
::
SelectedRows
>
();
...
@@ -646,13 +646,16 @@ class RuntimeInferShapeContext : public InferShapeContext {
...
@@ -646,13 +646,16 @@ class RuntimeInferShapeContext : public InferShapeContext {
void
ShareLoD
(
const
std
::
string
&
in
,
const
std
::
string
&
out
,
size_t
i
=
0
,
void
ShareLoD
(
const
std
::
string
&
in
,
const
std
::
string
&
out
,
size_t
i
=
0
,
size_t
j
=
0
)
const
override
{
size_t
j
=
0
)
const
override
{
const
std
::
vector
<
std
::
string
>&
inputs
=
Inputs
(
in
);
auto
in_it
=
ctx_
.
inputs
.
find
(
in
);
const
std
::
vector
<
std
::
string
>&
outputs
=
Outputs
(
out
);
auto
out_it
=
ctx_
.
outputs
.
find
(
out
);
PADDLE_ENFORCE_LT
(
i
,
inputs
.
size
());
PADDLE_ENFORCE
(
in_it
!=
ctx_
.
inputs
.
end
()
&&
in_it
->
second
.
size
()
>
i
,
PADDLE_ENFORCE_LT
(
j
,
outputs
.
size
());
"Inputs %s should have %llu argument"
,
in
,
i
);
Variable
*
in_var
=
scope_
.
FindVar
(
inputs
.
at
(
i
));
PADDLE_ENFORCE
(
out_it
!=
ctx_
.
outputs
.
end
()
&&
out_it
->
second
.
size
()
>
j
,
"Outputs %s should have %llu argument"
,
out
,
j
);
Variable
*
in_var
=
in_it
->
second
.
at
(
i
);
if
(
!
in_var
->
IsType
<
LoDTensor
>
())
return
;
if
(
!
in_var
->
IsType
<
LoDTensor
>
())
return
;
Variable
*
out_var
=
scope_
.
FindVar
(
outputs
.
at
(
j
)
);
Variable
*
out_var
=
out_it
->
second
.
at
(
j
);
PADDLE_ENFORCE
(
out_var
->
IsType
<
LoDTensor
>
(),
PADDLE_ENFORCE
(
out_var
->
IsType
<
LoDTensor
>
(),
"The %d-th output of Output(%s) must be LoDTensor."
,
j
,
out
);
"The %d-th output of Output(%s) must be LoDTensor."
,
j
,
out
);
auto
in_tensor
=
in_var
->
Get
<
LoDTensor
>
();
auto
in_tensor
=
in_var
->
Get
<
LoDTensor
>
();
...
@@ -687,9 +690,64 @@ class RuntimeInferShapeContext : public InferShapeContext {
...
@@ -687,9 +690,64 @@ class RuntimeInferShapeContext : public InferShapeContext {
bool
IsRuntime
()
const
override
{
return
true
;
}
bool
IsRuntime
()
const
override
{
return
true
;
}
// TODO(paddle-dev): Can this be template?
std
::
vector
<
InferShapeVarPtr
>
GetInputVarPtrs
(
const
std
::
string
&
name
)
override
{
const
std
::
vector
<
Variable
*>&
vars
=
InputVars
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
vars
.
size
());
res
.
insert
(
res
.
begin
(),
vars
.
begin
(),
vars
.
end
());
return
res
;
}
std
::
vector
<
InferShapeVarPtr
>
GetOutputVarPtrs
(
const
std
::
string
&
name
)
override
{
const
std
::
vector
<
Variable
*>&
vars
=
OutputVars
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
vars
.
size
());
res
.
insert
(
res
.
begin
(),
vars
.
begin
(),
vars
.
end
());
return
res
;
}
DDim
GetInputDim
(
const
std
::
string
&
name
)
const
override
{
const
std
::
vector
<
Variable
*>&
vars
=
InputVars
(
name
);
PADDLE_ENFORCE_EQ
(
vars
.
size
(),
1UL
,
"Input(%s) should hold one element, but now it holds %d"
,
name
,
vars
.
size
());
return
this
->
GetDim
(
vars
[
0
]);
}
std
::
vector
<
DDim
>
GetInputsDim
(
const
std
::
string
&
name
)
const
override
{
const
std
::
vector
<
Variable
*>&
vars
=
InputVars
(
name
);
return
GetDims
(
vars
);
}
std
::
vector
<
proto
::
VarType
::
Type
>
GetInputsVarType
(
const
std
::
string
&
name
)
const
override
{
return
GetVarTypes
(
InputVars
(
name
));
}
std
::
vector
<
proto
::
VarType
::
Type
>
GetOutputsVarType
(
const
std
::
string
&
name
)
const
override
{
return
GetVarTypes
(
OutputVars
(
name
));
}
void
SetOutputDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
override
{
auto
&
vars
=
OutputVars
(
name
);
PADDLE_ENFORCE_EQ
(
vars
.
size
(),
1UL
,
"Output(%s) should hold one element, but now it holds %d"
,
name
,
vars
.
size
());
SetDim
(
vars
[
0
],
dim
);
}
void
SetOutputsDim
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>&
dims
)
override
{
auto
&
vars
=
OutputVars
(
name
);
SetDims
(
vars
,
dims
);
}
protected:
protected:
DDim
GetDim
(
const
std
::
string
&
name
)
const
override
{
DDim
GetDim
(
Variable
*
var
)
const
{
Variable
*
var
=
scope_
.
FindVar
(
name
);
PADDLE_ENFORCE_NOT_NULL
(
var
);
PADDLE_ENFORCE_NOT_NULL
(
var
);
if
(
var
->
IsType
<
LoDTensor
>
())
{
if
(
var
->
IsType
<
LoDTensor
>
())
{
return
var
->
Get
<
LoDTensor
>
().
dims
();
return
var
->
Get
<
LoDTensor
>
().
dims
();
...
@@ -697,25 +755,44 @@ class RuntimeInferShapeContext : public InferShapeContext {
...
@@ -697,25 +755,44 @@ class RuntimeInferShapeContext : public InferShapeContext {
return
var
->
Get
<
SelectedRows
>
().
GetCompleteDims
();
return
var
->
Get
<
SelectedRows
>
().
GetCompleteDims
();
}
else
{
}
else
{
PADDLE_THROW
(
PADDLE_THROW
(
"Only LoDTensor/SelectedRows support 'GetDim', but Variable
%s'
s "
"Only LoDTensor/SelectedRows support 'GetDim', but Variables "
"type_id is %s."
,
"type_id is %s."
,
name
,
var
->
Type
().
name
());
var
->
Type
().
name
());
}
}
}
}
std
::
vector
<
DDim
>
GetDims
(
const
std
::
vector
<
Variable
*>&
vars
)
const
{
std
::
vector
<
DDim
>
ret
;
ret
.
reserve
(
vars
.
size
());
std
::
transform
(
vars
.
begin
(),
vars
.
end
(),
std
::
back_inserter
(
ret
),
[
this
](
Variable
*
var
)
{
return
this
->
GetDim
(
var
);
});
return
ret
;
}
std
::
vector
<
DDim
>
GetRepeatedDims
(
const
std
::
string
&
name
)
const
override
{
std
::
vector
<
DDim
>
GetRepeatedDims
(
const
std
::
string
&
name
)
const
override
{
PADDLE_THROW
(
"Only compile time support this method"
);
PADDLE_THROW
(
"Only compile time support this method"
);
}
}
void
SetDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
override
{
void
SetDim
(
Variable
*
var
,
const
DDim
&
dim
)
{
Variable
*
var
=
scope_
.
FindVar
(
name
);
if
(
var
->
IsType
<
LoDTensor
>
())
{
if
(
var
->
IsType
<
LoDTensor
>
())
{
var
->
GetMutable
<
LoDTensor
>
()
->
Resize
(
dim
);
var
->
GetMutable
<
LoDTensor
>
()
->
Resize
(
dim
);
}
else
if
(
var
->
IsType
<
SelectedRows
>
())
{
}
else
if
(
var
->
IsType
<
SelectedRows
>
())
{
var
->
GetMutable
<
SelectedRows
>
()
->
set_height
(
dim
[
0
]);
var
->
GetMutable
<
SelectedRows
>
()
->
set_height
(
dim
[
0
]);
}
else
{
}
else
{
PADDLE_THROW
(
"Variable %s type_id %s, expect LoDTensor/SelectedRows."
,
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
name
,
var
->
Type
().
name
());
var
->
Type
().
name
());
}
}
void
SetDims
(
const
std
::
vector
<
Variable
*>&
vars
,
const
std
::
vector
<
DDim
>&
dims
)
{
size_t
length
=
vars
.
size
();
PADDLE_ENFORCE_EQ
(
length
,
dims
.
size
());
for
(
size_t
i
=
0
;
i
<
length
;
++
i
)
{
if
(
vars
[
i
]
==
nullptr
)
{
continue
;
}
SetDim
(
vars
[
i
],
dims
[
i
]);
}
}
}
}
...
@@ -724,16 +801,36 @@ class RuntimeInferShapeContext : public InferShapeContext {
...
@@ -724,16 +801,36 @@ class RuntimeInferShapeContext : public InferShapeContext {
PADDLE_THROW
(
"Only compile time support this method"
);
PADDLE_THROW
(
"Only compile time support this method"
);
}
}
proto
::
VarType
::
Type
GetVarType
(
const
std
::
string
&
name
)
const
override
{
std
::
vector
<
proto
::
VarType
::
Type
>
GetVarTypes
(
auto
*
var
=
scope_
.
FindVar
(
name
);
const
std
::
vector
<
Variable
*>&
vars
)
const
{
return
ToVarType
(
var
->
Type
());
std
::
vector
<
proto
::
VarType
::
Type
>
retv
;
retv
.
resize
(
vars
.
size
());
std
::
transform
(
vars
.
begin
(),
vars
.
end
(),
retv
.
begin
(),
std
::
bind
(
std
::
mem_fn
(
&
RuntimeInferShapeContext
::
GetVarType
),
this
,
std
::
placeholders
::
_1
));
return
retv
;
}
}
InferShapeVarPtr
GetVarPtr
(
const
std
::
string
&
name
)
override
{
proto
::
VarType
::
Type
GetVarType
(
Variable
*
var
)
const
{
return
scope_
.
FindVar
(
name
);
return
ToVarType
(
var
->
Type
()
);
}
}
private:
private:
const
std
::
vector
<
Variable
*>&
InputVars
(
const
std
::
string
&
name
)
const
{
auto
it
=
ctx_
.
inputs
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
ctx_
.
inputs
.
end
(),
"Operator %s does not have the input %s."
,
op_
.
Type
(),
name
);
return
it
->
second
;
}
const
std
::
vector
<
Variable
*>&
OutputVars
(
const
std
::
string
&
name
)
const
{
auto
it
=
ctx_
.
outputs
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
ctx_
.
outputs
.
end
(),
"Operator %s does not have the outputs %s."
,
op_
.
Type
(),
name
);
return
it
->
second
;
}
const
OperatorBase
&
op_
;
const
OperatorBase
&
op_
;
const
Scope
&
scope_
;
const
Scope
&
scope_
;
const
RuntimeContext
&
ctx_
;
const
RuntimeContext
&
ctx_
;
...
@@ -864,8 +961,7 @@ Scope* OperatorWithKernel::PrepareData(
...
@@ -864,8 +961,7 @@ Scope* OperatorWithKernel::PrepareData(
for
(
size_t
i
=
0
;
i
<
var_name_item
.
second
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
var_name_item
.
second
.
size
();
++
i
)
{
auto
&
var_name
=
var_name_item
.
second
[
i
];
auto
&
var_name
=
var_name_item
.
second
[
i
];
auto
*
var
=
scope
.
FindVar
(
var_name
);
auto
*
var
=
input_vars
[
i
];
input_vars
[
i
]
=
var
;
// Only tensor can be tranfer to another device.
// Only tensor can be tranfer to another device.
if
(
var
==
nullptr
||
!
VarIsTensor
(
*
var
))
{
if
(
var
==
nullptr
||
!
VarIsTensor
(
*
var
))
{
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
f5532877
...
@@ -190,7 +190,6 @@ std::vector<Scope *> &ParallelExecutor::GetLocalScopes() {
...
@@ -190,7 +190,6 @@ std::vector<Scope *> &ParallelExecutor::GetLocalScopes() {
ParallelExecutor
::
ParallelExecutor
(
ParallelExecutor
::
ParallelExecutor
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
unordered_set
<
std
::
string
>
&
params
,
const
std
::
unordered_set
<
std
::
string
>
&
bcast_vars
,
const
std
::
unordered_set
<
std
::
string
>
&
bcast_vars
,
const
ProgramDesc
&
main_program
,
const
std
::
string
&
loss_var_name
,
const
ProgramDesc
&
main_program
,
const
std
::
string
&
loss_var_name
,
Scope
*
scope
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
Scope
*
scope
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
...
@@ -209,7 +208,7 @@ ParallelExecutor::ParallelExecutor(
...
@@ -209,7 +208,7 @@ ParallelExecutor::ParallelExecutor(
"the number of places must be greater than 1."
);
"the number of places must be greater than 1."
);
}
}
// Step 1. Bcast the
param
s to devs.
// Step 1. Bcast the
bcast_var
s to devs.
// Create local scopes
// Create local scopes
if
(
local_scopes
.
empty
())
{
if
(
local_scopes
.
empty
())
{
member_
->
own_local_scope_
=
true
;
member_
->
own_local_scope_
=
true
;
...
@@ -249,12 +248,12 @@ ParallelExecutor::ParallelExecutor(
...
@@ -249,12 +248,12 @@ ParallelExecutor::ParallelExecutor(
// ncclOp
// ncclOp
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
build_strategy
.
Apply
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
build_strategy
.
Apply
(
main_program
,
member_
->
places_
,
loss_var_name
,
params
,
main_program
,
member_
->
places_
,
loss_var_name
,
member_
->
local_scopes_
,
member_
->
local_scopes_
,
member_
->
use_cuda_
,
member_
->
nccl_ctxs_
.
get
());
member_
->
use_cuda_
,
member_
->
nccl_ctxs_
.
get
());
#else
#else
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
build_strategy
.
Apply
(
main_program
,
member_
->
places_
,
loss_var_name
,
build_strategy
.
Apply
(
main_program
,
member_
->
places_
,
loss_var_name
,
params
,
member_
->
local_scopes_
,
member_
->
use_cuda_
);
member_
->
local_scopes_
,
member_
->
use_cuda_
);
#endif
#endif
auto
max_memory_size
=
GetEagerDeletionThreshold
();
auto
max_memory_size
=
GetEagerDeletionThreshold
();
if
(
max_memory_size
>=
0
)
{
if
(
max_memory_size
>=
0
)
{
...
...
paddle/fluid/framework/parallel_executor.h
浏览文件 @
f5532877
...
@@ -41,7 +41,6 @@ class ParallelExecutor {
...
@@ -41,7 +41,6 @@ class ParallelExecutor {
public:
public:
explicit
ParallelExecutor
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
explicit
ParallelExecutor
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
unordered_set
<
std
::
string
>
&
params
,
const
std
::
unordered_set
<
std
::
string
>
&
bcast_vars
,
const
std
::
unordered_set
<
std
::
string
>
&
bcast_vars
,
const
ProgramDesc
&
main_program
,
const
ProgramDesc
&
main_program
,
const
std
::
string
&
loss_var_name
,
Scope
*
scope
,
const
std
::
string
&
loss_var_name
,
Scope
*
scope
,
...
...
paddle/fluid/framework/shape_inference.cc
浏览文件 @
f5532877
...
@@ -22,20 +22,6 @@ limitations under the License. */
...
@@ -22,20 +22,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
DDim
InferShapeContext
::
GetInputDim
(
const
std
::
string
&
name
)
const
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
PADDLE_ENFORCE_EQ
(
arg_names
.
size
(),
1UL
,
"Input(%s) should hold one element, but now it holds %d"
,
name
,
arg_names
.
size
());
return
this
->
GetDim
(
arg_names
[
0
]);
}
std
::
vector
<
DDim
>
InferShapeContext
::
GetInputsDim
(
const
std
::
string
&
name
)
const
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
return
GetDims
(
arg_names
);
}
std
::
vector
<
DDim
>
InferShapeContext
::
GetReaderDims
(
std
::
vector
<
DDim
>
InferShapeContext
::
GetReaderDims
(
const
std
::
string
&
name
)
const
{
const
std
::
string
&
name
)
const
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
...
@@ -46,26 +32,6 @@ std::vector<DDim> InferShapeContext::GetReaderDims(
...
@@ -46,26 +32,6 @@ std::vector<DDim> InferShapeContext::GetReaderDims(
return
this
->
GetRepeatedDims
(
arg_names
[
0
]);
return
this
->
GetRepeatedDims
(
arg_names
[
0
]);
}
}
DDim
InferShapeContext
::
GetInputsElementDim
(
const
std
::
string
&
name
,
int
idx
)
const
{
const
std
::
vector
<
std
::
string
>
&
names
=
Inputs
(
name
);
return
this
->
GetDim
(
names
[
idx
]);
}
void
InferShapeContext
::
SetOutputDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
{
auto
&
arg_names
=
Outputs
(
name
);
PADDLE_ENFORCE_EQ
(
arg_names
.
size
(),
1UL
,
"Output(%s) should hold one element, but now it holds %d"
,
name
,
arg_names
.
size
());
SetDim
(
arg_names
[
0
],
dim
);
}
void
InferShapeContext
::
SetOutputsDim
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
)
{
auto
&
names
=
Outputs
(
name
);
SetDims
(
names
,
dims
);
}
void
InferShapeContext
::
SetReaderDims
(
const
std
::
string
&
name
,
void
InferShapeContext
::
SetReaderDims
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
)
{
const
std
::
vector
<
DDim
>
&
dims
)
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Outputs
(
name
);
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Outputs
(
name
);
...
@@ -76,69 +42,5 @@ void InferShapeContext::SetReaderDims(const std::string &name,
...
@@ -76,69 +42,5 @@ void InferShapeContext::SetReaderDims(const std::string &name,
return
this
->
SetRepeatedDims
(
arg_names
[
0
],
dims
);
return
this
->
SetRepeatedDims
(
arg_names
[
0
],
dims
);
}
}
std
::
vector
<
InferShapeVarPtr
>
InferShapeContext
::
GetInputVarPtrs
(
const
std
::
string
&
name
)
{
const
std
::
vector
<
std
::
string
>
arg_names
=
Inputs
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
arg_names
.
size
());
std
::
transform
(
arg_names
.
begin
(),
arg_names
.
end
(),
std
::
back_inserter
(
res
),
[
this
](
const
std
::
string
&
name
)
{
return
this
->
GetVarPtr
(
name
);
});
return
res
;
}
std
::
vector
<
InferShapeVarPtr
>
InferShapeContext
::
GetOutputVarPtrs
(
const
std
::
string
&
name
)
{
const
std
::
vector
<
std
::
string
>
arg_names
=
Outputs
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
arg_names
.
size
());
std
::
transform
(
arg_names
.
begin
(),
arg_names
.
end
(),
std
::
back_inserter
(
res
),
[
this
](
const
std
::
string
&
name
)
{
return
this
->
GetVarPtr
(
name
);
});
return
res
;
}
std
::
vector
<
DDim
>
InferShapeContext
::
GetDims
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
{
std
::
vector
<
DDim
>
ret
;
ret
.
reserve
(
names
.
size
());
std
::
transform
(
names
.
begin
(),
names
.
end
(),
std
::
back_inserter
(
ret
),
[
this
](
const
std
::
string
&
name
)
{
return
this
->
GetDim
(
name
);
});
return
ret
;
}
void
InferShapeContext
::
SetDims
(
const
std
::
vector
<
std
::
string
>
&
names
,
const
std
::
vector
<
DDim
>
&
dims
)
{
size_t
length
=
names
.
size
();
PADDLE_ENFORCE_EQ
(
length
,
dims
.
size
());
for
(
size_t
i
=
0
;
i
<
length
;
++
i
)
{
if
(
names
[
i
]
==
framework
::
kEmptyVarName
)
{
continue
;
}
SetDim
(
names
[
i
],
dims
[
i
]);
}
}
std
::
vector
<
proto
::
VarType
::
Type
>
InferShapeContext
::
GetInputsVarType
(
const
std
::
string
&
name
)
const
{
return
GetVarTypes
(
Inputs
(
name
));
}
std
::
vector
<
proto
::
VarType
::
Type
>
InferShapeContext
::
GetOutputsVarType
(
const
std
::
string
&
name
)
const
{
return
GetVarTypes
(
Outputs
(
name
));
}
std
::
vector
<
proto
::
VarType
::
Type
>
InferShapeContext
::
GetVarTypes
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
{
std
::
vector
<
proto
::
VarType
::
Type
>
retv
;
retv
.
resize
(
names
.
size
());
std
::
transform
(
names
.
begin
(),
names
.
end
(),
retv
.
begin
(),
std
::
bind
(
std
::
mem_fn
(
&
InferShapeContext
::
GetVarType
),
this
,
std
::
placeholders
::
_1
));
return
retv
;
}
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/shape_inference.h
浏览文件 @
f5532877
...
@@ -33,22 +33,23 @@ class InferShapeContext {
...
@@ -33,22 +33,23 @@ class InferShapeContext {
virtual
bool
HasInput
(
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasInput
(
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasOutput
(
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasOutput
(
const
std
::
string
&
name
)
const
=
0
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetInputsVarType
(
virtual
std
::
vector
<
proto
::
VarType
::
Type
>
GetInputsVarType
(
const
std
::
string
&
name
)
const
;
const
std
::
string
&
name
)
const
=
0
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetOutputsVarType
(
virtual
std
::
vector
<
proto
::
VarType
::
Type
>
GetOutputsVarType
(
const
std
::
string
&
name
)
const
;
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasInputs
(
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasInputs
(
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasOutputs
(
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasOutputs
(
const
std
::
string
&
name
)
const
=
0
;
DDim
GetInputDim
(
const
std
::
string
&
name
)
const
;
virtual
DDim
GetInputDim
(
const
std
::
string
&
name
)
const
=
0
;
std
::
vector
<
DDim
>
GetInputsDim
(
const
std
::
string
&
name
)
const
;
virtual
std
::
vector
<
DDim
>
GetInputsDim
(
const
std
::
string
&
name
)
const
=
0
;
std
::
vector
<
DDim
>
GetReaderDims
(
const
std
::
string
&
name
)
const
;
virtual
std
::
vector
<
DDim
>
GetReaderDims
(
const
std
::
string
&
name
)
const
;
DDim
GetInputsElementDim
(
const
std
::
string
&
name
,
int
idx
)
const
;
void
SetOutputDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
);
virtual
void
SetOutputDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
=
0
;
void
SetOutputsDim
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
);
virtual
void
SetOutputsDim
(
const
std
::
string
&
name
,
void
SetReaderDims
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
);
const
std
::
vector
<
DDim
>
&
dims
)
=
0
;
virtual
void
SetReaderDims
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
);
virtual
AttrReader
Attrs
()
const
=
0
;
virtual
AttrReader
Attrs
()
const
=
0
;
virtual
const
std
::
vector
<
std
::
string
>
&
Inputs
(
virtual
const
std
::
vector
<
std
::
string
>
&
Inputs
(
...
@@ -67,27 +68,15 @@ class InferShapeContext {
...
@@ -67,27 +68,15 @@ class InferShapeContext {
virtual
bool
IsRuntime
()
const
=
0
;
virtual
bool
IsRuntime
()
const
=
0
;
std
::
vector
<
InferShapeVarPtr
>
GetInputVarPtrs
(
const
std
::
string
&
name
);
virtual
std
::
vector
<
InferShapeVarPtr
>
GetInputVarPtrs
(
std
::
vector
<
InferShapeVarPtr
>
GetOutputVarPtrs
(
const
std
::
string
&
name
);
const
std
::
string
&
name
)
=
0
;
virtual
InferShapeVarPtr
GetVarPtr
(
const
std
::
string
&
name
)
=
0
;
virtual
std
::
vector
<
InferShapeVarPtr
>
GetOutputVarPtrs
(
const
std
::
string
&
name
)
=
0
;
// Note: In while op, we need this to be public
void
SetDims
(
const
std
::
vector
<
std
::
string
>
&
names
,
const
std
::
vector
<
DDim
>
&
dims
);
protected:
protected:
virtual
DDim
GetDim
(
const
std
::
string
&
name
)
const
=
0
;
virtual
void
SetDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
=
0
;
virtual
std
::
vector
<
DDim
>
GetRepeatedDims
(
const
std
::
string
&
name
)
const
=
0
;
virtual
std
::
vector
<
DDim
>
GetRepeatedDims
(
const
std
::
string
&
name
)
const
=
0
;
virtual
void
SetRepeatedDims
(
const
std
::
string
&
name
,
virtual
void
SetRepeatedDims
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
)
=
0
;
const
std
::
vector
<
DDim
>
&
dims
)
=
0
;
std
::
vector
<
DDim
>
GetDims
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetVarTypes
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
;
virtual
proto
::
VarType
::
Type
GetVarType
(
const
std
::
string
&
name
)
const
=
0
;
};
};
}
// namespace framework
}
// namespace framework
...
...
paddle/fluid/operators/controlflow/while_op.cc
浏览文件 @
f5532877
...
@@ -399,26 +399,41 @@ class WhileGradOpShapeInference : public framework::InferShapeBase {
...
@@ -399,26 +399,41 @@ class WhileGradOpShapeInference : public framework::InferShapeBase {
ctx
->
HasInputs
(
kOutputs
);
ctx
->
HasInputs
(
kOutputs
);
ctx
->
HasInputs
(
framework
::
GradVarName
(
kOutputs
));
ctx
->
HasInputs
(
framework
::
GradVarName
(
kOutputs
));
auto
p_names
=
ctx
->
Inputs
(
kX
);
auto
pg_ig_names
=
ctx
->
Outputs
(
kXGRAD
);
auto
pg_ig_names
=
ctx
->
Outputs
(
kXGRAD
);
auto
var_types
=
ctx
->
GetInputsVarType
(
kX
);
std
::
vector
<
framework
::
InferShapeVarPtr
>
in_var_ptrs
=
std
::
vector
<
std
::
string
>
names_to_set
;
ctx
->
GetInputVarPtrs
(
kX
);
std
::
vector
<
framework
::
DDim
>
dims_to_set
;
std
::
vector
<
framework
::
InferShapeVarPtr
>
out_var_ptrs
=
for
(
size_t
i
=
0
;
i
<
p_names
.
size
();
++
i
)
{
ctx
->
GetOutputVarPtrs
(
kXGRAD
);
PADDLE_ENFORCE
(
in_var_ptrs
.
size
()
==
out_var_ptrs
.
size
());
for
(
size_t
i
=
0
;
i
<
in_var_ptrs
.
size
();
++
i
)
{
if
(
pg_ig_names
[
i
]
==
framework
::
kEmptyVarName
)
{
if
(
pg_ig_names
[
i
]
==
framework
::
kEmptyVarName
)
{
continue
;
continue
;
}
}
auto
dims
=
ctx
->
GetInputsElementDim
(
kX
,
i
);
if
(
ctx
->
IsRuntime
())
{
if
(
var_types
[
i
]
==
framework
::
proto
::
VarType
::
LOD_TENSOR
)
{
framework
::
Variable
*
in_var
=
names_to_set
.
push_back
(
pg_ig_names
[
i
]);
boost
::
get
<
framework
::
Variable
*>
(
in_var_ptrs
[
i
]);
dims_to_set
.
push_back
(
dims
);
framework
::
Variable
*
out_var
=
}
else
if
(
var_types
[
i
]
==
framework
::
proto
::
VarType
::
LOD_TENSOR_ARRAY
)
{
boost
::
get
<
framework
::
Variable
*>
(
out_var_ptrs
[
i
]);
// not sure how to set the dim of LOD_TENSOR_ARRAY
names_to_set
.
push_back
(
pg_ig_names
[
i
]);
auto
type
=
framework
::
ToVarType
(
in_var
->
Type
());
dims_to_set
.
push_back
(
dims
);
if
(
type
==
framework
::
proto
::
VarType
::
LOD_TENSOR
)
{
out_var
->
GetMutable
<
LoDTensor
>
()
->
Resize
(
in_var
->
Get
<
framework
::
LoDTensor
>
().
dims
());
}
else
if
(
type
==
framework
::
proto
::
VarType
::
SELECTED_ROWS
)
{
out_var
->
GetMutable
<
framework
::
SelectedRows
>
()
->
set_height
(
in_var
->
Get
<
framework
::
SelectedRows
>
().
GetCompleteDims
()[
0
]);
}
else
if
(
type
==
framework
::
proto
::
VarType
::
LOD_TENSOR_ARRAY
)
{
PADDLE_THROW
(
"WhileGradOp doesn't support type %d"
,
static_cast
<
int
>
(
type
));
}
}
else
{
framework
::
VarDesc
*
in_var
=
boost
::
get
<
framework
::
VarDesc
*>
(
in_var_ptrs
[
i
]);
boost
::
get
<
framework
::
VarDesc
*>
(
out_var_ptrs
[
i
])
->
SetShape
(
in_var
->
GetShape
());
}
}
}
}
ctx
->
SetDims
(
names_to_set
,
dims_to_set
);
}
}
};
};
...
...
paddle/fluid/operators/conv_mkldnn_op.cc
浏览文件 @
f5532877
...
@@ -155,11 +155,14 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -155,11 +155,14 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
chosen_memory_format
=
auto
chosen_memory_format
=
platform
::
data_format_to_memory_format
(
data_format
);
platform
::
data_format_to_memory_format
(
data_format
);
if
(
is_conv3d
)
{
weights_format
=
mkldnn
::
memory
::
format
::
any
;
chosen_memory_format
=
// Check the format for user's special output
platform
::
MKLDNNFormatForSize
(
src_tz
.
size
(),
chosen_memory_format
);
if
(
chosen_memory_format
!=
mkldnn
::
memory
::
format
::
any
)
{
if
(
is_conv3d
)
{
chosen_memory_format
=
platform
::
MKLDNNFormatForSize
(
src_tz
.
size
(),
chosen_memory_format
);
}
}
}
weights_format
=
GetWeightsFormat
(
chosen_memory_format
,
g
,
is_conv3d
);
auto
src_md
=
platform
::
MKLDNNMemDesc
(
auto
src_md
=
platform
::
MKLDNNMemDesc
(
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
chosen_memory_format
);
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
chosen_memory_format
);
...
@@ -435,11 +438,14 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -435,11 +438,14 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto
chosen_memory_format
=
auto
chosen_memory_format
=
platform
::
data_format_to_memory_format
(
data_format
);
platform
::
data_format_to_memory_format
(
data_format
);
if
(
is_conv3d
)
{
weights_format
=
mkldnn
::
memory
::
format
::
any
;
chosen_memory_format
=
// Check the format for user's special output
platform
::
MKLDNNFormatForSize
(
src_tz
.
size
(),
chosen_memory_format
);
if
(
chosen_memory_format
!=
mkldnn
::
memory
::
format
::
any
)
{
if
(
is_conv3d
)
{
chosen_memory_format
=
platform
::
MKLDNNFormatForSize
(
src_tz
.
size
(),
chosen_memory_format
);
}
}
}
weights_format
=
GetWeightsFormat
(
chosen_memory_format
,
g
,
is_conv3d
);
auto
src_md
=
platform
::
MKLDNNMemDesc
(
auto
src_md
=
platform
::
MKLDNNMemDesc
(
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
chosen_memory_format
);
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
chosen_memory_format
);
...
...
paddle/fluid/operators/distributed/grpc_client.cc
浏览文件 @
f5532877
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <stdlib.h>
#include <limits>
#include <limits>
#include "glog/logging.h" // For VLOG
#include "glog/logging.h" // For VLOG
...
@@ -420,7 +421,15 @@ void GRPCClient::Proceed() {
...
@@ -420,7 +421,15 @@ void GRPCClient::Proceed() {
sync_cond_
.
notify_all
();
sync_cond_
.
notify_all
();
}
}
}
}
VLOG
(
3
)
<<
"GRPCClient Proceed end"
;
// Last log message
// Avoid using VLOG() and LOG(): in the destructor of google::LogMessage() a
// static Mutex log_mutex is used for synchronization, which might have been
// destructed at this moment.
if
(
FLAGS_v
>=
3
)
{
std
::
string
msg
(
"GRPCClient Proceed end"
);
fwrite
(
msg
.
c_str
(),
msg
.
length
(),
1
,
stdout
);
}
}
}
std
::
shared_ptr
<
grpc
::
Channel
>
GRPCClient
::
GetChannel
(
const
std
::
string
&
ep
)
{
std
::
shared_ptr
<
grpc
::
Channel
>
GRPCClient
::
GetChannel
(
const
std
::
string
&
ep
)
{
...
...
paddle/fluid/operators/elementwise/elementwise_div_op.cu
浏览文件 @
f5532877
...
@@ -12,18 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,18 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_div_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_div_op.h"
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
elementwise_div
,
elementwise_div
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
elementwise_div_grad
,
elementwise_div_grad
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
...
...
paddle/fluid/operators/elementwise/elementwise_mul_op.cu
浏览文件 @
f5532877
...
@@ -12,19 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,19 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_mul_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_mul_op.h"
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
elementwise_mul
,
elementwise_mul
,
ops
::
ElementwiseMulKernel
<
plat
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseMulKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseMulKernel
<
p
lat
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseMulKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseMulKernel
<
p
lat
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseMulKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
in
t
>
,
ops
::
ElementwiseMulKernel
<
p
lat
::
CUDADeviceContext
,
int64_
t
>
,
ops
::
ElementwiseMulKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
ops
::
ElementwiseMulKernel
<
p
lat
::
CUDADeviceContext
,
plat
::
float16
>
);
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
elementwise_mul_grad
,
elementwise_mul_grad
,
ops
::
ElementwiseMulGradKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseMulGradKernel
<
p
lat
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseMulGradKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseMulGradKernel
<
p
lat
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseMulGradKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseMulGradKernel
<
p
lat
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseMulGradKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
ops
::
ElementwiseMulGradKernel
<
p
lat
::
CUDADeviceContext
,
int64_t
>
,
int64_t
>
);
ops
::
ElementwiseMulGradKernel
<
plat
::
CUDADeviceContext
,
plat
::
float16
>
);
paddle/fluid/operators/fill_zeros_like_op.cu.cc
浏览文件 @
f5532877
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/fill_zeros_like_op.h"
#include "paddle/fluid/operators/fill_zeros_like_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
...
@@ -22,4 +23,6 @@ REGISTER_OP_CUDA_KERNEL(
...
@@ -22,4 +23,6 @@ REGISTER_OP_CUDA_KERNEL(
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
bool
>
);
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
bool
>
);
paddle/fluid/operators/metrics/accuracy_op.cu
浏览文件 @
f5532877
...
@@ -16,6 +16,7 @@ limitations under the License. */
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include <thrust/reduce.h>
#include <thrust/reduce.h>
#include "paddle/fluid/operators/metrics/accuracy_op.h"
#include "paddle/fluid/operators/metrics/accuracy_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/gpu_info.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -94,6 +95,7 @@ class AccuracyOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -94,6 +95,7 @@ class AccuracyOpCUDAKernel : public framework::OpKernel<T> {
// FIXME(typhoonzero): types of T is for inference data.
// FIXME(typhoonzero): types of T is for inference data.
// label data is always int64
// label data is always int64
REGISTER_OP_CUDA_KERNEL
(
accuracy
,
REGISTER_OP_CUDA_KERNEL
(
paddle
::
operators
::
AccuracyOpCUDAKernel
<
float
>
,
accuracy
,
paddle
::
operators
::
AccuracyOpCUDAKernel
<
float
>
,
paddle
::
operators
::
AccuracyOpCUDAKernel
<
double
>
);
paddle
::
operators
::
AccuracyOpCUDAKernel
<
double
>
,
paddle
::
operators
::
AccuracyOpCUDAKernel
<
paddle
::
platform
::
float16
>
);
paddle/fluid/operators/optimizers/momentum_op.cu
浏览文件 @
f5532877
...
@@ -14,8 +14,11 @@ limitations under the License. */
...
@@ -14,8 +14,11 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/optimizers/momentum_op.h"
#include "paddle/fluid/operators/optimizers/momentum_op.h"
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
momentum
,
ops
::
MomentumOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
momentum
,
ops
::
MomentumOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
MomentumOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
ops
::
MomentumOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
MomentumOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
);
paddle/fluid/operators/optimizers/momentum_op.h
浏览文件 @
f5532877
...
@@ -237,7 +237,8 @@ class SparseMomentumFunctor<T, UseNesterov> {
...
@@ -237,7 +237,8 @@ class SparseMomentumFunctor<T, UseNesterov> {
inline
HOSTDEVICE
void
operator
()(
size_t
i
)
{
inline
HOSTDEVICE
void
operator
()(
size_t
i
)
{
auto
row_idx
=
auto
row_idx
=
math
::
BinarySearch
<
int64_t
>
(
rows_
,
row_height_
,
i
/
row_numel_
);
math
::
BinarySearch
<
int64_t
>
(
rows_
,
row_height_
,
i
/
row_numel_
);
T
g
=
row_idx
>=
0
?
g_
[
row_idx
*
row_numel_
+
i
%
row_numel_
]
:
0
;
T
g
=
row_idx
>=
0
?
g_
[
row_idx
*
row_numel_
+
i
%
row_numel_
]
:
static_cast
<
T
>
(
0
);
// put memory access in register
// put memory access in register
const
T
p
=
p_
[
i
];
const
T
p
=
p_
[
i
];
const
T
lr
=
lr_
[
0
];
const
T
lr
=
lr_
[
0
];
...
@@ -282,7 +283,8 @@ class SparseMomentumFunctor<T, NoNesterov> {
...
@@ -282,7 +283,8 @@ class SparseMomentumFunctor<T, NoNesterov> {
inline
HOSTDEVICE
void
operator
()(
size_t
i
)
{
inline
HOSTDEVICE
void
operator
()(
size_t
i
)
{
auto
row_idx
=
auto
row_idx
=
math
::
BinarySearch
<
int64_t
>
(
rows_
,
row_height_
,
i
/
row_numel_
);
math
::
BinarySearch
<
int64_t
>
(
rows_
,
row_height_
,
i
/
row_numel_
);
T
g
=
row_idx
>=
0
?
g_
[
row_idx
*
row_numel_
+
i
%
row_numel_
]
:
0
;
T
g
=
row_idx
>=
0
?
g_
[
row_idx
*
row_numel_
+
i
%
row_numel_
]
:
static_cast
<
T
>
(
0
);
// put memory access in register
// put memory access in register
const
T
p
=
p_
[
i
];
const
T
p
=
p_
[
i
];
const
T
lr
=
lr_
[
0
];
const
T
lr
=
lr_
[
0
];
...
...
paddle/fluid/operators/top_k_op.cu
浏览文件 @
f5532877
...
@@ -16,6 +16,7 @@ limitations under the License. */
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/fluid/operators/top_k_op.h"
#include "paddle/fluid/operators/top_k_op.h"
#include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/cuda_device_function.h"
#include "paddle/fluid/platform/cuda_device_function.h"
#include "paddle/fluid/platform/float16.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -150,7 +151,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
...
@@ -150,7 +151,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
if
(
k
<
MaxLength
-
(
*
beam
))
{
if
(
k
<
MaxLength
-
(
*
beam
))
{
topk
[
k
]
=
topk
[
k
+
*
beam
];
topk
[
k
]
=
topk
[
k
+
*
beam
];
}
else
{
}
else
{
topk
[
k
].
set
(
-
INFINITY
,
-
1
);
topk
[
k
].
set
(
-
static_cast
<
T
>
(
INFINITY
)
,
-
1
);
}
}
}
}
if
(
!
(
*
is_empty
))
{
if
(
!
(
*
is_empty
))
{
...
@@ -160,7 +161,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
...
@@ -160,7 +161,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
}
}
*
max
=
topk
[
MaxLength
-
1
];
*
max
=
topk
[
MaxLength
-
1
];
if
((
*
max
).
v
==
-
1
)
*
is_empty
=
true
;
if
((
*
max
).
v
==
-
static_cast
<
T
>
(
1
)
)
*
is_empty
=
true
;
*
beam
=
0
;
*
beam
=
0
;
}
}
}
}
...
@@ -181,7 +182,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
...
@@ -181,7 +182,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
if
(
k
<
MaxLength
-
*
beam
)
{
if
(
k
<
MaxLength
-
*
beam
)
{
topk
[
k
]
=
topk
[
k
+
*
beam
];
topk
[
k
]
=
topk
[
k
+
*
beam
];
}
else
{
}
else
{
topk
[
k
].
set
(
-
INFINITY
,
-
1
);
topk
[
k
].
set
(
-
static_cast
<
T
>
(
INFINITY
)
,
-
1
);
}
}
}
}
if
(
!
(
*
is_empty
))
{
if
(
!
(
*
is_empty
))
{
...
@@ -278,7 +279,7 @@ __global__ void KeMatrixTopK(T* output, int output_stride, int64_t* indices,
...
@@ -278,7 +279,7 @@ __global__ void KeMatrixTopK(T* output, int output_stride, int64_t* indices,
bool
firststep
=
true
;
bool
firststep
=
true
;
for
(
int
j
=
0
;
j
<
MaxLength
;
j
++
)
{
for
(
int
j
=
0
;
j
<
MaxLength
;
j
++
)
{
topk
[
j
].
set
(
-
INFINITY
,
-
1
);
topk
[
j
].
set
(
-
static_cast
<
T
>
(
INFINITY
)
,
-
1
);
}
}
while
(
top_num
)
{
while
(
top_num
)
{
ThreadGetTopK
<
T
,
MaxLength
,
BlockSize
>
(
ThreadGetTopK
<
T
,
MaxLength
,
BlockSize
>
(
...
@@ -362,5 +363,7 @@ class TopkOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -362,5 +363,7 @@ class TopkOpCUDAKernel : public framework::OpKernel<T> {
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
REGISTER_OP_CUDA_KERNEL
(
top_k
,
paddle
::
operators
::
TopkOpCUDAKernel
<
float
>
,
REGISTER_OP_CUDA_KERNEL
(
paddle
::
operators
::
TopkOpCUDAKernel
<
double
>
);
top_k
,
paddle
::
operators
::
TopkOpCUDAKernel
<
float
>
,
paddle
::
operators
::
TopkOpCUDAKernel
<
double
>
,
paddle
::
operators
::
TopkOpCUDAKernel
<
paddle
::
platform
::
float16
>
);
paddle/fluid/platform/nccl_helper.h
浏览文件 @
f5532877
...
@@ -23,6 +23,7 @@
...
@@ -23,6 +23,7 @@
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/platform/dynload/nccl.h"
#include "paddle/fluid/platform/dynload/nccl.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
#define NCCL_ID_VARNAME "NCCLID"
#define NCCL_ID_VARNAME "NCCLID"
...
@@ -38,6 +39,8 @@ inline ncclDataType_t ToNCCLDataType(framework::proto::VarType::Type type) {
...
@@ -38,6 +39,8 @@ inline ncclDataType_t ToNCCLDataType(framework::proto::VarType::Type type) {
return
ncclInt
;
return
ncclInt
;
}
else
if
(
type
==
framework
::
proto
::
VarType
::
INT64
)
{
}
else
if
(
type
==
framework
::
proto
::
VarType
::
INT64
)
{
return
ncclInt64
;
return
ncclInt64
;
}
else
if
(
type
==
framework
::
proto
::
VarType
::
FP16
)
{
return
ncclFloat16
;
}
else
{
}
else
{
PADDLE_THROW
(
"Not supported"
);
PADDLE_THROW
(
"Not supported"
);
}
}
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
f5532877
...
@@ -977,7 +977,6 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -977,7 +977,6 @@ All parameter, weight, gradient are variables in Paddle.
cannot be updated after being finalized.)DOC"
);
cannot be updated after being finalized.)DOC"
);
pe
.
def
(
py
::
init
<
const
std
::
vector
<
platform
::
Place
>
&
,
pe
.
def
(
py
::
init
<
const
std
::
vector
<
platform
::
Place
>
&
,
const
std
::
unordered_set
<
std
::
string
>
&
,
const
std
::
unordered_set
<
std
::
string
>
&
,
const
ProgramDesc
&
,
const
std
::
unordered_set
<
std
::
string
>
&
,
const
ProgramDesc
&
,
const
std
::
string
&
,
Scope
*
,
std
::
vector
<
Scope
*>
&
,
const
std
::
string
&
,
Scope
*
,
std
::
vector
<
Scope
*>
&
,
const
ExecutionStrategy
&
,
const
BuildStrategy
&
,
size_t
,
const
ExecutionStrategy
&
,
const
BuildStrategy
&
,
size_t
,
...
...
python/paddle/fluid/contrib/__init__.py
浏览文件 @
f5532877
...
@@ -22,9 +22,12 @@ from . import op_frequence
...
@@ -22,9 +22,12 @@ from . import op_frequence
from
.op_frequence
import
*
from
.op_frequence
import
*
from
.
import
quantize
from
.
import
quantize
from
.quantize
import
*
from
.quantize
import
*
from
.
import
utils
from
.utils
import
*
__all__
=
[]
__all__
=
[]
__all__
+=
decoder
.
__all__
__all__
+=
decoder
.
__all__
__all__
+=
memory_usage_calc
.
__all__
__all__
+=
memory_usage_calc
.
__all__
__all__
+=
op_frequence
.
__all__
__all__
+=
op_frequence
.
__all__
__all__
+=
quantize
.
__all__
__all__
+=
quantize
.
__all__
__all__
+=
utils
.
__all__
python/paddle/fluid/contrib/utils/__init__.py
浏览文件 @
f5532877
...
@@ -13,10 +13,11 @@
...
@@ -13,10 +13,11 @@
# limitations under the License.
# limitations under the License.
from
__future__
import
print_function
from
__future__
import
print_function
#
from . import lookup_table_utils
from
.
import
lookup_table_utils
#
from .lookup_table_utils import *
from
.lookup_table_utils
import
*
from
.
import
hdfs_utils
from
.
import
hdfs_utils
from
.hdfs_utils
import
*
from
.hdfs_utils
import
*
#__all__ = lookup_table_utils.__all__
__all__
=
[]
__all__
=
hdfs_utils
.
__all__
__all__
+=
lookup_table_utils
.
__all__
__all__
+=
hdfs_utils
.
__all__
python/paddle/fluid/contrib/utils/hdfs_utils.py
浏览文件 @
f5532877
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
"""HDFS Utils"""
"""HDFS Utils"""
import
os
import
os
import
sys
import
subprocess
import
subprocess
import
multiprocessing
import
multiprocessing
from
datetime
import
datetime
from
datetime
import
datetime
...
@@ -24,7 +25,7 @@ import errno
...
@@ -24,7 +25,7 @@ import errno
import
logging
import
logging
__all__
=
[
"HDFSClient"
,
"multi_download"
]
__all__
=
[
"HDFSClient"
,
"multi_download"
,
"multi_upload"
]
logging
.
basicConfig
(
format
=
'%(asctime)s - %(levelname)s - %(message)s'
)
logging
.
basicConfig
(
format
=
'%(asctime)s - %(levelname)s - %(message)s'
)
_logger
=
logging
.
getLogger
(
"hdfs_utils"
)
_logger
=
logging
.
getLogger
(
"hdfs_utils"
)
...
@@ -93,13 +94,15 @@ class HDFSClient(object):
...
@@ -93,13 +94,15 @@ class HDFSClient(object):
def
upload
(
self
,
hdfs_path
,
local_path
,
overwrite
=
False
,
retry_times
=
5
):
def
upload
(
self
,
hdfs_path
,
local_path
,
overwrite
=
False
,
retry_times
=
5
):
"""
"""
upload the local file to hdfs
upload the local file to hdfs
Args:
hdfs_path: hdfs path, target path
Args:
local_path: local file path, source path
hdfs_path(str): the hdfs file path
overwrite: will overwrite the original file
local_path(str): the local file path
retry_times: max times retry to upload
overwrite(bool|None): will overwrite the file on HDFS or not
Returns:
retry_times(int|5): retry times
Returns:
True or False
True or False
"""
"""
assert
hdfs_path
is
not
None
assert
hdfs_path
is
not
None
...
@@ -109,7 +112,7 @@ class HDFSClient(object):
...
@@ -109,7 +112,7 @@ class HDFSClient(object):
_logger
.
warn
(
_logger
.
warn
(
"The Local path: {} is dir and I will support it later, return"
.
"The Local path: {} is dir and I will support it later, return"
.
format
(
local_path
))
format
(
local_path
))
return
return
False
base
=
os
.
path
.
basename
(
local_path
)
base
=
os
.
path
.
basename
(
local_path
)
if
not
self
.
is_exist
(
hdfs_path
):
if
not
self
.
is_exist
(
hdfs_path
):
...
@@ -141,14 +144,16 @@ class HDFSClient(object):
...
@@ -141,14 +144,16 @@ class HDFSClient(object):
def
download
(
self
,
hdfs_path
,
local_path
,
overwrite
=
False
,
unzip
=
False
):
def
download
(
self
,
hdfs_path
,
local_path
,
overwrite
=
False
,
unzip
=
False
):
"""
"""
download from hdfs
download file from HDFS
Args:
hdfs_path: hdfs path, target path
Args:
local_path: local file path, source path
hdfs_path(str): the hdfs file path
overwrite: will remove original file and overwrite it.
local_path(str): the local file path
unzip: ignore this param
overwrite(bool|None): will overwrite the file on HDFS or not
Returns
unzip(bool|False): if the download file is compressed by zip, unzip it or not.
True or False
Returns:
True or False
"""
"""
_logger
.
info
(
'Downloading %r to %r.'
,
hdfs_path
,
local_path
)
_logger
.
info
(
'Downloading %r to %r.'
,
hdfs_path
,
local_path
)
_logger
.
info
(
'Download of %s to %r complete.'
,
hdfs_path
,
local_path
)
_logger
.
info
(
'Download of %s to %r complete.'
,
hdfs_path
,
local_path
)
...
@@ -188,13 +193,13 @@ class HDFSClient(object):
...
@@ -188,13 +193,13 @@ class HDFSClient(object):
def
is_exist
(
self
,
hdfs_path
=
None
):
def
is_exist
(
self
,
hdfs_path
=
None
):
"""
"""
whether the remote hdfs path exists?
whether the remote HDFS path exists
Args:
hdfs_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp)
Args:
fs_name: The default values are the same as in the job configuration
hdfs_path(str): the hdfs file path
fs_ugi: The default values are the same as in the job configuration
Returns:
Returns:
True or False
True or False
"""
"""
exist_cmd
=
[
'-test'
,
'-e'
,
hdfs_path
]
exist_cmd
=
[
'-test'
,
'-e'
,
hdfs_path
]
returncode
,
output
,
errors
=
self
.
__run_hdfs_cmd
(
returncode
,
output
,
errors
=
self
.
__run_hdfs_cmd
(
...
@@ -211,13 +216,13 @@ class HDFSClient(object):
...
@@ -211,13 +216,13 @@ class HDFSClient(object):
def
is_dir
(
self
,
hdfs_path
=
None
):
def
is_dir
(
self
,
hdfs_path
=
None
):
"""
"""
whether the remote hdfs path exists?
whether the remote HDFS path is directory
Args:
remote_file_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp)
Args:
fs_name: The default values are the same as in the job configuration
hdfs_path(str): the hdfs file path
fs_ugi: The default values are the same as in the job configuration
Returns:
Returns:
True or False
True or False
"""
"""
if
not
self
.
is_exist
(
hdfs_path
):
if
not
self
.
is_exist
(
hdfs_path
):
...
@@ -237,17 +242,17 @@ class HDFSClient(object):
...
@@ -237,17 +242,17 @@ class HDFSClient(object):
def
delete
(
self
,
hdfs_path
):
def
delete
(
self
,
hdfs_path
):
"""
"""
Remove a file or directory from HDFS.
Remove a file or directory from HDFS.
whether the remote HDFS path exists
Args:
Args:
param hdfs_path: HDFS path.
hdfs_path: HDFS path.
param recursive: Recursively delete files and directories. By default,
this method will raise an :class:`HdfsError` if trying to delete a
non-empty directory.
Returns:
Returns:
True or False
This function returns `True` if the deletion was successful and `False` if
This function returns `True` if the deletion was successful and `False` if
no file or directory previously existed at `hdfs_path`.
no file or directory previously existed at `hdfs_path`.
"""
"""
_logger
.
info
(
'Deleting %r.'
,
hdfs_path
)
_logger
.
info
(
'Deleting %r.'
,
hdfs_path
)
...
@@ -273,16 +278,14 @@ class HDFSClient(object):
...
@@ -273,16 +278,14 @@ class HDFSClient(object):
def
rename
(
self
,
hdfs_src_path
,
hdfs_dst_path
,
overwrite
=
False
):
def
rename
(
self
,
hdfs_src_path
,
hdfs_dst_path
,
overwrite
=
False
):
"""
"""
Rename a file or folder.
Move a file or folder on HDFS.
Args:
:param hdfs_src_path: Source path.
Args:
:param hdfs_dst_path: Destination path. If the path already exists and is
hdfs_path(str): HDFS path.
a directory, the source will be moved into it. If the path exists and is
overwrite(bool|False): If the path already exists and overwrite is False, will return False.
a file, or if a parent destination directory is missing, this method will
raise an :class:`HdfsError`.
Returns:
Returns:
This function returns `True` if the rename was successful and `False` if
True or False
rename was faild.
"""
"""
assert
hdfs_src_path
is
not
None
assert
hdfs_src_path
is
not
None
assert
hdfs_dst_path
is
not
None
assert
hdfs_dst_path
is
not
None
...
@@ -320,17 +323,20 @@ class HDFSClient(object):
...
@@ -320,17 +323,20 @@ class HDFSClient(object):
raise
raise
def
makedirs
(
self
,
hdfs_path
):
def
makedirs
(
self
,
hdfs_path
):
"""Create a remote directory, recursively if necessary.
"""
Create a remote directory, recursively if necessary.
Args:
Args:
:param hdfs_path: Remote path. Intermediate directories will be created
hdfs_path(str): Remote path. Intermediate directories will be created appropriately.
appropriately.
Returns:
Returns:
True
if make a directories was successful, False when make a directiries was failed.
True
or False
"""
"""
_logger
.
info
(
'Creating directories to %r.'
,
hdfs_path
)
_logger
.
info
(
'Creating directories to %r.'
,
hdfs_path
)
assert
hdfs_path
is
not
None
assert
hdfs_path
is
not
None
if
self
.
is_exist
(
hdfs_path
):
if
self
.
is_exist
(
hdfs_path
):
_logger
.
error
(
"HDFS path is exist: {}"
.
format
(
hdfs_path
))
return
return
mkdirs_commands
=
[
'-mkdir'
,
hdfs_path
]
mkdirs_commands
=
[
'-mkdir'
,
hdfs_path
]
...
@@ -346,11 +352,13 @@ class HDFSClient(object):
...
@@ -346,11 +352,13 @@ class HDFSClient(object):
def
ls
(
self
,
hdfs_path
):
def
ls
(
self
,
hdfs_path
):
"""
"""
ls a hdfs_path.
ls directory contents about HDFS hdfs_path
Args:
:param hdfs_path: hdfs_path will be ls.
Args:
hdfs_path(str): Remote HDFS path will be ls.
Returns:
Returns:
This function returns a `list` that contaion all files in the hdfs_path.
List: a contents list about hdfs_path.
"""
"""
assert
hdfs_path
is
not
None
assert
hdfs_path
is
not
None
...
@@ -378,11 +386,15 @@ class HDFSClient(object):
...
@@ -378,11 +386,15 @@ class HDFSClient(object):
def
lsr
(
self
,
hdfs_path
,
only_file
=
True
,
sort
=
True
):
def
lsr
(
self
,
hdfs_path
,
only_file
=
True
,
sort
=
True
):
"""
"""
ls a hdfs_path sort by time.
list directory contents about HDFS hdfs_path recursively
Args:
:param hdfs_path: hdfs_path will be ls.
Args:
hdfs_path(str): Remote HDFS path.
only_file(bool|True): will discard folders.
sort(bool|True): will be sorted by create time.
Returns:
Returns:
This function returns a `list` that contaion all files sorted by time in the hdfs_path.
List: a contents list about hdfs_path.
"""
"""
def
sort_by_time
(
v1
,
v2
):
def
sort_by_time
(
v1
,
v2
):
...
@@ -422,21 +434,106 @@ class HDFSClient(object):
...
@@ -422,21 +434,106 @@ class HDFSClient(object):
return
ret_lines
return
ret_lines
def
multi_download
(
client
,
hdfs_path
,
local_path
,
trainer_id
,
trainers
,
multi_processes
=
5
):
"""
Download files from HDFS using multi process.
Args:
client(HDFSClient): instance of HDFSClient
hdfs_path(str): path on hdfs
local_path(str): path on local
trainer_id(int): current trainer id
trainers(int): all trainers number
multi_processes(int|5): the download data process at the same time, default=5
Returns:
List:
Download files in local folder.
"""
def
__subprocess_download
(
datas
):
for
data
in
datas
:
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
hdfs_path
)
if
re_path
==
os
.
curdir
:
sub_local_re_path
=
local_path
else
:
sub_local_re_path
=
os
.
path
.
join
(
local_path
,
re_path
)
client
.
download
(
data
,
sub_local_re_path
)
assert
isinstance
(
client
,
HDFSClient
)
client
.
make_local_dirs
(
local_path
)
_logger
.
info
(
"Make local dir {} successfully"
.
format
(
local_path
))
all_need_download
=
client
.
lsr
(
hdfs_path
,
sort
=
True
)
need_download
=
all_need_download
[
trainer_id
::
trainers
]
_logger
.
info
(
"Get {} files From all {} files need to be download from {}"
.
format
(
len
(
need_download
),
len
(
all_need_download
),
hdfs_path
))
_logger
.
info
(
"Start {} multi process to download datas"
.
format
(
multi_processes
))
procs
=
[]
for
i
in
range
(
multi_processes
):
process_datas
=
need_download
[
i
::
multi_processes
]
p
=
multiprocessing
.
Process
(
target
=
__subprocess_download
,
args
=
(
process_datas
,
))
procs
.
append
(
p
)
p
.
start
()
# complete the processes
for
proc
in
procs
:
proc
.
join
()
_logger
.
info
(
"Finish {} multi process to download datas"
.
format
(
multi_processes
))
local_downloads
=
[]
for
data
in
need_download
:
data_name
=
os
.
path
.
basename
(
data
)
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
hdfs_path
)
if
re_path
==
os
.
curdir
:
local_re_path
=
os
.
path
.
join
(
local_path
,
data_name
)
else
:
local_re_path
=
os
.
path
.
join
(
local_path
,
re_path
,
data_name
)
local_downloads
.
append
(
local_re_path
)
return
local_downloads
def
getfilelist
(
path
):
rlist
=
[]
for
dir
,
folder
,
file
in
os
.
walk
(
path
):
for
i
in
file
:
t
=
os
.
path
.
join
(
dir
,
i
)
rlist
.
append
(
t
)
for
r
in
rlist
:
print
(
r
)
def
multi_upload
(
client
,
def
multi_upload
(
client
,
hdfs_path
,
hdfs_path
,
local_path
,
local_path
,
multi_processes
=
5
,
multi_processes
=
5
,
overwrite
=
False
):
overwrite
=
False
,
sync
=
True
):
"""
"""
Upload file to hdfs.
Upload files to HDFS using multi process.
Args:
Args:
:param overwrite: will overwrite hdfs file or not
client(HDFSClient): instance of HDFSClient
:param multi_processes: the upload data process at the same time, default=5
hdfs_path(str): path on hdfs
:param client: instance of HDFSClient
local_path(str): path on local
:param hdfs_path: path on hdfs
multi_processes(int|5): the upload data process at the same time, default=5
:param local_path: path on local
overwrite(bool|False): will overwrite file on HDFS or not
sync(bool|True): upload files sync or not.
Returns:
Returns:
None
"""
"""
def
__subprocess_upload
(
datas
):
def
__subprocess_upload
(
datas
):
...
@@ -446,13 +543,6 @@ def multi_upload(client,
...
@@ -446,13 +543,6 @@ def multi_upload(client,
client
.
upload
(
hdfs_re_path
,
data
,
overwrite
,
retry_times
=
5
)
client
.
upload
(
hdfs_re_path
,
data
,
overwrite
,
retry_times
=
5
)
def
get_local_files
(
path
):
def
get_local_files
(
path
):
"""
Get all local files
Args:
path: local file path
Returns:
A list that contation all files in the path.
"""
rlist
=
[]
rlist
=
[]
if
not
os
.
path
.
isdir
(
path
):
if
not
os
.
path
.
isdir
(
path
):
...
@@ -488,71 +578,6 @@ def multi_upload(client,
...
@@ -488,71 +578,6 @@ def multi_upload(client,
multi_processes
))
multi_processes
))
def
multi_download
(
client
,
hdfs_path
,
local_path
,
trainer_id
,
trainers
,
file_cnt
,
multi_processes
=
5
):
"""
multi_download
Args:
:param client: instance of HDFSClient
:param hdfs_path: path on hdfs
:param local_path: path on local
:param trainer_id: current trainer id
:param trainers: all trainers number
:param file_cnt: all file number
:param multi_processes: the download data process at the same time, default=5
:return: None
Returns:
A list that be downloaded.
"""
def
__subprocess_download
(
datas
):
for
data
in
datas
:
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
hdfs_path
)
local_re_path
=
os
.
path
.
join
(
local_path
,
re_path
)
client
.
download
(
data
,
local_re_path
)
assert
isinstance
(
client
,
HDFSClient
)
client
.
make_local_dirs
(
local_path
)
_logger
.
info
(
"Make local dir {} successfully"
.
format
(
local_path
))
all_need_download
=
client
.
lsr
(
hdfs_path
,
sort
=
True
)[:
file_cnt
]
need_download
=
all_need_download
[
trainer_id
::
trainers
]
_logger
.
info
(
"Get {} files From all {} files need to be download from {}"
.
format
(
len
(
need_download
),
len
(
all_need_download
),
hdfs_path
))
_logger
.
info
(
"Start {} multi process to download datas"
.
format
(
multi_processes
))
procs
=
[]
for
i
in
range
(
multi_processes
):
process_datas
=
need_download
[
i
::
multi_processes
]
p
=
multiprocessing
.
Process
(
target
=
__subprocess_download
,
args
=
(
process_datas
,
))
procs
.
append
(
p
)
p
.
start
()
# complete the processes
for
proc
in
procs
:
proc
.
join
()
_logger
.
info
(
"Finish {} multi process to download datas"
.
format
(
multi_processes
))
local_downloads
=
[]
for
data
in
need_download
:
data_name
=
os
.
path
.
basename
(
data
)
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
hdfs_path
)
local_re_path
=
os
.
path
.
join
(
local_path
,
re_path
,
data_name
)
local_downloads
.
append
(
local_re_path
)
return
local_downloads
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
hadoop_home
=
"/home/client/hadoop-client/hadoop/"
hadoop_home
=
"/home/client/hadoop-client/hadoop/"
...
...
python/paddle/fluid/contrib/utils/lookup_table_utils.py
浏览文件 @
f5532877
...
@@ -18,14 +18,12 @@ import os
...
@@ -18,14 +18,12 @@ import os
import
time
import
time
import
logging
import
logging
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
from
paddle.fluid
import
core
from
paddle.fluid
import
io
from
paddle.fluid
import
io
from
paddle.fluid
import
Program
from
paddle.fluid
import
Program
__all__
=
[
__all__
=
[
"load_
inference_model"
,
"load_persistable_vars
"
,
"load_
persistables_for_increment"
,
"load_persistables_for_inference
"
,
"convert_dist_to_sparse_program"
"convert_dist_to_sparse_program"
]
]
...
@@ -80,19 +78,28 @@ def __get_prefetch_op_tuples(main_program):
...
@@ -80,19 +78,28 @@ def __get_prefetch_op_tuples(main_program):
return
prefetch_op_tuples
return
prefetch_op_tuples
def
convert_dist_to_sparse_program
(
main_program
):
def
convert_dist_to_sparse_program
(
program
):
if
not
main_program
.
_distributed_lookup_table
:
"""
WARNING: this function will only be used for distributed training with distributed lookup table.
when we train model with distributed lookup table but want to do the local inference, we can use
this function to convert the train program with distributed lookup table to sparse lookup table.
:param program(Program): the program must be the trainer program, which will be get by the distribute transpiler.
:return:
program: The `program` is a Program, it's the program replace distributed lookup table to sparse lookup table.
"""
if
not
program
.
_distributed_lookup_table
:
_logger
.
warn
(
_logger
.
warn
(
"There are no distributed lookup tables need to be converted"
)
"There are no distributed lookup tables need to be converted"
)
return
return
# create table param and grad var in pserver program
# create table param and grad var in pserver program
origin_emb_var
=
"{}.origin"
.
format
(
main_
program
.
_distributed_lookup_table
)
origin_emb_var
=
"{}.origin"
.
format
(
program
.
_distributed_lookup_table
)
emb_var
=
main_
program
.
_distributed_lookup_table
emb_var
=
program
.
_distributed_lookup_table
main_
program
.
global_block
().
_rename_var
(
emb_var
,
origin_emb_var
)
program
.
global_block
().
_rename_var
(
emb_var
,
origin_emb_var
)
origin_param_var
=
main_
program
.
global_block
().
vars
[
origin_emb_var
]
origin_param_var
=
program
.
global_block
().
vars
[
origin_emb_var
]
param_var
=
main_
program
.
global_block
().
create_var
(
param_var
=
program
.
global_block
().
create_var
(
name
=
emb_var
,
name
=
emb_var
,
shape
=
origin_param_var
.
shape
,
shape
=
origin_param_var
.
shape
,
dtype
=
origin_param_var
.
dtype
,
dtype
=
origin_param_var
.
dtype
,
...
@@ -100,28 +107,28 @@ def convert_dist_to_sparse_program(main_program):
...
@@ -100,28 +107,28 @@ def convert_dist_to_sparse_program(main_program):
persistable
=
True
)
persistable
=
True
)
# parameter must be selected rows
# parameter must be selected rows
param_var
.
desc
.
set_type
(
core
.
VarDesc
.
VarType
.
SELECTED_ROWS
)
param_var
.
desc
.
set_type
(
core
.
VarDesc
.
VarType
.
SELECTED_ROWS
)
main_
program
.
_sync_with_cpp
()
program
.
_sync_with_cpp
()
prefetch_op_tuples
=
__get_prefetch_op_tuples
(
main_
program
)
prefetch_op_tuples
=
__get_prefetch_op_tuples
(
program
)
split_ids_id
=
prefetch_op_tuples
[
0
]
split_ids_id
=
prefetch_op_tuples
[
0
]
for
idx
in
range
(
split_ids_id
+
2
,
split_ids_id
-
1
,
-
1
):
for
idx
in
range
(
split_ids_id
+
2
,
split_ids_id
-
1
,
-
1
):
main_
program
.
global_block
().
_remove_op
(
idx
)
program
.
global_block
().
_remove_op
(
idx
)
main_
program
.
desc
.
flush
()
program
.
desc
.
flush
()
in_out_pairs
=
zip
(
prefetch_op_tuples
[
1
],
prefetch_op_tuples
[
2
])
in_out_pairs
=
zip
(
prefetch_op_tuples
[
1
],
prefetch_op_tuples
[
2
])
for
in_out_pair
in
in_out_pairs
:
for
in_out_pair
in
in_out_pairs
:
idx
=
split_ids_id
idx
=
split_ids_id
ids
=
main_
program
.
global_block
().
vars
[
in_out_pair
[
0
]]
ids
=
program
.
global_block
().
vars
[
in_out_pair
[
0
]]
out
=
main_
program
.
global_block
().
vars
[
in_out_pair
[
1
]]
out
=
program
.
global_block
().
vars
[
in_out_pair
[
1
]]
__insert_lookup_sparse_table_op
(
main_
program
,
idx
,
ids
,
param_var
,
out
)
__insert_lookup_sparse_table_op
(
program
,
idx
,
ids
,
param_var
,
out
)
main_
program
.
desc
.
flush
()
program
.
desc
.
flush
()
return
main_
program
return
program
def
load_persistable_vars
(
executor
,
dirname
,
program
,
lookup_table_var
):
def
_load_persistable_vars
(
executor
,
dirname
,
program
,
lookup_table_vars
):
def
_is_checkpoint_var
(
exclude_fluid_vars
=
None
):
def
_is_checkpoint_var
(
exclude_fluid_vars
=
None
):
"""
"""
the checkpoint will not save or load all the variables.
the checkpoint will not save or load all the variables.
...
@@ -159,8 +166,82 @@ def load_persistable_vars(executor, dirname, program, lookup_table_var):
...
@@ -159,8 +166,82 @@ def load_persistable_vars(executor, dirname, program, lookup_table_var):
return
is_valid
return
is_valid
def
_load_lookup_table_vars
(
executor
,
dirname
,
main_program
,
io
.
load_vars
(
lookup_table_vars
):
executor
,
dirname
=
dirname
,
main_program
=
program
,
predicate
=
_is_checkpoint_var
(
lookup_table_vars
),
filename
=
None
)
def
load_persistables_for_increment
(
dirname
,
executor
,
program
,
lookup_table_var
,
lookup_table_var_path
):
"""
WARNING: this function will only be used for distributed training with distributed lookup table.
for increment trainning, the pserver will not only load dense variables,
but also load the suitable lookup table var. Because of slice lookup table
var with HASH, we must load the correct slice var.
:param dirname(str): The directory path
:param executor(Executor): The executor to run for loading inference model.
:param program(Program): The parameter server program, which will run on Pserver.
:param lookup_table_var: the distributed lookup tables var name.
:param lookup_table_var_path: the the distributed lookup tables var location.
:return: None
"""
def
__load_lookup_table_vars
(
executor
,
main_program
,
lookup_table_var
,
lookup_table_var_path
):
emb_var
=
main_program
.
global_block
().
var
(
lookup_table_var
)
load_program
=
Program
()
load_block
=
load_program
.
global_block
()
load_block
.
append_op
(
type
=
'load'
,
inputs
=
{},
outputs
=
{
'Out'
:
[
emb_var
]},
attrs
=
{
'file_path'
:
lookup_table_var_path
})
executor
.
run
(
load_program
)
if
not
os
.
path
.
isdir
(
dirname
):
raise
ValueError
(
"There is no directory named '%s'"
,
dirname
)
if
not
os
.
path
.
exists
(
lookup_table_var_path
):
raise
ValueError
(
"There is no file named '%s'"
,
lookup_table_var_path
)
if
not
isinstance
(
program
,
Program
):
raise
ValueError
(
"program must be an instance of fluid.Program"
)
_logger
.
info
(
"Start Load Sparse Program With "
"Distributed Lookup Table Vars from {}, time = {}"
.
format
(
dirname
,
time
.
ctime
()))
_load_persistable_vars
(
executor
,
dirname
,
program
,
[
lookup_table_var
])
__load_lookup_table_vars
(
executor
,
program
,
lookup_table_var
,
lookup_table_var_path
)
_logger
.
info
(
"Finish Load Sparse Program With "
"Distributed Lookup Table Vars from {}, time = {}"
.
format
(
dirname
,
time
.
ctime
()))
def
load_persistables_for_inference
(
dirname
,
executor
,
program
,
lookup_table_var_name
):
"""
WARNING: this function will only be used for inference with distributed lookup table.
Inference with distributed lookup table is a little funky, this function will load distributed
lookup table vars into sparse var, can be used in local inference mode.
:param dirname(str): The directory path
:param executor(Executor): The executor to run for loading inference model.
:param program(Program): The parameter server program, which will run on Pserver.
:param lookup_table_var_name: the distributed lookup tables var name.
:return: None
"""
def
__load_lookup_table_vars
(
executor
,
dirname
,
main_program
,
lookup_table_vars
):
if
not
os
.
path
.
isdir
(
dirname
):
if
not
os
.
path
.
isdir
(
dirname
):
raise
ValueError
(
"There is no directory named '%s'"
,
dirname
)
raise
ValueError
(
"There is no directory named '%s'"
,
dirname
)
...
@@ -209,48 +290,34 @@ def load_persistable_vars(executor, dirname, program, lookup_table_var):
...
@@ -209,48 +290,34 @@ def load_persistable_vars(executor, dirname, program, lookup_table_var):
global_block
.
append_op
(
type
=
'delete_var'
,
inputs
=
{
'X'
:
sums
})
global_block
.
append_op
(
type
=
'delete_var'
,
inputs
=
{
'X'
:
sums
})
executor
.
run
(
convert_program
)
executor
.
run
(
convert_program
)
_logger
.
info
(
"Start Load Sparse Program With "
"Distributed Lookup Table Vars from {}, time = {}"
.
format
(
dirname
,
time
.
ctime
()))
lookup_table_vars
=
[
lookup_table_var
]
io
.
load_vars
(
executor
,
dirname
=
dirname
,
main_program
=
program
,
predicate
=
_is_checkpoint_var
(
lookup_table_vars
),
filename
=
None
)
_load_lookup_table_vars
(
executor
,
dirname
,
program
,
lookup_table_vars
)
_logger
.
info
(
"Finish Load Sparse Program With "
"Distributed Lookup Table Vars from {}, time = {}"
.
format
(
dirname
,
time
.
ctime
()))
def
load_inference_model
(
dirname
,
executor
,
lookup_table_var_name
):
if
not
os
.
path
.
isdir
(
dirname
):
if
not
os
.
path
.
isdir
(
dirname
):
raise
ValueError
(
"There is no directory named '%s'"
,
dirname
)
raise
ValueError
(
"There is no directory named '%s'"
,
dirname
)
local_model
=
os
.
path
.
join
(
dirname
,
model_filename
)
if
program
:
if
not
isinstance
(
program
,
Program
):
raise
ValueError
(
"program must be an instance of fluid.Program"
)
else
:
local_model
=
os
.
path
.
join
(
dirname
,
model_filename
)
with
open
(
local_model
,
"rb"
)
as
f
:
with
open
(
local_model
,
"rb"
)
as
f
:
program_desc_str
=
f
.
read
()
program_desc_str
=
f
.
read
()
program
=
Program
.
parse_from_string
(
program_desc_str
)
program
=
Program
.
parse_from_string
(
program_desc_str
)
if
not
core
.
_is_program_version_supported
(
program
.
_version
()):
if
not
core
.
_is_program_version_supported
(
program
.
_version
()):
raise
ValueError
(
"Unsupported program version: %d
\n
"
%
raise
ValueError
(
"Unsupported program version: %d
\n
"
%
program
.
_version
())
program
.
_version
())
# Binary data also need version.
_logger
.
info
(
"Start Load Sparse Program With "
load_persistable_vars
(
executor
,
dirname
,
program
,
lookup_table_var_name
)
"Distributed Lookup Table Vars from {}, time = {}"
.
format
(
dirname
,
time
.
ctime
()))
_load_persistable_vars
(
executor
,
dirname
,
program
,
[
lookup_table_var_name
])
__load_lookup_table_vars
(
executor
,
dirname
,
program
,
[
lookup_table_var_name
])
feed_target_names
=
program
.
desc
.
get_feed_target_names
()
_logger
.
info
(
"Finish Load Sparse Program With "
fetch_target_names
=
program
.
desc
.
get_fetch_target_names
()
"Distributed Lookup Table Vars from {}, time = {}"
.
format
(
fetch_targets
=
[
dirname
,
time
.
ctime
()))
program
.
global_block
().
var
(
name
)
for
name
in
fetch_target_names
]
return
[
program
,
feed_target_names
,
fetch_targets
]
return
program
python/paddle/fluid/data_feeder.py
浏览文件 @
f5532877
...
@@ -44,6 +44,8 @@ class DataToLoDTensorConverter(object):
...
@@ -44,6 +44,8 @@ class DataToLoDTensorConverter(object):
self
.
dtype
=
'int64'
self
.
dtype
=
'int64'
elif
dtype
==
core
.
VarDesc
.
VarType
.
FP64
:
elif
dtype
==
core
.
VarDesc
.
VarType
.
FP64
:
self
.
dtype
=
'float64'
self
.
dtype
=
'float64'
elif
dtype
==
core
.
VarDesc
.
VarType
.
FP16
:
self
.
dtype
=
'float16'
elif
dtype
==
core
.
VarDesc
.
VarType
.
INT32
:
elif
dtype
==
core
.
VarDesc
.
VarType
.
INT32
:
self
.
dtype
=
'int32'
self
.
dtype
=
'int32'
elif
dtype
==
core
.
VarDesc
.
VarType
.
UINT8
:
elif
dtype
==
core
.
VarDesc
.
VarType
.
UINT8
:
...
...
python/paddle/fluid/initializer.py
浏览文件 @
f5532877
...
@@ -18,6 +18,7 @@ from . import framework
...
@@ -18,6 +18,7 @@ from . import framework
import
numpy
as
np
import
numpy
as
np
import
contextlib
import
contextlib
from
.core
import
VarDesc
from
.core
import
VarDesc
from
.
import
unique_name
__all__
=
[
__all__
=
[
'Constant'
,
'Uniform'
,
'Normal'
,
'TruncatedNormal'
,
'Xavier'
,
'Bilinear'
,
'Constant'
,
'Uniform'
,
'Normal'
,
'TruncatedNormal'
,
'Xavier'
,
'Bilinear'
,
...
@@ -207,16 +208,39 @@ class UniformInitializer(Initializer):
...
@@ -207,16 +208,39 @@ class UniformInitializer(Initializer):
# Initialization Ops should be prepended and not appended
# Initialization Ops should be prepended and not appended
if
self
.
_seed
==
0
:
if
self
.
_seed
==
0
:
self
.
_seed
=
block
.
program
.
random_seed
self
.
_seed
=
block
.
program
.
random_seed
# to be compatible of fp16 initalizers
if
var
.
dtype
==
VarDesc
.
VarType
.
FP16
:
out_dtype
=
VarDesc
.
VarType
.
FP32
out_var
=
block
.
create_var
(
name
=
unique_name
.
generate
(
"."
.
join
([
'gaussian_random'
,
'tmp'
])),
shape
=
var
.
shape
,
dtype
=
out_dtype
,
type
=
VarDesc
.
VarType
.
LOD_TENSOR
,
persistable
=
False
)
else
:
out_dtype
=
var
.
dtype
out_var
=
var
op
=
block
.
_prepend_op
(
op
=
block
.
_prepend_op
(
type
=
"uniform_random"
,
type
=
"uniform_random"
,
outputs
=
{
"Out"
:
var
},
outputs
=
{
"Out"
:
out_
var
},
attrs
=
{
attrs
=
{
"shape"
:
var
.
shape
,
"shape"
:
var
.
shape
,
"dtype"
:
int
(
var
.
dtype
)
,
"dtype"
:
out_dtype
,
"min"
:
self
.
_low
,
"min"
:
self
.
_low
,
"max"
:
self
.
_high
,
"max"
:
self
.
_high
,
"seed"
:
self
.
_seed
"seed"
:
self
.
_seed
})
})
if
var
.
dtype
==
VarDesc
.
VarType
.
FP16
:
block
.
append_op
(
type
=
"cast"
,
inputs
=
{
"X"
:
out_var
},
outputs
=
{
"Out"
:
var
},
attrs
=
{
"in_dtype"
:
out_var
.
dtype
,
"out_dtype"
:
var
.
dtype
})
var
.
op
=
op
var
.
op
=
op
return
op
return
op
...
@@ -261,17 +285,39 @@ class NormalInitializer(Initializer):
...
@@ -261,17 +285,39 @@ class NormalInitializer(Initializer):
# Initialization Ops should be prepended and not appended
# Initialization Ops should be prepended and not appended
if
self
.
_seed
==
0
:
if
self
.
_seed
==
0
:
self
.
_seed
=
block
.
program
.
random_seed
self
.
_seed
=
block
.
program
.
random_seed
# to be compatible of fp16 initalizers
if
var
.
dtype
==
VarDesc
.
VarType
.
FP16
:
out_dtype
=
VarDesc
.
VarType
.
FP32
out_var
=
block
.
create_var
(
name
=
unique_name
.
generate
(
"."
.
join
([
'gaussian_random'
,
'tmp'
])),
shape
=
var
.
shape
,
dtype
=
out_dtype
,
type
=
VarDesc
.
VarType
.
LOD_TENSOR
,
persistable
=
False
)
else
:
out_dtype
=
var
.
dtype
out_var
=
var
op
=
block
.
_prepend_op
(
op
=
block
.
_prepend_op
(
type
=
"gaussian_random"
,
type
=
"gaussian_random"
,
outputs
=
{
"Out"
:
var
},
outputs
=
{
"Out"
:
out_
var
},
attrs
=
{
attrs
=
{
"shape"
:
var
.
shape
,
"shape"
:
var
.
shape
,
"dtype"
:
int
(
var
.
dtype
)
,
"dtype"
:
out_dtype
,
"mean"
:
self
.
_mean
,
"mean"
:
self
.
_mean
,
"std"
:
self
.
_std_dev
,
"std"
:
self
.
_std_dev
,
"seed"
:
self
.
_seed
,
"seed"
:
self
.
_seed
,
"use_mkldnn"
:
False
"use_mkldnn"
:
False
})
})
if
var
.
dtype
==
VarDesc
.
VarType
.
FP16
:
block
.
append_op
(
type
=
"cast"
,
inputs
=
{
"X"
:
out_var
},
outputs
=
{
"Out"
:
var
},
attrs
=
{
"in_dtype"
:
out_var
.
dtype
,
"out_dtype"
:
var
.
dtype
})
var
.
op
=
op
var
.
op
=
op
return
op
return
op
...
...
python/paddle/fluid/layers/learning_rate_scheduler.py
浏览文件 @
f5532877
...
@@ -63,14 +63,18 @@ def noam_decay(d_model, warmup_steps):
...
@@ -63,14 +63,18 @@ def noam_decay(d_model, warmup_steps):
Returns:
Returns:
The decayed learning rate.
The decayed learning rate.
"""
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
(
1
)
a
=
global_step
**-
0.5
def
_lr_schedule
(
dtype
):
b
=
(
warmup_steps
**-
1.5
)
*
global_step
with
default_main_program
().
_lr_schedule_guard
():
lr_value
=
(
d_model
**-
0.5
)
*
nn
.
elementwise_min
(
a
,
b
)
global_step
=
_decay_step_counter
(
1
)
return
lr_value
a
=
global_step
**-
0.5
b
=
(
warmup_steps
**-
1.5
)
*
global_step
lr_value
=
(
d_model
**-
0.5
)
*
nn
.
elementwise_min
(
a
,
b
)
return
lr_value
return
_lr_schedule
def
exponential_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
def
exponential_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
...
@@ -109,15 +113,19 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
...
@@ -109,15 +113,19 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
sgd_optimizer.minimize(avg_cost)
sgd_optimizer.minimize(avg_cost)
"""
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
div_res
=
global_step
/
decay_steps
def
_lr_schedule
(
dtype
):
if
staircase
:
with
default_main_program
().
_lr_schedule_guard
():
div_res
=
ops
.
floor
(
div_res
)
global_step
=
_decay_step_counter
()
decayed_lr
=
learning_rate
*
(
decay_rate
**
div_res
)
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
*
(
decay_rate
**
div_res
)
return
decayed_lr
return
decayed_lr
return
_lr_schedule
def
natural_exp_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
def
natural_exp_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
...
@@ -138,15 +146,19 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
...
@@ -138,15 +146,19 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
Returns:
Returns:
The decayed learning rate
The decayed learning rate
"""
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
div_res
=
global_step
/
decay_steps
def
_lr_schedule
(
dtype
):
if
staircase
:
with
default_main_program
().
_lr_schedule_guard
():
div_res
=
ops
.
floor
(
div_res
)
global_step
=
_decay_step_counter
()
decayed_lr
=
learning_rate
*
ops
.
exp
(
-
1
*
decay_rate
*
div_res
)
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
*
ops
.
exp
(
-
1
*
decay_rate
*
div_res
)
return
decayed_lr
return
decayed_lr
return
_lr_schedule
def
inverse_time_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
def
inverse_time_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
...
@@ -184,16 +196,20 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
...
@@ -184,16 +196,20 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
staircase=True))
staircase=True))
sgd_optimizer.minimize(avg_cost)
sgd_optimizer.minimize(avg_cost)
"""
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
div_res
=
global_step
/
decay_steps
def
_lr_schedule
(
dtype
):
if
staircase
:
with
default_main_program
().
_lr_schedule_guard
()
:
div_res
=
ops
.
floor
(
div_res
)
global_step
=
_decay_step_counter
(
)
decayed_lr
=
learning_rate
/
(
1
+
decay_rate
*
div_res
)
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
return
decayed_lr
decayed_lr
=
learning_rate
/
(
1
+
decay_rate
*
div_res
)
return
decayed_lr
return
_lr_schedule
def
polynomial_decay
(
learning_rate
,
def
polynomial_decay
(
learning_rate
,
...
@@ -224,28 +240,33 @@ def polynomial_decay(learning_rate,
...
@@ -224,28 +240,33 @@ def polynomial_decay(learning_rate,
Returns:
Returns:
Variable: The decayed learning rate
Variable: The decayed learning rate
"""
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
if
cycle
:
def
_lr_schedule
(
dtype
,
decay_steps
=
decay_steps
):
div_res
=
ops
.
ceil
(
global_step
/
decay_steps
)
with
default_main_program
().
_lr_schedule_guard
():
zero_var
=
tensor
.
fill_constant
(
global_step
=
_decay_step_counter
()
shape
=
[
1
],
dtype
=
'float32'
,
value
=
0.0
)
one_var
=
tensor
.
fill_constant
(
if
cycle
:
shape
=
[
1
],
dtype
=
'float32'
,
value
=
1.0
)
div_res
=
ops
.
ceil
(
global_step
/
decay_steps
)
zero_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
dtype
,
value
=
0.0
)
one_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
dtype
,
value
=
1.0
)
with
control_flow
.
Switch
()
as
switch
:
with
switch
.
case
(
global_step
==
zero_var
):
tensor
.
assign
(
input
=
one_var
,
output
=
div_res
)
decay_steps
=
decay_steps
*
div_res
else
:
decay_steps_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
dtype
,
value
=
float
(
decay_steps
))
global_step
=
nn
.
elementwise_min
(
x
=
global_step
,
y
=
decay_steps_var
)
with
control_flow
.
Switch
()
as
switch
:
decayed_lr
=
(
learning_rate
-
end_learning_rate
)
*
\
with
switch
.
case
(
global_step
==
zero_var
):
((
1
-
global_step
/
decay_steps
)
**
power
)
+
end_learning_rate
tensor
.
assign
(
input
=
one_var
,
output
=
div_res
)
return
decayed_lr
decay_steps
=
decay_steps
*
div_res
else
:
decay_steps_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
decay_steps
))
global_step
=
nn
.
elementwise_min
(
x
=
global_step
,
y
=
decay_steps_var
)
decayed_lr
=
(
learning_rate
-
end_learning_rate
)
*
\
return
_lr_schedule
((
1
-
global_step
/
decay_steps
)
**
power
)
+
end_learning_rate
return
decayed_lr
def
piecewise_decay
(
boundaries
,
values
):
def
piecewise_decay
(
boundaries
,
values
):
...
@@ -273,38 +294,42 @@ def piecewise_decay(boundaries, values):
...
@@ -273,38 +294,42 @@ def piecewise_decay(boundaries, values):
"""
"""
with
default_main_program
().
_lr_schedule_guard
():
if
len
(
values
)
-
len
(
boundaries
)
!=
1
:
def
_lr_schedule
(
dtype
):
raise
ValueError
(
"len(values) - len(boundaries) should be 1"
)
with
default_main_program
().
_lr_schedule_guard
():
if
len
(
values
)
-
len
(
boundaries
)
!=
1
:
global_step
=
_decay_step_counter
()
raise
ValueError
(
"len(values) - len(boundaries) should be 1"
)
lr
=
tensor
.
create_global_var
(
global_step
=
_decay_step_counter
()
shape
=
[
1
],
value
=
0.0
,
lr
=
tensor
.
create_global_var
(
dtype
=
'float32'
,
persistable
=
True
,
name
=
"learning_rate"
)
with
control_flow
.
Switch
()
as
switch
:
for
i
in
range
(
len
(
boundaries
)):
boundary_val
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
boundaries
[
i
]),
force_cpu
=
True
)
value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
i
]))
with
switch
.
case
(
global_step
<
boundary_val
):
tensor
.
assign
(
value_var
,
lr
)
last_value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
shape
=
[
1
],
value
=
0.0
,
dtype
=
'float32'
,
dtype
=
'float32'
,
value
=
float
(
values
[
len
(
values
)
-
1
]))
persistable
=
True
,
with
switch
.
default
():
name
=
"learning_rate"
)
tensor
.
assign
(
last_value_var
,
lr
)
with
control_flow
.
Switch
()
as
switch
:
for
i
in
range
(
len
(
boundaries
)):
boundary_val
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
boundaries
[
i
]),
force_cpu
=
True
)
value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
i
]))
with
switch
.
case
(
global_step
<
boundary_val
):
tensor
.
assign
(
value_var
,
lr
)
last_value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
len
(
values
)
-
1
]))
with
switch
.
default
():
tensor
.
assign
(
last_value_var
,
lr
)
return
lr
return
lr
return
_lr_schedule
def
append_LARS
(
params_grads
,
learning_rate
,
weight_decay
):
def
append_LARS
(
params_grads
,
learning_rate
,
weight_decay
):
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
f5532877
...
@@ -2798,6 +2798,10 @@ def batch_norm(input,
...
@@ -2798,6 +2798,10 @@ def batch_norm(input,
helper
=
LayerHelper
(
'batch_norm'
,
**
locals
())
helper
=
LayerHelper
(
'batch_norm'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
# use fp32 for bn parameter
if
dtype
==
core
.
VarDesc
.
VarType
.
FP16
:
dtype
=
core
.
VarDesc
.
VarType
.
FP32
input_shape
=
input
.
shape
input_shape
=
input
.
shape
if
data_layout
==
'NCHW'
:
if
data_layout
==
'NCHW'
:
channel_num
=
input_shape
[
1
]
channel_num
=
input_shape
[
1
]
...
@@ -2832,7 +2836,7 @@ def batch_norm(input,
...
@@ -2832,7 +2836,7 @@ def batch_norm(input,
trainable
=
False
,
trainable
=
False
,
do_model_average
=
do_model_average_for_mean_and_var
),
do_model_average
=
do_model_average_for_mean_and_var
),
shape
=
param_shape
,
shape
=
param_shape
,
dtype
=
input
.
dtype
)
dtype
=
dtype
)
mean
.
stop_gradient
=
True
mean
.
stop_gradient
=
True
variance
=
helper
.
create_parameter
(
variance
=
helper
.
create_parameter
(
...
@@ -2842,7 +2846,7 @@ def batch_norm(input,
...
@@ -2842,7 +2846,7 @@ def batch_norm(input,
trainable
=
False
,
trainable
=
False
,
do_model_average
=
do_model_average_for_mean_and_var
),
do_model_average
=
do_model_average_for_mean_and_var
),
shape
=
param_shape
,
shape
=
param_shape
,
dtype
=
input
.
dtype
)
dtype
=
dtype
)
variance
.
stop_gradient
=
True
variance
.
stop_gradient
=
True
# create output
# create output
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
f5532877
...
@@ -50,17 +50,21 @@ class Optimizer(object):
...
@@ -50,17 +50,21 @@ class Optimizer(object):
def
__init__
(
self
,
learning_rate
,
regularization
=
None
,
name
=
None
):
def
__init__
(
self
,
learning_rate
,
regularization
=
None
,
name
=
None
):
if
not
isinstance
(
learning_rate
,
float
)
and
\
if
not
isinstance
(
learning_rate
,
float
)
and
\
not
isinstance
(
learning_rate
,
framework
.
Variable
):
not
isinstance
(
learning_rate
,
framework
.
Variable
)
and
\
raise
TypeError
(
"learning rate should be float or Variable"
)
not
callable
(
learning_rate
):
raise
TypeError
(
"learning rate should be float or Variable or callable(dtype)"
)
self
.
_name
=
name
self
.
_name
=
name
self
.
regularization
=
regularization
self
.
regularization
=
regularization
self
.
_learning_rate
=
learning_rate
self
.
_learning_rate
=
learning_rate
# the learning rate type should be inferenced from loss
# the learning rate type should be inferenced from loss
self
.
_dtype
=
None
self
.
_dtype
=
None
# each program should have a independent learning rate
# each program should have a independent learning rate
# program -> Variable(learning_rate)
# program -> Variable(learning_rate) or:
# program -> callable(return learning_rate Variable)
self
.
_learning_rate_map
=
dict
()
self
.
_learning_rate_map
=
dict
()
if
isinstance
(
self
.
_learning_rate
,
framework
.
Variable
):
if
isinstance
(
self
.
_learning_rate
,
framework
.
Variable
)
or
\
callable
(
self
.
_learning_rate
):
self
.
_learning_rate_map
[
framework
.
default_main_program
(
self
.
_learning_rate_map
[
framework
.
default_main_program
(
)]
=
self
.
_learning_rate
)]
=
self
.
_learning_rate
# Dictionary of accumulators. Some optimizer subclasses need to
# Dictionary of accumulators. Some optimizer subclasses need to
...
@@ -75,6 +79,11 @@ class Optimizer(object):
...
@@ -75,6 +79,11 @@ class Optimizer(object):
if
isinstance
(
lr
,
framework
.
Variable
):
if
isinstance
(
lr
,
framework
.
Variable
):
return
return
elif
callable
(
lr
):
dtype
=
'float32'
if
self
.
_dtype
is
None
else
self
.
_dtype
self
.
_learning_rate_map
[
framework
.
default_main_program
()]
=
lr
(
dtype
)
return
else
:
else
:
if
not
isinstance
(
self
.
_learning_rate
,
float
):
if
not
isinstance
(
self
.
_learning_rate
,
float
):
raise
TypeError
(
raise
TypeError
(
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
f5532877
...
@@ -92,35 +92,27 @@ class ParallelExecutor(object):
...
@@ -92,35 +92,27 @@ class ParallelExecutor(object):
num_trainers
=
1
,
num_trainers
=
1
,
trainer_id
=
0
,
trainer_id
=
0
,
scope
=
None
):
scope
=
None
):
# step1: get places, the places are used in run too.
self
.
_places
=
[]
self
.
_places
=
[]
self
.
_act_places
=
[]
if
use_cuda
:
if
use_cuda
:
gpus
=
[]
gpus_env
=
os
.
getenv
(
"FLAGS_selected_gpus"
)
gpus_env
=
os
.
getenv
(
"FLAGS_selected_gpus"
)
if
gpus_env
:
if
gpus_env
:
gpus
=
[
int
(
s
)
for
s
in
gpus_env
.
split
(
","
)]
gpus
=
[
int
(
s
)
for
s
in
gpus_env
.
split
(
","
)]
else
:
else
:
for
i
in
six
.
moves
.
range
(
core
.
get_cuda_device_count
()):
gpus
=
[
gpus
.
append
(
i
)
i
for
i
in
six
.
moves
.
range
(
core
.
get_cuda_device_count
())
for
i
in
gpus
:
]
p
=
core
.
Place
()
self
.
_places
=
[
core
.
CUDAPlace
(
i
)
for
i
in
gpus
]
self
.
_act_places
.
append
(
core
.
CUDAPlace
(
i
))
p
.
set_place
(
self
.
_act_places
[
-
1
])
self
.
_places
.
append
(
p
)
else
:
else
:
cpu_num
=
int
(
cpu_num
=
int
(
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
for
i
in
six
.
moves
.
range
(
cpu_num
):
self
.
_places
=
[
core
.
CPUPlace
()
for
_
in
six
.
moves
.
range
(
cpu_num
)]
p
=
core
.
Place
()
self
.
_act_places
.
append
(
core
.
CPUPlace
())
p
.
set_place
(
self
.
_act_places
[
-
1
])
self
.
_places
.
append
(
p
)
assert
self
.
_places
,
"no place for execution"
assert
self
.
_places
,
"no place for execution"
# step2: init exec_strategy
if
exec_strategy
is
None
:
if
exec_strategy
is
None
:
exec_strategy
=
ExecutionStrategy
()
exec_strategy
=
ExecutionStrategy
()
exec_strategy
.
use_cuda
=
use_cuda
exec_strategy
.
use_cuda
=
use_cuda
if
exec_strategy
.
num_threads
==
0
:
if
exec_strategy
.
num_threads
==
0
:
if
use_cuda
:
if
use_cuda
:
# Experiments on se-resnext shows that too many threads hurt
# Experiments on se-resnext shows that too many threads hurt
...
@@ -131,49 +123,54 @@ class ParallelExecutor(object):
...
@@ -131,49 +123,54 @@ class ParallelExecutor(object):
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
exec_strategy
.
num_threads
=
cpu_num
*
2
exec_strategy
.
num_threads
=
cpu_num
*
2
# step3: init build_strategy
if
build_strategy
is
None
:
if
build_strategy
is
None
:
build_strategy
=
BuildStrategy
()
build_strategy
=
BuildStrategy
()
build_strategy
.
num_trainers
=
num_trainers
build_strategy
.
num_trainers
=
num_trainers
build_strategy
.
trainer_id
=
trainer_id
build_strategy
.
trainer_id
=
trainer_id
main
=
main_program
# step4: get main_program, scope, local_scopes
main
=
main
if
main
else
framework
.
default_main_program
()
main
=
main_program
if
main_program
\
else
framework
.
default_main_program
()
scope
=
scope
if
scope
is
not
None
else
executor
.
global_scope
()
if
share_vars_from
and
not
isinstance
(
share_vars_from
,
ParallelExecutor
):
raise
TypeError
(
"share_vars_from must be ParallelExecutor."
)
local_scopes
=
share_vars_from
.
executor
.
local_scopes
()
\
if
share_vars_from
else
[]
# step5: check trainers_endpoints, it is used for distribution.
trainers_endpoints
=
main
.
_trainers_endpoints
trainers_endpoints
=
main
.
_trainers_endpoints
if
num_trainers
>
1
and
trainers_endpoints
:
if
num_trainers
>
1
and
trainers_endpoints
:
assert
num_trainers
==
len
(
assert
num_trainers
==
len
(
trainers_endpoints
),
"num_trainers == len(end_points)"
trainers_endpoints
),
"num_trainers == len(end_points)"
build_strategy
.
trainers_endpoints
=
trainers_endpoints
build_strategy
.
trainers_endpoints
=
trainers_endpoints
if
scope
==
None
:
# step5: get persistable_vars, parameter_vars, places. persistable_vars
scope
=
executor
.
global_scope
()
# need be broadcast to other local_scope.
persistable_vars
=
set
([
if
share_vars_from
and
not
isinstance
(
share_vars_from
,
cpt
.
to_text
(
v
.
name
)
for
v
in
[
ParallelExecutor
):
raise
TypeError
(
"share_vars_from must be ParallelExecutor."
)
local_scopes
=
share_vars_from
.
executor
.
local_scopes
(
)
if
share_vars_from
else
[]
self
.
persistable_vars
=
[
v
.
name
for
v
in
[
var
for
var
in
main
.
list_vars
()
var
for
var
in
main
.
list_vars
()
if
var
.
persistable
and
var
.
type
!=
core
.
VarDesc
.
VarType
.
RAW
if
var
.
persistable
and
var
.
type
!=
core
.
VarDesc
.
VarType
.
RAW
]
]
]
])
def
place_obj
(
place
):
p
=
core
.
Place
()
p
.
set_place
(
place
)
return
p
places
=
list
(
map
(
place_obj
,
self
.
_places
))
# step6: init ParallelExecutor
self
.
executor
=
core
.
ParallelExecutor
(
self
.
executor
=
core
.
ParallelExecutor
(
self
.
_places
,
places
,
persistable_vars
,
main
.
desc
,
set
([
cpt
.
to_text
(
p
.
name
)
for
p
in
main
.
global_block
().
iter_parameters
()
if
not
p
.
stop_gradient
]),
set
(
cpt
.
to_text
(
var
)
for
var
in
self
.
persistable_vars
),
main
.
desc
,
cpt
.
to_text
(
loss_name
)
cpt
.
to_text
(
loss_name
)
if
loss_name
else
six
.
u
(
''
),
scope
,
local_scopes
,
exec_strategy
,
if
loss_name
else
six
.
u
(
''
),
scope
,
local_scopes
,
exec_strategy
,
build_strategy
,
num_trainers
,
trainer_id
)
build_strategy
,
num_trainers
,
trainer_id
)
self
.
scope
=
scope
self
.
scope
=
scope
def
run
(
self
,
fetch_list
,
feed
=
None
,
feed_dict
=
None
,
return_numpy
=
True
):
def
run
(
self
,
fetch_list
,
feed
=
None
,
feed_dict
=
None
,
return_numpy
=
True
):
...
@@ -261,7 +258,7 @@ class ParallelExecutor(object):
...
@@ -261,7 +258,7 @@ class ParallelExecutor(object):
self
.
executor
.
feed_and_split_tensor_into_local_scopes
(
self
.
executor
.
feed_and_split_tensor_into_local_scopes
(
feed_tensor_dict
)
feed_tensor_dict
)
elif
isinstance
(
feed
,
list
)
or
isinstance
(
feed
,
tuple
):
elif
isinstance
(
feed
,
list
)
or
isinstance
(
feed
,
tuple
):
if
len
(
feed
)
!=
len
(
self
.
_
act_
places
):
if
len
(
feed
)
!=
len
(
self
.
_places
):
raise
ValueError
(
raise
ValueError
(
"Feed a list of tensor, the list should be the same size as places"
"Feed a list of tensor, the list should be the same size as places"
)
)
...
@@ -277,7 +274,7 @@ class ParallelExecutor(object):
...
@@ -277,7 +274,7 @@ class ParallelExecutor(object):
tensor
=
each
[
feed_name
]
tensor
=
each
[
feed_name
]
if
not
isinstance
(
tensor
,
core
.
LoDTensor
):
if
not
isinstance
(
tensor
,
core
.
LoDTensor
):
tmp
=
core
.
LoDTensor
()
tmp
=
core
.
LoDTensor
()
tmp
.
set
(
tensor
,
self
.
_
act_
places
[
i
])
tmp
.
set
(
tensor
,
self
.
_places
[
i
])
tensor
=
tmp
tensor
=
tmp
res_dict
[
feed_name
]
=
tensor
res_dict
[
feed_name
]
=
tensor
res
.
append
(
res_dict
)
res
.
append
(
res_dict
)
...
@@ -294,4 +291,4 @@ class ParallelExecutor(object):
...
@@ -294,4 +291,4 @@ class ParallelExecutor(object):
@
property
@
property
def
device_count
(
self
):
def
device_count
(
self
):
return
len
(
self
.
_
act_
places
)
return
len
(
self
.
_places
)
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
f5532877
...
@@ -368,6 +368,8 @@ class OpTest(unittest.TestCase):
...
@@ -368,6 +368,8 @@ class OpTest(unittest.TestCase):
place
=
core
.
CUDAPlace
(
0
)
place
=
core
.
CUDAPlace
(
0
)
if
core
.
is_float16_supported
(
place
):
if
core
.
is_float16_supported
(
place
):
return
[
place
]
return
[
place
]
else
:
return
[]
else
:
else
:
return
[]
return
[]
places
=
[
fluid
.
CPUPlace
()]
places
=
[
fluid
.
CPUPlace
()]
...
...
python/paddle/fluid/tests/unittests/test_accuracy_op.py
浏览文件 @
f5532877
...
@@ -22,8 +22,10 @@ from op_test import OpTest
...
@@ -22,8 +22,10 @@ from op_test import OpTest
class
TestAccuracyOp
(
OpTest
):
class
TestAccuracyOp
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"accuracy"
self
.
op_type
=
"accuracy"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
n
=
8192
n
=
8192
infer
=
np
.
random
.
random
((
n
,
1
)).
astype
(
"float32"
)
infer
=
np
.
random
.
random
((
n
,
1
)).
astype
(
self
.
dtype
)
indices
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
))
indices
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
))
label
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
))
label
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
))
self
.
inputs
=
{
'Out'
:
infer
,
'Indices'
:
indices
,
"Label"
:
label
}
self
.
inputs
=
{
'Out'
:
infer
,
'Indices'
:
indices
,
"Label"
:
label
}
...
@@ -34,14 +36,25 @@ class TestAccuracyOp(OpTest):
...
@@ -34,14 +36,25 @@ class TestAccuracyOp(OpTest):
num_correct
+=
1
num_correct
+=
1
break
break
self
.
outputs
=
{
self
.
outputs
=
{
'Accuracy'
:
np
.
array
([
num_correct
/
float
(
n
)]).
astype
(
"float32"
),
'Accuracy'
:
np
.
array
([
num_correct
/
float
(
n
)]).
astype
(
self
.
dtype
),
'Correct'
:
np
.
array
([
num_correct
]).
astype
(
"int32"
),
'Correct'
:
np
.
array
([
num_correct
]).
astype
(
"int32"
),
'Total'
:
np
.
array
([
n
]).
astype
(
"int32"
)
'Total'
:
np
.
array
([
n
]).
astype
(
"int32"
)
}
}
def
init_dtype
(
self
):
pass
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
()
class
TestAccuracyOpFp16
(
TestAccuracyOp
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
def
test_check_output
(
self
):
self
.
check_output
(
atol
=
1e-3
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py
浏览文件 @
f5532877
...
@@ -16,7 +16,7 @@ from __future__ import print_function
...
@@ -16,7 +16,7 @@ from __future__ import print_function
import
unittest
import
unittest
from
test_conv2d_op
import
TestConv2dOp
,
TestWithPad
,
TestWithStride
from
test_conv2d_op
import
TestConv2dOp
,
TestWithPad
,
TestWithStride
,
TestWithGroup
,
TestWith1x1
,
TestWithInput1x1Filter1x1
class
TestMKLDNN
(
TestConv2dOp
):
class
TestMKLDNN
(
TestConv2dOp
):
...
@@ -37,5 +37,23 @@ class TestMKLDNNWithStride(TestWithStride):
...
@@ -37,5 +37,23 @@ class TestMKLDNNWithStride(TestWithStride):
self
.
data_format
=
"NCHW"
self
.
data_format
=
"NCHW"
class
TestMKLDNNWithGroup
(
TestWithGroup
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
self
.
data_format
=
"NCHW"
class
TestMKLDNNWith1x1
(
TestWith1x1
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
self
.
data_format
=
"NCHW"
class
TestMKLDNNWithInput1x1Filter1x1
(
TestWithInput1x1Filter1x1
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
self
.
data_format
=
"NCHW"
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_elementwise_div_op.py
浏览文件 @
f5532877
...
@@ -21,14 +21,16 @@ from op_test import OpTest
...
@@ -21,14 +21,16 @@ from op_test import OpTest
class
ElementwiseDivOp
(
OpTest
):
class
ElementwiseDivOp
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"elementwise_div"
self
.
op_type
=
"elementwise_div"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
""" Warning
""" Warning
CPU gradient check error!
CPU gradient check error!
'X': np.random.random((32,84)).astype("float32"),
'X': np.random.random((32,84)).astype("float32"),
'Y': np.random.random((32,84)).astype("float32")
'Y': np.random.random((32,84)).astype("float32")
"""
"""
self
.
inputs
=
{
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
13
,
17
]).
astype
(
"float32"
),
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
13
,
17
]).
astype
(
self
.
dtype
),
'Y'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
13
,
17
]).
astype
(
"float32"
)
'Y'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
13
,
17
]).
astype
(
self
.
dtype
)
}
}
self
.
outputs
=
{
'Out'
:
np
.
divide
(
self
.
inputs
[
'X'
],
self
.
inputs
[
'Y'
])}
self
.
outputs
=
{
'Out'
:
np
.
divide
(
self
.
inputs
[
'X'
],
self
.
inputs
[
'Y'
])}
...
@@ -46,6 +48,9 @@ class ElementwiseDivOp(OpTest):
...
@@ -46,6 +48,9 @@ class ElementwiseDivOp(OpTest):
self
.
check_grad
(
self
.
check_grad
(
[
'X'
],
'Out'
,
max_relative_error
=
0.05
,
no_grad_set
=
set
(
'Y'
))
[
'X'
],
'Out'
,
max_relative_error
=
0.05
,
no_grad_set
=
set
(
'Y'
))
def
init_dtype
(
self
):
pass
class
TestElementwiseDivOp_scalar
(
ElementwiseDivOp
):
class
TestElementwiseDivOp_scalar
(
ElementwiseDivOp
):
def
setUp
(
self
):
def
setUp
(
self
):
...
@@ -126,5 +131,21 @@ class TestElementwiseDivOp_broadcast_3(ElementwiseDivOp):
...
@@ -126,5 +131,21 @@ class TestElementwiseDivOp_broadcast_3(ElementwiseDivOp):
}
}
class
TestElementwiseDivOpFp16
(
ElementwiseDivOp
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
def
test_check_grad_normal
(
self
):
self
.
check_grad
([
'X'
,
'Y'
],
'Out'
,
max_relative_error
=
1
)
def
test_check_grad_ingore_x
(
self
):
self
.
check_grad
(
[
'Y'
],
'Out'
,
max_relative_error
=
1
,
no_grad_set
=
set
(
"X"
))
def
test_check_grad_ingore_y
(
self
):
self
.
check_grad
(
[
'X'
],
'Out'
,
max_relative_error
=
1
,
no_grad_set
=
set
(
'Y'
))
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py
浏览文件 @
f5532877
...
@@ -135,5 +135,10 @@ class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp):
...
@@ -135,5 +135,10 @@ class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp):
}
}
class
TestElementwiseMulOpFp16
(
ElementwiseMulOp
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py
浏览文件 @
f5532877
...
@@ -22,12 +22,22 @@ from op_test import OpTest
...
@@ -22,12 +22,22 @@ from op_test import OpTest
class
TestFillZerosLikeOp
(
OpTest
):
class
TestFillZerosLikeOp
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"fill_zeros_like"
self
.
op_type
=
"fill_zeros_like"
self
.
inputs
=
{
'X'
:
np
.
random
.
random
((
219
,
232
)).
astype
(
"float32"
)}
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
self
.
inputs
=
{
'X'
:
np
.
random
.
random
((
219
,
232
)).
astype
(
self
.
dtype
)}
self
.
outputs
=
{
'Out'
:
np
.
zeros_like
(
self
.
inputs
[
"X"
])}
self
.
outputs
=
{
'Out'
:
np
.
zeros_like
(
self
.
inputs
[
"X"
])}
def
init_dtype
(
self
):
pass
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
()
class
TestFillZerosLikeOpFp16
(
TestFillZerosLikeOp
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py
浏览文件 @
f5532877
...
@@ -97,7 +97,7 @@ class TestLearningRateDecay(unittest.TestCase):
...
@@ -97,7 +97,7 @@ class TestLearningRateDecay(unittest.TestCase):
startup_prog
=
fluid
.
Program
()
startup_prog
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main_prog
,
startup_prog
):
with
fluid
.
program_guard
(
main_prog
,
startup_prog
):
decayed_lr
=
fluid_decay_fn
(
**
kwargs
)
decayed_lr
=
fluid_decay_fn
(
**
kwargs
)
(
"float32"
)
place
=
fluid
.
CPUPlace
()
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
...
...
python/paddle/fluid/tests/unittests/test_momentum_op.py
浏览文件 @
f5532877
...
@@ -24,11 +24,13 @@ from op_test import OpTest
...
@@ -24,11 +24,13 @@ from op_test import OpTest
class
TestMomentumOp1
(
OpTest
):
class
TestMomentumOp1
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"momentum"
self
.
op_type
=
"momentum"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
param
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
param
=
np
.
random
.
random
((
123
,
321
)).
astype
(
self
.
dtype
)
grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
self
.
dtype
)
velocity
=
np
.
zeros
((
123
,
321
)).
astype
(
"float32"
)
velocity
=
np
.
zeros
((
123
,
321
)).
astype
(
self
.
dtype
)
learning_rate
=
np
.
array
([
0.001
]).
astype
(
"float32"
)
learning_rate
=
np
.
array
([
0.001
]).
astype
(
self
.
dtype
)
mu
=
0.0001
mu
=
0.0001
use_nesterov
=
False
use_nesterov
=
False
...
@@ -50,10 +52,21 @@ class TestMomentumOp1(OpTest):
...
@@ -50,10 +52,21 @@ class TestMomentumOp1(OpTest):
self
.
outputs
=
{
'ParamOut'
:
param_out
,
'VelocityOut'
:
velocity_out
}
self
.
outputs
=
{
'ParamOut'
:
param_out
,
'VelocityOut'
:
velocity_out
}
def
init_dtype
(
self
):
pass
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
()
class
TestMomentumOpFp16
(
TestMomentumOp1
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
def
test_check_output
(
self
):
self
.
check_output
(
atol
=
1e-3
)
class
TestMomentumOp2
(
OpTest
):
class
TestMomentumOp2
(
OpTest
):
'''Test Momentum with default values for attributes
'''Test Momentum with default values for attributes
'''
'''
...
...
python/paddle/fluid/tests/unittests/test_top_k_op.py
浏览文件 @
f5532877
...
@@ -23,8 +23,11 @@ class TestTopkOp(OpTest):
...
@@ -23,8 +23,11 @@ class TestTopkOp(OpTest):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
set_args
()
self
.
set_args
()
self
.
op_type
=
"top_k"
self
.
op_type
=
"top_k"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
k
=
self
.
top_k
k
=
self
.
top_k
input
=
np
.
random
.
random
((
self
.
row
,
k
)).
astype
(
"float32"
)
input
=
np
.
random
.
random
((
self
.
row
,
k
)).
astype
(
self
.
dtype
)
output
=
np
.
ndarray
((
self
.
row
,
k
))
output
=
np
.
ndarray
((
self
.
row
,
k
))
indices
=
np
.
ndarray
((
self
.
row
,
k
)).
astype
(
"int64"
)
indices
=
np
.
ndarray
((
self
.
row
,
k
)).
astype
(
"int64"
)
...
@@ -38,6 +41,9 @@ class TestTopkOp(OpTest):
...
@@ -38,6 +41,9 @@ class TestTopkOp(OpTest):
self
.
outputs
=
{
'Out'
:
output
,
'Indices'
:
indices
}
self
.
outputs
=
{
'Out'
:
output
,
'Indices'
:
indices
}
def
init_dtype
(
self
):
pass
def
set_args
(
self
):
def
set_args
(
self
):
self
.
row
=
32
self
.
row
=
32
self
.
top_k
=
1
self
.
top_k
=
1
...
@@ -46,6 +52,11 @@ class TestTopkOp(OpTest):
...
@@ -46,6 +52,11 @@ class TestTopkOp(OpTest):
self
.
check_output
()
self
.
check_output
()
class
TestTopkOpFp16
(
TestTopkOp
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
class
TestTopkOp3d
(
OpTest
):
class
TestTopkOp3d
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"top_k"
self
.
op_type
=
"top_k"
...
...
python/setup.py.in
浏览文件 @
f5532877
...
@@ -107,9 +107,9 @@ packages=['paddle',
...
@@ -107,9 +107,9 @@ packages=['paddle',
'paddle.fluid.distributed',
'paddle.fluid.distributed',
'paddle.fluid.layers',
'paddle.fluid.layers',
'paddle.fluid.contrib',
'paddle.fluid.contrib',
'paddle.fluid.contrib.utils',
'paddle.fluid.contrib.decoder',
'paddle.fluid.contrib.decoder',
'paddle.fluid.contrib.quantize',
'paddle.fluid.contrib.quantize',
'paddle.fluid.contrib.utils',
'paddle.fluid.transpiler',
'paddle.fluid.transpiler',
'paddle.fluid.transpiler.details']
'paddle.fluid.transpiler.details']
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录