Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
08295f98
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
08295f98
编写于
5月 14, 2018
作者:
Y
yuyang18
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add build strategy
上级
c06b4483
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
162 addition
and
77 deletion
+162
-77
paddle/fluid/framework/details/build_strategy.h
paddle/fluid/framework/details/build_strategy.h
+36
-0
paddle/fluid/framework/details/multi_devices_graph_builder.cc
...le/fluid/framework/details/multi_devices_graph_builder.cc
+23
-24
paddle/fluid/framework/details/multi_devices_graph_builder.h
paddle/fluid/framework/details/multi_devices_graph_builder.h
+6
-6
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+5
-7
paddle/fluid/framework/parallel_executor.h
paddle/fluid/framework/parallel_executor.h
+4
-3
paddle/fluid/platform/nccl_helper.h
paddle/fluid/platform/nccl_helper.h
+1
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+29
-2
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+4
-4
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+18
-7
python/paddle/fluid/tests/unittests/test_parallel_executor.py
...on/paddle/fluid/tests/unittests/test_parallel_executor.py
+36
-23
未找到文件。
paddle/fluid/framework/details/build_strategy.h
0 → 100644
浏览文件 @
08295f98
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
namespace
paddle
{
namespace
framework
{
namespace
details
{
struct
BuildStrategy
{
enum
class
ReduceStrategy
{
kAllReduce
=
0
,
kReduce
=
1
};
enum
class
GradientScaleStrategy
{
kCoeffNumDevice
=
0
,
kOne
=
1
,
kCustomized
=
2
,
};
ReduceStrategy
reduce_
{
ReduceStrategy
::
kReduce
};
GradientScaleStrategy
gradient_scale_
{
GradientScaleStrategy
::
kCoeffNumDevice
};
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/multi_devices_graph_builder.cc
浏览文件 @
08295f98
...
...
@@ -37,31 +37,26 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
const
std
::
string
&
loss_var_name
,
const
std
::
unordered_set
<
std
::
string
>
&
params
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
platform
::
NCCLContextMap
*
nccl_ctxs
,
bool
use_default_grad_scale
,
bool
balance_parameter_opt_between_cards
)
platform
::
NCCLContextMap
*
nccl_ctxs
,
const
BuildStrategy
&
strategy
)
:
loss_var_name_
(
loss_var_name
),
places_
(
places
),
local_scopes_
(
local_scopes
),
nccl_ctxs_
(
nccl_ctxs
),
balance_parameter_opt_between_cards_
(
balance_parameter_opt_between_cards
)
{
strategy_
(
strategy
)
{
#else
MultiDevSSAGraphBuilder
::
MultiDevSSAGraphBuilder
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
string
&
loss_var_name
,
const
std
::
unordered_set
<
std
::
string
>
&
params
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
bool
use_default_grad_scale
,
bool
balance_parameter_opt_between_cards
)
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
BuildStrategy
&
strategy
)
:
loss_var_name_
(
loss_var_name
),
places_
(
places
),
local_scopes_
(
local_scopes
),
balance_parameter_opt_between_cards_
(
balance_parameter_opt_between_cards
)
{
strategy_
(
strategy
)
{
#endif
for
(
auto
&
p
:
params
)
{
grad_names_
.
insert
(
GradVarName
(
p
));
}
use_default_grad_scale_
=
use_default_grad_scale
;
}
void
MultiDevSSAGraphBuilder
::
CreateOpHandleIOs
(
SSAGraph
*
result
,
...
...
@@ -146,7 +141,8 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
CreateComputationalOps
(
&
result
,
*
op
,
1
);
}
else
if
(
IsScaleLossOp
(
*
op
))
{
// user can customize loss@grad if not use_default_grad_scale_
if
(
use_default_grad_scale_
)
{
if
(
strategy_
.
gradient_scale_
!=
BuildStrategy
::
GradientScaleStrategy
::
kCustomized
)
{
CreateScaleLossGradOp
(
&
result
);
}
is_forwarding
=
false
;
...
...
@@ -165,19 +161,22 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
// broadcast, and each gradient is only broadcast once.
for
(
auto
&
og
:
op
->
OutputArgumentNames
())
{
if
(
IsParameterGradientOnce
(
og
,
&
og_has_been_broadcast
))
{
if
(
balance_parameter_opt_between_cards_
)
{
CreateReduceOp
(
&
result
,
og
,
cur_device_id
);
var_name_on_devices
[
cur_device_id
].
emplace
(
og
);
bcast_var_name_set
[
cur_device_id
].
emplace
(
og
.
substr
(
0
,
og
.
size
()
-
strlen
(
kGradVarSuffix
)));
cur_device_id
=
(
cur_device_id
+
1
)
%
places_
.
size
();
}
else
{
if
(
IsSparseGradient
(
var_types
,
og
))
{
CreateReduceOp
(
&
result
,
og
,
0
);
CreateBroadcastOp
(
&
result
,
og
,
0
);
}
else
{
InsertNCCLAllReduceOp
(
&
result
,
og
);
}
switch
(
strategy_
.
reduce_
)
{
case
BuildStrategy
::
ReduceStrategy
::
kReduce
:
CreateReduceOp
(
&
result
,
og
,
cur_device_id
);
var_name_on_devices
[
cur_device_id
].
emplace
(
og
);
bcast_var_name_set
[
cur_device_id
].
emplace
(
og
.
substr
(
0
,
og
.
size
()
-
strlen
(
kGradVarSuffix
)));
cur_device_id
=
(
cur_device_id
+
1
)
%
places_
.
size
();
break
;
case
BuildStrategy
::
ReduceStrategy
::
kAllReduce
:
if
(
IsSparseGradient
(
var_types
,
og
))
{
CreateReduceOp
(
&
result
,
og
,
0
);
CreateBroadcastOp
(
&
result
,
og
,
0
);
}
else
{
InsertNCCLAllReduceOp
(
&
result
,
og
);
}
break
;
}
}
}
...
...
@@ -303,7 +302,7 @@ bool MultiDevSSAGraphBuilder::IsParameterGradientOnce(
int
MultiDevSSAGraphBuilder
::
GetOpDeviceID
(
const
std
::
vector
<
std
::
unordered_set
<
std
::
string
>>
&
var_name_on_devices
,
const
OpDesc
&
op
)
const
{
if
(
!
balance_parameter_opt_between_cards_
)
{
if
(
strategy_
.
reduce_
!=
BuildStrategy
::
ReduceStrategy
::
kReduce
)
{
return
-
1
;
}
...
...
paddle/fluid/framework/details/multi_devices_graph_builder.h
浏览文件 @
08295f98
...
...
@@ -17,6 +17,7 @@
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/ssa_graph_builder.h"
namespace
paddle
{
...
...
@@ -36,15 +37,13 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
const
std
::
unordered_set
<
std
::
string
>
&
params
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
platform
::
NCCLContextMap
*
nccl_ctxs
,
bool
use_default_grad_scale
,
bool
balance_parameter_opt_between_cards
);
const
BuildStrategy
&
strategy
);
#else
MultiDevSSAGraphBuilder
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
string
&
loss_var_name
,
const
std
::
unordered_set
<
std
::
string
>
&
params
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
bool
use_default_grad_scale
,
bool
balance_parameter_opt_between_cards
);
const
BuildStrategy
&
strategy
);
#endif
std
::
unique_ptr
<
SSAGraph
>
Build
(
const
ProgramDesc
&
program
)
const
override
;
...
...
@@ -62,8 +61,6 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
#ifdef PADDLE_WITH_CUDA
platform
::
NCCLContextMap
*
nccl_ctxs_
;
#endif
bool
balance_parameter_opt_between_cards_
;
bool
use_default_grad_scale_
;
bool
IsScaleLossOp
(
const
OpDesc
&
op
)
const
;
...
...
@@ -105,6 +102,9 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
bool
IsSparseGradient
(
const
std
::
unordered_map
<
std
::
string
,
proto
::
VarType
::
Type
>
&
var_types
,
const
std
::
string
&
og
)
const
;
private:
BuildStrategy
strategy_
;
};
}
// namespace details
}
// namespace framework
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
08295f98
...
...
@@ -57,8 +57,7 @@ ParallelExecutor::ParallelExecutor(
const
std
::
unordered_set
<
std
::
string
>
&
bcast_vars
,
const
ProgramDesc
&
main_program
,
const
std
::
string
&
loss_var_name
,
Scope
*
scope
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
bool
use_default_grad_scale
,
bool
balance_parameter_opt_between_cards
,
const
ExecutionStrategy
&
exec_strategy
)
const
ExecutionStrategy
&
exec_strategy
,
const
BuildStrategy
&
build_strategy
)
:
member_
(
new
ParallelExecutorPrivate
(
places
))
{
member_
->
global_scope_
=
scope
;
...
...
@@ -93,12 +92,11 @@ ParallelExecutor::ParallelExecutor(
#ifdef PADDLE_WITH_CUDA
details
::
MultiDevSSAGraphBuilder
builder
(
member_
->
places_
,
loss_var_name
,
params
,
member_
->
local_scopes_
,
member_
->
nccl_ctxs_
.
get
(),
use_default_grad_scale
,
balance_parameter_opt_between_cards
);
member_
->
nccl_ctxs_
.
get
(),
build_strategy
);
#else
details
::
MultiDevSSAGraphBuilder
builder
(
member_
->
places_
,
loss_var_name
,
params
,
member_
->
local_scopes_
,
use_default_grad_scale
,
balance_parameter_opt_between_cards
);
details
::
MultiDevSSAGraphBuilder
builder
(
member_
->
places_
,
loss_var_name
,
params
,
member_
->
local_scopes_
,
build_strategy
);
#endif
auto
graph
=
builder
.
Build
(
main_program
);
...
...
paddle/fluid/framework/parallel_executor.h
浏览文件 @
08295f98
...
...
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <paddle/fluid/framework/details/build_strategy.h>
#include <string>
#include <unordered_set>
#include <vector>
...
...
@@ -29,6 +30,7 @@ namespace framework {
class
ParallelExecutorPrivate
;
using
details
::
BuildStrategy
;
using
details
::
ExecutionStrategy
;
class
ParallelExecutor
{
...
...
@@ -41,9 +43,8 @@ class ParallelExecutor {
const
ProgramDesc
&
main_program
,
const
std
::
string
&
loss_var_name
,
Scope
*
scope
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
bool
use_default_grad_scale
,
bool
balance_parameter_opt_between_cards
,
const
ExecutionStrategy
&
exec_strategy
);
const
ExecutionStrategy
&
exec_strategy
,
const
BuildStrategy
&
build_strategy
);
~
ParallelExecutor
();
...
...
paddle/fluid/platform/nccl_helper.h
浏览文件 @
08295f98
...
...
@@ -50,7 +50,7 @@ class NCCLGroupGuard {
}
inline
~
NCCLGroupGuard
()
{
PADDLE_ENFORCE
(
dynload
::
ncclGroupEnd
()
);
CHECK_EQ
(
dynload
::
ncclGroupEnd
(),
ncclSuccess
);
NCCLMutex
().
unlock
();
}
};
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
08295f98
...
...
@@ -494,6 +494,7 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def
(
"disable_profiler"
,
platform
::
DisableProfiler
);
m
.
def
(
"reset_profiler"
,
platform
::
ResetProfiler
);
// -- python binds for parallel executor.
py
::
class_
<
ParallelExecutor
>
pe
(
m
,
"ParallelExecutor"
);
py
::
class_
<
ExecutionStrategy
>
(
pe
,
"ExecutionStrategy"
)
.
def
(
py
::
init
())
...
...
@@ -515,12 +516,38 @@ All parameter, weight, gradient are variables in Paddle.
[](
ExecutionStrategy
&
self
,
bool
allow_op_delay
)
{
self
.
allow_op_delay_
=
allow_op_delay
;
});
py
::
class_
<
BuildStrategy
>
build_strategy
(
pe
,
"BuildStrategy"
);
py
::
enum_
<
BuildStrategy
::
ReduceStrategy
>
(
build_strategy
,
"ReduceStrategy"
)
.
value
(
"Reduce"
,
BuildStrategy
::
ReduceStrategy
::
kReduce
)
.
value
(
"AllReduce"
,
BuildStrategy
::
ReduceStrategy
::
kAllReduce
);
py
::
enum_
<
BuildStrategy
::
GradientScaleStrategy
>
(
build_strategy
,
"GradientScaleStrategy"
)
.
value
(
"CoeffNumDevice"
,
BuildStrategy
::
GradientScaleStrategy
::
kCoeffNumDevice
)
.
value
(
"One"
,
BuildStrategy
::
GradientScaleStrategy
::
kOne
)
.
value
(
"Customized"
,
BuildStrategy
::
GradientScaleStrategy
::
kCustomized
);
build_strategy
.
def
(
py
::
init
())
.
def_property
(
"reduce_strategy"
,
[](
const
BuildStrategy
&
self
)
{
return
self
.
reduce_
;
},
[](
BuildStrategy
&
self
,
BuildStrategy
::
ReduceStrategy
strategy
)
{
self
.
reduce_
=
strategy
;
})
.
def_property
(
"gradient_scale_strategy"
,
[](
const
BuildStrategy
&
self
)
{
return
self
.
gradient_scale_
;
},
[](
BuildStrategy
&
self
,
BuildStrategy
::
GradientScaleStrategy
strategy
)
{
self
.
gradient_scale_
=
strategy
;
});
pe
.
def
(
py
::
init
<
const
std
::
vector
<
platform
::
Place
>
&
,
const
std
::
unordered_set
<
std
::
string
>
&
,
const
std
::
unordered_set
<
std
::
string
>
&
,
const
ProgramDesc
&
,
const
std
::
string
&
,
Scope
*
,
std
::
vector
<
Scope
*>
&
,
bool
,
bool
,
const
Execution
Strategy
&>
())
const
std
::
string
&
,
Scope
*
,
std
::
vector
<
Scope
*>
&
,
const
ExecutionStrategy
&
,
const
Build
Strategy
&>
())
.
def
(
"bcast_params"
,
&
ParallelExecutor
::
BCastParamsToGPUs
)
// NOTE: even we return a vec<Scope*>* to Python use reference policy.
// We still cannot get local_scope from this vector, since the element
...
...
python/paddle/fluid/__init__.py
浏览文件 @
08295f98
...
...
@@ -52,12 +52,14 @@ import clip
import
profiler
import
unique_name
import
recordio_writer
from
parallel_executor
import
ParallelExecutor
,
ExecutionStrategy
import
parallel_executor
from
parallel_executor
import
*
Tensor
=
LoDTensor
__all__
=
framework
.
__all__
+
executor
.
__all__
+
concurrency
.
__all__
+
\
trainer
.
__all__
+
inferencer
.
__all__
+
transpiler
.
__all__
+
[
trainer
.
__all__
+
inferencer
.
__all__
+
transpiler
.
__all__
+
\
parallel_executor
.
__all__
+
[
'io'
,
'initializer'
,
'layers'
,
...
...
@@ -79,8 +81,6 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + \
'profiler'
,
'unique_name'
,
'recordio_writer'
,
'ParallelExecutor'
,
'ExecutionStrategy'
,
]
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
08295f98
...
...
@@ -19,9 +19,10 @@ import executor
import
warnings
import
sys
__all__
=
[
'ParallelExecutor'
,
'ExecutionStrategy'
]
__all__
=
[
'ParallelExecutor'
,
'ExecutionStrategy'
,
'BuildStrategy'
]
ExecutionStrategy
=
core
.
ParallelExecutor
.
ExecutionStrategy
BuildStrategy
=
core
.
ParallelExecutor
.
BuildStrategy
class
ParallelExecutor
(
object
):
...
...
@@ -30,9 +31,8 @@ class ParallelExecutor(object):
loss_name
=
None
,
main_program
=
None
,
share_vars_from
=
None
,
use_default_grad_scale
=
True
,
balance_parameter_opt_between_cards
=
False
,
exec_strategy
=
None
,
build_strategy
=
None
,
**
kwargs
):
"""
ParallelExecutor can run program in parallel.
...
...
@@ -81,7 +81,16 @@ class ParallelExecutor(object):
"Setting {0} by constructor is deprecated. Use "
\
"strategy=ExecutionStrategy(); strategy.{0}=xxx; "
\
"pe=ParallelExecutor(exec_strategy=strategy) "
\
"instead.
\n
"
"instead.
\n
"
.
format
(
key
)
elif
key
in
dir
(
BuildStrategy
):
err_msg
+=
\
"Setting {0} by constructor is deprecated. Use "
\
"strategy=BuildStrategy(); See help("
\
"paddle.fluid.ParallelExecutor.BuildStrategy)
\n
"
.
format
(
key
)
else
:
err_msg
+=
"Setting {0} by constructor is deprecated. Use strategy.
\n
"
.
format
(
key
)
raise
ValueError
(
err_msg
)
self
.
_places
=
[]
...
...
@@ -116,6 +125,9 @@ class ParallelExecutor(object):
exec_strategy
.
num_threads
=
min
(
len
(
self
.
_places
)
*
2
,
multiprocessing
.
cpu_count
())
if
build_strategy
is
None
:
build_strategy
=
BuildStrategy
()
main
=
main_program
main
=
main
if
main
else
framework
.
default_main_program
()
scope
=
executor
.
global_scope
()
...
...
@@ -139,9 +151,8 @@ class ParallelExecutor(object):
p
.
name
for
p
in
main
.
global_block
().
iter_parameters
()
if
not
p
.
stop_gradient
]),
set
(
self
.
persistable_vars
),
main
.
desc
,
loss_name
if
loss_name
else
''
,
scope
,
local_scopes
,
use_default_grad_scale
,
balance_parameter_opt_between_cards
,
exec_strategy
)
set
(
self
.
persistable_vars
),
main
.
desc
,
loss_name
if
loss_name
else
''
,
scope
,
local_scopes
,
exec_strategy
,
build_strategy
)
self
.
scope
=
scope
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor.py
浏览文件 @
08295f98
...
...
@@ -234,12 +234,16 @@ class TestParallelExecutorBase(unittest.TestCase):
startup_exe
.
run
(
startup
)
exec_strategy
=
fluid
.
ExecutionStrategy
()
exec_strategy
.
allow_op_delay
=
allow_op_delay
build_strategy
=
fluid
.
BuildStrategy
()
build_strategy
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
Reduce
if
balance_parameter_opt_between_cards
else
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
if
use_parallel_executor
:
exe
=
fluid
.
ParallelExecutor
(
True
,
loss_name
=
loss
.
name
,
balance_parameter_opt_between_cards
=
balance_parameter_opt_between_cards
,
exec_strategy
=
exec
_strategy
)
exec_strategy
=
exec_strategy
,
build_strategy
=
build
_strategy
)
else
:
exe
=
fluid
.
Executor
(
place
=
place
)
...
...
@@ -548,7 +552,7 @@ class TestTransformer(TestParallelExecutorBase):
class
ParallelExecutorTestingDuringTraining
(
unittest
.
TestCase
):
def
check_network_convergence
(
self
,
b
alance_parameter_opt_between_cards
):
def
check_network_convergence
(
self
,
b
uild_strategy
=
None
):
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main
,
startup
):
...
...
@@ -571,15 +575,13 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
use_cuda
=
True
,
loss_name
=
loss
.
name
,
main_program
=
main
,
balance_parameter_opt_between_cards
=
balance_parameter_opt_between_cards
)
build_strategy
=
build_strategy
)
test_exe
=
fluid
.
ParallelExecutor
(
use_cuda
=
True
,
main_program
=
test_program
,
share_vars_from
=
train_exe
,
balance_parameter_opt_between_cards
=
balance_parameter_opt_between_cards
)
build_strategy
=
build_strategy
)
for
i
in
xrange
(
5
):
test_loss
,
=
test_exe
.
run
([
loss
.
name
],
feed
=
feed_dict
)
...
...
@@ -594,10 +596,14 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
str
(
test_loss
))
def
test_parallel_testing
(
self
):
self
.
check_network_convergence
(
False
)
build_strategy
=
fluid
.
BuildStrategy
()
build_strategy
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
self
.
check_network_convergence
(
build_strategy
)
def
test_parallel_testing_with_new_strategy
(
self
):
self
.
check_network_convergence
(
True
)
build_strategy
=
fluid
.
BuildStrategy
()
build_strategy
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
Reduce
self
.
check_network_convergence
(
build_strategy
)
import
paddle.dataset.conll05
as
conll05
...
...
@@ -617,7 +623,7 @@ embedding_name = 'emb'
def
db_lstm
(
word
,
predicate
,
ctx_n2
,
ctx_n1
,
ctx_0
,
ctx_p1
,
ctx_p2
,
mark
,
is_sparse
,
balance_parameter_opt_between_cards
,
**
ignored
):
is_sparse
,
**
ignored
):
# 8 features
predicate_embedding
=
fluid
.
layers
.
embedding
(
input
=
predicate
,
...
...
@@ -686,9 +692,7 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
class
TestCRFModel
(
unittest
.
TestCase
):
def
check_network_convergence
(
self
,
is_sparse
,
balance_parameter_opt_between_cards
=
False
):
def
check_network_convergence
(
self
,
is_sparse
,
build_strategy
=
None
):
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main
,
startup
):
...
...
@@ -739,8 +743,7 @@ class TestCRFModel(unittest.TestCase):
pe
=
fluid
.
ParallelExecutor
(
use_cuda
=
True
,
loss_name
=
avg_cost
.
name
,
balance_parameter_opt_between_cards
=
balance_parameter_opt_between_cards
)
build_strategy
=
build_strategy
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
[
...
...
@@ -756,19 +759,29 @@ class TestCRFModel(unittest.TestCase):
pe
.
run
(
feed
=
feeder
.
feed
(
cur_batch
),
fetch_list
=
[
avg_cost
.
name
]))[
0
]
def
test_update_sparse_parameter
(
self
):
self
.
check_network_convergence
(
is_sparse
=
True
)
def
test_update_sparse_parameter_all_reduce
(
self
):
build_strategy
=
fluid
.
BuildStrategy
()
build_strategy
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
self
.
check_network_convergence
(
is_sparse
=
True
,
build_strategy
=
build_strategy
)
def
test_update_dense_parameter
(
self
):
self
.
check_network_convergence
(
is_sparse
=
False
)
def
test_update_dense_parameter_all_reduce
(
self
):
build_strategy
=
fluid
.
BuildStrategy
()
build_strategy
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
self
.
check_network_convergence
(
is_sparse
=
False
,
build_strategy
=
build_strategy
)
def
test_update_sparse_parameter_with_new_strategy
(
self
):
def
test_update_sparse_parameter_reduce
(
self
):
build_strategy
=
fluid
.
BuildStrategy
()
build_strategy
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
Reduce
self
.
check_network_convergence
(
is_sparse
=
False
,
b
alance_parameter_opt_between_cards
=
True
)
is_sparse
=
False
,
b
uild_strategy
=
build_strategy
)
def
test_update_dense_parameter_with_new_strategy
(
self
):
def
test_update_dense_parameter_reduce
(
self
):
build_strategy
=
fluid
.
BuildStrategy
()
build_strategy
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
Reduce
self
.
check_network_convergence
(
is_sparse
=
False
,
b
alance_parameter_opt_between_cards
=
True
)
is_sparse
=
False
,
b
uild_strategy
=
build_strategy
)
# test fetch all the variables of global_block
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录