机器未来 / Paddle · forked from PaddlePaddle / Paddle
Commit 0bd6476f

Authored Sep 14, 2018 by tensor-tang

    Merge remote-tracking branch 'ups/develop' into fea/ut/vis

Parents: 3c14d8f7, 3ab3a7f3

Showing 8 changed files with 277 additions and 34 deletions (+277 -34)
paddle/fluid/API.spec                                          +2    -2
paddle/fluid/inference/tests/api/CMakeLists.txt                +30   -28
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc       +181  -0
paddle/fluid/operators/distributed/grpc_client.cc              +8    -2
python/paddle/fluid/tests/unittests/test_dist_transpiler.py    +1    -1
python/paddle/fluid/transpiler/details/__init__.py             +1    -0
python/paddle/fluid/transpiler/details/checkport.py            +50   -0
python/paddle/fluid/transpiler/distribute_transpiler.py        +4    -1
paddle/fluid/API.spec  (+2 -2)

@@ -59,7 +59,7 @@ paddle.fluid.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], vara
 paddle.fluid.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
 paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None))
 paddle.fluid.InferenceTranspiler.__init__
 paddle.fluid.InferenceTranspiler.transpile ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
@@ -346,7 +346,7 @@ paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'con
 paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
 paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None))
 paddle.fluid.transpiler.InferenceTranspiler.__init__
 paddle.fluid.transpiler.InferenceTranspiler.transpile ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
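Both spec entries record the same change: get_trainer_program gains a wait_port keyword defaulting to True. A minimal trainer-side sketch under the new signature (the endpoint string and trainer counts are placeholders, not values from this commit):

    import paddle.fluid as fluid

    t = fluid.DistributeTranspiler()
    # trainer_id is required; pservers/trainers shown with illustrative values.
    t.transpile(trainer_id=0, pservers="127.0.0.1:6174", trainers=1)

    # New in this commit: by default the call now blocks until every pserver
    # endpoint accepts a TCP connection; pass wait_port=False to skip the check.
    trainer_prog = t.get_trainer_program()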
paddle/fluid/inference/tests/api/CMakeLists.txt  (+30 -28)

-function(inference_download_and_uncompress install_dir url)
-  get_filename_component(filename ${url} NAME)
-  message(STATUS "Download inference test stuff ${filename} from ${url}")
+set(INFERENCE_URL "http://paddle-inference-dist.bj.bcebos.com")
+set(INFERENCE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo")
+set(INFERENCE_EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor)
+
+function(inference_download_and_uncompress install_dir filename)
+  message(STATUS "Download inference test stuff from ${INFERENCE_URL}/${filename}")
   execute_process(COMMAND bash -c "mkdir -p ${install_dir}")
-  execute_process(COMMAND bash -c "cd ${install_dir} && wget -q ${url}")
+  execute_process(COMMAND bash -c "cd ${install_dir} && wget -q ${INFERENCE_URL}/${filename}")
   execute_process(COMMAND bash -c "cd ${install_dir} && tar xzf ${filename}")
   message(STATUS "finish downloading ${filename}")
 endfunction(inference_download_and_uncompress)

-function(download_model_and_data install_dir model_url data_url)
+function(download_model_and_data install_dir model_name data_name)
   if (NOT EXISTS ${install_dir} AND WITH_INFERENCE)
-    inference_download_and_uncompress(${install_dir} ${model_url})
-    inference_download_and_uncompress(${install_dir} ${data_url})
+    inference_download_and_uncompress(${install_dir} ${model_name})
+    inference_download_and_uncompress(${install_dir} ${data_name})
   endif()
 endfunction()

 # RNN1
-set(RNN1_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/rnn1%2Fmodel.tar.gz")
-set(RNN1_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/rnn1%2Fdata.txt.tar.gz")
-set(RNN1_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/rnn1")
-download_model_and_data(${RNN1_INSTALL_DIR} ${RNN1_MODEL_URL} ${RNN1_DATA_URL})
-inference_analysis_test(test_analyzer_rnn1 SRCS analyzer_rnn1_tester.cc
-  EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor
+set(RNN1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/rnn1")
+download_model_and_data(${RNN1_INSTALL_DIR} "rnn1%2Fmodel.tar.gz" "rnn1%2Fdata.txt.tar.gz")
+inference_analysis_test(test_analyzer_rnn1 SRCS analyzer_rnn1_tester.cc
+  EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
   ARGS --infer_model=${RNN1_INSTALL_DIR}/model
        --infer_data=${RNN1_INSTALL_DIR}/data.txt)

+# RNN2
+set(RNN2_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/rnn2")
+download_model_and_data(${RNN2_INSTALL_DIR} "rnn2_model.tar.gz" "rnn2_data.txt.tar.gz")
+inference_analysis_test(test_analyzer_rnn2 SRCS analyzer_rnn2_tester.cc
+  EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
+  ARGS --infer_model=${RNN2_INSTALL_DIR}/model
+       --infer_data=${RNN2_INSTALL_DIR}/data.txt)
+
 # chinese_ner
-set(CHINESE_NER_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/chinese_ner_model.tar.gz")
-set(CHINESE_NER_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/chinese_ner-data.txt.tar.gz")
-set(CHINESE_NER_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/chinese_ner")
-download_model_and_data(${CHINESE_NER_INSTALL_DIR} ${CHINESE_NER_MODEL_URL} ${CHINESE_NER_DATA_URL})
+set(CHINESE_NER_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/chinese_ner")
+download_model_and_data(${CHINESE_NER_INSTALL_DIR} "chinese_ner_model.tar.gz" "chinese_ner-data.txt.tar.gz")
 inference_analysis_test(test_analyzer_ner SRCS analyzer_ner_tester.cc
-  EXTRA_DEPS paddle_inference_api paddle_fluid_api analysis_predictor
+  EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
   ARGS --infer_model=${CHINESE_NER_INSTALL_DIR}/model
        --infer_data=${CHINESE_NER_INSTALL_DIR}/data.txt)

 # lac
-set(LAC_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/lac_model.tar.gz")
-set(LAC_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/lac_data.txt.tar.gz")
-set(LAC_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/lac")
-download_model_and_data(${LAC_INSTALL_DIR} ${LAC_MODEL_URL} ${LAC_DATA_URL})
+set(LAC_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/lac")
+download_model_and_data(${LAC_INSTALL_DIR} "lac_model.tar.gz" "lac_data.txt.tar.gz")
 inference_analysis_test(test_analyzer_lac SRCS analyzer_lac_tester.cc
-  EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor
+  EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
   ARGS --infer_model=${LAC_INSTALL_DIR}/model
        --infer_data=${LAC_INSTALL_DIR}/data.txt)

 # text_classification
-set(TEXT_CLASSIFICATION_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/text-classification-Senta.tar.gz")
-set(TEXT_CLASSIFICATION_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/text_classification_data.txt.tar.gz")
-set(TEXT_CLASSIFICATION_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/text_classification")
-download_model_and_data(${TEXT_CLASSIFICATION_INSTALL_DIR} ${TEXT_CLASSIFICATION_MODEL_URL} ${TEXT_CLASSIFICATION_DATA_URL})
+set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classification")
+download_model_and_data(${TEXT_CLASSIFICATION_INSTALL_DIR} "text-classification-Senta.tar.gz" "text_classification_data.txt.tar.gz")
 inference_analysis_test(test_text_classification SRCS analyzer_text_classification_tester.cc
-  EXTRA_DEPS paddle_inference_api paddle_fluid_api analysis_predictor
+  EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
   ARGS --infer_model=${TEXT_CLASSIFICATION_INSTALL_DIR}/text-classification-Senta
        --infer_data=${TEXT_CLASSIFICATION_INSTALL_DIR}/data.txt
        --topn=1  # Just run top 1 batch.
...
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc  (new file, mode 100644, +181)

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/analysis/analyzer.h"

#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
#include <thread>  // NOLINT
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"

DEFINE_string(infer_model, "", "model path");
DEFINE_string(infer_data, "", "data path");
DEFINE_int32(batch_size, 1, "batch size.");
DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");

namespace paddle {
namespace inference {

using namespace framework;  // NOLINT

struct DataRecord {
  std::vector<std::vector<std::vector<float>>> link_step_data_all;
  std::vector<size_t> lod;
  std::vector<std::vector<float>> rnn_link_data;
  std::vector<float> result_data;
  size_t batch_iter{0};
  size_t batch_size{1};
  DataRecord() = default;
  explicit DataRecord(const std::string &path, int batch_size = 1)
      : batch_size(batch_size) {
    Load(path);
  }
  DataRecord NextBatch() {
    DataRecord data;
    size_t batch_end = batch_iter + batch_size;
    // NOTE skip the final batch, if not enough data is provided.
    if (batch_end <= link_step_data_all.size()) {
      data.link_step_data_all.assign(link_step_data_all.begin() + batch_iter,
                                     link_step_data_all.begin() + batch_end);
      // Prepare LoDs
      data.lod.push_back(0);
      CHECK(!data.link_step_data_all.empty()) << "empty";
      for (size_t j = 0; j < data.link_step_data_all.size(); j++) {
        for (const auto &d : data.link_step_data_all[j]) {
          data.rnn_link_data.push_back(d);
          // calculate lod
          data.lod.push_back(data.lod.back() + 11);
        }
      }
    }
    batch_iter += batch_size;
    return data;
  }
  void Load(const std::string &path) {
    std::ifstream file(path);
    std::string line;
    int num_lines = 0;
    while (std::getline(file, line)) {
      num_lines++;
      std::vector<std::string> data;
      split(line, ':', &data);
      if (num_lines % 2) {  // feature
        std::vector<std::string> feature_data;
        split(data[1], ' ', &feature_data);
        std::vector<std::vector<float>> link_step_data;
        int feature_count = 1;
        std::vector<float> feature;
        for (auto &step_data : feature_data) {
          std::vector<float> tmp;
          split_to_float(step_data, ',', &tmp);
          feature.insert(feature.end(), tmp.begin(), tmp.end());
          if (feature_count % 11 == 0) {  // each sample has 11 features
            link_step_data.push_back(feature);
            feature.clear();
          }
          feature_count++;
        }
        link_step_data_all.push_back(std::move(link_step_data));
      } else {  // result
        std::vector<float> tmp;
        split_to_float(data[1], ',', &tmp);
        result_data.insert(result_data.end(), tmp.begin(), tmp.end());
      }
    }
  }
};

void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
                   int batch_size) {
  PaddleTensor feed_tensor;
  feed_tensor.name = "feed";
  auto one_batch = data->NextBatch();
  int token_size = one_batch.rnn_link_data.size();
  // each token has 11 features, each feature's dim is 54.
  std::vector<int> rnn_link_data_shape({token_size * 11, 54});
  feed_tensor.shape = rnn_link_data_shape;
  feed_tensor.lod.assign({one_batch.lod});
  feed_tensor.dtype = PaddleDType::FLOAT32;
  TensorAssignData<float>(&feed_tensor, one_batch.rnn_link_data);
  // Set inputs.
  input_slots->assign({feed_tensor});
}

void CompareResult(const std::vector<PaddleTensor> &outputs,
                   const std::vector<float> &base_result) {
  PADDLE_ENFORCE_GT(outputs.size(), 0);
  for (size_t i = 0; i < outputs.size(); i++) {
    auto &out = outputs[i];
    size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
                                  [](int a, int b) { return a * b; });
    PADDLE_ENFORCE_GT(size, 0);
    float *data = static_cast<float *>(out.data.data());
    for (size_t i = 0; i < size; i++) {
      EXPECT_NEAR(data[i], base_result[i], 1e-3);
    }
  }
}

// Test with a really complicated model.
void TestRNN2Prediction() {
  AnalysisConfig config;
  config.prog_file = FLAGS_infer_model + "/__model__";
  config.param_file = FLAGS_infer_model + "/param";
  config.use_gpu = false;
  config.device = 0;
  config.specify_input_name = true;
  config.enable_ir_optim = true;
  PADDLE_ENFORCE(config.ir_mode == AnalysisConfig::IrPassMode::kExclude);  // default

  int batch_size = FLAGS_batch_size;
  int num_times = FLAGS_repeat;

  auto base_predictor =
      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
  auto predictor =
      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(config);
  std::vector<PaddleTensor> input_slots;
  DataRecord data(FLAGS_infer_data, batch_size);
  PrepareInputs(&input_slots, &data, batch_size);

  std::vector<PaddleTensor> outputs, base_outputs;

  Timer timer1;
  timer1.tic();
  for (int i = 0; i < num_times; i++) {
    base_predictor->Run(input_slots, &base_outputs);
  }
  PrintTime(batch_size, num_times, 1, 0, timer1.toc() / num_times);

  Timer timer2;
  timer2.tic();
  for (int i = 0; i < num_times; i++) {
    predictor->Run(input_slots, &outputs);
  }
  PrintTime(batch_size, num_times, 1, 0, timer2.toc() / num_times);

  CompareResult(base_outputs, data.result_data);
  CompareResult(outputs, data.result_data);
}

TEST(Analyzer, rnn2) { TestRNN2Prediction(); }

}  // namespace inference
}  // namespace paddle
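DataRecord::Load above implies a concrete layout for --infer_data: lines alternate between features and reference results, each line being "<tag>:<payload>"; a feature payload is space-separated steps of comma-joined floats, with every 11 steps forming one sample of dimension 54 per step. A hypothetical generator for a toy file of that shape (the tags, the number of records, and the result width are illustrative guesses, not taken from this commit):

    import random

    def feature_line(num_samples=1, steps_per_sample=11, dim=54):
        # Each space-separated token is one step: `dim` comma-joined floats.
        steps = [",".join("%.4f" % random.random() for _ in range(dim))
                 for _ in range(num_samples * steps_per_sample)]
        return "feature:" + " ".join(steps)

    with open("data.txt", "w") as f:
        for _ in range(4):  # odd lines: features, even lines: results
            f.write(feature_line() + "\n")
            f.write("result:" + ",".join("0.0" for _ in range(4)) + "\n")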
paddle/fluid/operators/distributed/grpc_client.cc  (+8 -2)

@@ -290,12 +290,18 @@ void GRPCClient::Proceed() {
       c->Finish(false);
     }

-    delete c;
+    bool notify = false;
     {
       std::lock_guard<std::mutex> lk(sync_mutex_);
       req_count_--;
+      notify = (req_count_ <= 0 || !c->status_.ok());
     }
-    sync_cond_.notify_all();
+
+    delete c;
+    if (notify) {
+      sync_cond_.notify_all();
+    }
   }
   VLOG(3) << "GRPCClient Proceed end";
 }
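The rewritten block decides whether to wake waiters while sync_mutex_ is held, deletes the request object only after leaving the critical section, and signals sync_cond_ just when req_count_ reaches zero or the RPC failed, rather than on every completion. A minimal sketch of the same pattern in Python (illustrative names, not a Paddle API; note Python's Condition must be notified while its lock is held, whereas the C++ code is free to notify after unlocking):

    import threading

    class PendingRequests:
        def __init__(self, count):
            self._cond = threading.Condition()
            self._count = count

        def complete_one(self, ok):
            with self._cond:
                self._count -= 1
                # Wake waiters only when they can actually make progress.
                if self._count <= 0 or not ok:
                    self._cond.notify_all()
            # Per-request cleanup (the C++ `delete c`) happens outside the lock.

        def wait_all(self):
            with self._cond:
                self._cond.wait_for(lambda: self._count <= 0)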
python/paddle/fluid/tests/unittests/test_dist_transpiler.py  (+1 -1)

@@ -62,7 +62,7 @@ class TranspilerTest(unittest.TestCase):
         t = self._transpiler_instance(config)

-        trainer_main = t.get_trainer_program()
+        trainer_main = t.get_trainer_program(wait_port=False)
         trainer_startup = fluid.default_startup_program()

         assert (src.num_blocks == 1)
python/paddle/fluid/transpiler/details/__init__.py  (+1 -0)

@@ -16,3 +16,4 @@ from __future__ import print_function
 from .program_utils import *
 from .ufind import *
+from .checkport import *
python/paddle/fluid/transpiler/details/checkport.py  (new file, mode 100644, +50)

# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import time
import socket
from contextlib import closing


def wait_server_ready(endpoints):
    """
    Wait until parameter servers are ready, use connect_ex to detect
    port readiness.

    Args:
        endpoints (list): endpoints string list, like:
            ["127.0.0.1:8080", "127.0.0.1:8081"]

    Examples:
        .. code-block:: python

            wait_server_ready(["127.0.0.1:8080", "127.0.0.1:8081"])
    """
    while True:
        all_ok = True
        for ep in endpoints:
            ip_port = ep.split(":")
            with closing(socket.socket(socket.AF_INET,
                                       socket.SOCK_STREAM)) as sock:
                sock.settimeout(2)
                result = sock.connect_ex((ip_port[0], int(ip_port[1])))
                if result != 0:
                    all_ok = False
        if not all_ok:
            sys.stderr.write("pserver not ready, wait 3 sec to retry...\n")
            sys.stderr.flush()
            time.sleep(3)
        else:
            break
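connect_ex is the non-raising variant of connect: it returns 0 once the listener completes the TCP handshake and an errno value otherwise, which is what lets the loop above poll without try/except. A quick standalone probe of one endpoint (the address is a placeholder):

    import socket
    from contextlib import closing

    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
        sock.settimeout(2)
        err = sock.connect_ex(("127.0.0.1", 6174))
        # 0 => the port is accepting connections; otherwise an errno such as
        # errno.ECONNREFUSED while the server is still starting.
        print("ready" if err == 0 else "not ready (errno %d)" % err)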
python/paddle/fluid/transpiler/distribute_transpiler.py  (+4 -1)

@@ -381,7 +381,7 @@ class DistributeTranspiler(object):
                                                   pserver_endpoints)
         self._split_table_grad_and_add_send_vars(program, pserver_endpoints)

-    def get_trainer_program(self):
+    def get_trainer_program(self, wait_port=True):
         """
         Get transpiled trainer side program.
@@ -393,6 +393,9 @@ class DistributeTranspiler(object):
         delete_ops(self.origin_program.global_block(), self.optimize_ops)
         self.origin_program.__str__()

+        if wait_port:
+            wait_server_ready(self.pserver_endpoints)
+
         return self.origin_program

     def _get_trainer_startup_program(self, recv_vars, eplist):
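Because details/__init__.py now re-exports everything from checkport, the same readiness gate is also available to callers that opt out of the built-in check, which is what the unit test above does since no pserver process is listening there. A hedged sketch of manual gating (the endpoint list is illustrative):

    from paddle.fluid.transpiler.details import wait_server_ready

    # Build the program without blocking, then gate the training loop yourself:
    trainer_prog = t.get_trainer_program(wait_port=False)
    wait_server_ready(["127.0.0.1:6174"])  # placeholder endpoints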