Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
bddd4bc0
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
bddd4bc0
编写于
9月 17, 2018
作者:
D
dzhwinter
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'origin/develop' into memory/stable
上级
da8adf1d
437debf4
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
153 addition
and
41 deletion
+153
-41
cmake/tensorrt.cmake
cmake/tensorrt.cmake
+2
-0
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+1
-1
paddle/fluid/operators/distributed/grpc_client.cc
paddle/fluid/operators/distributed/grpc_client.cc
+2
-2
paddle/fluid/operators/math/sequence_pooling.cc
paddle/fluid/operators/math/sequence_pooling.cc
+62
-4
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+57
-17
python/paddle/fluid/tests/unittests/dist_transformer.py
python/paddle/fluid/tests/unittests/dist_transformer.py
+10
-8
python/paddle/fluid/tests/unittests/test_dist_base.py
python/paddle/fluid/tests/unittests/test_dist_base.py
+10
-9
python/paddle/fluid/tests/unittests/test_dist_transformer.py
python/paddle/fluid/tests/unittests/test_dist_transformer.py
+9
-0
未找到文件。
cmake/tensorrt.cmake
浏览文件 @
bddd4bc0
...
...
@@ -16,7 +16,9 @@ find_library(TENSORRT_LIBRARY NAMES libnvinfer.so libnvinfer.a
DOC
"Path to TensorRT library."
)
if
(
TENSORRT_INCLUDE_DIR AND TENSORRT_LIBRARY
)
if
(
WITH_DSO
)
set
(
TENSORRT_FOUND ON
)
endif
(
WITH DSO
)
else
()
set
(
TENSORRT_FOUND OFF
)
endif
()
...
...
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
bddd4bc0
...
...
@@ -429,7 +429,7 @@ struct LSTM : public PatternBase {
struct
GRU
:
public
PatternBase
{
GRU
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
)
:
PatternBase
(
pattern
,
name_scope
,
"
lstm
"
)
{}
:
PatternBase
(
pattern
,
name_scope
,
"
gru
"
)
{}
PDNode
*
operator
()(
PDNode
*
x
);
...
...
paddle/fluid/operators/distributed/grpc_client.cc
浏览文件 @
bddd4bc0
...
...
@@ -125,7 +125,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep,
VarHandlePtr
h
(
new
VarHandle
(
ep
,
"Get"
,
var_name_val
,
p_ctx
,
p_scope
));
s
->
Prepare
(
h
,
time_out
);
framework
::
AsyncIO
([
var_name_val
,
p_scope
,
p_ctx
,
s
,
this
]
{
framework
::
AsyncIO
([
var_name_val
,
s
,
this
]
{
// prepare input
sendrecv
::
VariableMessage
req
;
req
.
set_varname
(
var_name_val
);
...
...
@@ -166,7 +166,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep,
s
->
Prepare
(
h
,
time_out
);
framework
::
AsyncIO
([
in_var_name_val
,
out_var_name_val
,
ep_val
,
p_scope
,
p_ctx
,
time_out
,
s
,
this
]
{
s
,
this
]
{
auto
*
var
=
p_scope
->
FindVar
(
in_var_name_val
);
::
grpc
::
ByteBuffer
req
;
...
...
paddle/fluid/operators/math/sequence_pooling.cc
浏览文件 @
bddd4bc0
...
...
@@ -103,6 +103,58 @@ class MaxSeqPoolGradFunctor {
}
};
// CPU functor that, for every level-0 sequence of `input`, copies the final
// item of that sequence into `output`. Items are written to `output` back to
// back, one per sequence. The `context` argument is accepted but not used.
template <typename T>
class LastSeqPoolFunctor {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::LoDTensor& input,
                  framework::Tensor* output) {
    // Read cursor into the input buffer and write cursor into the output.
    const auto* src = input.data<T>();
    auto* dst = output->data<T>();

    // Number of elements that make up one item (one row along dim 0).
    const int64_t width = input.numel() / input.dims()[0];

    // Level-0 offsets; consecutive entries delimit one sequence.
    auto level0 = input.lod()[0];
    const int count = static_cast<int>(level0.size()) - 1;

    for (int s = 0; s < count; ++s, dst += width) {
      // Advance the read cursor past the whole current sequence ...
      const int64_t rows = static_cast<int64_t>(level0[s + 1] - level0[s]);
      src += rows * width;
      // ... so the item immediately before it is the sequence's last item.
      std::memcpy(dst, src - width, width * sizeof(T));
    }
  }
};
// CPU functor that, for every level-0 sequence of `input`, copies the first
// item of that sequence into `output`. Items are written to `output` back to
// back, one per sequence. The `context` argument is accepted but not used.
template <typename T>
class FirstSeqPoolFunctor {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::LoDTensor& input,
                  framework::Tensor* output) {
    // Read cursor into the input buffer and write cursor into the output.
    const auto* src = input.data<T>();
    auto* dst = output->data<T>();

    // Number of elements that make up one item (one row along dim 0).
    const int64_t width = input.numel() / input.dims()[0];

    // Level-0 offsets; consecutive entries delimit one sequence.
    auto level0 = input.lod()[0];
    const int count = static_cast<int>(level0.size()) - 1;

    int s = 0;
    while (s < count) {
      const int64_t rows = static_cast<int64_t>(level0[s + 1] - level0[s]);
      // The read cursor sits on the first item of the current sequence.
      std::memcpy(dst, src, width * sizeof(T));
      // Step over the rest of this sequence and on to the next output slot.
      src += rows * width;
      dst += width;
      ++s;
    }
  }
};
template
<
typename
T
>
class
SequencePoolFunctor
<
platform
::
CPUDeviceContext
,
T
>
{
public:
...
...
@@ -116,6 +168,16 @@ class SequencePoolFunctor<platform::CPUDeviceContext, T> {
max_pool
(
context
,
input
,
output
,
index
);
return
;
}
if
(
pooltype
==
"LAST"
)
{
math
::
LastSeqPoolFunctor
<
T
>
last_pool
;
last_pool
(
context
,
input
,
output
);
return
;
}
if
(
pooltype
==
"FIRST"
)
{
math
::
FirstSeqPoolFunctor
<
T
>
first_pool
;
first_pool
(
context
,
input
,
output
);
return
;
}
auto
lod
=
input
.
lod
()[
0
];
auto
&
place
=
*
context
.
eigen_device
();
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
lod
.
size
())
-
1
;
++
i
)
{
...
...
@@ -133,10 +195,6 @@ class SequencePoolFunctor<platform::CPUDeviceContext, T> {
}
else
if
(
pooltype
==
"SQRT"
)
{
out_e
.
device
(
place
)
=
in_e
.
sum
(
Eigen
::
array
<
int
,
1
>
({{
0
}}))
/
std
::
sqrt
(
static_cast
<
T
>
(
h
));
}
else
if
(
pooltype
==
"LAST"
)
{
out_e
.
device
(
place
)
=
in_e
.
chip
(
h
-
1
,
0
);
}
else
if
(
pooltype
==
"FIRST"
)
{
out_e
.
device
(
place
)
=
in_e
.
chip
(
0
,
0
);
}
else
{
PADDLE_THROW
(
"unsupported pooling pooltype"
);
}
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
bddd4bc0
...
...
@@ -33,6 +33,7 @@ function print_usage() {
${
BLUE
}
single_test
${
NONE
}
: run a single unit test
${
BLUE
}
bind_test
${
NONE
}
: parallel tests bind to different GPU
${
BLUE
}
doc
${
NONE
}
: generate paddle documents
${
BLUE
}
gen_doc_lib
${
NONE
}
: generate paddle documents library
${
BLUE
}
html
${
NONE
}
: convert C++ source code into HTML
${
BLUE
}
dockerfile
${
NONE
}
: generate paddle release dockerfile
${
BLUE
}
capi
${
NONE
}
: generate paddle CAPI package
...
...
@@ -431,24 +432,60 @@ EOF
linkchecker doc/v2/cn/html/index.html
linkchecker doc/v2/api/en/html/index.html
if
[[
"
$TRAVIS_PULL_REQUEST
"
!=
"false"
]]
;
then
exit
0
;
fi
;
# if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
#
# # Deploy to the content server if it's a "develop" or "release/version" branch
# # The "develop_doc" branch is reserved to test full deploy process without impacting the real content.
# if [ "$TRAVIS_BRANCH" == "develop_doc" ]; then
# PPO_SCRIPT_BRANCH=develop
# elif [[ "$TRAVIS_BRANCH" == "develop" || "$TRAVIS_BRANCH" =~ ^v|release/[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then
# PPO_SCRIPT_BRANCH=master
# else
# # Early exit, this branch doesn't require documentation build
# return 0;
# fi
# # Fetch the paddlepaddle.org deploy_docs.sh from the appropriate branch
# export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/$PPO_SCRIPT_BRANCH/scripts/deploy/deploy_docs.sh
# export PYTHONPATH=$PYTHONPATH:${PADDLE_ROOT}/build/python:/paddle/build/python
# cd ..
# curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH ${PADDLE_ROOT} ${PADDLE_ROOT}/build/doc/ ${PPO_SCRIPT_BRANCH}
# cd -
}
# Deploy to the content server if it's a "develop" or "release/version" branch
# The "develop_doc" branch is reserved to test full deploy process without impacting the real content.
if
[
"
$TRAVIS_BRANCH
"
==
"develop_doc"
]
;
then
PPO_SCRIPT_BRANCH
=
develop
elif
[[
"
$TRAVIS_BRANCH
"
==
"develop"
||
"
$TRAVIS_BRANCH
"
=
~ ^v|release/[[:digit:]]+
\.
[[
:digit:]]+
(
\.
[[
:digit:]]+
)
?
(
-
\S
*
)
?
$
]]
;
then
PPO_SCRIPT_BRANCH
=
master
else
# Early exit, this branch doesn't require documentation build
return
0
;
fi
# Fetch the paddlepaddle.org deploy_docs.sh from the appropriate branch
export
DEPLOY_DOCS_SH
=
https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/
$PPO_SCRIPT_BRANCH
/scripts/deploy/deploy_docs.sh
export
PYTHONPATH
=
$PYTHONPATH
:
${
PADDLE_ROOT
}
/build/python:/paddle/build/python
cd
..
curl
$DEPLOY_DOCS_SH
| bash
-s
$CONTENT_DEC_PASSWD
$TRAVIS_BRANCH
${
PADDLE_ROOT
}
${
PADDLE_ROOT
}
/build/doc/
${
PPO_SCRIPT_BRANCH
}
cd
-
# Configure a documentation build under ${PADDLE_ROOT}/build and then build
# the library subset selected by the first argument:
#   full   - gen_proto_py, framework_py_proto, copy_paddle_pybind, paddle_python
#   pybind - copy_paddle_pybind
#   proto  - framework_py_proto
# Any other value (including none) exits with status 0 after the cmake step.
function gen_doc_lib() {
    mkdir -p ${PADDLE_ROOT}/build
    cd ${PADDLE_ROOT}/build
    cat <<EOF
========================================
Building documentation library ...
In /paddle/build
========================================
EOF
    cmake .. \
        -DCMAKE_BUILD_TYPE=Release \
        -DWITH_DOC=ON \
        -DWITH_GPU=OFF \
        -DWITH_MKL=OFF \
        -DWITH_FLUID_ONLY=ON

    # Pick the make targets for the requested library type.
    local targets
    case $1 in
      full)
        # Build full Paddle Python module. Will timeout without caching 'copy_paddle_pybind' first
        targets="gen_proto_py framework_py_proto copy_paddle_pybind paddle_python"
        ;;
      pybind)
        # Build paddle pybind library. Takes 49 minutes to build. Might timeout
        targets="copy_paddle_pybind"
        ;;
      proto)
        # Even smaller library.
        targets="framework_py_proto"
        ;;
      *)
        exit 0
        ;;
    esac

    # $targets is intentionally unquoted so each target becomes its own word.
    make -j $(nproc) $targets
}
function
gen_html
()
{
...
...
@@ -608,6 +645,9 @@ function main() {
doc
)
gen_docs
;;
gen_doc_lib
)
gen_doc_lib
$2
;;
html
)
gen_html
;;
...
...
python/paddle/fluid/tests/unittests/dist_transformer.py
浏览文件 @
bddd4bc0
...
...
@@ -92,7 +92,7 @@ class TrainTaskConfig(object):
src_vocab_fpath
=
data_path
+
"vocab.bpe.32000"
trg_vocab_fpath
=
data_path
+
"vocab.bpe.32000"
train_file_pattern
=
data_path
+
"train.tok.clean.bpe.32000.en-de"
val_file_pattern
=
data_path
+
"newstest2013.tok.bpe.32000.en-de"
val_file_pattern
=
data_path
+
"newstest2013.tok.bpe.32000.en-de
.cut
"
pool_size
=
2000
sort_type
=
None
local
=
True
...
...
@@ -624,11 +624,12 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
init
=
True
# Validate and save the model for inference.
if
TrainTaskConfig
.
val_file_pattern
is
not
None
:
val_avg_cost
,
val_ppl
=
test
()
print
(
"[%f]"
%
val_avg_cost
)
else
:
assert
(
False
)
if
batch_id
==
0
or
batch_id
==
4
:
if
TrainTaskConfig
.
val_file_pattern
is
not
None
:
val_avg_cost
,
val_ppl
=
test
()
print
(
"[%f]"
%
val_avg_cost
)
else
:
assert
(
False
)
#import transformer_reader as reader
...
...
@@ -1701,8 +1702,9 @@ class DistTransformer2x2(TestDistRunnerBase):
exe
.
run
(
startup_prog
)
exe
.
run
(
pserver_prog
)
def
run_trainer
(
self
,
place
,
args
):
def
run_trainer
(
self
,
use_cuda
,
args
):
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
TrainTaskConfig
.
use_gpu
=
use_cuda
sum_cost
,
avg_cost
,
predict
,
token_num
,
local_lr_scheduler
=
get_model
(
args
.
is_dist
,
not
args
.
sync_mode
)
...
...
python/paddle/fluid/tests/unittests/test_dist_base.py
浏览文件 @
bddd4bc0
...
...
@@ -61,9 +61,10 @@ class TestDistRunnerBase(object):
exe
.
run
(
startup_prog
)
exe
.
run
(
pserver_prog
)
def
run_trainer
(
self
,
place
,
args
):
def
run_trainer
(
self
,
use_cuda
,
args
):
import
paddle
import
paddle.fluid
as
fluid
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
test_program
,
avg_cost
,
train_reader
,
test_reader
,
batch_acc
,
predict
=
\
self
.
get_model
(
batch_size
=
2
)
if
args
.
mem_opt
:
...
...
@@ -91,7 +92,7 @@ class TestDistRunnerBase(object):
build_stra
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
exe
=
fluid
.
ParallelExecutor
(
True
,
use_cuda
,
loss_name
=
avg_cost
.
name
,
exec_strategy
=
strategy
,
build_strategy
=
build_stra
)
...
...
@@ -142,9 +143,8 @@ def runtime_main(test_class):
if
args
.
role
==
"pserver"
and
args
.
is_dist
:
model
.
run_pserver
(
args
)
else
:
p
=
fluid
.
CUDAPlace
(
0
)
if
core
.
is_compiled_with_cuda
(
)
else
fluid
.
CPUPlace
()
model
.
run_trainer
(
p
,
args
)
use_cuda
=
True
if
core
.
is_compiled_with_cuda
()
else
False
model
.
run_trainer
(
use_cuda
,
args
)
import
paddle.compat
as
cpt
...
...
@@ -225,11 +225,12 @@ class TestDistBase(unittest.TestCase):
def
check_with_place
(
self
,
model_file
,
delta
=
1e-3
,
check_error_log
=
False
):
# TODO(typhoonzero): should auto adapt GPU count on the machine.
required_envs
=
{
"PATH"
:
os
.
getenv
(
"PATH"
),
"PYTHONPATH"
:
os
.
getenv
(
"PYTHONPATH"
),
"LD_LIBRARY_PATH"
:
os
.
getenv
(
"LD_LIBRARY_PATH"
),
"PATH"
:
os
.
getenv
(
"PATH"
,
""
),
"PYTHONPATH"
:
os
.
getenv
(
"PYTHONPATH"
,
""
),
"LD_LIBRARY_PATH"
:
os
.
getenv
(
"LD_LIBRARY_PATH"
,
""
),
"FLAGS_fraction_of_gpu_memory_to_use"
:
"0.15"
,
"FLAGS_cudnn_deterministic"
:
"1"
"FLAGS_cudnn_deterministic"
:
"1"
,
"CPU_NUM"
:
"1"
}
if
check_error_log
:
...
...
python/paddle/fluid/tests/unittests/test_dist_transformer.py
浏览文件 @
bddd4bc0
...
...
@@ -14,6 +14,7 @@
from
__future__
import
print_function
import
os
import
unittest
import
paddle
from
test_dist_base
import
TestDistBase
...
...
@@ -44,6 +45,14 @@ def download_files():
test_url
=
url_prefix
+
'newstest2013.tok.bpe.32000.en-de'
test_md5
=
'9dd74a266dbdb25314183899f269b4a2'
paddle
.
dataset
.
common
.
download
(
test_url
,
'test_dist_transformer'
,
test_md5
)
# cut test data for faster CI
orig_path
=
os
.
path
.
join
(
paddle
.
dataset
.
common
.
DATA_HOME
,
"test_dist_transformer"
,
"newstest2013.tok.bpe.32000.en-de"
)
head_path
=
os
.
path
.
join
(
paddle
.
dataset
.
common
.
DATA_HOME
,
"test_dist_transformer"
,
"newstest2013.tok.bpe.32000.en-de.cut"
)
os
.
system
(
"head -n10 %s > %s"
%
(
orig_path
,
head_path
))
class
TestDistTransformer2x2Sync
(
TestDistBase
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录