Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
bddd4bc0
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
bddd4bc0
编写于
9月 17, 2018
作者:
D
dzhwinter
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'origin/develop' into memory/stable
上级
da8adf1d
437debf4
变更
8
显示空白变更内容
内联
并排
Showing
8 changed file
with
153 addition
and
41 deletion
+153
-41
cmake/tensorrt.cmake
cmake/tensorrt.cmake
+2
-0
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+1
-1
paddle/fluid/operators/distributed/grpc_client.cc
paddle/fluid/operators/distributed/grpc_client.cc
+2
-2
paddle/fluid/operators/math/sequence_pooling.cc
paddle/fluid/operators/math/sequence_pooling.cc
+62
-4
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+57
-17
python/paddle/fluid/tests/unittests/dist_transformer.py
python/paddle/fluid/tests/unittests/dist_transformer.py
+10
-8
python/paddle/fluid/tests/unittests/test_dist_base.py
python/paddle/fluid/tests/unittests/test_dist_base.py
+10
-9
python/paddle/fluid/tests/unittests/test_dist_transformer.py
python/paddle/fluid/tests/unittests/test_dist_transformer.py
+9
-0
未找到文件。
cmake/tensorrt.cmake
浏览文件 @
bddd4bc0
...
...
@@ -16,7 +16,9 @@ find_library(TENSORRT_LIBRARY NAMES libnvinfer.so libnvinfer.a
DOC
"Path to TensorRT library."
)
if
(
TENSORRT_INCLUDE_DIR AND TENSORRT_LIBRARY
)
if
(
WITH_DSO
)
set
(
TENSORRT_FOUND ON
)
endif
(
WITH DSO
)
else
()
set
(
TENSORRT_FOUND OFF
)
endif
()
...
...
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
bddd4bc0
...
...
@@ -429,7 +429,7 @@ struct LSTM : public PatternBase {
struct
GRU
:
public
PatternBase
{
GRU
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
)
:
PatternBase
(
pattern
,
name_scope
,
"
lstm
"
)
{}
:
PatternBase
(
pattern
,
name_scope
,
"
gru
"
)
{}
PDNode
*
operator
()(
PDNode
*
x
);
...
...
paddle/fluid/operators/distributed/grpc_client.cc
浏览文件 @
bddd4bc0
...
...
@@ -125,7 +125,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep,
VarHandlePtr
h
(
new
VarHandle
(
ep
,
"Get"
,
var_name_val
,
p_ctx
,
p_scope
));
s
->
Prepare
(
h
,
time_out
);
framework
::
AsyncIO
([
var_name_val
,
p_scope
,
p_ctx
,
s
,
this
]
{
framework
::
AsyncIO
([
var_name_val
,
s
,
this
]
{
// prepare input
sendrecv
::
VariableMessage
req
;
req
.
set_varname
(
var_name_val
);
...
...
@@ -166,7 +166,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep,
s
->
Prepare
(
h
,
time_out
);
framework
::
AsyncIO
([
in_var_name_val
,
out_var_name_val
,
ep_val
,
p_scope
,
p_ctx
,
time_out
,
s
,
this
]
{
s
,
this
]
{
auto
*
var
=
p_scope
->
FindVar
(
in_var_name_val
);
::
grpc
::
ByteBuffer
req
;
...
...
paddle/fluid/operators/math/sequence_pooling.cc
浏览文件 @
bddd4bc0
...
...
@@ -103,6 +103,58 @@ class MaxSeqPoolGradFunctor {
}
};
template
<
typename
T
>
class
LastSeqPoolFunctor
{
public:
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
const
framework
::
LoDTensor
&
input
,
framework
::
Tensor
*
output
)
{
// Create pointers to input and output data
auto
*
in_data
=
input
.
data
<
T
>
();
auto
*
out_data
=
output
->
data
<
T
>
();
// Calculate the size of each item in sequence
int64_t
item_size
=
input
.
numel
()
/
input
.
dims
()[
0
];
auto
lod
=
input
.
lod
()[
0
];
int
seq_num
=
static_cast
<
int
>
(
lod
.
size
())
-
1
;
for
(
int
i
=
0
;
i
<
seq_num
;
++
i
)
{
// Calculate the length of each sequence
int64_t
seq_len
=
static_cast
<
int64_t
>
(
lod
[
i
+
1
]
-
lod
[
i
]);
// Point to the begin of next sequence
in_data
+=
seq_len
*
item_size
;
// Copy the last item of sequence to output
std
::
memcpy
(
out_data
,
(
in_data
-
item_size
),
item_size
*
sizeof
(
T
));
out_data
+=
item_size
;
}
}
};
template
<
typename
T
>
class
FirstSeqPoolFunctor
{
public:
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
const
framework
::
LoDTensor
&
input
,
framework
::
Tensor
*
output
)
{
// Create pointers to input and output data
auto
*
in_data
=
input
.
data
<
T
>
();
auto
*
out_data
=
output
->
data
<
T
>
();
// Calculate the size of each item in sequence
int64_t
item_size
=
input
.
numel
()
/
input
.
dims
()[
0
];
auto
lod
=
input
.
lod
()[
0
];
int
seq_num
=
static_cast
<
int
>
(
lod
.
size
())
-
1
;
for
(
int
i
=
0
;
i
<
seq_num
;
++
i
)
{
// Calculate the length of each sequence
int64_t
seq_len
=
static_cast
<
int64_t
>
(
lod
[
i
+
1
]
-
lod
[
i
]);
// Copy the first item of sequence to output
std
::
memcpy
(
out_data
,
in_data
,
item_size
*
sizeof
(
T
));
// Point to the next sequence
in_data
+=
seq_len
*
item_size
;
out_data
+=
item_size
;
}
}
};
template
<
typename
T
>
class
SequencePoolFunctor
<
platform
::
CPUDeviceContext
,
T
>
{
public:
...
...
@@ -116,6 +168,16 @@ class SequencePoolFunctor<platform::CPUDeviceContext, T> {
max_pool
(
context
,
input
,
output
,
index
);
return
;
}
if
(
pooltype
==
"LAST"
)
{
math
::
LastSeqPoolFunctor
<
T
>
last_pool
;
last_pool
(
context
,
input
,
output
);
return
;
}
if
(
pooltype
==
"FIRST"
)
{
math
::
FirstSeqPoolFunctor
<
T
>
first_pool
;
first_pool
(
context
,
input
,
output
);
return
;
}
auto
lod
=
input
.
lod
()[
0
];
auto
&
place
=
*
context
.
eigen_device
();
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
lod
.
size
())
-
1
;
++
i
)
{
...
...
@@ -133,10 +195,6 @@ class SequencePoolFunctor<platform::CPUDeviceContext, T> {
}
else
if
(
pooltype
==
"SQRT"
)
{
out_e
.
device
(
place
)
=
in_e
.
sum
(
Eigen
::
array
<
int
,
1
>
({{
0
}}))
/
std
::
sqrt
(
static_cast
<
T
>
(
h
));
}
else
if
(
pooltype
==
"LAST"
)
{
out_e
.
device
(
place
)
=
in_e
.
chip
(
h
-
1
,
0
);
}
else
if
(
pooltype
==
"FIRST"
)
{
out_e
.
device
(
place
)
=
in_e
.
chip
(
0
,
0
);
}
else
{
PADDLE_THROW
(
"unsupported pooling pooltype"
);
}
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
bddd4bc0
...
...
@@ -33,6 +33,7 @@ function print_usage() {
${
BLUE
}
single_test
${
NONE
}
: run a single unit test
${
BLUE
}
bind_test
${
NONE
}
: parallel tests bind to different GPU
${
BLUE
}
doc
${
NONE
}
: generate paddle documents
${
BLUE
}
gen_doc_lib
${
NONE
}
: generate paddle documents library
${
BLUE
}
html
${
NONE
}
: convert C++ source code into HTML
${
BLUE
}
dockerfile
${
NONE
}
: generate paddle release dockerfile
${
BLUE
}
capi
${
NONE
}
: generate paddle CAPI package
...
...
@@ -431,24 +432,60 @@ EOF
linkchecker doc/v2/cn/html/index.html
linkchecker doc/v2/api/en/html/index.html
if
[[
"
$TRAVIS_PULL_REQUEST
"
!=
"false"
]]
;
then
exit
0
;
fi
;
# if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
#
# # Deploy to the the content server if its a "develop" or "release/version" branch
# # The "develop_doc" branch is reserved to test full deploy process without impacting the real content.
# if [ "$TRAVIS_BRANCH" == "develop_doc" ]; then
# PPO_SCRIPT_BRANCH=develop
# elif [[ "$TRAVIS_BRANCH" == "develop" || "$TRAVIS_BRANCH" =~ ^v|release/[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then
# PPO_SCRIPT_BRANCH=master
# else
# # Early exit, this branch doesn't require documentation build
# return 0;
# fi
# # Fetch the paddlepaddle.org deploy_docs.sh from the appopriate branch
# export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/$PPO_SCRIPT_BRANCH/scripts/deploy/deploy_docs.sh
# export PYTHONPATH=$PYTHONPATH:${PADDLE_ROOT}/build/python:/paddle/build/python
# cd ..
# curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH ${PADDLE_ROOT} ${PADDLE_ROOT}/build/doc/ ${PPO_SCRIPT_BRANCH}
# cd -
}
# Deploy to the the content server if its a "develop" or "release/version" branch
# The "develop_doc" branch is reserved to test full deploy process without impacting the real content.
if
[
"
$TRAVIS_BRANCH
"
==
"develop_doc"
]
;
then
PPO_SCRIPT_BRANCH
=
develop
elif
[[
"
$TRAVIS_BRANCH
"
==
"develop"
||
"
$TRAVIS_BRANCH
"
=
~ ^v|release/[[:digit:]]+
\.
[[
:digit:]]+
(
\.
[[
:digit:]]+
)
?
(
-
\S
*
)
?
$
]]
;
then
PPO_SCRIPT_BRANCH
=
master
else
# Early exit, this branch doesn't require documentation build
return
0
;
fi
# Fetch the paddlepaddle.org deploy_docs.sh from the appopriate branch
export
DEPLOY_DOCS_SH
=
https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/
$PPO_SCRIPT_BRANCH
/scripts/deploy/deploy_docs.sh
export
PYTHONPATH
=
$PYTHONPATH
:
${
PADDLE_ROOT
}
/build/python:/paddle/build/python
cd
..
curl
$DEPLOY_DOCS_SH
| bash
-s
$CONTENT_DEC_PASSWD
$TRAVIS_BRANCH
${
PADDLE_ROOT
}
${
PADDLE_ROOT
}
/build/doc/
${
PPO_SCRIPT_BRANCH
}
cd
-
function
gen_doc_lib
()
{
mkdir
-p
${
PADDLE_ROOT
}
/build
cd
${
PADDLE_ROOT
}
/build
cat
<<
EOF
========================================
Building documentation library ...
In /paddle/build
========================================
EOF
cmake ..
\
-DCMAKE_BUILD_TYPE
=
Release
\
-DWITH_DOC
=
ON
\
-DWITH_GPU
=
OFF
\
-DWITH_MKL
=
OFF
\
-DWITH_FLUID_ONLY
=
ON
local
LIB_TYPE
=
$1
case
$LIB_TYPE
in
full
)
# Build full Paddle Python module. Will timeout without caching 'copy_paddle_pybind' first
make
-j
`
nproc
`
gen_proto_py framework_py_proto copy_paddle_pybind paddle_python
;;
pybind
)
# Build paddle pybind library. Takes 49 minutes to build. Might timeout
make
-j
`
nproc
`
copy_paddle_pybind
;;
proto
)
# Even smaller library.
make
-j
`
nproc
`
framework_py_proto
;;
*
)
exit
0
;;
esac
}
function
gen_html
()
{
...
...
@@ -608,6 +645,9 @@ function main() {
doc
)
gen_docs
;;
gen_doc_lib
)
gen_doc_lib
$2
;;
html
)
gen_html
;;
...
...
python/paddle/fluid/tests/unittests/dist_transformer.py
浏览文件 @
bddd4bc0
...
...
@@ -92,7 +92,7 @@ class TrainTaskConfig(object):
src_vocab_fpath
=
data_path
+
"vocab.bpe.32000"
trg_vocab_fpath
=
data_path
+
"vocab.bpe.32000"
train_file_pattern
=
data_path
+
"train.tok.clean.bpe.32000.en-de"
val_file_pattern
=
data_path
+
"newstest2013.tok.bpe.32000.en-de"
val_file_pattern
=
data_path
+
"newstest2013.tok.bpe.32000.en-de
.cut
"
pool_size
=
2000
sort_type
=
None
local
=
True
...
...
@@ -624,6 +624,7 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
init
=
True
# Validate and save the model for inference.
if
batch_id
==
0
or
batch_id
==
4
:
if
TrainTaskConfig
.
val_file_pattern
is
not
None
:
val_avg_cost
,
val_ppl
=
test
()
print
(
"[%f]"
%
val_avg_cost
)
...
...
@@ -1701,8 +1702,9 @@ class DistTransformer2x2(TestDistRunnerBase):
exe
.
run
(
startup_prog
)
exe
.
run
(
pserver_prog
)
def
run_trainer
(
self
,
place
,
args
):
def
run_trainer
(
self
,
use_cuda
,
args
):
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
TrainTaskConfig
.
use_gpu
=
use_cuda
sum_cost
,
avg_cost
,
predict
,
token_num
,
local_lr_scheduler
=
get_model
(
args
.
is_dist
,
not
args
.
sync_mode
)
...
...
python/paddle/fluid/tests/unittests/test_dist_base.py
浏览文件 @
bddd4bc0
...
...
@@ -61,9 +61,10 @@ class TestDistRunnerBase(object):
exe
.
run
(
startup_prog
)
exe
.
run
(
pserver_prog
)
def
run_trainer
(
self
,
place
,
args
):
def
run_trainer
(
self
,
use_cuda
,
args
):
import
paddle
import
paddle.fluid
as
fluid
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
test_program
,
avg_cost
,
train_reader
,
test_reader
,
batch_acc
,
predict
=
\
self
.
get_model
(
batch_size
=
2
)
if
args
.
mem_opt
:
...
...
@@ -91,7 +92,7 @@ class TestDistRunnerBase(object):
build_stra
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
exe
=
fluid
.
ParallelExecutor
(
True
,
use_cuda
,
loss_name
=
avg_cost
.
name
,
exec_strategy
=
strategy
,
build_strategy
=
build_stra
)
...
...
@@ -142,9 +143,8 @@ def runtime_main(test_class):
if
args
.
role
==
"pserver"
and
args
.
is_dist
:
model
.
run_pserver
(
args
)
else
:
p
=
fluid
.
CUDAPlace
(
0
)
if
core
.
is_compiled_with_cuda
(
)
else
fluid
.
CPUPlace
()
model
.
run_trainer
(
p
,
args
)
use_cuda
=
True
if
core
.
is_compiled_with_cuda
()
else
False
model
.
run_trainer
(
use_cuda
,
args
)
import
paddle.compat
as
cpt
...
...
@@ -225,11 +225,12 @@ class TestDistBase(unittest.TestCase):
def
check_with_place
(
self
,
model_file
,
delta
=
1e-3
,
check_error_log
=
False
):
# TODO(typhoonzero): should auto adapt GPU count on the machine.
required_envs
=
{
"PATH"
:
os
.
getenv
(
"PATH"
),
"PYTHONPATH"
:
os
.
getenv
(
"PYTHONPATH"
),
"LD_LIBRARY_PATH"
:
os
.
getenv
(
"LD_LIBRARY_PATH"
),
"PATH"
:
os
.
getenv
(
"PATH"
,
""
),
"PYTHONPATH"
:
os
.
getenv
(
"PYTHONPATH"
,
""
),
"LD_LIBRARY_PATH"
:
os
.
getenv
(
"LD_LIBRARY_PATH"
,
""
),
"FLAGS_fraction_of_gpu_memory_to_use"
:
"0.15"
,
"FLAGS_cudnn_deterministic"
:
"1"
"FLAGS_cudnn_deterministic"
:
"1"
,
"CPU_NUM"
:
"1"
}
if
check_error_log
:
...
...
python/paddle/fluid/tests/unittests/test_dist_transformer.py
浏览文件 @
bddd4bc0
...
...
@@ -14,6 +14,7 @@
from
__future__
import
print_function
import
os
import
unittest
import
paddle
from
test_dist_base
import
TestDistBase
...
...
@@ -44,6 +45,14 @@ def download_files():
test_url
=
url_prefix
+
'newstest2013.tok.bpe.32000.en-de'
test_md5
=
'9dd74a266dbdb25314183899f269b4a2'
paddle
.
dataset
.
common
.
download
(
test_url
,
'test_dist_transformer'
,
test_md5
)
# cut test data for faster CI
orig_path
=
os
.
path
.
join
(
paddle
.
dataset
.
common
.
DATA_HOME
,
"test_dist_transformer"
,
"newstest2013.tok.bpe.32000.en-de"
)
head_path
=
os
.
path
.
join
(
paddle
.
dataset
.
common
.
DATA_HOME
,
"test_dist_transformer"
,
"newstest2013.tok.bpe.32000.en-de.cut"
)
os
.
system
(
"head -n10 %s > %s"
%
(
orig_path
,
head_path
))
class
TestDistTransformer2x2Sync
(
TestDistBase
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录