PaddlePaddle / Paddle

Commit 8edf60ce

Authored Sep 16, 2018 by Yibing Liu

Merge branch 'develop' of upstream into fix_seq_pad

Parents: ce773ed7, 437debf4

Showing 16 changed files with 269 additions and 91 deletions (+269, -91)
cmake/tensorrt.cmake (+2, -0)
paddle/fluid/framework/ir/graph_pattern_detector.h (+1, -1)
paddle/fluid/inference/api/api.cc (+9, -7)
paddle/fluid/inference/tests/api/CMakeLists.txt (+1, -1)
paddle/fluid/operators/conv_mkldnn_op.cc (+31, -25)
paddle/fluid/operators/conv_op.cc (+5, -0)
paddle/fluid/operators/distributed/grpc_client.cc (+2, -2)
paddle/fluid/operators/distributed/proto_encoder_helper.h (+3, -1)
paddle/fluid/operators/listen_and_serv_op.cc (+10, -11)
paddle/fluid/operators/math/sequence_pooling.cc (+62, -4)
paddle/fluid/operators/prelu_op.cc (+6, -3)
paddle/scripts/paddle_build.sh (+57, -17)
python/paddle/fluid/tests/unittests/dist_transformer.py (+10, -8)
python/paddle/fluid/tests/unittests/test_dist_base.py (+10, -9)
python/paddle/fluid/tests/unittests/test_dist_transformer.py (+9, -0)
python/paddle/fluid/transpiler/inference_transpiler.py (+51, -2)
cmake/tensorrt.cmake

@@ -16,7 +16,9 @@ find_library(TENSORRT_LIBRARY NAMES libnvinfer.so libnvinfer.a
   DOC "Path to TensorRT library.")
 
 if(TENSORRT_INCLUDE_DIR AND TENSORRT_LIBRARY)
+  if(WITH_DSO)
   set(TENSORRT_FOUND ON)
+  endif(WITH_DSO)
 else()
   set(TENSORRT_FOUND OFF)
 endif()
paddle/fluid/framework/ir/graph_pattern_detector.h

@@ -429,7 +429,7 @@ struct LSTM : public PatternBase {
 struct GRU : public PatternBase {
   GRU(PDPattern* pattern, const std::string& name_scope)
-      : PatternBase(pattern, name_scope, "lstm") {}
+      : PatternBase(pattern, name_scope, "gru") {}
 
   PDNode* operator()(PDNode* x);
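Note (not part of the commit): the one-word fix matters because PatternBase takes its third constructor argument as the pattern's repr string, from which node names are derived; a GRU pattern labelled "lstm" produces keys that look like, and can collide with, genuine LSTM ones. The sketch below is a hypothetical illustration of that failure mode, not the real PDPattern API.

```cpp
#include <iostream>
#include <map>
#include <string>

// Hypothetical key scheme: node keys are built from name_scope plus the
// pattern's repr string, so a GRU pattern constructed with repr "lstm"
// claims keys that clash with a real LSTM pattern in the same scope.
std::string NodeKey(const std::string& name_scope, const std::string& repr,
                    const std::string& arg) {
  return name_scope + "/" + repr + "/" + arg;
}

int main() {
  std::map<std::string, int> nodes;
  nodes[NodeKey("scope", "lstm", "Hidden")] = 1;  // genuine LSTM node
  // Before the fix, a GRU pattern would generate the very same key:
  std::cout << "collision: " << nodes.count(NodeKey("scope", "lstm", "Hidden"))
            << "\n";
  // After the fix it keys under "gru" and stays distinct:
  std::cout << "distinct:  " << nodes.count(NodeKey("scope", "gru", "Hidden"))
            << "\n";
}
```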
paddle/fluid/inference/api/api.cc

@@ -9,8 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <glog/logging.h>
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/platform/enforce.h"
 
 namespace paddle {

@@ -64,13 +64,15 @@ PaddleBuf& PaddleBuf::operator=(PaddleBuf&& other) {
 void PaddleBuf::Resize(size_t length) {
   // Only the owned memory can be reset, the external memory can't be changed.
-  if (length_ == length) return;
+  if (length_ >= length) return;
   if (memory_owned_) {
     Free();
-    data_ = new char[length];
+    data_ = malloc(length);
     length_ = length;
     memory_owned_ = true;
+  } else {
+    PADDLE_THROW("The memory is allocated externally, can not Resized");
   }
 }
 
 void PaddleBuf::Reset(void* data, size_t length) {

@@ -82,8 +84,8 @@ void PaddleBuf::Reset(void* data, size_t length) {
 void PaddleBuf::Free() {
   if (memory_owned_ && data_) {
-    assert(length_ > 0);
-    delete[] static_cast<char*>(data_);
+    PADDLE_ENFORCE_GT(length_, 0);
+    free(static_cast<char*>(data_));
     data_ = nullptr;
     length_ = 0;
   }
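Note (not part of the commit): taken together, these hunks make PaddleBuf manage its own storage with malloc/free and refuse to resize memory it merely borrowed. A minimal self-contained sketch of that ownership rule, with std::runtime_error standing in for PADDLE_THROW and `Buf` as a hypothetical stand-in type:

```cpp
#include <cstddef>
#include <cstdlib>
#include <stdexcept>

// Minimal sketch of the ownership rule the api.cc hunks enforce; the real
// PaddleBuf carries more state (move semantics, copy rules, etc.).
class Buf {
 public:
  void Resize(std::size_t length) {
    if (length_ >= length) return;  // never shrink, matching the new check
    if (!memory_owned_)
      throw std::runtime_error("externally owned memory cannot be resized");
    Free();
    data_ = std::malloc(length);  // malloc/free instead of new[]/delete[]
    length_ = length;
  }
  void Reset(void* data, std::size_t length) {  // adopt external memory
    Free();
    data_ = data;
    length_ = length;
    memory_owned_ = false;
  }
  ~Buf() { Free(); }

 private:
  void Free() {
    if (memory_owned_ && data_) std::free(data_);  // never frees borrowed memory
    data_ = nullptr;
    length_ = 0;
  }
  void* data_ = nullptr;
  std::size_t length_ = 0;
  bool memory_owned_ = true;
};

int main() {
  Buf b;
  b.Resize(64);  // grows owned storage
  b.Resize(16);  // no-op: already large enough
  char external[8];
  b.Reset(external, sizeof(external));
  try {
    b.Resize(128);  // throws: buffer no longer owns its memory
  } catch (const std::runtime_error&) {
    // expected: mirrors the PADDLE_THROW branch above
  }
}
```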
paddle/fluid/inference/tests/api/CMakeLists.txt

@@ -53,7 +53,7 @@ set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classifi
 download_model_and_data(${TEXT_CLASSIFICATION_INSTALL_DIR} "text-classification-Senta.tar.gz"
     "text_classification_data.txt.tar.gz")
 inference_analysis_test(test_analyzer_text_classification SRCS analyzer_text_classification_tester.cc
     EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
-    ARGS --infer_model=${TEXT_CLASSIFICATION_INSTALL_DIR}/text-classification-Senta
+    ARGS --infer_model=${TEXT_CLASSIFICATION_INSTALL_DIR}/model
     --infer_data=${TEXT_CLASSIFICATION_INSTALL_DIR}/data.txt)
 
 # ocr
paddle/fluid/operators/conv_mkldnn_op.cc

@@ -300,6 +300,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
     std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
     bool fuse_relu = ctx.Attr<bool>("fuse_relu");
+    bool fuse_eltwise = ctx.Attr<bool>("fuse_eltwise");
     int groups = ctx.Attr<int>("groups");
 
     // TODO: add support for dilation

@@ -366,12 +367,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       bias_tz = paddle::framework::vectorize2int(bias->dims());
       auto bias_md = platform::MKLDNNMemDesc(
           bias_tz, platform::MKLDNNGetDataType<T>(), memory::format::x);
-      conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md,
-                                     strides, paddings, mkldnn_engine,
-                                     fuse_relu);
+      conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md,
+                                     strides, paddings, mkldnn_engine,
+                                     fuse_relu, fuse_eltwise);
     } else {
-      conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides,
-                                     paddings, mkldnn_engine, fuse_relu);
+      conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides,
+                                     paddings, mkldnn_engine, fuse_relu,
+                                     fuse_eltwise);
     }
     // Save conv_pd/src_memory/weights_memory for backward pass
     dev_ctx.SetBlob(key_conv_pd, conv_pd);

@@ -421,16 +423,26 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
   }
 
  private:
-  mkldnn::primitive_attr AddRelu() const {
+  mkldnn::primitive_attr CreatePostOps(bool fuse_relu,
+                                       bool fuse_eltwise) const {
+    mkldnn::primitive_attr conv_attr;
+    mkldnn::post_ops post_operations;
+    // Fusion with Elementwise layer relies on adding a sum post-operation with
+    // the scale parameter. It is assumed that when fuse_eltwise is true, the
+    // Output tensor contains the data coming from residual connection. The
+    // result of this post_op is: Output = scale * Output + Conv_Out.
+    if (fuse_eltwise) {
+      post_operations.append_sum(1.0f);
+    }
     // Fusion with ReLU layer is executed through the PostOps feature. Create a
     // PostOps object and configure it to execute an eltwise relu operation.
-    mkldnn::primitive_attr conv_attr;
-    constexpr float scale = 1.0f;
-    constexpr float negative_slope = 0.0f;
-    constexpr float placeholder = 0.0f;
-    mkldnn::post_ops post_operations;
-    post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu,
-                                   negative_slope, placeholder);
+    if (fuse_relu) {
+      constexpr float scale = 1.0f;
+      constexpr float negative_slope = 0.0f;
+      constexpr float placeholder = 0.0f;
+      post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu,
+                                     negative_slope, placeholder);
+    }
     conv_attr.set_post_ops(post_operations);
     return conv_attr;
   }

@@ -439,8 +451,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
   ConvFwdPrimitiveDesc(const memory::desc& src, const memory::desc& weights,
                        const memory::desc& dst, const std::vector<int>& strides,
                        const std::vector<int>& paddings,
-                       const mkldnn::engine& engine, const bool fuse_relu) const {
+                       const mkldnn::engine& engine, const bool fuse_relu,
+                       const bool fuse_eltwise) const {
     memory::dims stride_dims = {strides[0], strides[1]};
     memory::dims padding_dims = {paddings[0], paddings[1]};

@@ -449,10 +461,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         dst, stride_dims, padding_dims, padding_dims,
         mkldnn::padding_kind::zero);
 
-    mkldnn::primitive_attr conv_attr;
-    if (fuse_relu) {
-      conv_attr = AddRelu();
-    }
+    mkldnn::primitive_attr conv_attr = CreatePostOps(fuse_relu, fuse_eltwise);
 
     auto p_conv_pd = new mkldnn::convolution_forward::primitive_desc(
         conv_desc, conv_attr, engine);

@@ -466,8 +475,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
   ConvFwdPrimitiveDesc(const memory::desc& src, const memory::desc& weights,
                        const memory::desc& bias, const memory::desc& dst,
                        const std::vector<int>& strides,
                        const std::vector<int>& paddings,
-                       const mkldnn::engine& engine, const bool fuse_relu) const {
+                       const mkldnn::engine& engine, const bool fuse_relu,
+                       const bool fuse_eltwise) const {
     memory::dims stride_dims = {strides[0], strides[1]};
     memory::dims padding_dims = {paddings[0], paddings[1]};

@@ -476,10 +485,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         bias, dst, stride_dims, padding_dims, padding_dims,
         mkldnn::padding_kind::zero);
 
-    mkldnn::primitive_attr conv_attr;
-    if (fuse_relu) {
-      conv_attr = AddRelu();
-    }
+    mkldnn::primitive_attr conv_attr = CreatePostOps(fuse_relu, fuse_eltwise);
 
     auto p_conv_pd = new mkldnn::convolution_forward::primitive_desc(
         conv_desc, conv_attr, engine);
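Note (not part of the commit): the comment block inside CreatePostOps pins down the fused semantics — when fuse_eltwise is set, the destination tensor already holds the residual data and the sum post-op computes Output = scale * Output + Conv_Out, with an optional ReLU applied afterwards. A plain-array sketch of that arithmetic, with no MKL-DNN dependency; apply_post_ops and the concrete values are illustrative only:

```cpp
#include <cstdio>

// Plain-array model of the post-op chain CreatePostOps configures:
// with fuse_eltwise the output already holds the residual and the sum
// post-op computes scale * output + conv_out; fuse_relu then clamps
// negatives to zero (eltwise_relu with negative_slope 0).
void apply_post_ops(float* output, const float* conv_out, int n, float scale,
                    bool fuse_eltwise, bool fuse_relu) {
  for (int i = 0; i < n; ++i) {
    float v = fuse_eltwise ? scale * output[i] + conv_out[i] : conv_out[i];
    if (fuse_relu && v < 0.f) v = 0.f;
    output[i] = v;
  }
}

int main() {
  float residual[3] = {1.f, -2.f, 3.f};   // data from the skip connection
  float conv_out[3] = {-4.f, 1.f, 0.5f};  // pretend convolution result
  apply_post_ops(residual, conv_out, 3, 1.0f, /*fuse_eltwise=*/true,
                 /*fuse_relu=*/true);
  for (float v : residual) std::printf("%g ", v);  // prints: 0 0 3.5
}
```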
paddle/fluid/operators/conv_op.cc

@@ -164,6 +164,11 @@ void Conv2DOpMaker::Make() {
       .SetDefault(false);
   AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
       .SetDefault(false);
+  AddAttr<bool>("fuse_eltwise",
+                "(bool, default false) Only used in mkldnn kernel. Used "
+                "whenever convolution output is connected via skip connection "
+                "to a previous layer.")
+      .SetDefault(false);
   AddAttr<std::string>(
       "data_format",
       "(string, default NCHW) Only used in "
paddle/fluid/operators/distributed/grpc_client.cc

@@ -125,7 +125,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep,
   VarHandlePtr h(new VarHandle(ep, "Get", var_name_val, p_ctx, p_scope));
   s->Prepare(h, time_out);
 
-  framework::AsyncIO([var_name_val, p_scope, p_ctx, s, this] {
+  framework::AsyncIO([var_name_val, s, this] {
     // prepare input
     sendrecv::VariableMessage req;
     req.set_varname(var_name_val);

@@ -166,7 +166,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep,
   s->Prepare(h, time_out);
 
   framework::AsyncIO([in_var_name_val, out_var_name_val, ep_val, p_scope, p_ctx,
-                      time_out, s, this] {
+                      s, this] {
     auto* var = p_scope->FindVar(in_var_name_val);
 
     ::grpc::ByteBuffer req;
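Note (not part of the commit): both hunks trim the lambdas' capture lists to just what their bodies use (p_scope and p_ctx in the first, time_out in the second were captured needlessly). A standalone sketch of the general point, with hypothetical names: a task queued for later execution should capture by value only what it needs, since every extra capture widens the closure's lifetime assumptions.

```cpp
#include <functional>
#include <iostream>
#include <string>
#include <vector>

// Standalone illustration of capture-list trimming: the queue stands in
// for framework::AsyncIO, and the closure owns a copy of the one string
// its body actually reads.
int main() {
  std::vector<std::function<void()>> queue;
  std::string var_name_val = "weights@GRAD";

  // Capture only what the body uses, by value.
  queue.emplace_back([var_name_val] {
    std::cout << "preparing request for " << var_name_val << "\n";
  });

  var_name_val.clear();             // caller state may change before the task runs
  for (auto& task : queue) task();  // still prints the captured copy
}
```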
paddle/fluid/operators/distributed/proto_encoder_helper.h

@@ -82,8 +82,10 @@ class ProtoEncodeHelper {
       : base_(buf), p_(buf), limit_(base_ + max_size) {}
 
   ~ProtoEncodeHelper() {
+#define REPLACE_ENFORCE_GLOG 1
     // Make sure callers didn't do operations that went over max_size promised
-    PADDLE_ENFORCE_LE(p_, limit_);
+    paddle::platform::throw_on_error(p_ <= limit_);
+#undef REPLACE_ENFORCE_GLOG
   }
 
   const char* data() const { return base_; }
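Note (not part of the commit): the destructor keeps the same invariant — a writer promised max_size bytes must finish with p_ <= limit_ — while routing the check through paddle::platform::throw_on_error rather than the glog-backed PADDLE_ENFORCE_LE. A minimal RAII sketch of that invariant, with assert() standing in for the error helper and BoundedWriter as a hypothetical name:

```cpp
#include <cassert>
#include <cstddef>
#include <cstring>

// Minimal model of the invariant ProtoEncodeHelper's destructor checks:
// writes must stay within the capacity promised at construction, and the
// overrun check fires once, at scope exit.
class BoundedWriter {
 public:
  BoundedWriter(char* buf, std::size_t max_size)
      : base_(buf), p_(buf), limit_(base_ + max_size) {}
  ~BoundedWriter() { assert(p_ <= limit_); }  // overrun detected at scope exit

  void Append(const char* data, std::size_t n) {
    std::memcpy(p_, data, n);
    p_ += n;
  }

 private:
  char* base_;
  char* p_;
  char* limit_;
};

int main() {
  char buf[16];
  BoundedWriter w(buf, sizeof(buf));
  w.Append("0123456789", 10);  // within the promised 16 bytes
}  // destructor check passes
```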
paddle/fluid/operators/listen_and_serv_op.cc

@@ -59,8 +59,7 @@ static void ParallelExecuteBlocks(
     framework::ProgramDesc *program, framework::Scope *scope) {
   std::vector<std::future<void>> fs;
   for (size_t idx : parallel_blkids) {
-    fs.push_back(
-        framework::Async([&executor, &prepared, &program, &scope, idx]() {
+    fs.push_back(framework::Async([&executor, &prepared, &scope, idx]() {
       int run_block = idx;  // thread local
       try {
         VLOG(3) << "running server block: " << run_block
paddle/fluid/operators/math/sequence_pooling.cc

@@ -103,6 +103,58 @@ class MaxSeqPoolGradFunctor {
   }
 };
 
+template <typename T>
+class LastSeqPoolFunctor {
+ public:
+  void operator()(const platform::CPUDeviceContext& context,
+                  const framework::LoDTensor& input,
+                  framework::Tensor* output) {
+    // Create pointers to input and output data
+    auto* in_data = input.data<T>();
+    auto* out_data = output->data<T>();
+
+    // Calculate the size of each item in sequence
+    int64_t item_size = input.numel() / input.dims()[0];
+    auto lod = input.lod()[0];
+    int seq_num = static_cast<int>(lod.size()) - 1;
+    for (int i = 0; i < seq_num; ++i) {
+      // Calculate the length of each sequence
+      int64_t seq_len = static_cast<int64_t>(lod[i + 1] - lod[i]);
+      // Point to the begin of next sequence
+      in_data += seq_len * item_size;
+      // Copy the last item of sequence to output
+      std::memcpy(out_data, (in_data - item_size), item_size * sizeof(T));
+      out_data += item_size;
+    }
+  }
+};
+
+template <typename T>
+class FirstSeqPoolFunctor {
+ public:
+  void operator()(const platform::CPUDeviceContext& context,
+                  const framework::LoDTensor& input,
+                  framework::Tensor* output) {
+    // Create pointers to input and output data
+    auto* in_data = input.data<T>();
+    auto* out_data = output->data<T>();
+
+    // Calculate the size of each item in sequence
+    int64_t item_size = input.numel() / input.dims()[0];
+    auto lod = input.lod()[0];
+    int seq_num = static_cast<int>(lod.size()) - 1;
+    for (int i = 0; i < seq_num; ++i) {
+      // Calculate the length of each sequence
+      int64_t seq_len = static_cast<int64_t>(lod[i + 1] - lod[i]);
+      // Copy the first item of sequence to output
+      std::memcpy(out_data, in_data, item_size * sizeof(T));
+      // Point to the next sequence
+      in_data += seq_len * item_size;
+      out_data += item_size;
+    }
+  }
+};
+
 template <typename T>
 class SequencePoolFunctor<platform::CPUDeviceContext, T> {
  public:

@@ -116,6 +168,16 @@ class SequencePoolFunctor<platform::CPUDeviceContext, T> {
       max_pool(context, input, output, index);
       return;
     }
+    if (pooltype == "LAST") {
+      math::LastSeqPoolFunctor<T> last_pool;
+      last_pool(context, input, output);
+      return;
+    }
+    if (pooltype == "FIRST") {
+      math::FirstSeqPoolFunctor<T> first_pool;
+      first_pool(context, input, output);
+      return;
+    }
     auto lod = input.lod()[0];
     auto& place = *context.eigen_device();
     for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {

@@ -133,10 +195,6 @@ class SequencePoolFunctor<platform::CPUDeviceContext, T> {
       } else if (pooltype == "SQRT") {
         out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
                               std::sqrt(static_cast<T>(h));
-      } else if (pooltype == "LAST") {
-        out_e.device(place) = in_e.chip(h - 1, 0);
-      } else if (pooltype == "FIRST") {
-        out_e.device(place) = in_e.chip(0, 0);
       } else {
         PADDLE_THROW("unsupported pooling pooltype");
       }
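Note (not part of the commit): both new functors walk the LoD offsets and memcpy exactly one item per sequence instead of taking an Eigen chip per sequence. A self-contained sketch of the LAST case over a flat row-major array, with lod holding sequence boundaries as in the functor above; LastPool and the concrete numbers are illustrative only:

```cpp
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <vector>

// lod holds sequence boundaries over the rows of a flattened
// [rows x width] input; the last row of each sequence is copied out.
void LastPool(const float* in, const std::vector<std::size_t>& lod,
              std::size_t width, float* out) {
  int seq_num = static_cast<int>(lod.size()) - 1;
  for (int i = 0; i < seq_num; ++i) {
    const float* last_row = in + (lod[i + 1] - 1) * width;  // last item
    std::memcpy(out + i * width, last_row, width * sizeof(float));
  }
}

int main() {
  // Two sequences over 5 rows of width 2: rows [0,3) and [3,5).
  float in[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  std::vector<std::size_t> lod = {0, 3, 5};
  float out[4];
  LastPool(in, lod, 2, out);
  for (float v : out) std::printf("%g ", v);  // prints: 4 5 8 9
}
```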
paddle/fluid/operators/prelu_op.cc

@@ -26,10 +26,13 @@ class PReluOp : public framework::OperatorWithKernel {
     std::string mode = ctx->Attrs().Get<std::string>("mode");
 
     auto x_dim = ctx->GetInputDim("X");
-    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
-    PADDLE_ENFORCE(ctx->HasInput("Alpha"), "Input(Alpha) should not be null");
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of PreluOp should not be null");
+    PADDLE_ENFORCE(ctx->HasInput("Alpha"),
+                   "Input(Alpha) of PreluOp should not be null");
 
-    PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of PreluOp should not be null");
     if (mode == "all") {
       PADDLE_ENFORCE(product(ctx->GetInputDim("Alpha")) == 1,
                      "For mode 'all', size of weight Alpha must be one.");
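Note (not part of the commit): the messages above guard PReLU's 'all' mode, where the checks require the Alpha weight to hold exactly one element shared across the whole input. As a reminder of the operator being checked, PReLU computes f(x) = x for x > 0 and f(x) = alpha * x otherwise; a scalar sketch with prelu_all as a hypothetical helper name:

```cpp
#include <cstdio>

// PReLU in 'all' mode: one shared alpha scales the negative part.
void prelu_all(const float* x, int n, float alpha, float* out) {
  for (int i = 0; i < n; ++i) out[i] = x[i] > 0.f ? x[i] : alpha * x[i];
}

int main() {
  float x[4] = {-2.f, -0.5f, 0.f, 3.f};
  float y[4];
  prelu_all(x, 4, 0.25f, y);
  for (float v : y) std::printf("%g ", v);  // prints: -0.5 -0.125 0 3
}
```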
paddle/scripts/paddle_build.sh

@@ -33,6 +33,7 @@ function print_usage() {
     ${BLUE}single_test${NONE}: run a single unit test
     ${BLUE}bind_test${NONE}: parallel tests bind to different GPU
     ${BLUE}doc${NONE}: generate paddle documents
+    ${BLUE}gen_doc_lib${NONE}: generate paddle documents library
     ${BLUE}html${NONE}: convert C++ source code into HTML
     ${BLUE}dockerfile${NONE}: generate paddle release dockerfile
     ${BLUE}capi${NONE}: generate paddle CAPI package

@@ -431,24 +432,60 @@ EOF
     linkchecker doc/v2/cn/html/index.html
     linkchecker doc/v2/api/en/html/index.html
 
-    if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
-
-    # Deploy to the the content server if its a "develop" or "release/version" branch
-    # The "develop_doc" branch is reserved to test full deploy process without impacting the real content.
-    if [ "$TRAVIS_BRANCH" == "develop_doc" ]; then
-        PPO_SCRIPT_BRANCH=develop
-    elif [[ "$TRAVIS_BRANCH" == "develop" || "$TRAVIS_BRANCH" =~ ^v|release/[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then
-        PPO_SCRIPT_BRANCH=master
-    else
-        # Early exit, this branch doesn't require documentation build
-        return 0;
-    fi
-    # Fetch the paddlepaddle.org deploy_docs.sh from the appopriate branch
-    export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/$PPO_SCRIPT_BRANCH/scripts/deploy/deploy_docs.sh
-    export PYTHONPATH=$PYTHONPATH:${PADDLE_ROOT}/build/python:/paddle/build/python
-    cd ..
-    curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH ${PADDLE_ROOT} ${PADDLE_ROOT}/build/doc/ ${PPO_SCRIPT_BRANCH}
-    cd -
+    # if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
+    #
+    # # Deploy to the the content server if its a "develop" or "release/version" branch
+    # # The "develop_doc" branch is reserved to test full deploy process without impacting the real content.
+    # if [ "$TRAVIS_BRANCH" == "develop_doc" ]; then
+    #    PPO_SCRIPT_BRANCH=develop
+    # elif [[ "$TRAVIS_BRANCH" == "develop" || "$TRAVIS_BRANCH" =~ ^v|release/[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then
+    #    PPO_SCRIPT_BRANCH=master
+    # else
+    #    # Early exit, this branch doesn't require documentation build
+    #    return 0;
+    # fi
+    # # Fetch the paddlepaddle.org deploy_docs.sh from the appopriate branch
+    # export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/$PPO_SCRIPT_BRANCH/scripts/deploy/deploy_docs.sh
+    # export PYTHONPATH=$PYTHONPATH:${PADDLE_ROOT}/build/python:/paddle/build/python
+    # cd ..
+    # curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH ${PADDLE_ROOT} ${PADDLE_ROOT}/build/doc/ ${PPO_SCRIPT_BRANCH}
+    # cd -
 }
 
+function gen_doc_lib() {
+    mkdir -p ${PADDLE_ROOT}/build
+    cd ${PADDLE_ROOT}/build
+    cat <<EOF
+    ========================================
+    Building documentation library ...
+    In /paddle/build
+    ========================================
+EOF
+    cmake .. \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DWITH_DOC=ON \
+        -DWITH_GPU=OFF \
+        -DWITH_MKL=OFF \
+        -DWITH_FLUID_ONLY=ON
+
+    local LIB_TYPE=$1
+    case $LIB_TYPE in
+      full)
+        # Build full Paddle Python module. Will timeout without caching 'copy_paddle_pybind' first
+        make -j `nproc` gen_proto_py framework_py_proto copy_paddle_pybind paddle_python
+        ;;
+      pybind)
+        # Build paddle pybind library. Takes 49 minutes to build. Might timeout
+        make -j `nproc` copy_paddle_pybind
+        ;;
+      proto)
+        # Even smaller library.
+        make -j `nproc` framework_py_proto
+        ;;
+      *)
+        exit 0
+        ;;
+    esac
+}
+
 function gen_html() {

@@ -608,6 +645,9 @@ function main() {
       doc)
         gen_docs
        ;;
+      gen_doc_lib)
+        gen_doc_lib $2
+        ;;
       html)
         gen_html
        ;;
python/paddle/fluid/tests/unittests/dist_transformer.py

@@ -92,7 +92,7 @@ class TrainTaskConfig(object):
     src_vocab_fpath = data_path + "vocab.bpe.32000"
     trg_vocab_fpath = data_path + "vocab.bpe.32000"
     train_file_pattern = data_path + "train.tok.clean.bpe.32000.en-de"
-    val_file_pattern = data_path + "newstest2013.tok.bpe.32000.en-de"
+    val_file_pattern = data_path + "newstest2013.tok.bpe.32000.en-de.cut"
     pool_size = 2000
     sort_type = None
     local = True

@@ -624,6 +624,7 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
             init = True
 
+            # Validate and save the model for inference.
             if batch_id == 0 or batch_id == 4:
                 if TrainTaskConfig.val_file_pattern is not None:
                     val_avg_cost, val_ppl = test()
                     print("[%f]" % val_avg_cost)

@@ -1701,8 +1702,9 @@ class DistTransformer2x2(TestDistRunnerBase):
         exe.run(startup_prog)
         exe.run(pserver_prog)
 
-    def run_trainer(self, place, args):
+    def run_trainer(self, use_cuda, args):
+        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+        TrainTaskConfig.use_gpu = use_cuda
         sum_cost, avg_cost, predict, token_num, local_lr_scheduler = get_model(
             args.is_dist, not args.sync_mode)
python/paddle/fluid/tests/unittests/test_dist_base.py

@@ -61,9 +61,10 @@ class TestDistRunnerBase(object):
         exe.run(startup_prog)
         exe.run(pserver_prog)
 
-    def run_trainer(self, place, args):
+    def run_trainer(self, use_cuda, args):
         import paddle
         import paddle.fluid as fluid
+        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
         test_program, avg_cost, train_reader, test_reader, batch_acc, predict = \
             self.get_model(batch_size=2)
         if args.mem_opt:

@@ -91,7 +92,7 @@ class TestDistRunnerBase(object):
             build_stra.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce
 
         exe = fluid.ParallelExecutor(
-            True,
+            use_cuda,
             loss_name=avg_cost.name,
             exec_strategy=strategy,
             build_strategy=build_stra)

@@ -142,9 +143,8 @@ def runtime_main(test_class):
     if args.role == "pserver" and args.is_dist:
         model.run_pserver(args)
     else:
-        p = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
-        ) else fluid.CPUPlace()
-        model.run_trainer(p, args)
+        use_cuda = True if core.is_compiled_with_cuda() else False
+        model.run_trainer(use_cuda, args)
 
 import paddle.compat as cpt

@@ -225,11 +225,12 @@ class TestDistBase(unittest.TestCase):
     def check_with_place(self, model_file, delta=1e-3, check_error_log=False):
         # TODO(typhoonzero): should auto adapt GPU count on the machine.
         required_envs = {
-            "PATH": os.getenv("PATH"),
-            "PYTHONPATH": os.getenv("PYTHONPATH"),
-            "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH"),
+            "PATH": os.getenv("PATH", ""),
+            "PYTHONPATH": os.getenv("PYTHONPATH", ""),
+            "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""),
             "FLAGS_fraction_of_gpu_memory_to_use": "0.15",
-            "FLAGS_cudnn_deterministic": "1"
+            "FLAGS_cudnn_deterministic": "1",
+            "CPU_NUM": "1"
         }
 
         if check_error_log:
python/paddle/fluid/tests/unittests/test_dist_transformer.py

@@ -14,6 +14,7 @@
 from __future__ import print_function
 
+import os
 import unittest
 import paddle
 from test_dist_base import TestDistBase

@@ -44,6 +45,14 @@ def download_files():
     test_url = url_prefix + 'newstest2013.tok.bpe.32000.en-de'
     test_md5 = '9dd74a266dbdb25314183899f269b4a2'
     paddle.dataset.common.download(test_url, 'test_dist_transformer', test_md5)
 
+    # cut test data for faster CI
+    orig_path = os.path.join(paddle.dataset.common.DATA_HOME,
+                             "test_dist_transformer",
+                             "newstest2013.tok.bpe.32000.en-de")
+    head_path = os.path.join(paddle.dataset.common.DATA_HOME,
+                             "test_dist_transformer",
+                             "newstest2013.tok.bpe.32000.en-de.cut")
+    os.system("head -n10 %s > %s" % (orig_path, head_path))
+
 
 class TestDistTransformer2x2Sync(TestDistBase):
python/paddle/fluid/transpiler/inference_transpiler.py

@@ -65,8 +65,43 @@ class InferenceTranspiler(object):
         if use_mkldnn:
             self._fuse_conv_bias_mkldnn(program)
-            self._fuse_conv_relu_mkldnn(program)
+            self._fuse_conv_eltwise_mkldnn(program)
+            self._fuse_conv_relu_mkldnn(program)  # ResNet residual block merging
             self._fuse_bn_relu_mkldnn(program)
 
+    def _fuse_conv_eltwise_mkldnn(self, program):
+        '''
+        Transpile the program fusing elementwise_add into conv for MKLDNN
+        program. Elementwise add following convolution OP can be fused by adding
+        'fuse_eltwise' attribute to convolution OP and replacing its output
+        Tensor with second parameter of elementwise_add.
+        The result of fuse is:
+            - before:
+                - conv->elementwise_add->any_other_op
+            - after:
+                - conv->any_other_op
+        :param program: program to transpile
+        :type program: Program
+        '''
+        self.block = program.block(0)
+
+        i = 0
+        while i < len(self.block.ops):
+            current_op = self.block.ops[i]
+            if current_op.type in ['conv2d']:
+                next_op = self.block.ops[i + 1]
+                if next_op.type == 'elementwise_add':
+                    self._fuse_conv_eltwise(current_op, next_op)
+                    self.block._remove_op(i + 1)  # Remove elementwise_add
+            i = i + 1
+        self._adjust_input()
+        self._remove_unused_var()
+        # TODO(luotao): use clone() method to flush the program.desc in force,
+        # since some large program.desc will not be flushed immediately.
+        # And a better solution will be considered later.
+        program = program.clone()
+
     def _fuse_conv_relu_mkldnn(self, program):
         '''
         Transpile the program by fused relu activation for MKLDNN program.

@@ -88,9 +123,9 @@ class InferenceTranspiler(object):
             if current_op.type in ['conv2d']:
                 next_op = self.block.ops[i + 1]
                 if next_op.type == 'relu':
-                    # modify conv OP to include relu
+                    # modify bnorm OP to include relu
                     current_op.set_attr("fuse_relu", True)
-                    # remove conv OP
+                    # remove relu OP
                     self.block._remove_op(i + 1)
             i = i + 1

@@ -409,6 +444,20 @@ class InferenceTranspiler(object):
             outputs={"Output": out_var},
             attrs=attrs)
 
+    def _fuse_conv_eltwise(self, conv_op, eltwise_op):
+        '''
+        fuse the conv op with elementwise_add
+
+        :param conv_op: convolution operator
+        :type conv_op: Operator
+        :param eltwise_op: operator adding data from skip connection
+        :type eltwise_op: Operator
+        '''
+        conv_op.set_attr("fuse_eltwise", True)
+        self.input_map[conv_op.output("Output")[0]] = eltwise_op.input("Y")[0]
+        self.input_map[eltwise_op.output("Out")[0]] = eltwise_op.input("Y")[0]
+
     def _adjust_input(self):
         for i in range(len(self.block.ops)):
             current_op = self.block.ops[i]
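Note (not part of the commit): the docstring above fully specifies the rewrite — conv->elementwise_add->any_other_op becomes conv->any_other_op, with the conv marked fuse_eltwise and readers of the add's output remapped to the residual input Y. A toy C++ sketch of that pass over a flat op list; Op, input_map, and all names here are illustrative, since the real transpiler mutates a Program block in Python:

```cpp
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Toy model of the graph rewrite: when a conv2d op is immediately followed
// by elementwise_add, mark the conv with fuse_eltwise, drop the add, and
// remap both the conv output and the add output to the residual input "Y"
// (which the fused conv now accumulates into).
struct Op {
  std::string type, output;
  std::string residual;  // elementwise_add's second input "Y"
  bool fuse_eltwise = false;
};

int main() {
  std::vector<Op> ops = {{"conv2d", "conv_out"},
                         {"elementwise_add", "add_out", "skip_in"},
                         {"relu", "relu_out"}};
  std::map<std::string, std::string> input_map;  // old name -> new name

  for (std::size_t i = 0; i + 1 < ops.size(); ++i) {
    if (ops[i].type == "conv2d" && ops[i + 1].type == "elementwise_add") {
      ops[i].fuse_eltwise = true;
      input_map[ops[i].output] = ops[i + 1].residual;
      input_map[ops[i + 1].output] = ops[i + 1].residual;
      ops.erase(ops.begin() + i + 1);  // remove the elementwise_add
    }
  }
  for (const auto& op : ops)
    std::cout << op.type << (op.fuse_eltwise ? " [fuse_eltwise]" : "") << "\n";
  // prints: conv2d [fuse_eltwise] / relu
}
```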