Commit bddd4bc0 authored by dzhwinter

Merge remote-tracking branch 'origin/develop' into memory/stable

@@ -16,7 +16,9 @@ find_library(TENSORRT_LIBRARY NAMES libnvinfer.so libnvinfer.a
   DOC "Path to TensorRT library.")
 
 if(TENSORRT_INCLUDE_DIR AND TENSORRT_LIBRARY)
+  if(WITH_DSO)
     set(TENSORRT_FOUND ON)
+  endif(WITH_DSO)
 else()
   set(TENSORRT_FOUND OFF)
 endif()
...
@@ -429,7 +429,7 @@ struct LSTM : public PatternBase {
 
 struct GRU : public PatternBase {
   GRU(PDPattern* pattern, const std::string& name_scope)
-      : PatternBase(pattern, name_scope, "lstm") {}
+      : PatternBase(pattern, name_scope, "gru") {}
 
   PDNode* operator()(PDNode* x);
...
@@ -125,7 +125,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep,
   VarHandlePtr h(new VarHandle(ep, "Get", var_name_val, p_ctx, p_scope));
   s->Prepare(h, time_out);
 
-  framework::AsyncIO([var_name_val, p_scope, p_ctx, s, this] {
+  framework::AsyncIO([var_name_val, s, this] {
     // prepare input
     sendrecv::VariableMessage req;
     req.set_varname(var_name_val);
@@ -166,7 +166,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep,
   s->Prepare(h, time_out);
 
   framework::AsyncIO([in_var_name_val, out_var_name_val, ep_val, p_scope, p_ctx,
-                      time_out, s, this] {
+                      s, this] {
     auto* var = p_scope->FindVar(in_var_name_val);
 
     ::grpc::ByteBuffer req;
...
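Note: both capture lists shrink to only what each callback body actually uses (p_scope, p_ctx, and time_out appear unused inside the respective lambdas), so stale references are not kept alive inside asynchronous tasks. A minimal Python sketch of the same discipline, assuming nothing beyond the standard library:

    from concurrent.futures import ThreadPoolExecutor

    def async_get(pool, var_name):
        # The callback closes over var_name only; scope/context objects
        # stay out of the closure so they are not retained by the task.
        def task():
            return "Get: " + var_name
        return pool.submit(task)

    with ThreadPoolExecutor(max_workers=1) as pool:
        print(async_get(pool, "w0").result())  # prints "Get: w0"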
@@ -103,6 +103,58 @@ class MaxSeqPoolGradFunctor {
   }
 };
 
+template <typename T>
+class LastSeqPoolFunctor {
+ public:
+  void operator()(const platform::CPUDeviceContext& context,
+                  const framework::LoDTensor& input,
+                  framework::Tensor* output) {
+    // Create pointers to the input and output data
+    auto* in_data = input.data<T>();
+    auto* out_data = output->data<T>();
+    // Calculate the size of each item in the sequence
+    int64_t item_size = input.numel() / input.dims()[0];
+    auto lod = input.lod()[0];
+    int seq_num = static_cast<int>(lod.size()) - 1;
+    for (int i = 0; i < seq_num; ++i) {
+      // Calculate the length of each sequence
+      int64_t seq_len = static_cast<int64_t>(lod[i + 1] - lod[i]);
+      // Point to the beginning of the next sequence
+      in_data += seq_len * item_size;
+      // Copy the last item of the sequence to the output
+      std::memcpy(out_data, (in_data - item_size), item_size * sizeof(T));
+      out_data += item_size;
+    }
+  }
+};
+
+template <typename T>
+class FirstSeqPoolFunctor {
+ public:
+  void operator()(const platform::CPUDeviceContext& context,
+                  const framework::LoDTensor& input,
+                  framework::Tensor* output) {
+    // Create pointers to the input and output data
+    auto* in_data = input.data<T>();
+    auto* out_data = output->data<T>();
+    // Calculate the size of each item in the sequence
+    int64_t item_size = input.numel() / input.dims()[0];
+    auto lod = input.lod()[0];
+    int seq_num = static_cast<int>(lod.size()) - 1;
+    for (int i = 0; i < seq_num; ++i) {
+      // Calculate the length of each sequence
+      int64_t seq_len = static_cast<int64_t>(lod[i + 1] - lod[i]);
+      // Copy the first item of the sequence to the output
+      std::memcpy(out_data, in_data, item_size * sizeof(T));
+      // Point to the next sequence
+      in_data += seq_len * item_size;
+      out_data += item_size;
+    }
+  }
+};
+
 template <typename T>
 class SequencePoolFunctor<platform::CPUDeviceContext, T> {
  public:
@@ -116,6 +168,16 @@ class SequencePoolFunctor<platform::CPUDeviceContext, T> {
       max_pool(context, input, output, index);
       return;
     }
+    if (pooltype == "LAST") {
+      math::LastSeqPoolFunctor<T> last_pool;
+      last_pool(context, input, output);
+      return;
+    }
+    if (pooltype == "FIRST") {
+      math::FirstSeqPoolFunctor<T> first_pool;
+      first_pool(context, input, output);
+      return;
+    }
     auto lod = input.lod()[0];
     auto& place = *context.eigen_device();
     for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
@@ -133,10 +195,6 @@ class SequencePoolFunctor<platform::CPUDeviceContext, T> {
     } else if (pooltype == "SQRT") {
       out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
                             std::sqrt(static_cast<T>(h));
-    } else if (pooltype == "LAST") {
-      out_e.device(place) = in_e.chip(h - 1, 0);
-    } else if (pooltype == "FIRST") {
-      out_e.device(place) = in_e.chip(0, 0);
     } else {
       PADDLE_THROW("unsupported pooling pooltype");
     }
...
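Note: the new LastSeqPoolFunctor/FirstSeqPoolFunctor handle the LAST and FIRST pool types with one memcpy per sequence, walking the LoD offsets directly, so the generic Eigen path (and its per-sequence chip expressions, removed in the hunk above) is no longer needed for these cases. A small Python sketch of the intended semantics, with illustrative names only:

    import numpy as np

    def seq_pool(x, lod, pooltype):
        # x has shape (total_timesteps, item_size); lod holds sequence
        # offsets, e.g. [0, 2, 5] means rows 0:2 and rows 2:5.
        out = np.empty((len(lod) - 1, x.shape[1]), dtype=x.dtype)
        for i in range(len(lod) - 1):
            if pooltype == "FIRST":
                out[i] = x[lod[i]]          # first timestep of sequence i
            elif pooltype == "LAST":
                out[i] = x[lod[i + 1] - 1]  # last timestep of sequence i
        return out

    x = np.arange(10, dtype=np.float32).reshape(5, 2)
    print(seq_pool(x, [0, 2, 5], "LAST"))   # rows 1 and 4 of x
    print(seq_pool(x, [0, 2, 5], "FIRST"))  # rows 0 and 2 of x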
@@ -33,6 +33,7 @@ function print_usage() {
     ${BLUE}single_test${NONE}: run a single unit test
     ${BLUE}bind_test${NONE}: parallel tests bind to different GPU
     ${BLUE}doc${NONE}: generate paddle documents
+    ${BLUE}gen_doc_lib${NONE}: generate paddle documents library
     ${BLUE}html${NONE}: convert C++ source code into HTML
     ${BLUE}dockerfile${NONE}: generate paddle release dockerfile
     ${BLUE}capi${NONE}: generate paddle CAPI package
@@ -431,24 +432,60 @@ EOF
     linkchecker doc/v2/cn/html/index.html
     linkchecker doc/v2/api/en/html/index.html
 
-    if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
-
-    # Deploy to the content server if it's a "develop" or "release/version" branch
-    # The "develop_doc" branch is reserved to test the full deploy process without impacting the real content.
-    if [ "$TRAVIS_BRANCH" == "develop_doc" ]; then
-        PPO_SCRIPT_BRANCH=develop
-    elif [[ "$TRAVIS_BRANCH" == "develop" || "$TRAVIS_BRANCH" =~ ^v|release/[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then
-        PPO_SCRIPT_BRANCH=master
-    else
-        # Early exit; this branch doesn't require a documentation build
-        return 0;
-    fi
-    # Fetch the paddlepaddle.org deploy_docs.sh from the appropriate branch
-    export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/$PPO_SCRIPT_BRANCH/scripts/deploy/deploy_docs.sh
-    export PYTHONPATH=$PYTHONPATH:${PADDLE_ROOT}/build/python:/paddle/build/python
-    cd ..
-    curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH ${PADDLE_ROOT} ${PADDLE_ROOT}/build/doc/ ${PPO_SCRIPT_BRANCH}
-    cd -
+    # if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
+    #
+    # # Deploy to the content server if it's a "develop" or "release/version" branch
+    # # The "develop_doc" branch is reserved to test the full deploy process without impacting the real content.
+    # if [ "$TRAVIS_BRANCH" == "develop_doc" ]; then
+    #     PPO_SCRIPT_BRANCH=develop
+    # elif [[ "$TRAVIS_BRANCH" == "develop" || "$TRAVIS_BRANCH" =~ ^v|release/[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then
+    #     PPO_SCRIPT_BRANCH=master
+    # else
+    #     # Early exit; this branch doesn't require a documentation build
+    #     return 0;
+    # fi
+    # # Fetch the paddlepaddle.org deploy_docs.sh from the appropriate branch
+    # export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/$PPO_SCRIPT_BRANCH/scripts/deploy/deploy_docs.sh
+    # export PYTHONPATH=$PYTHONPATH:${PADDLE_ROOT}/build/python:/paddle/build/python
+    # cd ..
+    # curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH ${PADDLE_ROOT} ${PADDLE_ROOT}/build/doc/ ${PPO_SCRIPT_BRANCH}
+    # cd -
+}
+
+function gen_doc_lib() {
+    mkdir -p ${PADDLE_ROOT}/build
+    cd ${PADDLE_ROOT}/build
+    cat <<EOF
+    ========================================
+    Building documentation library ...
+    In /paddle/build
+    ========================================
+EOF
+    cmake .. \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DWITH_DOC=ON \
+        -DWITH_GPU=OFF \
+        -DWITH_MKL=OFF \
+        -DWITH_FLUID_ONLY=ON
+
+    local LIB_TYPE=$1
+    case $LIB_TYPE in
+      full)
+        # Build the full Paddle Python module. Will time out without caching 'copy_paddle_pybind' first.
+        make -j `nproc` gen_proto_py framework_py_proto copy_paddle_pybind paddle_python
+        ;;
+      pybind)
+        # Build the paddle pybind library. Takes 49 minutes to build; might time out.
+        make -j `nproc` copy_paddle_pybind
+        ;;
+      proto)
+        # Even smaller library.
+        make -j `nproc` framework_py_proto
+        ;;
+      *)
+        exit 0
+        ;;
+    esac
 }
 
 function gen_html() {
@@ -608,6 +645,9 @@ function main() {
       doc)
         gen_docs
        ;;
+      gen_doc_lib)
+        gen_doc_lib $2
+        ;;
       html)
         gen_html
        ;;
...
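Note: gen_doc_lib builds progressively smaller documentation-related targets (full, pybind, proto); any other argument exits without building, and main() forwards the subcommand's argument via `gen_doc_lib $2`. A hypothetical invocation from a repository checkout, sketched with Python's subprocess (the script path and the PADDLE_ROOT setting are assumptions about the build environment):

    import os
    import subprocess

    # Build only the framework proto target, the smallest of the three.
    env = dict(os.environ, PADDLE_ROOT=os.getcwd())
    subprocess.check_call(
        ["bash", "paddle/scripts/paddle_build.sh", "gen_doc_lib", "proto"],
        env=env)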
@@ -92,7 +92,7 @@ class TrainTaskConfig(object):
     src_vocab_fpath = data_path + "vocab.bpe.32000"
     trg_vocab_fpath = data_path + "vocab.bpe.32000"
     train_file_pattern = data_path + "train.tok.clean.bpe.32000.en-de"
-    val_file_pattern = data_path + "newstest2013.tok.bpe.32000.en-de"
+    val_file_pattern = data_path + "newstest2013.tok.bpe.32000.en-de.cut"
     pool_size = 2000
     sort_type = None
     local = True
@@ -624,11 +624,12 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
                 init = True
 
             # Validate and save the model for inference.
-            if TrainTaskConfig.val_file_pattern is not None:
-                val_avg_cost, val_ppl = test()
-                print("[%f]" % val_avg_cost)
-            else:
-                assert (False)
+            if batch_id == 0 or batch_id == 4:
+                if TrainTaskConfig.val_file_pattern is not None:
+                    val_avg_cost, val_ppl = test()
+                    print("[%f]" % val_avg_cost)
+                else:
+                    assert (False)
 
 #import transformer_reader as reader
@@ -1701,8 +1702,9 @@ class DistTransformer2x2(TestDistRunnerBase):
         exe.run(startup_prog)
         exe.run(pserver_prog)
 
-    def run_trainer(self, place, args):
+    def run_trainer(self, use_cuda, args):
+        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+        TrainTaskConfig.use_gpu = use_cuda
         sum_cost, avg_cost, predict, token_num, local_lr_scheduler = get_model(
             args.is_dist, not args.sync_mode)
...
@@ -61,9 +61,10 @@ class TestDistRunnerBase(object):
         exe.run(startup_prog)
         exe.run(pserver_prog)
 
-    def run_trainer(self, place, args):
+    def run_trainer(self, use_cuda, args):
         import paddle
         import paddle.fluid as fluid
+        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
         test_program, avg_cost, train_reader, test_reader, batch_acc, predict = \
             self.get_model(batch_size=2)
         if args.mem_opt:
@@ -91,7 +92,7 @@ class TestDistRunnerBase(object):
         build_stra.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce
 
         exe = fluid.ParallelExecutor(
-            True,
+            use_cuda,
             loss_name=avg_cost.name,
             exec_strategy=strategy,
             build_strategy=build_stra)
@@ -142,9 +143,8 @@ def runtime_main(test_class):
     if args.role == "pserver" and args.is_dist:
         model.run_pserver(args)
     else:
-        p = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
-        ) else fluid.CPUPlace()
-        model.run_trainer(p, args)
+        use_cuda = True if core.is_compiled_with_cuda() else False
+        model.run_trainer(use_cuda, args)
 
 import paddle.compat as cpt
@@ -225,11 +225,12 @@ class TestDistBase(unittest.TestCase):
     def check_with_place(self, model_file, delta=1e-3, check_error_log=False):
         # TODO(typhoonzero): should auto adapt GPU count on the machine.
         required_envs = {
-            "PATH": os.getenv("PATH"),
-            "PYTHONPATH": os.getenv("PYTHONPATH"),
-            "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH"),
+            "PATH": os.getenv("PATH", ""),
+            "PYTHONPATH": os.getenv("PYTHONPATH", ""),
+            "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""),
             "FLAGS_fraction_of_gpu_memory_to_use": "0.15",
-            "FLAGS_cudnn_deterministic": "1"
+            "FLAGS_cudnn_deterministic": "1",
+            "CPU_NUM": "1"
         }
         if check_error_log:
...
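Note: os.getenv(name) returns None when the variable is unset, and a None value in this dict can break launching the trainer subprocess later, since an environment mapping must contain only strings; the "" default keeps every entry a string. A quick illustration:

    import os

    os.environ.pop("DEMO_VAR", None)        # ensure the variable is unset
    print(os.getenv("DEMO_VAR"))            # None -> unsafe in a subprocess env
    print(repr(os.getenv("DEMO_VAR", "")))  # '' -> always a safe string value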
@@ -14,6 +14,7 @@
 
 from __future__ import print_function
 
+import os
 import unittest
 import paddle
 from test_dist_base import TestDistBase
@@ -44,6 +45,14 @@ def download_files():
     test_url = url_prefix + 'newstest2013.tok.bpe.32000.en-de'
     test_md5 = '9dd74a266dbdb25314183899f269b4a2'
     paddle.dataset.common.download(test_url, 'test_dist_transformer', test_md5)
+
+    # Cut the test data for faster CI.
+    orig_path = os.path.join(paddle.dataset.common.DATA_HOME,
+                             "test_dist_transformer",
+                             "newstest2013.tok.bpe.32000.en-de")
+    head_path = os.path.join(paddle.dataset.common.DATA_HOME,
+                             "test_dist_transformer",
+                             "newstest2013.tok.bpe.32000.en-de.cut")
+    os.system("head -n10 %s > %s" % (orig_path, head_path))
 
 class TestDistTransformer2x2Sync(TestDistBase):
...
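Note: the ".cut" file keeps only the first 10 lines of newstest2013, which is why val_file_pattern above now points at the ".cut" suffix, keeping validation fast on CI. The os.system("head ...") call assumes a Unix shell; a portable pure-Python equivalent (illustrative only) would be:

    import itertools

    def cut_file(src, dst, n=10):
        # Copy the first n lines of src to dst, mirroring `head -n10 src > dst`.
        with open(src) as fin, open(dst, "w") as fout:
            fout.writelines(itertools.islice(fin, n))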