diff --git a/cmake/tensorrt.cmake b/cmake/tensorrt.cmake index ac19b1651893f18b14c62a0986df75bed25d7e80..8f65a737c43a124c05574d6eb9c3050fdab5299a 100644 --- a/cmake/tensorrt.cmake +++ b/cmake/tensorrt.cmake @@ -16,7 +16,9 @@ find_library(TENSORRT_LIBRARY NAMES libnvinfer.so libnvinfer.a DOC "Path to TensorRT library.") if(TENSORRT_INCLUDE_DIR AND TENSORRT_LIBRARY) + if(WITH_DSO) set(TENSORRT_FOUND ON) + endif(WITH DSO) else() set(TENSORRT_FOUND OFF) endif() diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index 371384dc56eec91db1f621c0ebb65113e7a5a5cc..1a8d9cefbfa570d2ac3f4fc32d50d705ddc67a75 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -429,7 +429,7 @@ struct LSTM : public PatternBase { struct GRU : public PatternBase { GRU(PDPattern* pattern, const std::string& name_scope) - : PatternBase(pattern, name_scope, "lstm") {} + : PatternBase(pattern, name_scope, "gru") {} PDNode* operator()(PDNode* x); diff --git a/paddle/fluid/operators/distributed/grpc_client.cc b/paddle/fluid/operators/distributed/grpc_client.cc index e22bc552f85b85c75f06b4158f2abac2d3843256..13682b78f0eccf049daa315f3a26aafd22e42a41 100644 --- a/paddle/fluid/operators/distributed/grpc_client.cc +++ b/paddle/fluid/operators/distributed/grpc_client.cc @@ -125,7 +125,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep, VarHandlePtr h(new VarHandle(ep, "Get", var_name_val, p_ctx, p_scope)); s->Prepare(h, time_out); - framework::AsyncIO([var_name_val, p_scope, p_ctx, s, this] { + framework::AsyncIO([var_name_val, s, this] { // prepare input sendrecv::VariableMessage req; req.set_varname(var_name_val); @@ -166,7 +166,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep, s->Prepare(h, time_out); framework::AsyncIO([in_var_name_val, out_var_name_val, ep_val, p_scope, p_ctx, - time_out, s, this] { + s, this] { auto* var = p_scope->FindVar(in_var_name_val); ::grpc::ByteBuffer req; diff --git a/paddle/fluid/operators/math/sequence_pooling.cc b/paddle/fluid/operators/math/sequence_pooling.cc index f25d3d3f1ee1f89d46b8e7c88ca68048f5203544..69318a6598c8c69eceab7216df6382537153d34f 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cc +++ b/paddle/fluid/operators/math/sequence_pooling.cc @@ -103,6 +103,58 @@ class MaxSeqPoolGradFunctor { } }; +template +class LastSeqPoolFunctor { + public: + void operator()(const platform::CPUDeviceContext& context, + const framework::LoDTensor& input, + framework::Tensor* output) { + // Create pointers to input and output data + auto* in_data = input.data(); + auto* out_data = output->data(); + + // Calculate the size of each item in sequence + int64_t item_size = input.numel() / input.dims()[0]; + auto lod = input.lod()[0]; + int seq_num = static_cast(lod.size()) - 1; + for (int i = 0; i < seq_num; ++i) { + // Calculate the length of each sequence + int64_t seq_len = static_cast(lod[i + 1] - lod[i]); + // Point to the begin of next sequence + in_data += seq_len * item_size; + // Copy the last item of sequence to output + std::memcpy(out_data, (in_data - item_size), item_size * sizeof(T)); + out_data += item_size; + } + } +}; + +template +class FirstSeqPoolFunctor { + public: + void operator()(const platform::CPUDeviceContext& context, + const framework::LoDTensor& input, + framework::Tensor* output) { + // Create pointers to input and output data + auto* in_data = input.data(); + auto* out_data = output->data(); + + // Calculate the size of each item in sequence + int64_t item_size = input.numel() / input.dims()[0]; + auto lod = input.lod()[0]; + int seq_num = static_cast(lod.size()) - 1; + for (int i = 0; i < seq_num; ++i) { + // Calculate the length of each sequence + int64_t seq_len = static_cast(lod[i + 1] - lod[i]); + // Copy the first item of sequence to output + std::memcpy(out_data, in_data, item_size * sizeof(T)); + // Point to the next sequence + in_data += seq_len * item_size; + out_data += item_size; + } + } +}; + template class SequencePoolFunctor { public: @@ -116,6 +168,16 @@ class SequencePoolFunctor { max_pool(context, input, output, index); return; } + if (pooltype == "LAST") { + math::LastSeqPoolFunctor last_pool; + last_pool(context, input, output); + return; + } + if (pooltype == "FIRST") { + math::FirstSeqPoolFunctor first_pool; + first_pool(context, input, output); + return; + } auto lod = input.lod()[0]; auto& place = *context.eigen_device(); for (int i = 0; i < static_cast(lod.size()) - 1; ++i) { @@ -133,10 +195,6 @@ class SequencePoolFunctor { } else if (pooltype == "SQRT") { out_e.device(place) = in_e.sum(Eigen::array({{0}})) / std::sqrt(static_cast(h)); - } else if (pooltype == "LAST") { - out_e.device(place) = in_e.chip(h - 1, 0); - } else if (pooltype == "FIRST") { - out_e.device(place) = in_e.chip(0, 0); } else { PADDLE_THROW("unsupported pooling pooltype"); } diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index ad095b92711dccb44f26748bcfa89a0b4123c6e7..ba5065f468376d83488d7eade5dc2041d86dfd39 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -33,6 +33,7 @@ function print_usage() { ${BLUE}single_test${NONE}: run a single unit test ${BLUE}bind_test${NONE}: parallel tests bind to different GPU ${BLUE}doc${NONE}: generate paddle documents + ${BLUE}gen_doc_lib${NONE}: generate paddle documents library ${BLUE}html${NONE}: convert C++ source code into HTML ${BLUE}dockerfile${NONE}: generate paddle release dockerfile ${BLUE}capi${NONE}: generate paddle CAPI package @@ -431,24 +432,60 @@ EOF linkchecker doc/v2/cn/html/index.html linkchecker doc/v2/api/en/html/index.html - if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi; +# if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi; +# +# # Deploy to the the content server if its a "develop" or "release/version" branch +# # The "develop_doc" branch is reserved to test full deploy process without impacting the real content. +# if [ "$TRAVIS_BRANCH" == "develop_doc" ]; then +# PPO_SCRIPT_BRANCH=develop +# elif [[ "$TRAVIS_BRANCH" == "develop" || "$TRAVIS_BRANCH" =~ ^v|release/[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then +# PPO_SCRIPT_BRANCH=master +# else +# # Early exit, this branch doesn't require documentation build +# return 0; +# fi +# # Fetch the paddlepaddle.org deploy_docs.sh from the appopriate branch +# export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/$PPO_SCRIPT_BRANCH/scripts/deploy/deploy_docs.sh +# export PYTHONPATH=$PYTHONPATH:${PADDLE_ROOT}/build/python:/paddle/build/python +# cd .. +# curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH ${PADDLE_ROOT} ${PADDLE_ROOT}/build/doc/ ${PPO_SCRIPT_BRANCH} +# cd - +} - # Deploy to the the content server if its a "develop" or "release/version" branch - # The "develop_doc" branch is reserved to test full deploy process without impacting the real content. - if [ "$TRAVIS_BRANCH" == "develop_doc" ]; then - PPO_SCRIPT_BRANCH=develop - elif [[ "$TRAVIS_BRANCH" == "develop" || "$TRAVIS_BRANCH" =~ ^v|release/[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then - PPO_SCRIPT_BRANCH=master - else - # Early exit, this branch doesn't require documentation build - return 0; - fi - # Fetch the paddlepaddle.org deploy_docs.sh from the appopriate branch - export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/$PPO_SCRIPT_BRANCH/scripts/deploy/deploy_docs.sh - export PYTHONPATH=$PYTHONPATH:${PADDLE_ROOT}/build/python:/paddle/build/python - cd .. - curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH ${PADDLE_ROOT} ${PADDLE_ROOT}/build/doc/ ${PPO_SCRIPT_BRANCH} - cd - +function gen_doc_lib() { + mkdir -p ${PADDLE_ROOT}/build + cd ${PADDLE_ROOT}/build + cat < %s" % (orig_path, head_path)) class TestDistTransformer2x2Sync(TestDistBase):