diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index bfdfdc56b34098dd170f8ca98d27b41759c2f57b..5bd68f9ac2e1b30bc6ce3094960bb89842b99e01 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -71,7 +71,7 @@ class OperatorBase;
 class ExecutionContext;
 
 /**
- * OperatorBase has the basic element that Net will call to do computation.
+ * OperatorBase has the basic elements that Net will call to do computation.
  * Only CreateOperator from OpRegistry will new Operator directly. User
  * should always construct a proto message OpDesc and call
  * OpRegistry::CreateOp(op_desc) to get an Operator instance.
diff --git a/paddle/fluid/framework/transfer_scope_cache.cc b/paddle/fluid/framework/transfer_scope_cache.cc
index f6219a14173094d15e9c60a2e26f98da1b04ec2e..e52a8317e2113a9489f8c05bcf47bc96bea33c64 100644
--- a/paddle/fluid/framework/transfer_scope_cache.cc
+++ b/paddle/fluid/framework/transfer_scope_cache.cc
@@ -17,28 +17,16 @@
 namespace paddle {
 namespace framework {
 
-// Holds all the transfer scope across the process.
 std::unordered_map<size_t, Scope*>& global_transfer_data_cache() {
-  typedef std::unordered_map<size_t, Scope*> map_t;
-  thread_local std::unique_ptr<map_t> x(new map_t);
+  thread_local auto* x = new std::unordered_map<size_t, Scope*>;
   return *x;
 }
 
-// Holds all the transfer scope for this thread.
 std::unordered_set<Scope*>& global_transfer_scope_cache() {
-  typedef std::unordered_set<Scope*> set_t;
-  thread_local std::unique_ptr<set_t> x(new set_t);
+  thread_local auto* x = new std::unordered_set<Scope*>;
   return *x;
 }
 
-// Try to create a transfer scope. If one cached scope has match the
-// requirement, just return that one.
-// Inputs:
-// @type0: the source kernel type.
-// @type1: the target kernel type.
-// @scope: the execution scope of this op.
-// Returns: A scope used to hold the transfer data across the different kernel
-// type.
 Scope* TryCreateTransferScope(OpKernelType type0, OpKernelType type1,
                               const Scope* scope) {
   Scope* new_scope{nullptr};
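Note on the two cache accessors above: the patch swaps `thread_local std::unique_ptr` for a raw pointer that is `new`-ed once and never freed. The diff does not state the motivation, but a deliberate leak like this is the standard way to avoid thread-exit destruction-order hazards, where one `thread_local` destructor touches another, already-destroyed `thread_local` object. A minimal sketch of the idiom, with hypothetical names:

```cpp
#include <unordered_map>

// Leaky thread-local singleton (hypothetical Value type for illustration).
// The container is allocated on first use in each thread and deliberately
// never freed, so no destructor runs at thread exit that could interact
// with other thread_local objects whose destruction order is unspecified.
// The unique_ptr version it replaces would destroy the container at exit.
struct Value {};

std::unordered_map<int, Value*>& ThreadLocalCache() {
  thread_local auto* cache = new std::unordered_map<int, Value*>;  // leaked on purpose
  return *cache;
}

int main() {
  ThreadLocalCache()[42] = nullptr;  // each thread sees its own map
  return 0;
}
```

The trade-off is that each thread's container is reclaimed only at process exit, which is why the new `RemoveKidsFromTransferScopeCache` helper below matters: stale entries must now be erased explicitly.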
@@ -58,5 +46,27 @@ Scope* TryCreateTransferScope(OpKernelType type0, OpKernelType type1,
   return new_scope;
 }
 
+void RemoveKidsFromTransferScopeCache(Scope* scope) {
+  auto it = global_transfer_scope_cache().find(scope);
+  if (it != global_transfer_scope_cache().end()) {
+    global_transfer_scope_cache().erase(it);
+  }
+  for (auto* s : scope->kids()) {
+    auto it = global_transfer_scope_cache().find(s);
+    if (it != global_transfer_scope_cache().end()) {
+      global_transfer_scope_cache().erase(it);
+    }
+  }
+
+  // Remove matching entries from the global transfer data cache as well.
+  auto& cache = global_transfer_data_cache();
+  for (auto it = cache.begin(); it != cache.end();) {
+    if (it->second == scope)
+      it = cache.erase(it);
+    else
+      it++;
+  }
+}
+
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/CMakeLists.txt b/paddle/fluid/inference/analysis/CMakeLists.txt
index 4bd3f93ef75ada545751fef5af77a78e4872b690..27b6b80955e45446cd9ea6c8edf29a3173f0263b 100644
--- a/paddle/fluid/inference/analysis/CMakeLists.txt
+++ b/paddle/fluid/inference/analysis/CMakeLists.txt
@@ -35,4 +35,5 @@ function(inference_analysis_test TARGET)
   endif()
 endfunction(inference_analysis_test)
 
-inference_analysis_test(test_analyzer SRCS analyzer_tester.cc EXTRA_DEPS reset_tensor_array paddle_inference_api)
+inference_analysis_test(test_analyzer SRCS analyzer_tester.cc
+                        EXTRA_DEPS reset_tensor_array paddle_inference_api)
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index ebe56734c6d6a9137e877f8a5ea188f59a1b4f98..d998533977566e9d9c8e67921e9f14feea29e95b 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -284,6 +284,7 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
         framework::GetFetchVariable(*scope, "fetch", idx);
     auto type = fetch.type();
     auto output = &(outputs->at(i));
+    output->name = fetchs_[idx]->Input("X")[0];
     if (type == typeid(float)) {
       GetFetchOne<float>(fetch, output);
       output->dtype = PaddleDType::FLOAT32;
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index db57812bc3ba8e6d578e665524cb5749e6bfecd6..12ecb7c15e92c3efcdb27a7058e9481a6f476674 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -109,7 +109,7 @@ class AnalysisPredictor : public PaddlePredictor {
   std::map<std::string, size_t> feed_names_;
   std::vector<framework::OpDesc *> fetchs_;
   // Memory buffer for feed inputs. The temporary LoDTensor will cause serious
-  // concurrency problems, so cache them.
+  // concurrency problems, wrong results, and memory leaks, so cache them.
   std::vector<framework::LoDTensor> feed_tensors_;
   details::TensorArrayBatchCleaner tensor_array_batch_cleaner_;
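The next hunk (api_impl.cc) replaces the per-call `framework::LoDTensor` temporary with an entry in the new `feed_tensors_` member, the same caching strategy the comment just above documents for `AnalysisPredictor`. A minimal, self-contained sketch of the pattern, with hypothetical `Tensor` and `Predictor` types standing in for `framework::LoDTensor` and `NativePaddlePredictor`:

```cpp
#include <cstddef>
#include <vector>

// Hypothetical stand-in for framework::LoDTensor.
struct Tensor {
  std::vector<float> data;
};

class Predictor {
 public:
  void SetFeed(const std::vector<std::vector<float>>& inputs) {
    // Reuse member buffers instead of stack-local temporaries: resize()
    // keeps existing elements, so repeated calls with the same shapes
    // reuse the allocations made on the first call.
    feed_tensors_.resize(inputs.size());
    for (std::size_t i = 0; i < inputs.size(); ++i) {
      feed_tensors_[i].data = inputs[i];  // copy into the cached tensor
    }
  }

 private:
  std::vector<Tensor> feed_tensors_;  // lives as long as the predictor
};

int main() {
  Predictor p;
  p.SetFeed({{1.0f, 2.0f}, {3.0f}});
  p.SetFeed({{4.0f, 5.0f}});  // second call reuses the cached buffers
  return 0;
}
```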
diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc
index a7cef426d17908bfae59a4a5a204237ab580a5a1..9c5703c91fedcbb7807c29eac9a52b30b8b4330a 100644
--- a/paddle/fluid/inference/api/api_impl.cc
+++ b/paddle/fluid/inference/api/api_impl.cc
@@ -185,8 +185,12 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                << inputs.size();
     return false;
   }
+
+  // Cache the inputs' memory for better concurrency performance.
+  feed_tensors_.resize(inputs.size());
+
   for (size_t i = 0; i < inputs.size(); ++i) {
-    framework::LoDTensor input;
+    auto &input = feed_tensors_[i];
     framework::DDim ddim = framework::make_ddim(inputs[i].shape);
     void *input_ptr;
     if (inputs[i].dtype == PaddleDType::INT64) {
@@ -261,6 +265,7 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
         framework::GetFetchVariable(*scope, "fetch", idx);
     auto type = fetch.type();
     auto output = &(outputs->at(i));
+    output->name = fetchs_[idx]->Input("X")[0];
     if (type == typeid(float)) {
       GetFetchOne<float>(fetch, output);
       output->dtype = PaddleDType::FLOAT32;
diff --git a/paddle/fluid/inference/api/api_impl.h b/paddle/fluid/inference/api/api_impl.h
index 9dfa48d501f17fa654ec50049608b1a87c586cb6..c1fcd198ccda07bb6cdd9911716be911ffef6e8d 100644
--- a/paddle/fluid/inference/api/api_impl.h
+++ b/paddle/fluid/inference/api/api_impl.h
@@ -69,6 +69,9 @@ class NativePaddlePredictor : public PaddlePredictor {
   std::vector<framework::OpDesc *> feeds_;
   std::map<std::string, size_t> feed_names_;
   std::vector<framework::OpDesc *> fetchs_;
+  // Memory buffer for feed inputs. The temporary LoDTensor will cause serious
+  // concurrency problems, wrong results, and memory leaks, so cache them.
+  std::vector<framework::LoDTensor> feed_tensors_;
   // Do not use unique_ptr, use parent scope to delete
   framework::Scope *sub_scope_{nullptr};
   details::TensorArrayBatchCleaner tensor_array_batch_cleaner_;
diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
index c4e283e76a54b9883729b32381911b5a3fe5f658..a6720fa798ec5cf60a8806a7f72fe6febaf4f7ac 100755
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -469,18 +469,21 @@ function assert_api_spec_approvals() {
     BRANCH="develop"
   fi
 
-  API_CHANGE=`git diff --name-only upstream/$BRANCH | grep "paddle/fluid/API.spec" || true`
-  echo "checking API.spec change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}"
-  if [ ${API_CHANGE} ] && [ "${GIT_PR_ID}" != "" ]; then
-    # NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable.
-    APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
-      python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 7845005 2887803 728699 13348433`
-    echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
-    if [ "${APPROVALS}" == "FALSE" ]; then
-      echo "You must have at least 2 approvals for the api change!"
-      exit 1
-    fi
-  fi
+  API_FILES=("paddle/fluid/API.spec" "paddle/fluid/framework/operator.h")
+  for API_FILE in ${API_FILES[*]}; do
+    API_CHANGE=`git diff --name-only upstream/$BRANCH | grep "${API_FILE}" || true`
+    echo "checking ${API_FILE} change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}"
+    if [ ${API_CHANGE} ] && [ "${GIT_PR_ID}" != "" ]; then
+      # NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable.
+      APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
+        python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 7845005 2887803 728699 13348433`
+      echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
+      if [ "${APPROVALS}" == "FALSE" ]; then
+        echo "You must have at least 2 approvals for the api change! ${API_FILE}"
+        exit 1
+      fi
+    fi
+  done
 }
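Reviewer note on the script change above: the loop keeps the pre-existing unquoted test `[ ${API_CHANGE} ]`, which errors out ("[: too many arguments") if `grep` matches more than one path; `[ -n "${API_CHANGE}" ]` together with iterating over `"${API_FILES[@]}"` would be the defensive spelling. Also, `grep "${API_FILE}"` treats the dot in `paddle/fluid/API.spec` as a regex wildcard, so a fixed-string match (`grep -F`) would be stricter. Neither issue is a regression introduced by this patch, since both carry over from the old single-file check.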