Commit 7594787d authored by JiabinYang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into enhance_hierachical_sigmod_op

......
@@ -71,7 +71,7 @@ class OperatorBase;
class ExecutionContext;
/**
* OperatorBase has the basic element that Net will call to do computation.
* OperatorBase has the basic elements that Net will call to do computation.
* Only CreateOperator from OpRegistry will new Operator directly. User
* should always construct a proto message OpDesc and call
* OpRegistry::CreateOp(op_desc) to get an Operator instance.
......
......
@@ -17,28 +17,16 @@
namespace paddle {
namespace framework {
// Holds all the transfer scope across the process.
std::unordered_map<size_t, Scope*>& global_transfer_data_cache() {
typedef std::unordered_map<size_t, Scope*> map_t;
thread_local std::unique_ptr<map_t> x(new map_t);
thread_local auto* x = new std::unordered_map<size_t, Scope*>;
return *x;
}
// Holds all the transfer scope for this thread.
std::unordered_set<Scope*>& global_transfer_scope_cache() {
typedef std::unordered_set<Scope*> set_t;
thread_local std::unique_ptr<set_t> x(new set_t);
thread_local auto* x = new std::unordered_set<Scope*>;
return *x;
}
// Try to create a transfer scope. If one cached scope has match the
// requirement, just return that one.
// Inputs:
// @type0: the source kernel type.
// @type1: the target kernel type.
// @scope: the execution scope of this op.
// Returns: A scope used to hold the transfer data across the different kernel
// type.
Scope* TryCreateTransferScope(OpKernelType type0, OpKernelType type1,
const Scope* scope) {
Scope* new_scope{nullptr};
......
@@ -58,5 +46,27 @@ Scope* TryCreateTransferScope(OpKernelType type0, OpKernelType type1,
return new_scope;
}
void RemoveKidsFromTransferScopeCache(Scope* scope) {
auto it = global_transfer_scope_cache().find(scope);
if (it != global_transfer_scope_cache().end()) {
global_transfer_scope_cache().erase(it);
}
for (auto* s : scope->kids()) {
auto it = global_transfer_scope_cache().find(s);
if (it != global_transfer_scope_cache().end()) {
global_transfer_scope_cache().erase(it);
}
}
// remove global transfer data cache
auto& cache = global_transfer_data_cache();
for (auto it = cache.begin(); it != cache.end();) {
if (it->second == scope)
it = cache.erase(it);
else
it++;
}
}
} // namespace framework
} // namespace paddle
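The transfer-scope cache hunks above swap a thread_local std::unique_ptr for a plain thread_local raw pointer, so each thread's cache is created on first use and is never destroyed; a common reason for this pattern is to avoid destruction-order problems when thread-local objects are torn down at thread exit. Below is a minimal sketch of that function-local thread_local cache pattern, with illustrative names rather than Paddle's types.

```cpp
#include <string>
#include <unordered_map>

// Minimal sketch of the pattern in the hunk above: the per-thread cache is
// built lazily on first use and intentionally leaked (raw pointer), so the
// container's destructor never runs during thread shutdown, where the order
// of thread_local destructions is hard to control. Names are illustrative.
std::unordered_map<std::string, int>& PerThreadCache() {
  thread_local auto* cache = new std::unordered_map<std::string, int>;
  return *cache;
}

int main() {
  auto& cache = PerThreadCache();  // first call in this thread constructs the map
  cache["answer"] = 42;
  return PerThreadCache().at("answer") == 42 ? 0 : 1;
}
```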
......
@@ -35,4 +35,5 @@ function(inference_analysis_test TARGET)
endif()
endfunction(inference_analysis_test)
inference_analysis_test(test_analyzer SRCS analyzer_tester.cc EXTRA_DEPS reset_tensor_array paddle_inference_api)
inference_analysis_test(test_analyzer SRCS analyzer_tester.cc
EXTRA_DEPS reset_tensor_array paddle_inference_api)
......
@@ -284,6 +284,7 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
framework::GetFetchVariable(*scope, "fetch", idx);
auto type = fetch.type();
auto output = &(outputs->at(i));
output->name = fetchs_[idx]->Input("X")[0];
if (type == typeid(float)) {
GetFetchOne<float>(fetch, output);
output->dtype = PaddleDType::FLOAT32;
......
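Both GetFetch hunks in this commit start filling output->name from the fetch op's "X" input, so a caller can identify each returned tensor by the variable it was fetched from instead of by position. A hedged usage sketch follows, assuming the paddle_inference_api.h interface of this era (PaddlePredictor::Run plus PaddleTensor carrying name, dtype and data); creating the predictor and preparing real inputs is omitted.

```cpp
#include <iostream>
#include <vector>

#include "paddle_inference_api.h"  // assumed public header name; adjust the path to your install

// Illustrative only: after the change above, each fetched PaddleTensor carries
// the name of the variable feeding the fetch op, so outputs can be matched by
// name rather than by their index in the output vector.
void PrintOutputs(paddle::PaddlePredictor* predictor,
                  const std::vector<paddle::PaddleTensor>& inputs) {
  std::vector<paddle::PaddleTensor> outputs;
  if (!predictor->Run(inputs, &outputs)) {
    std::cerr << "Run failed\n";
    return;
  }
  for (const auto& t : outputs) {
    std::cout << t.name << ": dtype="
              << (t.dtype == paddle::PaddleDType::FLOAT32 ? "float32" : "int64")
              << ", bytes=" << t.data.length() << "\n";
  }
}
```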
......
@@ -109,7 +109,7 @@ class AnalysisPredictor : public PaddlePredictor {
std::map<std::string, size_t> feed_names_;
std::vector<framework::OpDesc *> fetchs_;
// Memory buffer for feed inputs. The temporary LoDTensor will cause serious
// concurrency problems, so cache them.
// concurrency problems, wrong results and memory leak, so cache them.
std::vector<framework::LoDTensor> feed_tensors_;
details::TensorArrayBatchCleaner tensor_array_batch_cleaner_;
......
......
@@ -185,8 +185,12 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
<< inputs.size();
return false;
}
// Cache the inputs memory for better concurrency performance.
feed_tensors_.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); ++i) {
framework::LoDTensor input;
auto &input = feed_tensors_[i];
framework::DDim ddim = framework::make_ddim(inputs[i].shape);
void *input_ptr;
if (inputs[i].dtype == PaddleDType::INT64) {
......
@@ -261,6 +265,7 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
framework::GetFetchVariable(*scope, "fetch", idx);
auto type = fetch.type();
auto output = &(outputs->at(i));
output->name = fetchs_[idx]->Input("X")[0];
if (type == typeid(float)) {
GetFetchOne<float>(fetch, output);
output->dtype = PaddleDType::FLOAT32;
......
......
@@ -69,6 +69,9 @@ class NativePaddlePredictor : public PaddlePredictor {
std::vector<framework::OpDesc *> feeds_;
std::map<std::string, size_t> feed_names_;
std::vector<framework::OpDesc *> fetchs_;
// Memory buffer for feed inputs. The temporary LoDTensor will cause serious
// concurrency problems, wrong results and memory leak, so cache them.
std::vector<framework::LoDTensor> feed_tensors_;
// Do not use unique_ptr, use parent scope to delete
framework::Scope *sub_scope_{nullptr};
details::TensorArrayBatchCleaner tensor_array_batch_cleaner_;
......
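The api_impl hunks above cache the feed tensors in a member vector: SetFeed resizes feed_tensors_ once per call and reuses the cached LoDTensors instead of constructing a temporary per input, which the comment blames for concurrency problems, wrong results and memory leaks. A minimal sketch of that buffer-reuse pattern, using illustrative stand-in types rather than Paddle classes:

```cpp
#include <cstring>
#include <string>
#include <vector>

// Stand-in for a reusable staging buffer; illustrative only, not a Paddle type.
struct Buffer {
  std::vector<char> storage;
  void Resize(size_t bytes) { storage.resize(bytes); }
  void* data() { return storage.data(); }
};

class Predictor {
 public:
  // Mirrors the SetFeed change above: the staging buffers live in the object,
  // are resized once per call, and are reused, rather than being local
  // temporaries re-created for every input on every call.
  bool SetFeed(const std::vector<std::string>& inputs) {
    feed_buffers_.resize(inputs.size());
    for (size_t i = 0; i < inputs.size(); ++i) {
      auto& buf = feed_buffers_[i];  // cached member, not a temporary
      buf.Resize(inputs[i].size());
      std::memcpy(buf.data(), inputs[i].data(), inputs[i].size());
    }
    return true;
  }

 private:
  std::vector<Buffer> feed_buffers_;  // plays the role of feed_tensors_
};

int main() {
  Predictor p;
  p.SetFeed({"batch-0", "batch-1"});  // the call below reuses the same buffers
  p.SetFeed({"batch-2", "batch-3"});
  return 0;
}
```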
......
@@ -469,18 +469,21 @@ function assert_api_spec_approvals() {
BRANCH="develop"
fi
API_CHANGE=`git diff --name-only upstream/$BRANCH | grep "paddle/fluid/API.spec" || true`
echo "checking API.spec change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}"
API_FILES=("paddle/fluid/API.spec" "paddle/fluid/framework/operator.h")
for API_FILE in ${API_FILES[*]}; do
API_CHANGE=`git diff --name-only upstream/$BRANCH | grep "${API_FILE}" || true`
echo "checking ${API_FILE} change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}"
if [ ${API_CHANGE} ] && [ "${GIT_PR_ID}" != "" ]; then
# NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable.
APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 7845005 2887803 728699 13348433`
echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
if [ "${APPROVALS}" == "FALSE" ]; then
echo "You must have at least 2 approvals for the api change!"
echo "You must have at least 2 approvals for the api change! ${API_FILE}"
exit 1
fi
fi
done
}
......