Commit 02149ed1 authored by zhouwei25

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop

@@ -133,6 +133,32 @@ struct ExtractAttribute<std::vector<int64_t>> {
   const std::string& attr_name_;
 };
 
+template <>
+struct ExtractAttribute<float> {
+  explicit ExtractAttribute(const std::string& attr_name)
+      : attr_name_(attr_name) {}
+
+  float* operator()(Attribute& attr) const {
+    if (attr.type() == typeid(int)) {  // NOLINT
+      int val = boost::get<int>(attr);
+      attr = static_cast<float>(val);
+    } else if (attr.type() == typeid(int64_t)) {  // NOLINT
+      int64_t val = boost::get<int64_t>(attr);
+      attr = static_cast<float>(val);
+    }
+    float* attr_value = nullptr;
+    try {
+      attr_value = &boost::get<float>(attr);
+    } catch (boost::bad_get& bad_get) {
+      PADDLE_THROW("Cannot get attribute %s by type float, its type is %s",
+                   attr_name_, paddle::platform::demangle(attr.type().name()));
+    }
+    return attr_value;
+  }
+
+  const std::string& attr_name_;
+};
+
 template <typename T>
 inline proto::AttrType AttrTypeID() {
   Attribute tmp = T();
...
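For reference, here is the promotion pattern from the new ExtractAttribute<float> specialization isolated into a minimal standalone sketch. It is not part of this commit; Attr and ExtractFloat are illustrative names, and Paddle's real Attribute variant has more alternatives.

#include <iostream>
#include <boost/variant.hpp>

// Toy stand-in for Paddle's Attribute variant.
using Attr = boost::variant<int, int64_t, float>;

// Mirror of ExtractAttribute<float>::operator(): if the variant currently
// holds an int or int64_t, rewrite it in place as float, then hand back a
// pointer into the variant's own storage.
float* ExtractFloat(Attr* attr) {
  if (attr->type() == typeid(int)) {
    *attr = static_cast<float>(boost::get<int>(*attr));
  } else if (attr->type() == typeid(int64_t)) {
    *attr = static_cast<float>(boost::get<int64_t>(*attr));
  }
  return &boost::get<float>(*attr);  // throws boost::bad_get if still not float
}

int main() {
  Attr a = int64_t{42};
  std::cout << *ExtractFloat(&a) << std::endl;  // prints 42
}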
@@ -885,12 +885,12 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
                                  const platform::Place& place) const {
   // To reduce the elapsed time of HasAttr, we use a bool variable to record
   // the result of HasAttr.
-  if (!enable_cache_runtime_context && HasAttr(kEnableCacheRuntimeContext))
-    enable_cache_runtime_context = true;
-  if (!all_kernels_must_compute_runtime_shape &&
+  if (!enable_cache_runtime_context_ && HasAttr(kEnableCacheRuntimeContext))
+    enable_cache_runtime_context_ = true;
+  if (!all_kernels_must_compute_runtime_shape_ &&
       HasAttr(kAllKernelsMustComputeRuntimeShape))
-    all_kernels_must_compute_runtime_shape = true;
-  if (!enable_cache_runtime_context) {
+    all_kernels_must_compute_runtime_shape_ = true;
+  if (!enable_cache_runtime_context_) {
     RuntimeContext ctx(Inputs(), Outputs(), scope);
     RunImpl(scope, place, &ctx);
   } else {
@@ -931,7 +931,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
     dev_ctx = pool.Get(kernel_type_->place_);
   }
 
-  if (!all_kernels_must_compute_runtime_shape) {
+  if (!all_kernels_must_compute_runtime_shape_) {
     RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, *runtime_ctx);
     this->InferShape(&infer_shape_ctx);
   }
@@ -981,6 +981,13 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
       }
     }
   }
+
+  // To solve issue #15032: after a discussion with @Luotao, CPU inference
+  // does not cache the transfer scope, so in that case delete the transfer
+  // scope after the run to avoid a memory leak.
+  if (transfer_scope && !run_by_executor_ && !enable_cache_transfer_scope_) {
+    scope.DeleteScope(transfer_scope);
+  }
 }
 
 void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx,
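The "To reduce the elapsed time of HasAttr" comment above describes a latch: the attribute lookup is paid at most until it first succeeds, and the answer is stored in a mutable flag because RunImpl is const. A toy sketch of the same trick; the names are illustrative, not Paddle's API, and it assumes the attribute set never changes after construction.

#include <set>
#include <string>

class Op {
 public:
  bool HasAttr(const std::string& name) const { return attrs_.count(name) > 0; }

  void Run() const {
    // Pay the set lookup only until it first reports true; afterwards the
    // latched flag short-circuits the check entirely.
    if (!cache_enabled_ && HasAttr("enable_cache")) cache_enabled_ = true;
    if (cache_enabled_) { /* reuse the cached RuntimeContext ... */ }
  }

 private:
  std::set<std::string> attrs_{"enable_cache"};
  mutable bool cache_enabled_ = false;  // mutable so const Run() can set it
};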
@@ -1114,9 +1121,18 @@ Scope* OperatorWithKernel::PrepareData(
       // If this op is not called by an Executor or ParallelExecutor, it should
       // be called by a NaiveExecutor; the NaiveExecutor will cache the scopes
       // and variables, whose behavior differs a lot.
-      if (!run_by_executor_) {
+      //
+      // To solve issue #15032: after a discussion with @Luotao, for all-CPU
+      // kernel cases without GPU participation we skip transfer scope caching
+      // here; by test, CPU inference performance is not impacted.
+      enable_cache_transfer_scope_ = false;
+      if (!run_by_executor_ &&
+          (platform::is_gpu_place(kernel_type_for_var.place_) ||
+           platform::is_gpu_place(expected_kernel_key.place_))) {
         new_scope = TryCreateTransferScope(kernel_type_for_var,
                                            expected_kernel_key, &scope);
+        enable_cache_transfer_scope_ = true;
       }
       if (!new_scope) {
         new_scope = &scope.NewScope();
@@ -1125,11 +1141,11 @@ Scope* OperatorWithKernel::PrepareData(
       // each result of different input will be the same with the first one.
       // The reason is that if a gpu tensor is the input of a cpu kernel,
       // we will create a new cpu tensor in new scope.
-      // However, if enable_cache_runtime_context, we get the cpu tensor each
+      // However, if enable_cache_runtime_context_, we get the cpu tensor each
       // time, not the gpu tensor.
       // Thus, we set pre_scope_ = nullptr to trigger `new RuntimeContext()` in
       // RunImpl().
-      if (enable_cache_runtime_context) {
+      if (enable_cache_runtime_context_) {
         pre_scope_ = nullptr;
       }
...
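Taken together, the PrepareData and RunImpl changes implement one policy for transfer scopes. A hypothetical distillation of that rule as two predicates; these helper functions do not exist in Paddle and only restate the branches above.

// Cache the transfer scope only for non-executor (NaiveExecutor) runs in
// which a GPU place is involved on either side of the data transform.
bool ShouldCacheTransferScope(bool run_by_executor, bool var_on_gpu,
                              bool kernel_on_gpu) {
  return !run_by_executor && (var_on_gpu || kernel_on_gpu);
}

// A transfer scope created for any other non-executor run is throwaway:
// RunImpl deletes it once the kernel finishes, closing the leak in #15032.
bool ShouldDeleteTransferScopeAfterRun(bool has_transfer_scope,
                                       bool run_by_executor, bool cached) {
  return has_transfer_scope && !run_by_executor && !cached;
}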
@@ -499,9 +499,10 @@ class OperatorWithKernel : public OperatorBase {
   mutable std::unique_ptr<OpKernelFunc> kernel_func_;
   mutable std::unique_ptr<RuntimeContext> runtime_ctx_;
   mutable const Scope* pre_scope_ = nullptr;
-  mutable bool enable_cache_runtime_context = false;
-  mutable bool all_kernels_must_compute_runtime_shape = false;
+  mutable bool enable_cache_runtime_context_ = false;
+  mutable bool all_kernels_must_compute_runtime_shape_ = false;
   mutable std::mutex cache_update_mutex_;
+  mutable bool enable_cache_transfer_scope_ = false;
 };
 
 extern bool OpSupportGPU(const std::string& op_type);
...
@@ -295,7 +295,9 @@ class AllocatorFacadePrivate {
 // Pimpl. Make interface clean.
 AllocatorFacade::AllocatorFacade() : m_(new AllocatorFacadePrivate()) {}
 
-AllocatorFacade::~AllocatorFacade() { delete m_; }
+// delete m_ may cause a core dump when the destructor of Python conflicts
+// with the C++ one.
+AllocatorFacade::~AllocatorFacade() {}
 
 AllocatorFacade& AllocatorFacade::Instance() {
   static AllocatorFacade instance;
...
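Leaving the destructor empty is the classic "leak the singleton" workaround for the static destruction-order problem: when an embedded Python interpreter tears down while C++ static destructors run, freeing m_ can touch state that is already gone. A minimal sketch of the pattern under that reasoning; the names are illustrative, not Paddle's classes.

struct Impl { /* owns allocators, device handles, ... */ };

class Facade {
 public:
  static Facade& Instance() {
    static Facade instance;  // constructed on first use
    return instance;
  }
  // Deliberately empty: impl_ is never freed, so no allocator code runs
  // during shutdown; the OS reclaims the memory at process exit anyway.
  ~Facade() {}

 private:
  Facade() : impl_(new Impl) {}
  Impl* impl_;  // leaked on purpose
};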
@@ -77,6 +77,15 @@ struct paddle_variant_caster<V<Ts...>> {
       }
     }
 
+    if (std::is_same<T, float>::value) {
+      auto caster_int64 = make_caster<int64_t>();
+      if (caster_int64.load(src, convert)) {
+        VLOG(4) << "this value fits both float and int64; cast as int64";
+        value = cast_op<int64_t>(caster_int64);
+        return true;
+      }
+    }
+
     value = cast_op<T>(caster);
     return true;
   }
...
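The reason the caster prefers int64 when T could be float: a binary32 float carries a 24-bit significand, so integers beyond 2^24 are silently rounded, and forcing a large Python int through float would corrupt it. A quick standalone check of that rounding; the values are illustrative, with 10000000001 chosen next to the 10000000000 used in the new tests below.

#include <cstdint>
#include <iostream>

int main() {
  // 2^24 + 1 = 16777217 is the first integer binary32 cannot represent.
  std::cout << (static_cast<float>(16777217) == 16777216.0f) << "\n";  // 1

  // Around 1e10 the spacing between adjacent floats is 1024, so nearby
  // integers collapse onto the same value.
  int64_t v = 10000000001LL;
  std::cout << static_cast<int64_t>(static_cast<float>(v)) << "\n";
  // prints 10000000000
}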
@@ -50,6 +50,34 @@ class TestFillConstantOp2(OpTest):
         self.check_output()
 
 
+class TestFillConstantOp3(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified int64 value
+        '''
+        self.op_type = "fill_constant"
+        self.inputs = {}
+        self.attrs = {'shape': [123, 92], 'value': 10000000000}
+        self.outputs = {'Out': np.full((123, 92), 10000000000)}
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestFillConstantOp4(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified int value
+        '''
+        self.op_type = "fill_constant"
+        self.inputs = {}
+        self.attrs = {'shape': [123, 92], 'value': 3}
+        self.outputs = {'Out': np.full((123, 92), 3)}
+
+    def test_check_output(self):
+        self.check_output()
+
+
 class TestFillConstantOpWithSelectedRows(OpTest):
     def check_with_place(self, place):
         scope = core.Scope()
...