diff --git a/paddle/fluid/framework/attribute.h b/paddle/fluid/framework/attribute.h
index 67054eccb3397ea40f0fb3e2ff2530ee1ea64736..aa452ac220ea63bbf7a79c09b90aadfd2764856b 100644
--- a/paddle/fluid/framework/attribute.h
+++ b/paddle/fluid/framework/attribute.h
@@ -133,6 +133,32 @@ struct ExtractAttribute<std::vector<int64_t>> {
   const std::string& attr_name_;
 };
 
+template <>
+struct ExtractAttribute<float> {
+  explicit ExtractAttribute(const std::string& attr_name)
+      : attr_name_(attr_name) {}
+
+  float* operator()(Attribute& attr) const {
+    if (attr.type() == typeid(int)) {  // NOLINT
+      int val = boost::get<int>(attr);
+      attr = static_cast<float>(val);
+    } else if (attr.type() == typeid(int64_t)) {  // NOLINT
+      int64_t val = boost::get<int64_t>(attr);
+      attr = static_cast<float>(val);
+    }
+    float* attr_value = nullptr;
+    try {
+      attr_value = &boost::get<float>(attr);
+    } catch (boost::bad_get& bad_get) {
+      PADDLE_THROW("Cannot get attribute %s by type float, its type is %s",
+                   attr_name_, paddle::platform::demangle(attr.type().name()));
+    }
+    return attr_value;
+  }
+
+  const std::string& attr_name_;
+};
+
 template <typename T>
 inline proto::AttrType AttrTypeID() {
   Attribute tmp = T();
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index fcd98fb52d1f0236ec58c6ecd1a4269de7f804b5..2f7476aa38c363bee015ecf502ce68f10fbab9f6 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -885,12 +885,12 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
                                  const platform::Place& place) const {
   // To reduce the elapsed time of HasAttr, we use bool variable to record the
   // result of HasAttr.
-  if (!enable_cache_runtime_context && HasAttr(kEnableCacheRuntimeContext))
-    enable_cache_runtime_context = true;
-  if (!all_kernels_must_compute_runtime_shape &&
+  if (!enable_cache_runtime_context_ && HasAttr(kEnableCacheRuntimeContext))
+    enable_cache_runtime_context_ = true;
+  if (!all_kernels_must_compute_runtime_shape_ &&
       HasAttr(kAllKernelsMustComputeRuntimeShape))
-    all_kernels_must_compute_runtime_shape = true;
-  if (!enable_cache_runtime_context) {
+    all_kernels_must_compute_runtime_shape_ = true;
+  if (!enable_cache_runtime_context_) {
     RuntimeContext ctx(Inputs(), Outputs(), scope);
     RunImpl(scope, place, &ctx);
   } else {
@@ -931,7 +931,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
     dev_ctx = pool.Get(kernel_type_->place_);
   }
 
-  if (!all_kernels_must_compute_runtime_shape) {
+  if (!all_kernels_must_compute_runtime_shape_) {
     RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, *runtime_ctx);
     this->InferShape(&infer_shape_ctx);
   }
@@ -981,6 +981,13 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
       }
     }
   }
+
+  // To solve issue #15032, after discussion with @Luotao: for cpu inference
+  // the transfer scope is not cached, so in that case delete the transfer
+  // scope after the run to avoid a memory leak.
+  if (transfer_scope && !run_by_executor_ && !enable_cache_transfer_scope_) {
+    scope.DeleteScope(transfer_scope);
+  }
 }
 
 void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx,
@@ -1114,9 +1121,18 @@ Scope* OperatorWithKernel::PrepareData(
       // If this op is not called by an Executor or ParallelExecutor, it should
       // called by a NaiveExecutor, the NaiveExecutor will cache the scopes and
       // variables, that behavior a lot different.
-      if (!run_by_executor_) {
+      //
+      // To solve issue #15032, after discussion with @Luotao: for cpu
+      // inference, when only cpu kernels are involved and no GPU takes part,
+      // do not cache the transfer scope; tests show cpu inference performance
+      // is not affected by this change.
+      enable_cache_transfer_scope_ = false;
+      if (!run_by_executor_ &&
+          (platform::is_gpu_place(kernel_type_for_var.place_) ||
+           platform::is_gpu_place(expected_kernel_key.place_))) {
         new_scope = TryCreateTransferScope(kernel_type_for_var,
                                            expected_kernel_key, &scope);
+        enable_cache_transfer_scope_ = true;
       }
       if (!new_scope) {
         new_scope = &scope.NewScope();
@@ -1125,11 +1141,11 @@ Scope* OperatorWithKernel::PrepareData(
       // each result of different input will be the same with the first one.
       // The reason is that if a gpu tensor is the input of a cpu kernel,
       // we will create a new cpu tensor in new scope.
-      // However, if enable_cache_runtime_context, we get the cpu tensor each
+      // However, if enable_cache_runtime_context_, we get the cpu tensor each
       // time, not the gpu tensor.
       // Thus, we set pre_scope_ = nullptr to trigger `new RuntimeContext()` in
       // RunImpl().
-      if (enable_cache_runtime_context) {
+      if (enable_cache_runtime_context_) {
         pre_scope_ = nullptr;
       }
 
diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index 8e158e93063cb7620440b0af8433c0baa02eab22..07e7abd5b29abde1473d26e5aea2719658b65838 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -499,9 +499,10 @@ class OperatorWithKernel : public OperatorBase {
   mutable std::unique_ptr<OpKernelFunc> kernel_func_;
   mutable std::unique_ptr<RuntimeContext> runtime_ctx_;
   mutable const Scope* pre_scope_ = nullptr;
-  mutable bool enable_cache_runtime_context = false;
-  mutable bool all_kernels_must_compute_runtime_shape = false;
+  mutable bool enable_cache_runtime_context_ = false;
+  mutable bool all_kernels_must_compute_runtime_shape_ = false;
   mutable std::mutex cache_update_mutex_;
+  mutable bool enable_cache_transfer_scope_ = false;
 };
 
 extern bool OpSupportGPU(const std::string& op_type);
diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc
index 440b2475f1631ce5b0a1018ccd13849cc2568cd5..7de32094a07b95c3ec656892fa6adeacfbab8d0f 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade.cc
@@ -295,7 +295,9 @@ class AllocatorFacadePrivate {
 
 // Pimpl. Make interface clean.
 AllocatorFacade::AllocatorFacade() : m_(new AllocatorFacadePrivate()) {}
-AllocatorFacade::~AllocatorFacade() { delete m_; }
+// Deleting m_ may cause a core dump when the Python destructor conflicts
+// with the C++ one.
+AllocatorFacade::~AllocatorFacade() {}
 
 AllocatorFacade& AllocatorFacade::Instance() {
   static AllocatorFacade instance;
diff --git a/paddle/fluid/pybind/pybind_boost_headers.h b/paddle/fluid/pybind/pybind_boost_headers.h
index 70c3136d095fbdcf27d6fec0b0b17140a3ee82ee..3eb4db175a745c8ea7a3afaff919e4f21d430a8b 100644
--- a/paddle/fluid/pybind/pybind_boost_headers.h
+++ b/paddle/fluid/pybind/pybind_boost_headers.h
@@ -77,6 +77,15 @@ struct paddle_variant_caster<V<Ts...>> {
       }
     }
 
+    if (std::is_same<T, float>::value) {
+      auto caster_int64 = make_caster<int64_t>();
+      if (caster_int64.load(src, convert)) {
+        VLOG(4) << "this value satisfies float and int64 simultaneously";
+        value = cast_op<int64_t>(caster_int64);
+        return true;
+      }
+    }
+
     value = cast_op<T>(caster);
     return true;
   }
diff --git a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py
index fd59c5bb7cff5dd33fae284ba3efe04e667ed75a..e22bd09ed06a5dc2385006498a7794a70c776de8 100644
--- a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py
+++ b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py
@@ -50,6 +50,34 @@ class TestFillConstantOp2(OpTest):
         self.check_output()
 
 
+class TestFillConstantOp3(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified int64 value
+        '''
+        self.op_type = "fill_constant"
+
+        self.inputs = {}
+        self.attrs = {'shape': [123, 92], 'value': 10000000000}
+        self.outputs = {'Out': np.full((123, 92), 10000000000)}
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestFillConstantOp4(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified int value
+        '''
+        self.op_type = "fill_constant"
+
+        self.inputs = {}
+        self.attrs = {'shape': [123, 92], 'value': 3}
+        self.outputs = {'Out': np.full((123, 92), 3)}
+
+    def test_check_output(self):
+        self.check_output()
+
+
 class TestFillConstantOpWithSelectedRows(OpTest):
     def check_with_place(self, place):
         scope = core.Scope()
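
Taken together, these changes let a Python integer that does not fit in int32 be carried through the attribute machinery as an int64 and converted to float only where a kernel asks for a float attribute. The sketch below is not part of the patch; it is a minimal usage example mirroring TestFillConstantOp3 above, assuming the fluid 1.x Python API (fluid.layers.fill_constant, fluid.Executor) and CPU execution.

import numpy as np
import paddle.fluid as fluid

# Build a program that fills a [123, 92] tensor with a value larger than
# INT32_MAX, which previously could not be passed through the op's float
# 'value' attribute from Python.
main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    out = fluid.layers.fill_constant(
        shape=[123, 92], dtype='int64', value=10000000000)

# Run on CPU and check that the large constant survives the attribute path.
exe = fluid.Executor(fluid.CPUPlace())
result, = exe.run(main_prog, fetch_list=[out])
assert np.all(result == 10000000000)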