diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e7ffe72b5fb846fb55ab8dc4809d87a40cfe06c..6bb0e5f51f4bee20905016579a99715859ab37c5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,8 @@ message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: " "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: " "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}") +message(STATUS "AR tools: ${CMAKE_AR}") + if(WIN32) set(CMAKE_SUPPRESS_REGENERATION ON) set(CMAKE_STATIC_LIBRARY_PREFIX lib) diff --git a/paddle/fluid/framework/grad_op_desc_maker.h b/paddle/fluid/framework/grad_op_desc_maker.h index 9bccb1a32bf63b30351ef4428594691b0eef0b6a..f2f4c53eea2150b68f15d2a655809d94611b2034 100644 --- a/paddle/fluid/framework/grad_op_desc_maker.h +++ b/paddle/fluid/framework/grad_op_desc_maker.h @@ -14,7 +14,9 @@ limitations under the License. */ #pragma once #include +#include #include +#include #include #include #include "paddle/fluid/framework/op_desc.h" @@ -55,11 +57,11 @@ class GradOpDescMakerBase { std::back_inserter(ret_val), [this](const std::string& fwd_var_name) -> std::string { auto g_name = GradVarName(fwd_var_name); - if (no_grad_set_.count(g_name)) { - return kEmptyVarName; - } else { + if (no_grad_set_.empty() || !no_grad_set_.count(g_name)) { (*this->grad_to_var_)[g_name] = fwd_var_name; return g_name; + } else { + return kEmptyVarName; } }); if (!drop_empty_grad) { diff --git a/paddle/fluid/imperative/CMakeLists.txt b/paddle/fluid/imperative/CMakeLists.txt index ec8dedd605235a2d197e6a313bd589d5b9520cdf..0d116a6495477ca69c10c130e63247a4f6c03b23 100644 --- a/paddle/fluid/imperative/CMakeLists.txt +++ b/paddle/fluid/imperative/CMakeLists.txt @@ -2,4 +2,5 @@ if(WITH_PYTHON) cc_library(layer SRCS layer.cc DEPS proto_desc operator device_context blas pybind) cc_library(tracer SRCS tracer.cc DEPS proto_desc device_context pybind) cc_library(engine SRCS engine.cc) +cc_library(imperative_profiler SRCS profiler.cc) endif() diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index 5530823b90f6580692456253b0eb9d0af4e3240b..d7f7967f72596001018f91ecfcc985c1d0920ec2 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -239,7 +239,7 @@ std::map> OpBase::ApplyGrad() { VLOG(3) << "apply grad op " << grad_op_desc->Type(); // Allocate tmp grad output variable - for (auto it : grad_output_variable_map) { + for (const auto& it : grad_output_variable_map) { auto& outputs = tmp_grad_outputs[k][it.first]; outputs.reserve(it.second.size()); for (size_t i = 0; i < it.second.size(); ++i) { @@ -273,9 +273,9 @@ std::map> OpBase::ApplyGrad() { // Add tmp grad outputs to original grad vars for (size_t k = 0; k < grad_output_vars_.size(); ++k) { - for (auto it : grad_output_vars_[k]) { + for (const auto& it : grad_output_vars_[k]) { auto& outputs = tmp_grad_outputs[k][it.first]; - auto& origin_outputs = it.second; + const auto& origin_outputs = it.second; PADDLE_ENFORCE_EQ(outputs.size(), origin_outputs.size()); for (size_t i = 0; i < outputs.size(); ++i) { diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h index 618a5b7a03295ce679dc6a88e0eac57069e78b8b..27cb1c84f568867506afc0ca5ce6eb1b28447efe 100644 --- a/paddle/fluid/imperative/layer.h +++ b/paddle/fluid/imperative/layer.h @@ -294,17 +294,23 @@ class PYBIND11_HIDDEN OpBase { void InvokeBackwardHooks(); - void TrackPreOp(const VarBase* inp_var, const std::string& inp_name) { - if (inp_var->PreOp() && !inp_var->IsStopGradient()) { - VLOG(3) << "add pre op " << inp_var->PreOp()->Type() << " in slot " - << inp_name; - pre_ops_[inp_name].push_back(inp_var->PreOp()); - pre_ops_out_idx_[inp_name].push_back(inp_var->PreOpOutIdx()); - } else { - VLOG(3) << "no pre op in slot " << inp_name - << " input var stop_gradient: " << inp_var->IsStopGradient(); - pre_ops_[inp_name].push_back(nullptr); - // pre_ops_out_idx_[inp_name].push_back(-1); + void TrackPreOp(const std::string& inp_name, + const std::vector& inputs) { + auto& pre_ops_list = pre_ops_[inp_name]; + pre_ops_list.reserve(inputs.size()); + auto& pre_ops_out_idx_list = pre_ops_out_idx_[inp_name]; + for (VarBase* inp_var : inputs) { + if (inp_var->PreOp() && !inp_var->IsStopGradient()) { + VLOG(3) << "add pre op " << inp_var->PreOp()->Type() << " in slot " + << inp_name; + pre_ops_list.emplace_back(inp_var->PreOp()); + pre_ops_out_idx_list.push_back(inp_var->PreOpOutIdx()); + } else { + VLOG(3) << "no pre op in slot " << inp_name + << " input var stop_gradient: " << inp_var->IsStopGradient(); + pre_ops_list.emplace_back(nullptr); + // pre_ops_out_idx_list.push_back(-1); + } } } diff --git a/paddle/fluid/imperative/profiler.cc b/paddle/fluid/imperative/profiler.cc new file mode 100644 index 0000000000000000000000000000000000000000..34570b3a60ec83fdeb1577789271942125b16eb1 --- /dev/null +++ b/paddle/fluid/imperative/profiler.cc @@ -0,0 +1,62 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/imperative/profiler.h" + +#ifdef WITH_GPERFTOOLS +#include "gperftools/profiler.h" +#endif +#include +#include +#include // NOLINT +#include // NOLINT + +DEFINE_string( + tracer_profile_fname, "xxgperf", + "Profiler filename for imperative tracer, which generated by gperftools." + "Only valid when compiled `WITH_PROFILER=ON`. Empty if disable."); + +namespace paddle { +namespace imperative { + +static std::once_flag gTracerProfileOnce; +#ifdef WITH_GPERFTOOLS +static bool gTracerProfilerStarted = false; +#endif + +void StartProfile() { + if (!FLAGS_tracer_profile_fname.empty()) { + std::call_once(gTracerProfileOnce, [] { +#ifdef WITH_GPERFTOOLS + ProfilerStart(FLAGS_tracer_profile_fname.c_str()); + gTracerProfilerStarted = true; +#else + LOG(WARNING) << "Paddle is not compiled with gperftools. " + "FLAGS_tracer_profile_fname will be ignored"; +#endif + }); + } +} + +void StopProfile() { +#ifdef WITH_GPERFTOOLS + ProfilerFlush(); +#else + LOG(WARNING) << "Paddle is not compiled with gperftools. " + "FLAGS_tracer_profile_fname will be ignored"; +#endif +} + +} // namespace imperative +} // namespace paddle diff --git a/paddle/fluid/imperative/profiler.h b/paddle/fluid/imperative/profiler.h new file mode 100644 index 0000000000000000000000000000000000000000..d52aeed4e81755cfa285616d7b0a7e79061c6af8 --- /dev/null +++ b/paddle/fluid/imperative/profiler.h @@ -0,0 +1,25 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +namespace paddle { +namespace imperative { + +extern void StartProfile(); + +extern void StopProfile(); + +} // namespace imperative +} // namespace paddle diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc index 7ee92b4d8c46d8814400dbc02847d701005f3d5b..3a6427aa917abc72c7fbe945193406d294b023b8 100644 --- a/paddle/fluid/imperative/tracer.cc +++ b/paddle/fluid/imperative/tracer.cc @@ -23,23 +23,9 @@ #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" -#ifdef WITH_GPERFTOOLS -#include "gperftools/profiler.h" -#endif - -DEFINE_string( - tracer_profile_fname, "", - "Profiler filename for imperative tracer, which generated by gperftools." - "Only valid when compiled `WITH_PROFILER=ON`. Empty if disable."); - namespace paddle { namespace imperative { -static std::once_flag gTracerProfileOnce; -#ifdef WITH_GPERFTOOLS -static bool gTracerProfilerStarted = false; -#endif - void CreateGradOp(const framework::OpDesc& op_desc, const std::unordered_set& no_grad_set, const std::vector& grad_sub_block, @@ -145,31 +131,13 @@ framework::VariableNameMap CreateOutputVarNameMap( return result; } -Tracer::Tracer(framework::BlockDesc* root_block) : root_block_(root_block) { - if (!FLAGS_tracer_profile_fname.empty()) { - std::call_once(gTracerProfileOnce, [] { -#ifdef WITH_GPERFTOOLS - ProfilerStart(FLAGS_tracer_profile_fname.c_str()); - gTracerProfilerStarted = true; -#else - LOG(WARNING) << "Paddle is not compiled with gperftools. " - "FLAGS_tracer_profile_fname will be ignored"; -#endif - }); - } -} +Tracer::Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {} std::set Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, const VarBasePtrMap& outputs, framework::AttributeMap attrs_map, const platform::Place expected_place, const bool stop_gradient) { -#ifdef WITH_GPERFTOOLS - if (gTracerProfilerStarted) { - ProfilerFlush(); - } -#endif - framework::VariableValueMap invars_map; framework::VariableValueMap outvars_map; @@ -184,7 +152,6 @@ std::set Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, inp->Name()); invars.emplace_back(inp->var_); - op->TrackPreOp(inp, it.first); if (!stop_gradient) { current_vars_map[inp->Name()] = inp; } @@ -192,6 +159,7 @@ std::set Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, << " inited: " << inp->var_->IsInitialized() << " stop_grad: " << inp->IsStopGradient(); } + op->TrackPreOp(it.first, it.second); } op->output_vars_ = outputs; @@ -319,9 +287,7 @@ std::vector Tracer::PyTrace(OpBase* op, std::vector ret_vars = PyLayer::Apply(op->forward_id_, inputs); - for (VarBase* inp : inputs) { - op->TrackPreOp(inp, PyLayer::kFwdInp); - } + op->TrackPreOp(PyLayer::kFwdInp, inputs); std::vector& outputs = op->output_vars_[PyLayer::kFwdOut]; outputs.reserve(ret_vars.size()); diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 762640d6d1ce12dff511fc7149e872efa834036c..d27ef8fe3c33f0b293671a4fdac9e574cb92c806 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -91,5 +91,5 @@ if(WITH_TESTING) add_subdirectory(tests/book) if(WITH_INFERENCE_API_TEST) add_subdirectory(tests/api) - endif() + endif() endif() diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 4ac5b83c56b114f4e3e4c78710716adc636ebe1d..f1385f57184eceec49b791cf6c89641b098f036a 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -1,6 +1,6 @@ set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler layer scope_pool - tracer analysis_predictor) + tracer analysis_predictor imperative_profiler) if(WITH_PYTHON) list(APPEND PYBIND_DEPS py_func_op) diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 6bbda69297a48ce27ce23282c4e08d49ee3cce6c..29c8e6a12940eb5652a0a97bea27c6fed5612001 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -42,6 +42,7 @@ void BindTracer(pybind11::module* m) { framework::AttributeMap attrs_map, const platform::CPUPlace expected_place, const bool stop_gradient = false) { + pybind11::gil_scoped_release release; return self.Trace(op, inputs, outputs, attrs_map, expected_place, stop_gradient); }) @@ -52,6 +53,7 @@ void BindTracer(pybind11::module* m) { framework::AttributeMap attrs_map, const platform::CUDAPlace expected_place, const bool stop_gradient = false) { + pybind11::gil_scoped_release release; return self.Trace(op, inputs, outputs, attrs_map, expected_place, stop_gradient); }) diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 5a753d0a780996727791469cd3aea27009b35e33..7ea3e1109638beedb7490bc64c9ac08c6ac09713 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -36,6 +36,7 @@ limitations under the License. */ #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/version.h" #include "paddle/fluid/imperative/layer.h" +#include "paddle/fluid/imperative/profiler.h" #include "paddle/fluid/memory/allocation/allocator_strategy.h" #include "paddle/fluid/memory/allocation/legacy_allocator.h" #include "paddle/fluid/operators/activation_op.h" @@ -156,6 +157,11 @@ PYBIND11_MODULE(core, m) { m.def("print_mem_usage", []() { return memory::allocation::GPUMemMonitor.PrintMemUsage(); }); + m.def("start_imperative_gperf_profiler", + []() { imperative::StartProfile(); }); + + m.def("stop_imperative_gperf_profiler", []() { imperative::StopProfile(); }); + py::class_(m, "VarBase", R"DOC()DOC") .def( py::init