Remove unnessesary code

test=develop

Remove unnessesary code
test=develop
0a4b6fc0 · minqiyang · 53619a79 · 0a4b6fc0 · 53619a79 · 0a4b6fc0
13 changed file
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -294,7 +294,7 @@ if(WITH_PSLIB)
    list(APPEND EXTERNAL_LIBS pslib_brpc)
    list(APPEND EXTERNAL_LIBS libmct)
 endif(WITH_PSLIB)
 if(WITH_AMD_GPU)
    find_package(HIP)
    include(hip)

--- a/cmake/external/robin_map.cmake
+++ b/cmake/external/robin_map.cmake
-include(ExternalProject)
-set(ROBIN_MAP_SOURCE_DIR ${THIRD_PARTY_PATH}/robin_map)
-set(ROBIN_MAP_INCLUDE_DIR ${ROBIN_MAP_SOURCE_DIR}/src/extern_robin_map/include)
-include_directories(${ROBIN_MAP_INCLUDE_DIR})
-ExternalProject_Add(
-  extern_robin_map
-  ${EXTERNAL_PROJECT_LOG_ARGS}
-  GIT_REPOSITORY "https://github.com/Tessil/robin-map.git"
-  GIT_TAG        "v0.5.0"
-  PREFIX         ${ROBIN_MAP_SOURCE_DIR}
-  UPDATE_COMMAND ""
-  CONFIGURE_COMMAND ""
-  BUILD_COMMAND     ""
-  INSTALL_COMMAND   ""
-  TEST_COMMAND      ""
-)
-if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
-  set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/robin_map_dummy.c)
-  file(WRITE ${dummyfile} "const char *dummy = \"${dummyfile}\";")
-  add_library(robin_map STATIC ${dummyfile})
-else()
-  add_library(robin_map INTERFACE)
-endif()
-add_dependencies(robin_map extern_robin_map)
-LIST(APPEND externl_project_dependencies robin_map)
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -83,7 +83,7 @@ cc_test(variable_test SRCS variable_test.cc)
 cc_library(threadpool SRCS threadpool.cc DEPS enforce)
 cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)
-cc_library(scope SRCS scope.cc DEPS glog threadpool xxhash)
+cc_library(scope SRCS scope.cc DEPS glog threadpool)
 cc_test(scope_test SRCS scope_test.cc DEPS scope)
 cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor)

--- a/paddle/fluid/framework/details/execution_strategy.h
+++ b/paddle/fluid/framework/details/execution_strategy.h
@@ -25,7 +25,7 @@ struct ExecutionStrategy {
  size_t num_threads_{0};
  bool use_cuda_{true};
  bool allow_op_delay_{false};
-  size_t num_iteration_per_drop_scope_{1};
+  size_t num_iteration_per_drop_scope_{100};
  ExecutorType type_{kDefault};
  bool dry_run_{false};
 };

--- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
@@ -64,24 +64,21 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run(
  }
  platform::RecordEvent e("ScopeBufferedSSAGraphExecutorAfterRun", nullptr);
-  ++drop_scope_counter_;
+  drop_scope_counter_ += 1;
-  if (!fetch_tensors.empty()) {
+  if (!fetch_tensors.empty() ||
+      drop_scope_counter_ == strategy_.num_iteration_per_drop_scope_) {
+    drop_scope_counter_ = 0;
    // Wait All computational streams
    for (auto p : places_) {
      platform::DeviceContextPool::Instance().Get(p)->Wait();
    }
-  }
-  if (drop_scope_counter_ == strategy_.num_iteration_per_drop_scope_) {
-    drop_scope_counter_ = 0;
    for (auto &scope : local_scopes_) {
      auto &local_scope =
          *scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>();
      scope->DeleteScope(local_scope);
    }
  }
  if (eptr) {
    std::rethrow_exception(eptr);
  } else {

--- a/paddle/fluid/framework/ir/graph.cc
+++ b/paddle/fluid/framework/ir/graph.cc
@@ -20,10 +20,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/var_desc.h"
-DEFINE_bool(enforce_when_check_program, true,
-            "Checking whether the program is correct or not. We will log "
-            "errors rather than throwing exceptions if this flag turned off");
 namespace paddle {
 namespace framework {
 namespace ir {
@@ -48,56 +44,27 @@ void CheckProgram(const ProgramDesc &program) {
        break;
      case _INT(OpRole::kBackward):
      case _INT(OpRole::kBackward) | _INT(OpRole::kLoss):
-        if (!FLAGS_enforce_when_check_program) {
+        PADDLE_ENFORCE(
-          PADDLE_ENFORCE(
+            visit.find(_INT(OpRole::kOptimize)) == visit.end(),
-              visit.find(_INT(OpRole::kOptimize)) == visit.end(),
+            "Cannot add backward operator %s after optimize operator.",
-              "Cannot add backward operator %s after optimize operator.",
+            op->Type());
-              op->Type());
-        } else {
-          if (visit.find(_INT(OpRole::kOptimize)) != visit.end()) {
-            LOG(ERROR)
-                << "Cannot add backward operator %s after optimize operator."
-                << op->Type();
-          }
-        }
        break;
      case _INT(OpRole::kForward) | _INT(OpRole::kLoss):
-        if (!FLAGS_enforce_when_check_program) {
+        PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward) |
-          PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward) |
+                                  _INT(OpRole::kLoss)) == visit.end(),
-                                    _INT(OpRole::kLoss)) == visit.end(),
+                       "Cannot add backward|loss operator before "
-                         "Cannot add backward|loss operator before "
+                       "forward|loss operator %s.",
-                         "forward|loss operator %s.",
+                       op->Type());
-                         op->Type());
+        PADDLE_ENFORCE(
-          PADDLE_ENFORCE(
+            visit.find(_INT(OpRole::kOptimize)) == visit.end(),
-              visit.find(_INT(OpRole::kOptimize)) == visit.end(),
+            "Cannot add forward|loss operator %s after optimize operator.",
-              "Cannot add forward|loss operator %s after optimize operator.",
+            op->Type());
-              op->Type());
-        } else {
-          if (visit.find(_INT(OpRole::kBackward) | _INT(OpRole::kLoss)) !=
-              visit.end()) {
-            LOG(ERROR) << "Cannot add backward|loss operator before "
-                       << "forward|loss operator %s." << op->Type();
-          }
-          if (visit.find(_INT(OpRole::kOptimize)) != visit.end()) {
-            LOG(ERROR) << "Cannot add forward|loss operator %s after optimize "
-                          "operator."
-                       << op->Type();
-          }
-        }
        break;
      case _INT(OpRole::kOptimize):
      case _INT(OpRole::kOptimize) | _INT(OpRole::kLRSched):
-        if (!FLAGS_enforce_when_check_program) {
+        PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward)) != visit.end(),
-          PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward)) != visit.end(),
+                       "Optimize operators %s must follow backward operator.",
-                         "Optimize operators %s must follow backward operator.",
+                       op->Type());
-                         op->Type());
-        } else {
-          if (visit.find(_INT(OpRole::kBackward)) == visit.end()) {
-            LOG(ERROR) << "Optimize operators %s must follow backward operator."
-                       << op->Type();
-          }
-        }
        break;
      case _INT(OpRole::kLRSched):
      case _INT(OpRole::kDist):

--- a/paddle/fluid/framework/rw_lock.h
+++ b/paddle/fluid/framework/rw_lock.h
@@ -16,9 +16,7 @@ limitations under the License. */
 #if !defined(_WIN32)
 #include <pthread.h>
-#else
+#endif  // !_WIN32
-#include <mutex>  // NOLINT
-#endif            // !_WIN32
 #include "paddle/fluid/platform/enforce.h"
@@ -31,17 +29,17 @@ struct RWLock {
  ~RWLock() { pthread_rwlock_destroy(&lock_); }
-  inline void RDLock() {
+  void RDLock() {
    PADDLE_ENFORCE_EQ(pthread_rwlock_rdlock(&lock_), 0,
                      "acquire read lock failed");
  }
-  inline void WRLock() {
+  void WRLock() {
    PADDLE_ENFORCE_EQ(pthread_rwlock_wrlock(&lock_), 0,
                      "acquire write lock failed");
  }
-  inline void UNLock() {
+  void UNLock() {
    PADDLE_ENFORCE_EQ(pthread_rwlock_unlock(&lock_), 0, "unlock failed");
  }
@@ -53,44 +51,81 @@ struct RWLock {
 // https://stackoverflow.com/questions/7125250/making-pthread-rwlock-wrlock-recursive
 // In windows, rw_lock seems like a hack. Use empty object and do nothing.
 struct RWLock {
-  // FIXME(minqiyang): use mutex here to do fake lock
+  void RDLock() {}
-  inline void RDLock() { mutex_.lock(); }
+  void WRLock() {}
+  void UNLock() {}
-  inline void WRLock() { mutex_.lock(); }
-  inline void UNLock() { mutex_.unlock(); }
- private:
-  std::mutex mutex_;
 };
 #endif
-class AutoWRLock {
+class RWLockGuard {
 public:
-  explicit AutoWRLock(RWLock* rw_lock) : lock_(rw_lock) { Lock(); }
+  enum Status { kUnLock, kWRLock, kRDLock };
-  inline void Lock() { lock_->WRLock(); }
+  RWLockGuard(RWLock* rw_lock, Status init_status)
+      : lock_(rw_lock), status_(Status::kUnLock) {
-  inline void UnLock() { lock_->UNLock(); }
+    switch (init_status) {
+      case Status::kRDLock: {
-  ~AutoWRLock() { UnLock(); }
+        RDLock();
+        break;
- private:
+      }
-  RWLock* lock_;
+      case Status::kWRLock: {
-};
+        WRLock();
+        break;
+      }
+      case Status::kUnLock: {
+        break;
+      }
+    }
+  }
-class AutoRDLock {
+  void WRLock() {
- public:
+    switch (status_) {
-  explicit AutoRDLock(RWLock* rw_lock) : lock_(rw_lock) { Lock(); }
+      case Status::kUnLock: {
+        lock_->WRLock();
+        status_ = Status::kWRLock;
+        break;
+      }
+      case Status::kWRLock: {
+        break;
+      }
+      case Status::kRDLock: {
+        PADDLE_THROW(
+            "Please unlock read lock first before invoking write lock.");
+        break;
+      }
+    }
+  }
-  inline void Lock() { lock_->RDLock(); }
+  void RDLock() {
+    switch (status_) {
+      case Status::kUnLock: {
+        lock_->RDLock();
+        status_ = Status::kRDLock;
+        break;
+      }
+      case Status::kRDLock: {
+        break;
+      }
+      case Status::kWRLock: {
+        PADDLE_THROW(
+            "Please unlock write lock first before invoking read lock.");
+        break;
+      }
+    }
+  }
-  inline void UnLock() { lock_->UNLock(); }
+  void UnLock() {
+    if (status_ != Status::kUnLock) {
+      lock_->UNLock();
+      status_ = Status::kUnLock;
+    }
+  }
-  ~AutoRDLock() { UnLock(); }
+  ~RWLockGuard() { UnLock(); }
 private:
  RWLock* lock_;
+  Status status_;
 };
 }  // namespace framework

--- a/paddle/fluid/framework/scope.cc
+++ b/paddle/fluid/framework/scope.cc
@@ -47,15 +47,9 @@ DEFINE_bool(fast_eager_deletion_mode, false,
 // the mutex will cause serious performance issue.
 // So the mutex is disabled when `ON_INFER`.
 #ifdef PADDLE_ON_INFERENCE
-#define SCOPE_KIDS_READER_LOCK
+#define SCOPE_LOCK_GUARD
-#define SCOPE_KIDS_WRITER_LOCK
-#define SCOPE_VARS_READER_LOCK
-#define SCOPE_VARS_WRITER_LOCK
 #else
-#define SCOPE_KIDS_READER_LOCK AutoRDLock auto_lock(&kids_lock_);
+#define SCOPE_LOCK_GUARD std::lock_guard<std::mutex> lock(mutex_);
-#define SCOPE_KIDS_WRITER_LOCK AutoWRLock auto_lock(&kids_lock_);
-#define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
-#define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
 #endif
 namespace paddle {
@@ -73,69 +67,64 @@ bool IsFastEagerDeletionModeEnabled() { return FLAGS_fast_eager_deletion_mode; }
 Scope::~Scope() { DropKids(); }
 Scope& Scope::NewScope() const {
-  Scope* child = new Scope(this);
+  SCOPE_LOCK_GUARD
-  {
+  kids_.push_back(new Scope(this));
-    SCOPE_KIDS_WRITER_LOCK
+  return *kids_.back();
-    kids_.push_back(child);
-  }
-  return *child;
 }
 Variable* Scope::Var(const std::string& name) {
-  SCOPE_VARS_WRITER_LOCK
+  SCOPE_LOCK_GUARD
  return VarInternal(name);
 }
 Variable* Scope::Var(std::string* name) {
+  SCOPE_LOCK_GUARD
  auto new_name = string::Sprintf("%p.%d", this, vars_.size());
  if (name != nullptr) {
    *name = new_name;
  }
-  SCOPE_VARS_WRITER_LOCK
  return VarInternal(new_name);
 }
 Variable* Scope::FindVar(const std::string& name) const {
-  SCOPE_VARS_READER_LOCK
+  SCOPE_LOCK_GUARD
  return FindVarInternal(name);
 }
 Variable* Scope::FindLocalVar(const std::string& name) const {
-  SCOPE_VARS_READER_LOCK
+  SCOPE_LOCK_GUARD
  return FindVarLocally(name);
 }
 const Scope* Scope::FindScope(const Variable* var) const {
-  SCOPE_VARS_READER_LOCK
+  SCOPE_LOCK_GUARD
  return FindScopeInternal(var);
 }
 void Scope::DropKids() {
-  SCOPE_KIDS_WRITER_LOCK
+  SCOPE_LOCK_GUARD
  for (Scope* s : kids_) delete s;
  kids_.clear();
 }
 bool Scope::HasKid(const Scope* scope) const {
-  SCOPE_KIDS_READER_LOCK
+  SCOPE_LOCK_GUARD
  auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
  return it != this->kids_.end();
 }
 std::vector<std::string> Scope::LocalVarNames() const {
+  SCOPE_LOCK_GUARD
  std::vector<std::string> known_vars;
-  {
+  known_vars.reserve(this->vars_.size());
-    SCOPE_VARS_READER_LOCK
+  for (auto& p : vars_) {
-    known_vars.reserve(this->vars_.size());
+    known_vars.emplace_back(p.first);
-    for (auto& p : vars_) {
-      known_vars.emplace_back(p.first);
-    }
  }
  return known_vars;
 }
 void Scope::DeleteScope(Scope* scope) const {
-  SCOPE_KIDS_WRITER_LOCK
+  SCOPE_LOCK_GUARD
  auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
  PADDLE_ENFORCE(it != this->kids_.end(), "%p Cannot find %p as kid scope",
                 this, scope);
@@ -149,8 +138,8 @@ void Scope::DeleteScope(Scope* scope) const {
 }
 void Scope::EraseVars(const std::vector<std::string>& var_names) {
+  SCOPE_LOCK_GUARD
  std::set<std::string> var_set(var_names.begin(), var_names.end());
-  SCOPE_VARS_WRITER_LOCK
  for (auto it = vars_.begin(); it != vars_.end();) {
    if (var_set.find(it->first) != var_set.end()) {
      it = vars_.erase(it);
@@ -162,12 +151,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
 void Scope::Rename(const std::string& origin_name,
                   const std::string& new_name) const {
-  SCOPE_VARS_WRITER_LOCK
+  SCOPE_LOCK_GUARD
  RenameInternal(origin_name, new_name);
 }
 std::string Scope::Rename(const std::string& origin_name) const {
-  SCOPE_VARS_WRITER_LOCK
+  SCOPE_LOCK_GUARD
  auto new_name = string::Sprintf("%p.%d", this, vars_.size());
  RenameInternal(origin_name, new_name);
  return new_name;

--- a/paddle/fluid/framework/scope.h
+++ b/paddle/fluid/framework/scope.h
@@ -14,19 +14,12 @@ limitations under the License. */
 #pragma once
-extern "C" {
-#include <xxhash.h>
-}
-#include <functional>
 #include <list>
-#include <memory>
+#include <mutex>  // NOLINT
 #include <string>
 #include <unordered_map>
-#include <utility>
 #include <vector>
-#include "paddle/fluid/framework/rw_lock.h"
 #include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/platform/macros.h"
@@ -38,14 +31,6 @@ bool IsFastEagerDeletionModeEnabled();
 class Scope;
-namespace inner {
-struct KeyHasher {
-  std::size_t operator()(const std::string& key) const {
-    return XXH32(key.c_str(), key.size(), 1);
-  }
-};
-}  // namespace inner
 /**
 * @brief Scope that manage all variables.
 *
@@ -110,14 +95,7 @@ class Scope {
  std::string Rename(const std::string& origin_name) const;
 protected:
-  mutable std::unordered_map<std::string, std::unique_ptr<Variable>,
+  mutable std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
-                             inner::KeyHasher>
-      vars_;
-  // mutable tsl::robin_map<
-  // std::string, std::unique_ptr<Variable>, std::hash<std::string>,
-  // std::equal_to<std::string>,
-  // std::allocator<std::pair<std::string, std::unique_ptr<Variable>>>, true>
-  // vars_;
 private:
  // Call Scope::NewScope for a sub-scope.
@@ -146,8 +124,7 @@ class Scope {
  DISABLE_COPY_AND_ASSIGN(Scope);
 private:
-  mutable RWLock kids_lock_;
+  mutable std::mutex mutex_;
-  mutable RWLock vars_lock_;
 };
 // Generate some debug string about the inherience structure of scope, quite

--- a/paddle/fluid/framework/spin_lock.h
+++ b/paddle/fluid/framework/spin_lock.h
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#pragma once
-#if !defined(_WIN32)
-#include <pthread.h>
-#else
-#include <mutex>  // NOLINT
-#endif            // !_WIN32
-#include "paddle/fluid/platform/enforce.h"
-namespace paddle {
-namespace framework {
-#if !defined(_WIN32)
-struct SpinLock {
-  SpinLock() { pthread_spin_init(&lock_, PTHREAD_PROCESS_PRIVATE); }
-  ~SpinLock() { pthread_spin_destroy(&lock_); }
-  void Lock() {
-    PADDLE_ENFORCE_EQ(pthread_spin_lock(&lock_), 0, "acquire spin lock failed");
-  }
-  void Unlock() {
-    PADDLE_ENFORCE_EQ(pthread_spin_unlock(&lock_), 0,
-                      "release spin lock failed");
-  }
- private:
-  pthread_spinlock_t lock_;
-};
-#else
-// FIXME(minqiyang): use mutex here to do fake spin lock
-struct SpinLock {
-  void Lock() { mutex_.lock(); }
-  void Unlock() { mutex_.lock(); }
- private:
-  std::mutex mutex_;
-};
-#endif
-class AutoSpinLock {
- public:
-  explicit SpinLockGuard(SpinLock* spin_lock) : lock_(spin_lock) {
-    lock_->Lock();
-  }
-  ~SpinLockGuard() { lock_->Unlock(); }
- private:
-  SpinLock* lock_;
-};
-}  // namespace framework
-}  // namespace paddle
--- a/paddle/fluid/operators/math/concat_and_split.cu
+++ b/paddle/fluid/operators/math/concat_and_split.cu
@@ -180,7 +180,7 @@ class ConcatFunctor<platform::CUDADeviceContext, T> {
    }
    // Wait() must be called because `inputs_data` may be destructed before
    // kernel ends
-    /* context.Wait(); */
+    context.Wait();
  }
 };
@@ -258,7 +258,7 @@ class SplitFunctor<platform::CUDADeviceContext, T> {
    }
    // Wait() must be called because `outputs_data` may be destructed before
    // kernel ends
-    /* context.Wait(); */
+    context.Wait();
  }
 };

--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -822,7 +822,7 @@ All parameter, weight, gradient are variables in Paddle.
          R"DOC(The type is INT, num_iteration_per_drop_scope indicates how
                many iterations to clean up the temp variables which
                is generated during execution. It may make the execution faster,
-                because the temp variable's shape maybe the same between two iterations. Default 1.
+                because the temp variable's shape maybe the same between two iterations. Default 100.
                NOTES:
                    1. If you fetch data when calling the 'run', the ParallelExecutor

--- a/python/paddle/fluid/profiler.py
+++ b/python/paddle/fluid/profiler.py
@@ -92,7 +92,6 @@ def cuda_profiler(output_file, output_mode=None, config=None):
    config_file = 'nvprof_config_file'
    with open(config_file, 'wb') as fp:
        fp.writelines([six.b("%s\n" % item) for item in config])
-    #Comment this for nvprof
    core.nvprof_init(output_file, output_mode, config_file)
    # Enables profiler collection by the active CUDA profiling tool.
    core.nvprof_start()