Remove unnessesary code

test=develop

Remove unnessesary code
test=develop
0a4b6fc0 · minqiyang · 53619a79 · 0a4b6fc0 · 53619a79 · 0a4b6fc0
13 changed file
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -294,7 +294,7 @@ if(WITH_PSLIB)
    list(APPEND EXTERNAL_LIBS pslib_brpc)
    list(APPEND EXTERNAL_LIBS libmct)
 endif(WITH_PSLIB)
-
+    
 if(WITH_AMD_GPU)
    find_package(HIP)
    include(hip)

--- a/cmake/external/robin_map.cmake
+++ b/cmake/external/robin_map.cmake
-include(ExternalProject)
-
-set(ROBIN_MAP_SOURCE_DIR ${THIRD_PARTY_PATH}/robin_map)
-set(ROBIN_MAP_INCLUDE_DIR ${ROBIN_MAP_SOURCE_DIR}/src/extern_robin_map/include)
-
-include_directories(${ROBIN_MAP_INCLUDE_DIR})
-
-ExternalProject_Add(
-  extern_robin_map
-  ${EXTERNAL_PROJECT_LOG_ARGS}
-  GIT_REPOSITORY "https://github.com/Tessil/robin-map.git"
-  GIT_TAG        "v0.5.0"
-  PREFIX         ${ROBIN_MAP_SOURCE_DIR}
-  UPDATE_COMMAND ""
-  CONFIGURE_COMMAND ""
-  BUILD_COMMAND     ""
-  INSTALL_COMMAND   ""
-  TEST_COMMAND      ""
-)
-
-if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
-  set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/robin_map_dummy.c)
-  file(WRITE ${dummyfile} "const char *dummy = \"${dummyfile}\";")
-  add_library(robin_map STATIC ${dummyfile})
-else()
-  add_library(robin_map INTERFACE)
-endif()
-
-add_dependencies(robin_map extern_robin_map)
-
-LIST(APPEND externl_project_dependencies robin_map)
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -83,7 +83,7 @@ cc_test(variable_test SRCS variable_test.cc)
 cc_library(threadpool SRCS threadpool.cc DEPS enforce)
 cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)

-cc_library(scope SRCS scope.cc DEPS glog threadpool xxhash)
+cc_library(scope SRCS scope.cc DEPS glog threadpool)
 cc_test(scope_test SRCS scope_test.cc DEPS scope)

 cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor)

--- a/paddle/fluid/framework/details/execution_strategy.h
+++ b/paddle/fluid/framework/details/execution_strategy.h
@@ -25,7 +25,7 @@ struct ExecutionStrategy {
  size_t num_threads_{0};
  bool use_cuda_{true};
  bool allow_op_delay_{false};
-  size_t num_iteration_per_drop_scope_{1};
+  size_t num_iteration_per_drop_scope_{100};
  ExecutorType type_{kDefault};
  bool dry_run_{false};
 };

--- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
@@ -64,24 +64,21 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run(
  }

  platform::RecordEvent e("ScopeBufferedSSAGraphExecutorAfterRun", nullptr);
-  ++drop_scope_counter_;
+  drop_scope_counter_ += 1;

-  if (!fetch_tensors.empty()) {
+  if (!fetch_tensors.empty() ||
+      drop_scope_counter_ == strategy_.num_iteration_per_drop_scope_) {
+    drop_scope_counter_ = 0;
    // Wait All computational streams
    for (auto p : places_) {
      platform::DeviceContextPool::Instance().Get(p)->Wait();
    }
-  }
-
-  if (drop_scope_counter_ == strategy_.num_iteration_per_drop_scope_) {
-    drop_scope_counter_ = 0;
    for (auto &scope : local_scopes_) {
      auto &local_scope =
          *scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>();
      scope->DeleteScope(local_scope);
    }
  }
-
  if (eptr) {
    std::rethrow_exception(eptr);
  } else {

--- a/paddle/fluid/framework/ir/graph.cc
+++ b/paddle/fluid/framework/ir/graph.cc
@@ -20,10 +20,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/var_desc.h"

-DEFINE_bool(enforce_when_check_program, true,
-            "Checking whether the program is correct or not. We will log "
-            "errors rather than throwing exceptions if this flag turned off");
-
 namespace paddle {
 namespace framework {
 namespace ir {
@@ -48,56 +44,27 @@ void CheckProgram(const ProgramDesc &program) {
        break;
      case _INT(OpRole::kBackward):
      case _INT(OpRole::kBackward) | _INT(OpRole::kLoss):
-        if (!FLAGS_enforce_when_check_program) {
-          PADDLE_ENFORCE(
-              visit.find(_INT(OpRole::kOptimize)) == visit.end(),
-              "Cannot add backward operator %s after optimize operator.",
-              op->Type());
-        } else {
-          if (visit.find(_INT(OpRole::kOptimize)) != visit.end()) {
-            LOG(ERROR)
-                << "Cannot add backward operator %s after optimize operator."
-                << op->Type();
-          }
-        }
+        PADDLE_ENFORCE(
+            visit.find(_INT(OpRole::kOptimize)) == visit.end(),
+            "Cannot add backward operator %s after optimize operator.",
+            op->Type());
        break;
      case _INT(OpRole::kForward) | _INT(OpRole::kLoss):
-        if (!FLAGS_enforce_when_check_program) {
-          PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward) |
-                                    _INT(OpRole::kLoss)) == visit.end(),
-                         "Cannot add backward|loss operator before "
-                         "forward|loss operator %s.",
-                         op->Type());
-          PADDLE_ENFORCE(
-              visit.find(_INT(OpRole::kOptimize)) == visit.end(),
-              "Cannot add forward|loss operator %s after optimize operator.",
-              op->Type());
-        } else {
-          if (visit.find(_INT(OpRole::kBackward) | _INT(OpRole::kLoss)) !=
-              visit.end()) {
-            LOG(ERROR) << "Cannot add backward|loss operator before "
-                       << "forward|loss operator %s." << op->Type();
-          }
-
-          if (visit.find(_INT(OpRole::kOptimize)) != visit.end()) {
-            LOG(ERROR) << "Cannot add forward|loss operator %s after optimize "
-                          "operator."
-                       << op->Type();
-          }
-        }
+        PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward) |
+                                  _INT(OpRole::kLoss)) == visit.end(),
+                       "Cannot add backward|loss operator before "
+                       "forward|loss operator %s.",
+                       op->Type());
+        PADDLE_ENFORCE(
+            visit.find(_INT(OpRole::kOptimize)) == visit.end(),
+            "Cannot add forward|loss operator %s after optimize operator.",
+            op->Type());
        break;
      case _INT(OpRole::kOptimize):
      case _INT(OpRole::kOptimize) | _INT(OpRole::kLRSched):
-        if (!FLAGS_enforce_when_check_program) {
-          PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward)) != visit.end(),
-                         "Optimize operators %s must follow backward operator.",
-                         op->Type());
-        } else {
-          if (visit.find(_INT(OpRole::kBackward)) == visit.end()) {
-            LOG(ERROR) << "Optimize operators %s must follow backward operator."
-                       << op->Type();
-          }
-        }
+        PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward)) != visit.end(),
+                       "Optimize operators %s must follow backward operator.",
+                       op->Type());
        break;
      case _INT(OpRole::kLRSched):
      case _INT(OpRole::kDist):

--- a/paddle/fluid/framework/rw_lock.h
+++ b/paddle/fluid/framework/rw_lock.h
@@ -16,9 +16,7 @@ limitations under the License. */

 #if !defined(_WIN32)
 #include <pthread.h>
-#else
-#include <mutex>  // NOLINT
-#endif            // !_WIN32
+#endif  // !_WIN32

 #include "paddle/fluid/platform/enforce.h"

@@ -31,17 +29,17 @@ struct RWLock {

  ~RWLock() { pthread_rwlock_destroy(&lock_); }

-  inline void RDLock() {
+  void RDLock() {
    PADDLE_ENFORCE_EQ(pthread_rwlock_rdlock(&lock_), 0,
                      "acquire read lock failed");
  }

-  inline void WRLock() {
+  void WRLock() {
    PADDLE_ENFORCE_EQ(pthread_rwlock_wrlock(&lock_), 0,
                      "acquire write lock failed");
  }

-  inline void UNLock() {
+  void UNLock() {
    PADDLE_ENFORCE_EQ(pthread_rwlock_unlock(&lock_), 0, "unlock failed");
  }

@@ -53,44 +51,81 @@ struct RWLock {
 // https://stackoverflow.com/questions/7125250/making-pthread-rwlock-wrlock-recursive
 // In windows, rw_lock seems like a hack. Use empty object and do nothing.
 struct RWLock {
-  // FIXME(minqiyang): use mutex here to do fake lock
-  inline void RDLock() { mutex_.lock(); }
-
-  inline void WRLock() { mutex_.lock(); }
-
-  inline void UNLock() { mutex_.unlock(); }
-
- private:
-  std::mutex mutex_;
+  void RDLock() {}
+  void WRLock() {}
+  void UNLock() {}
 };
 #endif

-class AutoWRLock {
+class RWLockGuard {
 public:
-  explicit AutoWRLock(RWLock* rw_lock) : lock_(rw_lock) { Lock(); }
-
-  inline void Lock() { lock_->WRLock(); }
-
-  inline void UnLock() { lock_->UNLock(); }
-
-  ~AutoWRLock() { UnLock(); }
-
- private:
-  RWLock* lock_;
-};
+  enum Status { kUnLock, kWRLock, kRDLock };
+
+  RWLockGuard(RWLock* rw_lock, Status init_status)
+      : lock_(rw_lock), status_(Status::kUnLock) {
+    switch (init_status) {
+      case Status::kRDLock: {
+        RDLock();
+        break;
+      }
+      case Status::kWRLock: {
+        WRLock();
+        break;
+      }
+      case Status::kUnLock: {
+        break;
+      }
+    }
+  }

-class AutoRDLock {
- public:
-  explicit AutoRDLock(RWLock* rw_lock) : lock_(rw_lock) { Lock(); }
+  void WRLock() {
+    switch (status_) {
+      case Status::kUnLock: {
+        lock_->WRLock();
+        status_ = Status::kWRLock;
+        break;
+      }
+      case Status::kWRLock: {
+        break;
+      }
+      case Status::kRDLock: {
+        PADDLE_THROW(
+            "Please unlock read lock first before invoking write lock.");
+        break;
+      }
+    }
+  }

-  inline void Lock() { lock_->RDLock(); }
+  void RDLock() {
+    switch (status_) {
+      case Status::kUnLock: {
+        lock_->RDLock();
+        status_ = Status::kRDLock;
+        break;
+      }
+      case Status::kRDLock: {
+        break;
+      }
+      case Status::kWRLock: {
+        PADDLE_THROW(
+            "Please unlock write lock first before invoking read lock.");
+        break;
+      }
+    }
+  }

-  inline void UnLock() { lock_->UNLock(); }
+  void UnLock() {
+    if (status_ != Status::kUnLock) {
+      lock_->UNLock();
+      status_ = Status::kUnLock;
+    }
+  }

-  ~AutoRDLock() { UnLock(); }
+  ~RWLockGuard() { UnLock(); }

 private:
  RWLock* lock_;
+  Status status_;
 };

 }  // namespace framework

--- a/paddle/fluid/framework/scope.cc
+++ b/paddle/fluid/framework/scope.cc
@@ -47,15 +47,9 @@ DEFINE_bool(fast_eager_deletion_mode, false,
 // the mutex will cause serious performance issue.
 // So the mutex is disabled when `ON_INFER`.
 #ifdef PADDLE_ON_INFERENCE
-#define SCOPE_KIDS_READER_LOCK
-#define SCOPE_KIDS_WRITER_LOCK
-#define SCOPE_VARS_READER_LOCK
-#define SCOPE_VARS_WRITER_LOCK
+#define SCOPE_LOCK_GUARD
 #else
-#define SCOPE_KIDS_READER_LOCK AutoRDLock auto_lock(&kids_lock_);
-#define SCOPE_KIDS_WRITER_LOCK AutoWRLock auto_lock(&kids_lock_);
-#define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
-#define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
+#define SCOPE_LOCK_GUARD std::lock_guard<std::mutex> lock(mutex_);
 #endif

 namespace paddle {
@@ -73,69 +67,64 @@ bool IsFastEagerDeletionModeEnabled() { return FLAGS_fast_eager_deletion_mode; }
 Scope::~Scope() { DropKids(); }

 Scope& Scope::NewScope() const {
-  Scope* child = new Scope(this);
-  {
-    SCOPE_KIDS_WRITER_LOCK
-    kids_.push_back(child);
-  }
-  return *child;
+  SCOPE_LOCK_GUARD
+  kids_.push_back(new Scope(this));
+  return *kids_.back();
 }

 Variable* Scope::Var(const std::string& name) {
-  SCOPE_VARS_WRITER_LOCK
+  SCOPE_LOCK_GUARD
  return VarInternal(name);
 }

 Variable* Scope::Var(std::string* name) {
+  SCOPE_LOCK_GUARD
  auto new_name = string::Sprintf("%p.%d", this, vars_.size());
  if (name != nullptr) {
    *name = new_name;
  }
-  SCOPE_VARS_WRITER_LOCK
  return VarInternal(new_name);
 }

 Variable* Scope::FindVar(const std::string& name) const {
-  SCOPE_VARS_READER_LOCK
+  SCOPE_LOCK_GUARD
  return FindVarInternal(name);
 }

 Variable* Scope::FindLocalVar(const std::string& name) const {
-  SCOPE_VARS_READER_LOCK
+  SCOPE_LOCK_GUARD
  return FindVarLocally(name);
 }

 const Scope* Scope::FindScope(const Variable* var) const {
-  SCOPE_VARS_READER_LOCK
+  SCOPE_LOCK_GUARD
  return FindScopeInternal(var);
 }

 void Scope::DropKids() {
-  SCOPE_KIDS_WRITER_LOCK
+  SCOPE_LOCK_GUARD
  for (Scope* s : kids_) delete s;
  kids_.clear();
 }

 bool Scope::HasKid(const Scope* scope) const {
-  SCOPE_KIDS_READER_LOCK
+  SCOPE_LOCK_GUARD
  auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
  return it != this->kids_.end();
 }

 std::vector<std::string> Scope::LocalVarNames() const {
+  SCOPE_LOCK_GUARD
  std::vector<std::string> known_vars;
-  {
-    SCOPE_VARS_READER_LOCK
-    known_vars.reserve(this->vars_.size());
-    for (auto& p : vars_) {
-      known_vars.emplace_back(p.first);
-    }
+  known_vars.reserve(this->vars_.size());
+  for (auto& p : vars_) {
+    known_vars.emplace_back(p.first);
  }
  return known_vars;
 }

 void Scope::DeleteScope(Scope* scope) const {
-  SCOPE_KIDS_WRITER_LOCK
+  SCOPE_LOCK_GUARD
  auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
  PADDLE_ENFORCE(it != this->kids_.end(), "%p Cannot find %p as kid scope",
                 this, scope);
@@ -149,8 +138,8 @@ void Scope::DeleteScope(Scope* scope) const {
 }

 void Scope::EraseVars(const std::vector<std::string>& var_names) {
+  SCOPE_LOCK_GUARD
  std::set<std::string> var_set(var_names.begin(), var_names.end());
-  SCOPE_VARS_WRITER_LOCK
  for (auto it = vars_.begin(); it != vars_.end();) {
    if (var_set.find(it->first) != var_set.end()) {
      it = vars_.erase(it);
@@ -162,12 +151,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {

 void Scope::Rename(const std::string& origin_name,
                   const std::string& new_name) const {
-  SCOPE_VARS_WRITER_LOCK
+  SCOPE_LOCK_GUARD
  RenameInternal(origin_name, new_name);
 }

 std::string Scope::Rename(const std::string& origin_name) const {
-  SCOPE_VARS_WRITER_LOCK
+  SCOPE_LOCK_GUARD
  auto new_name = string::Sprintf("%p.%d", this, vars_.size());
  RenameInternal(origin_name, new_name);
  return new_name;

--- a/paddle/fluid/framework/scope.h
+++ b/paddle/fluid/framework/scope.h
@@ -14,19 +14,12 @@ limitations under the License. */

 #pragma once

-extern "C" {
-#include <xxhash.h>
-}
-
-#include <functional>
 #include <list>
-#include <memory>
+#include <mutex>  // NOLINT
 #include <string>
 #include <unordered_map>
-#include <utility>
 #include <vector>

-#include "paddle/fluid/framework/rw_lock.h"
 #include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/platform/macros.h"

@@ -38,14 +31,6 @@ bool IsFastEagerDeletionModeEnabled();

 class Scope;

-namespace inner {
-struct KeyHasher {
-  std::size_t operator()(const std::string& key) const {
-    return XXH32(key.c_str(), key.size(), 1);
-  }
-};
-}  // namespace inner
-
 /**
 * @brief Scope that manage all variables.
 *
@@ -110,14 +95,7 @@ class Scope {
  std::string Rename(const std::string& origin_name) const;

 protected:
-  mutable std::unordered_map<std::string, std::unique_ptr<Variable>,
-                             inner::KeyHasher>
-      vars_;
-  // mutable tsl::robin_map<
-  // std::string, std::unique_ptr<Variable>, std::hash<std::string>,
-  // std::equal_to<std::string>,
-  // std::allocator<std::pair<std::string, std::unique_ptr<Variable>>>, true>
-  // vars_;
+  mutable std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;

 private:
  // Call Scope::NewScope for a sub-scope.
@@ -146,8 +124,7 @@ class Scope {
  DISABLE_COPY_AND_ASSIGN(Scope);

 private:
-  mutable RWLock kids_lock_;
-  mutable RWLock vars_lock_;
+  mutable std::mutex mutex_;
 };

 // Generate some debug string about the inherience structure of scope, quite

--- a/paddle/fluid/framework/spin_lock.h
+++ b/paddle/fluid/framework/spin_lock.h
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#if !defined(_WIN32)
-#include <pthread.h>
-#else
-#include <mutex>  // NOLINT
-#endif            // !_WIN32
-
-#include "paddle/fluid/platform/enforce.h"
-
-namespace paddle {
-namespace framework {
-
-#if !defined(_WIN32)
-struct SpinLock {
-  SpinLock() { pthread_spin_init(&lock_, PTHREAD_PROCESS_PRIVATE); }
-
-  ~SpinLock() { pthread_spin_destroy(&lock_); }
-
-  void Lock() {
-    PADDLE_ENFORCE_EQ(pthread_spin_lock(&lock_), 0, "acquire spin lock failed");
-  }
-
-  void Unlock() {
-    PADDLE_ENFORCE_EQ(pthread_spin_unlock(&lock_), 0,
-                      "release spin lock failed");
-  }
-
- private:
-  pthread_spinlock_t lock_;
-};
-#else
-// FIXME(minqiyang): use mutex here to do fake spin lock
-struct SpinLock {
-  void Lock() { mutex_.lock(); }
-
-  void Unlock() { mutex_.lock(); }
-
- private:
-  std::mutex mutex_;
-};
-#endif
-
-class AutoSpinLock {
- public:
-  explicit SpinLockGuard(SpinLock* spin_lock) : lock_(spin_lock) {
-    lock_->Lock();
-  }
-
-  ~SpinLockGuard() { lock_->Unlock(); }
-
- private:
-  SpinLock* lock_;
-};
-
-}  // namespace framework
-}  // namespace paddle
--- a/paddle/fluid/operators/math/concat_and_split.cu
+++ b/paddle/fluid/operators/math/concat_and_split.cu
@@ -180,7 +180,7 @@ class ConcatFunctor<platform::CUDADeviceContext, T> {
    }
    // Wait() must be called because `inputs_data` may be destructed before
    // kernel ends
-    /* context.Wait(); */
+    context.Wait();
  }
 };

@@ -258,7 +258,7 @@ class SplitFunctor<platform::CUDADeviceContext, T> {
    }
    // Wait() must be called because `outputs_data` may be destructed before
    // kernel ends
-    /* context.Wait(); */
+    context.Wait();
  }
 };


--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -822,7 +822,7 @@ All parameter, weight, gradient are variables in Paddle.
          R"DOC(The type is INT, num_iteration_per_drop_scope indicates how
                many iterations to clean up the temp variables which
                is generated during execution. It may make the execution faster,
-                because the temp variable's shape maybe the same between two iterations. Default 1.
+                because the temp variable's shape maybe the same between two iterations. Default 100.

                NOTES:
                    1. If you fetch data when calling the 'run', the ParallelExecutor

--- a/python/paddle/fluid/profiler.py
+++ b/python/paddle/fluid/profiler.py
@@ -92,7 +92,6 @@ def cuda_profiler(output_file, output_mode=None, config=None):
    config_file = 'nvprof_config_file'
    with open(config_file, 'wb') as fp:
        fp.writelines([six.b("%s\n" % item) for item in config])
-    #Comment this for nvprof
    core.nvprof_init(output_file, output_mode, config_file)
    # Enables profiler collection by the active CUDA profiling tool.
    core.nvprof_start()