"remove cudnn devicecontext" (#7207)

* "remove cudnndevicecontext"
* "remove unused init code"
* "fix hash functions"

"remove cudnn devicecontext" (#7207)
* "remove cudnndevicecontext"
* "remove unused init code"
* "fix hash functions"
a4024a5f · dzhwinter · GitHub · cd5fad13 · a4024a5f · a4024a5f
7 changed files
--- a/doc/design/support_new_device.md
+++ b/doc/design/support_new_device.md
@@ -48,8 +48,8 @@ Fluid uses class [DeviceContext](https://github.com/PaddlePaddle/Paddle/blob/dev


 ```
-                /->  CPUDeviceContext   --> MKLDeviceContext
-DeviceContext ---->  CUDADeviceContext  --> CUDNNDeviceContext
+                /->  CPUDeviceContext   
+DeviceContext ---->  CUDADeviceContext  
                \->  FPGADeviceContext
 ```

@@ -79,16 +79,6 @@ private:
 };
 ```

- CUDNNDeviceContext
-
-```
-class CUDNNDeviceContext : public CUDADeviceContext {
-  private:
-    cudnnHandle_t cudnn_handle_;
-};
-```
-
-
 ### Memory and Tensor



--- a/paddle/framework/library_type.h
+++ b/paddle/framework/library_type.h
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
+#include <cctype>

 namespace paddle {
 namespace framework {
@@ -41,6 +42,9 @@ inline std::string LibraryTypeToString(const LibraryType& library_type) {

 inline LibraryType StringToLibraryType(const char* ctype) {
  std::string s(ctype);
+  for (size_t i = 0; i < s.size(); ++i) {
+    s[i] = toupper(s[i]);
+  }
  if (s == std::string("PLAIN")) {
    return LibraryType::kPlain;
  } else if (s == std::string("MKLDNN")) {

--- a/paddle/framework/op_kernel_type.h
+++ b/paddle/framework/op_kernel_type.h
@@ -26,13 +26,12 @@ namespace framework {
 struct OpKernelType {
  struct Hash {
    size_t operator()(const OpKernelType& key) const {
-      int place = key.place_.which() + (1 << LEFT_SHIFT);
-      int data_type =
-          static_cast<int>(key.data_type_) + (1 << (LEFT_SHIFT + 1));
-      int data_layout =
-          static_cast<int>(key.data_layout_) + (1 << (LEFT_SHIFT + 2));
-      int library_type =
-          static_cast<int>(key.library_type_) + (1 << (LEFT_SHIFT + 3));
+      int place = key.place_.which();
+      int data_type = static_cast<int>(key.data_type_) << LEFT_SHIFT;
+      int data_layout = static_cast<int>(key.data_layout_) << (LEFT_SHIFT * 2);
+      int library_type = static_cast<int>(key.library_type_)
+                         << (LEFT_SHIFT * 3);
+
      std::hash<int> hasher;
      return hasher(place + data_type + data_layout + library_type);
    }

--- a/paddle/platform/device_context.cc
+++ b/paddle/platform/device_context.cc
@@ -127,15 +127,21 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place) : place_(place) {
  eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get()));
  PADDLE_ENFORCE(dynload::cublasCreate(&cublas_handle_));
  PADDLE_ENFORCE(dynload::cublasSetStream(cublas_handle_, stream_));
+  if (dynload::HasCUDNN()) {
    PADDLE_ENFORCE(dynload::cudnnCreate(&cudnn_handle_));
    PADDLE_ENFORCE(dynload::cudnnSetStream(cudnn_handle_, stream_));
+  } else {
+    cudnn_handle_ = nullptr;
+  }
 }

 CUDADeviceContext::~CUDADeviceContext() {
  SetDeviceId(place_.device);
  Wait();
  PADDLE_ENFORCE(dynload::cublasDestroy(cublas_handle_));
+  if (cudnn_handle_ != nullptr) {
    PADDLE_ENFORCE(dynload::cudnnDestroy(cudnn_handle_));
+  }
  eigen_stream_.reset();
  eigen_device_.reset();
  PADDLE_ENFORCE(cudaStreamDestroy(stream_));
@@ -160,20 +166,6 @@ cudnnHandle_t CUDADeviceContext::cudnn_handle() const { return cudnn_handle_; }

 cudaStream_t CUDADeviceContext::stream() const { return stream_; }

-CUDNNDeviceContext::CUDNNDeviceContext(CUDAPlace place)
-    : CUDADeviceContext(place) {
-  PADDLE_ENFORCE(dynload::cudnnCreate(&cudnn_handle_));
-  PADDLE_ENFORCE(dynload::cudnnSetStream(cudnn_handle_, stream()));
-}
-
-CUDNNDeviceContext::~CUDNNDeviceContext() {
-  SetDeviceId(boost::get<CUDAPlace>(GetPlace()).device);
-  Wait();
-  PADDLE_ENFORCE(dynload::cudnnDestroy(cudnn_handle_));
-}
-
-cudnnHandle_t CUDNNDeviceContext::cudnn_handle() const { return cudnn_handle_; }
-
 #endif

 }  // namespace platform

--- a/paddle/platform/device_context.h
+++ b/paddle/platform/device_context.h
@@ -103,18 +103,6 @@ struct DefaultDeviceContextType<platform::CUDAPlace> {
  using TYPE = CUDADeviceContext;
 };

-class CUDNNDeviceContext : public CUDADeviceContext {
- public:
-  explicit CUDNNDeviceContext(CUDAPlace place);
-  virtual ~CUDNNDeviceContext();
-
-  /*! \brief  Return cudnn  handle in the device context. */
-  cudnnHandle_t cudnn_handle() const;
-
- private:
-  cudnnHandle_t cudnn_handle_;
-};
-
 #endif

 /*! \brief device context pool singleton */
@@ -151,7 +139,7 @@ class DeviceContextPool {
  struct Hash {
    std::hash<int> hash_;
    size_t operator()(const platform::Place& place) const {
-      int pre_hash = place.which() + (1 << LEFT_SHIFT);
+      int pre_hash = place.which() << LEFT_SHIFT;
      if (platform::is_gpu_place(place)) {
        pre_hash += boost::get<platform::CUDAPlace>(place).GetDeviceId();
      }

--- a/paddle/platform/device_context_test.cu
+++ b/paddle/platform/device_context_test.cu
@@ -49,21 +49,6 @@ TEST(Device, CUDADeviceContext) {
  }
 }

-TEST(Device, CUDNNDeviceContext) {
-  using paddle::platform::CUDNNDeviceContext;
-  using paddle::platform::CUDAPlace;
-  if (paddle::platform::dynload::HasCUDNN()) {
-    int count = paddle::platform::GetCUDADeviceCount();
-    for (int i = 0; i < count; ++i) {
-      CUDNNDeviceContext* device_context = new CUDNNDeviceContext(CUDAPlace(i));
-      cudnnHandle_t cudnn_handle = device_context->cudnn_handle();
-      ASSERT_NE(nullptr, cudnn_handle);
-      ASSERT_NE(nullptr, device_context->stream());
-      delete device_context;
-    }
-  }
-}
-
 TEST(Device, DeviceContextPool) {
  using paddle::platform::DeviceContextPool;
  using paddle::platform::CUDADeviceContext;

--- a/python/paddle/v2/fluid/executor.py
+++ b/python/paddle/v2/fluid/executor.py
@@ -65,13 +65,6 @@ class Executor(object):
            p.set_place(each)
            act_places.append(p)

-        # TODO(dzhwinter) : consider that our fluid tests all written in 
-        # CUDAPlace(gpu_id), this will be changed in the future
-        if core.is_compile_gpu():
-            core.init_devices(["CPU", "GPU:0"])
-        else:
-            core.init_devices(["CPU"])
-
        # TODO(dzhwinter) : only use the first place
        self.executor = core.Executor(act_places[0])
        self.places = places