Fix GPU build error

2f47f35b · qijun · 7c274dc0 · 2f47f35b · 2f47f35b · 2f47f35b
7 changed files
--- a/paddle/operators/math/CMakeLists.txt
+++ b/paddle/operators/math/CMakeLists.txt

 if(WITH_GPU)
-    nv_library(math_function SRCS math_function.cc math_function.cu DEPS cblas device_context)
+    nv_library(math_function SRCS math_function.cc math_function.cu DEPS cblas device_context eigen3)
 else()
-    cc_library(math_function SRCS math_function.cc DEPS cblas device_context)
+    cc_library(math_function SRCS math_function.cc DEPS cblas device_context eigen3)
 endif()

 nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor)
--- a/paddle/operators/math/math_function.cc
+++ b/paddle/operators/math/math_function.cc
@@ -110,12 +110,12 @@ void matmul<platform::CPUPlace, double>(const framework::Tensor& matrix_a,
 }

 template <>
-void Set<typename CPUPlace, typename float>(const int n, const float alpha,
-                                            float* output,
-                                            platform::DeviceContext* context) {
+void Set<platform::CPUPlace, float>(const int n, const float alpha,
+                                    float* output,
+                                    platform::DeviceContext* context) {
  auto* cpu_context = reinterpret_cast<platform::CPUDeviceContext*>(context);
-  framework::EigenVector::Type<T> out(output, n);
-  out.device(*(cpu_context->eigen_device())) = t.constant(T(alpha));
+  framework::EigenVector<float>::Type out(output, n);
+  out.device(*(cpu_context->eigen_device())) = out.constant(float(alpha));
 }

 template <>

--- a/paddle/operators/math/math_function.cu
+++ b/paddle/operators/math/math_function.cu
@@ -127,12 +127,12 @@ void matmul<platform::GPUPlace, double>(const framework::Tensor& matrix_a,
 }

 template <>
-void Set<typename GPUPlace, typename float>(const int n, const float alpha,
-                                            float* output,
-                                            platform::DeviceContext* context) {
+void Set<platform::GPUPlace, float>(const int n, const float alpha,
+                                    float* output,
+                                    platform::DeviceContext* context) {
  auto* cuda_context = reinterpret_cast<platform::CUDADeviceContext*>(context);
-  framework::EigenVector::Type<T> out(output, n);
-  out.device(*(cuda_context->eigen_device())) = t.constant(T(alpha));
+  framework::EigenVector<float>::Type out(output, n);
+  out.device(*(cuda_context->eigen_device())) = out.constant(float(alpha));
 }

 template <typename T>
@@ -159,12 +159,13 @@ void RandUniform<platform::GPUPlace, float>(const int n, const float min,

 template <typename T>
 int HandleOddLengthRandGaussian(const int n, const T mean, const T std,
-                                T* output, CUDADeviceContext* context) {
+                                T* output,
+                                platform::CUDADeviceContext* context) {
  if (n % 2 == 1) {
    std::default_random_engine generator;
    std::normal_distribution<T> distribution(mean, std);
    const T random_value = distribution(generator);
-    Set<T, platform::GPUPlace>(1, random_value, output + (n - 1), context);
+    Set<platform::GPUPlace, T>(1, random_value, output + (n - 1), context);
    return n - 1;
  }
  return n;

--- a/paddle/operators/math/math_function.h
+++ b/paddle/operators/math/math_function.h
@@ -52,9 +52,9 @@ int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda,

 #include <cmath>

+#include "paddle/framework/eigen.h"
 #include "paddle/framework/tensor.h"
 #include "paddle/platform/device_context.h"
-#include "paddle/platform/eigen.h"
 #include "paddle/platform/enforce.h"

 namespace paddle {
@@ -80,10 +80,7 @@ void matmul(const framework::Tensor& matrix_a, bool trans_a,

 template <typename Place, typename T>
 void Set(const int n, const T alpha, T* output,
-         platform::DeviceContext* context) {
-  framework::EigenVector::Type<T> out(output, n);
-  out.device(*(context->eigen_device())) = t.constant(T(alpha));
-}
+         platform::DeviceContext* context);

 template <typename Place, typename T>
 void RandUniform(const int n, const T min, const T max, T* output,

--- a/paddle/operators/uniform_random_op.cu
+++ b/paddle/operators/uniform_random_op.cu
@@ -14,9 +14,6 @@

 #include "paddle/operators/uniform_random_op.h"

-namespace paddle {
-namespace operators {
-
-REGISTER_OP_GPU_KERNEL(uniform_random,
-                       paddle::operators::GPUUniformRandomKernel<
-                           paddle::platform::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(
+    uniform_random,
+    paddle::operators::UniformRandomKernel<paddle::platform::GPUPlace, float>);
--- a/paddle/platform/device_context.cc
+++ b/paddle/platform/device_context.cc
@@ -25,9 +25,9 @@ CPUDeviceContext::CPUDeviceContext() {
  eigen_device_.reset(new Eigen::DefaultDevice());
 }

-CPUDeviceContext::CPUDeviceContext(CPUPlace place, int rand_seed) {
+CPUDeviceContext::CPUDeviceContext(CPUPlace place, int seed) {
  eigen_device_.reset(new Eigen::DefaultDevice());
-  rand_seed_ = rand_seed;
+  rand_seed_ = seed;
 }

 std::minstd_rand& CPUDeviceContext::rand_engine() {
@@ -105,7 +105,7 @@ Eigen::GpuDevice* DeviceContext::get_eigen_device<Eigen::GpuDevice>() const {
 }

 CUDADeviceContext::CUDADeviceContext(GPUPlace place, uint64_t seed)
-    : place_(place), seed_(seed) {
+    : place_(place), rand_seed_(seed) {
  SetDeviceId(place_.device);
  PADDLE_ENFORCE(cudaStreamCreate(&stream_));
  eigen_stream_.reset(new EigenCudaStreamDevice());
@@ -162,8 +162,8 @@ curandGenerator_t CUDADeviceContext::curand_generator() {
    SetDeviceId(place_.device);
    PADDLE_ENFORCE(dynload::curandCreateGenerator(&curand_generator_,
                                                  CURAND_RNG_PSEUDO_DEFAULT));
-    PADDLE_ENFORCE(
-        dynload::curandSetPseudoRandomGeneratorSeed(curand_generator_, seed_));
+    PADDLE_ENFORCE(dynload::curandSetPseudoRandomGeneratorSeed(
+        curand_generator_, rand_seed_));

    PADDLE_ENFORCE(dynload::curandSetStream(curand_generator_, stream_));
  }

--- a/paddle/platform/device_context.h
+++ b/paddle/platform/device_context.h
@@ -40,7 +40,7 @@ class DeviceContext {
 class CPUDeviceContext : public DeviceContext {
 public:
  CPUDeviceContext();
-  explicit CPUDeviceContext(CPUPlace place, int rand_seed = 0);
+  explicit CPUDeviceContext(CPUPlace place, int seed = 0);
  virtual ~CPUDeviceContext() {}

  Eigen::DefaultDevice* eigen_device() const;
@@ -60,7 +60,7 @@ class EigenCudaStreamDevice;

 class CUDADeviceContext : public DeviceContext {
 public:
-  explicit CUDADeviceContext(GPUPlace place, uint64_t rand_seed = 0);
+  explicit CUDADeviceContext(GPUPlace place, uint64_t seed = 0);
  virtual ~CUDADeviceContext();

  /*! \brief  Wait for all operations completion in the stream. */
@@ -93,12 +93,12 @@ class CUDADeviceContext : public DeviceContext {
  std::unique_ptr<EigenCudaStreamDevice> eigen_stream_;

  uint64_t rand_seed_;
-  std::unique_ptr<thrust::minstd_rand> rand_engine_;

  // clang-format off
  cudaStream_t       stream_{nullptr};
  cudnnHandle_t      cudnn_handle_{nullptr};
  cublasHandle_t     cublas_handle_{nullptr};
+  curandGenerator_t  curand_generator_{nullptr};
  // clang-format on
 };