diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt index ed51d416ed9497eee45ba826ad672b8fb1ad3678..228f463f2bd630874741c5a8d3680008c56ebb41 100644 --- a/paddle/operators/math/CMakeLists.txt +++ b/paddle/operators/math/CMakeLists.txt @@ -1,8 +1,8 @@ if(WITH_GPU) - nv_library(math_function SRCS math_function.cc math_function.cu DEPS cblas device_context) + nv_library(math_function SRCS math_function.cc math_function.cu DEPS cblas device_context eigen3) else() - cc_library(math_function SRCS math_function.cc DEPS cblas device_context) + cc_library(math_function SRCS math_function.cc DEPS cblas device_context eigen3) endif() nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) diff --git a/paddle/operators/math/math_function.cc b/paddle/operators/math/math_function.cc index d0b1f8ee48f209724162c500435a021b9dedc32a..a098e02f95d6a641fef0831eacf3be054b93f62b 100644 --- a/paddle/operators/math/math_function.cc +++ b/paddle/operators/math/math_function.cc @@ -110,12 +110,12 @@ void matmul(const framework::Tensor& matrix_a, } template <> -void Set(const int n, const float alpha, - float* output, - platform::DeviceContext* context) { +void Set(const int n, const float alpha, + float* output, + platform::DeviceContext* context) { auto* cpu_context = reinterpret_cast(context); - framework::EigenVector::Type out(output, n); - out.device(*(cpu_context->eigen_device())) = t.constant(T(alpha)); + framework::EigenVector::Type out(output, n); + out.device(*(cpu_context->eigen_device())) = out.constant(float(alpha)); } template <> diff --git a/paddle/operators/math/math_function.cu b/paddle/operators/math/math_function.cu index 76bbf790db38f55c714286e7807790bb582c0953..3ff622f30824db65110a0215a23efc816e076989 100644 --- a/paddle/operators/math/math_function.cu +++ b/paddle/operators/math/math_function.cu @@ -127,12 +127,12 @@ void matmul(const framework::Tensor& matrix_a, } template <> -void Set(const int n, const float alpha, - float* output, - platform::DeviceContext* context) { +void Set(const int n, const float alpha, + float* output, + platform::DeviceContext* context) { auto* cuda_context = reinterpret_cast(context); - framework::EigenVector::Type out(output, n); - out.device(*(cuda_context->eigen_device())) = t.constant(T(alpha)); + framework::EigenVector::Type out(output, n); + out.device(*(cuda_context->eigen_device())) = out.constant(float(alpha)); } template @@ -159,12 +159,13 @@ void RandUniform(const int n, const float min, template int HandleOddLengthRandGaussian(const int n, const T mean, const T std, - T* output, CUDADeviceContext* context) { + T* output, + platform::CUDADeviceContext* context) { if (n % 2 == 1) { std::default_random_engine generator; std::normal_distribution distribution(mean, std); const T random_value = distribution(generator); - Set(1, random_value, output + (n - 1), context); + Set(1, random_value, output + (n - 1), context); return n - 1; } return n; diff --git a/paddle/operators/math/math_function.h b/paddle/operators/math/math_function.h index afe6de7483db8f03a1c6ad78887d82885940c5d6..6543a1b515e8c6d79d59e0811aa92caaffa829bb 100644 --- a/paddle/operators/math/math_function.h +++ b/paddle/operators/math/math_function.h @@ -52,9 +52,9 @@ int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda, #include +#include "paddle/framework/eigen.h" #include "paddle/framework/tensor.h" #include "paddle/platform/device_context.h" -#include "paddle/platform/eigen.h" #include "paddle/platform/enforce.h" namespace paddle { @@ -80,10 +80,7 @@ void matmul(const framework::Tensor& matrix_a, bool trans_a, template void Set(const int n, const T alpha, T* output, - platform::DeviceContext* context) { - framework::EigenVector::Type out(output, n); - out.device(*(context->eigen_device())) = t.constant(T(alpha)); -} + platform::DeviceContext* context); template void RandUniform(const int n, const T min, const T max, T* output, diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu index 91368fa73e9769b03d0dbf82973a7261ca23e30f..1bfffc47783d54beaf839808829f962a131637d5 100644 --- a/paddle/operators/uniform_random_op.cu +++ b/paddle/operators/uniform_random_op.cu @@ -14,9 +14,6 @@ #include "paddle/operators/uniform_random_op.h" -namespace paddle { -namespace operators { - -REGISTER_OP_GPU_KERNEL(uniform_random, - paddle::operators::GPUUniformRandomKernel< - paddle::platform::GPUPlace, float>); +REGISTER_OP_GPU_KERNEL( + uniform_random, + paddle::operators::UniformRandomKernel); diff --git a/paddle/platform/device_context.cc b/paddle/platform/device_context.cc index 5fd93555a514a2648ffcf13776e43ffb5ca530df..ad9b4e42f334523255bc3f110eefe9b5575679cb 100644 --- a/paddle/platform/device_context.cc +++ b/paddle/platform/device_context.cc @@ -25,9 +25,9 @@ CPUDeviceContext::CPUDeviceContext() { eigen_device_.reset(new Eigen::DefaultDevice()); } -CPUDeviceContext::CPUDeviceContext(CPUPlace place, int rand_seed) { +CPUDeviceContext::CPUDeviceContext(CPUPlace place, int seed) { eigen_device_.reset(new Eigen::DefaultDevice()); - rand_seed_ = rand_seed; + rand_seed_ = seed; } std::minstd_rand& CPUDeviceContext::rand_engine() { @@ -105,7 +105,7 @@ Eigen::GpuDevice* DeviceContext::get_eigen_device() const { } CUDADeviceContext::CUDADeviceContext(GPUPlace place, uint64_t seed) - : place_(place), seed_(seed) { + : place_(place), rand_seed_(seed) { SetDeviceId(place_.device); PADDLE_ENFORCE(cudaStreamCreate(&stream_)); eigen_stream_.reset(new EigenCudaStreamDevice()); @@ -162,8 +162,8 @@ curandGenerator_t CUDADeviceContext::curand_generator() { SetDeviceId(place_.device); PADDLE_ENFORCE(dynload::curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)); - PADDLE_ENFORCE( - dynload::curandSetPseudoRandomGeneratorSeed(curand_generator_, seed_)); + PADDLE_ENFORCE(dynload::curandSetPseudoRandomGeneratorSeed( + curand_generator_, rand_seed_)); PADDLE_ENFORCE(dynload::curandSetStream(curand_generator_, stream_)); } diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index 7013343a8de59752db87c924c23b05fd15747c26..e18f48fef59fcce065d8a1c8bfa72ab5c7a75439 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -40,7 +40,7 @@ class DeviceContext { class CPUDeviceContext : public DeviceContext { public: CPUDeviceContext(); - explicit CPUDeviceContext(CPUPlace place, int rand_seed = 0); + explicit CPUDeviceContext(CPUPlace place, int seed = 0); virtual ~CPUDeviceContext() {} Eigen::DefaultDevice* eigen_device() const; @@ -60,7 +60,7 @@ class EigenCudaStreamDevice; class CUDADeviceContext : public DeviceContext { public: - explicit CUDADeviceContext(GPUPlace place, uint64_t rand_seed = 0); + explicit CUDADeviceContext(GPUPlace place, uint64_t seed = 0); virtual ~CUDADeviceContext(); /*! \brief Wait for all operations completion in the stream. */ @@ -93,12 +93,12 @@ class CUDADeviceContext : public DeviceContext { std::unique_ptr eigen_stream_; uint64_t rand_seed_; - std::unique_ptr rand_engine_; // clang-format off cudaStream_t stream_{nullptr}; cudnnHandle_t cudnn_handle_{nullptr}; cublasHandle_t cublas_handle_{nullptr}; + curandGenerator_t curand_generator_{nullptr}; // clang-format on };