Commit 2f47f35b authored by Q qijun

fix gpu build error

Parent 7c274dc0
# math_function needs eigen3 in addition to cblas/device_context:
# Set<>() is implemented with Eigen expressions on both CPU and GPU.
if(WITH_GPU)
  nv_library(math_function SRCS math_function.cc math_function.cu DEPS cblas device_context eigen3)
else()
  cc_library(math_function SRCS math_function.cc DEPS cblas device_context eigen3)
endif()

nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor)
...@@ -110,12 +110,12 @@ void matmul<platform::CPUPlace, double>(const framework::Tensor& matrix_a, ...@@ -110,12 +110,12 @@ void matmul<platform::CPUPlace, double>(const framework::Tensor& matrix_a,
} }
template <> template <>
void Set<typename CPUPlace, typename float>(const int n, const float alpha, void Set<platform::CPUPlace, float>(const int n, const float alpha,
float* output, float* output,
platform::DeviceContext* context) { platform::DeviceContext* context) {
auto* cpu_context = reinterpret_cast<platform::CPUDeviceContext*>(context); auto* cpu_context = reinterpret_cast<platform::CPUDeviceContext*>(context);
framework::EigenVector::Type<T> out(output, n); framework::EigenVector<float>::Type out(output, n);
out.device(*(cpu_context->eigen_device())) = t.constant(T(alpha)); out.device(*(cpu_context->eigen_device())) = out.constant(float(alpha));
} }
template <> template <>
......
...@@ -127,12 +127,12 @@ void matmul<platform::GPUPlace, double>(const framework::Tensor& matrix_a, ...@@ -127,12 +127,12 @@ void matmul<platform::GPUPlace, double>(const framework::Tensor& matrix_a,
} }
template <> template <>
void Set<typename GPUPlace, typename float>(const int n, const float alpha, void Set<platform::GPUPlace, float>(const int n, const float alpha,
float* output, float* output,
platform::DeviceContext* context) { platform::DeviceContext* context) {
auto* cuda_context = reinterpret_cast<platform::CUDADeviceContext*>(context); auto* cuda_context = reinterpret_cast<platform::CUDADeviceContext*>(context);
framework::EigenVector::Type<T> out(output, n); framework::EigenVector<float>::Type out(output, n);
out.device(*(cuda_context->eigen_device())) = t.constant(T(alpha)); out.device(*(cuda_context->eigen_device())) = out.constant(float(alpha));
} }
template <typename T> template <typename T>
...@@ -159,12 +159,13 @@ void RandUniform<platform::GPUPlace, float>(const int n, const float min, ...@@ -159,12 +159,13 @@ void RandUniform<platform::GPUPlace, float>(const int n, const float min,
template <typename T> template <typename T>
int HandleOddLengthRandGaussian(const int n, const T mean, const T std, int HandleOddLengthRandGaussian(const int n, const T mean, const T std,
T* output, CUDADeviceContext* context) { T* output,
platform::CUDADeviceContext* context) {
if (n % 2 == 1) { if (n % 2 == 1) {
std::default_random_engine generator; std::default_random_engine generator;
std::normal_distribution<T> distribution(mean, std); std::normal_distribution<T> distribution(mean, std);
const T random_value = distribution(generator); const T random_value = distribution(generator);
Set<T, platform::GPUPlace>(1, random_value, output + (n - 1), context); Set<platform::GPUPlace, T>(1, random_value, output + (n - 1), context);
return n - 1; return n - 1;
} }
return n; return n;
......
...@@ -52,9 +52,9 @@ int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda, ...@@ -52,9 +52,9 @@ int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda,
#include <cmath> #include <cmath>
#include "paddle/framework/eigen.h"
#include "paddle/framework/tensor.h" #include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h" #include "paddle/platform/device_context.h"
#include "paddle/platform/eigen.h"
#include "paddle/platform/enforce.h" #include "paddle/platform/enforce.h"
namespace paddle { namespace paddle {
...@@ -80,10 +80,7 @@ void matmul(const framework::Tensor& matrix_a, bool trans_a, ...@@ -80,10 +80,7 @@ void matmul(const framework::Tensor& matrix_a, bool trans_a,
// Set: fill output[0, n) with the constant `alpha` on the device selected by
// `Place`. Declared here only; explicitly specialized per Place in
// math_function.cc (CPU) and math_function.cu (GPU).
template <typename Place, typename T>
void Set(const int n, const T alpha, T* output,
         platform::DeviceContext* context);
template <typename Place, typename T> template <typename Place, typename T>
void RandUniform(const int n, const T min, const T max, T* output, void RandUniform(const int n, const T min, const T max, T* output,
......
...@@ -14,9 +14,6 @@ ...@@ -14,9 +14,6 @@
#include "paddle/operators/uniform_random_op.h"

// Register the GPU kernel for the uniform_random op. The shared
// UniformRandomKernel (declared in the header) is used for both places;
// the old GPU-specific GPUUniformRandomKernel no longer exists.
REGISTER_OP_GPU_KERNEL(
    uniform_random,
    paddle::operators::UniformRandomKernel<paddle::platform::GPUPlace, float>);
...@@ -25,9 +25,9 @@ CPUDeviceContext::CPUDeviceContext() { ...@@ -25,9 +25,9 @@ CPUDeviceContext::CPUDeviceContext() {
eigen_device_.reset(new Eigen::DefaultDevice()); eigen_device_.reset(new Eigen::DefaultDevice());
} }
// Construct a CPU device context for `place` with the given RNG seed.
// The parameter is named `seed` to match the declaration; it is stored in
// the rand_seed_ member used by rand_engine().
CPUDeviceContext::CPUDeviceContext(CPUPlace place, int seed) {
  eigen_device_.reset(new Eigen::DefaultDevice());
  rand_seed_ = seed;
}
std::minstd_rand& CPUDeviceContext::rand_engine() { std::minstd_rand& CPUDeviceContext::rand_engine() {
...@@ -105,7 +105,7 @@ Eigen::GpuDevice* DeviceContext::get_eigen_device<Eigen::GpuDevice>() const { ...@@ -105,7 +105,7 @@ Eigen::GpuDevice* DeviceContext::get_eigen_device<Eigen::GpuDevice>() const {
} }
CUDADeviceContext::CUDADeviceContext(GPUPlace place, uint64_t seed) CUDADeviceContext::CUDADeviceContext(GPUPlace place, uint64_t seed)
: place_(place), seed_(seed) { : place_(place), rand_seed_(seed) {
SetDeviceId(place_.device); SetDeviceId(place_.device);
PADDLE_ENFORCE(cudaStreamCreate(&stream_)); PADDLE_ENFORCE(cudaStreamCreate(&stream_));
eigen_stream_.reset(new EigenCudaStreamDevice()); eigen_stream_.reset(new EigenCudaStreamDevice());
...@@ -162,8 +162,8 @@ curandGenerator_t CUDADeviceContext::curand_generator() { ...@@ -162,8 +162,8 @@ curandGenerator_t CUDADeviceContext::curand_generator() {
SetDeviceId(place_.device); SetDeviceId(place_.device);
PADDLE_ENFORCE(dynload::curandCreateGenerator(&curand_generator_, PADDLE_ENFORCE(dynload::curandCreateGenerator(&curand_generator_,
CURAND_RNG_PSEUDO_DEFAULT)); CURAND_RNG_PSEUDO_DEFAULT));
PADDLE_ENFORCE( PADDLE_ENFORCE(dynload::curandSetPseudoRandomGeneratorSeed(
dynload::curandSetPseudoRandomGeneratorSeed(curand_generator_, seed_)); curand_generator_, rand_seed_));
PADDLE_ENFORCE(dynload::curandSetStream(curand_generator_, stream_)); PADDLE_ENFORCE(dynload::curandSetStream(curand_generator_, stream_));
} }
......
...@@ -40,7 +40,7 @@ class DeviceContext { ...@@ -40,7 +40,7 @@ class DeviceContext {
class CPUDeviceContext : public DeviceContext { class CPUDeviceContext : public DeviceContext {
public: public:
CPUDeviceContext(); CPUDeviceContext();
explicit CPUDeviceContext(CPUPlace place, int rand_seed = 0); explicit CPUDeviceContext(CPUPlace place, int seed = 0);
virtual ~CPUDeviceContext() {} virtual ~CPUDeviceContext() {}
Eigen::DefaultDevice* eigen_device() const; Eigen::DefaultDevice* eigen_device() const;
...@@ -60,7 +60,7 @@ class EigenCudaStreamDevice; ...@@ -60,7 +60,7 @@ class EigenCudaStreamDevice;
class CUDADeviceContext : public DeviceContext { class CUDADeviceContext : public DeviceContext {
public: public:
explicit CUDADeviceContext(GPUPlace place, uint64_t rand_seed = 0); explicit CUDADeviceContext(GPUPlace place, uint64_t seed = 0);
virtual ~CUDADeviceContext(); virtual ~CUDADeviceContext();
/*! \brief Wait for all operations completion in the stream. */ /*! \brief Wait for all operations completion in the stream. */
...@@ -93,12 +93,12 @@ class CUDADeviceContext : public DeviceContext { ...@@ -93,12 +93,12 @@ class CUDADeviceContext : public DeviceContext {
std::unique_ptr<EigenCudaStreamDevice> eigen_stream_; std::unique_ptr<EigenCudaStreamDevice> eigen_stream_;
uint64_t rand_seed_; uint64_t rand_seed_;
std::unique_ptr<thrust::minstd_rand> rand_engine_;
// clang-format off // clang-format off
cudaStream_t stream_{nullptr}; cudaStream_t stream_{nullptr};
cudnnHandle_t cudnn_handle_{nullptr}; cudnnHandle_t cudnn_handle_{nullptr};
cublasHandle_t cublas_handle_{nullptr}; cublasHandle_t cublas_handle_{nullptr};
curandGenerator_t curand_generator_{nullptr};
// clang-format on // clang-format on
}; };
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register to comment