Commit 2f47f35b authored by Q qijun

fix gpu build error

Parent 7c274dc0
 if(WITH_GPU)
-    nv_library(math_function SRCS math_function.cc math_function.cu DEPS cblas device_context)
+    nv_library(math_function SRCS math_function.cc math_function.cu DEPS cblas device_context eigen3)
 else()
-    cc_library(math_function SRCS math_function.cc DEPS cblas device_context)
+    cc_library(math_function SRCS math_function.cc DEPS cblas device_context eigen3)
 endif()
 
 nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor)
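Why eigen3 becomes a dependency: math_function now fills buffers through Eigen tensor expressions (see the Set specializations below) rather than hand-written loops, so the library target must see Eigen's headers. A minimal sketch of the pattern those specializations rely on, using plain Eigen types instead of Paddle's framework::EigenVector alias:

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  float buf[4];
  // Map the raw buffer as a rank-1 tensor; no copy is made.
  Eigen::TensorMap<Eigen::Tensor<float, 1>> out(buf, 4);
  // Evaluate the fill expression on an explicit device; the CUDA
  // specialization swaps in the context's Eigen::GpuDevice here.
  Eigen::DefaultDevice dev;
  out.device(dev) = out.constant(1.0f);
  return 0;
}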
@@ -110,12 +110,12 @@ void matmul<platform::CPUPlace, double>(const framework::Tensor& matrix_a,
 }
 
 template <>
-void Set<typename CPUPlace, typename float>(const int n, const float alpha,
-                                            float* output,
-                                            platform::DeviceContext* context) {
+void Set<platform::CPUPlace, float>(const int n, const float alpha,
+                                    float* output,
+                                    platform::DeviceContext* context) {
   auto* cpu_context = reinterpret_cast<platform::CPUDeviceContext*>(context);
-  framework::EigenVector::Type<T> out(output, n);
-  out.device(*(cpu_context->eigen_device())) = t.constant(T(alpha));
+  framework::EigenVector<float>::Type out(output, n);
+  out.device(*(cpu_context->eigen_device())) = out.constant(float(alpha));
 }
 
 template <>
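The original CPU specialization failed to compile for three reasons: typename may not appear in an explicit-specialization argument list, Type is a member alias of the instantiated EigenVector<float> rather than a template member of EigenVector, and the fill expression referenced an undefined name t instead of out. A minimal repro of the first fix, independent of the Paddle headers:

struct CPUPlace {};

// Primary template, declared for any (Place, T) pair.
template <typename Place, typename T>
void Set(T alpha, T* output);

// Correct explicit specialization: concrete types, no "typename".
// "void Set<typename CPUPlace, typename float>(...)" does not parse.
template <>
void Set<CPUPlace, float>(float alpha, float* output) {
  *output = alpha;
}

int main() {
  float v = 0.0f;
  Set<CPUPlace, float>(3.5f, &v);  // v == 3.5f
  return 0;
}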
@@ -127,12 +127,12 @@ void matmul<platform::GPUPlace, double>(const framework::Tensor& matrix_a,
 }
 
 template <>
-void Set<typename GPUPlace, typename float>(const int n, const float alpha,
-                                            float* output,
-                                            platform::DeviceContext* context) {
+void Set<platform::GPUPlace, float>(const int n, const float alpha,
+                                    float* output,
+                                    platform::DeviceContext* context) {
   auto* cuda_context = reinterpret_cast<platform::CUDADeviceContext*>(context);
-  framework::EigenVector::Type<T> out(output, n);
-  out.device(*(cuda_context->eigen_device())) = t.constant(T(alpha));
+  framework::EigenVector<float>::Type out(output, n);
+  out.device(*(cuda_context->eigen_device())) = out.constant(float(alpha));
 }
 
 template <typename T>
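The GPU specialization is structurally identical; it only recovers a different concrete context from the base DeviceContext* so it can reach the stream-bound Eigen::GpuDevice. A sketch of that dispatch shape (static_cast is the more conventional downcast when the dynamic type is known, though the diff uses reinterpret_cast):

struct DeviceContext {
  virtual ~DeviceContext() = default;
};

struct CUDADeviceContext : public DeviceContext {
  // Stand-in for eigen_device(), which returns Eigen::GpuDevice* in Paddle.
  const char* eigen_device() const { return "GpuDevice"; }
};

// Each Set specialization is only ever handed the context type matching
// its Place, so the downcast is safe by construction.
const char* DeviceName(DeviceContext* context) {
  auto* cuda_context = static_cast<CUDADeviceContext*>(context);
  return cuda_context->eigen_device();
}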
@@ -159,12 +159,13 @@ void RandUniform<platform::GPUPlace, float>(const int n, const float min,
 template <typename T>
 int HandleOddLengthRandGaussian(const int n, const T mean, const T std,
-                                T* output, CUDADeviceContext* context) {
+                                T* output,
+                                platform::CUDADeviceContext* context) {
   if (n % 2 == 1) {
     std::default_random_engine generator;
     std::normal_distribution<T> distribution(mean, std);
     const T random_value = distribution(generator);
-    Set<T, platform::GPUPlace>(1, random_value, output + (n - 1), context);
+    Set<platform::GPUPlace, T>(1, random_value, output + (n - 1), context);
     return n - 1;
   }
   return n;
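HandleOddLengthRandGaussian exists because cuRAND's pseudo-random normal generators emit values in pairs, so curandGenerateNormal requires an even count; the helper produces the odd tail element on the host and returns the even remainder for the device call. A sketch of the assumed calling pattern (using a plain cudaMemcpy where the diff routes the tail write through Set):

#include <cuda_runtime.h>
#include <curand.h>

#include <random>

// Fill n device floats with N(mean, std), handling an odd n on the host.
void RandGaussianSketch(curandGenerator_t gen, int n, float mean, float std,
                        float* d_output) {
  int even_n = n;
  if (n % 2 == 1) {
    std::default_random_engine engine;
    std::normal_distribution<float> dist(mean, std);
    const float tail = dist(engine);
    cudaMemcpy(d_output + n - 1, &tail, sizeof(float),
               cudaMemcpyHostToDevice);
    even_n = n - 1;
  }
  if (even_n > 0) {
    curandGenerateNormal(gen, d_output, even_n, mean, std);
  }
}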
@@ -52,9 +52,9 @@ int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda,
 #include <cmath>
 
+#include "paddle/framework/eigen.h"
 #include "paddle/framework/tensor.h"
 #include "paddle/platform/device_context.h"
-#include "paddle/platform/eigen.h"
 #include "paddle/platform/enforce.h"
 
 namespace paddle {
@@ -80,10 +80,7 @@ void matmul(const framework::Tensor& matrix_a, bool trans_a,
 template <typename Place, typename T>
 void Set(const int n, const T alpha, T* output,
-         platform::DeviceContext* context) {
-  framework::EigenVector::Type<T> out(output, n);
-  out.device(*(context->eigen_device())) = t.constant(T(alpha));
-}
+         platform::DeviceContext* context);
 
 template <typename Place, typename T>
 void RandUniform(const int n, const T min, const T max, T* output,
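Replacing the header's inline body with a bare declaration is what actually fixes the GPU build: the broken generic definition is no longer instantiated in every translation unit that includes the header, and each backend supplies its own explicit specialization compiled by the matching toolchain (the host compiler for math_function.cc, nvcc for math_function.cu). In miniature, under hypothetical file names:

// set.h -- places and the declaration only; no device code leaks out.
struct CPUPlace {};
struct GPUPlace {};

template <typename Place, typename T>
void Set(const int n, const T alpha, T* output);

// set.cc -- host specialization, built by the host compiler.
template <>
void Set<CPUPlace, float>(const int n, const float alpha, float* output) {
  for (int i = 0; i < n; ++i) output[i] = alpha;
}

// set.cu would define Set<GPUPlace, float> the same way, launching a
// device fill instead of the loop, and only nvcc ever compiles it.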
@@ -14,9 +14,6 @@
 #include "paddle/operators/uniform_random_op.h"
 
 namespace paddle {
 namespace operators {
-REGISTER_OP_GPU_KERNEL(uniform_random,
-                       paddle::operators::GPUUniformRandomKernel<
-                           paddle::platform::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(
+    uniform_random,
+    paddle::operators::UniformRandomKernel<paddle::platform::GPUPlace, float>);
@@ -25,9 +25,9 @@ CPUDeviceContext::CPUDeviceContext() {
   eigen_device_.reset(new Eigen::DefaultDevice());
 }
 
-CPUDeviceContext::CPUDeviceContext(CPUPlace place, int rand_seed) {
+CPUDeviceContext::CPUDeviceContext(CPUPlace place, int seed) {
   eigen_device_.reset(new Eigen::DefaultDevice());
-  rand_seed_ = rand_seed;
+  rand_seed_ = seed;
 }
 
 std::minstd_rand& CPUDeviceContext::rand_engine() {
@@ -105,7 +105,7 @@ Eigen::GpuDevice* DeviceContext::get_eigen_device<Eigen::GpuDevice>() const {
 }
 
 CUDADeviceContext::CUDADeviceContext(GPUPlace place, uint64_t seed)
-    : place_(place), seed_(seed) {
+    : place_(place), rand_seed_(seed) {
   SetDeviceId(place_.device);
   PADDLE_ENFORCE(cudaStreamCreate(&stream_));
   eigen_stream_.reset(new EigenCudaStreamDevice());
@@ -162,8 +162,8 @@ curandGenerator_t CUDADeviceContext::curand_generator() {
     SetDeviceId(place_.device);
     PADDLE_ENFORCE(dynload::curandCreateGenerator(&curand_generator_,
                                                   CURAND_RNG_PSEUDO_DEFAULT));
-    PADDLE_ENFORCE(
-        dynload::curandSetPseudoRandomGeneratorSeed(curand_generator_, seed_));
+    PADDLE_ENFORCE(dynload::curandSetPseudoRandomGeneratorSeed(
+        curand_generator_, rand_seed_));
     PADDLE_ENFORCE(dynload::curandSetStream(curand_generator_, stream_));
   }
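Note that curand_generator() creates and seeds the generator lazily, on first use, so contexts that never sample random numbers pay no cuRAND setup cost; the seed_ -> rand_seed_ rename keeps this getter consistent with the member declared in the header below. A generic sketch of the same lazy-initialization pattern:

#include <memory>

class RandomSource {  // stand-in for curandGenerator_t and its setup
 public:
  explicit RandomSource(unsigned long long seed) : seed_(seed) {}

 private:
  unsigned long long seed_;
};

class Context {
 public:
  explicit Context(unsigned long long seed = 0) : rand_seed_(seed) {}

  // Constructed and seeded on the first call; later calls reuse it.
  RandomSource& random_source() {
    if (!random_source_) {
      random_source_.reset(new RandomSource(rand_seed_));
    }
    return *random_source_;
  }

 private:
  unsigned long long rand_seed_;
  std::unique_ptr<RandomSource> random_source_;
};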
@@ -40,7 +40,7 @@ class DeviceContext {
 class CPUDeviceContext : public DeviceContext {
  public:
   CPUDeviceContext();
-  explicit CPUDeviceContext(CPUPlace place, int rand_seed = 0);
+  explicit CPUDeviceContext(CPUPlace place, int seed = 0);
   virtual ~CPUDeviceContext() {}
 
   Eigen::DefaultDevice* eigen_device() const;
@@ -60,7 +60,7 @@ class EigenCudaStreamDevice;
 class CUDADeviceContext : public DeviceContext {
  public:
-  explicit CUDADeviceContext(GPUPlace place, uint64_t rand_seed = 0);
+  explicit CUDADeviceContext(GPUPlace place, uint64_t seed = 0);
   virtual ~CUDADeviceContext();
 
   /*! \brief Wait for all operations completion in the stream. */
@@ -93,12 +93,12 @@ class CUDADeviceContext : public DeviceContext {
   std::unique_ptr<EigenCudaStreamDevice> eigen_stream_;
 
-  uint64_t seed_;
+  uint64_t rand_seed_;
   std::unique_ptr<thrust::minstd_rand> rand_engine_;
 
   // clang-format off
   cudaStream_t stream_{nullptr};
   cudnnHandle_t cudnn_handle_{nullptr};
   cublasHandle_t cublas_handle_{nullptr};
   curandGenerator_t curand_generator_{nullptr};
   // clang-format on
 };