Commit 1172f249 authored by Yi Wang, committed by GitHub

Merge pull request #4590 from wangkuiyi/paddle_only_cpu

Add -D PADDLE_WITH_CUDA in cmake/configure.cmake
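In effect, code that previously tested `PADDLE_WITH_GPU` now tests `PADDLE_WITH_CUDA`, which cmake/configure.cmake defines only when the build is configured with GPU support. A minimal, hypothetical C++ sketch of the guard pattern the patch applies throughout the tree (the helper function below is illustrative only, not code from the patch):

#include <cstdio>

// Illustrative helper (not from the patch): report which backend this binary
// was compiled for. PADDLE_WITH_CUDA is assumed to be added by
// cmake/configure.cmake only when WITH_GPU is enabled.
const char* CompiledBackend() {
#ifdef PADDLE_WITH_CUDA
  return "GPU (CUDA)";
#else
  return "CPU only";
#endif
}

int main() {
  std::printf("Compiled backend: %s\n", CompiledBackend());
  return 0;
}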
@@ -53,7 +53,8 @@ if(NOT WITH_GPU)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
else()
-add_definitions(-DPADDLE_WITH_GPU)
+add_definitions(-DPADDLE_WITH_CUDA)
FIND_PACKAGE(CUDA REQUIRED)
if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
......
@@ -47,7 +47,7 @@ bool isUsingGpu() { return FLAGS_use_gpu; }
void setUseGpu(bool useGpu) { FLAGS_use_gpu = useGpu; }
bool isGpuVersion() {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
return false;
#else
return true;
......
@@ -46,7 +46,7 @@ paddle_error paddle_matrix_set_row(paddle_matrix mat,
if (rowID >= ptr->mat->getHeight()) return kPD_OUT_OF_RANGE;
paddle::real* buf = ptr->mat->getRowBuf(rowID);
size_t width = ptr->mat->getWidth();
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
hl_memcpy(buf, rowArray, sizeof(paddle::real) * width);
#else
std::copy(rowArray, rowArray + width, buf);
......
@@ -15,7 +15,7 @@
#pragma once
#include <memory>
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>
@@ -29,7 +29,7 @@
namespace paddle {
namespace framework {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
template <typename T>
using Vector = std::vector<T>;
#else
......
@@ -211,7 +211,7 @@ class OpKernelRegistrar : public Registrar {
// TODO(fengjiayi): The following macros
// seems ugly, do we have better method?
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
#define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU)
#else
#define USE_OP_KERNEL(op_type) \
......
@@ -25,7 +25,7 @@ Eigen::DefaultDevice& ExecutionContext::GetEigenDevice<
return *device_context_.GetEigenDevice<platform::CPUPlace>();
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
template <>
Eigen::GpuDevice&
ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
......
@@ -65,7 +65,7 @@ inline T* Tensor::mutable_data(platform::Place place) {
holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
boost::get<platform::CPUPlace>(place), size));
} else if (platform::is_gpu_place(place)) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
}
#else
@@ -103,7 +103,7 @@ inline void Tensor::CopyFrom(const Tensor& src,
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
boost::get<platform::CPUPlace>(src_place), src_ptr, size);
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
else if (platform::is_gpu_place(src_place) &&
platform::is_cpu_place(dst_place)) {
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
......
@@ -74,7 +74,7 @@ TEST(Tensor, MutableData) {
EXPECT_EQ(p1, p2);
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
{
Tensor src_tensor;
float* p1 = nullptr;
@@ -126,7 +126,7 @@ TEST(Tensor, ShareDataWith) {
ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>());
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
{
Tensor src_tensor;
Tensor dst_tensor;
@@ -163,7 +163,7 @@ TEST(Tensor, Slice) {
EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address);
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
{
Tensor src_tensor;
src_tensor.mutable_data<double>(make_ddim({6, 9}), GPUPlace());
@@ -218,7 +218,7 @@ TEST(Tensor, CopyFrom) {
EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
}
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
{
Tensor src_tensor;
Tensor gpu_tensor;
......
@@ -194,7 +194,7 @@ public:
REGISTER_TYPED_FUNC(BlockExpand, CPU, BlockExpandForward);
REGISTER_TYPED_FUNC(BlockExpandGrad, CPU, BlockExpandBackward);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
REGISTER_TYPED_FUNC(BlockExpand, GPU, BlockExpandForward);
REGISTER_TYPED_FUNC(BlockExpandGrad, GPU, BlockExpandBackward);
#endif
......
@@ -395,7 +395,7 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
REGISTER_TYPED_FUNC(ContextProjectionBackward,
CPU,
ContextProjectionBackwardFunc);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
REGISTER_TYPED_FUNC(ContextProjectionForward,
GPU,
ContextProjectionForwardFunc);
......
@@ -233,7 +233,7 @@ private:
REGISTER_TYPED_FUNC(CosSimForward, CPU, CosSimForwardFunc);
REGISTER_TYPED_FUNC(CosSimBackward, CPU, CosSimBackwardFunc);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
REGISTER_TYPED_FUNC(CosSimForward, GPU, CosSimForwardFunc);
REGISTER_TYPED_FUNC(CosSimBackward, GPU, CosSimBackwardFunc);
#endif
......
@@ -169,7 +169,7 @@ private:
REGISTER_TYPED_FUNC(Crop, CPU, CropFunc);
REGISTER_TYPED_FUNC(CropGrad, CPU, CropGradFunc);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
REGISTER_TYPED_FUNC(Crop, GPU, CropFunc);
REGISTER_TYPED_FUNC(CropGrad, GPU, CropGradFunc);
#endif
......
@@ -336,7 +336,7 @@ private:
REGISTER_TYPED_FUNC(CrossMapNormal, CPU, CrossMapNormalFunc);
REGISTER_TYPED_FUNC(CrossMapNormalGrad, CPU, CrossMapNormalGradFunc);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
REGISTER_TYPED_FUNC(CrossMapNormal, GPU, CrossMapNormalFunc);
REGISTER_TYPED_FUNC(CrossMapNormalGrad, GPU, CrossMapNormalGradFunc);
#endif
......
@@ -292,7 +292,7 @@ REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
REGISTER_TYPED_FUNC(DepthwiseConvGradFilter,
CPU,
DepthwiseConvGradFilterFunction);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
REGISTER_TYPED_FUNC(DepthwiseConv, GPU, DepthwiseConvFunction);
REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
GPU,
......
@@ -17,7 +17,7 @@ limitations under the License. */
namespace paddle {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
TEST(DepthwiseConv, Forward) {
DepthwiseConvolution<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
"GemmConv-CPU", "DepthwiseConv-GPU", forward);
......
@@ -340,7 +340,7 @@ public:
REGISTER_TYPED_FUNC(GemmConv, CPU, GemmConvFunction);
REGISTER_TYPED_FUNC(GemmConvGradInput, CPU, GemmConvGradInputFunction);
REGISTER_TYPED_FUNC(GemmConvGradFilter, CPU, GemmConvGradFilterFunction);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
REGISTER_TYPED_FUNC(GemmConv, GPU, GemmConvFunction);
REGISTER_TYPED_FUNC(GemmConvGradInput, GPU, GemmConvGradInputFunction);
REGISTER_TYPED_FUNC(GemmConvGradFilter, GPU, GemmConvGradFilterFunction);
......
@@ -24,7 +24,7 @@ TEST(GemmConv, NaiveConv) {
"NaiveConv-CPU", "GemmConv-CPU", forward);
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
TEST(GemmConv, Forward) {
Convolution<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
"GemmConv-CPU", "GemmConv-GPU", forward);
......
@@ -116,7 +116,7 @@ void TestIm2ColFunctor() {
TEST(Im2ColFunctor, CPU) { TestIm2ColFunctor<DEVICE_TYPE_CPU, float>(); }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
TEST(Im2ColFunctor, GPU) { TestIm2ColFunctor<DEVICE_TYPE_GPU, float>(); }
......
@@ -341,7 +341,7 @@ private:
};
REGISTER_TYPED_FUNC(MulOp, CPU, MulFunc);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
REGISTER_TYPED_FUNC(MulOp, GPU, MulFunc);
#endif
}  // namespace paddle
@@ -207,7 +207,7 @@ private:
REGISTER_TYPED_FUNC(Pad, CPU, PadFunc);
REGISTER_TYPED_FUNC(PadGrad, CPU, PadGradFunc);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
REGISTER_TYPED_FUNC(Pad, GPU, PadFunc);
REGISTER_TYPED_FUNC(PadGrad, GPU, PadGradFunc);
#endif
......
@@ -217,7 +217,7 @@ public:
REGISTER_TYPED_FUNC(RowConv, CPU, RowConvFunc);
REGISTER_TYPED_FUNC(RowConvGrad, CPU, RowConvGradFunc);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
REGISTER_TYPED_FUNC(RowConv, GPU, RowConvFunc);
REGISTER_TYPED_FUNC(RowConvGrad, GPU, RowConvGradFunc);
#endif
......
@@ -132,7 +132,7 @@ public:
REGISTER_TYPED_FUNC(NCHW2NHWC, CPU, NCHW2NHWCFunc);
REGISTER_TYPED_FUNC(NHWC2NCHW, CPU, NHWC2NCHWFunc);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
REGISTER_TYPED_FUNC(NCHW2NHWC, GPU, NCHW2NHWCFunc);
REGISTER_TYPED_FUNC(NHWC2NCHW, GPU, NHWC2NCHWFunc);
#endif
......
@@ -16,7 +16,7 @@ limitations under the License. */
#include "BatchNormalizationLayer.h"
#include "Layer.h"
#include "paddle/utils/Stat.h"
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
#include "CudnnBatchNormLayer.h"
#endif
......
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Stat.h"
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
#include "hl_batch_transpose.h"
#endif
#include "BatchNormalizationLayer.h"
@@ -90,7 +90,7 @@ void BatchNormalizationLayer::expandMat(const MatrixPtr& in, MatrixPtr& out) {
size_t batchSize = in->getHeight();
CHECK_EQ(out->getHeight(), batchSize * imgPixels_);
if (useGpu_) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
LOG(FATAL) << "paddle is compiled only for cpu";
#else
batchTranspose(
@@ -127,7 +127,7 @@ void BatchNormalizationLayer::shrinkMat(const MatrixPtr& in, MatrixPtr& out) {
}
CHECK_EQ(in->getHeight(), static_cast<size_t>(batchSize * imgPixels_));
if (useGpu_) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
LOG(FATAL) << "paddle is compiled only for cpu";
#else
batchTranspose(
......
@@ -15,7 +15,7 @@ limitations under the License. */
#include "PoolLayer.h"
#include "PoolProjectionLayer.h"
#include "paddle/utils/Logging.h"
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
#include "CudnnPoolLayer.h"
#endif
namespace paddle {
@@ -53,7 +53,7 @@ Layer* PoolLayer::create(const LayerConfig& config) {
const std::string& pool = config.inputs(0).pool_conf().pool_type();
if (pool == "max-projection" || pool == "avg-projection") {
return new PoolProjectionLayer(config);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
} else if (CudnnPoolLayer::typeCheck(pool)) {
return new CudnnPoolLayer(config);
#endif
......
@@ -674,7 +674,7 @@ void testLayerGradKernel(TestConfig testConf,
bool useGpu,
bool useWeight,
float epsilon) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
if (useGpu) return;
#endif
FLAGS_use_gpu = useGpu;
......
@@ -119,7 +119,7 @@ TEST(Layer, batchNorm) {
CHECK_EQ(static_cast<int>(convLayer->getOutputValue()->getWidth()), 576);
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
void batchNormInference(int n, int c, int h, int w) {
MatrixPtr input = std::make_shared<GpuMatrix>(n, c * h * w);
MatrixPtr cudnnOut = std::make_shared<GpuMatrix>(n, c * h * w);
......
@@ -117,7 +117,7 @@ MatrixPtr doOneConvTest(size_t imgSize,
}
TEST(Layer, convParaUnified) {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
MatrixPtr input, resultCpu, resultGpu;
/// TEST1 for conv ///
......
@@ -150,7 +150,7 @@ TEST(Layer, detectionOutputLayerFwd) {
useGpu,
result2);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
// GPU case 1.
useGpu = true;
inputLoc = Matrix::create(1, 16, false, useGpu);
......
@@ -51,7 +51,7 @@ void testEvaluator(TestConfig testConf,
string testEvaluatorName,
size_t batchSize,
bool useGpu) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
if (useGpu) return;
#endif
FLAGS_use_gpu = useGpu;
......
@@ -97,7 +97,7 @@ TEST(Layer, kmaxSeqScoreLayer) {
Matrix::create(subSeqStartPosition.back(), 1, false, false);
std::vector<bool> mode = {false};
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
mode.push_back(true);
#endif
......
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
#include <cudnn.h>
#endif
#include <gtest/gtest.h>
@@ -258,7 +258,7 @@ void testProjectionConv(size_t groups, bool isDeconv) {
true);
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
TEST(Projection, conv) {
/// test ConvProjection
testProjectionConv(1, false);
@@ -422,7 +422,7 @@ TEST(Layer, depthwiseConvLayer) {
// 'depthwise_conv' is a sepecial case of 'exconv' whose
// groups size equals to the input channels size.
testDepthwiseConvLayer("exconv", /* useGpu= */ false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
testDepthwiseConvLayer("exconv", /* useGpu= */ true);
#endif
}
@@ -480,7 +480,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
TEST(Layer, convLayer) {
testConvLayer("exconv", /* trans= */ false, /* useGpu= */ false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
testConvLayer("exconv", /* trans= */ false, /* useGpu= */ true);
testConvLayer("cudnn_conv", /* trans= */ false, /* useGpu= */ true);
#endif
@@ -525,7 +525,7 @@ TEST(Layer, convTransLayer) {
for (auto useGpu : {false, true}) {
testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu);
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
testConvTransLayer("cudnn_convt", /* trans= */ false, /* useGpu= */ true);
#endif
}
@@ -638,7 +638,7 @@ TEST(Layer, SelectiveFullyConnectedLayer) {
/* trans= */ false,
/* useGup= */ false,
false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
testLayerGrad(config,
"selective_fc",
100,
@@ -1210,7 +1210,7 @@ void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
testLayerGrad(config, "pool", 100, trans, useGpu);
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
void testPoolLayer2(const string& poolType, bool trans, bool useGpu) {
TestConfig config;
config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0});
@@ -1236,7 +1236,7 @@ TEST(Layer, PoolLayer) {
testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false);
testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true);
testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ true);
testPoolLayer("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true);
@@ -1309,7 +1309,7 @@ void testPool3DLayer(const string& poolType, bool trans, bool useGpu) {
TEST(Layer, Pool3DLayer) {
testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ false);
testPool3DLayer("max", /* trans= */ false, /* useGpu= */ false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ true);
testPool3DLayer("max", /* trans= */ false, /* useGpu= */ true);
#endif
@@ -1695,7 +1695,7 @@ void testBatchNormLayer(const string& type, bool trans, bool useGpu) {
TEST(Layer, BatchNormalizationLayer) {
testBatchNormLayer("batch_norm", false, false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
testBatchNormLayer("batch_norm", false, true);
if (hl_get_cudnn_lib_version() >= int(4000)) {
testBatchNormLayer("cudnn_batch_norm", false, true);
@@ -1744,7 +1744,7 @@ void testBatchNorm3DLayer(const string& type, bool trans, bool useGpu) {
TEST(Layer, testBatchNorm3DLayer) {
testBatchNorm3DLayer("batch_norm", false, false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
testBatchNorm3DLayer("batch_norm", false, true);
if (hl_get_cudnn_lib_version() >= int(4000)) {
testBatchNorm3DLayer("cudnn_batch_norm", false, true);
@@ -2262,7 +2262,7 @@ void test3DConvLayer(const string& type, bool trans, bool useGpu) {
TEST(Layer, test3DConvLayer) {
test3DConvLayer("conv3d", /* trans= */ false, /* useGpu= */ false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
test3DConvLayer("conv3d", /* trans= */ false, /* useGpu= */ true);
#endif
}
@@ -2339,7 +2339,7 @@ void test3DDeConvLayer(const string& type, bool trans, bool useGpu) {
TEST(Layer, test3DDeConvLayer) {
test3DDeConvLayer("deconv3d", /* trans= */ false, /* useGpu= */ false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
test3DDeConvLayer("deconv3d", /* trans= */ false, /* useGpu= */ true);
#endif
}
......
@@ -243,7 +243,7 @@ TEST(Compare, concat_slice) {
compareNetwork(config_file_a, config_file_b);
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
TEST(Compare, img_pool) {
std::string config_file_a = "./gserver/tests/img_pool_a.conf";
std::string config_file_b = "./gserver/tests/img_pool_b.conf";
......
@@ -151,7 +151,7 @@ TEST(Layer, priorBoxLayerFwd) {
useGpu,
result);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
// reset the input parameters
variance[1] = 0.1;
variance[3] = 0.2;
......
@@ -485,7 +485,7 @@ TEST(ProtoDataProvider, test) {
// Currently in async mode, useGpu is not supported
continue;
}
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
if (useGpu) {
continue;
}
@@ -525,7 +525,7 @@ TEST(ProtoDataProvider, constant_slots) {
for (int numConstantSlots : {1, 2}) {
for (int useGpu : numTwoArray) {
for (int dataCompression : numTwoArray) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
if (useGpu) {
continue;
}
@@ -708,7 +708,7 @@ TEST(ProtoSequenceDataProvider, test) {
// Currently in async mode, useGpu is not supported
continue;
}
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
if (useGpu) {
continue;
}
......
@@ -37,7 +37,7 @@ TEST(PyDataProvider, py_fill_slots) {
config.clear_files();
std::string dataFile = "gserver/tests/pyDataProvider/pyDataProviderList";
config.set_files(dataFile);
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
bool useGpu = false;
#else
bool useGpu = true;
@@ -71,7 +71,7 @@ TEST(PyDataProvider, py_fill_nest_slots) {
std::string dataFile = "gserver/tests/pyDataProvider/pyDataProviderList";
config.set_files(dataFile);
EXPECT_EQ(config.IsInitialized(), true);
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
bool useGpu = false;
#else
bool useGpu = true;
......
@@ -321,7 +321,7 @@ TEST(Layer, SelectiveFcLayer_train_dense_mul) {
"filelist=gserver/tests/SelectiveFcTest/dense_mul_list";
for (auto useGpu : {false, true}) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
if (useGpu) {
break;
}
@@ -388,7 +388,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
outMatSelfc->getWidth(),
outMatSelfc->getElementCnt()));
cpuOutMatSelfc->copyFrom(*outMatSelfc, HPPL_STREAM_DEFAULT);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
if (useGpu) {
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
}
@@ -418,7 +418,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
MatrixPtr cpuOutMatFc(
new CpuMatrix(outMatFc->getHeight(), outMatFc->getWidth()));
cpuOutMatFc->copyFrom(*outMatFc, HPPL_STREAM_DEFAULT);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
if (useGpu) {
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
}
@@ -443,7 +443,7 @@ TEST(Layer, SelectiveFcLayer_train_sparse_mul) {
selLayerConfig.set_size(fcLayerWidth);
testSelectiveFcLayerTrainSparseMul(selLayerConfig, false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
testSelectiveFcLayerTrainSparseMul(selLayerConfig, true);
#endif
}
......
@@ -195,7 +195,7 @@ TEST(Layer, SeqSliceLayer) {
vector<vector<real>> ends;
std::vector<bool> mode = {false};
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
mode.push_back(true);
#endif
genSeqInfo(seqStartPos, subSeqStartPos);
......
@@ -199,7 +199,7 @@ TEST(Layer, WarpCTCLayer) {
for (auto batchSize : {1, 10, 32}) {
for (auto normByTimes : {false, true}) {
for (auto useGpu : {false, true}) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
if (useGpu) continue;
#endif
LOG(INFO) << "layerSize=" << layerSize << " batchSize=" << batchSize
......
@@ -670,7 +670,7 @@ void GpuMatrix::leftMul(Matrix& a, real scaleAB, real scaleT) {
}
void GpuMatrix::selectRows(Matrix& table, IVector& ids) {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
CHECK(dynamic_cast<GpuMatrix*>(&table));
CHECK(table.useGpu());
CHECK(ids.useGpu());
@@ -694,7 +694,7 @@ void GpuMatrix::selectRows(Matrix& table, IVector& ids) {
}
void GpuMatrix::addToRows(Matrix& table, IVector& ids) {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
CHECK(dynamic_cast<GpuMatrix*>(&table));
CHECK(table.useGpu());
CHECK(ids.useGpu());
@@ -741,7 +741,7 @@ void GpuMatrix::rowMax(Matrix& max) {
}
void GpuMatrix::rowMax(IVector& maxIds, Matrix& maxVal) {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
CHECK(maxIds.useGpu() && maxVal.useGpu()) << "Matrix type are not equal";
size_t numSamples = getHeight();
size_t beam = maxVal.getWidth();
......
@@ -836,7 +836,7 @@ void GpuSparseMatrix::zeroMem() {
}
void GpuSparseMatrix::rowMax(IVector& maxIds, Matrix& maxVal) {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
CHECK(maxIds.useGpu() && maxVal.useGpu()) << "Matrix type are not equal";
size_t numSamples = getHeight();
size_t beam = maxVal.getWidth();
......
@@ -172,7 +172,7 @@ void GpuVectorT<T>::isEqualTo(const VectorT<T>& b, const T& value) {
template <class T>
void GpuVectorT<T>::selectFrom(const VectorT<T>& src, const VectorT<int>& ids) {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
hl_vector_select_from<T>(this->getData(),
this->getSize(),
src.getData(),
@@ -850,7 +850,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
size_t size)
: sync_(nullptr) {
CHECK_LE(offset + size, static_cast<size_t>(src.getSize()));
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
SyncedFlag* flag = src.getSync();
if (*flag == DATA_AT_CPU) {
src.copyToGpu();  // will set synchronous data between CPU and GPU
@@ -861,7 +861,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
auto cMemHandle = (src.getVector(false))->getMemoryHandle();
cpuVectorT_ = std::make_shared<CpuVectorT<T>>(
size, std::dynamic_pointer_cast<CpuMemoryHandle>(cMemHandle), offset);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
auto gMemHandle = (src.getVector(true))->getMemoryHandle();
gpuVectorT_ = std::make_shared<GpuVectorT<T>>(
size, std::dynamic_pointer_cast<GpuMemoryHandle>(gMemHandle), offset);
......
@@ -68,7 +68,7 @@ void testPoolAllocator() {
TEST(Allocator, Pool) {
testPoolAllocator<CpuAllocator>();
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
testPoolAllocator<GpuAllocator>();
#endif
}
@@ -92,7 +92,7 @@ TEST(MemoryHandle, Cpu) {
EXPECT_EQ(ptr1, ptr2);
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
TEST(MemoryHandle, Gpu) {
int numGpu = hl_get_device_count();
......
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
/**
 * This test file use autotest::AutoCompare and cmpWithoutArg to compares the
 * implementation of CPU and GPU member function in
......
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
#include <gtest/gtest.h>
#include "paddle/math/Vector.h"
......
@@ -94,7 +94,7 @@ void testWrapper(F&& f) {
}
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
TEST(ExecViaCpu, test1) {
testWrapper(f);
testWrapper(&f);
......
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
#include <gtest/gtest.h>
#include "paddle/math/Matrix.h"
......
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
/**
 * This test file use autotest::AutoCompare and cmpWithArg to compares the
 * implementation of CPU and GPU member function in Matrix.cpp.
......
@@ -47,7 +47,7 @@ struct MatrixPara {
SparseFormat format;
};
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
void test_sparse_matrix_mul(MatrixPara paraA,
MatrixPara paraB,
MatrixPara paraC) {
@@ -452,7 +452,7 @@ TEST(Matrix, SparseMatrixCSRFormatTrimFrom) {
matB->trimFrom(*mat);
checkSMatrixEqual2(matA, matB);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
GpuSparseMatrixPtr matC = std::make_shared<GpuSparseMatrix>(
height, trimedWidth, height, FLOAT_VALUE, SPARSE_CSR, true);
matC->trimFrom(*mat);
@@ -546,7 +546,7 @@ TEST(Matrix, SparseMatrixCSCFormatTrimFrom) {
matB->trimFrom(*mat);
checkSMatrixEqual2(matA, matB);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
GpuSparseMatrixPtr matC = std::make_shared<GpuSparseMatrix>(
height, trimedWidth, height, FLOAT_VALUE, SPARSE_CSC, true);
matC->trimFrom(*mat);
......
@@ -91,7 +91,7 @@ int VectorCheckErr(const VectorPtr& vector1, const VectorPtr& vector2) {
typedef std::function<void(size_t size, bool useGpu)> testMatrixFunc;
void testCase(testMatrixFunc matrixFunc) {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
for (auto useGpu : {false, true}) {
#else
for (auto useGpu : {false}) {
......
@@ -17,7 +17,7 @@ limitations under the License. */
using namespace paddle;  // NOLINT
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
TEST(MatrixBatchTransTest, test_batch_matrix_transpose) {
const int nx = 100;
const int ny = 50;
......
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
/// This unittest checks GpuMatrix/CpuMatrix get same result, so disable when
/// only cpu version.
......
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
#include <cuda_runtime.h>
#include <gtest/gtest.h>
......
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
/// This unittest checks GpuSparseMatrix/CpuSparseMatrix get same result,
// so disable when
/// only cpu version.
......
@@ -175,7 +175,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
}
BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
if (system_allocator_->UseGpu()) {
if ((total_used_ + total_free_) == 0) {
// Compute the maximum allocation size for the first allocation.
......
@@ -62,7 +62,7 @@ void CPUAllocator::Free(void* p, size_t size, size_t index) {
bool CPUAllocator::UseGpu() const { return false; }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
void* GPUAllocator::Alloc(size_t& index, size_t size) {
// CUDA documentation doesn't explain if cudaMalloc returns nullptr
......
@@ -40,7 +40,7 @@ class CPUAllocator : public SystemAllocator {
virtual bool UseGpu() const;
};
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
class GPUAllocator : public SystemAllocator {
public:
virtual void* Alloc(size_t& index, size_t size);
......
@@ -56,7 +56,7 @@ TEST(CPUAllocator, LockMem) {
TestAllocator(a, 0);
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
TEST(GPUAllocator, Alloc) {
paddle::memory::detail::GPUAllocator a;
TestAllocator(a, 2048);
......
@@ -26,7 +26,7 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
std::memcpy(dst, src, num);
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
template <>
void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace dst_place,
void* dst,
......
@@ -33,7 +33,7 @@ namespace memory {
template <typename DstPlace, typename SrcPlace>
void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
/**
 * \brief Copy memory from one place to another place.
......
@@ -62,7 +62,7 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
return GetCPUBuddyAllocator()->Used();
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
using BuddyAllocVec = std::vector<BuddyAllocator*>;
......
@@ -80,7 +80,7 @@ TEST(BuddyAllocator, CPUMultAlloc) {
}
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
size_t align(size_t size, paddle::platform::GPUPlace place) {
size += sizeof(paddle::memory::detail::Metadata);
......
@@ -34,7 +34,7 @@ struct StridedMemcpyFunctor<T, 1> {
auto& cpu_place = boost::get<platform::CPUPlace>(place);
memory::Copy(cpu_place, dst, cpu_place, src, sizeof(T) * dst_dim.head);
} else {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
auto& gpu_place = boost::get<platform::GPUPlace>(place);
auto& cuda_ctx =
reinterpret_cast<const platform::CUDADeviceContext&>(dev_ctx);
......
@@ -71,7 +71,7 @@ void testIm2col() {
context =
new paddle::platform::CPUDeviceContext(paddle::platform::CPUPlace());
} else {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
context =
new paddle::platform::CUDADeviceContext(paddle::platform::GPUPlace());
#else
@@ -116,7 +116,7 @@ void testIm2col() {
TEST(math, im2col) {
testIm2col<paddle::platform::CPUPlace>();
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
testIm2col<paddle::platform::GPUPlace>();
#endif
}
#include "paddle/operators/math/math_function.h"
#include "gtest/gtest.h"
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
TEST(math_function, notrans_mul_trans) {
paddle::framework::Tensor input1;
paddle::framework::Tensor input1_gpu;
......
@@ -72,7 +72,7 @@ TEST(StridedMemcpy, CPUConcat) {
}
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
TEST(StridedMemcpy, GPUCrop) {
// clang-format off
int src[] = {
......
@@ -35,7 +35,7 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
Place CPUDeviceContext::GetPlace() const { return CPUPlace(); }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
template <>
Eigen::GpuDevice*
......
@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/gpu_info.h"
@@ -61,7 +61,7 @@ class CPUDeviceContext : public DeviceContext {
std::unique_ptr<Eigen::DefaultDevice> eigen_device_;
};
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
template <>
struct EigenDeviceConverter<platform::GPUPlace> {
using EigenDeviceType = Eigen::GpuDevice;
......
@@ -29,7 +29,7 @@ limitations under the License. */
#include <cxxabi.h>  // for __cxa_demangle
#endif
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
@@ -113,7 +113,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
}
}
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
......
@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
#include <cuda_runtime.h>
#include <stddef.h>
......
@@ -16,7 +16,7 @@
#include <boost/config.hpp>
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
// Because boost's variadic templates has bug on nvcc, boost will disable
// variadic template support when GPU enabled on nvcc.
......
@@ -215,7 +215,7 @@ int main(int argc, char** argv) {
uint64_t dataSize = FLAGS_dim * sizeof(real);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
GpuVector gpuParam(FLAGS_dim);
GpuVector gpuGrad(FLAGS_dim);
#else
......
@@ -99,7 +99,7 @@ TEST(ProtoServer, regular) {
}
TEST(ProtoServer, extended) {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
ProtoClient* client;
if (FLAGS_rdma_tcp == "rdma")
client = new ProtoClient(FLAGS_server_addr, FLAGS_port, F_RDMA);
......
@@ -34,7 +34,7 @@ static size_t UniqueIntegerGenerator() {
}
bool IsCompileGPU() {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
return false;
#else
return true;
@@ -78,7 +78,7 @@ PYBIND11_PLUGIN(core) {
.def("set", PyCPUTensorSetFromArray<float>)
.def("set", PyCPUTensorSetFromArray<int>)
.def("set", PyCPUTensorSetFromArray<double>)
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
.def("set", PyCUDATensorSetFromArray<float>)
.def("set", PyCUDATensorSetFromArray<int>)
.def("set", PyCUDATensorSetFromArray<double>)
@@ -96,7 +96,7 @@ PYBIND11_PLUGIN(core) {
.def(
"__init__",
[](LoDTensor &instance, const std::vector<std::vector<size_t>> &lod) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
new (&instance) LoDTensor(lod);
#else
LoD new_lod;
...@@ -107,7 +107,7 @@ PYBIND11_PLUGIN(core) { ...@@ -107,7 +107,7 @@ PYBIND11_PLUGIN(core) {
}) })
.def("set_lod", .def("set_lod",
[](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) { [](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
self.set_lod(lod); self.set_lod(lod);
#else #else
LoD new_lod; LoD new_lod;
...@@ -117,7 +117,7 @@ PYBIND11_PLUGIN(core) { ...@@ -117,7 +117,7 @@ PYBIND11_PLUGIN(core) {
#endif #endif
}) })
.def("lod", [](LoDTensor &self) -> std::vector<std::vector<size_t>> { .def("lod", [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
return self.lod(); return self.lod();
#else #else
auto lod = self.lod(); auto lod = self.lod();
...@@ -203,7 +203,7 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -203,7 +203,7 @@ All parameter, weight, gradient are variables in Paddle.
.def_static("create", .def_static("create",
[](paddle::platform::GPUPlace& place) [](paddle::platform::GPUPlace& place)
-> paddle::platform::DeviceContext* { -> paddle::platform::DeviceContext* {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
PADDLE_THROW("GPUPlace is not supported in CPU device."); PADDLE_THROW("GPUPlace is not supported in CPU device.");
#else #else
return new paddle::platform::CUDADeviceContext(place); return new paddle::platform::CUDADeviceContext(place);
......
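The GPUPlace hunk above shows the runtime side of the rename: when the binary is built without PADDLE_WITH_CUDA, asking for a GPU device context fails immediately instead of compiling dead CUDA code. Below is a minimal, self-contained sketch of that guard, not Paddle's code: create_device_context is a hypothetical factory and std::runtime_error stands in for the PADDLE_THROW macro.

#include <cstdio>
#include <stdexcept>
#include <string>

// Hypothetical factory mirroring the guard in the GPUPlace hunk above:
// a CPU-only build (no -DPADDLE_WITH_CUDA) rejects GPU requests outright.
std::string create_device_context(bool want_gpu) {
  if (!want_gpu) {
    return "CPUDeviceContext";
  }
#ifndef PADDLE_WITH_CUDA
  // CPU-only build: refuse the GPU request instead of pretending to serve it.
  throw std::runtime_error("GPUPlace is not supported in CPU device.");
#else
  // A CUDA build would construct the real CUDADeviceContext here.
  return "CUDADeviceContext";
#endif
}

int main() {
  try {
    std::printf("%s\n", create_device_context(true).c_str());
  } catch (const std::runtime_error& e) {
    std::printf("error: %s\n", e.what());  // expected in a CPU-only build
  }
  return 0;
}

In the real binding the #else branch constructs a CUDADeviceContext for the requested place, as the hunk above shows.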
...@@ -106,7 +106,7 @@ void PyCPUTensorSetFromArray( ...@@ -106,7 +106,7 @@ void PyCPUTensorSetFromArray(
std::memcpy(dst, array.data(), sizeof(T) * array.size()); std::memcpy(dst, array.data(), sizeof(T) * array.size());
} }
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
template <typename T> template <typename T>
void PyCUDATensorSetFromArray( void PyCUDATensorSetFromArray(
framework::Tensor &self, framework::Tensor &self,
......
...@@ -29,7 +29,7 @@ int main(int argc, char** argv) { ...@@ -29,7 +29,7 @@ int main(int argc, char** argv) {
initMain(argc, argv); initMain(argc, argv);
initPython(argc, argv); initPython(argc, argv);
string confFile = TrainerConfigHelper::getConfigNameFromPath(FLAGS_model_dir); string confFile = TrainerConfigHelper::getConfigNameFromPath(FLAGS_model_dir);
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
FLAGS_use_gpu = false; FLAGS_use_gpu = false;
#endif #endif
auto config = std::make_shared<TrainerConfigHelper>(confFile); auto config = std::make_shared<TrainerConfigHelper>(confFile);
......
...@@ -146,7 +146,7 @@ void compareGradient(comData& comDataCpu, comData& comDataGpu) { ...@@ -146,7 +146,7 @@ void compareGradient(comData& comDataCpu, comData& comDataGpu) {
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
exit(0); exit(0);
#endif #endif
paddle::initMain(argc, argv); paddle::initMain(argc, argv);
......
...@@ -174,7 +174,7 @@ TEST(compareSparse, multiGradientMachine) { ...@@ -174,7 +174,7 @@ TEST(compareSparse, multiGradientMachine) {
FLAGS_local = local; FLAGS_local = local;
FLAGS_ports_num_for_sparse = 5; FLAGS_ports_num_for_sparse = 5;
for (bool useGpu : {false, true}) { for (bool useGpu : {false, true}) {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
if (useGpu) continue; if (useGpu) continue;
#endif #endif
FLAGS_parallel_nn = useGpu; FLAGS_parallel_nn = useGpu;
...@@ -198,7 +198,7 @@ TEST(compareSparse, NeuralNetwork) { ...@@ -198,7 +198,7 @@ TEST(compareSparse, NeuralNetwork) {
FLAGS_local = local; FLAGS_local = local;
FLAGS_ports_num_for_sparse = 5; FLAGS_ports_num_for_sparse = 5;
for (bool useGpu : {false, true}) { for (bool useGpu : {false, true}) {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
if (useGpu) continue; if (useGpu) continue;
#endif #endif
FLAGS_parallel_nn = useGpu; FLAGS_parallel_nn = useGpu;
......
...@@ -51,7 +51,7 @@ void checkGradientTest(const string& configFile, ...@@ -51,7 +51,7 @@ void checkGradientTest(const string& configFile,
TEST(checkGradient, cpu) { checkGradientTest(configFile1, false, false); } TEST(checkGradient, cpu) { checkGradientTest(configFile1, false, false); }
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
TEST(checkGradient, gpu) { checkGradientTest(configFile1, true, false); } TEST(checkGradient, gpu) { checkGradientTest(configFile1, true, false); }
TEST(checkGradient, multiGpu) { TEST(checkGradient, multiGpu) {
...@@ -97,7 +97,7 @@ TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); } ...@@ -97,7 +97,7 @@ TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); }
TEST(checkGradient, chunk) { TEST(checkGradient, chunk) {
checkGradientTest(configFile3, false, false); checkGradientTest(configFile3, false, false);
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
checkGradientTest(configFile3, true, true); checkGradientTest(configFile3, true, true);
#endif #endif
} }
......
...@@ -79,7 +79,7 @@ void trainerOnePassTest(const string& configFile, ...@@ -79,7 +79,7 @@ void trainerOnePassTest(const string& configFile,
// 1. test trainer (cpu, gpu). // 1. test trainer (cpu, gpu).
TEST(trainerOnePass, cpu) { trainerOnePassTest(configFile1, false, false); } TEST(trainerOnePass, cpu) { trainerOnePassTest(configFile1, false, false); }
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
TEST(trainerOnePass, gpu) { trainerOnePassTest(configFile1, true, false); } TEST(trainerOnePass, gpu) { trainerOnePassTest(configFile1, true, false); }
TEST(trainerOnePass, gpu2) { trainerOnePassTest(configFile1, true, false, 2); } TEST(trainerOnePass, gpu2) { trainerOnePassTest(configFile1, true, false, 2); }
...@@ -94,7 +94,7 @@ TEST(trainerOnePass, parallel) { ...@@ -94,7 +94,7 @@ TEST(trainerOnePass, parallel) {
#endif #endif
// 2. test average_window. // 2. test average_window.
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
TEST(average_window, gpu) { TEST(average_window, gpu) {
trainerOnePassTest(configFile1, true, false, 4, 0.01); trainerOnePassTest(configFile1, true, false, 4, 0.01);
} }
...@@ -266,7 +266,7 @@ TEST(checkRemoteUpdater, cpuTrainerOldUpdater) { ...@@ -266,7 +266,7 @@ TEST(checkRemoteUpdater, cpuTrainerOldUpdater) {
checkRemoteParameterUpdaterTest(configFile1, false, false, 1, true); checkRemoteParameterUpdaterTest(configFile1, false, false, 1, true);
} }
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
TEST(checkRemoteUpdater, gpuTrainer) { TEST(checkRemoteUpdater, gpuTrainer) {
checkRemoteParameterUpdaterTest(configFile1, true, false); checkRemoteParameterUpdaterTest(configFile1, true, false);
} }
......
...@@ -113,7 +113,7 @@ void testGeneration(const string& configFile, ...@@ -113,7 +113,7 @@ void testGeneration(const string& configFile,
#ifndef PADDLE_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
TEST(RecurrentGradientMachine, test_generation) { TEST(RecurrentGradientMachine, test_generation) {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
const auto useGpuConfs = {false}; const auto useGpuConfs = {false};
#else #else
const auto useGpuConfs = {true, false}; const auto useGpuConfs = {true, false};
......
...@@ -14,7 +14,7 @@ limitations under the License. */ ...@@ -14,7 +14,7 @@ limitations under the License. */
#include "Flags.h" #include "Flags.h"
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
DEFINE_bool(use_gpu, false, "Only support CPU training"); DEFINE_bool(use_gpu, false, "Only support CPU training");
#else #else
DEFINE_bool(use_gpu, true, "Whether to use GPU for training"); DEFINE_bool(use_gpu, true, "Whether to use GPU for training");
......
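The Flags.h hunk above chooses the default value of the use_gpu flag at compile time, and the paddle_trainer main() hunk earlier forces it back to false in a CPU-only build. A standalone sketch of the same pattern follows, assuming gflags is available under the gflags:: namespace; the file itself is illustrative and not part of the commit.

#include <cstdio>
#include <gflags/gflags.h>

// Default to GPU only when the binary was compiled with -DPADDLE_WITH_CUDA,
// mirroring the Flags.h hunk above.
#ifndef PADDLE_WITH_CUDA
DEFINE_bool(use_gpu, false, "Only support CPU training");
#else
DEFINE_bool(use_gpu, true, "Whether to use GPU for training");
#endif

int main(int argc, char* argv[]) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);
#ifndef PADDLE_WITH_CUDA
  // A CPU-only build overrides any --use_gpu=true request, as the
  // paddle_trainer main() hunk above does.
  FLAGS_use_gpu = false;
#endif
  std::printf("use_gpu = %d\n", static_cast<int>(FLAGS_use_gpu));
  return 0;
}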
...@@ -218,7 +218,7 @@ protected: ...@@ -218,7 +218,7 @@ protected:
* *d2* is the peer device to which direct access is enabled for device *d1*. * *d2* is the peer device to which direct access is enabled for device *d1*.
*/ */
inline void enablePeerAccess(int d1, int d2) { inline void enablePeerAccess(int d1, int d2) {
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
if (hl_device_can_access_peer(d1, d2)) { if (hl_device_can_access_peer(d1, d2)) {
SetDevice dev(d1); SetDevice dev(d1);
hl_device_enable_peer_access(d2); hl_device_enable_peer_access(d2);
......
...@@ -48,7 +48,7 @@ void printVersion(std::ostream& os); ...@@ -48,7 +48,7 @@ void printVersion(std::ostream& os);
* @return return true if paddle compiled with GPU * @return return true if paddle compiled with GPU
*/ */
constexpr bool isWithGpu() { constexpr bool isWithGpu() {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
return false; return false;
#else #else
return true; return true;
......
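Taken together, the hunks above all follow one convention: the build system defines PADDLE_WITH_CUDA when GPU support is enabled, and every CUDA-only path is wrapped in #ifdef PADDLE_WITH_CUDA, with an #ifndef branch providing the CPU fallback. The following minimal probe mimics isWithGpu() and IsCompileGPU() from the hunks above; the file name and compile commands are illustrative assumptions, not part of Paddle's build.

// with_cuda_probe.cc -- illustrative only; compile with either:
//   g++ -DPADDLE_WITH_CUDA with_cuda_probe.cc   (GPU-style build)
//   g++ with_cuda_probe.cc                      (CPU-only build)
#include <cstdio>

// Compile-time answer to "was CUDA support built in?", in the style of
// isWithGpu() / IsCompileGPU() shown in the diff above.
constexpr bool is_compiled_with_cuda() {
#ifndef PADDLE_WITH_CUDA
  return false;
#else
  return true;
#endif
}

int main() {
  std::printf("compiled with CUDA: %s\n",
              is_compiled_with_cuda() ? "yes" : "no");
  return 0;
}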