Commit e79d2f1b, authored by Yi Wang, committed by GitHub

Merge pull request #4584 from reyoung/feature/change_macro_paddle_no_gpu

Change `PADDLE_ONLY_CPU` to `PADDLE_WITH_GPU`
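Note that the rename inverts the polarity of every guard: `PADDLE_ONLY_CPU` asserted a CPU-only build, while `PADDLE_WITH_GPU` asserts a GPU build, so each `#ifdef PADDLE_ONLY_CPU` becomes `#ifndef PADDLE_WITH_GPU` and each `#ifndef PADDLE_ONLY_CPU` becomes `#ifdef PADDLE_WITH_GPU`. On the build side, CMake stops defining `-DPADDLE_ONLY_CPU` in the non-GPU branch and instead defines `-DPADDLE_WITH_GPU` in the GPU branch. A minimal standalone sketch of the resulting pattern, built around the isGpuVersion() helper touched by this diff (the main() driver is illustrative only, not part of the PR):

    #include <iostream>

    bool isGpuVersion() {
    #ifndef PADDLE_WITH_GPU  // was: #ifdef PADDLE_ONLY_CPU
      return false;  // CPU-only build: the GPU macro is absent
    #else
      return true;   // built with -DPADDLE_WITH_GPU
    #endif
    }

    int main() {
      // Prints "false" unless compiled with -DPADDLE_WITH_GPU.
      std::cout << std::boolalpha << isGpuVersion() << std::endl;
    }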
@@ -49,11 +49,11 @@ if(NOT WITH_GOLANG)
 endif(NOT WITH_GOLANG)
 if(NOT WITH_GPU)
-add_definitions(-DPADDLE_ONLY_CPU)
 add_definitions(-DHPPL_STUB_FUNC)
 list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
 else()
+add_definitions(-DPADDLE_WITH_GPU)
 FIND_PACKAGE(CUDA REQUIRED)
 if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
...
@@ -47,7 +47,7 @@ bool isUsingGpu() { return FLAGS_use_gpu; }
 void setUseGpu(bool useGpu) { FLAGS_use_gpu = useGpu; }
 bool isGpuVersion() {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
 return false;
 #else
 return true;
...
@@ -46,7 +46,7 @@ paddle_error paddle_matrix_set_row(paddle_matrix mat,
 if (rowID >= ptr->mat->getHeight()) return kPD_OUT_OF_RANGE;
 paddle::real* buf = ptr->mat->getRowBuf(rowID);
 size_t width = ptr->mat->getWidth();
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 hl_memcpy(buf, rowArray, sizeof(paddle::real) * width);
 #else
 std::copy(rowArray, rowArray + width, buf);
...
@@ -15,7 +15,7 @@
 #pragma once
 #include <memory>
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 #include <thrust/device_vector.h>
 #include <thrust/host_vector.h>
 #include <thrust/system/cuda/experimental/pinned_allocator.h>
@@ -29,7 +29,7 @@
 namespace paddle {
 namespace framework {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
 template <typename T>
 using Vector = std::vector<T>;
 #else
...
@@ -211,7 +211,7 @@ class OpKernelRegistrar : public Registrar {
 // TODO(fengjiayi): The following macros
 // seems ugly, do we have better method?
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
 #define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU)
 #else
 #define USE_OP_KERNEL(op_type) \
...
@@ -25,7 +25,7 @@ Eigen::DefaultDevice& ExecutionContext::GetEigenDevice<
 return *device_context_.GetEigenDevice<platform::CPUPlace>();
 }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 template <>
 Eigen::GpuDevice&
 ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
...
@@ -65,7 +65,7 @@ inline T* Tensor::mutable_data(platform::Place place) {
 holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
 boost::get<platform::CPUPlace>(place), size));
 } else if (platform::is_gpu_place(place)) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
 PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
 }
 #else
@@ -103,7 +103,7 @@ inline void Tensor::CopyFrom(const Tensor& src,
 memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
 boost::get<platform::CPUPlace>(src_place), src_ptr, size);
 }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 else if (platform::is_gpu_place(src_place) &&
 platform::is_cpu_place(dst_place)) {
 memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
...
@@ -74,7 +74,7 @@ TEST(Tensor, MutableData) {
 EXPECT_EQ(p1, p2);
 }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 {
 Tensor src_tensor;
 float* p1 = nullptr;
@@ -126,7 +126,7 @@ TEST(Tensor, ShareDataWith) {
 ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>());
 }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 {
 Tensor src_tensor;
 Tensor dst_tensor;
@@ -163,7 +163,7 @@ TEST(Tensor, Slice) {
 EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address);
 }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 {
 Tensor src_tensor;
 src_tensor.mutable_data<double>(make_ddim({6, 9}), GPUPlace());
@@ -218,7 +218,7 @@ TEST(Tensor, CopyFrom) {
 EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
 }
 }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 {
 Tensor src_tensor;
 Tensor gpu_tensor;
...
@@ -194,7 +194,7 @@ public:
 REGISTER_TYPED_FUNC(BlockExpand, CPU, BlockExpandForward);
 REGISTER_TYPED_FUNC(BlockExpandGrad, CPU, BlockExpandBackward);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 REGISTER_TYPED_FUNC(BlockExpand, GPU, BlockExpandForward);
 REGISTER_TYPED_FUNC(BlockExpandGrad, GPU, BlockExpandBackward);
 #endif
...
@@ -395,7 +395,7 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
 REGISTER_TYPED_FUNC(ContextProjectionBackward,
 CPU,
 ContextProjectionBackwardFunc);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 REGISTER_TYPED_FUNC(ContextProjectionForward,
 GPU,
 ContextProjectionForwardFunc);
...
@@ -233,7 +233,7 @@ private:
 REGISTER_TYPED_FUNC(CosSimForward, CPU, CosSimForwardFunc);
 REGISTER_TYPED_FUNC(CosSimBackward, CPU, CosSimBackwardFunc);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 REGISTER_TYPED_FUNC(CosSimForward, GPU, CosSimForwardFunc);
 REGISTER_TYPED_FUNC(CosSimBackward, GPU, CosSimBackwardFunc);
 #endif
...
@@ -169,7 +169,7 @@ private:
 REGISTER_TYPED_FUNC(Crop, CPU, CropFunc);
 REGISTER_TYPED_FUNC(CropGrad, CPU, CropGradFunc);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 REGISTER_TYPED_FUNC(Crop, GPU, CropFunc);
 REGISTER_TYPED_FUNC(CropGrad, GPU, CropGradFunc);
 #endif
...
@@ -336,7 +336,7 @@ private:
 REGISTER_TYPED_FUNC(CrossMapNormal, CPU, CrossMapNormalFunc);
 REGISTER_TYPED_FUNC(CrossMapNormalGrad, CPU, CrossMapNormalGradFunc);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 REGISTER_TYPED_FUNC(CrossMapNormal, GPU, CrossMapNormalFunc);
 REGISTER_TYPED_FUNC(CrossMapNormalGrad, GPU, CrossMapNormalGradFunc);
 #endif
...
@@ -292,7 +292,7 @@ REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
 REGISTER_TYPED_FUNC(DepthwiseConvGradFilter,
 CPU,
 DepthwiseConvGradFilterFunction);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 REGISTER_TYPED_FUNC(DepthwiseConv, GPU, DepthwiseConvFunction);
 REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
 GPU,
...
@@ -17,7 +17,7 @@ limitations under the License. */
 namespace paddle {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TEST(DepthwiseConv, Forward) {
 DepthwiseConvolution<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
 "GemmConv-CPU", "DepthwiseConv-GPU", forward);
...
@@ -340,7 +340,7 @@ public:
 REGISTER_TYPED_FUNC(GemmConv, CPU, GemmConvFunction);
 REGISTER_TYPED_FUNC(GemmConvGradInput, CPU, GemmConvGradInputFunction);
 REGISTER_TYPED_FUNC(GemmConvGradFilter, CPU, GemmConvGradFilterFunction);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 REGISTER_TYPED_FUNC(GemmConv, GPU, GemmConvFunction);
 REGISTER_TYPED_FUNC(GemmConvGradInput, GPU, GemmConvGradInputFunction);
 REGISTER_TYPED_FUNC(GemmConvGradFilter, GPU, GemmConvGradFilterFunction);
...
@@ -24,7 +24,7 @@ TEST(GemmConv, NaiveConv) {
 "NaiveConv-CPU", "GemmConv-CPU", forward);
 }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TEST(GemmConv, Forward) {
 Convolution<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
 "GemmConv-CPU", "GemmConv-GPU", forward);
...
@@ -116,7 +116,7 @@ void TestIm2ColFunctor() {
 TEST(Im2ColFunctor, CPU) { TestIm2ColFunctor<DEVICE_TYPE_CPU, float>(); }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TEST(Im2ColFunctor, GPU) { TestIm2ColFunctor<DEVICE_TYPE_GPU, float>(); }
...
@@ -341,7 +341,7 @@ private:
 };
 REGISTER_TYPED_FUNC(MulOp, CPU, MulFunc);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 REGISTER_TYPED_FUNC(MulOp, GPU, MulFunc);
 #endif
 } // namespace paddle
@@ -207,7 +207,7 @@ private:
 REGISTER_TYPED_FUNC(Pad, CPU, PadFunc);
 REGISTER_TYPED_FUNC(PadGrad, CPU, PadGradFunc);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 REGISTER_TYPED_FUNC(Pad, GPU, PadFunc);
 REGISTER_TYPED_FUNC(PadGrad, GPU, PadGradFunc);
 #endif
...
@@ -217,7 +217,7 @@ public:
 REGISTER_TYPED_FUNC(RowConv, CPU, RowConvFunc);
 REGISTER_TYPED_FUNC(RowConvGrad, CPU, RowConvGradFunc);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 REGISTER_TYPED_FUNC(RowConv, GPU, RowConvFunc);
 REGISTER_TYPED_FUNC(RowConvGrad, GPU, RowConvGradFunc);
 #endif
...
@@ -132,7 +132,7 @@ public:
 REGISTER_TYPED_FUNC(NCHW2NHWC, CPU, NCHW2NHWCFunc);
 REGISTER_TYPED_FUNC(NHWC2NCHW, CPU, NHWC2NCHWFunc);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 REGISTER_TYPED_FUNC(NCHW2NHWC, GPU, NCHW2NHWCFunc);
 REGISTER_TYPED_FUNC(NHWC2NCHW, GPU, NHWC2NCHWFunc);
 #endif
...
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "BatchNormalizationLayer.h"
 #include "Layer.h"
 #include "paddle/utils/Stat.h"
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 #include "CudnnBatchNormLayer.h"
 #endif
...
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/utils/Stat.h"
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 #include "hl_batch_transpose.h"
 #endif
 #include "BatchNormalizationLayer.h"
@@ -90,7 +90,7 @@ void BatchNormalizationLayer::expandMat(const MatrixPtr& in, MatrixPtr& out) {
 size_t batchSize = in->getHeight();
 CHECK_EQ(out->getHeight(), batchSize * imgPixels_);
 if (useGpu_) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
 LOG(FATAL) << "paddle is compiled only for cpu";
 #else
 batchTranspose(
@@ -127,7 +127,7 @@ void BatchNormalizationLayer::shrinkMat(const MatrixPtr& in, MatrixPtr& out) {
 }
 CHECK_EQ(in->getHeight(), static_cast<size_t>(batchSize * imgPixels_));
 if (useGpu_) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
 LOG(FATAL) << "paddle is compiled only for cpu";
 #else
 batchTranspose(
...
@@ -15,7 +15,7 @@ limitations under the License. */
 #include "PoolLayer.h"
 #include "PoolProjectionLayer.h"
 #include "paddle/utils/Logging.h"
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 #include "CudnnPoolLayer.h"
 #endif
 namespace paddle {
@@ -53,7 +53,7 @@ Layer* PoolLayer::create(const LayerConfig& config) {
 const std::string& pool = config.inputs(0).pool_conf().pool_type();
 if (pool == "max-projection" || pool == "avg-projection") {
 return new PoolProjectionLayer(config);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 } else if (CudnnPoolLayer::typeCheck(pool)) {
 return new CudnnPoolLayer(config);
 #endif
...
@@ -674,7 +674,7 @@ void testLayerGradKernel(TestConfig testConf,
 bool useGpu,
 bool useWeight,
 float epsilon) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
 if (useGpu) return;
 #endif
 FLAGS_use_gpu = useGpu;
...
@@ -119,7 +119,7 @@ TEST(Layer, batchNorm) {
 CHECK_EQ(static_cast<int>(convLayer->getOutputValue()->getWidth()), 576);
 }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 void batchNormInference(int n, int c, int h, int w) {
 MatrixPtr input = std::make_shared<GpuMatrix>(n, c * h * w);
 MatrixPtr cudnnOut = std::make_shared<GpuMatrix>(n, c * h * w);
...
@@ -117,7 +117,7 @@ MatrixPtr doOneConvTest(size_t imgSize,
 }
 TEST(Layer, convParaUnified) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 MatrixPtr input, resultCpu, resultGpu;
 /// TEST1 for conv ///
...
@@ -150,7 +150,7 @@ TEST(Layer, detectionOutputLayerFwd) {
 useGpu,
 result2);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 // GPU case 1.
 useGpu = true;
 inputLoc = Matrix::create(1, 16, false, useGpu);
...
@@ -51,7 +51,7 @@ void testEvaluator(TestConfig testConf,
 string testEvaluatorName,
 size_t batchSize,
 bool useGpu) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
 if (useGpu) return;
 #endif
 FLAGS_use_gpu = useGpu;
...
@@ -97,7 +97,7 @@ TEST(Layer, kmaxSeqScoreLayer) {
 Matrix::create(subSeqStartPosition.back(), 1, false, false);
 std::vector<bool> mode = {false};
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 mode.push_back(true);
 #endif
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 #include <cudnn.h>
 #endif
 #include <gtest/gtest.h>
@@ -258,7 +258,7 @@ void testProjectionConv(size_t groups, bool isDeconv) {
 true);
 }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TEST(Projection, conv) {
 /// test ConvProjection
 testProjectionConv(1, false);
@@ -422,7 +422,7 @@ TEST(Layer, depthwiseConvLayer) {
 // 'depthwise_conv' is a sepecial case of 'exconv' whose
 // groups size equals to the input channels size.
 testDepthwiseConvLayer("exconv", /* useGpu= */ false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 testDepthwiseConvLayer("exconv", /* useGpu= */ true);
 #endif
 }
@@ -480,7 +480,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
 TEST(Layer, convLayer) {
 testConvLayer("exconv", /* trans= */ false, /* useGpu= */ false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 testConvLayer("exconv", /* trans= */ false, /* useGpu= */ true);
 testConvLayer("cudnn_conv", /* trans= */ false, /* useGpu= */ true);
 #endif
@@ -525,7 +525,7 @@ TEST(Layer, convTransLayer) {
 for (auto useGpu : {false, true}) {
 testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu);
 }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 testConvTransLayer("cudnn_convt", /* trans= */ false, /* useGpu= */ true);
 #endif
 }
@@ -638,7 +638,7 @@ TEST(Layer, SelectiveFullyConnectedLayer) {
 /* trans= */ false,
 /* useGup= */ false,
 false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 testLayerGrad(config,
 "selective_fc",
 100,
@@ -1210,7 +1210,7 @@ void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
 testLayerGrad(config, "pool", 100, trans, useGpu);
 }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 void testPoolLayer2(const string& poolType, bool trans, bool useGpu) {
 TestConfig config;
 config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0});
@@ -1236,7 +1236,7 @@ TEST(Layer, PoolLayer) {
 testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false);
 testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true);
 testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ true);
 testPoolLayer("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true);
@@ -1309,7 +1309,7 @@ void testPool3DLayer(const string& poolType, bool trans, bool useGpu) {
 TEST(Layer, Pool3DLayer) {
 testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ false);
 testPool3DLayer("max", /* trans= */ false, /* useGpu= */ false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ true);
 testPool3DLayer("max", /* trans= */ false, /* useGpu= */ true);
 #endif
@@ -1695,7 +1695,7 @@ void testBatchNormLayer(const string& type, bool trans, bool useGpu) {
 TEST(Layer, BatchNormalizationLayer) {
 testBatchNormLayer("batch_norm", false, false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 testBatchNormLayer("batch_norm", false, true);
 if (hl_get_cudnn_lib_version() >= int(4000)) {
 testBatchNormLayer("cudnn_batch_norm", false, true);
@@ -1744,7 +1744,7 @@ void testBatchNorm3DLayer(const string& type, bool trans, bool useGpu) {
 TEST(Layer, testBatchNorm3DLayer) {
 testBatchNorm3DLayer("batch_norm", false, false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 testBatchNorm3DLayer("batch_norm", false, true);
 if (hl_get_cudnn_lib_version() >= int(4000)) {
 testBatchNorm3DLayer("cudnn_batch_norm", false, true);
@@ -2262,7 +2262,7 @@ void test3DConvLayer(const string& type, bool trans, bool useGpu) {
 TEST(Layer, test3DConvLayer) {
 test3DConvLayer("conv3d", /* trans= */ false, /* useGpu= */ false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 test3DConvLayer("conv3d", /* trans= */ false, /* useGpu= */ true);
 #endif
 }
@@ -2339,7 +2339,7 @@ void test3DDeConvLayer(const string& type, bool trans, bool useGpu) {
 TEST(Layer, test3DDeConvLayer) {
 test3DDeConvLayer("deconv3d", /* trans= */ false, /* useGpu= */ false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 test3DDeConvLayer("deconv3d", /* trans= */ false, /* useGpu= */ true);
 #endif
 }
...
@@ -243,7 +243,7 @@ TEST(Compare, concat_slice) {
 compareNetwork(config_file_a, config_file_b);
 }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TEST(Compare, img_pool) {
 std::string config_file_a = "./gserver/tests/img_pool_a.conf";
 std::string config_file_b = "./gserver/tests/img_pool_b.conf";
...
@@ -151,7 +151,7 @@ TEST(Layer, priorBoxLayerFwd) {
 useGpu,
 result);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 // reset the input parameters
 variance[1] = 0.1;
 variance[3] = 0.2;
...
@@ -485,7 +485,7 @@ TEST(ProtoDataProvider, test) {
 // Currently in async mode, useGpu is not supported
 continue;
 }
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
 if (useGpu) {
 continue;
 }
@@ -525,7 +525,7 @@ TEST(ProtoDataProvider, constant_slots) {
 for (int numConstantSlots : {1, 2}) {
 for (int useGpu : numTwoArray) {
 for (int dataCompression : numTwoArray) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
 if (useGpu) {
 continue;
 }
@@ -708,7 +708,7 @@ TEST(ProtoSequenceDataProvider, test) {
 // Currently in async mode, useGpu is not supported
 continue;
 }
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
 if (useGpu) {
 continue;
 }
...
@@ -37,7 +37,7 @@ TEST(PyDataProvider, py_fill_slots) {
 config.clear_files();
 std::string dataFile = "gserver/tests/pyDataProvider/pyDataProviderList";
 config.set_files(dataFile);
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
 bool useGpu = false;
 #else
 bool useGpu = true;
@@ -71,7 +71,7 @@ TEST(PyDataProvider, py_fill_nest_slots) {
 std::string dataFile = "gserver/tests/pyDataProvider/pyDataProviderList";
 config.set_files(dataFile);
 EXPECT_EQ(config.IsInitialized(), true);
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
 bool useGpu = false;
 #else
 bool useGpu = true;
...
@@ -321,7 +321,7 @@ TEST(Layer, SelectiveFcLayer_train_dense_mul) {
 "filelist=gserver/tests/SelectiveFcTest/dense_mul_list";
 for (auto useGpu : {false, true}) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
 if (useGpu) {
 break;
 }
@@ -388,7 +388,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
 outMatSelfc->getWidth(),
 outMatSelfc->getElementCnt()));
 cpuOutMatSelfc->copyFrom(*outMatSelfc, HPPL_STREAM_DEFAULT);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 if (useGpu) {
 hl_stream_synchronize(HPPL_STREAM_DEFAULT);
 }
@@ -418,7 +418,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
 MatrixPtr cpuOutMatFc(
 new CpuMatrix(outMatFc->getHeight(), outMatFc->getWidth()));
 cpuOutMatFc->copyFrom(*outMatFc, HPPL_STREAM_DEFAULT);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 if (useGpu) {
 hl_stream_synchronize(HPPL_STREAM_DEFAULT);
 }
@@ -443,7 +443,7 @@ TEST(Layer, SelectiveFcLayer_train_sparse_mul) {
 selLayerConfig.set_size(fcLayerWidth);
 testSelectiveFcLayerTrainSparseMul(selLayerConfig, false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 testSelectiveFcLayerTrainSparseMul(selLayerConfig, true);
 #endif
 }
...
@@ -195,7 +195,7 @@ TEST(Layer, SeqSliceLayer) {
 vector<vector<real>> ends;
 std::vector<bool> mode = {false};
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 mode.push_back(true);
 #endif
 genSeqInfo(seqStartPos, subSeqStartPos);
...
@@ -199,7 +199,7 @@ TEST(Layer, WarpCTCLayer) {
 for (auto batchSize : {1, 10, 32}) {
 for (auto normByTimes : {false, true}) {
 for (auto useGpu : {false, true}) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
 if (useGpu) continue;
 #endif
 LOG(INFO) << "layerSize=" << layerSize << " batchSize=" << batchSize
...
@@ -670,7 +670,7 @@ void GpuMatrix::leftMul(Matrix& a, real scaleAB, real scaleT) {
 }
 void GpuMatrix::selectRows(Matrix& table, IVector& ids) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 CHECK(dynamic_cast<GpuMatrix*>(&table));
 CHECK(table.useGpu());
 CHECK(ids.useGpu());
@@ -694,7 +694,7 @@ void GpuMatrix::selectRows(Matrix& table, IVector& ids) {
 }
 void GpuMatrix::addToRows(Matrix& table, IVector& ids) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 CHECK(dynamic_cast<GpuMatrix*>(&table));
 CHECK(table.useGpu());
 CHECK(ids.useGpu());
@@ -741,7 +741,7 @@ void GpuMatrix::rowMax(Matrix& max) {
 }
 void GpuMatrix::rowMax(IVector& maxIds, Matrix& maxVal) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 CHECK(maxIds.useGpu() && maxVal.useGpu()) << "Matrix type are not equal";
 size_t numSamples = getHeight();
 size_t beam = maxVal.getWidth();
...
@@ -836,7 +836,7 @@ void GpuSparseMatrix::zeroMem() {
 }
 void GpuSparseMatrix::rowMax(IVector& maxIds, Matrix& maxVal) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 CHECK(maxIds.useGpu() && maxVal.useGpu()) << "Matrix type are not equal";
 size_t numSamples = getHeight();
 size_t beam = maxVal.getWidth();
...
@@ -172,7 +172,7 @@ void GpuVectorT<T>::isEqualTo(const VectorT<T>& b, const T& value) {
 template <class T>
 void GpuVectorT<T>::selectFrom(const VectorT<T>& src, const VectorT<int>& ids) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 hl_vector_select_from<T>(this->getData(),
 this->getSize(),
 src.getData(),
@@ -850,7 +850,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
 size_t size)
 : sync_(nullptr) {
 CHECK_LE(offset + size, static_cast<size_t>(src.getSize()));
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 SyncedFlag* flag = src.getSync();
 if (*flag == DATA_AT_CPU) {
 src.copyToGpu(); // will set synchronous data between CPU and GPU
@@ -861,7 +861,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
 auto cMemHandle = (src.getVector(false))->getMemoryHandle();
 cpuVectorT_ = std::make_shared<CpuVectorT<T>>(
 size, std::dynamic_pointer_cast<CpuMemoryHandle>(cMemHandle), offset);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 auto gMemHandle = (src.getVector(true))->getMemoryHandle();
 gpuVectorT_ = std::make_shared<GpuVectorT<T>>(
 size, std::dynamic_pointer_cast<GpuMemoryHandle>(gMemHandle), offset);
...
@@ -68,7 +68,7 @@ void testPoolAllocator() {
 TEST(Allocator, Pool) {
 testPoolAllocator<CpuAllocator>();
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 testPoolAllocator<GpuAllocator>();
 #endif
 }
@@ -92,7 +92,7 @@ TEST(MemoryHandle, Cpu) {
 EXPECT_EQ(ptr1, ptr2);
 }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TEST(MemoryHandle, Gpu) {
 int numGpu = hl_get_device_count();
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 /**
 * This test file use autotest::AutoCompare and cmpWithoutArg to compares the
 * implementation of CPU and GPU member function in
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 #include <gtest/gtest.h>
 #include "paddle/math/Vector.h"
...
@@ -94,7 +94,7 @@ void testWrapper(F&& f) {
 }
 }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TEST(ExecViaCpu, test1) {
 testWrapper(f);
 testWrapper(&f);
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 #include <gtest/gtest.h>
 #include "paddle/math/Matrix.h"
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 /**
 * This test file use autotest::AutoCompare and cmpWithArg to compares the
 * implementation of CPU and GPU member function in Matrix.cpp.
...
@@ -47,7 +47,7 @@ struct MatrixPara {
 SparseFormat format;
 };
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 void test_sparse_matrix_mul(MatrixPara paraA,
 MatrixPara paraB,
 MatrixPara paraC) {
@@ -452,7 +452,7 @@ TEST(Matrix, SparseMatrixCSRFormatTrimFrom) {
 matB->trimFrom(*mat);
 checkSMatrixEqual2(matA, matB);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 GpuSparseMatrixPtr matC = std::make_shared<GpuSparseMatrix>(
 height, trimedWidth, height, FLOAT_VALUE, SPARSE_CSR, true);
 matC->trimFrom(*mat);
@@ -546,7 +546,7 @@ TEST(Matrix, SparseMatrixCSCFormatTrimFrom) {
 matB->trimFrom(*mat);
 checkSMatrixEqual2(matA, matB);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 GpuSparseMatrixPtr matC = std::make_shared<GpuSparseMatrix>(
 height, trimedWidth, height, FLOAT_VALUE, SPARSE_CSC, true);
 matC->trimFrom(*mat);
...
@@ -270,7 +270,7 @@ TEST(Unary, BaseOp) {
 TestUnaryVectorT<CpuIVector, int> testCpuIVector(
 testUnaryBaseOpInt<CpuIVector>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TestUnaryMatrix<GpuMatrix> testGpuMatrix(testUnaryBaseOp<GpuMatrix>);
 TestUnaryVectorT<GpuVector, real> testGpuVector(testUnaryBaseOp<GpuVector>);
 TestUnaryVectorT<GpuIVector, int> testGpuIVector(
@@ -317,7 +317,7 @@ void testUnayrMathOp(Tensor& A1, Tensor& A2) {
 TEST(Unary, MathOp) {
 TestUnaryMatrix<CpuMatrix> testCpu(testUnayrMathOp<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TestUnaryMatrix<GpuMatrix> testGpu(testUnayrMathOp<GpuMatrix>);
 #endif
 }
@@ -374,7 +374,7 @@ void testUnayrCompareOp(Tensor& A1, Tensor& A2) {
 TEST(Unary, CompareOp) {
 TestUnaryMatrix<CpuMatrix> testCpu(testUnayrCompareOp<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TestUnaryMatrix<GpuMatrix> testGpu(testUnayrCompareOp<GpuMatrix>);
 #endif
 }
@@ -536,7 +536,7 @@ void testBinaryBaseOp(Tensor& A1, Tensor& A2, Tensor& B) {
 TEST(Binary, BaseOp) {
 TestBinaryMatrix<CpuMatrix> testCpu(testBinaryBaseOp<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TestBinaryMatrix<GpuMatrix> testGpu(testBinaryBaseOp<GpuMatrix>);
 #endif
 }
@@ -710,7 +710,7 @@ void testBinaryMathOp(Tensor& A1, Tensor& A2, Tensor& B) {
 TEST(Binary, MathOp) {
 TestBinaryMatrix<CpuMatrix> testCpu(testBinaryMathOp<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TestBinaryMatrix<GpuMatrix> testGpu(testBinaryMathOp<GpuMatrix>);
 #endif
 }
@@ -810,7 +810,7 @@ void testBinaryCompareOp(Tensor& A1, Tensor& A2, Tensor& B) {
 TEST(Binary, CompareOp) {
 TestBinaryMatrix<CpuMatrix> testCpu(testBinaryCompareOp<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TestBinaryMatrix<GpuMatrix> testGpu(testBinaryCompareOp<GpuMatrix>);
 #endif
 }
@@ -955,7 +955,7 @@ void testTernaryBaseOp(Tensor& A1, Tensor& A2, Tensor& B, Tensor& C) {
 TEST(Ternary, BaseOp) {
 TestTernaryMatrix<CpuMatrix> testCpu(testTernaryBaseOp<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TestTernaryMatrix<GpuMatrix> testGpu(testTernaryBaseOp<GpuMatrix>);
 #endif
 }
@@ -1058,7 +1058,7 @@ void testTernaryCompareOp(Tensor& A1, Tensor& A2, Tensor& B, Tensor& C) {
 TEST(Ternary, CompareOp) {
 TestTernaryMatrix<CpuMatrix> testCpu(testTernaryCompareOp<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TestTernaryMatrix<GpuMatrix> testGpu(testTernaryCompareOp<GpuMatrix>);
 #endif
 }
@@ -1086,7 +1086,7 @@ void testQuaternaryAdd(
 TEST(Quaternary, BaseOp) {
 TestQuaternaryMatrix<CpuMatrix> testCpu(testQuaternaryAdd<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TestQuaternaryMatrix<GpuMatrix> testGpu(testQuaternaryAdd<GpuMatrix>);
 #endif
 }
@@ -1156,7 +1156,7 @@ void testQuaternaryCompareOp(
 TEST(Quaternary, CompareOp) {
 TestQuaternaryMatrix<CpuMatrix> testCpu(testQuaternaryCompareOp<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TestQuaternaryMatrix<GpuMatrix> testGpu(testQuaternaryCompareOp<GpuMatrix>);
 #endif
 }
@@ -91,7 +91,7 @@ int VectorCheckErr(const VectorPtr& vector1, const VectorPtr& vector2) {
 typedef std::function<void(size_t size, bool useGpu)> testMatrixFunc;
 void testCase(testMatrixFunc matrixFunc) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 for (auto useGpu : {false, true}) {
 #else
 for (auto useGpu : {false}) {
...
@@ -17,7 +17,7 @@ limitations under the License. */
 using namespace paddle; // NOLINT
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TEST(MatrixBatchTransTest, test_batch_matrix_transpose) {
 const int nx = 100;
 const int ny = 50;
...
@@ -72,7 +72,7 @@ void testLazyAssign(int height, int width) {
 TEST(lazyAssign, CPU) { testMatrixCase(testLazyAssign<CpuMatrix>); }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TEST(lazyAssign, GPU) { testMatrixCase(testLazyAssign<GpuMatrix>); }
 #endif
@@ -142,6 +142,6 @@ void testSgdUpdate(int height, int width) {
 TEST(sgdUpdate, CPU) { testMatrixCase(testSgdUpdate<CpuMatrix>); }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TEST(sgdUpdate, GPU) { testMatrixCase(testSgdUpdate<GpuMatrix>); }
 #endif
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 /// This unittest checks GpuMatrix/CpuMatrix get same result, so disable when
 /// only cpu version.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 #include <cuda_runtime.h>
 #include <gtest/gtest.h>
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 /// This unittest checks GpuSparseMatrix/CpuSparseMatrix get same result,
 // so disable when
 /// only cpu version.
...
@@ -175,7 +175,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
 }
 BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 if (system_allocator_->UseGpu()) {
 if ((total_used_ + total_free_) == 0) {
 // Compute the maximum allocation size for the first allocation.
...
@@ -62,7 +62,7 @@ void CPUAllocator::Free(void* p, size_t size, size_t index) {
 bool CPUAllocator::UseGpu() const { return false; }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 void* GPUAllocator::Alloc(size_t& index, size_t size) {
 // CUDA documentation doesn't explain if cudaMalloc returns nullptr
...
@@ -40,7 +40,7 @@ class CPUAllocator : public SystemAllocator {
 virtual bool UseGpu() const;
 };
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 class GPUAllocator : public SystemAllocator {
 public:
 virtual void* Alloc(size_t& index, size_t size);
...
@@ -56,7 +56,7 @@ TEST(CPUAllocator, LockMem) {
 TestAllocator(a, 0);
 }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
 TEST(GPUAllocator, Alloc) {
 paddle::memory::detail::GPUAllocator a;
 TestAllocator(a, 2048);
...
...@@ -26,7 +26,7 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst, ...@@ -26,7 +26,7 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
std::memcpy(dst, src, num); std::memcpy(dst, src, num);
} }
#ifndef PADDLE_ONLY_CPU #ifdef PADDLE_WITH_GPU
template <> template <>
void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace dst_place, void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace dst_place,
void* dst, void* dst,
......
...@@ -33,7 +33,7 @@ namespace memory { ...@@ -33,7 +33,7 @@ namespace memory {
template <typename DstPlace, typename SrcPlace> template <typename DstPlace, typename SrcPlace>
void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num); void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num);
#ifndef PADDLE_ONLY_CPU #ifdef PADDLE_WITH_GPU
/** /**
* \brief Copy memory from one place to another place. * \brief Copy memory from one place to another place.
......
...@@ -62,7 +62,7 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) { ...@@ -62,7 +62,7 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
return GetCPUBuddyAllocator()->Used(); return GetCPUBuddyAllocator()->Used();
} }
#ifndef PADDLE_ONLY_CPU #ifdef PADDLE_WITH_GPU
BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
using BuddyAllocVec = std::vector<BuddyAllocator*>; using BuddyAllocVec = std::vector<BuddyAllocator*>;
......
@@ -80,7 +80,7 @@ TEST(BuddyAllocator, CPUMultAlloc) {
}
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
size_t align(size_t size, paddle::platform::GPUPlace place) {
size += sizeof(paddle::memory::detail::Metadata);
...
@@ -34,7 +34,7 @@ struct StridedMemcpyFunctor<T, 1> {
auto& cpu_place = boost::get<platform::CPUPlace>(place);
memory::Copy(cpu_place, dst, cpu_place, src, sizeof(T) * dst_dim.head);
} else {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
auto& gpu_place = boost::get<platform::GPUPlace>(place);
auto& cuda_ctx =
reinterpret_cast<const platform::CUDADeviceContext&>(dev_ctx);
...
@@ -71,7 +71,7 @@ void testIm2col() {
context =
new paddle::platform::CPUDeviceContext(paddle::platform::CPUPlace());
} else {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
context =
new paddle::platform::CUDADeviceContext(paddle::platform::GPUPlace());
#else
@@ -116,7 +116,7 @@ void testIm2col() {
TEST(math, im2col) {
testIm2col<paddle::platform::CPUPlace>();
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
testIm2col<paddle::platform::GPUPlace>();
#endif
}
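The im2col test shows a recurring testing idiom: a device-templated test body, instantiated for the GPU place only when the GPU macro is set. A minimal gtest sketch of the idiom (hypothetical names, not the Paddle test):

#include <gtest/gtest.h>

struct CPUPlace {};
#ifdef PADDLE_WITH_GPU
struct GPUPlace {};
#endif

template <typename Place>
void runDeviceSmokeTest() {
  // A real test would allocate on Place, run the kernel, and compare results.
  SUCCEED();
}

TEST(demo, im2colLikeGuard) {
  runDeviceSmokeTest<CPUPlace>();
#ifdef PADDLE_WITH_GPU
  runDeviceSmokeTest<GPUPlace>();  // compiled out of CPU-only builds
#endif
}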
#include "paddle/operators/math/math_function.h" #include "paddle/operators/math/math_function.h"
#include "gtest/gtest.h" #include "gtest/gtest.h"
#ifndef PADDLE_ONLY_CPU #ifdef PADDLE_WITH_GPU
TEST(math_function, notrans_mul_trans) { TEST(math_function, notrans_mul_trans) {
paddle::framework::Tensor input1; paddle::framework::Tensor input1;
paddle::framework::Tensor input1_gpu; paddle::framework::Tensor input1_gpu;
......
@@ -72,7 +72,7 @@ TEST(StridedMemcpy, CPUConcat) {
}
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(StridedMemcpy, GPUCrop) {
// clang-format off
int src[] = {
...
@@ -35,7 +35,7 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
Place CPUDeviceContext::GetPlace() const { return CPUPlace(); }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
template <>
Eigen::GpuDevice*
...
@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/gpu_info.h"
@@ -61,7 +61,7 @@ class CPUDeviceContext : public DeviceContext {
std::unique_ptr<Eigen::DefaultDevice> eigen_device_;
};
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
template <>
struct EigenDeviceConverter<platform::GPUPlace> {
using EigenDeviceType = Eigen::GpuDevice;
...
@@ -29,7 +29,7 @@ limitations under the License. */
#include <cxxabi.h> // for __cxa_demangle
#endif
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
@@ -113,7 +113,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
}
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
...
@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
#include <cuda_runtime.h>
#include <stddef.h>
...
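The header hunks (device_context.h, enforce.h, gpu_info.h) all guard their CUDA includes the same way, so a CPU-only build never needs the CUDA toolkit on its include path. A hedged standalone sketch of a header written in that style (the helper name is hypothetical):

#pragma once

#include <cstddef>

#ifdef PADDLE_WITH_GPU
#include <cuda_runtime.h>  // pulled in only when building with GPU support
#endif

// Declarations visible in both builds mention plain types only; a GPU build
// might back this with cudaMemGetInfo, a CPU build with sysconf or similar.
size_t availableMemoryBytes();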
@@ -16,7 +16,7 @@
#include <boost/config.hpp>
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
// Because boost's variadic templates has bug on nvcc, boost will disable
// variadic template support when GPU enabled on nvcc.
...
@@ -215,7 +215,7 @@ int main(int argc, char** argv) {
uint64_t dataSize = FLAGS_dim * sizeof(real);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
GpuVector gpuParam(FLAGS_dim);
GpuVector gpuGrad(FLAGS_dim);
#else
...
@@ -99,7 +99,7 @@ TEST(ProtoServer, regular) {
}
TEST(ProtoServer, extended) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
ProtoClient* client;
if (FLAGS_rdma_tcp == "rdma")
client = new ProtoClient(FLAGS_server_addr, FLAGS_port, F_RDMA);
...
@@ -34,7 +34,7 @@ static size_t UniqueIntegerGenerator() {
}
bool IsCompileGPU() {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
return false;
#else
return true;
@@ -78,7 +78,7 @@ PYBIND11_PLUGIN(core) {
.def("set", PyCPUTensorSetFromArray<float>)
.def("set", PyCPUTensorSetFromArray<int>)
.def("set", PyCPUTensorSetFromArray<double>)
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
.def("set", PyCUDATensorSetFromArray<float>)
.def("set", PyCUDATensorSetFromArray<int>)
.def("set", PyCUDATensorSetFromArray<double>)
@@ -96,7 +96,7 @@ PYBIND11_PLUGIN(core) {
.def(
"__init__",
[](LoDTensor &instance, const std::vector<std::vector<size_t>> &lod) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
new (&instance) LoDTensor(lod);
#else
LoD new_lod;
@@ -107,7 +107,7 @@ PYBIND11_PLUGIN(core) {
})
.def("set_lod",
[](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
self.set_lod(lod);
#else
LoD new_lod;
@@ -117,7 +117,7 @@ PYBIND11_PLUGIN(core) {
#endif
})
.def("lod", [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
return self.lod();
#else
auto lod = self.lod();
@@ -203,7 +203,7 @@ All parameter, weight, gradient are variables in Paddle.
.def_static("create",
[](paddle::platform::GPUPlace& place)
-> paddle::platform::DeviceContext* {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
PADDLE_THROW("GPUPlace is not supported in CPU device.");
#else
return new paddle::platform::CUDADeviceContext(place);
...
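The pybind hunks combine both guard directions: GPU-only .def overloads are registered under #ifdef PADDLE_WITH_GPU, while CPU-only fallbacks live under #ifndef, so the Python API shrinks gracefully on CPU-only builds. A minimal sketch of conditionally registered bindings (uses the modern PYBIND11_MODULE macro and hypothetical names, not the Paddle source):

#include <pybind11/pybind11.h>

namespace py = pybind11;

struct Tensor {};

void SetFromCPU(Tensor&, py::buffer) {}
#ifdef PADDLE_WITH_GPU
void SetFromCUDA(Tensor&, py::buffer) {}
#endif

PYBIND11_MODULE(demo, m) {
  auto tensor = py::class_<Tensor>(m, "Tensor")
                    .def(py::init<>())
                    .def("set", &SetFromCPU);
#ifdef PADDLE_WITH_GPU
  // This extra overload exists only in GPU builds; Python callers can probe
  // for it (or an is_compiled_with_gpu() query) before relying on it.
  tensor.def("set", &SetFromCUDA);
#endif
}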
@@ -106,7 +106,7 @@ void PyCPUTensorSetFromArray(
std::memcpy(dst, array.data(), sizeof(T) * array.size());
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
template <typename T>
void PyCUDATensorSetFromArray(
framework::Tensor &self,
...
@@ -29,7 +29,7 @@ int main(int argc, char** argv) {
initMain(argc, argv);
initPython(argc, argv);
string confFile = TrainerConfigHelper::getConfigNameFromPath(FLAGS_model_dir);
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
FLAGS_use_gpu = false;
#endif
auto config = std::make_shared<TrainerConfigHelper>(confFile);
...
@@ -146,7 +146,7 @@ void compareGradient(comData& comDataCpu, comData& comDataGpu) {
}
int main(int argc, char** argv) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
exit(0);
#endif
paddle::initMain(argc, argv);
...
@@ -174,7 +174,7 @@ TEST(compareSparse, multiGradientMachine) {
FLAGS_local = local;
FLAGS_ports_num_for_sparse = 5;
for (bool useGpu : {false, true}) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
if (useGpu) continue;
#endif
FLAGS_parallel_nn = useGpu;
@@ -198,7 +198,7 @@ TEST(compareSparse, NeuralNetwork) {
FLAGS_local = local;
FLAGS_ports_num_for_sparse = 5;
for (bool useGpu : {false, true}) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
if (useGpu) continue;
#endif
FLAGS_parallel_nn = useGpu;
...
@@ -51,7 +51,7 @@ void checkGradientTest(const string& configFile,
TEST(checkGradient, cpu) { checkGradientTest(configFile1, false, false); }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(checkGradient, gpu) { checkGradientTest(configFile1, true, false); }
TEST(checkGradient, multiGpu) {
@@ -97,7 +97,7 @@ TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); }
TEST(checkGradient, chunk) {
checkGradientTest(configFile3, false, false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
checkGradientTest(configFile3, true, true);
#endif
}
...
@@ -79,7 +79,7 @@ void trainerOnePassTest(const string& configFile,
// 1. test trainer (cpu, gpu).
TEST(trainerOnePass, cpu) { trainerOnePassTest(configFile1, false, false); }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(trainerOnePass, gpu) { trainerOnePassTest(configFile1, true, false); }
TEST(trainerOnePass, gpu2) { trainerOnePassTest(configFile1, true, false, 2); }
@@ -94,7 +94,7 @@ TEST(trainerOnePass, parallel) {
#endif
// 2. test average_window.
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(average_window, gpu) {
trainerOnePassTest(configFile1, true, false, 4, 0.01);
}
@@ -266,7 +266,7 @@ TEST(checkRemoteUpdater, cpuTrainerOldUpdater) {
checkRemoteParameterUpdaterTest(configFile1, false, false, 1, true);
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(checkRemoteUpdater, gpuTrainer) {
checkRemoteParameterUpdaterTest(configFile1, true, false);
}
...
@@ -113,7 +113,7 @@ void testGeneration(const string& configFile,
#ifndef PADDLE_TYPE_DOUBLE
TEST(RecurrentGradientMachine, test_generation) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
const auto useGpuConfs = {false};
#else
const auto useGpuConfs = {true, false};
...
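The generation test uses a slightly different idiom: rather than guarding whole TEST blocks, it shrinks the runtime configuration list at compile time. A standalone sketch of that pattern:

#include <cstdio>
#include <initializer_list>

int main() {
#ifndef PADDLE_WITH_GPU
  const auto useGpuConfs = {false};        // GPU configs compiled out
#else
  const auto useGpuConfs = {true, false};  // sweep both devices
#endif
  for (bool useGpu : useGpuConfs) {
    std::printf("running configuration with useGpu=%d\n", useGpu ? 1 : 0);
  }
  return 0;
}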
@@ -14,7 +14,7 @@ limitations under the License. */
#include "Flags.h"
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
DEFINE_bool(use_gpu, false, "Only support CPU training");
#else
DEFINE_bool(use_gpu, true, "Whether to use GPU for training");
...
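Note that the guard here selects the default of a runtime flag: GPU builds default use_gpu to true, while CPU-only builds pin it to false. A hedged standalone sketch with gflags (a hypothetical binary with the same flag shape):

#include <cstdio>
#include <gflags/gflags.h>

#ifndef PADDLE_WITH_GPU
DEFINE_bool(use_gpu, false, "Only support CPU training");
#else
DEFINE_bool(use_gpu, true, "Whether to use GPU for training");
#endif

int main(int argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  std::printf("use_gpu = %d\n", FLAGS_use_gpu ? 1 : 0);
  return 0;
}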
@@ -218,7 +218,7 @@ protected:
* *d2* is peer device to enable direct access to by the d1 device.
*/
inline void enablePeerAccess(int d1, int d2) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
if (hl_device_can_access_peer(d1, d2)) {
SetDevice dev(d1);
hl_device_enable_peer_access(d2);
...
@@ -48,7 +48,7 @@ void printVersion(std::ostream& os);
* @return return true if paddle compiled with GPU
*/
constexpr bool isWithGpu() {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
return false;
#else
return true;
...
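Because isWithGpu() is constexpr, callers can branch on GPU support at compile time and let the dead branch fold away. A standalone sketch:

#include <cstdio>

constexpr bool isWithGpu() {
#ifndef PADDLE_WITH_GPU
  return false;
#else
  return true;
#endif
}

int main() {
  // The condition is a compile-time constant, so the unused branch
  // can be eliminated entirely by the compiler.
  if (isWithGpu()) {
    std::puts("compiled with GPU support");
  } else {
    std::puts("CPU-only build");
  }
  return 0;
}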