Commit 275d65b5 authored by zchen0211

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop
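
In substance, the merged commits replace the negative compile-time macro PADDLE_ONLY_CPU with the positive PADDLE_WITH_GPU throughout the tree, flipping the polarity of every guard: `#ifndef PADDLE_ONLY_CPU` becomes `#ifdef PADDLE_WITH_GPU`, and `#ifdef PADDLE_ONLY_CPU` becomes `#ifndef PADDLE_WITH_GPU`. A minimal sketch of the pattern (the helper declaration is hypothetical, for illustration only, not from this diff):

#include <cstddef>

// Before: GPU-only code hidden behind the *absence* of a CPU-only flag.
#ifndef PADDLE_ONLY_CPU
void CopyRowToDevice(float* dst, const float* src, size_t width);  // hypothetical helper
#endif

// After: the same code behind the *presence* of a GPU flag; the positive
// name reads naturally and avoids double negation at every guard site.
#ifdef PADDLE_WITH_GPU
void CopyRowToDevice(float* dst, const float* src, size_t width);  // hypothetical helper
#endif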

......@@ -49,11 +49,11 @@ if(NOT WITH_GOLANG)
endif(NOT WITH_GOLANG)
if(NOT WITH_GPU)
-add_definitions(-DPADDLE_ONLY_CPU)
add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
else()
+add_definitions(-DPADDLE_WITH_GPU)
FIND_PACKAGE(CUDA REQUIRED)
if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
......
......@@ -47,7 +47,7 @@ bool isUsingGpu() { return FLAGS_use_gpu; }
void setUseGpu(bool useGpu) { FLAGS_use_gpu = useGpu; }
bool isGpuVersion() {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
return false;
#else
return true;
......
......@@ -46,7 +46,7 @@ paddle_error paddle_matrix_set_row(paddle_matrix mat,
if (rowID >= ptr->mat->getHeight()) return kPD_OUT_OF_RANGE;
paddle::real* buf = ptr->mat->getRowBuf(rowID);
size_t width = ptr->mat->getWidth();
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
hl_memcpy(buf, rowArray, sizeof(paddle::real) * width);
#else
std::copy(rowArray, rowArray + width, buf);
......
......@@ -15,7 +15,7 @@
#pragma once
#include <memory>
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>
......@@ -29,7 +29,7 @@
namespace paddle {
namespace framework {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
template <typename T>
using Vector = std::vector<T>;
#else
......
......@@ -211,7 +211,7 @@ class OpKernelRegistrar : public Registrar {
// TODO(fengjiayi): The following macros
// seems ugly, do we have better method?
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
#define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU)
#else
#define USE_OP_KERNEL(op_type) \
......
......@@ -25,7 +25,7 @@ Eigen::DefaultDevice& ExecutionContext::GetEigenDevice<
return *device_context_.GetEigenDevice<platform::CPUPlace>();
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
template <>
Eigen::GpuDevice&
ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
......
......@@ -65,7 +65,7 @@ inline T* Tensor::mutable_data(platform::Place place) {
holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
boost::get<platform::CPUPlace>(place), size));
} else if (platform::is_gpu_place(place)) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
}
#else
......@@ -103,7 +103,7 @@ inline void Tensor::CopyFrom(const Tensor& src,
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
boost::get<platform::CPUPlace>(src_place), src_ptr, size);
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
else if (platform::is_gpu_place(src_place) &&
platform::is_cpu_place(dst_place)) {
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
......
......@@ -74,7 +74,7 @@ TEST(Tensor, MutableData) {
EXPECT_EQ(p1, p2);
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
{
Tensor src_tensor;
float* p1 = nullptr;
......@@ -126,7 +126,7 @@ TEST(Tensor, ShareDataWith) {
ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>());
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
{
Tensor src_tensor;
Tensor dst_tensor;
......@@ -163,7 +163,7 @@ TEST(Tensor, Slice) {
EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address);
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
{
Tensor src_tensor;
src_tensor.mutable_data<double>(make_ddim({6, 9}), GPUPlace());
......@@ -218,7 +218,7 @@ TEST(Tensor, CopyFrom) {
EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
}
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
{
Tensor src_tensor;
Tensor gpu_tensor;
......
......@@ -194,7 +194,7 @@ public:
REGISTER_TYPED_FUNC(BlockExpand, CPU, BlockExpandForward);
REGISTER_TYPED_FUNC(BlockExpandGrad, CPU, BlockExpandBackward);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
REGISTER_TYPED_FUNC(BlockExpand, GPU, BlockExpandForward);
REGISTER_TYPED_FUNC(BlockExpandGrad, GPU, BlockExpandBackward);
#endif
......
......@@ -395,7 +395,7 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
REGISTER_TYPED_FUNC(ContextProjectionBackward,
CPU,
ContextProjectionBackwardFunc);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
REGISTER_TYPED_FUNC(ContextProjectionForward,
GPU,
ContextProjectionForwardFunc);
......
......@@ -233,7 +233,7 @@ private:
REGISTER_TYPED_FUNC(CosSimForward, CPU, CosSimForwardFunc);
REGISTER_TYPED_FUNC(CosSimBackward, CPU, CosSimBackwardFunc);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
REGISTER_TYPED_FUNC(CosSimForward, GPU, CosSimForwardFunc);
REGISTER_TYPED_FUNC(CosSimBackward, GPU, CosSimBackwardFunc);
#endif
......
......@@ -169,7 +169,7 @@ private:
REGISTER_TYPED_FUNC(Crop, CPU, CropFunc);
REGISTER_TYPED_FUNC(CropGrad, CPU, CropGradFunc);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
REGISTER_TYPED_FUNC(Crop, GPU, CropFunc);
REGISTER_TYPED_FUNC(CropGrad, GPU, CropGradFunc);
#endif
......
......@@ -336,7 +336,7 @@ private:
REGISTER_TYPED_FUNC(CrossMapNormal, CPU, CrossMapNormalFunc);
REGISTER_TYPED_FUNC(CrossMapNormalGrad, CPU, CrossMapNormalGradFunc);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
REGISTER_TYPED_FUNC(CrossMapNormal, GPU, CrossMapNormalFunc);
REGISTER_TYPED_FUNC(CrossMapNormalGrad, GPU, CrossMapNormalGradFunc);
#endif
......
......@@ -292,7 +292,7 @@ REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
REGISTER_TYPED_FUNC(DepthwiseConvGradFilter,
CPU,
DepthwiseConvGradFilterFunction);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
REGISTER_TYPED_FUNC(DepthwiseConv, GPU, DepthwiseConvFunction);
REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
GPU,
......
......@@ -17,7 +17,7 @@ limitations under the License. */
namespace paddle {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(DepthwiseConv, Forward) {
DepthwiseConvolution<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
"GemmConv-CPU", "DepthwiseConv-GPU", forward);
......
......@@ -340,7 +340,7 @@ public:
REGISTER_TYPED_FUNC(GemmConv, CPU, GemmConvFunction);
REGISTER_TYPED_FUNC(GemmConvGradInput, CPU, GemmConvGradInputFunction);
REGISTER_TYPED_FUNC(GemmConvGradFilter, CPU, GemmConvGradFilterFunction);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
REGISTER_TYPED_FUNC(GemmConv, GPU, GemmConvFunction);
REGISTER_TYPED_FUNC(GemmConvGradInput, GPU, GemmConvGradInputFunction);
REGISTER_TYPED_FUNC(GemmConvGradFilter, GPU, GemmConvGradFilterFunction);
......
......@@ -24,7 +24,7 @@ TEST(GemmConv, NaiveConv) {
"NaiveConv-CPU", "GemmConv-CPU", forward);
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(GemmConv, Forward) {
Convolution<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
"GemmConv-CPU", "GemmConv-GPU", forward);
......
......@@ -116,7 +116,7 @@ void TestIm2ColFunctor() {
TEST(Im2ColFunctor, CPU) { TestIm2ColFunctor<DEVICE_TYPE_CPU, float>(); }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(Im2ColFunctor, GPU) { TestIm2ColFunctor<DEVICE_TYPE_GPU, float>(); }
......
......@@ -341,7 +341,7 @@ private:
};
REGISTER_TYPED_FUNC(MulOp, CPU, MulFunc);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
REGISTER_TYPED_FUNC(MulOp, GPU, MulFunc);
#endif
} // namespace paddle
......@@ -207,7 +207,7 @@ private:
REGISTER_TYPED_FUNC(Pad, CPU, PadFunc);
REGISTER_TYPED_FUNC(PadGrad, CPU, PadGradFunc);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
REGISTER_TYPED_FUNC(Pad, GPU, PadFunc);
REGISTER_TYPED_FUNC(PadGrad, GPU, PadGradFunc);
#endif
......
......@@ -217,7 +217,7 @@ public:
REGISTER_TYPED_FUNC(RowConv, CPU, RowConvFunc);
REGISTER_TYPED_FUNC(RowConvGrad, CPU, RowConvGradFunc);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
REGISTER_TYPED_FUNC(RowConv, GPU, RowConvFunc);
REGISTER_TYPED_FUNC(RowConvGrad, GPU, RowConvGradFunc);
#endif
......
......@@ -132,7 +132,7 @@ public:
REGISTER_TYPED_FUNC(NCHW2NHWC, CPU, NCHW2NHWCFunc);
REGISTER_TYPED_FUNC(NHWC2NCHW, CPU, NHWC2NCHWFunc);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
REGISTER_TYPED_FUNC(NCHW2NHWC, GPU, NCHW2NHWCFunc);
REGISTER_TYPED_FUNC(NHWC2NCHW, GPU, NHWC2NCHWFunc);
#endif
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#include "BatchNormalizationLayer.h"
#include "Layer.h"
#include "paddle/utils/Stat.h"
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
#include "CudnnBatchNormLayer.h"
#endif
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Stat.h"
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
#include "hl_batch_transpose.h"
#endif
#include "BatchNormalizationLayer.h"
......@@ -90,7 +90,7 @@ void BatchNormalizationLayer::expandMat(const MatrixPtr& in, MatrixPtr& out) {
size_t batchSize = in->getHeight();
CHECK_EQ(out->getHeight(), batchSize * imgPixels_);
if (useGpu_) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
LOG(FATAL) << "paddle is compiled only for cpu";
#else
batchTranspose(
......@@ -127,7 +127,7 @@ void BatchNormalizationLayer::shrinkMat(const MatrixPtr& in, MatrixPtr& out) {
}
CHECK_EQ(in->getHeight(), static_cast<size_t>(batchSize * imgPixels_));
if (useGpu_) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
LOG(FATAL) << "paddle is compiled only for cpu";
#else
batchTranspose(
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#include "PoolLayer.h"
#include "PoolProjectionLayer.h"
#include "paddle/utils/Logging.h"
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
#include "CudnnPoolLayer.h"
#endif
namespace paddle {
......@@ -53,7 +53,7 @@ Layer* PoolLayer::create(const LayerConfig& config) {
const std::string& pool = config.inputs(0).pool_conf().pool_type();
if (pool == "max-projection" || pool == "avg-projection") {
return new PoolProjectionLayer(config);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
} else if (CudnnPoolLayer::typeCheck(pool)) {
return new CudnnPoolLayer(config);
#endif
......
......@@ -674,7 +674,7 @@ void testLayerGradKernel(TestConfig testConf,
bool useGpu,
bool useWeight,
float epsilon) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
if (useGpu) return;
#endif
FLAGS_use_gpu = useGpu;
......
......@@ -119,7 +119,7 @@ TEST(Layer, batchNorm) {
CHECK_EQ(static_cast<int>(convLayer->getOutputValue()->getWidth()), 576);
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
void batchNormInference(int n, int c, int h, int w) {
MatrixPtr input = std::make_shared<GpuMatrix>(n, c * h * w);
MatrixPtr cudnnOut = std::make_shared<GpuMatrix>(n, c * h * w);
......
......@@ -117,7 +117,7 @@ MatrixPtr doOneConvTest(size_t imgSize,
}
TEST(Layer, convParaUnified) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
MatrixPtr input, resultCpu, resultGpu;
/// TEST1 for conv ///
......
......@@ -150,7 +150,7 @@ TEST(Layer, detectionOutputLayerFwd) {
useGpu,
result2);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
// GPU case 1.
useGpu = true;
inputLoc = Matrix::create(1, 16, false, useGpu);
......
......@@ -51,7 +51,7 @@ void testEvaluator(TestConfig testConf,
string testEvaluatorName,
size_t batchSize,
bool useGpu) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
if (useGpu) return;
#endif
FLAGS_use_gpu = useGpu;
......
......@@ -97,7 +97,7 @@ TEST(Layer, kmaxSeqScoreLayer) {
Matrix::create(subSeqStartPosition.back(), 1, false, false);
std::vector<bool> mode = {false};
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
mode.push_back(true);
#endif
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
#include <cudnn.h>
#endif
#include <gtest/gtest.h>
......@@ -258,7 +258,7 @@ void testProjectionConv(size_t groups, bool isDeconv) {
true);
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(Projection, conv) {
/// test ConvProjection
testProjectionConv(1, false);
......@@ -422,7 +422,7 @@ TEST(Layer, depthwiseConvLayer) {
// 'depthwise_conv' is a sepecial case of 'exconv' whose
// groups size equals to the input channels size.
testDepthwiseConvLayer("exconv", /* useGpu= */ false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
testDepthwiseConvLayer("exconv", /* useGpu= */ true);
#endif
}
......@@ -480,7 +480,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
TEST(Layer, convLayer) {
testConvLayer("exconv", /* trans= */ false, /* useGpu= */ false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
testConvLayer("exconv", /* trans= */ false, /* useGpu= */ true);
testConvLayer("cudnn_conv", /* trans= */ false, /* useGpu= */ true);
#endif
......@@ -525,7 +525,7 @@ TEST(Layer, convTransLayer) {
for (auto useGpu : {false, true}) {
testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu);
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
testConvTransLayer("cudnn_convt", /* trans= */ false, /* useGpu= */ true);
#endif
}
......@@ -638,7 +638,7 @@ TEST(Layer, SelectiveFullyConnectedLayer) {
/* trans= */ false,
/* useGup= */ false,
false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
testLayerGrad(config,
"selective_fc",
100,
......@@ -1210,7 +1210,7 @@ void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
testLayerGrad(config, "pool", 100, trans, useGpu);
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
void testPoolLayer2(const string& poolType, bool trans, bool useGpu) {
TestConfig config;
config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0});
......@@ -1236,7 +1236,7 @@ TEST(Layer, PoolLayer) {
testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false);
testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true);
testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ true);
testPoolLayer("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true);
......@@ -1309,7 +1309,7 @@ void testPool3DLayer(const string& poolType, bool trans, bool useGpu) {
TEST(Layer, Pool3DLayer) {
testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ false);
testPool3DLayer("max", /* trans= */ false, /* useGpu= */ false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ true);
testPool3DLayer("max", /* trans= */ false, /* useGpu= */ true);
#endif
......@@ -1695,7 +1695,7 @@ void testBatchNormLayer(const string& type, bool trans, bool useGpu) {
TEST(Layer, BatchNormalizationLayer) {
testBatchNormLayer("batch_norm", false, false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
testBatchNormLayer("batch_norm", false, true);
if (hl_get_cudnn_lib_version() >= int(4000)) {
testBatchNormLayer("cudnn_batch_norm", false, true);
......@@ -1744,7 +1744,7 @@ void testBatchNorm3DLayer(const string& type, bool trans, bool useGpu) {
TEST(Layer, testBatchNorm3DLayer) {
testBatchNorm3DLayer("batch_norm", false, false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
testBatchNorm3DLayer("batch_norm", false, true);
if (hl_get_cudnn_lib_version() >= int(4000)) {
testBatchNorm3DLayer("cudnn_batch_norm", false, true);
......@@ -2262,7 +2262,7 @@ void test3DConvLayer(const string& type, bool trans, bool useGpu) {
TEST(Layer, test3DConvLayer) {
test3DConvLayer("conv3d", /* trans= */ false, /* useGpu= */ false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
test3DConvLayer("conv3d", /* trans= */ false, /* useGpu= */ true);
#endif
}
......@@ -2339,7 +2339,7 @@ void test3DDeConvLayer(const string& type, bool trans, bool useGpu) {
TEST(Layer, test3DDeConvLayer) {
test3DDeConvLayer("deconv3d", /* trans= */ false, /* useGpu= */ false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
test3DDeConvLayer("deconv3d", /* trans= */ false, /* useGpu= */ true);
#endif
}
......
......@@ -243,7 +243,7 @@ TEST(Compare, concat_slice) {
compareNetwork(config_file_a, config_file_b);
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(Compare, img_pool) {
std::string config_file_a = "./gserver/tests/img_pool_a.conf";
std::string config_file_b = "./gserver/tests/img_pool_b.conf";
......
......@@ -151,7 +151,7 @@ TEST(Layer, priorBoxLayerFwd) {
useGpu,
result);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
// reset the input parameters
variance[1] = 0.1;
variance[3] = 0.2;
......
......@@ -485,7 +485,7 @@ TEST(ProtoDataProvider, test) {
// Currently in async mode, useGpu is not supported
continue;
}
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
if (useGpu) {
continue;
}
......@@ -525,7 +525,7 @@ TEST(ProtoDataProvider, constant_slots) {
for (int numConstantSlots : {1, 2}) {
for (int useGpu : numTwoArray) {
for (int dataCompression : numTwoArray) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
if (useGpu) {
continue;
}
......@@ -708,7 +708,7 @@ TEST(ProtoSequenceDataProvider, test) {
// Currently in async mode, useGpu is not supported
continue;
}
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
if (useGpu) {
continue;
}
......
......@@ -37,7 +37,7 @@ TEST(PyDataProvider, py_fill_slots) {
config.clear_files();
std::string dataFile = "gserver/tests/pyDataProvider/pyDataProviderList";
config.set_files(dataFile);
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
bool useGpu = false;
#else
bool useGpu = true;
......@@ -71,7 +71,7 @@ TEST(PyDataProvider, py_fill_nest_slots) {
std::string dataFile = "gserver/tests/pyDataProvider/pyDataProviderList";
config.set_files(dataFile);
EXPECT_EQ(config.IsInitialized(), true);
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
bool useGpu = false;
#else
bool useGpu = true;
......
......@@ -321,7 +321,7 @@ TEST(Layer, SelectiveFcLayer_train_dense_mul) {
"filelist=gserver/tests/SelectiveFcTest/dense_mul_list";
for (auto useGpu : {false, true}) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
if (useGpu) {
break;
}
......@@ -388,7 +388,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
outMatSelfc->getWidth(),
outMatSelfc->getElementCnt()));
cpuOutMatSelfc->copyFrom(*outMatSelfc, HPPL_STREAM_DEFAULT);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
if (useGpu) {
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
}
......@@ -418,7 +418,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
MatrixPtr cpuOutMatFc(
new CpuMatrix(outMatFc->getHeight(), outMatFc->getWidth()));
cpuOutMatFc->copyFrom(*outMatFc, HPPL_STREAM_DEFAULT);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
if (useGpu) {
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
}
......@@ -443,7 +443,7 @@ TEST(Layer, SelectiveFcLayer_train_sparse_mul) {
selLayerConfig.set_size(fcLayerWidth);
testSelectiveFcLayerTrainSparseMul(selLayerConfig, false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
testSelectiveFcLayerTrainSparseMul(selLayerConfig, true);
#endif
}
......
......@@ -195,7 +195,7 @@ TEST(Layer, SeqSliceLayer) {
vector<vector<real>> ends;
std::vector<bool> mode = {false};
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
mode.push_back(true);
#endif
genSeqInfo(seqStartPos, subSeqStartPos);
......
......@@ -199,7 +199,7 @@ TEST(Layer, WarpCTCLayer) {
for (auto batchSize : {1, 10, 32}) {
for (auto normByTimes : {false, true}) {
for (auto useGpu : {false, true}) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
if (useGpu) continue;
#endif
LOG(INFO) << "layerSize=" << layerSize << " batchSize=" << batchSize
......
......@@ -670,7 +670,7 @@ void GpuMatrix::leftMul(Matrix& a, real scaleAB, real scaleT) {
}
void GpuMatrix::selectRows(Matrix& table, IVector& ids) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
CHECK(dynamic_cast<GpuMatrix*>(&table));
CHECK(table.useGpu());
CHECK(ids.useGpu());
......@@ -694,7 +694,7 @@ void GpuMatrix::selectRows(Matrix& table, IVector& ids) {
}
void GpuMatrix::addToRows(Matrix& table, IVector& ids) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
CHECK(dynamic_cast<GpuMatrix*>(&table));
CHECK(table.useGpu());
CHECK(ids.useGpu());
......@@ -741,7 +741,7 @@ void GpuMatrix::rowMax(Matrix& max) {
}
void GpuMatrix::rowMax(IVector& maxIds, Matrix& maxVal) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
CHECK(maxIds.useGpu() && maxVal.useGpu()) << "Matrix type are not equal";
size_t numSamples = getHeight();
size_t beam = maxVal.getWidth();
......
......@@ -836,7 +836,7 @@ void GpuSparseMatrix::zeroMem() {
}
void GpuSparseMatrix::rowMax(IVector& maxIds, Matrix& maxVal) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
CHECK(maxIds.useGpu() && maxVal.useGpu()) << "Matrix type are not equal";
size_t numSamples = getHeight();
size_t beam = maxVal.getWidth();
......
......@@ -172,7 +172,7 @@ void GpuVectorT<T>::isEqualTo(const VectorT<T>& b, const T& value) {
template <class T>
void GpuVectorT<T>::selectFrom(const VectorT<T>& src, const VectorT<int>& ids) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
hl_vector_select_from<T>(this->getData(),
this->getSize(),
src.getData(),
......@@ -850,7 +850,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
size_t size)
: sync_(nullptr) {
CHECK_LE(offset + size, static_cast<size_t>(src.getSize()));
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
SyncedFlag* flag = src.getSync();
if (*flag == DATA_AT_CPU) {
src.copyToGpu(); // will set synchronous data between CPU and GPU
......@@ -861,7 +861,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
auto cMemHandle = (src.getVector(false))->getMemoryHandle();
cpuVectorT_ = std::make_shared<CpuVectorT<T>>(
size, std::dynamic_pointer_cast<CpuMemoryHandle>(cMemHandle), offset);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
auto gMemHandle = (src.getVector(true))->getMemoryHandle();
gpuVectorT_ = std::make_shared<GpuVectorT<T>>(
size, std::dynamic_pointer_cast<GpuMemoryHandle>(gMemHandle), offset);
......
......@@ -68,7 +68,7 @@ void testPoolAllocator() {
TEST(Allocator, Pool) {
testPoolAllocator<CpuAllocator>();
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
testPoolAllocator<GpuAllocator>();
#endif
}
......@@ -92,7 +92,7 @@ TEST(MemoryHandle, Cpu) {
EXPECT_EQ(ptr1, ptr2);
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(MemoryHandle, Gpu) {
int numGpu = hl_get_device_count();
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
/**
* This test file use autotest::AutoCompare and cmpWithoutArg to compares the
* implementation of CPU and GPU member function in
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
#include <gtest/gtest.h>
#include "paddle/math/Vector.h"
......
......@@ -94,7 +94,7 @@ void testWrapper(F&& f) {
}
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(ExecViaCpu, test1) {
testWrapper(f);
testWrapper(&f);
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
#include <gtest/gtest.h>
#include "paddle/math/Matrix.h"
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
/**
* This test file use autotest::AutoCompare and cmpWithArg to compares the
* implementation of CPU and GPU member function in Matrix.cpp.
......
......@@ -47,7 +47,7 @@ struct MatrixPara {
SparseFormat format;
};
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
void test_sparse_matrix_mul(MatrixPara paraA,
MatrixPara paraB,
MatrixPara paraC) {
......@@ -452,7 +452,7 @@ TEST(Matrix, SparseMatrixCSRFormatTrimFrom) {
matB->trimFrom(*mat);
checkSMatrixEqual2(matA, matB);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
GpuSparseMatrixPtr matC = std::make_shared<GpuSparseMatrix>(
height, trimedWidth, height, FLOAT_VALUE, SPARSE_CSR, true);
matC->trimFrom(*mat);
......@@ -546,7 +546,7 @@ TEST(Matrix, SparseMatrixCSCFormatTrimFrom) {
matB->trimFrom(*mat);
checkSMatrixEqual2(matA, matB);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
GpuSparseMatrixPtr matC = std::make_shared<GpuSparseMatrix>(
height, trimedWidth, height, FLOAT_VALUE, SPARSE_CSC, true);
matC->trimFrom(*mat);
......
......@@ -270,7 +270,7 @@ TEST(Unary, BaseOp) {
TestUnaryVectorT<CpuIVector, int> testCpuIVector(
testUnaryBaseOpInt<CpuIVector>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TestUnaryMatrix<GpuMatrix> testGpuMatrix(testUnaryBaseOp<GpuMatrix>);
TestUnaryVectorT<GpuVector, real> testGpuVector(testUnaryBaseOp<GpuVector>);
TestUnaryVectorT<GpuIVector, int> testGpuIVector(
......@@ -317,7 +317,7 @@ void testUnayrMathOp(Tensor& A1, Tensor& A2) {
TEST(Unary, MathOp) {
TestUnaryMatrix<CpuMatrix> testCpu(testUnayrMathOp<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TestUnaryMatrix<GpuMatrix> testGpu(testUnayrMathOp<GpuMatrix>);
#endif
}
......@@ -374,7 +374,7 @@ void testUnayrCompareOp(Tensor& A1, Tensor& A2) {
TEST(Unary, CompareOp) {
TestUnaryMatrix<CpuMatrix> testCpu(testUnayrCompareOp<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TestUnaryMatrix<GpuMatrix> testGpu(testUnayrCompareOp<GpuMatrix>);
#endif
}
......@@ -536,7 +536,7 @@ void testBinaryBaseOp(Tensor& A1, Tensor& A2, Tensor& B) {
TEST(Binary, BaseOp) {
TestBinaryMatrix<CpuMatrix> testCpu(testBinaryBaseOp<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TestBinaryMatrix<GpuMatrix> testGpu(testBinaryBaseOp<GpuMatrix>);
#endif
}
......@@ -710,7 +710,7 @@ void testBinaryMathOp(Tensor& A1, Tensor& A2, Tensor& B) {
TEST(Binary, MathOp) {
TestBinaryMatrix<CpuMatrix> testCpu(testBinaryMathOp<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TestBinaryMatrix<GpuMatrix> testGpu(testBinaryMathOp<GpuMatrix>);
#endif
}
......@@ -810,7 +810,7 @@ void testBinaryCompareOp(Tensor& A1, Tensor& A2, Tensor& B) {
TEST(Binary, CompareOp) {
TestBinaryMatrix<CpuMatrix> testCpu(testBinaryCompareOp<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TestBinaryMatrix<GpuMatrix> testGpu(testBinaryCompareOp<GpuMatrix>);
#endif
}
......@@ -955,7 +955,7 @@ void testTernaryBaseOp(Tensor& A1, Tensor& A2, Tensor& B, Tensor& C) {
TEST(Ternary, BaseOp) {
TestTernaryMatrix<CpuMatrix> testCpu(testTernaryBaseOp<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TestTernaryMatrix<GpuMatrix> testGpu(testTernaryBaseOp<GpuMatrix>);
#endif
}
......@@ -1058,7 +1058,7 @@ void testTernaryCompareOp(Tensor& A1, Tensor& A2, Tensor& B, Tensor& C) {
TEST(Ternary, CompareOp) {
TestTernaryMatrix<CpuMatrix> testCpu(testTernaryCompareOp<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TestTernaryMatrix<GpuMatrix> testGpu(testTernaryCompareOp<GpuMatrix>);
#endif
}
......@@ -1086,7 +1086,7 @@ void testQuaternaryAdd(
TEST(Quaternary, BaseOp) {
TestQuaternaryMatrix<CpuMatrix> testCpu(testQuaternaryAdd<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TestQuaternaryMatrix<GpuMatrix> testGpu(testQuaternaryAdd<GpuMatrix>);
#endif
}
......@@ -1156,7 +1156,7 @@ void testQuaternaryCompareOp(
TEST(Quaternary, CompareOp) {
TestQuaternaryMatrix<CpuMatrix> testCpu(testQuaternaryCompareOp<CpuMatrix>);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TestQuaternaryMatrix<GpuMatrix> testGpu(testQuaternaryCompareOp<GpuMatrix>);
#endif
}
......@@ -91,7 +91,7 @@ int VectorCheckErr(const VectorPtr& vector1, const VectorPtr& vector2) {
typedef std::function<void(size_t size, bool useGpu)> testMatrixFunc;
void testCase(testMatrixFunc matrixFunc) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
for (auto useGpu : {false, true}) {
#else
for (auto useGpu : {false}) {
......
......@@ -17,7 +17,7 @@ limitations under the License. */
using namespace paddle; // NOLINT
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(MatrixBatchTransTest, test_batch_matrix_transpose) {
const int nx = 100;
const int ny = 50;
......
......@@ -72,7 +72,7 @@ void testLazyAssign(int height, int width) {
TEST(lazyAssign, CPU) { testMatrixCase(testLazyAssign<CpuMatrix>); }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(lazyAssign, GPU) { testMatrixCase(testLazyAssign<GpuMatrix>); }
#endif
......@@ -142,6 +142,6 @@ void testSgdUpdate(int height, int width) {
TEST(sgdUpdate, CPU) { testMatrixCase(testSgdUpdate<CpuMatrix>); }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(sgdUpdate, GPU) { testMatrixCase(testSgdUpdate<GpuMatrix>); }
#endif
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
/// This unittest checks GpuMatrix/CpuMatrix get same result, so disable when
/// only cpu version.
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
#include <cuda_runtime.h>
#include <gtest/gtest.h>
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
/// This unittest checks GpuSparseMatrix/CpuSparseMatrix get same result,
// so disable when
/// only cpu version.
......
......@@ -175,7 +175,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
}
BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
if (system_allocator_->UseGpu()) {
if ((total_used_ + total_free_) == 0) {
// Compute the maximum allocation size for the first allocation.
......
......@@ -62,7 +62,7 @@ void CPUAllocator::Free(void* p, size_t size, size_t index) {
bool CPUAllocator::UseGpu() const { return false; }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
void* GPUAllocator::Alloc(size_t& index, size_t size) {
// CUDA documentation doesn't explain if cudaMalloc returns nullptr
......
......@@ -40,7 +40,7 @@ class CPUAllocator : public SystemAllocator {
virtual bool UseGpu() const;
};
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
class GPUAllocator : public SystemAllocator {
public:
virtual void* Alloc(size_t& index, size_t size);
......
......@@ -56,7 +56,7 @@ TEST(CPUAllocator, LockMem) {
TestAllocator(a, 0);
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(GPUAllocator, Alloc) {
paddle::memory::detail::GPUAllocator a;
TestAllocator(a, 2048);
......
......@@ -26,7 +26,7 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
std::memcpy(dst, src, num);
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
template <>
void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace dst_place,
void* dst,
......
......@@ -33,7 +33,7 @@ namespace memory {
template <typename DstPlace, typename SrcPlace>
void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
/**
* \brief Copy memory from one place to another place.
......
......@@ -62,7 +62,7 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
return GetCPUBuddyAllocator()->Used();
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
using BuddyAllocVec = std::vector<BuddyAllocator*>;
......
......@@ -80,7 +80,7 @@ TEST(BuddyAllocator, CPUMultAlloc) {
}
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
size_t align(size_t size, paddle::platform::GPUPlace place) {
size += sizeof(paddle::memory::detail::Metadata);
......
......@@ -34,7 +34,7 @@ struct StridedMemcpyFunctor<T, 1> {
auto& cpu_place = boost::get<platform::CPUPlace>(place);
memory::Copy(cpu_place, dst, cpu_place, src, sizeof(T) * dst_dim.head);
} else {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
auto& gpu_place = boost::get<platform::GPUPlace>(place);
auto& cuda_ctx =
reinterpret_cast<const platform::CUDADeviceContext&>(dev_ctx);
......
......@@ -71,7 +71,7 @@ void testIm2col() {
context =
new paddle::platform::CPUDeviceContext(paddle::platform::CPUPlace());
} else {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
context =
new paddle::platform::CUDADeviceContext(paddle::platform::GPUPlace());
#else
......@@ -116,7 +116,7 @@ void testIm2col() {
TEST(math, im2col) {
testIm2col<paddle::platform::CPUPlace>();
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
testIm2col<paddle::platform::GPUPlace>();
#endif
}
#include "paddle/operators/math/math_function.h"
#include "gtest/gtest.h"
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(math_function, notrans_mul_trans) {
paddle::framework::Tensor input1;
paddle::framework::Tensor input1_gpu;
......
......@@ -72,7 +72,7 @@ TEST(StridedMemcpy, CPUConcat) {
}
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(StridedMemcpy, GPUCrop) {
// clang-format off
int src[] = {
......
......@@ -35,7 +35,7 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
Place CPUDeviceContext::GetPlace() const { return CPUPlace(); }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
template <>
Eigen::GpuDevice*
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/gpu_info.h"
......@@ -61,7 +61,7 @@ class CPUDeviceContext : public DeviceContext {
std::unique_ptr<Eigen::DefaultDevice> eigen_device_;
};
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
template <>
struct EigenDeviceConverter<platform::GPUPlace> {
using EigenDeviceType = Eigen::GpuDevice;
......
......@@ -29,7 +29,7 @@ limitations under the License. */
#include <cxxabi.h> // for __cxa_demangle
#endif
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
......@@ -113,7 +113,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
}
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
#include <cuda_runtime.h>
#include <stddef.h>
......
......@@ -16,7 +16,7 @@
#include <boost/config.hpp>
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
// Because boost's variadic templates has bug on nvcc, boost will disable
// variadic template support when GPU enabled on nvcc.
......
......@@ -215,7 +215,7 @@ int main(int argc, char** argv) {
uint64_t dataSize = FLAGS_dim * sizeof(real);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
GpuVector gpuParam(FLAGS_dim);
GpuVector gpuGrad(FLAGS_dim);
#else
......
......@@ -99,7 +99,7 @@ TEST(ProtoServer, regular) {
}
TEST(ProtoServer, extended) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
ProtoClient* client;
if (FLAGS_rdma_tcp == "rdma")
client = new ProtoClient(FLAGS_server_addr, FLAGS_port, F_RDMA);
......
......@@ -34,7 +34,7 @@ static size_t UniqueIntegerGenerator() {
}
bool IsCompileGPU() {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
return false;
#else
return true;
......@@ -78,7 +78,7 @@ PYBIND11_PLUGIN(core) {
.def("set", PyCPUTensorSetFromArray<float>)
.def("set", PyCPUTensorSetFromArray<int>)
.def("set", PyCPUTensorSetFromArray<double>)
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
.def("set", PyCUDATensorSetFromArray<float>)
.def("set", PyCUDATensorSetFromArray<int>)
.def("set", PyCUDATensorSetFromArray<double>)
......@@ -96,7 +96,7 @@ PYBIND11_PLUGIN(core) {
.def(
"__init__",
[](LoDTensor &instance, const std::vector<std::vector<size_t>> &lod) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
new (&instance) LoDTensor(lod);
#else
LoD new_lod;
......@@ -107,7 +107,7 @@ PYBIND11_PLUGIN(core) {
})
.def("set_lod",
[](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
self.set_lod(lod);
#else
LoD new_lod;
......@@ -117,7 +117,7 @@ PYBIND11_PLUGIN(core) {
#endif
})
.def("lod", [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
return self.lod();
#else
auto lod = self.lod();
......@@ -203,7 +203,7 @@ All parameter, weight, gradient are variables in Paddle.
.def_static("create",
[](paddle::platform::GPUPlace& place)
-> paddle::platform::DeviceContext* {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
PADDLE_THROW("GPUPlace is not supported in CPU device.");
#else
return new paddle::platform::CUDADeviceContext(place);
......
......@@ -106,7 +106,7 @@ void PyCPUTensorSetFromArray(
std::memcpy(dst, array.data(), sizeof(T) * array.size());
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
template <typename T>
void PyCUDATensorSetFromArray(
framework::Tensor &self,
......
......@@ -29,7 +29,7 @@ int main(int argc, char** argv) {
initMain(argc, argv);
initPython(argc, argv);
string confFile = TrainerConfigHelper::getConfigNameFromPath(FLAGS_model_dir);
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
FLAGS_use_gpu = false;
#endif
auto config = std::make_shared<TrainerConfigHelper>(confFile);
......
......@@ -146,7 +146,7 @@ void compareGradient(comData& comDataCpu, comData& comDataGpu) {
}
int main(int argc, char** argv) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
exit(0);
#endif
paddle::initMain(argc, argv);
......
......@@ -174,7 +174,7 @@ TEST(compareSparse, multiGradientMachine) {
FLAGS_local = local;
FLAGS_ports_num_for_sparse = 5;
for (bool useGpu : {false, true}) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
if (useGpu) continue;
#endif
FLAGS_parallel_nn = useGpu;
......@@ -198,7 +198,7 @@ TEST(compareSparse, NeuralNetwork) {
FLAGS_local = local;
FLAGS_ports_num_for_sparse = 5;
for (bool useGpu : {false, true}) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
if (useGpu) continue;
#endif
FLAGS_parallel_nn = useGpu;
......
......@@ -51,7 +51,7 @@ void checkGradientTest(const string& configFile,
TEST(checkGradient, cpu) { checkGradientTest(configFile1, false, false); }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(checkGradient, gpu) { checkGradientTest(configFile1, true, false); }
TEST(checkGradient, multiGpu) {
......@@ -97,7 +97,7 @@ TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); }
TEST(checkGradient, chunk) {
checkGradientTest(configFile3, false, false);
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
checkGradientTest(configFile3, true, true);
#endif
}
......
......@@ -79,7 +79,7 @@ void trainerOnePassTest(const string& configFile,
// 1. test trainer (cpu, gpu).
TEST(trainerOnePass, cpu) { trainerOnePassTest(configFile1, false, false); }
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(trainerOnePass, gpu) { trainerOnePassTest(configFile1, true, false); }
TEST(trainerOnePass, gpu2) { trainerOnePassTest(configFile1, true, false, 2); }
......@@ -94,7 +94,7 @@ TEST(trainerOnePass, parallel) {
#endif
// 2. test average_window.
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(average_window, gpu) {
trainerOnePassTest(configFile1, true, false, 4, 0.01);
}
......@@ -266,7 +266,7 @@ TEST(checkRemoteUpdater, cpuTrainerOldUpdater) {
checkRemoteParameterUpdaterTest(configFile1, false, false, 1, true);
}
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
TEST(checkRemoteUpdater, gpuTrainer) {
checkRemoteParameterUpdaterTest(configFile1, true, false);
}
......
......@@ -113,7 +113,7 @@ void testGeneration(const string& configFile,
#ifndef PADDLE_TYPE_DOUBLE
TEST(RecurrentGradientMachine, test_generation) {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
const auto useGpuConfs = {false};
#else
const auto useGpuConfs = {true, false};
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#include "Flags.h"
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
DEFINE_bool(use_gpu, false, "Only support CPU training");
#else
DEFINE_bool(use_gpu, true, "Whether to use GPU for training");
......
......@@ -218,7 +218,7 @@ protected:
* *d2* is peer device to enable direct access to by the d1 device.
*/
inline void enablePeerAccess(int d1, int d2) {
-#ifndef PADDLE_ONLY_CPU
+#ifdef PADDLE_WITH_GPU
if (hl_device_can_access_peer(d1, d2)) {
SetDevice dev(d1);
hl_device_enable_peer_access(d2);
......
......@@ -48,7 +48,7 @@ void printVersion(std::ostream& os);
* @return return true if paddle compiled with GPU
*/
constexpr bool isWithGpu() {
-#ifdef PADDLE_ONLY_CPU
+#ifndef PADDLE_WITH_GPU
return false;
#else
return true;
......