Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
1172f249
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1172f249
编写于
10月 04, 2017
作者:
Y
Yi Wang
提交者:
GitHub
10月 04, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4590 from wangkuiyi/paddle_only_cpu
Add -D PADDLE_WITH_CUDA in cmake/configure.cmake
上级
c0511c8a
880b874b
变更
89
显示空白变更内容
内联
并排
Showing
89 changed file
with
136 addition
and
135 deletion
+136
-135
cmake/configure.cmake
cmake/configure.cmake
+2
-1
paddle/api/Util.cpp
paddle/api/Util.cpp
+1
-1
paddle/capi/Matrix.cpp
paddle/capi/Matrix.cpp
+1
-1
paddle/framework/grad_op_builder_test.cc
paddle/framework/grad_op_builder_test.cc
+1
-1
paddle/framework/lod_tensor.h
paddle/framework/lod_tensor.h
+2
-2
paddle/framework/op_proto_maker_test.cc
paddle/framework/op_proto_maker_test.cc
+1
-1
paddle/framework/op_registry.h
paddle/framework/op_registry.h
+1
-1
paddle/framework/op_registry_test.cc
paddle/framework/op_registry_test.cc
+1
-1
paddle/framework/operator.cc
paddle/framework/operator.cc
+1
-1
paddle/framework/tensor_impl.h
paddle/framework/tensor_impl.h
+2
-2
paddle/framework/tensor_test.cc
paddle/framework/tensor_test.cc
+4
-4
paddle/function/BlockExpandOp.cpp
paddle/function/BlockExpandOp.cpp
+1
-1
paddle/function/ContextProjectionOp.cpp
paddle/function/ContextProjectionOp.cpp
+1
-1
paddle/function/CosSimOp.cpp
paddle/function/CosSimOp.cpp
+1
-1
paddle/function/CropOp.cpp
paddle/function/CropOp.cpp
+1
-1
paddle/function/CrossMapNormalOp.cpp
paddle/function/CrossMapNormalOp.cpp
+1
-1
paddle/function/DepthwiseConvOp.cpp
paddle/function/DepthwiseConvOp.cpp
+1
-1
paddle/function/DepthwiseConvOpTest.cpp
paddle/function/DepthwiseConvOpTest.cpp
+1
-1
paddle/function/GemmConvOp.cpp
paddle/function/GemmConvOp.cpp
+1
-1
paddle/function/GemmConvOpTest.cpp
paddle/function/GemmConvOpTest.cpp
+1
-1
paddle/function/Im2ColTest.cpp
paddle/function/Im2ColTest.cpp
+1
-1
paddle/function/MulOp.cpp
paddle/function/MulOp.cpp
+1
-1
paddle/function/PadOp.cpp
paddle/function/PadOp.cpp
+1
-1
paddle/function/RowConvOp.cpp
paddle/function/RowConvOp.cpp
+1
-1
paddle/function/SwitchOp.cpp
paddle/function/SwitchOp.cpp
+1
-1
paddle/gserver/layers/BatchNormBaseLayer.cpp
paddle/gserver/layers/BatchNormBaseLayer.cpp
+1
-1
paddle/gserver/layers/BatchNormalizationLayer.cpp
paddle/gserver/layers/BatchNormalizationLayer.cpp
+3
-3
paddle/gserver/layers/PoolLayer.cpp
paddle/gserver/layers/PoolLayer.cpp
+2
-2
paddle/gserver/tests/LayerGradUtil.cpp
paddle/gserver/tests/LayerGradUtil.cpp
+1
-1
paddle/gserver/tests/test_BatchNorm.cpp
paddle/gserver/tests/test_BatchNorm.cpp
+1
-1
paddle/gserver/tests/test_ConvUnify.cpp
paddle/gserver/tests/test_ConvUnify.cpp
+1
-1
paddle/gserver/tests/test_DetectionOutput.cpp
paddle/gserver/tests/test_DetectionOutput.cpp
+1
-1
paddle/gserver/tests/test_Evaluator.cpp
paddle/gserver/tests/test_Evaluator.cpp
+1
-1
paddle/gserver/tests/test_KmaxSeqScore.cpp
paddle/gserver/tests/test_KmaxSeqScore.cpp
+1
-1
paddle/gserver/tests/test_LayerGrad.cpp
paddle/gserver/tests/test_LayerGrad.cpp
+13
-13
paddle/gserver/tests/test_NetworkCompare.cpp
paddle/gserver/tests/test_NetworkCompare.cpp
+1
-1
paddle/gserver/tests/test_PriorBox.cpp
paddle/gserver/tests/test_PriorBox.cpp
+1
-1
paddle/gserver/tests/test_ProtoDataProvider.cpp
paddle/gserver/tests/test_ProtoDataProvider.cpp
+3
-3
paddle/gserver/tests/test_PyDataProvider.cpp
paddle/gserver/tests/test_PyDataProvider.cpp
+2
-2
paddle/gserver/tests/test_SelectiveFCLayer.cpp
paddle/gserver/tests/test_SelectiveFCLayer.cpp
+4
-4
paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
+1
-1
paddle/gserver/tests/test_WarpCTCLayer.cpp
paddle/gserver/tests/test_WarpCTCLayer.cpp
+1
-1
paddle/math/Matrix.cpp
paddle/math/Matrix.cpp
+3
-3
paddle/math/SparseMatrix.cpp
paddle/math/SparseMatrix.cpp
+1
-1
paddle/math/Vector.cpp
paddle/math/Vector.cpp
+3
-3
paddle/math/tests/test_Allocator.cpp
paddle/math/tests/test_Allocator.cpp
+2
-2
paddle/math/tests/test_BaseMatrix.cpp
paddle/math/tests/test_BaseMatrix.cpp
+1
-1
paddle/math/tests/test_CpuGpuVector.cpp
paddle/math/tests/test_CpuGpuVector.cpp
+1
-1
paddle/math/tests/test_ExecViaCpu.cpp
paddle/math/tests/test_ExecViaCpu.cpp
+1
-1
paddle/math/tests/test_GpuProfiler.cpp
paddle/math/tests/test_GpuProfiler.cpp
+1
-1
paddle/math/tests/test_Matrix.cpp
paddle/math/tests/test_Matrix.cpp
+1
-1
paddle/math/tests/test_SparseMatrix.cpp
paddle/math/tests/test_SparseMatrix.cpp
+3
-3
paddle/math/tests/test_TrainingAlgorithm.cpp
paddle/math/tests/test_TrainingAlgorithm.cpp
+1
-1
paddle/math/tests/test_batchTranspose.cpp
paddle/math/tests/test_batchTranspose.cpp
+1
-1
paddle/math/tests/test_matrixCompare.cpp
paddle/math/tests/test_matrixCompare.cpp
+1
-1
paddle/math/tests/test_perturbation.cpp
paddle/math/tests/test_perturbation.cpp
+1
-1
paddle/math/tests/test_sparseMatrixCompare.cpp
paddle/math/tests/test_sparseMatrixCompare.cpp
+1
-1
paddle/memory/detail/buddy_allocator.cc
paddle/memory/detail/buddy_allocator.cc
+1
-1
paddle/memory/detail/system_allocator.cc
paddle/memory/detail/system_allocator.cc
+1
-1
paddle/memory/detail/system_allocator.h
paddle/memory/detail/system_allocator.h
+1
-1
paddle/memory/detail/system_allocator_test.cc
paddle/memory/detail/system_allocator_test.cc
+1
-1
paddle/memory/memcpy.cc
paddle/memory/memcpy.cc
+1
-1
paddle/memory/memcpy.h
paddle/memory/memcpy.h
+1
-1
paddle/memory/memory.cc
paddle/memory/memory.cc
+1
-1
paddle/memory/memory_test.cc
paddle/memory/memory_test.cc
+1
-1
paddle/operators/detail/strided_memcpy.h
paddle/operators/detail/strided_memcpy.h
+1
-1
paddle/operators/math/im2col_test.cc
paddle/operators/math/im2col_test.cc
+2
-2
paddle/operators/math/math_function_test.cc
paddle/operators/math/math_function_test.cc
+1
-1
paddle/operators/strided_memcpy_test.cc
paddle/operators/strided_memcpy_test.cc
+2
-2
paddle/platform/device_context.cc
paddle/platform/device_context.cc
+1
-1
paddle/platform/device_context.h
paddle/platform/device_context.h
+2
-2
paddle/platform/enforce.h
paddle/platform/enforce.h
+2
-2
paddle/platform/enforce_test.cc
paddle/platform/enforce_test.cc
+1
-1
paddle/platform/gpu_info.h
paddle/platform/gpu_info.h
+1
-1
paddle/platform/variant.h
paddle/platform/variant.h
+1
-1
paddle/pserver/test/SocketTest.cpp
paddle/pserver/test/SocketTest.cpp
+1
-1
paddle/pserver/test/test_ProtoServer.cpp
paddle/pserver/test/test_ProtoServer.cpp
+1
-1
paddle/pybind/pybind.cc
paddle/pybind/pybind.cc
+6
-6
paddle/pybind/tensor_py.h
paddle/pybind/tensor_py.h
+1
-1
paddle/string/to_string_test.cc
paddle/string/to_string_test.cc
+1
-1
paddle/trainer/MergeModel.cpp
paddle/trainer/MergeModel.cpp
+1
-1
paddle/trainer/tests/test_Compare.cpp
paddle/trainer/tests/test_Compare.cpp
+1
-1
paddle/trainer/tests/test_CompareSparse.cpp
paddle/trainer/tests/test_CompareSparse.cpp
+2
-2
paddle/trainer/tests/test_Trainer.cpp
paddle/trainer/tests/test_Trainer.cpp
+2
-2
paddle/trainer/tests/test_TrainerOnePass.cpp
paddle/trainer/tests/test_TrainerOnePass.cpp
+3
-3
paddle/trainer/tests/test_recurrent_machine_generation.cpp
paddle/trainer/tests/test_recurrent_machine_generation.cpp
+1
-1
paddle/utils/Flags.cpp
paddle/utils/Flags.cpp
+1
-1
paddle/utils/Util.h
paddle/utils/Util.h
+1
-1
paddle/utils/Version.h
paddle/utils/Version.h
+1
-1
未找到文件。
cmake/configure.cmake
浏览文件 @
1172f249
...
...
@@ -53,7 +53,8 @@ if(NOT WITH_GPU)
list
(
APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu
)
else
()
add_definitions
(
-DPADDLE_WITH_GPU
)
add_definitions
(
-DPADDLE_WITH_CUDA
)
FIND_PACKAGE
(
CUDA REQUIRED
)
if
(
${
CUDA_VERSION_MAJOR
}
VERSION_LESS 7
)
...
...
paddle/api/Util.cpp
浏览文件 @
1172f249
...
...
@@ -47,7 +47,7 @@ bool isUsingGpu() { return FLAGS_use_gpu; }
void
setUseGpu
(
bool
useGpu
)
{
FLAGS_use_gpu
=
useGpu
;
}
bool
isGpuVersion
()
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
return
false
;
#else
return
true
;
...
...
paddle/capi/Matrix.cpp
浏览文件 @
1172f249
...
...
@@ -46,7 +46,7 @@ paddle_error paddle_matrix_set_row(paddle_matrix mat,
if
(
rowID
>=
ptr
->
mat
->
getHeight
())
return
kPD_OUT_OF_RANGE
;
paddle
::
real
*
buf
=
ptr
->
mat
->
getRowBuf
(
rowID
);
size_t
width
=
ptr
->
mat
->
getWidth
();
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
hl_memcpy
(
buf
,
rowArray
,
sizeof
(
paddle
::
real
)
*
width
);
#else
std
::
copy
(
rowArray
,
rowArray
+
width
,
buf
);
...
...
paddle/framework/grad_op_builder_test.cc
浏览文件 @
1172f249
paddle/framework/lod_tensor.h
浏览文件 @
1172f249
...
...
@@ -15,7 +15,7 @@
#pragma once
#include <memory>
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>
...
...
@@ -29,7 +29,7 @@
namespace
paddle
{
namespace
framework
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
template
<
typename
T
>
using
Vector
=
std
::
vector
<
T
>
;
#else
...
...
paddle/framework/op_proto_maker_test.cc
浏览文件 @
1172f249
paddle/framework/op_registry.h
浏览文件 @
1172f249
...
...
@@ -211,7 +211,7 @@ class OpKernelRegistrar : public Registrar {
// TODO(fengjiayi): The following macros
// seems ugly, do we have better method?
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
#define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU)
#else
#define USE_OP_KERNEL(op_type) \
...
...
paddle/framework/op_registry_test.cc
浏览文件 @
1172f249
paddle/framework/operator.cc
浏览文件 @
1172f249
...
...
@@ -25,7 +25,7 @@ Eigen::DefaultDevice& ExecutionContext::GetEigenDevice<
return
*
device_context_
.
GetEigenDevice
<
platform
::
CPUPlace
>
();
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
>
Eigen
::
GpuDevice
&
ExecutionContext
::
GetEigenDevice
<
platform
::
GPUPlace
,
Eigen
::
GpuDevice
>
()
const
{
...
...
paddle/framework/tensor_impl.h
浏览文件 @
1172f249
...
...
@@ -65,7 +65,7 @@ inline T* Tensor::mutable_data(platform::Place place) {
holder_
.
reset
(
new
PlaceholderImpl
<
T
,
platform
::
CPUPlace
>
(
boost
::
get
<
platform
::
CPUPlace
>
(
place
),
size
));
}
else
if
(
platform
::
is_gpu_place
(
place
))
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
PADDLE_THROW
(
"'GPUPlace' is not supported in CPU only device."
);
}
#else
...
...
@@ -103,7 +103,7 @@ inline void Tensor::CopyFrom(const Tensor& src,
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
boost
::
get
<
platform
::
CPUPlace
>
(
src_place
),
src_ptr
,
size
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
else
if
(
platform
::
is_gpu_place
(
src_place
)
&&
platform
::
is_cpu_place
(
dst_place
))
{
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
...
...
paddle/framework/tensor_test.cc
浏览文件 @
1172f249
...
...
@@ -74,7 +74,7 @@ TEST(Tensor, MutableData) {
EXPECT_EQ
(
p1
,
p2
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
{
Tensor
src_tensor
;
float
*
p1
=
nullptr
;
...
...
@@ -126,7 +126,7 @@ TEST(Tensor, ShareDataWith) {
ASSERT_EQ
(
src_tensor
.
data
<
int
>
(),
dst_tensor
.
data
<
int
>
());
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
{
Tensor
src_tensor
;
Tensor
dst_tensor
;
...
...
@@ -163,7 +163,7 @@ TEST(Tensor, Slice) {
EXPECT_EQ
(
src_data_address
+
3
*
4
*
1
*
sizeof
(
int
),
slice_data_address
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
{
Tensor
src_tensor
;
src_tensor
.
mutable_data
<
double
>
(
make_ddim
({
6
,
9
}),
GPUPlace
());
...
...
@@ -218,7 +218,7 @@ TEST(Tensor, CopyFrom) {
EXPECT_EQ
(
dst_ptr
[
i
],
slice_ptr
[
i
]);
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
{
Tensor
src_tensor
;
Tensor
gpu_tensor
;
...
...
paddle/function/BlockExpandOp.cpp
浏览文件 @
1172f249
...
...
@@ -194,7 +194,7 @@ public:
REGISTER_TYPED_FUNC
(
BlockExpand
,
CPU
,
BlockExpandForward
);
REGISTER_TYPED_FUNC
(
BlockExpandGrad
,
CPU
,
BlockExpandBackward
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
BlockExpand
,
GPU
,
BlockExpandForward
);
REGISTER_TYPED_FUNC
(
BlockExpandGrad
,
GPU
,
BlockExpandBackward
);
#endif
...
...
paddle/function/ContextProjectionOp.cpp
浏览文件 @
1172f249
...
...
@@ -395,7 +395,7 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
REGISTER_TYPED_FUNC
(
ContextProjectionBackward
,
CPU
,
ContextProjectionBackwardFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
ContextProjectionForward
,
GPU
,
ContextProjectionForwardFunc
);
...
...
paddle/function/CosSimOp.cpp
浏览文件 @
1172f249
...
...
@@ -233,7 +233,7 @@ private:
REGISTER_TYPED_FUNC
(
CosSimForward
,
CPU
,
CosSimForwardFunc
);
REGISTER_TYPED_FUNC
(
CosSimBackward
,
CPU
,
CosSimBackwardFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
CosSimForward
,
GPU
,
CosSimForwardFunc
);
REGISTER_TYPED_FUNC
(
CosSimBackward
,
GPU
,
CosSimBackwardFunc
);
#endif
...
...
paddle/function/CropOp.cpp
浏览文件 @
1172f249
...
...
@@ -169,7 +169,7 @@ private:
REGISTER_TYPED_FUNC
(
Crop
,
CPU
,
CropFunc
);
REGISTER_TYPED_FUNC
(
CropGrad
,
CPU
,
CropGradFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
Crop
,
GPU
,
CropFunc
);
REGISTER_TYPED_FUNC
(
CropGrad
,
GPU
,
CropGradFunc
);
#endif
...
...
paddle/function/CrossMapNormalOp.cpp
浏览文件 @
1172f249
...
...
@@ -336,7 +336,7 @@ private:
REGISTER_TYPED_FUNC
(
CrossMapNormal
,
CPU
,
CrossMapNormalFunc
);
REGISTER_TYPED_FUNC
(
CrossMapNormalGrad
,
CPU
,
CrossMapNormalGradFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
CrossMapNormal
,
GPU
,
CrossMapNormalFunc
);
REGISTER_TYPED_FUNC
(
CrossMapNormalGrad
,
GPU
,
CrossMapNormalGradFunc
);
#endif
...
...
paddle/function/DepthwiseConvOp.cpp
浏览文件 @
1172f249
...
...
@@ -292,7 +292,7 @@ REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
REGISTER_TYPED_FUNC
(
DepthwiseConvGradFilter
,
CPU
,
DepthwiseConvGradFilterFunction
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
DepthwiseConv
,
GPU
,
DepthwiseConvFunction
);
REGISTER_TYPED_FUNC
(
DepthwiseConvGradInput
,
GPU
,
...
...
paddle/function/DepthwiseConvOpTest.cpp
浏览文件 @
1172f249
...
...
@@ -17,7 +17,7 @@ limitations under the License. */
namespace
paddle
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
DepthwiseConv
,
Forward
)
{
DepthwiseConvolution
<
DEVICE_TYPE_CPU
,
DEVICE_TYPE_GPU
>
(
"GemmConv-CPU"
,
"DepthwiseConv-GPU"
,
forward
);
...
...
paddle/function/GemmConvOp.cpp
浏览文件 @
1172f249
...
...
@@ -340,7 +340,7 @@ public:
REGISTER_TYPED_FUNC
(
GemmConv
,
CPU
,
GemmConvFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradInput
,
CPU
,
GemmConvGradInputFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradFilter
,
CPU
,
GemmConvGradFilterFunction
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
GemmConv
,
GPU
,
GemmConvFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradInput
,
GPU
,
GemmConvGradInputFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradFilter
,
GPU
,
GemmConvGradFilterFunction
);
...
...
paddle/function/GemmConvOpTest.cpp
浏览文件 @
1172f249
...
...
@@ -24,7 +24,7 @@ TEST(GemmConv, NaiveConv) {
"NaiveConv-CPU"
,
"GemmConv-CPU"
,
forward
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
GemmConv
,
Forward
)
{
Convolution
<
DEVICE_TYPE_CPU
,
DEVICE_TYPE_GPU
>
(
"GemmConv-CPU"
,
"GemmConv-GPU"
,
forward
);
...
...
paddle/function/Im2ColTest.cpp
浏览文件 @
1172f249
...
...
@@ -116,7 +116,7 @@ void TestIm2ColFunctor() {
TEST
(
Im2ColFunctor
,
CPU
)
{
TestIm2ColFunctor
<
DEVICE_TYPE_CPU
,
float
>
();
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
Im2ColFunctor
,
GPU
)
{
TestIm2ColFunctor
<
DEVICE_TYPE_GPU
,
float
>
();
}
...
...
paddle/function/MulOp.cpp
浏览文件 @
1172f249
...
...
@@ -341,7 +341,7 @@ private:
};
REGISTER_TYPED_FUNC
(
MulOp
,
CPU
,
MulFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
MulOp
,
GPU
,
MulFunc
);
#endif
}
// namespace paddle
paddle/function/PadOp.cpp
浏览文件 @
1172f249
...
...
@@ -207,7 +207,7 @@ private:
REGISTER_TYPED_FUNC
(
Pad
,
CPU
,
PadFunc
);
REGISTER_TYPED_FUNC
(
PadGrad
,
CPU
,
PadGradFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
Pad
,
GPU
,
PadFunc
);
REGISTER_TYPED_FUNC
(
PadGrad
,
GPU
,
PadGradFunc
);
#endif
...
...
paddle/function/RowConvOp.cpp
浏览文件 @
1172f249
...
...
@@ -217,7 +217,7 @@ public:
REGISTER_TYPED_FUNC
(
RowConv
,
CPU
,
RowConvFunc
);
REGISTER_TYPED_FUNC
(
RowConvGrad
,
CPU
,
RowConvGradFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
RowConv
,
GPU
,
RowConvFunc
);
REGISTER_TYPED_FUNC
(
RowConvGrad
,
GPU
,
RowConvGradFunc
);
#endif
...
...
paddle/function/SwitchOp.cpp
浏览文件 @
1172f249
...
...
@@ -132,7 +132,7 @@ public:
REGISTER_TYPED_FUNC
(
NCHW2NHWC
,
CPU
,
NCHW2NHWCFunc
);
REGISTER_TYPED_FUNC
(
NHWC2NCHW
,
CPU
,
NHWC2NCHWFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
NCHW2NHWC
,
GPU
,
NCHW2NHWCFunc
);
REGISTER_TYPED_FUNC
(
NHWC2NCHW
,
GPU
,
NHWC2NCHWFunc
);
#endif
...
...
paddle/gserver/layers/BatchNormBaseLayer.cpp
浏览文件 @
1172f249
...
...
@@ -16,7 +16,7 @@ limitations under the License. */
#include "BatchNormalizationLayer.h"
#include "Layer.h"
#include "paddle/utils/Stat.h"
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include "CudnnBatchNormLayer.h"
#endif
...
...
paddle/gserver/layers/BatchNormalizationLayer.cpp
浏览文件 @
1172f249
...
...
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Stat.h"
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include "hl_batch_transpose.h"
#endif
#include "BatchNormalizationLayer.h"
...
...
@@ -90,7 +90,7 @@ void BatchNormalizationLayer::expandMat(const MatrixPtr& in, MatrixPtr& out) {
size_t
batchSize
=
in
->
getHeight
();
CHECK_EQ
(
out
->
getHeight
(),
batchSize
*
imgPixels_
);
if
(
useGpu_
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
LOG
(
FATAL
)
<<
"paddle is compiled only for cpu"
;
#else
batchTranspose
(
...
...
@@ -127,7 +127,7 @@ void BatchNormalizationLayer::shrinkMat(const MatrixPtr& in, MatrixPtr& out) {
}
CHECK_EQ
(
in
->
getHeight
(),
static_cast
<
size_t
>
(
batchSize
*
imgPixels_
));
if
(
useGpu_
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
LOG
(
FATAL
)
<<
"paddle is compiled only for cpu"
;
#else
batchTranspose
(
...
...
paddle/gserver/layers/PoolLayer.cpp
浏览文件 @
1172f249
...
...
@@ -15,7 +15,7 @@ limitations under the License. */
#include "PoolLayer.h"
#include "PoolProjectionLayer.h"
#include "paddle/utils/Logging.h"
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include "CudnnPoolLayer.h"
#endif
namespace
paddle
{
...
...
@@ -53,7 +53,7 @@ Layer* PoolLayer::create(const LayerConfig& config) {
const
std
::
string
&
pool
=
config
.
inputs
(
0
).
pool_conf
().
pool_type
();
if
(
pool
==
"max-projection"
||
pool
==
"avg-projection"
)
{
return
new
PoolProjectionLayer
(
config
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
}
else
if
(
CudnnPoolLayer
::
typeCheck
(
pool
))
{
return
new
CudnnPoolLayer
(
config
);
#endif
...
...
paddle/gserver/tests/LayerGradUtil.cpp
浏览文件 @
1172f249
...
...
@@ -674,7 +674,7 @@ void testLayerGradKernel(TestConfig testConf,
bool
useGpu
,
bool
useWeight
,
float
epsilon
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
return
;
#endif
FLAGS_use_gpu
=
useGpu
;
...
...
paddle/gserver/tests/test_BatchNorm.cpp
浏览文件 @
1172f249
...
...
@@ -119,7 +119,7 @@ TEST(Layer, batchNorm) {
CHECK_EQ
(
static_cast
<
int
>
(
convLayer
->
getOutputValue
()
->
getWidth
()),
576
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
void
batchNormInference
(
int
n
,
int
c
,
int
h
,
int
w
)
{
MatrixPtr
input
=
std
::
make_shared
<
GpuMatrix
>
(
n
,
c
*
h
*
w
);
MatrixPtr
cudnnOut
=
std
::
make_shared
<
GpuMatrix
>
(
n
,
c
*
h
*
w
);
...
...
paddle/gserver/tests/test_ConvUnify.cpp
浏览文件 @
1172f249
...
...
@@ -117,7 +117,7 @@ MatrixPtr doOneConvTest(size_t imgSize,
}
TEST
(
Layer
,
convParaUnified
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
MatrixPtr
input
,
resultCpu
,
resultGpu
;
/// TEST1 for conv ///
...
...
paddle/gserver/tests/test_DetectionOutput.cpp
浏览文件 @
1172f249
...
...
@@ -150,7 +150,7 @@ TEST(Layer, detectionOutputLayerFwd) {
useGpu
,
result2
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
// GPU case 1.
useGpu
=
true
;
inputLoc
=
Matrix
::
create
(
1
,
16
,
false
,
useGpu
);
...
...
paddle/gserver/tests/test_Evaluator.cpp
浏览文件 @
1172f249
...
...
@@ -51,7 +51,7 @@ void testEvaluator(TestConfig testConf,
string
testEvaluatorName
,
size_t
batchSize
,
bool
useGpu
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
return
;
#endif
FLAGS_use_gpu
=
useGpu
;
...
...
paddle/gserver/tests/test_KmaxSeqScore.cpp
浏览文件 @
1172f249
...
...
@@ -97,7 +97,7 @@ TEST(Layer, kmaxSeqScoreLayer) {
Matrix
::
create
(
subSeqStartPosition
.
back
(),
1
,
false
,
false
);
std
::
vector
<
bool
>
mode
=
{
false
};
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
mode
.
push_back
(
true
);
#endif
...
...
paddle/gserver/tests/test_LayerGrad.cpp
浏览文件 @
1172f249
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <cudnn.h>
#endif
#include <gtest/gtest.h>
...
...
@@ -258,7 +258,7 @@ void testProjectionConv(size_t groups, bool isDeconv) {
true
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
Projection
,
conv
)
{
/// test ConvProjection
testProjectionConv
(
1
,
false
);
...
...
@@ -422,7 +422,7 @@ TEST(Layer, depthwiseConvLayer) {
// 'depthwise_conv' is a sepecial case of 'exconv' whose
// groups size equals to the input channels size.
testDepthwiseConvLayer
(
"exconv"
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testDepthwiseConvLayer
(
"exconv"
,
/* useGpu= */
true
);
#endif
}
...
...
@@ -480,7 +480,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
convLayer
)
{
testConvLayer
(
"exconv"
,
/* trans= */
false
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testConvLayer
(
"exconv"
,
/* trans= */
false
,
/* useGpu= */
true
);
testConvLayer
(
"cudnn_conv"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
...
...
@@ -525,7 +525,7 @@ TEST(Layer, convTransLayer) {
for
(
auto
useGpu
:
{
false
,
true
})
{
testConvTransLayer
(
"exconvt"
,
/* trans= */
false
,
/* useGpu= */
useGpu
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testConvTransLayer
(
"cudnn_convt"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
}
...
...
@@ -638,7 +638,7 @@ TEST(Layer, SelectiveFullyConnectedLayer) {
/* trans= */
false
,
/* useGup= */
false
,
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testLayerGrad
(
config
,
"selective_fc"
,
100
,
...
...
@@ -1210,7 +1210,7 @@ void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
testLayerGrad
(
config
,
"pool"
,
100
,
trans
,
useGpu
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
void
testPoolLayer2
(
const
string
&
poolType
,
bool
trans
,
bool
useGpu
)
{
TestConfig
config
;
config
.
inputDefs
.
push_back
({
INPUT_DATA
,
"layer_0"
,
3200
,
0
});
...
...
@@ -1236,7 +1236,7 @@ TEST(Layer, PoolLayer) {
testPoolLayer
(
"avg-projection"
,
/* trans= */
false
,
/* useGpu= */
false
);
testPoolLayer
(
"max-projection"
,
/* trans= */
false
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testPoolLayer
(
"avg-projection"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPoolLayer
(
"max-projection"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPoolLayer
(
"cudnn-max-pool"
,
/* trans= */
false
,
/* useGpu= */
true
);
...
...
@@ -1309,7 +1309,7 @@ void testPool3DLayer(const string& poolType, bool trans, bool useGpu) {
TEST
(
Layer
,
Pool3DLayer
)
{
testPool3DLayer
(
"avg"
,
/* trans= */
false
,
/* useGpu= */
false
);
testPool3DLayer
(
"max"
,
/* trans= */
false
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testPool3DLayer
(
"avg"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPool3DLayer
(
"max"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
...
...
@@ -1695,7 +1695,7 @@ void testBatchNormLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
BatchNormalizationLayer
)
{
testBatchNormLayer
(
"batch_norm"
,
false
,
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testBatchNormLayer
(
"batch_norm"
,
false
,
true
);
if
(
hl_get_cudnn_lib_version
()
>=
int
(
4000
))
{
testBatchNormLayer
(
"cudnn_batch_norm"
,
false
,
true
);
...
...
@@ -1744,7 +1744,7 @@ void testBatchNorm3DLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
testBatchNorm3DLayer
)
{
testBatchNorm3DLayer
(
"batch_norm"
,
false
,
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testBatchNorm3DLayer
(
"batch_norm"
,
false
,
true
);
if
(
hl_get_cudnn_lib_version
()
>=
int
(
4000
))
{
testBatchNorm3DLayer
(
"cudnn_batch_norm"
,
false
,
true
);
...
...
@@ -2262,7 +2262,7 @@ void test3DConvLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
test3DConvLayer
)
{
test3DConvLayer
(
"conv3d"
,
/* trans= */
false
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
test3DConvLayer
(
"conv3d"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
}
...
...
@@ -2339,7 +2339,7 @@ void test3DDeConvLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
test3DDeConvLayer
)
{
test3DDeConvLayer
(
"deconv3d"
,
/* trans= */
false
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
test3DDeConvLayer
(
"deconv3d"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
}
...
...
paddle/gserver/tests/test_NetworkCompare.cpp
浏览文件 @
1172f249
...
...
@@ -243,7 +243,7 @@ TEST(Compare, concat_slice) {
compareNetwork
(
config_file_a
,
config_file_b
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
Compare
,
img_pool
)
{
std
::
string
config_file_a
=
"./gserver/tests/img_pool_a.conf"
;
std
::
string
config_file_b
=
"./gserver/tests/img_pool_b.conf"
;
...
...
paddle/gserver/tests/test_PriorBox.cpp
浏览文件 @
1172f249
...
...
@@ -151,7 +151,7 @@ TEST(Layer, priorBoxLayerFwd) {
useGpu
,
result
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
// reset the input parameters
variance
[
1
]
=
0.1
;
variance
[
3
]
=
0.2
;
...
...
paddle/gserver/tests/test_ProtoDataProvider.cpp
浏览文件 @
1172f249
...
...
@@ -485,7 +485,7 @@ TEST(ProtoDataProvider, test) {
// Currently in async mode, useGpu is not supported
continue
;
}
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
continue
;
}
...
...
@@ -525,7 +525,7 @@ TEST(ProtoDataProvider, constant_slots) {
for
(
int
numConstantSlots
:
{
1
,
2
})
{
for
(
int
useGpu
:
numTwoArray
)
{
for
(
int
dataCompression
:
numTwoArray
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
continue
;
}
...
...
@@ -708,7 +708,7 @@ TEST(ProtoSequenceDataProvider, test) {
// Currently in async mode, useGpu is not supported
continue
;
}
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
continue
;
}
...
...
paddle/gserver/tests/test_PyDataProvider.cpp
浏览文件 @
1172f249
...
...
@@ -37,7 +37,7 @@ TEST(PyDataProvider, py_fill_slots) {
config
.
clear_files
();
std
::
string
dataFile
=
"gserver/tests/pyDataProvider/pyDataProviderList"
;
config
.
set_files
(
dataFile
);
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
bool
useGpu
=
false
;
#else
bool
useGpu
=
true
;
...
...
@@ -71,7 +71,7 @@ TEST(PyDataProvider, py_fill_nest_slots) {
std
::
string
dataFile
=
"gserver/tests/pyDataProvider/pyDataProviderList"
;
config
.
set_files
(
dataFile
);
EXPECT_EQ
(
config
.
IsInitialized
(),
true
);
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
bool
useGpu
=
false
;
#else
bool
useGpu
=
true
;
...
...
paddle/gserver/tests/test_SelectiveFCLayer.cpp
浏览文件 @
1172f249
...
...
@@ -321,7 +321,7 @@ TEST(Layer, SelectiveFcLayer_train_dense_mul) {
"filelist=gserver/tests/SelectiveFcTest/dense_mul_list"
;
for
(
auto
useGpu
:
{
false
,
true
})
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
break
;
}
...
...
@@ -388,7 +388,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
outMatSelfc
->
getWidth
(),
outMatSelfc
->
getElementCnt
()));
cpuOutMatSelfc
->
copyFrom
(
*
outMatSelfc
,
HPPL_STREAM_DEFAULT
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
hl_stream_synchronize
(
HPPL_STREAM_DEFAULT
);
}
...
...
@@ -418,7 +418,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
MatrixPtr
cpuOutMatFc
(
new
CpuMatrix
(
outMatFc
->
getHeight
(),
outMatFc
->
getWidth
()));
cpuOutMatFc
->
copyFrom
(
*
outMatFc
,
HPPL_STREAM_DEFAULT
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
hl_stream_synchronize
(
HPPL_STREAM_DEFAULT
);
}
...
...
@@ -443,7 +443,7 @@ TEST(Layer, SelectiveFcLayer_train_sparse_mul) {
selLayerConfig
.
set_size
(
fcLayerWidth
);
testSelectiveFcLayerTrainSparseMul
(
selLayerConfig
,
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testSelectiveFcLayerTrainSparseMul
(
selLayerConfig
,
true
);
#endif
}
...
...
paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
浏览文件 @
1172f249
...
...
@@ -195,7 +195,7 @@ TEST(Layer, SeqSliceLayer) {
vector
<
vector
<
real
>>
ends
;
std
::
vector
<
bool
>
mode
=
{
false
};
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
mode
.
push_back
(
true
);
#endif
genSeqInfo
(
seqStartPos
,
subSeqStartPos
);
...
...
paddle/gserver/tests/test_WarpCTCLayer.cpp
浏览文件 @
1172f249
...
...
@@ -199,7 +199,7 @@ TEST(Layer, WarpCTCLayer) {
for
(
auto
batchSize
:
{
1
,
10
,
32
})
{
for
(
auto
normByTimes
:
{
false
,
true
})
{
for
(
auto
useGpu
:
{
false
,
true
})
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
continue
;
#endif
LOG
(
INFO
)
<<
"layerSize="
<<
layerSize
<<
" batchSize="
<<
batchSize
...
...
paddle/math/Matrix.cpp
浏览文件 @
1172f249
...
...
@@ -670,7 +670,7 @@ void GpuMatrix::leftMul(Matrix& a, real scaleAB, real scaleT) {
}
void
GpuMatrix
::
selectRows
(
Matrix
&
table
,
IVector
&
ids
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
CHECK
(
dynamic_cast
<
GpuMatrix
*>
(
&
table
));
CHECK
(
table
.
useGpu
());
CHECK
(
ids
.
useGpu
());
...
...
@@ -694,7 +694,7 @@ void GpuMatrix::selectRows(Matrix& table, IVector& ids) {
}
void
GpuMatrix
::
addToRows
(
Matrix
&
table
,
IVector
&
ids
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
CHECK
(
dynamic_cast
<
GpuMatrix
*>
(
&
table
));
CHECK
(
table
.
useGpu
());
CHECK
(
ids
.
useGpu
());
...
...
@@ -741,7 +741,7 @@ void GpuMatrix::rowMax(Matrix& max) {
}
void
GpuMatrix
::
rowMax
(
IVector
&
maxIds
,
Matrix
&
maxVal
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
CHECK
(
maxIds
.
useGpu
()
&&
maxVal
.
useGpu
())
<<
"Matrix type are not equal"
;
size_t
numSamples
=
getHeight
();
size_t
beam
=
maxVal
.
getWidth
();
...
...
paddle/math/SparseMatrix.cpp
浏览文件 @
1172f249
...
...
@@ -836,7 +836,7 @@ void GpuSparseMatrix::zeroMem() {
}
void
GpuSparseMatrix
::
rowMax
(
IVector
&
maxIds
,
Matrix
&
maxVal
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
CHECK
(
maxIds
.
useGpu
()
&&
maxVal
.
useGpu
())
<<
"Matrix type are not equal"
;
size_t
numSamples
=
getHeight
();
size_t
beam
=
maxVal
.
getWidth
();
...
...
paddle/math/Vector.cpp
浏览文件 @
1172f249
...
...
@@ -172,7 +172,7 @@ void GpuVectorT<T>::isEqualTo(const VectorT<T>& b, const T& value) {
template
<
class
T
>
void
GpuVectorT
<
T
>::
selectFrom
(
const
VectorT
<
T
>&
src
,
const
VectorT
<
int
>&
ids
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
hl_vector_select_from
<
T
>
(
this
->
getData
(),
this
->
getSize
(),
src
.
getData
(),
...
...
@@ -850,7 +850,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
size_t
size
)
:
sync_
(
nullptr
)
{
CHECK_LE
(
offset
+
size
,
static_cast
<
size_t
>
(
src
.
getSize
()));
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
SyncedFlag
*
flag
=
src
.
getSync
();
if
(
*
flag
==
DATA_AT_CPU
)
{
src
.
copyToGpu
();
// will set synchronous data between CPU and GPU
...
...
@@ -861,7 +861,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
auto
cMemHandle
=
(
src
.
getVector
(
false
))
->
getMemoryHandle
();
cpuVectorT_
=
std
::
make_shared
<
CpuVectorT
<
T
>>
(
size
,
std
::
dynamic_pointer_cast
<
CpuMemoryHandle
>
(
cMemHandle
),
offset
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
auto
gMemHandle
=
(
src
.
getVector
(
true
))
->
getMemoryHandle
();
gpuVectorT_
=
std
::
make_shared
<
GpuVectorT
<
T
>>
(
size
,
std
::
dynamic_pointer_cast
<
GpuMemoryHandle
>
(
gMemHandle
),
offset
);
...
...
paddle/math/tests/test_Allocator.cpp
浏览文件 @
1172f249
...
...
@@ -68,7 +68,7 @@ void testPoolAllocator() {
TEST
(
Allocator
,
Pool
)
{
testPoolAllocator
<
CpuAllocator
>
();
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testPoolAllocator
<
GpuAllocator
>
();
#endif
}
...
...
@@ -92,7 +92,7 @@ TEST(MemoryHandle, Cpu) {
EXPECT_EQ
(
ptr1
,
ptr2
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
MemoryHandle
,
Gpu
)
{
int
numGpu
=
hl_get_device_count
();
...
...
paddle/math/tests/test_BaseMatrix.cpp
浏览文件 @
1172f249
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
/**
* This test file use autotest::AutoCompare and cmpWithoutArg to compares the
* implementation of CPU and GPU member function in
...
...
paddle/math/tests/test_CpuGpuVector.cpp
浏览文件 @
1172f249
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <gtest/gtest.h>
#include "paddle/math/Vector.h"
...
...
paddle/math/tests/test_ExecViaCpu.cpp
浏览文件 @
1172f249
...
...
@@ -94,7 +94,7 @@ void testWrapper(F&& f) {
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
ExecViaCpu
,
test1
)
{
testWrapper
(
f
);
testWrapper
(
&
f
);
...
...
paddle/math/tests/test_GpuProfiler.cpp
浏览文件 @
1172f249
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <gtest/gtest.h>
#include "paddle/math/Matrix.h"
...
...
paddle/math/tests/test_Matrix.cpp
浏览文件 @
1172f249
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
/**
* This test file use autotest::AutoCompare and cmpWithArg to compares the
* implementation of CPU and GPU member function in Matrix.cpp.
...
...
paddle/math/tests/test_SparseMatrix.cpp
浏览文件 @
1172f249
...
...
@@ -47,7 +47,7 @@ struct MatrixPara {
SparseFormat
format
;
};
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
void
test_sparse_matrix_mul
(
MatrixPara
paraA
,
MatrixPara
paraB
,
MatrixPara
paraC
)
{
...
...
@@ -452,7 +452,7 @@ TEST(Matrix, SparseMatrixCSRFormatTrimFrom) {
matB
->
trimFrom
(
*
mat
);
checkSMatrixEqual2
(
matA
,
matB
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
GpuSparseMatrixPtr
matC
=
std
::
make_shared
<
GpuSparseMatrix
>
(
height
,
trimedWidth
,
height
,
FLOAT_VALUE
,
SPARSE_CSR
,
true
);
matC
->
trimFrom
(
*
mat
);
...
...
@@ -546,7 +546,7 @@ TEST(Matrix, SparseMatrixCSCFormatTrimFrom) {
matB
->
trimFrom
(
*
mat
);
checkSMatrixEqual2
(
matA
,
matB
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
GpuSparseMatrixPtr
matC
=
std
::
make_shared
<
GpuSparseMatrix
>
(
height
,
trimedWidth
,
height
,
FLOAT_VALUE
,
SPARSE_CSC
,
true
);
matC
->
trimFrom
(
*
mat
);
...
...
paddle/math/tests/test_TrainingAlgorithm.cpp
浏览文件 @
1172f249
...
...
@@ -91,7 +91,7 @@ int VectorCheckErr(const VectorPtr& vector1, const VectorPtr& vector2) {
typedef
std
::
function
<
void
(
size_t
size
,
bool
useGpu
)
>
testMatrixFunc
;
void
testCase
(
testMatrixFunc
matrixFunc
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
for
(
auto
useGpu
:
{
false
,
true
})
{
#else
for
(
auto
useGpu
:
{
false
})
{
...
...
paddle/math/tests/test_batchTranspose.cpp
浏览文件 @
1172f249
...
...
@@ -17,7 +17,7 @@ limitations under the License. */
using
namespace
paddle
;
// NOLINT
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
MatrixBatchTransTest
,
test_batch_matrix_transpose
)
{
const
int
nx
=
100
;
const
int
ny
=
50
;
...
...
paddle/math/tests/test_matrixCompare.cpp
浏览文件 @
1172f249
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
/// This unittest checks GpuMatrix/CpuMatrix get same result, so disable when
/// only cpu version.
...
...
paddle/math/tests/test_perturbation.cpp
浏览文件 @
1172f249
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <cuda_runtime.h>
#include <gtest/gtest.h>
...
...
paddle/math/tests/test_sparseMatrixCompare.cpp
浏览文件 @
1172f249
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
/// This unittest checks GpuSparseMatrix/CpuSparseMatrix get same result,
// so disable when
/// only cpu version.
...
...
paddle/memory/detail/buddy_allocator.cc
浏览文件 @
1172f249
...
...
@@ -175,7 +175,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
}
BuddyAllocator
::
PoolSet
::
iterator
BuddyAllocator
::
RefillPool
()
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
if
(
system_allocator_
->
UseGpu
())
{
if
((
total_used_
+
total_free_
)
==
0
)
{
// Compute the maximum allocation size for the first allocation.
...
...
paddle/memory/detail/system_allocator.cc
浏览文件 @
1172f249
...
...
@@ -62,7 +62,7 @@ void CPUAllocator::Free(void* p, size_t size, size_t index) {
bool
CPUAllocator
::
UseGpu
()
const
{
return
false
;
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
void
*
GPUAllocator
::
Alloc
(
size_t
&
index
,
size_t
size
)
{
// CUDA documentation doesn't explain if cudaMalloc returns nullptr
...
...
paddle/memory/detail/system_allocator.h
浏览文件 @
1172f249
...
...
@@ -40,7 +40,7 @@ class CPUAllocator : public SystemAllocator {
virtual
bool
UseGpu
()
const
;
};
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
class
GPUAllocator
:
public
SystemAllocator
{
public:
virtual
void
*
Alloc
(
size_t
&
index
,
size_t
size
);
...
...
paddle/memory/detail/system_allocator_test.cc
浏览文件 @
1172f249
...
...
@@ -56,7 +56,7 @@ TEST(CPUAllocator, LockMem) {
TestAllocator
(
a
,
0
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
GPUAllocator
,
Alloc
)
{
paddle
::
memory
::
detail
::
GPUAllocator
a
;
TestAllocator
(
a
,
2048
);
...
...
paddle/memory/memcpy.cc
浏览文件 @
1172f249
...
...
@@ -26,7 +26,7 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
std
::
memcpy
(
dst
,
src
,
num
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
>
void
Copy
<
platform
::
CPUPlace
,
platform
::
GPUPlace
>
(
platform
::
CPUPlace
dst_place
,
void
*
dst
,
...
...
paddle/memory/memcpy.h
浏览文件 @
1172f249
...
...
@@ -33,7 +33,7 @@ namespace memory {
template
<
typename
DstPlace
,
typename
SrcPlace
>
void
Copy
(
DstPlace
,
void
*
dst
,
SrcPlace
,
const
void
*
src
,
size_t
num
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
/**
* \brief Copy memory from one place to another place.
...
...
paddle/memory/memory.cc
浏览文件 @
1172f249
...
...
@@ -62,7 +62,7 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
return
GetCPUBuddyAllocator
()
->
Used
();
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
BuddyAllocator
*
GetGPUBuddyAllocator
(
int
gpu_id
)
{
using
BuddyAllocVec
=
std
::
vector
<
BuddyAllocator
*>
;
...
...
paddle/memory/memory_test.cc
浏览文件 @
1172f249
...
...
@@ -80,7 +80,7 @@ TEST(BuddyAllocator, CPUMultAlloc) {
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
size_t
align
(
size_t
size
,
paddle
::
platform
::
GPUPlace
place
)
{
size
+=
sizeof
(
paddle
::
memory
::
detail
::
Metadata
);
...
...
paddle/operators/detail/strided_memcpy.h
浏览文件 @
1172f249
...
...
@@ -34,7 +34,7 @@ struct StridedMemcpyFunctor<T, 1> {
auto
&
cpu_place
=
boost
::
get
<
platform
::
CPUPlace
>
(
place
);
memory
::
Copy
(
cpu_place
,
dst
,
cpu_place
,
src
,
sizeof
(
T
)
*
dst_dim
.
head
);
}
else
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
auto
&
gpu_place
=
boost
::
get
<
platform
::
GPUPlace
>
(
place
);
auto
&
cuda_ctx
=
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
dev_ctx
);
...
...
paddle/operators/math/im2col_test.cc
浏览文件 @
1172f249
...
...
@@ -71,7 +71,7 @@ void testIm2col() {
context
=
new
paddle
::
platform
::
CPUDeviceContext
(
paddle
::
platform
::
CPUPlace
());
}
else
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
context
=
new
paddle
::
platform
::
CUDADeviceContext
(
paddle
::
platform
::
GPUPlace
());
#else
...
...
@@ -116,7 +116,7 @@ void testIm2col() {
TEST
(
math
,
im2col
)
{
testIm2col
<
paddle
::
platform
::
CPUPlace
>
();
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testIm2col
<
paddle
::
platform
::
GPUPlace
>
();
#endif
}
paddle/operators/math/math_function_test.cc
浏览文件 @
1172f249
#include "paddle/operators/math/math_function.h"
#include "gtest/gtest.h"
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
math_function
,
notrans_mul_trans
)
{
paddle
::
framework
::
Tensor
input1
;
paddle
::
framework
::
Tensor
input1_gpu
;
...
...
paddle/operators/strided_memcpy_test.cc
浏览文件 @
1172f249
...
...
@@ -72,7 +72,7 @@ TEST(StridedMemcpy, CPUConcat) {
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
StridedMemcpy
,
GPUCrop
)
{
// clang-format off
int
src
[]
=
{
...
...
paddle/platform/device_context.cc
浏览文件 @
1172f249
...
...
@@ -35,7 +35,7 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
Place
CPUDeviceContext
::
GetPlace
()
const
{
return
CPUPlace
();
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
>
Eigen
::
GpuDevice
*
...
...
paddle/platform/device_context.h
浏览文件 @
1172f249
...
...
@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/gpu_info.h"
...
...
@@ -61,7 +61,7 @@ class CPUDeviceContext : public DeviceContext {
std
::
unique_ptr
<
Eigen
::
DefaultDevice
>
eigen_device_
;
};
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
>
struct
EigenDeviceConverter
<
platform
::
GPUPlace
>
{
using
EigenDeviceType
=
Eigen
::
GpuDevice
;
...
...
paddle/platform/enforce.h
浏览文件 @
1172f249
...
...
@@ -29,7 +29,7 @@ limitations under the License. */
#include <cxxabi.h> // for __cxa_demangle
#endif
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
...
...
@@ -113,7 +113,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
...
...
paddle/platform/enforce_test.cc
浏览文件 @
1172f249
paddle/platform/gpu_info.h
浏览文件 @
1172f249
...
...
@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <cuda_runtime.h>
#include <stddef.h>
...
...
paddle/platform/variant.h
浏览文件 @
1172f249
...
...
@@ -16,7 +16,7 @@
#include <boost/config.hpp>
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
// Because boost's variadic templates has bug on nvcc, boost will disable
// variadic template support when GPU enabled on nvcc.
...
...
paddle/pserver/test/SocketTest.cpp
浏览文件 @
1172f249
...
...
@@ -215,7 +215,7 @@ int main(int argc, char** argv) {
uint64_t
dataSize
=
FLAGS_dim
*
sizeof
(
real
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
GpuVector
gpuParam
(
FLAGS_dim
);
GpuVector
gpuGrad
(
FLAGS_dim
);
#else
...
...
paddle/pserver/test/test_ProtoServer.cpp
浏览文件 @
1172f249
...
...
@@ -99,7 +99,7 @@ TEST(ProtoServer, regular) {
}
TEST
(
ProtoServer
,
extended
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
ProtoClient
*
client
;
if
(
FLAGS_rdma_tcp
==
"rdma"
)
client
=
new
ProtoClient
(
FLAGS_server_addr
,
FLAGS_port
,
F_RDMA
);
...
...
paddle/pybind/pybind.cc
浏览文件 @
1172f249
...
...
@@ -34,7 +34,7 @@ static size_t UniqueIntegerGenerator() {
}
bool
IsCompileGPU
()
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
return
false
;
#else
return
true
;
...
...
@@ -78,7 +78,7 @@ PYBIND11_PLUGIN(core) {
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
double
>
)
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
double
>
)
...
...
@@ -96,7 +96,7 @@ PYBIND11_PLUGIN(core) {
.
def
(
"__init__"
,
[](
LoDTensor
&
instance
,
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
lod
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
new
(
&
instance
)
LoDTensor
(
lod
);
#else
LoD
new_lod
;
...
...
@@ -107,7 +107,7 @@ PYBIND11_PLUGIN(core) {
})
.
def
(
"set_lod"
,
[](
LoDTensor
&
self
,
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
lod
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
self
.
set_lod
(
lod
);
#else
LoD
new_lod
;
...
...
@@ -117,7 +117,7 @@ PYBIND11_PLUGIN(core) {
#endif
})
.
def
(
"lod"
,
[](
LoDTensor
&
self
)
->
std
::
vector
<
std
::
vector
<
size_t
>>
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
return
self
.
lod
();
#else
auto
lod
=
self
.
lod
();
...
...
@@ -203,7 +203,7 @@ All parameter, weight, gradient are variables in Paddle.
.
def_static
(
"create"
,
[](
paddle
::
platform
::
GPUPlace
&
place
)
->
paddle
::
platform
::
DeviceContext
*
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
PADDLE_THROW
(
"GPUPlace is not supported in CPU device."
);
#else
return
new
paddle
::
platform
::
CUDADeviceContext
(
place
);
...
...
paddle/pybind/tensor_py.h
浏览文件 @
1172f249
...
...
@@ -106,7 +106,7 @@ void PyCPUTensorSetFromArray(
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
());
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
typename
T
>
void
PyCUDATensorSetFromArray
(
framework
::
Tensor
&
self
,
...
...
paddle/string/to_string_test.cc
浏览文件 @
1172f249
paddle/trainer/MergeModel.cpp
浏览文件 @
1172f249
...
...
@@ -29,7 +29,7 @@ int main(int argc, char** argv) {
initMain
(
argc
,
argv
);
initPython
(
argc
,
argv
);
string
confFile
=
TrainerConfigHelper
::
getConfigNameFromPath
(
FLAGS_model_dir
);
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
FLAGS_use_gpu
=
false
;
#endif
auto
config
=
std
::
make_shared
<
TrainerConfigHelper
>
(
confFile
);
...
...
paddle/trainer/tests/test_Compare.cpp
浏览文件 @
1172f249
...
...
@@ -146,7 +146,7 @@ void compareGradient(comData& comDataCpu, comData& comDataGpu) {
}
int
main
(
int
argc
,
char
**
argv
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
exit
(
0
);
#endif
paddle
::
initMain
(
argc
,
argv
);
...
...
paddle/trainer/tests/test_CompareSparse.cpp
浏览文件 @
1172f249
...
...
@@ -174,7 +174,7 @@ TEST(compareSparse, multiGradientMachine) {
FLAGS_local
=
local
;
FLAGS_ports_num_for_sparse
=
5
;
for
(
bool
useGpu
:
{
false
,
true
})
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
continue
;
#endif
FLAGS_parallel_nn
=
useGpu
;
...
...
@@ -198,7 +198,7 @@ TEST(compareSparse, NeuralNetwork) {
FLAGS_local
=
local
;
FLAGS_ports_num_for_sparse
=
5
;
for
(
bool
useGpu
:
{
false
,
true
})
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
continue
;
#endif
FLAGS_parallel_nn
=
useGpu
;
...
...
paddle/trainer/tests/test_Trainer.cpp
浏览文件 @
1172f249
...
...
@@ -51,7 +51,7 @@ void checkGradientTest(const string& configFile,
TEST
(
checkGradient
,
cpu
)
{
checkGradientTest
(
configFile1
,
false
,
false
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
checkGradient
,
gpu
)
{
checkGradientTest
(
configFile1
,
true
,
false
);
}
TEST
(
checkGradient
,
multiGpu
)
{
...
...
@@ -97,7 +97,7 @@ TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); }
TEST
(
checkGradient
,
chunk
)
{
checkGradientTest
(
configFile3
,
false
,
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
checkGradientTest
(
configFile3
,
true
,
true
);
#endif
}
...
...
paddle/trainer/tests/test_TrainerOnePass.cpp
浏览文件 @
1172f249
...
...
@@ -79,7 +79,7 @@ void trainerOnePassTest(const string& configFile,
// 1. test trainer (cpu, gpu).
TEST
(
trainerOnePass
,
cpu
)
{
trainerOnePassTest
(
configFile1
,
false
,
false
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
trainerOnePass
,
gpu
)
{
trainerOnePassTest
(
configFile1
,
true
,
false
);
}
TEST
(
trainerOnePass
,
gpu2
)
{
trainerOnePassTest
(
configFile1
,
true
,
false
,
2
);
}
...
...
@@ -94,7 +94,7 @@ TEST(trainerOnePass, parallel) {
#endif
// 2. test average_window.
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
average_window
,
gpu
)
{
trainerOnePassTest
(
configFile1
,
true
,
false
,
4
,
0.01
);
}
...
...
@@ -266,7 +266,7 @@ TEST(checkRemoteUpdater, cpuTrainerOldUpdater) {
checkRemoteParameterUpdaterTest
(
configFile1
,
false
,
false
,
1
,
true
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
checkRemoteUpdater
,
gpuTrainer
)
{
checkRemoteParameterUpdaterTest
(
configFile1
,
true
,
false
);
}
...
...
paddle/trainer/tests/test_recurrent_machine_generation.cpp
浏览文件 @
1172f249
...
...
@@ -113,7 +113,7 @@ void testGeneration(const string& configFile,
#ifndef PADDLE_TYPE_DOUBLE
TEST
(
RecurrentGradientMachine
,
test_generation
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
const
auto
useGpuConfs
=
{
false
};
#else
const
auto
useGpuConfs
=
{
true
,
false
};
...
...
paddle/utils/Flags.cpp
浏览文件 @
1172f249
...
...
@@ -14,7 +14,7 @@ limitations under the License. */
#include "Flags.h"
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
DEFINE_bool
(
use_gpu
,
false
,
"Only support CPU training"
);
#else
DEFINE_bool
(
use_gpu
,
true
,
"Whether to use GPU for training"
);
...
...
paddle/utils/Util.h
浏览文件 @
1172f249
...
...
@@ -218,7 +218,7 @@ protected:
* *d2* is peer device to enable direct access to by the d1 device.
*/
inline
void
enablePeerAccess
(
int
d1
,
int
d2
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
if
(
hl_device_can_access_peer
(
d1
,
d2
))
{
SetDevice
dev
(
d1
);
hl_device_enable_peer_access
(
d2
);
...
...
paddle/utils/Version.h
浏览文件 @
1172f249
...
...
@@ -48,7 +48,7 @@ void printVersion(std::ostream& os);
* @return return true if paddle compiled with GPU
*/
constexpr
bool
isWithGpu
()
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
return
false
;
#else
return
true
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录