Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
1172f249
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1172f249
编写于
10月 04, 2017
作者:
Y
Yi Wang
提交者:
GitHub
10月 04, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4590 from wangkuiyi/paddle_only_cpu
Add -D PADDLE_WITH_CUDA in cmake/configure.cmake
上级
c0511c8a
880b874b
变更
89
显示空白变更内容
内联
并排
Showing
89 changed file
with
136 addition
and
135 deletion
+136
-135
cmake/configure.cmake
cmake/configure.cmake
+2
-1
paddle/api/Util.cpp
paddle/api/Util.cpp
+1
-1
paddle/capi/Matrix.cpp
paddle/capi/Matrix.cpp
+1
-1
paddle/framework/grad_op_builder_test.cc
paddle/framework/grad_op_builder_test.cc
+1
-1
paddle/framework/lod_tensor.h
paddle/framework/lod_tensor.h
+2
-2
paddle/framework/op_proto_maker_test.cc
paddle/framework/op_proto_maker_test.cc
+1
-1
paddle/framework/op_registry.h
paddle/framework/op_registry.h
+1
-1
paddle/framework/op_registry_test.cc
paddle/framework/op_registry_test.cc
+1
-1
paddle/framework/operator.cc
paddle/framework/operator.cc
+1
-1
paddle/framework/tensor_impl.h
paddle/framework/tensor_impl.h
+2
-2
paddle/framework/tensor_test.cc
paddle/framework/tensor_test.cc
+4
-4
paddle/function/BlockExpandOp.cpp
paddle/function/BlockExpandOp.cpp
+1
-1
paddle/function/ContextProjectionOp.cpp
paddle/function/ContextProjectionOp.cpp
+1
-1
paddle/function/CosSimOp.cpp
paddle/function/CosSimOp.cpp
+1
-1
paddle/function/CropOp.cpp
paddle/function/CropOp.cpp
+1
-1
paddle/function/CrossMapNormalOp.cpp
paddle/function/CrossMapNormalOp.cpp
+1
-1
paddle/function/DepthwiseConvOp.cpp
paddle/function/DepthwiseConvOp.cpp
+1
-1
paddle/function/DepthwiseConvOpTest.cpp
paddle/function/DepthwiseConvOpTest.cpp
+1
-1
paddle/function/GemmConvOp.cpp
paddle/function/GemmConvOp.cpp
+1
-1
paddle/function/GemmConvOpTest.cpp
paddle/function/GemmConvOpTest.cpp
+1
-1
paddle/function/Im2ColTest.cpp
paddle/function/Im2ColTest.cpp
+1
-1
paddle/function/MulOp.cpp
paddle/function/MulOp.cpp
+1
-1
paddle/function/PadOp.cpp
paddle/function/PadOp.cpp
+1
-1
paddle/function/RowConvOp.cpp
paddle/function/RowConvOp.cpp
+1
-1
paddle/function/SwitchOp.cpp
paddle/function/SwitchOp.cpp
+1
-1
paddle/gserver/layers/BatchNormBaseLayer.cpp
paddle/gserver/layers/BatchNormBaseLayer.cpp
+1
-1
paddle/gserver/layers/BatchNormalizationLayer.cpp
paddle/gserver/layers/BatchNormalizationLayer.cpp
+3
-3
paddle/gserver/layers/PoolLayer.cpp
paddle/gserver/layers/PoolLayer.cpp
+2
-2
paddle/gserver/tests/LayerGradUtil.cpp
paddle/gserver/tests/LayerGradUtil.cpp
+1
-1
paddle/gserver/tests/test_BatchNorm.cpp
paddle/gserver/tests/test_BatchNorm.cpp
+1
-1
paddle/gserver/tests/test_ConvUnify.cpp
paddle/gserver/tests/test_ConvUnify.cpp
+1
-1
paddle/gserver/tests/test_DetectionOutput.cpp
paddle/gserver/tests/test_DetectionOutput.cpp
+1
-1
paddle/gserver/tests/test_Evaluator.cpp
paddle/gserver/tests/test_Evaluator.cpp
+1
-1
paddle/gserver/tests/test_KmaxSeqScore.cpp
paddle/gserver/tests/test_KmaxSeqScore.cpp
+1
-1
paddle/gserver/tests/test_LayerGrad.cpp
paddle/gserver/tests/test_LayerGrad.cpp
+13
-13
paddle/gserver/tests/test_NetworkCompare.cpp
paddle/gserver/tests/test_NetworkCompare.cpp
+1
-1
paddle/gserver/tests/test_PriorBox.cpp
paddle/gserver/tests/test_PriorBox.cpp
+1
-1
paddle/gserver/tests/test_ProtoDataProvider.cpp
paddle/gserver/tests/test_ProtoDataProvider.cpp
+3
-3
paddle/gserver/tests/test_PyDataProvider.cpp
paddle/gserver/tests/test_PyDataProvider.cpp
+2
-2
paddle/gserver/tests/test_SelectiveFCLayer.cpp
paddle/gserver/tests/test_SelectiveFCLayer.cpp
+4
-4
paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
+1
-1
paddle/gserver/tests/test_WarpCTCLayer.cpp
paddle/gserver/tests/test_WarpCTCLayer.cpp
+1
-1
paddle/math/Matrix.cpp
paddle/math/Matrix.cpp
+3
-3
paddle/math/SparseMatrix.cpp
paddle/math/SparseMatrix.cpp
+1
-1
paddle/math/Vector.cpp
paddle/math/Vector.cpp
+3
-3
paddle/math/tests/test_Allocator.cpp
paddle/math/tests/test_Allocator.cpp
+2
-2
paddle/math/tests/test_BaseMatrix.cpp
paddle/math/tests/test_BaseMatrix.cpp
+1
-1
paddle/math/tests/test_CpuGpuVector.cpp
paddle/math/tests/test_CpuGpuVector.cpp
+1
-1
paddle/math/tests/test_ExecViaCpu.cpp
paddle/math/tests/test_ExecViaCpu.cpp
+1
-1
paddle/math/tests/test_GpuProfiler.cpp
paddle/math/tests/test_GpuProfiler.cpp
+1
-1
paddle/math/tests/test_Matrix.cpp
paddle/math/tests/test_Matrix.cpp
+1
-1
paddle/math/tests/test_SparseMatrix.cpp
paddle/math/tests/test_SparseMatrix.cpp
+3
-3
paddle/math/tests/test_TrainingAlgorithm.cpp
paddle/math/tests/test_TrainingAlgorithm.cpp
+1
-1
paddle/math/tests/test_batchTranspose.cpp
paddle/math/tests/test_batchTranspose.cpp
+1
-1
paddle/math/tests/test_matrixCompare.cpp
paddle/math/tests/test_matrixCompare.cpp
+1
-1
paddle/math/tests/test_perturbation.cpp
paddle/math/tests/test_perturbation.cpp
+1
-1
paddle/math/tests/test_sparseMatrixCompare.cpp
paddle/math/tests/test_sparseMatrixCompare.cpp
+1
-1
paddle/memory/detail/buddy_allocator.cc
paddle/memory/detail/buddy_allocator.cc
+1
-1
paddle/memory/detail/system_allocator.cc
paddle/memory/detail/system_allocator.cc
+1
-1
paddle/memory/detail/system_allocator.h
paddle/memory/detail/system_allocator.h
+1
-1
paddle/memory/detail/system_allocator_test.cc
paddle/memory/detail/system_allocator_test.cc
+1
-1
paddle/memory/memcpy.cc
paddle/memory/memcpy.cc
+1
-1
paddle/memory/memcpy.h
paddle/memory/memcpy.h
+1
-1
paddle/memory/memory.cc
paddle/memory/memory.cc
+1
-1
paddle/memory/memory_test.cc
paddle/memory/memory_test.cc
+1
-1
paddle/operators/detail/strided_memcpy.h
paddle/operators/detail/strided_memcpy.h
+1
-1
paddle/operators/math/im2col_test.cc
paddle/operators/math/im2col_test.cc
+2
-2
paddle/operators/math/math_function_test.cc
paddle/operators/math/math_function_test.cc
+1
-1
paddle/operators/strided_memcpy_test.cc
paddle/operators/strided_memcpy_test.cc
+2
-2
paddle/platform/device_context.cc
paddle/platform/device_context.cc
+1
-1
paddle/platform/device_context.h
paddle/platform/device_context.h
+2
-2
paddle/platform/enforce.h
paddle/platform/enforce.h
+2
-2
paddle/platform/enforce_test.cc
paddle/platform/enforce_test.cc
+1
-1
paddle/platform/gpu_info.h
paddle/platform/gpu_info.h
+1
-1
paddle/platform/variant.h
paddle/platform/variant.h
+1
-1
paddle/pserver/test/SocketTest.cpp
paddle/pserver/test/SocketTest.cpp
+1
-1
paddle/pserver/test/test_ProtoServer.cpp
paddle/pserver/test/test_ProtoServer.cpp
+1
-1
paddle/pybind/pybind.cc
paddle/pybind/pybind.cc
+6
-6
paddle/pybind/tensor_py.h
paddle/pybind/tensor_py.h
+1
-1
paddle/string/to_string_test.cc
paddle/string/to_string_test.cc
+1
-1
paddle/trainer/MergeModel.cpp
paddle/trainer/MergeModel.cpp
+1
-1
paddle/trainer/tests/test_Compare.cpp
paddle/trainer/tests/test_Compare.cpp
+1
-1
paddle/trainer/tests/test_CompareSparse.cpp
paddle/trainer/tests/test_CompareSparse.cpp
+2
-2
paddle/trainer/tests/test_Trainer.cpp
paddle/trainer/tests/test_Trainer.cpp
+2
-2
paddle/trainer/tests/test_TrainerOnePass.cpp
paddle/trainer/tests/test_TrainerOnePass.cpp
+3
-3
paddle/trainer/tests/test_recurrent_machine_generation.cpp
paddle/trainer/tests/test_recurrent_machine_generation.cpp
+1
-1
paddle/utils/Flags.cpp
paddle/utils/Flags.cpp
+1
-1
paddle/utils/Util.h
paddle/utils/Util.h
+1
-1
paddle/utils/Version.h
paddle/utils/Version.h
+1
-1
未找到文件。
cmake/configure.cmake
浏览文件 @
1172f249
...
@@ -53,7 +53,8 @@ if(NOT WITH_GPU)
...
@@ -53,7 +53,8 @@ if(NOT WITH_GPU)
list
(
APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu
)
list
(
APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu
)
else
()
else
()
add_definitions
(
-DPADDLE_WITH_GPU
)
add_definitions
(
-DPADDLE_WITH_CUDA
)
FIND_PACKAGE
(
CUDA REQUIRED
)
FIND_PACKAGE
(
CUDA REQUIRED
)
if
(
${
CUDA_VERSION_MAJOR
}
VERSION_LESS 7
)
if
(
${
CUDA_VERSION_MAJOR
}
VERSION_LESS 7
)
...
...
paddle/api/Util.cpp
浏览文件 @
1172f249
...
@@ -47,7 +47,7 @@ bool isUsingGpu() { return FLAGS_use_gpu; }
...
@@ -47,7 +47,7 @@ bool isUsingGpu() { return FLAGS_use_gpu; }
void
setUseGpu
(
bool
useGpu
)
{
FLAGS_use_gpu
=
useGpu
;
}
void
setUseGpu
(
bool
useGpu
)
{
FLAGS_use_gpu
=
useGpu
;
}
bool
isGpuVersion
()
{
bool
isGpuVersion
()
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
return
false
;
return
false
;
#else
#else
return
true
;
return
true
;
...
...
paddle/capi/Matrix.cpp
浏览文件 @
1172f249
...
@@ -46,7 +46,7 @@ paddle_error paddle_matrix_set_row(paddle_matrix mat,
...
@@ -46,7 +46,7 @@ paddle_error paddle_matrix_set_row(paddle_matrix mat,
if
(
rowID
>=
ptr
->
mat
->
getHeight
())
return
kPD_OUT_OF_RANGE
;
if
(
rowID
>=
ptr
->
mat
->
getHeight
())
return
kPD_OUT_OF_RANGE
;
paddle
::
real
*
buf
=
ptr
->
mat
->
getRowBuf
(
rowID
);
paddle
::
real
*
buf
=
ptr
->
mat
->
getRowBuf
(
rowID
);
size_t
width
=
ptr
->
mat
->
getWidth
();
size_t
width
=
ptr
->
mat
->
getWidth
();
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
hl_memcpy
(
buf
,
rowArray
,
sizeof
(
paddle
::
real
)
*
width
);
hl_memcpy
(
buf
,
rowArray
,
sizeof
(
paddle
::
real
)
*
width
);
#else
#else
std
::
copy
(
rowArray
,
rowArray
+
width
,
buf
);
std
::
copy
(
rowArray
,
rowArray
+
width
,
buf
);
...
...
paddle/framework/grad_op_builder_test.cc
浏览文件 @
1172f249
paddle/framework/lod_tensor.h
浏览文件 @
1172f249
...
@@ -15,7 +15,7 @@
...
@@ -15,7 +15,7 @@
#pragma once
#pragma once
#include <memory>
#include <memory>
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <thrust/device_vector.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/host_vector.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>
...
@@ -29,7 +29,7 @@
...
@@ -29,7 +29,7 @@
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
template
<
typename
T
>
template
<
typename
T
>
using
Vector
=
std
::
vector
<
T
>
;
using
Vector
=
std
::
vector
<
T
>
;
#else
#else
...
...
paddle/framework/op_proto_maker_test.cc
浏览文件 @
1172f249
paddle/framework/op_registry.h
浏览文件 @
1172f249
...
@@ -211,7 +211,7 @@ class OpKernelRegistrar : public Registrar {
...
@@ -211,7 +211,7 @@ class OpKernelRegistrar : public Registrar {
// TODO(fengjiayi): The following macros
// TODO(fengjiayi): The following macros
// seems ugly, do we have better method?
// seems ugly, do we have better method?
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
#define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU)
#define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU)
#else
#else
#define USE_OP_KERNEL(op_type) \
#define USE_OP_KERNEL(op_type) \
...
...
paddle/framework/op_registry_test.cc
浏览文件 @
1172f249
paddle/framework/operator.cc
浏览文件 @
1172f249
...
@@ -25,7 +25,7 @@ Eigen::DefaultDevice& ExecutionContext::GetEigenDevice<
...
@@ -25,7 +25,7 @@ Eigen::DefaultDevice& ExecutionContext::GetEigenDevice<
return
*
device_context_
.
GetEigenDevice
<
platform
::
CPUPlace
>
();
return
*
device_context_
.
GetEigenDevice
<
platform
::
CPUPlace
>
();
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
>
template
<
>
Eigen
::
GpuDevice
&
Eigen
::
GpuDevice
&
ExecutionContext
::
GetEigenDevice
<
platform
::
GPUPlace
,
Eigen
::
GpuDevice
>
()
const
{
ExecutionContext
::
GetEigenDevice
<
platform
::
GPUPlace
,
Eigen
::
GpuDevice
>
()
const
{
...
...
paddle/framework/tensor_impl.h
浏览文件 @
1172f249
...
@@ -65,7 +65,7 @@ inline T* Tensor::mutable_data(platform::Place place) {
...
@@ -65,7 +65,7 @@ inline T* Tensor::mutable_data(platform::Place place) {
holder_
.
reset
(
new
PlaceholderImpl
<
T
,
platform
::
CPUPlace
>
(
holder_
.
reset
(
new
PlaceholderImpl
<
T
,
platform
::
CPUPlace
>
(
boost
::
get
<
platform
::
CPUPlace
>
(
place
),
size
));
boost
::
get
<
platform
::
CPUPlace
>
(
place
),
size
));
}
else
if
(
platform
::
is_gpu_place
(
place
))
{
}
else
if
(
platform
::
is_gpu_place
(
place
))
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
PADDLE_THROW
(
"'GPUPlace' is not supported in CPU only device."
);
PADDLE_THROW
(
"'GPUPlace' is not supported in CPU only device."
);
}
}
#else
#else
...
@@ -103,7 +103,7 @@ inline void Tensor::CopyFrom(const Tensor& src,
...
@@ -103,7 +103,7 @@ inline void Tensor::CopyFrom(const Tensor& src,
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
boost
::
get
<
platform
::
CPUPlace
>
(
src_place
),
src_ptr
,
size
);
boost
::
get
<
platform
::
CPUPlace
>
(
src_place
),
src_ptr
,
size
);
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
else
if
(
platform
::
is_gpu_place
(
src_place
)
&&
else
if
(
platform
::
is_gpu_place
(
src_place
)
&&
platform
::
is_cpu_place
(
dst_place
))
{
platform
::
is_cpu_place
(
dst_place
))
{
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
...
...
paddle/framework/tensor_test.cc
浏览文件 @
1172f249
...
@@ -74,7 +74,7 @@ TEST(Tensor, MutableData) {
...
@@ -74,7 +74,7 @@ TEST(Tensor, MutableData) {
EXPECT_EQ
(
p1
,
p2
);
EXPECT_EQ
(
p1
,
p2
);
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
{
{
Tensor
src_tensor
;
Tensor
src_tensor
;
float
*
p1
=
nullptr
;
float
*
p1
=
nullptr
;
...
@@ -126,7 +126,7 @@ TEST(Tensor, ShareDataWith) {
...
@@ -126,7 +126,7 @@ TEST(Tensor, ShareDataWith) {
ASSERT_EQ
(
src_tensor
.
data
<
int
>
(),
dst_tensor
.
data
<
int
>
());
ASSERT_EQ
(
src_tensor
.
data
<
int
>
(),
dst_tensor
.
data
<
int
>
());
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
{
{
Tensor
src_tensor
;
Tensor
src_tensor
;
Tensor
dst_tensor
;
Tensor
dst_tensor
;
...
@@ -163,7 +163,7 @@ TEST(Tensor, Slice) {
...
@@ -163,7 +163,7 @@ TEST(Tensor, Slice) {
EXPECT_EQ
(
src_data_address
+
3
*
4
*
1
*
sizeof
(
int
),
slice_data_address
);
EXPECT_EQ
(
src_data_address
+
3
*
4
*
1
*
sizeof
(
int
),
slice_data_address
);
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
{
{
Tensor
src_tensor
;
Tensor
src_tensor
;
src_tensor
.
mutable_data
<
double
>
(
make_ddim
({
6
,
9
}),
GPUPlace
());
src_tensor
.
mutable_data
<
double
>
(
make_ddim
({
6
,
9
}),
GPUPlace
());
...
@@ -218,7 +218,7 @@ TEST(Tensor, CopyFrom) {
...
@@ -218,7 +218,7 @@ TEST(Tensor, CopyFrom) {
EXPECT_EQ
(
dst_ptr
[
i
],
slice_ptr
[
i
]);
EXPECT_EQ
(
dst_ptr
[
i
],
slice_ptr
[
i
]);
}
}
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
{
{
Tensor
src_tensor
;
Tensor
src_tensor
;
Tensor
gpu_tensor
;
Tensor
gpu_tensor
;
...
...
paddle/function/BlockExpandOp.cpp
浏览文件 @
1172f249
...
@@ -194,7 +194,7 @@ public:
...
@@ -194,7 +194,7 @@ public:
REGISTER_TYPED_FUNC
(
BlockExpand
,
CPU
,
BlockExpandForward
);
REGISTER_TYPED_FUNC
(
BlockExpand
,
CPU
,
BlockExpandForward
);
REGISTER_TYPED_FUNC
(
BlockExpandGrad
,
CPU
,
BlockExpandBackward
);
REGISTER_TYPED_FUNC
(
BlockExpandGrad
,
CPU
,
BlockExpandBackward
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
BlockExpand
,
GPU
,
BlockExpandForward
);
REGISTER_TYPED_FUNC
(
BlockExpand
,
GPU
,
BlockExpandForward
);
REGISTER_TYPED_FUNC
(
BlockExpandGrad
,
GPU
,
BlockExpandBackward
);
REGISTER_TYPED_FUNC
(
BlockExpandGrad
,
GPU
,
BlockExpandBackward
);
#endif
#endif
...
...
paddle/function/ContextProjectionOp.cpp
浏览文件 @
1172f249
...
@@ -395,7 +395,7 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
...
@@ -395,7 +395,7 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
REGISTER_TYPED_FUNC
(
ContextProjectionBackward
,
REGISTER_TYPED_FUNC
(
ContextProjectionBackward
,
CPU
,
CPU
,
ContextProjectionBackwardFunc
);
ContextProjectionBackwardFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
ContextProjectionForward
,
REGISTER_TYPED_FUNC
(
ContextProjectionForward
,
GPU
,
GPU
,
ContextProjectionForwardFunc
);
ContextProjectionForwardFunc
);
...
...
paddle/function/CosSimOp.cpp
浏览文件 @
1172f249
...
@@ -233,7 +233,7 @@ private:
...
@@ -233,7 +233,7 @@ private:
REGISTER_TYPED_FUNC
(
CosSimForward
,
CPU
,
CosSimForwardFunc
);
REGISTER_TYPED_FUNC
(
CosSimForward
,
CPU
,
CosSimForwardFunc
);
REGISTER_TYPED_FUNC
(
CosSimBackward
,
CPU
,
CosSimBackwardFunc
);
REGISTER_TYPED_FUNC
(
CosSimBackward
,
CPU
,
CosSimBackwardFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
CosSimForward
,
GPU
,
CosSimForwardFunc
);
REGISTER_TYPED_FUNC
(
CosSimForward
,
GPU
,
CosSimForwardFunc
);
REGISTER_TYPED_FUNC
(
CosSimBackward
,
GPU
,
CosSimBackwardFunc
);
REGISTER_TYPED_FUNC
(
CosSimBackward
,
GPU
,
CosSimBackwardFunc
);
#endif
#endif
...
...
paddle/function/CropOp.cpp
浏览文件 @
1172f249
...
@@ -169,7 +169,7 @@ private:
...
@@ -169,7 +169,7 @@ private:
REGISTER_TYPED_FUNC
(
Crop
,
CPU
,
CropFunc
);
REGISTER_TYPED_FUNC
(
Crop
,
CPU
,
CropFunc
);
REGISTER_TYPED_FUNC
(
CropGrad
,
CPU
,
CropGradFunc
);
REGISTER_TYPED_FUNC
(
CropGrad
,
CPU
,
CropGradFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
Crop
,
GPU
,
CropFunc
);
REGISTER_TYPED_FUNC
(
Crop
,
GPU
,
CropFunc
);
REGISTER_TYPED_FUNC
(
CropGrad
,
GPU
,
CropGradFunc
);
REGISTER_TYPED_FUNC
(
CropGrad
,
GPU
,
CropGradFunc
);
#endif
#endif
...
...
paddle/function/CrossMapNormalOp.cpp
浏览文件 @
1172f249
...
@@ -336,7 +336,7 @@ private:
...
@@ -336,7 +336,7 @@ private:
REGISTER_TYPED_FUNC
(
CrossMapNormal
,
CPU
,
CrossMapNormalFunc
);
REGISTER_TYPED_FUNC
(
CrossMapNormal
,
CPU
,
CrossMapNormalFunc
);
REGISTER_TYPED_FUNC
(
CrossMapNormalGrad
,
CPU
,
CrossMapNormalGradFunc
);
REGISTER_TYPED_FUNC
(
CrossMapNormalGrad
,
CPU
,
CrossMapNormalGradFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
CrossMapNormal
,
GPU
,
CrossMapNormalFunc
);
REGISTER_TYPED_FUNC
(
CrossMapNormal
,
GPU
,
CrossMapNormalFunc
);
REGISTER_TYPED_FUNC
(
CrossMapNormalGrad
,
GPU
,
CrossMapNormalGradFunc
);
REGISTER_TYPED_FUNC
(
CrossMapNormalGrad
,
GPU
,
CrossMapNormalGradFunc
);
#endif
#endif
...
...
paddle/function/DepthwiseConvOp.cpp
浏览文件 @
1172f249
...
@@ -292,7 +292,7 @@ REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
...
@@ -292,7 +292,7 @@ REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
REGISTER_TYPED_FUNC
(
DepthwiseConvGradFilter
,
REGISTER_TYPED_FUNC
(
DepthwiseConvGradFilter
,
CPU
,
CPU
,
DepthwiseConvGradFilterFunction
);
DepthwiseConvGradFilterFunction
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
DepthwiseConv
,
GPU
,
DepthwiseConvFunction
);
REGISTER_TYPED_FUNC
(
DepthwiseConv
,
GPU
,
DepthwiseConvFunction
);
REGISTER_TYPED_FUNC
(
DepthwiseConvGradInput
,
REGISTER_TYPED_FUNC
(
DepthwiseConvGradInput
,
GPU
,
GPU
,
...
...
paddle/function/DepthwiseConvOpTest.cpp
浏览文件 @
1172f249
...
@@ -17,7 +17,7 @@ limitations under the License. */
...
@@ -17,7 +17,7 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
DepthwiseConv
,
Forward
)
{
TEST
(
DepthwiseConv
,
Forward
)
{
DepthwiseConvolution
<
DEVICE_TYPE_CPU
,
DEVICE_TYPE_GPU
>
(
DepthwiseConvolution
<
DEVICE_TYPE_CPU
,
DEVICE_TYPE_GPU
>
(
"GemmConv-CPU"
,
"DepthwiseConv-GPU"
,
forward
);
"GemmConv-CPU"
,
"DepthwiseConv-GPU"
,
forward
);
...
...
paddle/function/GemmConvOp.cpp
浏览文件 @
1172f249
...
@@ -340,7 +340,7 @@ public:
...
@@ -340,7 +340,7 @@ public:
REGISTER_TYPED_FUNC
(
GemmConv
,
CPU
,
GemmConvFunction
);
REGISTER_TYPED_FUNC
(
GemmConv
,
CPU
,
GemmConvFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradInput
,
CPU
,
GemmConvGradInputFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradInput
,
CPU
,
GemmConvGradInputFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradFilter
,
CPU
,
GemmConvGradFilterFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradFilter
,
CPU
,
GemmConvGradFilterFunction
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
GemmConv
,
GPU
,
GemmConvFunction
);
REGISTER_TYPED_FUNC
(
GemmConv
,
GPU
,
GemmConvFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradInput
,
GPU
,
GemmConvGradInputFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradInput
,
GPU
,
GemmConvGradInputFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradFilter
,
GPU
,
GemmConvGradFilterFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradFilter
,
GPU
,
GemmConvGradFilterFunction
);
...
...
paddle/function/GemmConvOpTest.cpp
浏览文件 @
1172f249
...
@@ -24,7 +24,7 @@ TEST(GemmConv, NaiveConv) {
...
@@ -24,7 +24,7 @@ TEST(GemmConv, NaiveConv) {
"NaiveConv-CPU"
,
"GemmConv-CPU"
,
forward
);
"NaiveConv-CPU"
,
"GemmConv-CPU"
,
forward
);
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
GemmConv
,
Forward
)
{
TEST
(
GemmConv
,
Forward
)
{
Convolution
<
DEVICE_TYPE_CPU
,
DEVICE_TYPE_GPU
>
(
Convolution
<
DEVICE_TYPE_CPU
,
DEVICE_TYPE_GPU
>
(
"GemmConv-CPU"
,
"GemmConv-GPU"
,
forward
);
"GemmConv-CPU"
,
"GemmConv-GPU"
,
forward
);
...
...
paddle/function/Im2ColTest.cpp
浏览文件 @
1172f249
...
@@ -116,7 +116,7 @@ void TestIm2ColFunctor() {
...
@@ -116,7 +116,7 @@ void TestIm2ColFunctor() {
TEST
(
Im2ColFunctor
,
CPU
)
{
TestIm2ColFunctor
<
DEVICE_TYPE_CPU
,
float
>
();
}
TEST
(
Im2ColFunctor
,
CPU
)
{
TestIm2ColFunctor
<
DEVICE_TYPE_CPU
,
float
>
();
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
Im2ColFunctor
,
GPU
)
{
TestIm2ColFunctor
<
DEVICE_TYPE_GPU
,
float
>
();
}
TEST
(
Im2ColFunctor
,
GPU
)
{
TestIm2ColFunctor
<
DEVICE_TYPE_GPU
,
float
>
();
}
...
...
paddle/function/MulOp.cpp
浏览文件 @
1172f249
...
@@ -341,7 +341,7 @@ private:
...
@@ -341,7 +341,7 @@ private:
};
};
REGISTER_TYPED_FUNC
(
MulOp
,
CPU
,
MulFunc
);
REGISTER_TYPED_FUNC
(
MulOp
,
CPU
,
MulFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
MulOp
,
GPU
,
MulFunc
);
REGISTER_TYPED_FUNC
(
MulOp
,
GPU
,
MulFunc
);
#endif
#endif
}
// namespace paddle
}
// namespace paddle
paddle/function/PadOp.cpp
浏览文件 @
1172f249
...
@@ -207,7 +207,7 @@ private:
...
@@ -207,7 +207,7 @@ private:
REGISTER_TYPED_FUNC
(
Pad
,
CPU
,
PadFunc
);
REGISTER_TYPED_FUNC
(
Pad
,
CPU
,
PadFunc
);
REGISTER_TYPED_FUNC
(
PadGrad
,
CPU
,
PadGradFunc
);
REGISTER_TYPED_FUNC
(
PadGrad
,
CPU
,
PadGradFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
Pad
,
GPU
,
PadFunc
);
REGISTER_TYPED_FUNC
(
Pad
,
GPU
,
PadFunc
);
REGISTER_TYPED_FUNC
(
PadGrad
,
GPU
,
PadGradFunc
);
REGISTER_TYPED_FUNC
(
PadGrad
,
GPU
,
PadGradFunc
);
#endif
#endif
...
...
paddle/function/RowConvOp.cpp
浏览文件 @
1172f249
...
@@ -217,7 +217,7 @@ public:
...
@@ -217,7 +217,7 @@ public:
REGISTER_TYPED_FUNC
(
RowConv
,
CPU
,
RowConvFunc
);
REGISTER_TYPED_FUNC
(
RowConv
,
CPU
,
RowConvFunc
);
REGISTER_TYPED_FUNC
(
RowConvGrad
,
CPU
,
RowConvGradFunc
);
REGISTER_TYPED_FUNC
(
RowConvGrad
,
CPU
,
RowConvGradFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
RowConv
,
GPU
,
RowConvFunc
);
REGISTER_TYPED_FUNC
(
RowConv
,
GPU
,
RowConvFunc
);
REGISTER_TYPED_FUNC
(
RowConvGrad
,
GPU
,
RowConvGradFunc
);
REGISTER_TYPED_FUNC
(
RowConvGrad
,
GPU
,
RowConvGradFunc
);
#endif
#endif
...
...
paddle/function/SwitchOp.cpp
浏览文件 @
1172f249
...
@@ -132,7 +132,7 @@ public:
...
@@ -132,7 +132,7 @@ public:
REGISTER_TYPED_FUNC
(
NCHW2NHWC
,
CPU
,
NCHW2NHWCFunc
);
REGISTER_TYPED_FUNC
(
NCHW2NHWC
,
CPU
,
NCHW2NHWCFunc
);
REGISTER_TYPED_FUNC
(
NHWC2NCHW
,
CPU
,
NHWC2NCHWFunc
);
REGISTER_TYPED_FUNC
(
NHWC2NCHW
,
CPU
,
NHWC2NCHWFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
NCHW2NHWC
,
GPU
,
NCHW2NHWCFunc
);
REGISTER_TYPED_FUNC
(
NCHW2NHWC
,
GPU
,
NCHW2NHWCFunc
);
REGISTER_TYPED_FUNC
(
NHWC2NCHW
,
GPU
,
NHWC2NCHWFunc
);
REGISTER_TYPED_FUNC
(
NHWC2NCHW
,
GPU
,
NHWC2NCHWFunc
);
#endif
#endif
...
...
paddle/gserver/layers/BatchNormBaseLayer.cpp
浏览文件 @
1172f249
...
@@ -16,7 +16,7 @@ limitations under the License. */
...
@@ -16,7 +16,7 @@ limitations under the License. */
#include "BatchNormalizationLayer.h"
#include "BatchNormalizationLayer.h"
#include "Layer.h"
#include "Layer.h"
#include "paddle/utils/Stat.h"
#include "paddle/utils/Stat.h"
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include "CudnnBatchNormLayer.h"
#include "CudnnBatchNormLayer.h"
#endif
#endif
...
...
paddle/gserver/layers/BatchNormalizationLayer.cpp
浏览文件 @
1172f249
...
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
...
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/utils/Stat.h"
#include "paddle/utils/Stat.h"
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include "hl_batch_transpose.h"
#include "hl_batch_transpose.h"
#endif
#endif
#include "BatchNormalizationLayer.h"
#include "BatchNormalizationLayer.h"
...
@@ -90,7 +90,7 @@ void BatchNormalizationLayer::expandMat(const MatrixPtr& in, MatrixPtr& out) {
...
@@ -90,7 +90,7 @@ void BatchNormalizationLayer::expandMat(const MatrixPtr& in, MatrixPtr& out) {
size_t
batchSize
=
in
->
getHeight
();
size_t
batchSize
=
in
->
getHeight
();
CHECK_EQ
(
out
->
getHeight
(),
batchSize
*
imgPixels_
);
CHECK_EQ
(
out
->
getHeight
(),
batchSize
*
imgPixels_
);
if
(
useGpu_
)
{
if
(
useGpu_
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
LOG
(
FATAL
)
<<
"paddle is compiled only for cpu"
;
LOG
(
FATAL
)
<<
"paddle is compiled only for cpu"
;
#else
#else
batchTranspose
(
batchTranspose
(
...
@@ -127,7 +127,7 @@ void BatchNormalizationLayer::shrinkMat(const MatrixPtr& in, MatrixPtr& out) {
...
@@ -127,7 +127,7 @@ void BatchNormalizationLayer::shrinkMat(const MatrixPtr& in, MatrixPtr& out) {
}
}
CHECK_EQ
(
in
->
getHeight
(),
static_cast
<
size_t
>
(
batchSize
*
imgPixels_
));
CHECK_EQ
(
in
->
getHeight
(),
static_cast
<
size_t
>
(
batchSize
*
imgPixels_
));
if
(
useGpu_
)
{
if
(
useGpu_
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
LOG
(
FATAL
)
<<
"paddle is compiled only for cpu"
;
LOG
(
FATAL
)
<<
"paddle is compiled only for cpu"
;
#else
#else
batchTranspose
(
batchTranspose
(
...
...
paddle/gserver/layers/PoolLayer.cpp
浏览文件 @
1172f249
...
@@ -15,7 +15,7 @@ limitations under the License. */
...
@@ -15,7 +15,7 @@ limitations under the License. */
#include "PoolLayer.h"
#include "PoolLayer.h"
#include "PoolProjectionLayer.h"
#include "PoolProjectionLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Logging.h"
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include "CudnnPoolLayer.h"
#include "CudnnPoolLayer.h"
#endif
#endif
namespace
paddle
{
namespace
paddle
{
...
@@ -53,7 +53,7 @@ Layer* PoolLayer::create(const LayerConfig& config) {
...
@@ -53,7 +53,7 @@ Layer* PoolLayer::create(const LayerConfig& config) {
const
std
::
string
&
pool
=
config
.
inputs
(
0
).
pool_conf
().
pool_type
();
const
std
::
string
&
pool
=
config
.
inputs
(
0
).
pool_conf
().
pool_type
();
if
(
pool
==
"max-projection"
||
pool
==
"avg-projection"
)
{
if
(
pool
==
"max-projection"
||
pool
==
"avg-projection"
)
{
return
new
PoolProjectionLayer
(
config
);
return
new
PoolProjectionLayer
(
config
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
}
else
if
(
CudnnPoolLayer
::
typeCheck
(
pool
))
{
}
else
if
(
CudnnPoolLayer
::
typeCheck
(
pool
))
{
return
new
CudnnPoolLayer
(
config
);
return
new
CudnnPoolLayer
(
config
);
#endif
#endif
...
...
paddle/gserver/tests/LayerGradUtil.cpp
浏览文件 @
1172f249
...
@@ -674,7 +674,7 @@ void testLayerGradKernel(TestConfig testConf,
...
@@ -674,7 +674,7 @@ void testLayerGradKernel(TestConfig testConf,
bool
useGpu
,
bool
useGpu
,
bool
useWeight
,
bool
useWeight
,
float
epsilon
)
{
float
epsilon
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
return
;
if
(
useGpu
)
return
;
#endif
#endif
FLAGS_use_gpu
=
useGpu
;
FLAGS_use_gpu
=
useGpu
;
...
...
paddle/gserver/tests/test_BatchNorm.cpp
浏览文件 @
1172f249
...
@@ -119,7 +119,7 @@ TEST(Layer, batchNorm) {
...
@@ -119,7 +119,7 @@ TEST(Layer, batchNorm) {
CHECK_EQ
(
static_cast
<
int
>
(
convLayer
->
getOutputValue
()
->
getWidth
()),
576
);
CHECK_EQ
(
static_cast
<
int
>
(
convLayer
->
getOutputValue
()
->
getWidth
()),
576
);
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
void
batchNormInference
(
int
n
,
int
c
,
int
h
,
int
w
)
{
void
batchNormInference
(
int
n
,
int
c
,
int
h
,
int
w
)
{
MatrixPtr
input
=
std
::
make_shared
<
GpuMatrix
>
(
n
,
c
*
h
*
w
);
MatrixPtr
input
=
std
::
make_shared
<
GpuMatrix
>
(
n
,
c
*
h
*
w
);
MatrixPtr
cudnnOut
=
std
::
make_shared
<
GpuMatrix
>
(
n
,
c
*
h
*
w
);
MatrixPtr
cudnnOut
=
std
::
make_shared
<
GpuMatrix
>
(
n
,
c
*
h
*
w
);
...
...
paddle/gserver/tests/test_ConvUnify.cpp
浏览文件 @
1172f249
...
@@ -117,7 +117,7 @@ MatrixPtr doOneConvTest(size_t imgSize,
...
@@ -117,7 +117,7 @@ MatrixPtr doOneConvTest(size_t imgSize,
}
}
TEST
(
Layer
,
convParaUnified
)
{
TEST
(
Layer
,
convParaUnified
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
MatrixPtr
input
,
resultCpu
,
resultGpu
;
MatrixPtr
input
,
resultCpu
,
resultGpu
;
/// TEST1 for conv ///
/// TEST1 for conv ///
...
...
paddle/gserver/tests/test_DetectionOutput.cpp
浏览文件 @
1172f249
...
@@ -150,7 +150,7 @@ TEST(Layer, detectionOutputLayerFwd) {
...
@@ -150,7 +150,7 @@ TEST(Layer, detectionOutputLayerFwd) {
useGpu
,
useGpu
,
result2
);
result2
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
// GPU case 1.
// GPU case 1.
useGpu
=
true
;
useGpu
=
true
;
inputLoc
=
Matrix
::
create
(
1
,
16
,
false
,
useGpu
);
inputLoc
=
Matrix
::
create
(
1
,
16
,
false
,
useGpu
);
...
...
paddle/gserver/tests/test_Evaluator.cpp
浏览文件 @
1172f249
...
@@ -51,7 +51,7 @@ void testEvaluator(TestConfig testConf,
...
@@ -51,7 +51,7 @@ void testEvaluator(TestConfig testConf,
string
testEvaluatorName
,
string
testEvaluatorName
,
size_t
batchSize
,
size_t
batchSize
,
bool
useGpu
)
{
bool
useGpu
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
return
;
if
(
useGpu
)
return
;
#endif
#endif
FLAGS_use_gpu
=
useGpu
;
FLAGS_use_gpu
=
useGpu
;
...
...
paddle/gserver/tests/test_KmaxSeqScore.cpp
浏览文件 @
1172f249
...
@@ -97,7 +97,7 @@ TEST(Layer, kmaxSeqScoreLayer) {
...
@@ -97,7 +97,7 @@ TEST(Layer, kmaxSeqScoreLayer) {
Matrix
::
create
(
subSeqStartPosition
.
back
(),
1
,
false
,
false
);
Matrix
::
create
(
subSeqStartPosition
.
back
(),
1
,
false
,
false
);
std
::
vector
<
bool
>
mode
=
{
false
};
std
::
vector
<
bool
>
mode
=
{
false
};
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
mode
.
push_back
(
true
);
mode
.
push_back
(
true
);
#endif
#endif
...
...
paddle/gserver/tests/test_LayerGrad.cpp
浏览文件 @
1172f249
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <cudnn.h>
#include <cudnn.h>
#endif
#endif
#include <gtest/gtest.h>
#include <gtest/gtest.h>
...
@@ -258,7 +258,7 @@ void testProjectionConv(size_t groups, bool isDeconv) {
...
@@ -258,7 +258,7 @@ void testProjectionConv(size_t groups, bool isDeconv) {
true
);
true
);
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
Projection
,
conv
)
{
TEST
(
Projection
,
conv
)
{
/// test ConvProjection
/// test ConvProjection
testProjectionConv
(
1
,
false
);
testProjectionConv
(
1
,
false
);
...
@@ -422,7 +422,7 @@ TEST(Layer, depthwiseConvLayer) {
...
@@ -422,7 +422,7 @@ TEST(Layer, depthwiseConvLayer) {
// 'depthwise_conv' is a sepecial case of 'exconv' whose
// 'depthwise_conv' is a sepecial case of 'exconv' whose
// groups size equals to the input channels size.
// groups size equals to the input channels size.
testDepthwiseConvLayer
(
"exconv"
,
/* useGpu= */
false
);
testDepthwiseConvLayer
(
"exconv"
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testDepthwiseConvLayer
(
"exconv"
,
/* useGpu= */
true
);
testDepthwiseConvLayer
(
"exconv"
,
/* useGpu= */
true
);
#endif
#endif
}
}
...
@@ -480,7 +480,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
...
@@ -480,7 +480,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
convLayer
)
{
TEST
(
Layer
,
convLayer
)
{
testConvLayer
(
"exconv"
,
/* trans= */
false
,
/* useGpu= */
false
);
testConvLayer
(
"exconv"
,
/* trans= */
false
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testConvLayer
(
"exconv"
,
/* trans= */
false
,
/* useGpu= */
true
);
testConvLayer
(
"exconv"
,
/* trans= */
false
,
/* useGpu= */
true
);
testConvLayer
(
"cudnn_conv"
,
/* trans= */
false
,
/* useGpu= */
true
);
testConvLayer
(
"cudnn_conv"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
#endif
...
@@ -525,7 +525,7 @@ TEST(Layer, convTransLayer) {
...
@@ -525,7 +525,7 @@ TEST(Layer, convTransLayer) {
for
(
auto
useGpu
:
{
false
,
true
})
{
for
(
auto
useGpu
:
{
false
,
true
})
{
testConvTransLayer
(
"exconvt"
,
/* trans= */
false
,
/* useGpu= */
useGpu
);
testConvTransLayer
(
"exconvt"
,
/* trans= */
false
,
/* useGpu= */
useGpu
);
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testConvTransLayer
(
"cudnn_convt"
,
/* trans= */
false
,
/* useGpu= */
true
);
testConvTransLayer
(
"cudnn_convt"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
#endif
}
}
...
@@ -638,7 +638,7 @@ TEST(Layer, SelectiveFullyConnectedLayer) {
...
@@ -638,7 +638,7 @@ TEST(Layer, SelectiveFullyConnectedLayer) {
/* trans= */
false
,
/* trans= */
false
,
/* useGup= */
false
,
/* useGup= */
false
,
false
);
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testLayerGrad
(
config
,
testLayerGrad
(
config
,
"selective_fc"
,
"selective_fc"
,
100
,
100
,
...
@@ -1210,7 +1210,7 @@ void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
...
@@ -1210,7 +1210,7 @@ void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
testLayerGrad
(
config
,
"pool"
,
100
,
trans
,
useGpu
);
testLayerGrad
(
config
,
"pool"
,
100
,
trans
,
useGpu
);
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
void
testPoolLayer2
(
const
string
&
poolType
,
bool
trans
,
bool
useGpu
)
{
void
testPoolLayer2
(
const
string
&
poolType
,
bool
trans
,
bool
useGpu
)
{
TestConfig
config
;
TestConfig
config
;
config
.
inputDefs
.
push_back
({
INPUT_DATA
,
"layer_0"
,
3200
,
0
});
config
.
inputDefs
.
push_back
({
INPUT_DATA
,
"layer_0"
,
3200
,
0
});
...
@@ -1236,7 +1236,7 @@ TEST(Layer, PoolLayer) {
...
@@ -1236,7 +1236,7 @@ TEST(Layer, PoolLayer) {
testPoolLayer
(
"avg-projection"
,
/* trans= */
false
,
/* useGpu= */
false
);
testPoolLayer
(
"avg-projection"
,
/* trans= */
false
,
/* useGpu= */
false
);
testPoolLayer
(
"max-projection"
,
/* trans= */
false
,
/* useGpu= */
false
);
testPoolLayer
(
"max-projection"
,
/* trans= */
false
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testPoolLayer
(
"avg-projection"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPoolLayer
(
"avg-projection"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPoolLayer
(
"max-projection"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPoolLayer
(
"max-projection"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPoolLayer
(
"cudnn-max-pool"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPoolLayer
(
"cudnn-max-pool"
,
/* trans= */
false
,
/* useGpu= */
true
);
...
@@ -1309,7 +1309,7 @@ void testPool3DLayer(const string& poolType, bool trans, bool useGpu) {
...
@@ -1309,7 +1309,7 @@ void testPool3DLayer(const string& poolType, bool trans, bool useGpu) {
TEST
(
Layer
,
Pool3DLayer
)
{
TEST
(
Layer
,
Pool3DLayer
)
{
testPool3DLayer
(
"avg"
,
/* trans= */
false
,
/* useGpu= */
false
);
testPool3DLayer
(
"avg"
,
/* trans= */
false
,
/* useGpu= */
false
);
testPool3DLayer
(
"max"
,
/* trans= */
false
,
/* useGpu= */
false
);
testPool3DLayer
(
"max"
,
/* trans= */
false
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testPool3DLayer
(
"avg"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPool3DLayer
(
"avg"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPool3DLayer
(
"max"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPool3DLayer
(
"max"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
#endif
...
@@ -1695,7 +1695,7 @@ void testBatchNormLayer(const string& type, bool trans, bool useGpu) {
...
@@ -1695,7 +1695,7 @@ void testBatchNormLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
BatchNormalizationLayer
)
{
TEST
(
Layer
,
BatchNormalizationLayer
)
{
testBatchNormLayer
(
"batch_norm"
,
false
,
false
);
testBatchNormLayer
(
"batch_norm"
,
false
,
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testBatchNormLayer
(
"batch_norm"
,
false
,
true
);
testBatchNormLayer
(
"batch_norm"
,
false
,
true
);
if
(
hl_get_cudnn_lib_version
()
>=
int
(
4000
))
{
if
(
hl_get_cudnn_lib_version
()
>=
int
(
4000
))
{
testBatchNormLayer
(
"cudnn_batch_norm"
,
false
,
true
);
testBatchNormLayer
(
"cudnn_batch_norm"
,
false
,
true
);
...
@@ -1744,7 +1744,7 @@ void testBatchNorm3DLayer(const string& type, bool trans, bool useGpu) {
...
@@ -1744,7 +1744,7 @@ void testBatchNorm3DLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
testBatchNorm3DLayer
)
{
TEST
(
Layer
,
testBatchNorm3DLayer
)
{
testBatchNorm3DLayer
(
"batch_norm"
,
false
,
false
);
testBatchNorm3DLayer
(
"batch_norm"
,
false
,
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testBatchNorm3DLayer
(
"batch_norm"
,
false
,
true
);
testBatchNorm3DLayer
(
"batch_norm"
,
false
,
true
);
if
(
hl_get_cudnn_lib_version
()
>=
int
(
4000
))
{
if
(
hl_get_cudnn_lib_version
()
>=
int
(
4000
))
{
testBatchNorm3DLayer
(
"cudnn_batch_norm"
,
false
,
true
);
testBatchNorm3DLayer
(
"cudnn_batch_norm"
,
false
,
true
);
...
@@ -2262,7 +2262,7 @@ void test3DConvLayer(const string& type, bool trans, bool useGpu) {
...
@@ -2262,7 +2262,7 @@ void test3DConvLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
test3DConvLayer
)
{
TEST
(
Layer
,
test3DConvLayer
)
{
test3DConvLayer
(
"conv3d"
,
/* trans= */
false
,
/* useGpu= */
false
);
test3DConvLayer
(
"conv3d"
,
/* trans= */
false
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
test3DConvLayer
(
"conv3d"
,
/* trans= */
false
,
/* useGpu= */
true
);
test3DConvLayer
(
"conv3d"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
#endif
}
}
...
@@ -2339,7 +2339,7 @@ void test3DDeConvLayer(const string& type, bool trans, bool useGpu) {
...
@@ -2339,7 +2339,7 @@ void test3DDeConvLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
test3DDeConvLayer
)
{
TEST
(
Layer
,
test3DDeConvLayer
)
{
test3DDeConvLayer
(
"deconv3d"
,
/* trans= */
false
,
/* useGpu= */
false
);
test3DDeConvLayer
(
"deconv3d"
,
/* trans= */
false
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
test3DDeConvLayer
(
"deconv3d"
,
/* trans= */
false
,
/* useGpu= */
true
);
test3DDeConvLayer
(
"deconv3d"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
#endif
}
}
...
...
paddle/gserver/tests/test_NetworkCompare.cpp
浏览文件 @
1172f249
...
@@ -243,7 +243,7 @@ TEST(Compare, concat_slice) {
...
@@ -243,7 +243,7 @@ TEST(Compare, concat_slice) {
compareNetwork
(
config_file_a
,
config_file_b
);
compareNetwork
(
config_file_a
,
config_file_b
);
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
Compare
,
img_pool
)
{
TEST
(
Compare
,
img_pool
)
{
std
::
string
config_file_a
=
"./gserver/tests/img_pool_a.conf"
;
std
::
string
config_file_a
=
"./gserver/tests/img_pool_a.conf"
;
std
::
string
config_file_b
=
"./gserver/tests/img_pool_b.conf"
;
std
::
string
config_file_b
=
"./gserver/tests/img_pool_b.conf"
;
...
...
paddle/gserver/tests/test_PriorBox.cpp
浏览文件 @
1172f249
...
@@ -151,7 +151,7 @@ TEST(Layer, priorBoxLayerFwd) {
...
@@ -151,7 +151,7 @@ TEST(Layer, priorBoxLayerFwd) {
useGpu
,
useGpu
,
result
);
result
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
// reset the input parameters
// reset the input parameters
variance
[
1
]
=
0.1
;
variance
[
1
]
=
0.1
;
variance
[
3
]
=
0.2
;
variance
[
3
]
=
0.2
;
...
...
paddle/gserver/tests/test_ProtoDataProvider.cpp
浏览文件 @
1172f249
...
@@ -485,7 +485,7 @@ TEST(ProtoDataProvider, test) {
...
@@ -485,7 +485,7 @@ TEST(ProtoDataProvider, test) {
// Currently in async mode, useGpu is not supported
// Currently in async mode, useGpu is not supported
continue
;
continue
;
}
}
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
if
(
useGpu
)
{
continue
;
continue
;
}
}
...
@@ -525,7 +525,7 @@ TEST(ProtoDataProvider, constant_slots) {
...
@@ -525,7 +525,7 @@ TEST(ProtoDataProvider, constant_slots) {
for
(
int
numConstantSlots
:
{
1
,
2
})
{
for
(
int
numConstantSlots
:
{
1
,
2
})
{
for
(
int
useGpu
:
numTwoArray
)
{
for
(
int
useGpu
:
numTwoArray
)
{
for
(
int
dataCompression
:
numTwoArray
)
{
for
(
int
dataCompression
:
numTwoArray
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
if
(
useGpu
)
{
continue
;
continue
;
}
}
...
@@ -708,7 +708,7 @@ TEST(ProtoSequenceDataProvider, test) {
...
@@ -708,7 +708,7 @@ TEST(ProtoSequenceDataProvider, test) {
// Currently in async mode, useGpu is not supported
// Currently in async mode, useGpu is not supported
continue
;
continue
;
}
}
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
if
(
useGpu
)
{
continue
;
continue
;
}
}
...
...
paddle/gserver/tests/test_PyDataProvider.cpp
浏览文件 @
1172f249
...
@@ -37,7 +37,7 @@ TEST(PyDataProvider, py_fill_slots) {
...
@@ -37,7 +37,7 @@ TEST(PyDataProvider, py_fill_slots) {
config
.
clear_files
();
config
.
clear_files
();
std
::
string
dataFile
=
"gserver/tests/pyDataProvider/pyDataProviderList"
;
std
::
string
dataFile
=
"gserver/tests/pyDataProvider/pyDataProviderList"
;
config
.
set_files
(
dataFile
);
config
.
set_files
(
dataFile
);
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
bool
useGpu
=
false
;
bool
useGpu
=
false
;
#else
#else
bool
useGpu
=
true
;
bool
useGpu
=
true
;
...
@@ -71,7 +71,7 @@ TEST(PyDataProvider, py_fill_nest_slots) {
...
@@ -71,7 +71,7 @@ TEST(PyDataProvider, py_fill_nest_slots) {
std
::
string
dataFile
=
"gserver/tests/pyDataProvider/pyDataProviderList"
;
std
::
string
dataFile
=
"gserver/tests/pyDataProvider/pyDataProviderList"
;
config
.
set_files
(
dataFile
);
config
.
set_files
(
dataFile
);
EXPECT_EQ
(
config
.
IsInitialized
(),
true
);
EXPECT_EQ
(
config
.
IsInitialized
(),
true
);
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
bool
useGpu
=
false
;
bool
useGpu
=
false
;
#else
#else
bool
useGpu
=
true
;
bool
useGpu
=
true
;
...
...
paddle/gserver/tests/test_SelectiveFCLayer.cpp
浏览文件 @
1172f249
...
@@ -321,7 +321,7 @@ TEST(Layer, SelectiveFcLayer_train_dense_mul) {
...
@@ -321,7 +321,7 @@ TEST(Layer, SelectiveFcLayer_train_dense_mul) {
"filelist=gserver/tests/SelectiveFcTest/dense_mul_list"
;
"filelist=gserver/tests/SelectiveFcTest/dense_mul_list"
;
for
(
auto
useGpu
:
{
false
,
true
})
{
for
(
auto
useGpu
:
{
false
,
true
})
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
if
(
useGpu
)
{
break
;
break
;
}
}
...
@@ -388,7 +388,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
...
@@ -388,7 +388,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
outMatSelfc
->
getWidth
(),
outMatSelfc
->
getWidth
(),
outMatSelfc
->
getElementCnt
()));
outMatSelfc
->
getElementCnt
()));
cpuOutMatSelfc
->
copyFrom
(
*
outMatSelfc
,
HPPL_STREAM_DEFAULT
);
cpuOutMatSelfc
->
copyFrom
(
*
outMatSelfc
,
HPPL_STREAM_DEFAULT
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
if
(
useGpu
)
{
hl_stream_synchronize
(
HPPL_STREAM_DEFAULT
);
hl_stream_synchronize
(
HPPL_STREAM_DEFAULT
);
}
}
...
@@ -418,7 +418,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
...
@@ -418,7 +418,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
MatrixPtr
cpuOutMatFc
(
MatrixPtr
cpuOutMatFc
(
new
CpuMatrix
(
outMatFc
->
getHeight
(),
outMatFc
->
getWidth
()));
new
CpuMatrix
(
outMatFc
->
getHeight
(),
outMatFc
->
getWidth
()));
cpuOutMatFc
->
copyFrom
(
*
outMatFc
,
HPPL_STREAM_DEFAULT
);
cpuOutMatFc
->
copyFrom
(
*
outMatFc
,
HPPL_STREAM_DEFAULT
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
if
(
useGpu
)
{
hl_stream_synchronize
(
HPPL_STREAM_DEFAULT
);
hl_stream_synchronize
(
HPPL_STREAM_DEFAULT
);
}
}
...
@@ -443,7 +443,7 @@ TEST(Layer, SelectiveFcLayer_train_sparse_mul) {
...
@@ -443,7 +443,7 @@ TEST(Layer, SelectiveFcLayer_train_sparse_mul) {
selLayerConfig
.
set_size
(
fcLayerWidth
);
selLayerConfig
.
set_size
(
fcLayerWidth
);
testSelectiveFcLayerTrainSparseMul
(
selLayerConfig
,
false
);
testSelectiveFcLayerTrainSparseMul
(
selLayerConfig
,
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testSelectiveFcLayerTrainSparseMul
(
selLayerConfig
,
true
);
testSelectiveFcLayerTrainSparseMul
(
selLayerConfig
,
true
);
#endif
#endif
}
}
...
...
paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
浏览文件 @
1172f249
...
@@ -195,7 +195,7 @@ TEST(Layer, SeqSliceLayer) {
...
@@ -195,7 +195,7 @@ TEST(Layer, SeqSliceLayer) {
vector
<
vector
<
real
>>
ends
;
vector
<
vector
<
real
>>
ends
;
std
::
vector
<
bool
>
mode
=
{
false
};
std
::
vector
<
bool
>
mode
=
{
false
};
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
mode
.
push_back
(
true
);
mode
.
push_back
(
true
);
#endif
#endif
genSeqInfo
(
seqStartPos
,
subSeqStartPos
);
genSeqInfo
(
seqStartPos
,
subSeqStartPos
);
...
...
paddle/gserver/tests/test_WarpCTCLayer.cpp
浏览文件 @
1172f249
...
@@ -199,7 +199,7 @@ TEST(Layer, WarpCTCLayer) {
...
@@ -199,7 +199,7 @@ TEST(Layer, WarpCTCLayer) {
for
(
auto
batchSize
:
{
1
,
10
,
32
})
{
for
(
auto
batchSize
:
{
1
,
10
,
32
})
{
for
(
auto
normByTimes
:
{
false
,
true
})
{
for
(
auto
normByTimes
:
{
false
,
true
})
{
for
(
auto
useGpu
:
{
false
,
true
})
{
for
(
auto
useGpu
:
{
false
,
true
})
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
continue
;
if
(
useGpu
)
continue
;
#endif
#endif
LOG
(
INFO
)
<<
"layerSize="
<<
layerSize
<<
" batchSize="
<<
batchSize
LOG
(
INFO
)
<<
"layerSize="
<<
layerSize
<<
" batchSize="
<<
batchSize
...
...
paddle/math/Matrix.cpp
浏览文件 @
1172f249
...
@@ -670,7 +670,7 @@ void GpuMatrix::leftMul(Matrix& a, real scaleAB, real scaleT) {
...
@@ -670,7 +670,7 @@ void GpuMatrix::leftMul(Matrix& a, real scaleAB, real scaleT) {
}
}
void
GpuMatrix
::
selectRows
(
Matrix
&
table
,
IVector
&
ids
)
{
void
GpuMatrix
::
selectRows
(
Matrix
&
table
,
IVector
&
ids
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
CHECK
(
dynamic_cast
<
GpuMatrix
*>
(
&
table
));
CHECK
(
dynamic_cast
<
GpuMatrix
*>
(
&
table
));
CHECK
(
table
.
useGpu
());
CHECK
(
table
.
useGpu
());
CHECK
(
ids
.
useGpu
());
CHECK
(
ids
.
useGpu
());
...
@@ -694,7 +694,7 @@ void GpuMatrix::selectRows(Matrix& table, IVector& ids) {
...
@@ -694,7 +694,7 @@ void GpuMatrix::selectRows(Matrix& table, IVector& ids) {
}
}
void
GpuMatrix
::
addToRows
(
Matrix
&
table
,
IVector
&
ids
)
{
void
GpuMatrix
::
addToRows
(
Matrix
&
table
,
IVector
&
ids
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
CHECK
(
dynamic_cast
<
GpuMatrix
*>
(
&
table
));
CHECK
(
dynamic_cast
<
GpuMatrix
*>
(
&
table
));
CHECK
(
table
.
useGpu
());
CHECK
(
table
.
useGpu
());
CHECK
(
ids
.
useGpu
());
CHECK
(
ids
.
useGpu
());
...
@@ -741,7 +741,7 @@ void GpuMatrix::rowMax(Matrix& max) {
...
@@ -741,7 +741,7 @@ void GpuMatrix::rowMax(Matrix& max) {
}
}
void
GpuMatrix
::
rowMax
(
IVector
&
maxIds
,
Matrix
&
maxVal
)
{
void
GpuMatrix
::
rowMax
(
IVector
&
maxIds
,
Matrix
&
maxVal
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
CHECK
(
maxIds
.
useGpu
()
&&
maxVal
.
useGpu
())
<<
"Matrix type are not equal"
;
CHECK
(
maxIds
.
useGpu
()
&&
maxVal
.
useGpu
())
<<
"Matrix type are not equal"
;
size_t
numSamples
=
getHeight
();
size_t
numSamples
=
getHeight
();
size_t
beam
=
maxVal
.
getWidth
();
size_t
beam
=
maxVal
.
getWidth
();
...
...
paddle/math/SparseMatrix.cpp
浏览文件 @
1172f249
...
@@ -836,7 +836,7 @@ void GpuSparseMatrix::zeroMem() {
...
@@ -836,7 +836,7 @@ void GpuSparseMatrix::zeroMem() {
}
}
void
GpuSparseMatrix
::
rowMax
(
IVector
&
maxIds
,
Matrix
&
maxVal
)
{
void
GpuSparseMatrix
::
rowMax
(
IVector
&
maxIds
,
Matrix
&
maxVal
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
CHECK
(
maxIds
.
useGpu
()
&&
maxVal
.
useGpu
())
<<
"Matrix type are not equal"
;
CHECK
(
maxIds
.
useGpu
()
&&
maxVal
.
useGpu
())
<<
"Matrix type are not equal"
;
size_t
numSamples
=
getHeight
();
size_t
numSamples
=
getHeight
();
size_t
beam
=
maxVal
.
getWidth
();
size_t
beam
=
maxVal
.
getWidth
();
...
...
paddle/math/Vector.cpp
浏览文件 @
1172f249
...
@@ -172,7 +172,7 @@ void GpuVectorT<T>::isEqualTo(const VectorT<T>& b, const T& value) {
...
@@ -172,7 +172,7 @@ void GpuVectorT<T>::isEqualTo(const VectorT<T>& b, const T& value) {
template
<
class
T
>
template
<
class
T
>
void
GpuVectorT
<
T
>::
selectFrom
(
const
VectorT
<
T
>&
src
,
const
VectorT
<
int
>&
ids
)
{
void
GpuVectorT
<
T
>::
selectFrom
(
const
VectorT
<
T
>&
src
,
const
VectorT
<
int
>&
ids
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
hl_vector_select_from
<
T
>
(
this
->
getData
(),
hl_vector_select_from
<
T
>
(
this
->
getData
(),
this
->
getSize
(),
this
->
getSize
(),
src
.
getData
(),
src
.
getData
(),
...
@@ -850,7 +850,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
...
@@ -850,7 +850,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
size_t
size
)
size_t
size
)
:
sync_
(
nullptr
)
{
:
sync_
(
nullptr
)
{
CHECK_LE
(
offset
+
size
,
static_cast
<
size_t
>
(
src
.
getSize
()));
CHECK_LE
(
offset
+
size
,
static_cast
<
size_t
>
(
src
.
getSize
()));
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
SyncedFlag
*
flag
=
src
.
getSync
();
SyncedFlag
*
flag
=
src
.
getSync
();
if
(
*
flag
==
DATA_AT_CPU
)
{
if
(
*
flag
==
DATA_AT_CPU
)
{
src
.
copyToGpu
();
// will set synchronous data between CPU and GPU
src
.
copyToGpu
();
// will set synchronous data between CPU and GPU
...
@@ -861,7 +861,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
...
@@ -861,7 +861,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
auto
cMemHandle
=
(
src
.
getVector
(
false
))
->
getMemoryHandle
();
auto
cMemHandle
=
(
src
.
getVector
(
false
))
->
getMemoryHandle
();
cpuVectorT_
=
std
::
make_shared
<
CpuVectorT
<
T
>>
(
cpuVectorT_
=
std
::
make_shared
<
CpuVectorT
<
T
>>
(
size
,
std
::
dynamic_pointer_cast
<
CpuMemoryHandle
>
(
cMemHandle
),
offset
);
size
,
std
::
dynamic_pointer_cast
<
CpuMemoryHandle
>
(
cMemHandle
),
offset
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
auto
gMemHandle
=
(
src
.
getVector
(
true
))
->
getMemoryHandle
();
auto
gMemHandle
=
(
src
.
getVector
(
true
))
->
getMemoryHandle
();
gpuVectorT_
=
std
::
make_shared
<
GpuVectorT
<
T
>>
(
gpuVectorT_
=
std
::
make_shared
<
GpuVectorT
<
T
>>
(
size
,
std
::
dynamic_pointer_cast
<
GpuMemoryHandle
>
(
gMemHandle
),
offset
);
size
,
std
::
dynamic_pointer_cast
<
GpuMemoryHandle
>
(
gMemHandle
),
offset
);
...
...
paddle/math/tests/test_Allocator.cpp
浏览文件 @
1172f249
...
@@ -68,7 +68,7 @@ void testPoolAllocator() {
...
@@ -68,7 +68,7 @@ void testPoolAllocator() {
TEST
(
Allocator
,
Pool
)
{
TEST
(
Allocator
,
Pool
)
{
testPoolAllocator
<
CpuAllocator
>
();
testPoolAllocator
<
CpuAllocator
>
();
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testPoolAllocator
<
GpuAllocator
>
();
testPoolAllocator
<
GpuAllocator
>
();
#endif
#endif
}
}
...
@@ -92,7 +92,7 @@ TEST(MemoryHandle, Cpu) {
...
@@ -92,7 +92,7 @@ TEST(MemoryHandle, Cpu) {
EXPECT_EQ
(
ptr1
,
ptr2
);
EXPECT_EQ
(
ptr1
,
ptr2
);
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
MemoryHandle
,
Gpu
)
{
TEST
(
MemoryHandle
,
Gpu
)
{
int
numGpu
=
hl_get_device_count
();
int
numGpu
=
hl_get_device_count
();
...
...
paddle/math/tests/test_BaseMatrix.cpp
浏览文件 @
1172f249
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
/**
/**
* This test file use autotest::AutoCompare and cmpWithoutArg to compares the
* This test file use autotest::AutoCompare and cmpWithoutArg to compares the
* implementation of CPU and GPU member function in
* implementation of CPU and GPU member function in
...
...
paddle/math/tests/test_CpuGpuVector.cpp
浏览文件 @
1172f249
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include "paddle/math/Vector.h"
#include "paddle/math/Vector.h"
...
...
paddle/math/tests/test_ExecViaCpu.cpp
浏览文件 @
1172f249
...
@@ -94,7 +94,7 @@ void testWrapper(F&& f) {
...
@@ -94,7 +94,7 @@ void testWrapper(F&& f) {
}
}
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
ExecViaCpu
,
test1
)
{
TEST
(
ExecViaCpu
,
test1
)
{
testWrapper
(
f
);
testWrapper
(
f
);
testWrapper
(
&
f
);
testWrapper
(
&
f
);
...
...
paddle/math/tests/test_GpuProfiler.cpp
浏览文件 @
1172f249
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include "paddle/math/Matrix.h"
#include "paddle/math/Matrix.h"
...
...
paddle/math/tests/test_Matrix.cpp
浏览文件 @
1172f249
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
/**
/**
* This test file use autotest::AutoCompare and cmpWithArg to compares the
* This test file use autotest::AutoCompare and cmpWithArg to compares the
* implementation of CPU and GPU member function in Matrix.cpp.
* implementation of CPU and GPU member function in Matrix.cpp.
...
...
paddle/math/tests/test_SparseMatrix.cpp
浏览文件 @
1172f249
...
@@ -47,7 +47,7 @@ struct MatrixPara {
...
@@ -47,7 +47,7 @@ struct MatrixPara {
SparseFormat
format
;
SparseFormat
format
;
};
};
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
void
test_sparse_matrix_mul
(
MatrixPara
paraA
,
void
test_sparse_matrix_mul
(
MatrixPara
paraA
,
MatrixPara
paraB
,
MatrixPara
paraB
,
MatrixPara
paraC
)
{
MatrixPara
paraC
)
{
...
@@ -452,7 +452,7 @@ TEST(Matrix, SparseMatrixCSRFormatTrimFrom) {
...
@@ -452,7 +452,7 @@ TEST(Matrix, SparseMatrixCSRFormatTrimFrom) {
matB
->
trimFrom
(
*
mat
);
matB
->
trimFrom
(
*
mat
);
checkSMatrixEqual2
(
matA
,
matB
);
checkSMatrixEqual2
(
matA
,
matB
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
GpuSparseMatrixPtr
matC
=
std
::
make_shared
<
GpuSparseMatrix
>
(
GpuSparseMatrixPtr
matC
=
std
::
make_shared
<
GpuSparseMatrix
>
(
height
,
trimedWidth
,
height
,
FLOAT_VALUE
,
SPARSE_CSR
,
true
);
height
,
trimedWidth
,
height
,
FLOAT_VALUE
,
SPARSE_CSR
,
true
);
matC
->
trimFrom
(
*
mat
);
matC
->
trimFrom
(
*
mat
);
...
@@ -546,7 +546,7 @@ TEST(Matrix, SparseMatrixCSCFormatTrimFrom) {
...
@@ -546,7 +546,7 @@ TEST(Matrix, SparseMatrixCSCFormatTrimFrom) {
matB
->
trimFrom
(
*
mat
);
matB
->
trimFrom
(
*
mat
);
checkSMatrixEqual2
(
matA
,
matB
);
checkSMatrixEqual2
(
matA
,
matB
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
GpuSparseMatrixPtr
matC
=
std
::
make_shared
<
GpuSparseMatrix
>
(
GpuSparseMatrixPtr
matC
=
std
::
make_shared
<
GpuSparseMatrix
>
(
height
,
trimedWidth
,
height
,
FLOAT_VALUE
,
SPARSE_CSC
,
true
);
height
,
trimedWidth
,
height
,
FLOAT_VALUE
,
SPARSE_CSC
,
true
);
matC
->
trimFrom
(
*
mat
);
matC
->
trimFrom
(
*
mat
);
...
...
paddle/math/tests/test_TrainingAlgorithm.cpp
浏览文件 @
1172f249
...
@@ -91,7 +91,7 @@ int VectorCheckErr(const VectorPtr& vector1, const VectorPtr& vector2) {
...
@@ -91,7 +91,7 @@ int VectorCheckErr(const VectorPtr& vector1, const VectorPtr& vector2) {
typedef
std
::
function
<
void
(
size_t
size
,
bool
useGpu
)
>
testMatrixFunc
;
typedef
std
::
function
<
void
(
size_t
size
,
bool
useGpu
)
>
testMatrixFunc
;
void
testCase
(
testMatrixFunc
matrixFunc
)
{
void
testCase
(
testMatrixFunc
matrixFunc
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
for
(
auto
useGpu
:
{
false
,
true
})
{
for
(
auto
useGpu
:
{
false
,
true
})
{
#else
#else
for
(
auto
useGpu
:
{
false
})
{
for
(
auto
useGpu
:
{
false
})
{
...
...
paddle/math/tests/test_batchTranspose.cpp
浏览文件 @
1172f249
...
@@ -17,7 +17,7 @@ limitations under the License. */
...
@@ -17,7 +17,7 @@ limitations under the License. */
using
namespace
paddle
;
// NOLINT
using
namespace
paddle
;
// NOLINT
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
MatrixBatchTransTest
,
test_batch_matrix_transpose
)
{
TEST
(
MatrixBatchTransTest
,
test_batch_matrix_transpose
)
{
const
int
nx
=
100
;
const
int
nx
=
100
;
const
int
ny
=
50
;
const
int
ny
=
50
;
...
...
paddle/math/tests/test_matrixCompare.cpp
浏览文件 @
1172f249
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
/// This unittest checks GpuMatrix/CpuMatrix get same result, so disable when
/// This unittest checks GpuMatrix/CpuMatrix get same result, so disable when
/// only cpu version.
/// only cpu version.
...
...
paddle/math/tests/test_perturbation.cpp
浏览文件 @
1172f249
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <cuda_runtime.h>
#include <cuda_runtime.h>
#include <gtest/gtest.h>
#include <gtest/gtest.h>
...
...
paddle/math/tests/test_sparseMatrixCompare.cpp
浏览文件 @
1172f249
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
/// This unittest checks GpuSparseMatrix/CpuSparseMatrix get same result,
/// This unittest checks GpuSparseMatrix/CpuSparseMatrix get same result,
// so disable when
// so disable when
/// only cpu version.
/// only cpu version.
...
...
paddle/memory/detail/buddy_allocator.cc
浏览文件 @
1172f249
...
@@ -175,7 +175,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
...
@@ -175,7 +175,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
}
}
BuddyAllocator
::
PoolSet
::
iterator
BuddyAllocator
::
RefillPool
()
{
BuddyAllocator
::
PoolSet
::
iterator
BuddyAllocator
::
RefillPool
()
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
if
(
system_allocator_
->
UseGpu
())
{
if
(
system_allocator_
->
UseGpu
())
{
if
((
total_used_
+
total_free_
)
==
0
)
{
if
((
total_used_
+
total_free_
)
==
0
)
{
// Compute the maximum allocation size for the first allocation.
// Compute the maximum allocation size for the first allocation.
...
...
paddle/memory/detail/system_allocator.cc
浏览文件 @
1172f249
...
@@ -62,7 +62,7 @@ void CPUAllocator::Free(void* p, size_t size, size_t index) {
...
@@ -62,7 +62,7 @@ void CPUAllocator::Free(void* p, size_t size, size_t index) {
bool
CPUAllocator
::
UseGpu
()
const
{
return
false
;
}
bool
CPUAllocator
::
UseGpu
()
const
{
return
false
;
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
void
*
GPUAllocator
::
Alloc
(
size_t
&
index
,
size_t
size
)
{
void
*
GPUAllocator
::
Alloc
(
size_t
&
index
,
size_t
size
)
{
// CUDA documentation doesn't explain if cudaMalloc returns nullptr
// CUDA documentation doesn't explain if cudaMalloc returns nullptr
...
...
paddle/memory/detail/system_allocator.h
浏览文件 @
1172f249
...
@@ -40,7 +40,7 @@ class CPUAllocator : public SystemAllocator {
...
@@ -40,7 +40,7 @@ class CPUAllocator : public SystemAllocator {
virtual
bool
UseGpu
()
const
;
virtual
bool
UseGpu
()
const
;
};
};
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
class
GPUAllocator
:
public
SystemAllocator
{
class
GPUAllocator
:
public
SystemAllocator
{
public:
public:
virtual
void
*
Alloc
(
size_t
&
index
,
size_t
size
);
virtual
void
*
Alloc
(
size_t
&
index
,
size_t
size
);
...
...
paddle/memory/detail/system_allocator_test.cc
浏览文件 @
1172f249
...
@@ -56,7 +56,7 @@ TEST(CPUAllocator, LockMem) {
...
@@ -56,7 +56,7 @@ TEST(CPUAllocator, LockMem) {
TestAllocator
(
a
,
0
);
TestAllocator
(
a
,
0
);
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
GPUAllocator
,
Alloc
)
{
TEST
(
GPUAllocator
,
Alloc
)
{
paddle
::
memory
::
detail
::
GPUAllocator
a
;
paddle
::
memory
::
detail
::
GPUAllocator
a
;
TestAllocator
(
a
,
2048
);
TestAllocator
(
a
,
2048
);
...
...
paddle/memory/memcpy.cc
浏览文件 @
1172f249
...
@@ -26,7 +26,7 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
...
@@ -26,7 +26,7 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
std
::
memcpy
(
dst
,
src
,
num
);
std
::
memcpy
(
dst
,
src
,
num
);
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
>
template
<
>
void
Copy
<
platform
::
CPUPlace
,
platform
::
GPUPlace
>
(
platform
::
CPUPlace
dst_place
,
void
Copy
<
platform
::
CPUPlace
,
platform
::
GPUPlace
>
(
platform
::
CPUPlace
dst_place
,
void
*
dst
,
void
*
dst
,
...
...
paddle/memory/memcpy.h
浏览文件 @
1172f249
...
@@ -33,7 +33,7 @@ namespace memory {
...
@@ -33,7 +33,7 @@ namespace memory {
template
<
typename
DstPlace
,
typename
SrcPlace
>
template
<
typename
DstPlace
,
typename
SrcPlace
>
void
Copy
(
DstPlace
,
void
*
dst
,
SrcPlace
,
const
void
*
src
,
size_t
num
);
void
Copy
(
DstPlace
,
void
*
dst
,
SrcPlace
,
const
void
*
src
,
size_t
num
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
/**
/**
* \brief Copy memory from one place to another place.
* \brief Copy memory from one place to another place.
...
...
paddle/memory/memory.cc
浏览文件 @
1172f249
...
@@ -62,7 +62,7 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
...
@@ -62,7 +62,7 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
return
GetCPUBuddyAllocator
()
->
Used
();
return
GetCPUBuddyAllocator
()
->
Used
();
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
BuddyAllocator
*
GetGPUBuddyAllocator
(
int
gpu_id
)
{
BuddyAllocator
*
GetGPUBuddyAllocator
(
int
gpu_id
)
{
using
BuddyAllocVec
=
std
::
vector
<
BuddyAllocator
*>
;
using
BuddyAllocVec
=
std
::
vector
<
BuddyAllocator
*>
;
...
...
paddle/memory/memory_test.cc
浏览文件 @
1172f249
...
@@ -80,7 +80,7 @@ TEST(BuddyAllocator, CPUMultAlloc) {
...
@@ -80,7 +80,7 @@ TEST(BuddyAllocator, CPUMultAlloc) {
}
}
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
size_t
align
(
size_t
size
,
paddle
::
platform
::
GPUPlace
place
)
{
size_t
align
(
size_t
size
,
paddle
::
platform
::
GPUPlace
place
)
{
size
+=
sizeof
(
paddle
::
memory
::
detail
::
Metadata
);
size
+=
sizeof
(
paddle
::
memory
::
detail
::
Metadata
);
...
...
paddle/operators/detail/strided_memcpy.h
浏览文件 @
1172f249
...
@@ -34,7 +34,7 @@ struct StridedMemcpyFunctor<T, 1> {
...
@@ -34,7 +34,7 @@ struct StridedMemcpyFunctor<T, 1> {
auto
&
cpu_place
=
boost
::
get
<
platform
::
CPUPlace
>
(
place
);
auto
&
cpu_place
=
boost
::
get
<
platform
::
CPUPlace
>
(
place
);
memory
::
Copy
(
cpu_place
,
dst
,
cpu_place
,
src
,
sizeof
(
T
)
*
dst_dim
.
head
);
memory
::
Copy
(
cpu_place
,
dst
,
cpu_place
,
src
,
sizeof
(
T
)
*
dst_dim
.
head
);
}
else
{
}
else
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
auto
&
gpu_place
=
boost
::
get
<
platform
::
GPUPlace
>
(
place
);
auto
&
gpu_place
=
boost
::
get
<
platform
::
GPUPlace
>
(
place
);
auto
&
cuda_ctx
=
auto
&
cuda_ctx
=
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
dev_ctx
);
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
dev_ctx
);
...
...
paddle/operators/math/im2col_test.cc
浏览文件 @
1172f249
...
@@ -71,7 +71,7 @@ void testIm2col() {
...
@@ -71,7 +71,7 @@ void testIm2col() {
context
=
context
=
new
paddle
::
platform
::
CPUDeviceContext
(
paddle
::
platform
::
CPUPlace
());
new
paddle
::
platform
::
CPUDeviceContext
(
paddle
::
platform
::
CPUPlace
());
}
else
{
}
else
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
context
=
context
=
new
paddle
::
platform
::
CUDADeviceContext
(
paddle
::
platform
::
GPUPlace
());
new
paddle
::
platform
::
CUDADeviceContext
(
paddle
::
platform
::
GPUPlace
());
#else
#else
...
@@ -116,7 +116,7 @@ void testIm2col() {
...
@@ -116,7 +116,7 @@ void testIm2col() {
TEST
(
math
,
im2col
)
{
TEST
(
math
,
im2col
)
{
testIm2col
<
paddle
::
platform
::
CPUPlace
>
();
testIm2col
<
paddle
::
platform
::
CPUPlace
>
();
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testIm2col
<
paddle
::
platform
::
GPUPlace
>
();
testIm2col
<
paddle
::
platform
::
GPUPlace
>
();
#endif
#endif
}
}
paddle/operators/math/math_function_test.cc
浏览文件 @
1172f249
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/math_function.h"
#include "gtest/gtest.h"
#include "gtest/gtest.h"
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
math_function
,
notrans_mul_trans
)
{
TEST
(
math_function
,
notrans_mul_trans
)
{
paddle
::
framework
::
Tensor
input1
;
paddle
::
framework
::
Tensor
input1
;
paddle
::
framework
::
Tensor
input1_gpu
;
paddle
::
framework
::
Tensor
input1_gpu
;
...
...
paddle/operators/strided_memcpy_test.cc
浏览文件 @
1172f249
...
@@ -72,7 +72,7 @@ TEST(StridedMemcpy, CPUConcat) {
...
@@ -72,7 +72,7 @@ TEST(StridedMemcpy, CPUConcat) {
}
}
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
StridedMemcpy
,
GPUCrop
)
{
TEST
(
StridedMemcpy
,
GPUCrop
)
{
// clang-format off
// clang-format off
int
src
[]
=
{
int
src
[]
=
{
...
...
paddle/platform/device_context.cc
浏览文件 @
1172f249
...
@@ -35,7 +35,7 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
...
@@ -35,7 +35,7 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
Place
CPUDeviceContext
::
GetPlace
()
const
{
return
CPUPlace
();
}
Place
CPUDeviceContext
::
GetPlace
()
const
{
return
CPUPlace
();
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
>
template
<
>
Eigen
::
GpuDevice
*
Eigen
::
GpuDevice
*
...
...
paddle/platform/device_context.h
浏览文件 @
1172f249
...
@@ -14,7 +14,7 @@ limitations under the License. */
...
@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/platform/enforce.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"
#include "paddle/platform/place.h"
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/gpu_info.h"
#include "paddle/platform/gpu_info.h"
...
@@ -61,7 +61,7 @@ class CPUDeviceContext : public DeviceContext {
...
@@ -61,7 +61,7 @@ class CPUDeviceContext : public DeviceContext {
std
::
unique_ptr
<
Eigen
::
DefaultDevice
>
eigen_device_
;
std
::
unique_ptr
<
Eigen
::
DefaultDevice
>
eigen_device_
;
};
};
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
>
template
<
>
struct
EigenDeviceConverter
<
platform
::
GPUPlace
>
{
struct
EigenDeviceConverter
<
platform
::
GPUPlace
>
{
using
EigenDeviceType
=
Eigen
::
GpuDevice
;
using
EigenDeviceType
=
Eigen
::
GpuDevice
;
...
...
paddle/platform/enforce.h
浏览文件 @
1172f249
...
@@ -29,7 +29,7 @@ limitations under the License. */
...
@@ -29,7 +29,7 @@ limitations under the License. */
#include <cxxabi.h> // for __cxa_demangle
#include <cxxabi.h> // for __cxa_demangle
#endif
#endif
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/dynload/cudnn.h"
...
@@ -113,7 +113,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
...
@@ -113,7 +113,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
}
}
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
typename
...
Args
>
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
...
...
paddle/platform/enforce_test.cc
浏览文件 @
1172f249
paddle/platform/gpu_info.h
浏览文件 @
1172f249
...
@@ -14,7 +14,7 @@ limitations under the License. */
...
@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once
#pragma once
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <cuda_runtime.h>
#include <cuda_runtime.h>
#include <stddef.h>
#include <stddef.h>
...
...
paddle/platform/variant.h
浏览文件 @
1172f249
...
@@ -16,7 +16,7 @@
...
@@ -16,7 +16,7 @@
#include <boost/config.hpp>
#include <boost/config.hpp>
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
// Because boost's variadic templates has bug on nvcc, boost will disable
// Because boost's variadic templates has bug on nvcc, boost will disable
// variadic template support when GPU enabled on nvcc.
// variadic template support when GPU enabled on nvcc.
...
...
paddle/pserver/test/SocketTest.cpp
浏览文件 @
1172f249
...
@@ -215,7 +215,7 @@ int main(int argc, char** argv) {
...
@@ -215,7 +215,7 @@ int main(int argc, char** argv) {
uint64_t
dataSize
=
FLAGS_dim
*
sizeof
(
real
);
uint64_t
dataSize
=
FLAGS_dim
*
sizeof
(
real
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
GpuVector
gpuParam
(
FLAGS_dim
);
GpuVector
gpuParam
(
FLAGS_dim
);
GpuVector
gpuGrad
(
FLAGS_dim
);
GpuVector
gpuGrad
(
FLAGS_dim
);
#else
#else
...
...
paddle/pserver/test/test_ProtoServer.cpp
浏览文件 @
1172f249
...
@@ -99,7 +99,7 @@ TEST(ProtoServer, regular) {
...
@@ -99,7 +99,7 @@ TEST(ProtoServer, regular) {
}
}
TEST
(
ProtoServer
,
extended
)
{
TEST
(
ProtoServer
,
extended
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
ProtoClient
*
client
;
ProtoClient
*
client
;
if
(
FLAGS_rdma_tcp
==
"rdma"
)
if
(
FLAGS_rdma_tcp
==
"rdma"
)
client
=
new
ProtoClient
(
FLAGS_server_addr
,
FLAGS_port
,
F_RDMA
);
client
=
new
ProtoClient
(
FLAGS_server_addr
,
FLAGS_port
,
F_RDMA
);
...
...
paddle/pybind/pybind.cc
浏览文件 @
1172f249
...
@@ -34,7 +34,7 @@ static size_t UniqueIntegerGenerator() {
...
@@ -34,7 +34,7 @@ static size_t UniqueIntegerGenerator() {
}
}
bool
IsCompileGPU
()
{
bool
IsCompileGPU
()
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
return
false
;
return
false
;
#else
#else
return
true
;
return
true
;
...
@@ -78,7 +78,7 @@ PYBIND11_PLUGIN(core) {
...
@@ -78,7 +78,7 @@ PYBIND11_PLUGIN(core) {
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
double
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
double
>
)
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
double
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
double
>
)
...
@@ -96,7 +96,7 @@ PYBIND11_PLUGIN(core) {
...
@@ -96,7 +96,7 @@ PYBIND11_PLUGIN(core) {
.
def
(
.
def
(
"__init__"
,
"__init__"
,
[](
LoDTensor
&
instance
,
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
lod
)
{
[](
LoDTensor
&
instance
,
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
lod
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
new
(
&
instance
)
LoDTensor
(
lod
);
new
(
&
instance
)
LoDTensor
(
lod
);
#else
#else
LoD
new_lod
;
LoD
new_lod
;
...
@@ -107,7 +107,7 @@ PYBIND11_PLUGIN(core) {
...
@@ -107,7 +107,7 @@ PYBIND11_PLUGIN(core) {
})
})
.
def
(
"set_lod"
,
.
def
(
"set_lod"
,
[](
LoDTensor
&
self
,
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
lod
)
{
[](
LoDTensor
&
self
,
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
lod
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
self
.
set_lod
(
lod
);
self
.
set_lod
(
lod
);
#else
#else
LoD
new_lod
;
LoD
new_lod
;
...
@@ -117,7 +117,7 @@ PYBIND11_PLUGIN(core) {
...
@@ -117,7 +117,7 @@ PYBIND11_PLUGIN(core) {
#endif
#endif
})
})
.
def
(
"lod"
,
[](
LoDTensor
&
self
)
->
std
::
vector
<
std
::
vector
<
size_t
>>
{
.
def
(
"lod"
,
[](
LoDTensor
&
self
)
->
std
::
vector
<
std
::
vector
<
size_t
>>
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
return
self
.
lod
();
return
self
.
lod
();
#else
#else
auto
lod
=
self
.
lod
();
auto
lod
=
self
.
lod
();
...
@@ -203,7 +203,7 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -203,7 +203,7 @@ All parameter, weight, gradient are variables in Paddle.
.
def_static
(
"create"
,
.
def_static
(
"create"
,
[](
paddle
::
platform
::
GPUPlace
&
place
)
[](
paddle
::
platform
::
GPUPlace
&
place
)
->
paddle
::
platform
::
DeviceContext
*
{
->
paddle
::
platform
::
DeviceContext
*
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
PADDLE_THROW
(
"GPUPlace is not supported in CPU device."
);
PADDLE_THROW
(
"GPUPlace is not supported in CPU device."
);
#else
#else
return
new
paddle
::
platform
::
CUDADeviceContext
(
place
);
return
new
paddle
::
platform
::
CUDADeviceContext
(
place
);
...
...
paddle/pybind/tensor_py.h
浏览文件 @
1172f249
...
@@ -106,7 +106,7 @@ void PyCPUTensorSetFromArray(
...
@@ -106,7 +106,7 @@ void PyCPUTensorSetFromArray(
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
());
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
());
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
typename
T
>
template
<
typename
T
>
void
PyCUDATensorSetFromArray
(
void
PyCUDATensorSetFromArray
(
framework
::
Tensor
&
self
,
framework
::
Tensor
&
self
,
...
...
paddle/string/to_string_test.cc
浏览文件 @
1172f249
paddle/trainer/MergeModel.cpp
浏览文件 @
1172f249
...
@@ -29,7 +29,7 @@ int main(int argc, char** argv) {
...
@@ -29,7 +29,7 @@ int main(int argc, char** argv) {
initMain
(
argc
,
argv
);
initMain
(
argc
,
argv
);
initPython
(
argc
,
argv
);
initPython
(
argc
,
argv
);
string
confFile
=
TrainerConfigHelper
::
getConfigNameFromPath
(
FLAGS_model_dir
);
string
confFile
=
TrainerConfigHelper
::
getConfigNameFromPath
(
FLAGS_model_dir
);
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
FLAGS_use_gpu
=
false
;
FLAGS_use_gpu
=
false
;
#endif
#endif
auto
config
=
std
::
make_shared
<
TrainerConfigHelper
>
(
confFile
);
auto
config
=
std
::
make_shared
<
TrainerConfigHelper
>
(
confFile
);
...
...
paddle/trainer/tests/test_Compare.cpp
浏览文件 @
1172f249
...
@@ -146,7 +146,7 @@ void compareGradient(comData& comDataCpu, comData& comDataGpu) {
...
@@ -146,7 +146,7 @@ void compareGradient(comData& comDataCpu, comData& comDataGpu) {
}
}
int
main
(
int
argc
,
char
**
argv
)
{
int
main
(
int
argc
,
char
**
argv
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
exit
(
0
);
exit
(
0
);
#endif
#endif
paddle
::
initMain
(
argc
,
argv
);
paddle
::
initMain
(
argc
,
argv
);
...
...
paddle/trainer/tests/test_CompareSparse.cpp
浏览文件 @
1172f249
...
@@ -174,7 +174,7 @@ TEST(compareSparse, multiGradientMachine) {
...
@@ -174,7 +174,7 @@ TEST(compareSparse, multiGradientMachine) {
FLAGS_local
=
local
;
FLAGS_local
=
local
;
FLAGS_ports_num_for_sparse
=
5
;
FLAGS_ports_num_for_sparse
=
5
;
for
(
bool
useGpu
:
{
false
,
true
})
{
for
(
bool
useGpu
:
{
false
,
true
})
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
continue
;
if
(
useGpu
)
continue
;
#endif
#endif
FLAGS_parallel_nn
=
useGpu
;
FLAGS_parallel_nn
=
useGpu
;
...
@@ -198,7 +198,7 @@ TEST(compareSparse, NeuralNetwork) {
...
@@ -198,7 +198,7 @@ TEST(compareSparse, NeuralNetwork) {
FLAGS_local
=
local
;
FLAGS_local
=
local
;
FLAGS_ports_num_for_sparse
=
5
;
FLAGS_ports_num_for_sparse
=
5
;
for
(
bool
useGpu
:
{
false
,
true
})
{
for
(
bool
useGpu
:
{
false
,
true
})
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
continue
;
if
(
useGpu
)
continue
;
#endif
#endif
FLAGS_parallel_nn
=
useGpu
;
FLAGS_parallel_nn
=
useGpu
;
...
...
paddle/trainer/tests/test_Trainer.cpp
浏览文件 @
1172f249
...
@@ -51,7 +51,7 @@ void checkGradientTest(const string& configFile,
...
@@ -51,7 +51,7 @@ void checkGradientTest(const string& configFile,
TEST
(
checkGradient
,
cpu
)
{
checkGradientTest
(
configFile1
,
false
,
false
);
}
TEST
(
checkGradient
,
cpu
)
{
checkGradientTest
(
configFile1
,
false
,
false
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
checkGradient
,
gpu
)
{
checkGradientTest
(
configFile1
,
true
,
false
);
}
TEST
(
checkGradient
,
gpu
)
{
checkGradientTest
(
configFile1
,
true
,
false
);
}
TEST
(
checkGradient
,
multiGpu
)
{
TEST
(
checkGradient
,
multiGpu
)
{
...
@@ -97,7 +97,7 @@ TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); }
...
@@ -97,7 +97,7 @@ TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); }
TEST
(
checkGradient
,
chunk
)
{
TEST
(
checkGradient
,
chunk
)
{
checkGradientTest
(
configFile3
,
false
,
false
);
checkGradientTest
(
configFile3
,
false
,
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
checkGradientTest
(
configFile3
,
true
,
true
);
checkGradientTest
(
configFile3
,
true
,
true
);
#endif
#endif
}
}
...
...
paddle/trainer/tests/test_TrainerOnePass.cpp
浏览文件 @
1172f249
...
@@ -79,7 +79,7 @@ void trainerOnePassTest(const string& configFile,
...
@@ -79,7 +79,7 @@ void trainerOnePassTest(const string& configFile,
// 1. test trainer (cpu, gpu).
// 1. test trainer (cpu, gpu).
TEST
(
trainerOnePass
,
cpu
)
{
trainerOnePassTest
(
configFile1
,
false
,
false
);
}
TEST
(
trainerOnePass
,
cpu
)
{
trainerOnePassTest
(
configFile1
,
false
,
false
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
trainerOnePass
,
gpu
)
{
trainerOnePassTest
(
configFile1
,
true
,
false
);
}
TEST
(
trainerOnePass
,
gpu
)
{
trainerOnePassTest
(
configFile1
,
true
,
false
);
}
TEST
(
trainerOnePass
,
gpu2
)
{
trainerOnePassTest
(
configFile1
,
true
,
false
,
2
);
}
TEST
(
trainerOnePass
,
gpu2
)
{
trainerOnePassTest
(
configFile1
,
true
,
false
,
2
);
}
...
@@ -94,7 +94,7 @@ TEST(trainerOnePass, parallel) {
...
@@ -94,7 +94,7 @@ TEST(trainerOnePass, parallel) {
#endif
#endif
// 2. test average_window.
// 2. test average_window.
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
average_window
,
gpu
)
{
TEST
(
average_window
,
gpu
)
{
trainerOnePassTest
(
configFile1
,
true
,
false
,
4
,
0.01
);
trainerOnePassTest
(
configFile1
,
true
,
false
,
4
,
0.01
);
}
}
...
@@ -266,7 +266,7 @@ TEST(checkRemoteUpdater, cpuTrainerOldUpdater) {
...
@@ -266,7 +266,7 @@ TEST(checkRemoteUpdater, cpuTrainerOldUpdater) {
checkRemoteParameterUpdaterTest
(
configFile1
,
false
,
false
,
1
,
true
);
checkRemoteParameterUpdaterTest
(
configFile1
,
false
,
false
,
1
,
true
);
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
checkRemoteUpdater
,
gpuTrainer
)
{
TEST
(
checkRemoteUpdater
,
gpuTrainer
)
{
checkRemoteParameterUpdaterTest
(
configFile1
,
true
,
false
);
checkRemoteParameterUpdaterTest
(
configFile1
,
true
,
false
);
}
}
...
...
paddle/trainer/tests/test_recurrent_machine_generation.cpp
浏览文件 @
1172f249
...
@@ -113,7 +113,7 @@ void testGeneration(const string& configFile,
...
@@ -113,7 +113,7 @@ void testGeneration(const string& configFile,
#ifndef PADDLE_TYPE_DOUBLE
#ifndef PADDLE_TYPE_DOUBLE
TEST
(
RecurrentGradientMachine
,
test_generation
)
{
TEST
(
RecurrentGradientMachine
,
test_generation
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
const
auto
useGpuConfs
=
{
false
};
const
auto
useGpuConfs
=
{
false
};
#else
#else
const
auto
useGpuConfs
=
{
true
,
false
};
const
auto
useGpuConfs
=
{
true
,
false
};
...
...
paddle/utils/Flags.cpp
浏览文件 @
1172f249
...
@@ -14,7 +14,7 @@ limitations under the License. */
...
@@ -14,7 +14,7 @@ limitations under the License. */
#include "Flags.h"
#include "Flags.h"
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
DEFINE_bool
(
use_gpu
,
false
,
"Only support CPU training"
);
DEFINE_bool
(
use_gpu
,
false
,
"Only support CPU training"
);
#else
#else
DEFINE_bool
(
use_gpu
,
true
,
"Whether to use GPU for training"
);
DEFINE_bool
(
use_gpu
,
true
,
"Whether to use GPU for training"
);
...
...
paddle/utils/Util.h
浏览文件 @
1172f249
...
@@ -218,7 +218,7 @@ protected:
...
@@ -218,7 +218,7 @@ protected:
* *d2* is peer device to enable direct access to by the d1 device.
* *d2* is peer device to enable direct access to by the d1 device.
*/
*/
inline
void
enablePeerAccess
(
int
d1
,
int
d2
)
{
inline
void
enablePeerAccess
(
int
d1
,
int
d2
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
if
(
hl_device_can_access_peer
(
d1
,
d2
))
{
if
(
hl_device_can_access_peer
(
d1
,
d2
))
{
SetDevice
dev
(
d1
);
SetDevice
dev
(
d1
);
hl_device_enable_peer_access
(
d2
);
hl_device_enable_peer_access
(
d2
);
...
...
paddle/utils/Version.h
浏览文件 @
1172f249
...
@@ -48,7 +48,7 @@ void printVersion(std::ostream& os);
...
@@ -48,7 +48,7 @@ void printVersion(std::ostream& os);
* @return return true if paddle compiled with GPU
* @return return true if paddle compiled with GPU
*/
*/
constexpr
bool
isWithGpu
()
{
constexpr
bool
isWithGpu
()
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
return
false
;
return
false
;
#else
#else
return
true
;
return
true
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录