Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
1172f249
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1172f249
编写于
10月 04, 2017
作者:
Y
Yi Wang
提交者:
GitHub
10月 04, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4590 from wangkuiyi/paddle_only_cpu
Add -D PADDLE_WITH_CUDA in cmake/configure.cmake
上级
c0511c8a
880b874b
变更
89
隐藏空白更改
内联
并排
Showing
89 changed file
with
136 addition
and
135 deletion
+136
-135
cmake/configure.cmake
cmake/configure.cmake
+2
-1
paddle/api/Util.cpp
paddle/api/Util.cpp
+1
-1
paddle/capi/Matrix.cpp
paddle/capi/Matrix.cpp
+1
-1
paddle/framework/grad_op_builder_test.cc
paddle/framework/grad_op_builder_test.cc
+1
-1
paddle/framework/lod_tensor.h
paddle/framework/lod_tensor.h
+2
-2
paddle/framework/op_proto_maker_test.cc
paddle/framework/op_proto_maker_test.cc
+1
-1
paddle/framework/op_registry.h
paddle/framework/op_registry.h
+1
-1
paddle/framework/op_registry_test.cc
paddle/framework/op_registry_test.cc
+1
-1
paddle/framework/operator.cc
paddle/framework/operator.cc
+1
-1
paddle/framework/tensor_impl.h
paddle/framework/tensor_impl.h
+2
-2
paddle/framework/tensor_test.cc
paddle/framework/tensor_test.cc
+4
-4
paddle/function/BlockExpandOp.cpp
paddle/function/BlockExpandOp.cpp
+1
-1
paddle/function/ContextProjectionOp.cpp
paddle/function/ContextProjectionOp.cpp
+1
-1
paddle/function/CosSimOp.cpp
paddle/function/CosSimOp.cpp
+1
-1
paddle/function/CropOp.cpp
paddle/function/CropOp.cpp
+1
-1
paddle/function/CrossMapNormalOp.cpp
paddle/function/CrossMapNormalOp.cpp
+1
-1
paddle/function/DepthwiseConvOp.cpp
paddle/function/DepthwiseConvOp.cpp
+1
-1
paddle/function/DepthwiseConvOpTest.cpp
paddle/function/DepthwiseConvOpTest.cpp
+1
-1
paddle/function/GemmConvOp.cpp
paddle/function/GemmConvOp.cpp
+1
-1
paddle/function/GemmConvOpTest.cpp
paddle/function/GemmConvOpTest.cpp
+1
-1
paddle/function/Im2ColTest.cpp
paddle/function/Im2ColTest.cpp
+1
-1
paddle/function/MulOp.cpp
paddle/function/MulOp.cpp
+1
-1
paddle/function/PadOp.cpp
paddle/function/PadOp.cpp
+1
-1
paddle/function/RowConvOp.cpp
paddle/function/RowConvOp.cpp
+1
-1
paddle/function/SwitchOp.cpp
paddle/function/SwitchOp.cpp
+1
-1
paddle/gserver/layers/BatchNormBaseLayer.cpp
paddle/gserver/layers/BatchNormBaseLayer.cpp
+1
-1
paddle/gserver/layers/BatchNormalizationLayer.cpp
paddle/gserver/layers/BatchNormalizationLayer.cpp
+3
-3
paddle/gserver/layers/PoolLayer.cpp
paddle/gserver/layers/PoolLayer.cpp
+2
-2
paddle/gserver/tests/LayerGradUtil.cpp
paddle/gserver/tests/LayerGradUtil.cpp
+1
-1
paddle/gserver/tests/test_BatchNorm.cpp
paddle/gserver/tests/test_BatchNorm.cpp
+1
-1
paddle/gserver/tests/test_ConvUnify.cpp
paddle/gserver/tests/test_ConvUnify.cpp
+1
-1
paddle/gserver/tests/test_DetectionOutput.cpp
paddle/gserver/tests/test_DetectionOutput.cpp
+1
-1
paddle/gserver/tests/test_Evaluator.cpp
paddle/gserver/tests/test_Evaluator.cpp
+1
-1
paddle/gserver/tests/test_KmaxSeqScore.cpp
paddle/gserver/tests/test_KmaxSeqScore.cpp
+1
-1
paddle/gserver/tests/test_LayerGrad.cpp
paddle/gserver/tests/test_LayerGrad.cpp
+13
-13
paddle/gserver/tests/test_NetworkCompare.cpp
paddle/gserver/tests/test_NetworkCompare.cpp
+1
-1
paddle/gserver/tests/test_PriorBox.cpp
paddle/gserver/tests/test_PriorBox.cpp
+1
-1
paddle/gserver/tests/test_ProtoDataProvider.cpp
paddle/gserver/tests/test_ProtoDataProvider.cpp
+3
-3
paddle/gserver/tests/test_PyDataProvider.cpp
paddle/gserver/tests/test_PyDataProvider.cpp
+2
-2
paddle/gserver/tests/test_SelectiveFCLayer.cpp
paddle/gserver/tests/test_SelectiveFCLayer.cpp
+4
-4
paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
+1
-1
paddle/gserver/tests/test_WarpCTCLayer.cpp
paddle/gserver/tests/test_WarpCTCLayer.cpp
+1
-1
paddle/math/Matrix.cpp
paddle/math/Matrix.cpp
+3
-3
paddle/math/SparseMatrix.cpp
paddle/math/SparseMatrix.cpp
+1
-1
paddle/math/Vector.cpp
paddle/math/Vector.cpp
+3
-3
paddle/math/tests/test_Allocator.cpp
paddle/math/tests/test_Allocator.cpp
+2
-2
paddle/math/tests/test_BaseMatrix.cpp
paddle/math/tests/test_BaseMatrix.cpp
+1
-1
paddle/math/tests/test_CpuGpuVector.cpp
paddle/math/tests/test_CpuGpuVector.cpp
+1
-1
paddle/math/tests/test_ExecViaCpu.cpp
paddle/math/tests/test_ExecViaCpu.cpp
+1
-1
paddle/math/tests/test_GpuProfiler.cpp
paddle/math/tests/test_GpuProfiler.cpp
+1
-1
paddle/math/tests/test_Matrix.cpp
paddle/math/tests/test_Matrix.cpp
+1
-1
paddle/math/tests/test_SparseMatrix.cpp
paddle/math/tests/test_SparseMatrix.cpp
+3
-3
paddle/math/tests/test_TrainingAlgorithm.cpp
paddle/math/tests/test_TrainingAlgorithm.cpp
+1
-1
paddle/math/tests/test_batchTranspose.cpp
paddle/math/tests/test_batchTranspose.cpp
+1
-1
paddle/math/tests/test_matrixCompare.cpp
paddle/math/tests/test_matrixCompare.cpp
+1
-1
paddle/math/tests/test_perturbation.cpp
paddle/math/tests/test_perturbation.cpp
+1
-1
paddle/math/tests/test_sparseMatrixCompare.cpp
paddle/math/tests/test_sparseMatrixCompare.cpp
+1
-1
paddle/memory/detail/buddy_allocator.cc
paddle/memory/detail/buddy_allocator.cc
+1
-1
paddle/memory/detail/system_allocator.cc
paddle/memory/detail/system_allocator.cc
+1
-1
paddle/memory/detail/system_allocator.h
paddle/memory/detail/system_allocator.h
+1
-1
paddle/memory/detail/system_allocator_test.cc
paddle/memory/detail/system_allocator_test.cc
+1
-1
paddle/memory/memcpy.cc
paddle/memory/memcpy.cc
+1
-1
paddle/memory/memcpy.h
paddle/memory/memcpy.h
+1
-1
paddle/memory/memory.cc
paddle/memory/memory.cc
+1
-1
paddle/memory/memory_test.cc
paddle/memory/memory_test.cc
+1
-1
paddle/operators/detail/strided_memcpy.h
paddle/operators/detail/strided_memcpy.h
+1
-1
paddle/operators/math/im2col_test.cc
paddle/operators/math/im2col_test.cc
+2
-2
paddle/operators/math/math_function_test.cc
paddle/operators/math/math_function_test.cc
+1
-1
paddle/operators/strided_memcpy_test.cc
paddle/operators/strided_memcpy_test.cc
+2
-2
paddle/platform/device_context.cc
paddle/platform/device_context.cc
+1
-1
paddle/platform/device_context.h
paddle/platform/device_context.h
+2
-2
paddle/platform/enforce.h
paddle/platform/enforce.h
+2
-2
paddle/platform/enforce_test.cc
paddle/platform/enforce_test.cc
+1
-1
paddle/platform/gpu_info.h
paddle/platform/gpu_info.h
+1
-1
paddle/platform/variant.h
paddle/platform/variant.h
+1
-1
paddle/pserver/test/SocketTest.cpp
paddle/pserver/test/SocketTest.cpp
+1
-1
paddle/pserver/test/test_ProtoServer.cpp
paddle/pserver/test/test_ProtoServer.cpp
+1
-1
paddle/pybind/pybind.cc
paddle/pybind/pybind.cc
+6
-6
paddle/pybind/tensor_py.h
paddle/pybind/tensor_py.h
+1
-1
paddle/string/to_string_test.cc
paddle/string/to_string_test.cc
+1
-1
paddle/trainer/MergeModel.cpp
paddle/trainer/MergeModel.cpp
+1
-1
paddle/trainer/tests/test_Compare.cpp
paddle/trainer/tests/test_Compare.cpp
+1
-1
paddle/trainer/tests/test_CompareSparse.cpp
paddle/trainer/tests/test_CompareSparse.cpp
+2
-2
paddle/trainer/tests/test_Trainer.cpp
paddle/trainer/tests/test_Trainer.cpp
+2
-2
paddle/trainer/tests/test_TrainerOnePass.cpp
paddle/trainer/tests/test_TrainerOnePass.cpp
+3
-3
paddle/trainer/tests/test_recurrent_machine_generation.cpp
paddle/trainer/tests/test_recurrent_machine_generation.cpp
+1
-1
paddle/utils/Flags.cpp
paddle/utils/Flags.cpp
+1
-1
paddle/utils/Util.h
paddle/utils/Util.h
+1
-1
paddle/utils/Version.h
paddle/utils/Version.h
+1
-1
未找到文件。
cmake/configure.cmake
浏览文件 @
1172f249
...
...
@@ -53,7 +53,8 @@ if(NOT WITH_GPU)
list
(
APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu
)
else
()
add_definitions
(
-DPADDLE_WITH_GPU
)
add_definitions
(
-DPADDLE_WITH_CUDA
)
FIND_PACKAGE
(
CUDA REQUIRED
)
if
(
${
CUDA_VERSION_MAJOR
}
VERSION_LESS 7
)
...
...
paddle/api/Util.cpp
浏览文件 @
1172f249
...
...
@@ -47,7 +47,7 @@ bool isUsingGpu() { return FLAGS_use_gpu; }
void
setUseGpu
(
bool
useGpu
)
{
FLAGS_use_gpu
=
useGpu
;
}
bool
isGpuVersion
()
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
return
false
;
#else
return
true
;
...
...
paddle/capi/Matrix.cpp
浏览文件 @
1172f249
...
...
@@ -46,7 +46,7 @@ paddle_error paddle_matrix_set_row(paddle_matrix mat,
if
(
rowID
>=
ptr
->
mat
->
getHeight
())
return
kPD_OUT_OF_RANGE
;
paddle
::
real
*
buf
=
ptr
->
mat
->
getRowBuf
(
rowID
);
size_t
width
=
ptr
->
mat
->
getWidth
();
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
hl_memcpy
(
buf
,
rowArray
,
sizeof
(
paddle
::
real
)
*
width
);
#else
std
::
copy
(
rowArray
,
rowArray
+
width
,
buf
);
...
...
paddle/framework/grad_op_builder_test.cc
浏览文件 @
1172f249
...
...
@@ -183,4 +183,4 @@ TEST(GradOpDescBuilder, IOIgnoredInGradient) {
{
f
::
GradVarName
(
"in3_1"
),
f
::
GradVarName
(
"in3_2"
)}));
delete
forw_op
;
delete
grad_op
;
}
\ No newline at end of file
}
paddle/framework/lod_tensor.h
浏览文件 @
1172f249
...
...
@@ -15,7 +15,7 @@
#pragma once
#include <memory>
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>
...
...
@@ -29,7 +29,7 @@
namespace
paddle
{
namespace
framework
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
template
<
typename
T
>
using
Vector
=
std
::
vector
<
T
>
;
#else
...
...
paddle/framework/op_proto_maker_test.cc
浏览文件 @
1172f249
...
...
@@ -48,4 +48,4 @@ TEST(ProtoMaker, DuplicatedInOut) {
paddle
::
framework
::
OpAttrChecker
op_checker
;
auto
proto_maker
=
TestInOutProtoMaker
(
&
op_proto
,
&
op_checker
);
ASSERT_THROW
(
proto_maker
.
Validate
(),
paddle
::
platform
::
EnforceNotMet
);
}
\ No newline at end of file
}
paddle/framework/op_registry.h
浏览文件 @
1172f249
...
...
@@ -211,7 +211,7 @@ class OpKernelRegistrar : public Registrar {
// TODO(fengjiayi): The following macros
// seems ugly, do we have better method?
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
#define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU)
#else
#define USE_OP_KERNEL(op_type) \
...
...
paddle/framework/op_registry_test.cc
浏览文件 @
1172f249
...
...
@@ -183,4 +183,4 @@ class CosineOpComplete : public paddle::framework::CosineOp {
TEST
(
OperatorRegistrar
,
Test
)
{
using
namespace
paddle
::
framework
;
OperatorRegistrar
<
CosineOpComplete
,
CosineOpProtoAndCheckerMaker
>
reg
(
"cos"
);
}
\ No newline at end of file
}
paddle/framework/operator.cc
浏览文件 @
1172f249
...
...
@@ -25,7 +25,7 @@ Eigen::DefaultDevice& ExecutionContext::GetEigenDevice<
return
*
device_context_
.
GetEigenDevice
<
platform
::
CPUPlace
>
();
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
>
Eigen
::
GpuDevice
&
ExecutionContext
::
GetEigenDevice
<
platform
::
GPUPlace
,
Eigen
::
GpuDevice
>
()
const
{
...
...
paddle/framework/tensor_impl.h
浏览文件 @
1172f249
...
...
@@ -65,7 +65,7 @@ inline T* Tensor::mutable_data(platform::Place place) {
holder_
.
reset
(
new
PlaceholderImpl
<
T
,
platform
::
CPUPlace
>
(
boost
::
get
<
platform
::
CPUPlace
>
(
place
),
size
));
}
else
if
(
platform
::
is_gpu_place
(
place
))
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
PADDLE_THROW
(
"'GPUPlace' is not supported in CPU only device."
);
}
#else
...
...
@@ -103,7 +103,7 @@ inline void Tensor::CopyFrom(const Tensor& src,
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
boost
::
get
<
platform
::
CPUPlace
>
(
src_place
),
src_ptr
,
size
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
else
if
(
platform
::
is_gpu_place
(
src_place
)
&&
platform
::
is_cpu_place
(
dst_place
))
{
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
...
...
paddle/framework/tensor_test.cc
浏览文件 @
1172f249
...
...
@@ -74,7 +74,7 @@ TEST(Tensor, MutableData) {
EXPECT_EQ
(
p1
,
p2
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
{
Tensor
src_tensor
;
float
*
p1
=
nullptr
;
...
...
@@ -126,7 +126,7 @@ TEST(Tensor, ShareDataWith) {
ASSERT_EQ
(
src_tensor
.
data
<
int
>
(),
dst_tensor
.
data
<
int
>
());
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
{
Tensor
src_tensor
;
Tensor
dst_tensor
;
...
...
@@ -163,7 +163,7 @@ TEST(Tensor, Slice) {
EXPECT_EQ
(
src_data_address
+
3
*
4
*
1
*
sizeof
(
int
),
slice_data_address
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
{
Tensor
src_tensor
;
src_tensor
.
mutable_data
<
double
>
(
make_ddim
({
6
,
9
}),
GPUPlace
());
...
...
@@ -218,7 +218,7 @@ TEST(Tensor, CopyFrom) {
EXPECT_EQ
(
dst_ptr
[
i
],
slice_ptr
[
i
]);
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
{
Tensor
src_tensor
;
Tensor
gpu_tensor
;
...
...
paddle/function/BlockExpandOp.cpp
浏览文件 @
1172f249
...
...
@@ -194,7 +194,7 @@ public:
REGISTER_TYPED_FUNC
(
BlockExpand
,
CPU
,
BlockExpandForward
);
REGISTER_TYPED_FUNC
(
BlockExpandGrad
,
CPU
,
BlockExpandBackward
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
BlockExpand
,
GPU
,
BlockExpandForward
);
REGISTER_TYPED_FUNC
(
BlockExpandGrad
,
GPU
,
BlockExpandBackward
);
#endif
...
...
paddle/function/ContextProjectionOp.cpp
浏览文件 @
1172f249
...
...
@@ -395,7 +395,7 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
REGISTER_TYPED_FUNC
(
ContextProjectionBackward
,
CPU
,
ContextProjectionBackwardFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
ContextProjectionForward
,
GPU
,
ContextProjectionForwardFunc
);
...
...
paddle/function/CosSimOp.cpp
浏览文件 @
1172f249
...
...
@@ -233,7 +233,7 @@ private:
REGISTER_TYPED_FUNC
(
CosSimForward
,
CPU
,
CosSimForwardFunc
);
REGISTER_TYPED_FUNC
(
CosSimBackward
,
CPU
,
CosSimBackwardFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
CosSimForward
,
GPU
,
CosSimForwardFunc
);
REGISTER_TYPED_FUNC
(
CosSimBackward
,
GPU
,
CosSimBackwardFunc
);
#endif
...
...
paddle/function/CropOp.cpp
浏览文件 @
1172f249
...
...
@@ -169,7 +169,7 @@ private:
REGISTER_TYPED_FUNC
(
Crop
,
CPU
,
CropFunc
);
REGISTER_TYPED_FUNC
(
CropGrad
,
CPU
,
CropGradFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
Crop
,
GPU
,
CropFunc
);
REGISTER_TYPED_FUNC
(
CropGrad
,
GPU
,
CropGradFunc
);
#endif
...
...
paddle/function/CrossMapNormalOp.cpp
浏览文件 @
1172f249
...
...
@@ -336,7 +336,7 @@ private:
REGISTER_TYPED_FUNC
(
CrossMapNormal
,
CPU
,
CrossMapNormalFunc
);
REGISTER_TYPED_FUNC
(
CrossMapNormalGrad
,
CPU
,
CrossMapNormalGradFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
CrossMapNormal
,
GPU
,
CrossMapNormalFunc
);
REGISTER_TYPED_FUNC
(
CrossMapNormalGrad
,
GPU
,
CrossMapNormalGradFunc
);
#endif
...
...
paddle/function/DepthwiseConvOp.cpp
浏览文件 @
1172f249
...
...
@@ -292,7 +292,7 @@ REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
REGISTER_TYPED_FUNC
(
DepthwiseConvGradFilter
,
CPU
,
DepthwiseConvGradFilterFunction
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
DepthwiseConv
,
GPU
,
DepthwiseConvFunction
);
REGISTER_TYPED_FUNC
(
DepthwiseConvGradInput
,
GPU
,
...
...
paddle/function/DepthwiseConvOpTest.cpp
浏览文件 @
1172f249
...
...
@@ -17,7 +17,7 @@ limitations under the License. */
namespace
paddle
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
DepthwiseConv
,
Forward
)
{
DepthwiseConvolution
<
DEVICE_TYPE_CPU
,
DEVICE_TYPE_GPU
>
(
"GemmConv-CPU"
,
"DepthwiseConv-GPU"
,
forward
);
...
...
paddle/function/GemmConvOp.cpp
浏览文件 @
1172f249
...
...
@@ -340,7 +340,7 @@ public:
REGISTER_TYPED_FUNC
(
GemmConv
,
CPU
,
GemmConvFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradInput
,
CPU
,
GemmConvGradInputFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradFilter
,
CPU
,
GemmConvGradFilterFunction
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
GemmConv
,
GPU
,
GemmConvFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradInput
,
GPU
,
GemmConvGradInputFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradFilter
,
GPU
,
GemmConvGradFilterFunction
);
...
...
paddle/function/GemmConvOpTest.cpp
浏览文件 @
1172f249
...
...
@@ -24,7 +24,7 @@ TEST(GemmConv, NaiveConv) {
"NaiveConv-CPU"
,
"GemmConv-CPU"
,
forward
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
GemmConv
,
Forward
)
{
Convolution
<
DEVICE_TYPE_CPU
,
DEVICE_TYPE_GPU
>
(
"GemmConv-CPU"
,
"GemmConv-GPU"
,
forward
);
...
...
paddle/function/Im2ColTest.cpp
浏览文件 @
1172f249
...
...
@@ -116,7 +116,7 @@ void TestIm2ColFunctor() {
TEST
(
Im2ColFunctor
,
CPU
)
{
TestIm2ColFunctor
<
DEVICE_TYPE_CPU
,
float
>
();
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
Im2ColFunctor
,
GPU
)
{
TestIm2ColFunctor
<
DEVICE_TYPE_GPU
,
float
>
();
}
...
...
paddle/function/MulOp.cpp
浏览文件 @
1172f249
...
...
@@ -341,7 +341,7 @@ private:
};
REGISTER_TYPED_FUNC
(
MulOp
,
CPU
,
MulFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
MulOp
,
GPU
,
MulFunc
);
#endif
}
// namespace paddle
paddle/function/PadOp.cpp
浏览文件 @
1172f249
...
...
@@ -207,7 +207,7 @@ private:
REGISTER_TYPED_FUNC
(
Pad
,
CPU
,
PadFunc
);
REGISTER_TYPED_FUNC
(
PadGrad
,
CPU
,
PadGradFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
Pad
,
GPU
,
PadFunc
);
REGISTER_TYPED_FUNC
(
PadGrad
,
GPU
,
PadGradFunc
);
#endif
...
...
paddle/function/RowConvOp.cpp
浏览文件 @
1172f249
...
...
@@ -217,7 +217,7 @@ public:
REGISTER_TYPED_FUNC
(
RowConv
,
CPU
,
RowConvFunc
);
REGISTER_TYPED_FUNC
(
RowConvGrad
,
CPU
,
RowConvGradFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
RowConv
,
GPU
,
RowConvFunc
);
REGISTER_TYPED_FUNC
(
RowConvGrad
,
GPU
,
RowConvGradFunc
);
#endif
...
...
paddle/function/SwitchOp.cpp
浏览文件 @
1172f249
...
...
@@ -132,7 +132,7 @@ public:
REGISTER_TYPED_FUNC
(
NCHW2NHWC
,
CPU
,
NCHW2NHWCFunc
);
REGISTER_TYPED_FUNC
(
NHWC2NCHW
,
CPU
,
NHWC2NCHWFunc
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
REGISTER_TYPED_FUNC
(
NCHW2NHWC
,
GPU
,
NCHW2NHWCFunc
);
REGISTER_TYPED_FUNC
(
NHWC2NCHW
,
GPU
,
NHWC2NCHWFunc
);
#endif
...
...
paddle/gserver/layers/BatchNormBaseLayer.cpp
浏览文件 @
1172f249
...
...
@@ -16,7 +16,7 @@ limitations under the License. */
#include "BatchNormalizationLayer.h"
#include "Layer.h"
#include "paddle/utils/Stat.h"
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include "CudnnBatchNormLayer.h"
#endif
...
...
paddle/gserver/layers/BatchNormalizationLayer.cpp
浏览文件 @
1172f249
...
...
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Stat.h"
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include "hl_batch_transpose.h"
#endif
#include "BatchNormalizationLayer.h"
...
...
@@ -90,7 +90,7 @@ void BatchNormalizationLayer::expandMat(const MatrixPtr& in, MatrixPtr& out) {
size_t
batchSize
=
in
->
getHeight
();
CHECK_EQ
(
out
->
getHeight
(),
batchSize
*
imgPixels_
);
if
(
useGpu_
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
LOG
(
FATAL
)
<<
"paddle is compiled only for cpu"
;
#else
batchTranspose
(
...
...
@@ -127,7 +127,7 @@ void BatchNormalizationLayer::shrinkMat(const MatrixPtr& in, MatrixPtr& out) {
}
CHECK_EQ
(
in
->
getHeight
(),
static_cast
<
size_t
>
(
batchSize
*
imgPixels_
));
if
(
useGpu_
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
LOG
(
FATAL
)
<<
"paddle is compiled only for cpu"
;
#else
batchTranspose
(
...
...
paddle/gserver/layers/PoolLayer.cpp
浏览文件 @
1172f249
...
...
@@ -15,7 +15,7 @@ limitations under the License. */
#include "PoolLayer.h"
#include "PoolProjectionLayer.h"
#include "paddle/utils/Logging.h"
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include "CudnnPoolLayer.h"
#endif
namespace
paddle
{
...
...
@@ -53,7 +53,7 @@ Layer* PoolLayer::create(const LayerConfig& config) {
const
std
::
string
&
pool
=
config
.
inputs
(
0
).
pool_conf
().
pool_type
();
if
(
pool
==
"max-projection"
||
pool
==
"avg-projection"
)
{
return
new
PoolProjectionLayer
(
config
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
}
else
if
(
CudnnPoolLayer
::
typeCheck
(
pool
))
{
return
new
CudnnPoolLayer
(
config
);
#endif
...
...
paddle/gserver/tests/LayerGradUtil.cpp
浏览文件 @
1172f249
...
...
@@ -674,7 +674,7 @@ void testLayerGradKernel(TestConfig testConf,
bool
useGpu
,
bool
useWeight
,
float
epsilon
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
return
;
#endif
FLAGS_use_gpu
=
useGpu
;
...
...
paddle/gserver/tests/test_BatchNorm.cpp
浏览文件 @
1172f249
...
...
@@ -119,7 +119,7 @@ TEST(Layer, batchNorm) {
CHECK_EQ
(
static_cast
<
int
>
(
convLayer
->
getOutputValue
()
->
getWidth
()),
576
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
void
batchNormInference
(
int
n
,
int
c
,
int
h
,
int
w
)
{
MatrixPtr
input
=
std
::
make_shared
<
GpuMatrix
>
(
n
,
c
*
h
*
w
);
MatrixPtr
cudnnOut
=
std
::
make_shared
<
GpuMatrix
>
(
n
,
c
*
h
*
w
);
...
...
paddle/gserver/tests/test_ConvUnify.cpp
浏览文件 @
1172f249
...
...
@@ -117,7 +117,7 @@ MatrixPtr doOneConvTest(size_t imgSize,
}
TEST
(
Layer
,
convParaUnified
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
MatrixPtr
input
,
resultCpu
,
resultGpu
;
/// TEST1 for conv ///
...
...
paddle/gserver/tests/test_DetectionOutput.cpp
浏览文件 @
1172f249
...
...
@@ -150,7 +150,7 @@ TEST(Layer, detectionOutputLayerFwd) {
useGpu
,
result2
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
// GPU case 1.
useGpu
=
true
;
inputLoc
=
Matrix
::
create
(
1
,
16
,
false
,
useGpu
);
...
...
paddle/gserver/tests/test_Evaluator.cpp
浏览文件 @
1172f249
...
...
@@ -51,7 +51,7 @@ void testEvaluator(TestConfig testConf,
string
testEvaluatorName
,
size_t
batchSize
,
bool
useGpu
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
return
;
#endif
FLAGS_use_gpu
=
useGpu
;
...
...
paddle/gserver/tests/test_KmaxSeqScore.cpp
浏览文件 @
1172f249
...
...
@@ -97,7 +97,7 @@ TEST(Layer, kmaxSeqScoreLayer) {
Matrix
::
create
(
subSeqStartPosition
.
back
(),
1
,
false
,
false
);
std
::
vector
<
bool
>
mode
=
{
false
};
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
mode
.
push_back
(
true
);
#endif
...
...
paddle/gserver/tests/test_LayerGrad.cpp
浏览文件 @
1172f249
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <cudnn.h>
#endif
#include <gtest/gtest.h>
...
...
@@ -258,7 +258,7 @@ void testProjectionConv(size_t groups, bool isDeconv) {
true
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
Projection
,
conv
)
{
/// test ConvProjection
testProjectionConv
(
1
,
false
);
...
...
@@ -422,7 +422,7 @@ TEST(Layer, depthwiseConvLayer) {
// 'depthwise_conv' is a sepecial case of 'exconv' whose
// groups size equals to the input channels size.
testDepthwiseConvLayer
(
"exconv"
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testDepthwiseConvLayer
(
"exconv"
,
/* useGpu= */
true
);
#endif
}
...
...
@@ -480,7 +480,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
convLayer
)
{
testConvLayer
(
"exconv"
,
/* trans= */
false
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testConvLayer
(
"exconv"
,
/* trans= */
false
,
/* useGpu= */
true
);
testConvLayer
(
"cudnn_conv"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
...
...
@@ -525,7 +525,7 @@ TEST(Layer, convTransLayer) {
for
(
auto
useGpu
:
{
false
,
true
})
{
testConvTransLayer
(
"exconvt"
,
/* trans= */
false
,
/* useGpu= */
useGpu
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testConvTransLayer
(
"cudnn_convt"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
}
...
...
@@ -638,7 +638,7 @@ TEST(Layer, SelectiveFullyConnectedLayer) {
/* trans= */
false
,
/* useGup= */
false
,
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testLayerGrad
(
config
,
"selective_fc"
,
100
,
...
...
@@ -1210,7 +1210,7 @@ void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
testLayerGrad
(
config
,
"pool"
,
100
,
trans
,
useGpu
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
void
testPoolLayer2
(
const
string
&
poolType
,
bool
trans
,
bool
useGpu
)
{
TestConfig
config
;
config
.
inputDefs
.
push_back
({
INPUT_DATA
,
"layer_0"
,
3200
,
0
});
...
...
@@ -1236,7 +1236,7 @@ TEST(Layer, PoolLayer) {
testPoolLayer
(
"avg-projection"
,
/* trans= */
false
,
/* useGpu= */
false
);
testPoolLayer
(
"max-projection"
,
/* trans= */
false
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testPoolLayer
(
"avg-projection"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPoolLayer
(
"max-projection"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPoolLayer
(
"cudnn-max-pool"
,
/* trans= */
false
,
/* useGpu= */
true
);
...
...
@@ -1309,7 +1309,7 @@ void testPool3DLayer(const string& poolType, bool trans, bool useGpu) {
TEST
(
Layer
,
Pool3DLayer
)
{
testPool3DLayer
(
"avg"
,
/* trans= */
false
,
/* useGpu= */
false
);
testPool3DLayer
(
"max"
,
/* trans= */
false
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testPool3DLayer
(
"avg"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPool3DLayer
(
"max"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
...
...
@@ -1695,7 +1695,7 @@ void testBatchNormLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
BatchNormalizationLayer
)
{
testBatchNormLayer
(
"batch_norm"
,
false
,
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testBatchNormLayer
(
"batch_norm"
,
false
,
true
);
if
(
hl_get_cudnn_lib_version
()
>=
int
(
4000
))
{
testBatchNormLayer
(
"cudnn_batch_norm"
,
false
,
true
);
...
...
@@ -1744,7 +1744,7 @@ void testBatchNorm3DLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
testBatchNorm3DLayer
)
{
testBatchNorm3DLayer
(
"batch_norm"
,
false
,
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testBatchNorm3DLayer
(
"batch_norm"
,
false
,
true
);
if
(
hl_get_cudnn_lib_version
()
>=
int
(
4000
))
{
testBatchNorm3DLayer
(
"cudnn_batch_norm"
,
false
,
true
);
...
...
@@ -2262,7 +2262,7 @@ void test3DConvLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
test3DConvLayer
)
{
test3DConvLayer
(
"conv3d"
,
/* trans= */
false
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
test3DConvLayer
(
"conv3d"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
}
...
...
@@ -2339,7 +2339,7 @@ void test3DDeConvLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
test3DDeConvLayer
)
{
test3DDeConvLayer
(
"deconv3d"
,
/* trans= */
false
,
/* useGpu= */
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
test3DDeConvLayer
(
"deconv3d"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
}
...
...
paddle/gserver/tests/test_NetworkCompare.cpp
浏览文件 @
1172f249
...
...
@@ -243,7 +243,7 @@ TEST(Compare, concat_slice) {
compareNetwork
(
config_file_a
,
config_file_b
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
Compare
,
img_pool
)
{
std
::
string
config_file_a
=
"./gserver/tests/img_pool_a.conf"
;
std
::
string
config_file_b
=
"./gserver/tests/img_pool_b.conf"
;
...
...
paddle/gserver/tests/test_PriorBox.cpp
浏览文件 @
1172f249
...
...
@@ -151,7 +151,7 @@ TEST(Layer, priorBoxLayerFwd) {
useGpu
,
result
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
// reset the input parameters
variance
[
1
]
=
0.1
;
variance
[
3
]
=
0.2
;
...
...
paddle/gserver/tests/test_ProtoDataProvider.cpp
浏览文件 @
1172f249
...
...
@@ -485,7 +485,7 @@ TEST(ProtoDataProvider, test) {
// Currently in async mode, useGpu is not supported
continue
;
}
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
continue
;
}
...
...
@@ -525,7 +525,7 @@ TEST(ProtoDataProvider, constant_slots) {
for
(
int
numConstantSlots
:
{
1
,
2
})
{
for
(
int
useGpu
:
numTwoArray
)
{
for
(
int
dataCompression
:
numTwoArray
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
continue
;
}
...
...
@@ -708,7 +708,7 @@ TEST(ProtoSequenceDataProvider, test) {
// Currently in async mode, useGpu is not supported
continue
;
}
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
continue
;
}
...
...
paddle/gserver/tests/test_PyDataProvider.cpp
浏览文件 @
1172f249
...
...
@@ -37,7 +37,7 @@ TEST(PyDataProvider, py_fill_slots) {
config
.
clear_files
();
std
::
string
dataFile
=
"gserver/tests/pyDataProvider/pyDataProviderList"
;
config
.
set_files
(
dataFile
);
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
bool
useGpu
=
false
;
#else
bool
useGpu
=
true
;
...
...
@@ -71,7 +71,7 @@ TEST(PyDataProvider, py_fill_nest_slots) {
std
::
string
dataFile
=
"gserver/tests/pyDataProvider/pyDataProviderList"
;
config
.
set_files
(
dataFile
);
EXPECT_EQ
(
config
.
IsInitialized
(),
true
);
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
bool
useGpu
=
false
;
#else
bool
useGpu
=
true
;
...
...
paddle/gserver/tests/test_SelectiveFCLayer.cpp
浏览文件 @
1172f249
...
...
@@ -321,7 +321,7 @@ TEST(Layer, SelectiveFcLayer_train_dense_mul) {
"filelist=gserver/tests/SelectiveFcTest/dense_mul_list"
;
for
(
auto
useGpu
:
{
false
,
true
})
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
break
;
}
...
...
@@ -388,7 +388,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
outMatSelfc
->
getWidth
(),
outMatSelfc
->
getElementCnt
()));
cpuOutMatSelfc
->
copyFrom
(
*
outMatSelfc
,
HPPL_STREAM_DEFAULT
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
hl_stream_synchronize
(
HPPL_STREAM_DEFAULT
);
}
...
...
@@ -418,7 +418,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
MatrixPtr
cpuOutMatFc
(
new
CpuMatrix
(
outMatFc
->
getHeight
(),
outMatFc
->
getWidth
()));
cpuOutMatFc
->
copyFrom
(
*
outMatFc
,
HPPL_STREAM_DEFAULT
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
if
(
useGpu
)
{
hl_stream_synchronize
(
HPPL_STREAM_DEFAULT
);
}
...
...
@@ -443,7 +443,7 @@ TEST(Layer, SelectiveFcLayer_train_sparse_mul) {
selLayerConfig
.
set_size
(
fcLayerWidth
);
testSelectiveFcLayerTrainSparseMul
(
selLayerConfig
,
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testSelectiveFcLayerTrainSparseMul
(
selLayerConfig
,
true
);
#endif
}
...
...
paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
浏览文件 @
1172f249
...
...
@@ -195,7 +195,7 @@ TEST(Layer, SeqSliceLayer) {
vector
<
vector
<
real
>>
ends
;
std
::
vector
<
bool
>
mode
=
{
false
};
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
mode
.
push_back
(
true
);
#endif
genSeqInfo
(
seqStartPos
,
subSeqStartPos
);
...
...
paddle/gserver/tests/test_WarpCTCLayer.cpp
浏览文件 @
1172f249
...
...
@@ -199,7 +199,7 @@ TEST(Layer, WarpCTCLayer) {
for
(
auto
batchSize
:
{
1
,
10
,
32
})
{
for
(
auto
normByTimes
:
{
false
,
true
})
{
for
(
auto
useGpu
:
{
false
,
true
})
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
continue
;
#endif
LOG
(
INFO
)
<<
"layerSize="
<<
layerSize
<<
" batchSize="
<<
batchSize
...
...
paddle/math/Matrix.cpp
浏览文件 @
1172f249
...
...
@@ -670,7 +670,7 @@ void GpuMatrix::leftMul(Matrix& a, real scaleAB, real scaleT) {
}
void
GpuMatrix
::
selectRows
(
Matrix
&
table
,
IVector
&
ids
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
CHECK
(
dynamic_cast
<
GpuMatrix
*>
(
&
table
));
CHECK
(
table
.
useGpu
());
CHECK
(
ids
.
useGpu
());
...
...
@@ -694,7 +694,7 @@ void GpuMatrix::selectRows(Matrix& table, IVector& ids) {
}
void
GpuMatrix
::
addToRows
(
Matrix
&
table
,
IVector
&
ids
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
CHECK
(
dynamic_cast
<
GpuMatrix
*>
(
&
table
));
CHECK
(
table
.
useGpu
());
CHECK
(
ids
.
useGpu
());
...
...
@@ -741,7 +741,7 @@ void GpuMatrix::rowMax(Matrix& max) {
}
void
GpuMatrix
::
rowMax
(
IVector
&
maxIds
,
Matrix
&
maxVal
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
CHECK
(
maxIds
.
useGpu
()
&&
maxVal
.
useGpu
())
<<
"Matrix type are not equal"
;
size_t
numSamples
=
getHeight
();
size_t
beam
=
maxVal
.
getWidth
();
...
...
paddle/math/SparseMatrix.cpp
浏览文件 @
1172f249
...
...
@@ -836,7 +836,7 @@ void GpuSparseMatrix::zeroMem() {
}
void
GpuSparseMatrix
::
rowMax
(
IVector
&
maxIds
,
Matrix
&
maxVal
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
CHECK
(
maxIds
.
useGpu
()
&&
maxVal
.
useGpu
())
<<
"Matrix type are not equal"
;
size_t
numSamples
=
getHeight
();
size_t
beam
=
maxVal
.
getWidth
();
...
...
paddle/math/Vector.cpp
浏览文件 @
1172f249
...
...
@@ -172,7 +172,7 @@ void GpuVectorT<T>::isEqualTo(const VectorT<T>& b, const T& value) {
template
<
class
T
>
void
GpuVectorT
<
T
>::
selectFrom
(
const
VectorT
<
T
>&
src
,
const
VectorT
<
int
>&
ids
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
hl_vector_select_from
<
T
>
(
this
->
getData
(),
this
->
getSize
(),
src
.
getData
(),
...
...
@@ -850,7 +850,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
size_t
size
)
:
sync_
(
nullptr
)
{
CHECK_LE
(
offset
+
size
,
static_cast
<
size_t
>
(
src
.
getSize
()));
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
SyncedFlag
*
flag
=
src
.
getSync
();
if
(
*
flag
==
DATA_AT_CPU
)
{
src
.
copyToGpu
();
// will set synchronous data between CPU and GPU
...
...
@@ -861,7 +861,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
auto
cMemHandle
=
(
src
.
getVector
(
false
))
->
getMemoryHandle
();
cpuVectorT_
=
std
::
make_shared
<
CpuVectorT
<
T
>>
(
size
,
std
::
dynamic_pointer_cast
<
CpuMemoryHandle
>
(
cMemHandle
),
offset
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
auto
gMemHandle
=
(
src
.
getVector
(
true
))
->
getMemoryHandle
();
gpuVectorT_
=
std
::
make_shared
<
GpuVectorT
<
T
>>
(
size
,
std
::
dynamic_pointer_cast
<
GpuMemoryHandle
>
(
gMemHandle
),
offset
);
...
...
paddle/math/tests/test_Allocator.cpp
浏览文件 @
1172f249
...
...
@@ -68,7 +68,7 @@ void testPoolAllocator() {
TEST
(
Allocator
,
Pool
)
{
testPoolAllocator
<
CpuAllocator
>
();
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testPoolAllocator
<
GpuAllocator
>
();
#endif
}
...
...
@@ -92,7 +92,7 @@ TEST(MemoryHandle, Cpu) {
EXPECT_EQ
(
ptr1
,
ptr2
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
MemoryHandle
,
Gpu
)
{
int
numGpu
=
hl_get_device_count
();
...
...
paddle/math/tests/test_BaseMatrix.cpp
浏览文件 @
1172f249
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
/**
* This test file use autotest::AutoCompare and cmpWithoutArg to compares the
* implementation of CPU and GPU member function in
...
...
paddle/math/tests/test_CpuGpuVector.cpp
浏览文件 @
1172f249
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <gtest/gtest.h>
#include "paddle/math/Vector.h"
...
...
paddle/math/tests/test_ExecViaCpu.cpp
浏览文件 @
1172f249
...
...
@@ -94,7 +94,7 @@ void testWrapper(F&& f) {
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
ExecViaCpu
,
test1
)
{
testWrapper
(
f
);
testWrapper
(
&
f
);
...
...
paddle/math/tests/test_GpuProfiler.cpp
浏览文件 @
1172f249
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <gtest/gtest.h>
#include "paddle/math/Matrix.h"
...
...
paddle/math/tests/test_Matrix.cpp
浏览文件 @
1172f249
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
/**
* This test file use autotest::AutoCompare and cmpWithArg to compares the
* implementation of CPU and GPU member function in Matrix.cpp.
...
...
paddle/math/tests/test_SparseMatrix.cpp
浏览文件 @
1172f249
...
...
@@ -47,7 +47,7 @@ struct MatrixPara {
SparseFormat
format
;
};
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
void
test_sparse_matrix_mul
(
MatrixPara
paraA
,
MatrixPara
paraB
,
MatrixPara
paraC
)
{
...
...
@@ -452,7 +452,7 @@ TEST(Matrix, SparseMatrixCSRFormatTrimFrom) {
matB
->
trimFrom
(
*
mat
);
checkSMatrixEqual2
(
matA
,
matB
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
GpuSparseMatrixPtr
matC
=
std
::
make_shared
<
GpuSparseMatrix
>
(
height
,
trimedWidth
,
height
,
FLOAT_VALUE
,
SPARSE_CSR
,
true
);
matC
->
trimFrom
(
*
mat
);
...
...
@@ -546,7 +546,7 @@ TEST(Matrix, SparseMatrixCSCFormatTrimFrom) {
matB
->
trimFrom
(
*
mat
);
checkSMatrixEqual2
(
matA
,
matB
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
GpuSparseMatrixPtr
matC
=
std
::
make_shared
<
GpuSparseMatrix
>
(
height
,
trimedWidth
,
height
,
FLOAT_VALUE
,
SPARSE_CSC
,
true
);
matC
->
trimFrom
(
*
mat
);
...
...
paddle/math/tests/test_TrainingAlgorithm.cpp
浏览文件 @
1172f249
...
...
@@ -91,7 +91,7 @@ int VectorCheckErr(const VectorPtr& vector1, const VectorPtr& vector2) {
typedef
std
::
function
<
void
(
size_t
size
,
bool
useGpu
)
>
testMatrixFunc
;
void
testCase
(
testMatrixFunc
matrixFunc
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
for
(
auto
useGpu
:
{
false
,
true
})
{
#else
for
(
auto
useGpu
:
{
false
})
{
...
...
paddle/math/tests/test_batchTranspose.cpp
浏览文件 @
1172f249
...
...
@@ -17,7 +17,7 @@ limitations under the License. */
using
namespace
paddle
;
// NOLINT
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
MatrixBatchTransTest
,
test_batch_matrix_transpose
)
{
const
int
nx
=
100
;
const
int
ny
=
50
;
...
...
paddle/math/tests/test_matrixCompare.cpp
浏览文件 @
1172f249
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
/// This unittest checks GpuMatrix/CpuMatrix get same result, so disable when
/// only cpu version.
...
...
paddle/math/tests/test_perturbation.cpp
浏览文件 @
1172f249
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <cuda_runtime.h>
#include <gtest/gtest.h>
...
...
paddle/math/tests/test_sparseMatrixCompare.cpp
浏览文件 @
1172f249
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
/// This unittest checks GpuSparseMatrix/CpuSparseMatrix get same result,
// so disable when
/// only cpu version.
...
...
paddle/memory/detail/buddy_allocator.cc
浏览文件 @
1172f249
...
...
@@ -175,7 +175,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
}
BuddyAllocator
::
PoolSet
::
iterator
BuddyAllocator
::
RefillPool
()
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
if
(
system_allocator_
->
UseGpu
())
{
if
((
total_used_
+
total_free_
)
==
0
)
{
// Compute the maximum allocation size for the first allocation.
...
...
paddle/memory/detail/system_allocator.cc
浏览文件 @
1172f249
...
...
@@ -62,7 +62,7 @@ void CPUAllocator::Free(void* p, size_t size, size_t index) {
bool
CPUAllocator
::
UseGpu
()
const
{
return
false
;
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
void
*
GPUAllocator
::
Alloc
(
size_t
&
index
,
size_t
size
)
{
// CUDA documentation doesn't explain if cudaMalloc returns nullptr
...
...
paddle/memory/detail/system_allocator.h
浏览文件 @
1172f249
...
...
@@ -40,7 +40,7 @@ class CPUAllocator : public SystemAllocator {
virtual
bool
UseGpu
()
const
;
};
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
class
GPUAllocator
:
public
SystemAllocator
{
public:
virtual
void
*
Alloc
(
size_t
&
index
,
size_t
size
);
...
...
paddle/memory/detail/system_allocator_test.cc
浏览文件 @
1172f249
...
...
@@ -56,7 +56,7 @@ TEST(CPUAllocator, LockMem) {
TestAllocator
(
a
,
0
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
GPUAllocator
,
Alloc
)
{
paddle
::
memory
::
detail
::
GPUAllocator
a
;
TestAllocator
(
a
,
2048
);
...
...
paddle/memory/memcpy.cc
浏览文件 @
1172f249
...
...
@@ -26,7 +26,7 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
std
::
memcpy
(
dst
,
src
,
num
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
>
void
Copy
<
platform
::
CPUPlace
,
platform
::
GPUPlace
>
(
platform
::
CPUPlace
dst_place
,
void
*
dst
,
...
...
paddle/memory/memcpy.h
浏览文件 @
1172f249
...
...
@@ -33,7 +33,7 @@ namespace memory {
template
<
typename
DstPlace
,
typename
SrcPlace
>
void
Copy
(
DstPlace
,
void
*
dst
,
SrcPlace
,
const
void
*
src
,
size_t
num
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
/**
* \brief Copy memory from one place to another place.
...
...
paddle/memory/memory.cc
浏览文件 @
1172f249
...
...
@@ -62,7 +62,7 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
return
GetCPUBuddyAllocator
()
->
Used
();
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
BuddyAllocator
*
GetGPUBuddyAllocator
(
int
gpu_id
)
{
using
BuddyAllocVec
=
std
::
vector
<
BuddyAllocator
*>
;
...
...
paddle/memory/memory_test.cc
浏览文件 @
1172f249
...
...
@@ -80,7 +80,7 @@ TEST(BuddyAllocator, CPUMultAlloc) {
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
size_t
align
(
size_t
size
,
paddle
::
platform
::
GPUPlace
place
)
{
size
+=
sizeof
(
paddle
::
memory
::
detail
::
Metadata
);
...
...
paddle/operators/detail/strided_memcpy.h
浏览文件 @
1172f249
...
...
@@ -34,7 +34,7 @@ struct StridedMemcpyFunctor<T, 1> {
auto
&
cpu_place
=
boost
::
get
<
platform
::
CPUPlace
>
(
place
);
memory
::
Copy
(
cpu_place
,
dst
,
cpu_place
,
src
,
sizeof
(
T
)
*
dst_dim
.
head
);
}
else
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
auto
&
gpu_place
=
boost
::
get
<
platform
::
GPUPlace
>
(
place
);
auto
&
cuda_ctx
=
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
dev_ctx
);
...
...
paddle/operators/math/im2col_test.cc
浏览文件 @
1172f249
...
...
@@ -71,7 +71,7 @@ void testIm2col() {
context
=
new
paddle
::
platform
::
CPUDeviceContext
(
paddle
::
platform
::
CPUPlace
());
}
else
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
context
=
new
paddle
::
platform
::
CUDADeviceContext
(
paddle
::
platform
::
GPUPlace
());
#else
...
...
@@ -116,7 +116,7 @@ void testIm2col() {
TEST
(
math
,
im2col
)
{
testIm2col
<
paddle
::
platform
::
CPUPlace
>
();
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
testIm2col
<
paddle
::
platform
::
GPUPlace
>
();
#endif
}
paddle/operators/math/math_function_test.cc
浏览文件 @
1172f249
#include "paddle/operators/math/math_function.h"
#include "gtest/gtest.h"
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
math_function
,
notrans_mul_trans
)
{
paddle
::
framework
::
Tensor
input1
;
paddle
::
framework
::
Tensor
input1_gpu
;
...
...
paddle/operators/strided_memcpy_test.cc
浏览文件 @
1172f249
...
...
@@ -72,7 +72,7 @@ TEST(StridedMemcpy, CPUConcat) {
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
StridedMemcpy
,
GPUCrop
)
{
// clang-format off
int
src
[]
=
{
...
...
@@ -157,4 +157,4 @@ TEST(StridedMemcpy, GPUConcat) {
#endif
}
// namespace operators
}
// namespace paddle
\ No newline at end of file
}
// namespace paddle
paddle/platform/device_context.cc
浏览文件 @
1172f249
...
...
@@ -35,7 +35,7 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
Place
CPUDeviceContext
::
GetPlace
()
const
{
return
CPUPlace
();
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
>
Eigen
::
GpuDevice
*
...
...
paddle/platform/device_context.h
浏览文件 @
1172f249
...
...
@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/gpu_info.h"
...
...
@@ -61,7 +61,7 @@ class CPUDeviceContext : public DeviceContext {
std
::
unique_ptr
<
Eigen
::
DefaultDevice
>
eigen_device_
;
};
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
>
struct
EigenDeviceConverter
<
platform
::
GPUPlace
>
{
using
EigenDeviceType
=
Eigen
::
GpuDevice
;
...
...
paddle/platform/enforce.h
浏览文件 @
1172f249
...
...
@@ -29,7 +29,7 @@ limitations under the License. */
#include <cxxabi.h> // for __cxa_demangle
#endif
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
...
...
@@ -113,7 +113,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
}
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
...
...
paddle/platform/enforce_test.cc
浏览文件 @
1172f249
...
...
@@ -213,4 +213,4 @@ TEST(ENFORCE_USER_DEFINED_CLASS, EQ) {
TEST
(
ENFORCE_USER_DEFINED_CLASS
,
NE
)
{
Dims
a
{{
1
,
2
,
3
,
4
}},
b
{{
5
,
6
,
7
,
8
}};
ASSERT_THROW
(
PADDLE_ENFORCE_EQ
(
a
,
b
),
paddle
::
platform
::
EnforceNotMet
);
}
\ No newline at end of file
}
paddle/platform/gpu_info.h
浏览文件 @
1172f249
...
...
@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
#include <cuda_runtime.h>
#include <stddef.h>
...
...
paddle/platform/variant.h
浏览文件 @
1172f249
...
...
@@ -16,7 +16,7 @@
#include <boost/config.hpp>
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
// Because boost's variadic templates has bug on nvcc, boost will disable
// variadic template support when GPU enabled on nvcc.
...
...
paddle/pserver/test/SocketTest.cpp
浏览文件 @
1172f249
...
...
@@ -215,7 +215,7 @@ int main(int argc, char** argv) {
uint64_t
dataSize
=
FLAGS_dim
*
sizeof
(
real
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
GpuVector
gpuParam
(
FLAGS_dim
);
GpuVector
gpuGrad
(
FLAGS_dim
);
#else
...
...
paddle/pserver/test/test_ProtoServer.cpp
浏览文件 @
1172f249
...
...
@@ -99,7 +99,7 @@ TEST(ProtoServer, regular) {
}
TEST
(
ProtoServer
,
extended
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
ProtoClient
*
client
;
if
(
FLAGS_rdma_tcp
==
"rdma"
)
client
=
new
ProtoClient
(
FLAGS_server_addr
,
FLAGS_port
,
F_RDMA
);
...
...
paddle/pybind/pybind.cc
浏览文件 @
1172f249
...
...
@@ -34,7 +34,7 @@ static size_t UniqueIntegerGenerator() {
}
bool
IsCompileGPU
()
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
return
false
;
#else
return
true
;
...
...
@@ -78,7 +78,7 @@ PYBIND11_PLUGIN(core) {
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
double
>
)
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
double
>
)
...
...
@@ -96,7 +96,7 @@ PYBIND11_PLUGIN(core) {
.
def
(
"__init__"
,
[](
LoDTensor
&
instance
,
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
lod
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
new
(
&
instance
)
LoDTensor
(
lod
);
#else
LoD
new_lod
;
...
...
@@ -107,7 +107,7 @@ PYBIND11_PLUGIN(core) {
})
.
def
(
"set_lod"
,
[](
LoDTensor
&
self
,
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
lod
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
self
.
set_lod
(
lod
);
#else
LoD
new_lod
;
...
...
@@ -117,7 +117,7 @@ PYBIND11_PLUGIN(core) {
#endif
})
.
def
(
"lod"
,
[](
LoDTensor
&
self
)
->
std
::
vector
<
std
::
vector
<
size_t
>>
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
return
self
.
lod
();
#else
auto
lod
=
self
.
lod
();
...
...
@@ -203,7 +203,7 @@ All parameter, weight, gradient are variables in Paddle.
.
def_static
(
"create"
,
[](
paddle
::
platform
::
GPUPlace
&
place
)
->
paddle
::
platform
::
DeviceContext
*
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
PADDLE_THROW
(
"GPUPlace is not supported in CPU device."
);
#else
return
new
paddle
::
platform
::
CUDADeviceContext
(
place
);
...
...
paddle/pybind/tensor_py.h
浏览文件 @
1172f249
...
...
@@ -106,7 +106,7 @@ void PyCPUTensorSetFromArray(
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
());
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
template
<
typename
T
>
void
PyCUDATensorSetFromArray
(
framework
::
Tensor
&
self
,
...
...
paddle/string/to_string_test.cc
浏览文件 @
1172f249
...
...
@@ -36,4 +36,4 @@ TEST(to_string, user_defined) {
using
namespace
paddle
::
string
;
UserDefinedClass
instance
;
ASSERT_EQ
(
kOutputString
,
to_string
(
instance
));
}
\ No newline at end of file
}
paddle/trainer/MergeModel.cpp
浏览文件 @
1172f249
...
...
@@ -29,7 +29,7 @@ int main(int argc, char** argv) {
initMain
(
argc
,
argv
);
initPython
(
argc
,
argv
);
string
confFile
=
TrainerConfigHelper
::
getConfigNameFromPath
(
FLAGS_model_dir
);
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
FLAGS_use_gpu
=
false
;
#endif
auto
config
=
std
::
make_shared
<
TrainerConfigHelper
>
(
confFile
);
...
...
paddle/trainer/tests/test_Compare.cpp
浏览文件 @
1172f249
...
...
@@ -146,7 +146,7 @@ void compareGradient(comData& comDataCpu, comData& comDataGpu) {
}
int
main
(
int
argc
,
char
**
argv
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
exit
(
0
);
#endif
paddle
::
initMain
(
argc
,
argv
);
...
...
paddle/trainer/tests/test_CompareSparse.cpp
浏览文件 @
1172f249
...
...
@@ -174,7 +174,7 @@ TEST(compareSparse, multiGradientMachine) {
FLAGS_local
=
local
;
FLAGS_ports_num_for_sparse
=
5
;
for
(
bool
useGpu
:
{
false
,
true
})
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
continue
;
#endif
FLAGS_parallel_nn
=
useGpu
;
...
...
@@ -198,7 +198,7 @@ TEST(compareSparse, NeuralNetwork) {
FLAGS_local
=
local
;
FLAGS_ports_num_for_sparse
=
5
;
for
(
bool
useGpu
:
{
false
,
true
})
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
if
(
useGpu
)
continue
;
#endif
FLAGS_parallel_nn
=
useGpu
;
...
...
paddle/trainer/tests/test_Trainer.cpp
浏览文件 @
1172f249
...
...
@@ -51,7 +51,7 @@ void checkGradientTest(const string& configFile,
TEST
(
checkGradient
,
cpu
)
{
checkGradientTest
(
configFile1
,
false
,
false
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
checkGradient
,
gpu
)
{
checkGradientTest
(
configFile1
,
true
,
false
);
}
TEST
(
checkGradient
,
multiGpu
)
{
...
...
@@ -97,7 +97,7 @@ TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); }
TEST
(
checkGradient
,
chunk
)
{
checkGradientTest
(
configFile3
,
false
,
false
);
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
checkGradientTest
(
configFile3
,
true
,
true
);
#endif
}
...
...
paddle/trainer/tests/test_TrainerOnePass.cpp
浏览文件 @
1172f249
...
...
@@ -79,7 +79,7 @@ void trainerOnePassTest(const string& configFile,
// 1. test trainer (cpu, gpu).
TEST
(
trainerOnePass
,
cpu
)
{
trainerOnePassTest
(
configFile1
,
false
,
false
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
trainerOnePass
,
gpu
)
{
trainerOnePassTest
(
configFile1
,
true
,
false
);
}
TEST
(
trainerOnePass
,
gpu2
)
{
trainerOnePassTest
(
configFile1
,
true
,
false
,
2
);
}
...
...
@@ -94,7 +94,7 @@ TEST(trainerOnePass, parallel) {
#endif
// 2. test average_window.
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
average_window
,
gpu
)
{
trainerOnePassTest
(
configFile1
,
true
,
false
,
4
,
0.01
);
}
...
...
@@ -266,7 +266,7 @@ TEST(checkRemoteUpdater, cpuTrainerOldUpdater) {
checkRemoteParameterUpdaterTest
(
configFile1
,
false
,
false
,
1
,
true
);
}
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
TEST
(
checkRemoteUpdater
,
gpuTrainer
)
{
checkRemoteParameterUpdaterTest
(
configFile1
,
true
,
false
);
}
...
...
paddle/trainer/tests/test_recurrent_machine_generation.cpp
浏览文件 @
1172f249
...
...
@@ -113,7 +113,7 @@ void testGeneration(const string& configFile,
#ifndef PADDLE_TYPE_DOUBLE
TEST
(
RecurrentGradientMachine
,
test_generation
)
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
const
auto
useGpuConfs
=
{
false
};
#else
const
auto
useGpuConfs
=
{
true
,
false
};
...
...
paddle/utils/Flags.cpp
浏览文件 @
1172f249
...
...
@@ -14,7 +14,7 @@ limitations under the License. */
#include "Flags.h"
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
DEFINE_bool
(
use_gpu
,
false
,
"Only support CPU training"
);
#else
DEFINE_bool
(
use_gpu
,
true
,
"Whether to use GPU for training"
);
...
...
paddle/utils/Util.h
浏览文件 @
1172f249
...
...
@@ -218,7 +218,7 @@ protected:
* *d2* is peer device to enable direct access to by the d1 device.
*/
inline
void
enablePeerAccess
(
int
d1
,
int
d2
)
{
#ifdef PADDLE_WITH_
GPU
#ifdef PADDLE_WITH_
CUDA
if
(
hl_device_can_access_peer
(
d1
,
d2
))
{
SetDevice
dev
(
d1
);
hl_device_enable_peer_access
(
d2
);
...
...
paddle/utils/Version.h
浏览文件 @
1172f249
...
...
@@ -48,7 +48,7 @@ void printVersion(std::ostream& os);
* @return return true if paddle compiled with GPU
*/
constexpr
bool
isWithGpu
()
{
#ifndef PADDLE_WITH_
GPU
#ifndef PADDLE_WITH_
CUDA
return
false
;
#else
return
true
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录