Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
f985700a
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
694
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
f985700a
编写于
10月 04, 2017
作者:
Y
Yi Wang
浏览文件
操作
浏览文件
下载
差异文件
Resolve conflict
上级
c0a76c40
e79d2f1b
变更
86
隐藏空白更改
内联
并排
Showing
86 changed file
with
140 addition
and
144 deletion
+140
-144
cmake/configure.cmake
cmake/configure.cmake
+0
-4
paddle/api/Util.cpp
paddle/api/Util.cpp
+1
-1
paddle/capi/Matrix.cpp
paddle/capi/Matrix.cpp
+1
-1
paddle/framework/lod_tensor.h
paddle/framework/lod_tensor.h
+2
-2
paddle/framework/op_registry.h
paddle/framework/op_registry.h
+1
-1
paddle/framework/operator.cc
paddle/framework/operator.cc
+1
-1
paddle/framework/tensor_impl.h
paddle/framework/tensor_impl.h
+2
-2
paddle/framework/tensor_test.cc
paddle/framework/tensor_test.cc
+4
-4
paddle/function/BlockExpandOp.cpp
paddle/function/BlockExpandOp.cpp
+1
-1
paddle/function/ContextProjectionOp.cpp
paddle/function/ContextProjectionOp.cpp
+1
-1
paddle/function/CosSimOp.cpp
paddle/function/CosSimOp.cpp
+1
-1
paddle/function/CropOp.cpp
paddle/function/CropOp.cpp
+1
-1
paddle/function/CrossMapNormalOp.cpp
paddle/function/CrossMapNormalOp.cpp
+1
-1
paddle/function/DepthwiseConvOp.cpp
paddle/function/DepthwiseConvOp.cpp
+1
-1
paddle/function/DepthwiseConvOpTest.cpp
paddle/function/DepthwiseConvOpTest.cpp
+1
-1
paddle/function/GemmConvOp.cpp
paddle/function/GemmConvOp.cpp
+1
-1
paddle/function/GemmConvOpTest.cpp
paddle/function/GemmConvOpTest.cpp
+1
-1
paddle/function/Im2ColTest.cpp
paddle/function/Im2ColTest.cpp
+1
-1
paddle/function/MulOp.cpp
paddle/function/MulOp.cpp
+1
-1
paddle/function/PadOp.cpp
paddle/function/PadOp.cpp
+1
-1
paddle/function/RowConvOp.cpp
paddle/function/RowConvOp.cpp
+1
-1
paddle/function/SwitchOp.cpp
paddle/function/SwitchOp.cpp
+1
-1
paddle/gserver/layers/BatchNormBaseLayer.cpp
paddle/gserver/layers/BatchNormBaseLayer.cpp
+1
-1
paddle/gserver/layers/BatchNormalizationLayer.cpp
paddle/gserver/layers/BatchNormalizationLayer.cpp
+3
-3
paddle/gserver/layers/PoolLayer.cpp
paddle/gserver/layers/PoolLayer.cpp
+2
-2
paddle/gserver/tests/LayerGradUtil.cpp
paddle/gserver/tests/LayerGradUtil.cpp
+1
-1
paddle/gserver/tests/test_BatchNorm.cpp
paddle/gserver/tests/test_BatchNorm.cpp
+1
-1
paddle/gserver/tests/test_ConvUnify.cpp
paddle/gserver/tests/test_ConvUnify.cpp
+1
-1
paddle/gserver/tests/test_DetectionOutput.cpp
paddle/gserver/tests/test_DetectionOutput.cpp
+1
-1
paddle/gserver/tests/test_Evaluator.cpp
paddle/gserver/tests/test_Evaluator.cpp
+1
-1
paddle/gserver/tests/test_KmaxSeqScore.cpp
paddle/gserver/tests/test_KmaxSeqScore.cpp
+1
-1
paddle/gserver/tests/test_LayerGrad.cpp
paddle/gserver/tests/test_LayerGrad.cpp
+13
-13
paddle/gserver/tests/test_NetworkCompare.cpp
paddle/gserver/tests/test_NetworkCompare.cpp
+1
-1
paddle/gserver/tests/test_PriorBox.cpp
paddle/gserver/tests/test_PriorBox.cpp
+1
-1
paddle/gserver/tests/test_ProtoDataProvider.cpp
paddle/gserver/tests/test_ProtoDataProvider.cpp
+3
-3
paddle/gserver/tests/test_PyDataProvider.cpp
paddle/gserver/tests/test_PyDataProvider.cpp
+2
-2
paddle/gserver/tests/test_SelectiveFCLayer.cpp
paddle/gserver/tests/test_SelectiveFCLayer.cpp
+4
-4
paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
+1
-1
paddle/gserver/tests/test_WarpCTCLayer.cpp
paddle/gserver/tests/test_WarpCTCLayer.cpp
+1
-1
paddle/math/Matrix.cpp
paddle/math/Matrix.cpp
+3
-3
paddle/math/SparseMatrix.cpp
paddle/math/SparseMatrix.cpp
+1
-1
paddle/math/Vector.cpp
paddle/math/Vector.cpp
+3
-3
paddle/math/tests/test_Allocator.cpp
paddle/math/tests/test_Allocator.cpp
+2
-2
paddle/math/tests/test_BaseMatrix.cpp
paddle/math/tests/test_BaseMatrix.cpp
+1
-1
paddle/math/tests/test_CpuGpuVector.cpp
paddle/math/tests/test_CpuGpuVector.cpp
+1
-1
paddle/math/tests/test_ExecViaCpu.cpp
paddle/math/tests/test_ExecViaCpu.cpp
+1
-1
paddle/math/tests/test_GpuProfiler.cpp
paddle/math/tests/test_GpuProfiler.cpp
+1
-1
paddle/math/tests/test_Matrix.cpp
paddle/math/tests/test_Matrix.cpp
+1
-1
paddle/math/tests/test_SparseMatrix.cpp
paddle/math/tests/test_SparseMatrix.cpp
+3
-3
paddle/math/tests/test_Tensor.cu
paddle/math/tests/test_Tensor.cu
+10
-10
paddle/math/tests/test_TrainingAlgorithm.cpp
paddle/math/tests/test_TrainingAlgorithm.cpp
+1
-1
paddle/math/tests/test_batchTranspose.cpp
paddle/math/tests/test_batchTranspose.cpp
+1
-1
paddle/math/tests/test_lazyAssign.cu
paddle/math/tests/test_lazyAssign.cu
+2
-2
paddle/math/tests/test_matrixCompare.cpp
paddle/math/tests/test_matrixCompare.cpp
+1
-1
paddle/math/tests/test_perturbation.cpp
paddle/math/tests/test_perturbation.cpp
+1
-1
paddle/math/tests/test_sparseMatrixCompare.cpp
paddle/math/tests/test_sparseMatrixCompare.cpp
+1
-1
paddle/memory/detail/buddy_allocator.cc
paddle/memory/detail/buddy_allocator.cc
+1
-1
paddle/memory/detail/system_allocator.cc
paddle/memory/detail/system_allocator.cc
+1
-1
paddle/memory/detail/system_allocator.h
paddle/memory/detail/system_allocator.h
+1
-1
paddle/memory/detail/system_allocator_test.cc
paddle/memory/detail/system_allocator_test.cc
+1
-1
paddle/memory/memcpy.cc
paddle/memory/memcpy.cc
+1
-1
paddle/memory/memcpy.h
paddle/memory/memcpy.h
+1
-1
paddle/memory/memory.cc
paddle/memory/memory.cc
+1
-1
paddle/memory/memory_test.cc
paddle/memory/memory_test.cc
+1
-1
paddle/operators/detail/strided_memcpy.h
paddle/operators/detail/strided_memcpy.h
+1
-1
paddle/operators/math/im2col_test.cc
paddle/operators/math/im2col_test.cc
+2
-2
paddle/operators/math/math_function_test.cc
paddle/operators/math/math_function_test.cc
+1
-1
paddle/operators/strided_memcpy_test.cc
paddle/operators/strided_memcpy_test.cc
+1
-1
paddle/platform/device_context.cc
paddle/platform/device_context.cc
+1
-1
paddle/platform/device_context.h
paddle/platform/device_context.h
+2
-2
paddle/platform/enforce.h
paddle/platform/enforce.h
+2
-2
paddle/platform/gpu_info.h
paddle/platform/gpu_info.h
+1
-1
paddle/platform/variant.h
paddle/platform/variant.h
+1
-1
paddle/pserver/test/SocketTest.cpp
paddle/pserver/test/SocketTest.cpp
+1
-1
paddle/pserver/test/test_ProtoServer.cpp
paddle/pserver/test/test_ProtoServer.cpp
+1
-1
paddle/pybind/pybind.cc
paddle/pybind/pybind.cc
+6
-6
paddle/pybind/tensor_py.h
paddle/pybind/tensor_py.h
+1
-1
paddle/trainer/MergeModel.cpp
paddle/trainer/MergeModel.cpp
+1
-1
paddle/trainer/tests/test_Compare.cpp
paddle/trainer/tests/test_Compare.cpp
+1
-1
paddle/trainer/tests/test_CompareSparse.cpp
paddle/trainer/tests/test_CompareSparse.cpp
+2
-2
paddle/trainer/tests/test_Trainer.cpp
paddle/trainer/tests/test_Trainer.cpp
+2
-2
paddle/trainer/tests/test_TrainerOnePass.cpp
paddle/trainer/tests/test_TrainerOnePass.cpp
+3
-3
paddle/trainer/tests/test_recurrent_machine_generation.cpp
paddle/trainer/tests/test_recurrent_machine_generation.cpp
+1
-1
paddle/utils/Flags.cpp
paddle/utils/Flags.cpp
+1
-1
paddle/utils/Util.h
paddle/utils/Util.h
+1
-1
paddle/utils/Version.h
paddle/utils/Version.h
+1
-1
未找到文件。
cmake/configure.cmake
浏览文件 @
f985700a
...
...
@@ -49,10 +49,6 @@ if(NOT WITH_GOLANG)
endif
(
NOT WITH_GOLANG
)
if
(
NOT WITH_GPU
)
# Will gradually remove uses of PADDLE_ONLY_CPU in source files,
# so could we remove -DPADDLE_ONLY_CPU.
# c.f. https://github.com/PaddlePaddle/Paddle/issues/4588
add_definitions
(
-DPADDLE_ONLY_CPU
)
add_definitions
(
-DHPPL_STUB_FUNC
)
list
(
APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu
)
...
...
paddle/api/Util.cpp
浏览文件 @
f985700a
...
...
@@ -47,7 +47,7 @@ bool isUsingGpu() { return FLAGS_use_gpu; }
void
setUseGpu
(
bool
useGpu
)
{
FLAGS_use_gpu
=
useGpu
;
}
bool
isGpuVersion
()
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
return
false
;
#else
return
true
;
...
...
paddle/capi/Matrix.cpp
浏览文件 @
f985700a
...
...
@@ -46,7 +46,7 @@ paddle_error paddle_matrix_set_row(paddle_matrix mat,
if
(
rowID
>=
ptr
->
mat
->
getHeight
())
return
kPD_OUT_OF_RANGE
;
paddle
::
real
*
buf
=
ptr
->
mat
->
getRowBuf
(
rowID
);
size_t
width
=
ptr
->
mat
->
getWidth
();
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
hl_memcpy
(
buf
,
rowArray
,
sizeof
(
paddle
::
real
)
*
width
);
#else
std
::
copy
(
rowArray
,
rowArray
+
width
,
buf
);
...
...
paddle/framework/lod_tensor.h
浏览文件 @
f985700a
...
...
@@ -15,7 +15,7 @@
#pragma once
#include <memory>
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>
...
...
@@ -29,7 +29,7 @@
namespace
paddle
{
namespace
framework
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
template
<
typename
T
>
using
Vector
=
std
::
vector
<
T
>
;
#else
...
...
paddle/framework/op_registry.h
浏览文件 @
f985700a
...
...
@@ -211,7 +211,7 @@ class OpKernelRegistrar : public Registrar {
// TODO(fengjiayi): The following macros
// seems ugly, do we have better method?
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
#define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU)
#else
#define USE_OP_KERNEL(op_type) \
...
...
paddle/framework/operator.cc
浏览文件 @
f985700a
...
...
@@ -25,7 +25,7 @@ Eigen::DefaultDevice& ExecutionContext::GetEigenDevice<
return
*
device_context_
.
GetEigenDevice
<
platform
::
CPUPlace
>
();
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
template
<
>
Eigen
::
GpuDevice
&
ExecutionContext
::
GetEigenDevice
<
platform
::
GPUPlace
,
Eigen
::
GpuDevice
>
()
const
{
...
...
paddle/framework/tensor_impl.h
浏览文件 @
f985700a
...
...
@@ -65,7 +65,7 @@ inline T* Tensor::mutable_data(platform::Place place) {
holder_
.
reset
(
new
PlaceholderImpl
<
T
,
platform
::
CPUPlace
>
(
boost
::
get
<
platform
::
CPUPlace
>
(
place
),
size
));
}
else
if
(
platform
::
is_gpu_place
(
place
))
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
PADDLE_THROW
(
"'GPUPlace' is not supported in CPU only device."
);
}
#else
...
...
@@ -103,7 +103,7 @@ inline void Tensor::CopyFrom(const Tensor& src,
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
boost
::
get
<
platform
::
CPUPlace
>
(
src_place
),
src_ptr
,
size
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
else
if
(
platform
::
is_gpu_place
(
src_place
)
&&
platform
::
is_cpu_place
(
dst_place
))
{
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
...
...
paddle/framework/tensor_test.cc
浏览文件 @
f985700a
...
...
@@ -74,7 +74,7 @@ TEST(Tensor, MutableData) {
EXPECT_EQ
(
p1
,
p2
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
{
Tensor
src_tensor
;
float
*
p1
=
nullptr
;
...
...
@@ -126,7 +126,7 @@ TEST(Tensor, ShareDataWith) {
ASSERT_EQ
(
src_tensor
.
data
<
int
>
(),
dst_tensor
.
data
<
int
>
());
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
{
Tensor
src_tensor
;
Tensor
dst_tensor
;
...
...
@@ -163,7 +163,7 @@ TEST(Tensor, Slice) {
EXPECT_EQ
(
src_data_address
+
3
*
4
*
1
*
sizeof
(
int
),
slice_data_address
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
{
Tensor
src_tensor
;
src_tensor
.
mutable_data
<
double
>
(
make_ddim
({
6
,
9
}),
GPUPlace
());
...
...
@@ -218,7 +218,7 @@ TEST(Tensor, CopyFrom) {
EXPECT_EQ
(
dst_ptr
[
i
],
slice_ptr
[
i
]);
}
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
{
Tensor
src_tensor
;
Tensor
gpu_tensor
;
...
...
paddle/function/BlockExpandOp.cpp
浏览文件 @
f985700a
...
...
@@ -194,7 +194,7 @@ public:
REGISTER_TYPED_FUNC
(
BlockExpand
,
CPU
,
BlockExpandForward
);
REGISTER_TYPED_FUNC
(
BlockExpandGrad
,
CPU
,
BlockExpandBackward
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
REGISTER_TYPED_FUNC
(
BlockExpand
,
GPU
,
BlockExpandForward
);
REGISTER_TYPED_FUNC
(
BlockExpandGrad
,
GPU
,
BlockExpandBackward
);
#endif
...
...
paddle/function/ContextProjectionOp.cpp
浏览文件 @
f985700a
...
...
@@ -395,7 +395,7 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
REGISTER_TYPED_FUNC
(
ContextProjectionBackward
,
CPU
,
ContextProjectionBackwardFunc
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
REGISTER_TYPED_FUNC
(
ContextProjectionForward
,
GPU
,
ContextProjectionForwardFunc
);
...
...
paddle/function/CosSimOp.cpp
浏览文件 @
f985700a
...
...
@@ -233,7 +233,7 @@ private:
REGISTER_TYPED_FUNC
(
CosSimForward
,
CPU
,
CosSimForwardFunc
);
REGISTER_TYPED_FUNC
(
CosSimBackward
,
CPU
,
CosSimBackwardFunc
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
REGISTER_TYPED_FUNC
(
CosSimForward
,
GPU
,
CosSimForwardFunc
);
REGISTER_TYPED_FUNC
(
CosSimBackward
,
GPU
,
CosSimBackwardFunc
);
#endif
...
...
paddle/function/CropOp.cpp
浏览文件 @
f985700a
...
...
@@ -169,7 +169,7 @@ private:
REGISTER_TYPED_FUNC
(
Crop
,
CPU
,
CropFunc
);
REGISTER_TYPED_FUNC
(
CropGrad
,
CPU
,
CropGradFunc
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
REGISTER_TYPED_FUNC
(
Crop
,
GPU
,
CropFunc
);
REGISTER_TYPED_FUNC
(
CropGrad
,
GPU
,
CropGradFunc
);
#endif
...
...
paddle/function/CrossMapNormalOp.cpp
浏览文件 @
f985700a
...
...
@@ -336,7 +336,7 @@ private:
REGISTER_TYPED_FUNC
(
CrossMapNormal
,
CPU
,
CrossMapNormalFunc
);
REGISTER_TYPED_FUNC
(
CrossMapNormalGrad
,
CPU
,
CrossMapNormalGradFunc
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
REGISTER_TYPED_FUNC
(
CrossMapNormal
,
GPU
,
CrossMapNormalFunc
);
REGISTER_TYPED_FUNC
(
CrossMapNormalGrad
,
GPU
,
CrossMapNormalGradFunc
);
#endif
...
...
paddle/function/DepthwiseConvOp.cpp
浏览文件 @
f985700a
...
...
@@ -292,7 +292,7 @@ REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
REGISTER_TYPED_FUNC
(
DepthwiseConvGradFilter
,
CPU
,
DepthwiseConvGradFilterFunction
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
REGISTER_TYPED_FUNC
(
DepthwiseConv
,
GPU
,
DepthwiseConvFunction
);
REGISTER_TYPED_FUNC
(
DepthwiseConvGradInput
,
GPU
,
...
...
paddle/function/DepthwiseConvOpTest.cpp
浏览文件 @
f985700a
...
...
@@ -17,7 +17,7 @@ limitations under the License. */
namespace
paddle
{
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
DepthwiseConv
,
Forward
)
{
DepthwiseConvolution
<
DEVICE_TYPE_CPU
,
DEVICE_TYPE_GPU
>
(
"GemmConv-CPU"
,
"DepthwiseConv-GPU"
,
forward
);
...
...
paddle/function/GemmConvOp.cpp
浏览文件 @
f985700a
...
...
@@ -340,7 +340,7 @@ public:
REGISTER_TYPED_FUNC
(
GemmConv
,
CPU
,
GemmConvFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradInput
,
CPU
,
GemmConvGradInputFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradFilter
,
CPU
,
GemmConvGradFilterFunction
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
REGISTER_TYPED_FUNC
(
GemmConv
,
GPU
,
GemmConvFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradInput
,
GPU
,
GemmConvGradInputFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradFilter
,
GPU
,
GemmConvGradFilterFunction
);
...
...
paddle/function/GemmConvOpTest.cpp
浏览文件 @
f985700a
...
...
@@ -24,7 +24,7 @@ TEST(GemmConv, NaiveConv) {
"NaiveConv-CPU"
,
"GemmConv-CPU"
,
forward
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
GemmConv
,
Forward
)
{
Convolution
<
DEVICE_TYPE_CPU
,
DEVICE_TYPE_GPU
>
(
"GemmConv-CPU"
,
"GemmConv-GPU"
,
forward
);
...
...
paddle/function/Im2ColTest.cpp
浏览文件 @
f985700a
...
...
@@ -116,7 +116,7 @@ void TestIm2ColFunctor() {
TEST
(
Im2ColFunctor
,
CPU
)
{
TestIm2ColFunctor
<
DEVICE_TYPE_CPU
,
float
>
();
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
Im2ColFunctor
,
GPU
)
{
TestIm2ColFunctor
<
DEVICE_TYPE_GPU
,
float
>
();
}
...
...
paddle/function/MulOp.cpp
浏览文件 @
f985700a
...
...
@@ -341,7 +341,7 @@ private:
};
REGISTER_TYPED_FUNC
(
MulOp
,
CPU
,
MulFunc
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
REGISTER_TYPED_FUNC
(
MulOp
,
GPU
,
MulFunc
);
#endif
}
// namespace paddle
paddle/function/PadOp.cpp
浏览文件 @
f985700a
...
...
@@ -207,7 +207,7 @@ private:
REGISTER_TYPED_FUNC
(
Pad
,
CPU
,
PadFunc
);
REGISTER_TYPED_FUNC
(
PadGrad
,
CPU
,
PadGradFunc
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
REGISTER_TYPED_FUNC
(
Pad
,
GPU
,
PadFunc
);
REGISTER_TYPED_FUNC
(
PadGrad
,
GPU
,
PadGradFunc
);
#endif
...
...
paddle/function/RowConvOp.cpp
浏览文件 @
f985700a
...
...
@@ -217,7 +217,7 @@ public:
REGISTER_TYPED_FUNC
(
RowConv
,
CPU
,
RowConvFunc
);
REGISTER_TYPED_FUNC
(
RowConvGrad
,
CPU
,
RowConvGradFunc
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
REGISTER_TYPED_FUNC
(
RowConv
,
GPU
,
RowConvFunc
);
REGISTER_TYPED_FUNC
(
RowConvGrad
,
GPU
,
RowConvGradFunc
);
#endif
...
...
paddle/function/SwitchOp.cpp
浏览文件 @
f985700a
...
...
@@ -132,7 +132,7 @@ public:
REGISTER_TYPED_FUNC
(
NCHW2NHWC
,
CPU
,
NCHW2NHWCFunc
);
REGISTER_TYPED_FUNC
(
NHWC2NCHW
,
CPU
,
NHWC2NCHWFunc
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
REGISTER_TYPED_FUNC
(
NCHW2NHWC
,
GPU
,
NCHW2NHWCFunc
);
REGISTER_TYPED_FUNC
(
NHWC2NCHW
,
GPU
,
NHWC2NCHWFunc
);
#endif
...
...
paddle/gserver/layers/BatchNormBaseLayer.cpp
浏览文件 @
f985700a
...
...
@@ -16,7 +16,7 @@ limitations under the License. */
#include "BatchNormalizationLayer.h"
#include "Layer.h"
#include "paddle/utils/Stat.h"
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
#include "CudnnBatchNormLayer.h"
#endif
...
...
paddle/gserver/layers/BatchNormalizationLayer.cpp
浏览文件 @
f985700a
...
...
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Stat.h"
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
#include "hl_batch_transpose.h"
#endif
#include "BatchNormalizationLayer.h"
...
...
@@ -90,7 +90,7 @@ void BatchNormalizationLayer::expandMat(const MatrixPtr& in, MatrixPtr& out) {
size_t
batchSize
=
in
->
getHeight
();
CHECK_EQ
(
out
->
getHeight
(),
batchSize
*
imgPixels_
);
if
(
useGpu_
)
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
LOG
(
FATAL
)
<<
"paddle is compiled only for cpu"
;
#else
batchTranspose
(
...
...
@@ -127,7 +127,7 @@ void BatchNormalizationLayer::shrinkMat(const MatrixPtr& in, MatrixPtr& out) {
}
CHECK_EQ
(
in
->
getHeight
(),
static_cast
<
size_t
>
(
batchSize
*
imgPixels_
));
if
(
useGpu_
)
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
LOG
(
FATAL
)
<<
"paddle is compiled only for cpu"
;
#else
batchTranspose
(
...
...
paddle/gserver/layers/PoolLayer.cpp
浏览文件 @
f985700a
...
...
@@ -15,7 +15,7 @@ limitations under the License. */
#include "PoolLayer.h"
#include "PoolProjectionLayer.h"
#include "paddle/utils/Logging.h"
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
#include "CudnnPoolLayer.h"
#endif
namespace
paddle
{
...
...
@@ -53,7 +53,7 @@ Layer* PoolLayer::create(const LayerConfig& config) {
const
std
::
string
&
pool
=
config
.
inputs
(
0
).
pool_conf
().
pool_type
();
if
(
pool
==
"max-projection"
||
pool
==
"avg-projection"
)
{
return
new
PoolProjectionLayer
(
config
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
}
else
if
(
CudnnPoolLayer
::
typeCheck
(
pool
))
{
return
new
CudnnPoolLayer
(
config
);
#endif
...
...
paddle/gserver/tests/LayerGradUtil.cpp
浏览文件 @
f985700a
...
...
@@ -674,7 +674,7 @@ void testLayerGradKernel(TestConfig testConf,
bool
useGpu
,
bool
useWeight
,
float
epsilon
)
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
if
(
useGpu
)
return
;
#endif
FLAGS_use_gpu
=
useGpu
;
...
...
paddle/gserver/tests/test_BatchNorm.cpp
浏览文件 @
f985700a
...
...
@@ -119,7 +119,7 @@ TEST(Layer, batchNorm) {
CHECK_EQ
(
static_cast
<
int
>
(
convLayer
->
getOutputValue
()
->
getWidth
()),
576
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
void
batchNormInference
(
int
n
,
int
c
,
int
h
,
int
w
)
{
MatrixPtr
input
=
std
::
make_shared
<
GpuMatrix
>
(
n
,
c
*
h
*
w
);
MatrixPtr
cudnnOut
=
std
::
make_shared
<
GpuMatrix
>
(
n
,
c
*
h
*
w
);
...
...
paddle/gserver/tests/test_ConvUnify.cpp
浏览文件 @
f985700a
...
...
@@ -117,7 +117,7 @@ MatrixPtr doOneConvTest(size_t imgSize,
}
TEST
(
Layer
,
convParaUnified
)
{
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
MatrixPtr
input
,
resultCpu
,
resultGpu
;
/// TEST1 for conv ///
...
...
paddle/gserver/tests/test_DetectionOutput.cpp
浏览文件 @
f985700a
...
...
@@ -150,7 +150,7 @@ TEST(Layer, detectionOutputLayerFwd) {
useGpu
,
result2
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
// GPU case 1.
useGpu
=
true
;
inputLoc
=
Matrix
::
create
(
1
,
16
,
false
,
useGpu
);
...
...
paddle/gserver/tests/test_Evaluator.cpp
浏览文件 @
f985700a
...
...
@@ -51,7 +51,7 @@ void testEvaluator(TestConfig testConf,
string
testEvaluatorName
,
size_t
batchSize
,
bool
useGpu
)
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
if
(
useGpu
)
return
;
#endif
FLAGS_use_gpu
=
useGpu
;
...
...
paddle/gserver/tests/test_KmaxSeqScore.cpp
浏览文件 @
f985700a
...
...
@@ -97,7 +97,7 @@ TEST(Layer, kmaxSeqScoreLayer) {
Matrix
::
create
(
subSeqStartPosition
.
back
(),
1
,
false
,
false
);
std
::
vector
<
bool
>
mode
=
{
false
};
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
mode
.
push_back
(
true
);
#endif
...
...
paddle/gserver/tests/test_LayerGrad.cpp
浏览文件 @
f985700a
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
#include <cudnn.h>
#endif
#include <gtest/gtest.h>
...
...
@@ -258,7 +258,7 @@ void testProjectionConv(size_t groups, bool isDeconv) {
true
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
Projection
,
conv
)
{
/// test ConvProjection
testProjectionConv
(
1
,
false
);
...
...
@@ -422,7 +422,7 @@ TEST(Layer, depthwiseConvLayer) {
// 'depthwise_conv' is a sepecial case of 'exconv' whose
// groups size equals to the input channels size.
testDepthwiseConvLayer
(
"exconv"
,
/* useGpu= */
false
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
testDepthwiseConvLayer
(
"exconv"
,
/* useGpu= */
true
);
#endif
}
...
...
@@ -480,7 +480,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
convLayer
)
{
testConvLayer
(
"exconv"
,
/* trans= */
false
,
/* useGpu= */
false
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
testConvLayer
(
"exconv"
,
/* trans= */
false
,
/* useGpu= */
true
);
testConvLayer
(
"cudnn_conv"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
...
...
@@ -525,7 +525,7 @@ TEST(Layer, convTransLayer) {
for
(
auto
useGpu
:
{
false
,
true
})
{
testConvTransLayer
(
"exconvt"
,
/* trans= */
false
,
/* useGpu= */
useGpu
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
testConvTransLayer
(
"cudnn_convt"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
}
...
...
@@ -638,7 +638,7 @@ TEST(Layer, SelectiveFullyConnectedLayer) {
/* trans= */
false
,
/* useGup= */
false
,
false
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
testLayerGrad
(
config
,
"selective_fc"
,
100
,
...
...
@@ -1210,7 +1210,7 @@ void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
testLayerGrad
(
config
,
"pool"
,
100
,
trans
,
useGpu
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
void
testPoolLayer2
(
const
string
&
poolType
,
bool
trans
,
bool
useGpu
)
{
TestConfig
config
;
config
.
inputDefs
.
push_back
({
INPUT_DATA
,
"layer_0"
,
3200
,
0
});
...
...
@@ -1236,7 +1236,7 @@ TEST(Layer, PoolLayer) {
testPoolLayer
(
"avg-projection"
,
/* trans= */
false
,
/* useGpu= */
false
);
testPoolLayer
(
"max-projection"
,
/* trans= */
false
,
/* useGpu= */
false
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
testPoolLayer
(
"avg-projection"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPoolLayer
(
"max-projection"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPoolLayer
(
"cudnn-max-pool"
,
/* trans= */
false
,
/* useGpu= */
true
);
...
...
@@ -1309,7 +1309,7 @@ void testPool3DLayer(const string& poolType, bool trans, bool useGpu) {
TEST
(
Layer
,
Pool3DLayer
)
{
testPool3DLayer
(
"avg"
,
/* trans= */
false
,
/* useGpu= */
false
);
testPool3DLayer
(
"max"
,
/* trans= */
false
,
/* useGpu= */
false
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
testPool3DLayer
(
"avg"
,
/* trans= */
false
,
/* useGpu= */
true
);
testPool3DLayer
(
"max"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
...
...
@@ -1695,7 +1695,7 @@ void testBatchNormLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
BatchNormalizationLayer
)
{
testBatchNormLayer
(
"batch_norm"
,
false
,
false
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
testBatchNormLayer
(
"batch_norm"
,
false
,
true
);
if
(
hl_get_cudnn_lib_version
()
>=
int
(
4000
))
{
testBatchNormLayer
(
"cudnn_batch_norm"
,
false
,
true
);
...
...
@@ -1744,7 +1744,7 @@ void testBatchNorm3DLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
testBatchNorm3DLayer
)
{
testBatchNorm3DLayer
(
"batch_norm"
,
false
,
false
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
testBatchNorm3DLayer
(
"batch_norm"
,
false
,
true
);
if
(
hl_get_cudnn_lib_version
()
>=
int
(
4000
))
{
testBatchNorm3DLayer
(
"cudnn_batch_norm"
,
false
,
true
);
...
...
@@ -2262,7 +2262,7 @@ void test3DConvLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
test3DConvLayer
)
{
test3DConvLayer
(
"conv3d"
,
/* trans= */
false
,
/* useGpu= */
false
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
test3DConvLayer
(
"conv3d"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
}
...
...
@@ -2339,7 +2339,7 @@ void test3DDeConvLayer(const string& type, bool trans, bool useGpu) {
TEST
(
Layer
,
test3DDeConvLayer
)
{
test3DDeConvLayer
(
"deconv3d"
,
/* trans= */
false
,
/* useGpu= */
false
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
test3DDeConvLayer
(
"deconv3d"
,
/* trans= */
false
,
/* useGpu= */
true
);
#endif
}
...
...
paddle/gserver/tests/test_NetworkCompare.cpp
浏览文件 @
f985700a
...
...
@@ -243,7 +243,7 @@ TEST(Compare, concat_slice) {
compareNetwork
(
config_file_a
,
config_file_b
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
Compare
,
img_pool
)
{
std
::
string
config_file_a
=
"./gserver/tests/img_pool_a.conf"
;
std
::
string
config_file_b
=
"./gserver/tests/img_pool_b.conf"
;
...
...
paddle/gserver/tests/test_PriorBox.cpp
浏览文件 @
f985700a
...
...
@@ -151,7 +151,7 @@ TEST(Layer, priorBoxLayerFwd) {
useGpu
,
result
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
// reset the input parameters
variance
[
1
]
=
0.1
;
variance
[
3
]
=
0.2
;
...
...
paddle/gserver/tests/test_ProtoDataProvider.cpp
浏览文件 @
f985700a
...
...
@@ -485,7 +485,7 @@ TEST(ProtoDataProvider, test) {
// Currently in async mode, useGpu is not supported
continue
;
}
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
if
(
useGpu
)
{
continue
;
}
...
...
@@ -525,7 +525,7 @@ TEST(ProtoDataProvider, constant_slots) {
for
(
int
numConstantSlots
:
{
1
,
2
})
{
for
(
int
useGpu
:
numTwoArray
)
{
for
(
int
dataCompression
:
numTwoArray
)
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
if
(
useGpu
)
{
continue
;
}
...
...
@@ -708,7 +708,7 @@ TEST(ProtoSequenceDataProvider, test) {
// Currently in async mode, useGpu is not supported
continue
;
}
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
if
(
useGpu
)
{
continue
;
}
...
...
paddle/gserver/tests/test_PyDataProvider.cpp
浏览文件 @
f985700a
...
...
@@ -37,7 +37,7 @@ TEST(PyDataProvider, py_fill_slots) {
config
.
clear_files
();
std
::
string
dataFile
=
"gserver/tests/pyDataProvider/pyDataProviderList"
;
config
.
set_files
(
dataFile
);
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
bool
useGpu
=
false
;
#else
bool
useGpu
=
true
;
...
...
@@ -71,7 +71,7 @@ TEST(PyDataProvider, py_fill_nest_slots) {
std
::
string
dataFile
=
"gserver/tests/pyDataProvider/pyDataProviderList"
;
config
.
set_files
(
dataFile
);
EXPECT_EQ
(
config
.
IsInitialized
(),
true
);
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
bool
useGpu
=
false
;
#else
bool
useGpu
=
true
;
...
...
paddle/gserver/tests/test_SelectiveFCLayer.cpp
浏览文件 @
f985700a
...
...
@@ -321,7 +321,7 @@ TEST(Layer, SelectiveFcLayer_train_dense_mul) {
"filelist=gserver/tests/SelectiveFcTest/dense_mul_list"
;
for
(
auto
useGpu
:
{
false
,
true
})
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
if
(
useGpu
)
{
break
;
}
...
...
@@ -388,7 +388,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
outMatSelfc
->
getWidth
(),
outMatSelfc
->
getElementCnt
()));
cpuOutMatSelfc
->
copyFrom
(
*
outMatSelfc
,
HPPL_STREAM_DEFAULT
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
if
(
useGpu
)
{
hl_stream_synchronize
(
HPPL_STREAM_DEFAULT
);
}
...
...
@@ -418,7 +418,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
MatrixPtr
cpuOutMatFc
(
new
CpuMatrix
(
outMatFc
->
getHeight
(),
outMatFc
->
getWidth
()));
cpuOutMatFc
->
copyFrom
(
*
outMatFc
,
HPPL_STREAM_DEFAULT
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
if
(
useGpu
)
{
hl_stream_synchronize
(
HPPL_STREAM_DEFAULT
);
}
...
...
@@ -443,7 +443,7 @@ TEST(Layer, SelectiveFcLayer_train_sparse_mul) {
selLayerConfig
.
set_size
(
fcLayerWidth
);
testSelectiveFcLayerTrainSparseMul
(
selLayerConfig
,
false
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
testSelectiveFcLayerTrainSparseMul
(
selLayerConfig
,
true
);
#endif
}
...
...
paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
浏览文件 @
f985700a
...
...
@@ -195,7 +195,7 @@ TEST(Layer, SeqSliceLayer) {
vector
<
vector
<
real
>>
ends
;
std
::
vector
<
bool
>
mode
=
{
false
};
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
mode
.
push_back
(
true
);
#endif
genSeqInfo
(
seqStartPos
,
subSeqStartPos
);
...
...
paddle/gserver/tests/test_WarpCTCLayer.cpp
浏览文件 @
f985700a
...
...
@@ -199,7 +199,7 @@ TEST(Layer, WarpCTCLayer) {
for
(
auto
batchSize
:
{
1
,
10
,
32
})
{
for
(
auto
normByTimes
:
{
false
,
true
})
{
for
(
auto
useGpu
:
{
false
,
true
})
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
if
(
useGpu
)
continue
;
#endif
LOG
(
INFO
)
<<
"layerSize="
<<
layerSize
<<
" batchSize="
<<
batchSize
...
...
paddle/math/Matrix.cpp
浏览文件 @
f985700a
...
...
@@ -670,7 +670,7 @@ void GpuMatrix::leftMul(Matrix& a, real scaleAB, real scaleT) {
}
void
GpuMatrix
::
selectRows
(
Matrix
&
table
,
IVector
&
ids
)
{
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
CHECK
(
dynamic_cast
<
GpuMatrix
*>
(
&
table
));
CHECK
(
table
.
useGpu
());
CHECK
(
ids
.
useGpu
());
...
...
@@ -694,7 +694,7 @@ void GpuMatrix::selectRows(Matrix& table, IVector& ids) {
}
void
GpuMatrix
::
addToRows
(
Matrix
&
table
,
IVector
&
ids
)
{
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
CHECK
(
dynamic_cast
<
GpuMatrix
*>
(
&
table
));
CHECK
(
table
.
useGpu
());
CHECK
(
ids
.
useGpu
());
...
...
@@ -741,7 +741,7 @@ void GpuMatrix::rowMax(Matrix& max) {
}
void
GpuMatrix
::
rowMax
(
IVector
&
maxIds
,
Matrix
&
maxVal
)
{
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
CHECK
(
maxIds
.
useGpu
()
&&
maxVal
.
useGpu
())
<<
"Matrix type are not equal"
;
size_t
numSamples
=
getHeight
();
size_t
beam
=
maxVal
.
getWidth
();
...
...
paddle/math/SparseMatrix.cpp
浏览文件 @
f985700a
...
...
@@ -836,7 +836,7 @@ void GpuSparseMatrix::zeroMem() {
}
void
GpuSparseMatrix
::
rowMax
(
IVector
&
maxIds
,
Matrix
&
maxVal
)
{
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
CHECK
(
maxIds
.
useGpu
()
&&
maxVal
.
useGpu
())
<<
"Matrix type are not equal"
;
size_t
numSamples
=
getHeight
();
size_t
beam
=
maxVal
.
getWidth
();
...
...
paddle/math/Vector.cpp
浏览文件 @
f985700a
...
...
@@ -172,7 +172,7 @@ void GpuVectorT<T>::isEqualTo(const VectorT<T>& b, const T& value) {
template
<
class
T
>
void
GpuVectorT
<
T
>::
selectFrom
(
const
VectorT
<
T
>&
src
,
const
VectorT
<
int
>&
ids
)
{
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
hl_vector_select_from
<
T
>
(
this
->
getData
(),
this
->
getSize
(),
src
.
getData
(),
...
...
@@ -850,7 +850,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
size_t
size
)
:
sync_
(
nullptr
)
{
CHECK_LE
(
offset
+
size
,
static_cast
<
size_t
>
(
src
.
getSize
()));
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
SyncedFlag
*
flag
=
src
.
getSync
();
if
(
*
flag
==
DATA_AT_CPU
)
{
src
.
copyToGpu
();
// will set synchronous data between CPU and GPU
...
...
@@ -861,7 +861,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
auto
cMemHandle
=
(
src
.
getVector
(
false
))
->
getMemoryHandle
();
cpuVectorT_
=
std
::
make_shared
<
CpuVectorT
<
T
>>
(
size
,
std
::
dynamic_pointer_cast
<
CpuMemoryHandle
>
(
cMemHandle
),
offset
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
auto
gMemHandle
=
(
src
.
getVector
(
true
))
->
getMemoryHandle
();
gpuVectorT_
=
std
::
make_shared
<
GpuVectorT
<
T
>>
(
size
,
std
::
dynamic_pointer_cast
<
GpuMemoryHandle
>
(
gMemHandle
),
offset
);
...
...
paddle/math/tests/test_Allocator.cpp
浏览文件 @
f985700a
...
...
@@ -68,7 +68,7 @@ void testPoolAllocator() {
TEST
(
Allocator
,
Pool
)
{
testPoolAllocator
<
CpuAllocator
>
();
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
testPoolAllocator
<
GpuAllocator
>
();
#endif
}
...
...
@@ -92,7 +92,7 @@ TEST(MemoryHandle, Cpu) {
EXPECT_EQ
(
ptr1
,
ptr2
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
MemoryHandle
,
Gpu
)
{
int
numGpu
=
hl_get_device_count
();
...
...
paddle/math/tests/test_BaseMatrix.cpp
浏览文件 @
f985700a
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
/**
* This test file use autotest::AutoCompare and cmpWithoutArg to compares the
* implementation of CPU and GPU member function in
...
...
paddle/math/tests/test_CpuGpuVector.cpp
浏览文件 @
f985700a
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
#include <gtest/gtest.h>
#include "paddle/math/Vector.h"
...
...
paddle/math/tests/test_ExecViaCpu.cpp
浏览文件 @
f985700a
...
...
@@ -94,7 +94,7 @@ void testWrapper(F&& f) {
}
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
ExecViaCpu
,
test1
)
{
testWrapper
(
f
);
testWrapper
(
&
f
);
...
...
paddle/math/tests/test_GpuProfiler.cpp
浏览文件 @
f985700a
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
#include <gtest/gtest.h>
#include "paddle/math/Matrix.h"
...
...
paddle/math/tests/test_Matrix.cpp
浏览文件 @
f985700a
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
/**
* This test file use autotest::AutoCompare and cmpWithArg to compares the
* implementation of CPU and GPU member function in Matrix.cpp.
...
...
paddle/math/tests/test_SparseMatrix.cpp
浏览文件 @
f985700a
...
...
@@ -47,7 +47,7 @@ struct MatrixPara {
SparseFormat
format
;
};
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
void
test_sparse_matrix_mul
(
MatrixPara
paraA
,
MatrixPara
paraB
,
MatrixPara
paraC
)
{
...
...
@@ -452,7 +452,7 @@ TEST(Matrix, SparseMatrixCSRFormatTrimFrom) {
matB
->
trimFrom
(
*
mat
);
checkSMatrixEqual2
(
matA
,
matB
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
GpuSparseMatrixPtr
matC
=
std
::
make_shared
<
GpuSparseMatrix
>
(
height
,
trimedWidth
,
height
,
FLOAT_VALUE
,
SPARSE_CSR
,
true
);
matC
->
trimFrom
(
*
mat
);
...
...
@@ -546,7 +546,7 @@ TEST(Matrix, SparseMatrixCSCFormatTrimFrom) {
matB
->
trimFrom
(
*
mat
);
checkSMatrixEqual2
(
matA
,
matB
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
GpuSparseMatrixPtr
matC
=
std
::
make_shared
<
GpuSparseMatrix
>
(
height
,
trimedWidth
,
height
,
FLOAT_VALUE
,
SPARSE_CSC
,
true
);
matC
->
trimFrom
(
*
mat
);
...
...
paddle/math/tests/test_Tensor.cu
浏览文件 @
f985700a
...
...
@@ -270,7 +270,7 @@ TEST(Unary, BaseOp) {
TestUnaryVectorT
<
CpuIVector
,
int
>
testCpuIVector
(
testUnaryBaseOpInt
<
CpuIVector
>
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TestUnaryMatrix
<
GpuMatrix
>
testGpuMatrix
(
testUnaryBaseOp
<
GpuMatrix
>
);
TestUnaryVectorT
<
GpuVector
,
real
>
testGpuVector
(
testUnaryBaseOp
<
GpuVector
>
);
TestUnaryVectorT
<
GpuIVector
,
int
>
testGpuIVector
(
...
...
@@ -317,7 +317,7 @@ void testUnayrMathOp(Tensor& A1, Tensor& A2) {
TEST
(
Unary
,
MathOp
)
{
TestUnaryMatrix
<
CpuMatrix
>
testCpu
(
testUnayrMathOp
<
CpuMatrix
>
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TestUnaryMatrix
<
GpuMatrix
>
testGpu
(
testUnayrMathOp
<
GpuMatrix
>
);
#endif
}
...
...
@@ -374,7 +374,7 @@ void testUnayrCompareOp(Tensor& A1, Tensor& A2) {
TEST
(
Unary
,
CompareOp
)
{
TestUnaryMatrix
<
CpuMatrix
>
testCpu
(
testUnayrCompareOp
<
CpuMatrix
>
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TestUnaryMatrix
<
GpuMatrix
>
testGpu
(
testUnayrCompareOp
<
GpuMatrix
>
);
#endif
}
...
...
@@ -536,7 +536,7 @@ void testBinaryBaseOp(Tensor& A1, Tensor& A2, Tensor& B) {
TEST
(
Binary
,
BaseOp
)
{
TestBinaryMatrix
<
CpuMatrix
>
testCpu
(
testBinaryBaseOp
<
CpuMatrix
>
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TestBinaryMatrix
<
GpuMatrix
>
testGpu
(
testBinaryBaseOp
<
GpuMatrix
>
);
#endif
}
...
...
@@ -710,7 +710,7 @@ void testBinaryMathOp(Tensor& A1, Tensor& A2, Tensor& B) {
TEST
(
Binary
,
MathOp
)
{
TestBinaryMatrix
<
CpuMatrix
>
testCpu
(
testBinaryMathOp
<
CpuMatrix
>
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TestBinaryMatrix
<
GpuMatrix
>
testGpu
(
testBinaryMathOp
<
GpuMatrix
>
);
#endif
}
...
...
@@ -810,7 +810,7 @@ void testBinaryCompareOp(Tensor& A1, Tensor& A2, Tensor& B) {
TEST
(
Binary
,
CompareOp
)
{
TestBinaryMatrix
<
CpuMatrix
>
testCpu
(
testBinaryCompareOp
<
CpuMatrix
>
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TestBinaryMatrix
<
GpuMatrix
>
testGpu
(
testBinaryCompareOp
<
GpuMatrix
>
);
#endif
}
...
...
@@ -955,7 +955,7 @@ void testTernaryBaseOp(Tensor& A1, Tensor& A2, Tensor& B, Tensor& C) {
TEST
(
Ternary
,
BaseOp
)
{
TestTernaryMatrix
<
CpuMatrix
>
testCpu
(
testTernaryBaseOp
<
CpuMatrix
>
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TestTernaryMatrix
<
GpuMatrix
>
testGpu
(
testTernaryBaseOp
<
GpuMatrix
>
);
#endif
}
...
...
@@ -1058,7 +1058,7 @@ void testTernaryCompareOp(Tensor& A1, Tensor& A2, Tensor& B, Tensor& C) {
TEST
(
Ternary
,
CompareOp
)
{
TestTernaryMatrix
<
CpuMatrix
>
testCpu
(
testTernaryCompareOp
<
CpuMatrix
>
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TestTernaryMatrix
<
GpuMatrix
>
testGpu
(
testTernaryCompareOp
<
GpuMatrix
>
);
#endif
}
...
...
@@ -1086,7 +1086,7 @@ void testQuaternaryAdd(
TEST
(
Quaternary
,
BaseOp
)
{
TestQuaternaryMatrix
<
CpuMatrix
>
testCpu
(
testQuaternaryAdd
<
CpuMatrix
>
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TestQuaternaryMatrix
<
GpuMatrix
>
testGpu
(
testQuaternaryAdd
<
GpuMatrix
>
);
#endif
}
...
...
@@ -1156,7 +1156,7 @@ void testQuaternaryCompareOp(
TEST
(
Quaternary
,
CompareOp
)
{
TestQuaternaryMatrix
<
CpuMatrix
>
testCpu
(
testQuaternaryCompareOp
<
CpuMatrix
>
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TestQuaternaryMatrix
<
GpuMatrix
>
testGpu
(
testQuaternaryCompareOp
<
GpuMatrix
>
);
#endif
}
paddle/math/tests/test_TrainingAlgorithm.cpp
浏览文件 @
f985700a
...
...
@@ -91,7 +91,7 @@ int VectorCheckErr(const VectorPtr& vector1, const VectorPtr& vector2) {
typedef
std
::
function
<
void
(
size_t
size
,
bool
useGpu
)
>
testMatrixFunc
;
void
testCase
(
testMatrixFunc
matrixFunc
)
{
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
for
(
auto
useGpu
:
{
false
,
true
})
{
#else
for
(
auto
useGpu
:
{
false
})
{
...
...
paddle/math/tests/test_batchTranspose.cpp
浏览文件 @
f985700a
...
...
@@ -17,7 +17,7 @@ limitations under the License. */
using
namespace
paddle
;
// NOLINT
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
MatrixBatchTransTest
,
test_batch_matrix_transpose
)
{
const
int
nx
=
100
;
const
int
ny
=
50
;
...
...
paddle/math/tests/test_lazyAssign.cu
浏览文件 @
f985700a
...
...
@@ -72,7 +72,7 @@ void testLazyAssign(int height, int width) {
TEST
(
lazyAssign
,
CPU
)
{
testMatrixCase
(
testLazyAssign
<
CpuMatrix
>
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
lazyAssign
,
GPU
)
{
testMatrixCase
(
testLazyAssign
<
GpuMatrix
>
);
}
#endif
...
...
@@ -142,6 +142,6 @@ void testSgdUpdate(int height, int width) {
TEST
(
sgdUpdate
,
CPU
)
{
testMatrixCase
(
testSgdUpdate
<
CpuMatrix
>
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
sgdUpdate
,
GPU
)
{
testMatrixCase
(
testSgdUpdate
<
GpuMatrix
>
);
}
#endif
paddle/math/tests/test_matrixCompare.cpp
浏览文件 @
f985700a
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
/// This unittest checks GpuMatrix/CpuMatrix get same result, so disable when
/// only cpu version.
...
...
paddle/math/tests/test_perturbation.cpp
浏览文件 @
f985700a
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
#include <cuda_runtime.h>
#include <gtest/gtest.h>
...
...
paddle/math/tests/test_sparseMatrixCompare.cpp
浏览文件 @
f985700a
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
/// This unittest checks GpuSparseMatrix/CpuSparseMatrix get same result,
// so disable when
/// only cpu version.
...
...
paddle/memory/detail/buddy_allocator.cc
浏览文件 @
f985700a
...
...
@@ -175,7 +175,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
}
BuddyAllocator
::
PoolSet
::
iterator
BuddyAllocator
::
RefillPool
()
{
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
if
(
system_allocator_
->
UseGpu
())
{
if
((
total_used_
+
total_free_
)
==
0
)
{
// Compute the maximum allocation size for the first allocation.
...
...
paddle/memory/detail/system_allocator.cc
浏览文件 @
f985700a
...
...
@@ -62,7 +62,7 @@ void CPUAllocator::Free(void* p, size_t size, size_t index) {
bool
CPUAllocator
::
UseGpu
()
const
{
return
false
;
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
void
*
GPUAllocator
::
Alloc
(
size_t
&
index
,
size_t
size
)
{
// CUDA documentation doesn't explain if cudaMalloc returns nullptr
...
...
paddle/memory/detail/system_allocator.h
浏览文件 @
f985700a
...
...
@@ -40,7 +40,7 @@ class CPUAllocator : public SystemAllocator {
virtual
bool
UseGpu
()
const
;
};
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
class
GPUAllocator
:
public
SystemAllocator
{
public:
virtual
void
*
Alloc
(
size_t
&
index
,
size_t
size
);
...
...
paddle/memory/detail/system_allocator_test.cc
浏览文件 @
f985700a
...
...
@@ -56,7 +56,7 @@ TEST(CPUAllocator, LockMem) {
TestAllocator
(
a
,
0
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
GPUAllocator
,
Alloc
)
{
paddle
::
memory
::
detail
::
GPUAllocator
a
;
TestAllocator
(
a
,
2048
);
...
...
paddle/memory/memcpy.cc
浏览文件 @
f985700a
...
...
@@ -26,7 +26,7 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
std
::
memcpy
(
dst
,
src
,
num
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
template
<
>
void
Copy
<
platform
::
CPUPlace
,
platform
::
GPUPlace
>
(
platform
::
CPUPlace
dst_place
,
void
*
dst
,
...
...
paddle/memory/memcpy.h
浏览文件 @
f985700a
...
...
@@ -33,7 +33,7 @@ namespace memory {
template
<
typename
DstPlace
,
typename
SrcPlace
>
void
Copy
(
DstPlace
,
void
*
dst
,
SrcPlace
,
const
void
*
src
,
size_t
num
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
/**
* \brief Copy memory from one place to another place.
...
...
paddle/memory/memory.cc
浏览文件 @
f985700a
...
...
@@ -62,7 +62,7 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
return
GetCPUBuddyAllocator
()
->
Used
();
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
BuddyAllocator
*
GetGPUBuddyAllocator
(
int
gpu_id
)
{
using
BuddyAllocVec
=
std
::
vector
<
BuddyAllocator
*>
;
...
...
paddle/memory/memory_test.cc
浏览文件 @
f985700a
...
...
@@ -80,7 +80,7 @@ TEST(BuddyAllocator, CPUMultAlloc) {
}
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
size_t
align
(
size_t
size
,
paddle
::
platform
::
GPUPlace
place
)
{
size
+=
sizeof
(
paddle
::
memory
::
detail
::
Metadata
);
...
...
paddle/operators/detail/strided_memcpy.h
浏览文件 @
f985700a
...
...
@@ -34,7 +34,7 @@ struct StridedMemcpyFunctor<T, 1> {
auto
&
cpu_place
=
boost
::
get
<
platform
::
CPUPlace
>
(
place
);
memory
::
Copy
(
cpu_place
,
dst
,
cpu_place
,
src
,
sizeof
(
T
)
*
dst_dim
.
head
);
}
else
{
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
auto
&
gpu_place
=
boost
::
get
<
platform
::
GPUPlace
>
(
place
);
auto
&
cuda_ctx
=
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
dev_ctx
);
...
...
paddle/operators/math/im2col_test.cc
浏览文件 @
f985700a
...
...
@@ -71,7 +71,7 @@ void testIm2col() {
context
=
new
paddle
::
platform
::
CPUDeviceContext
(
paddle
::
platform
::
CPUPlace
());
}
else
{
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
context
=
new
paddle
::
platform
::
CUDADeviceContext
(
paddle
::
platform
::
GPUPlace
());
#else
...
...
@@ -116,7 +116,7 @@ void testIm2col() {
TEST
(
math
,
im2col
)
{
testIm2col
<
paddle
::
platform
::
CPUPlace
>
();
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
testIm2col
<
paddle
::
platform
::
GPUPlace
>
();
#endif
}
paddle/operators/math/math_function_test.cc
浏览文件 @
f985700a
#include "paddle/operators/math/math_function.h"
#include "gtest/gtest.h"
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
math_function
,
notrans_mul_trans
)
{
paddle
::
framework
::
Tensor
input1
;
paddle
::
framework
::
Tensor
input1_gpu
;
...
...
paddle/operators/strided_memcpy_test.cc
浏览文件 @
f985700a
...
...
@@ -72,7 +72,7 @@ TEST(StridedMemcpy, CPUConcat) {
}
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
StridedMemcpy
,
GPUCrop
)
{
// clang-format off
int
src
[]
=
{
...
...
paddle/platform/device_context.cc
浏览文件 @
f985700a
...
...
@@ -35,7 +35,7 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
Place
CPUDeviceContext
::
GetPlace
()
const
{
return
CPUPlace
();
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
template
<
>
Eigen
::
GpuDevice
*
...
...
paddle/platform/device_context.h
浏览文件 @
f985700a
...
...
@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/gpu_info.h"
...
...
@@ -61,7 +61,7 @@ class CPUDeviceContext : public DeviceContext {
std
::
unique_ptr
<
Eigen
::
DefaultDevice
>
eigen_device_
;
};
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
template
<
>
struct
EigenDeviceConverter
<
platform
::
GPUPlace
>
{
using
EigenDeviceType
=
Eigen
::
GpuDevice
;
...
...
paddle/platform/enforce.h
浏览文件 @
f985700a
...
...
@@ -29,7 +29,7 @@ limitations under the License. */
#include <cxxabi.h> // for __cxa_demangle
#endif
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
...
...
@@ -113,7 +113,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
}
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
...
...
paddle/platform/gpu_info.h
浏览文件 @
f985700a
...
...
@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
#include <cuda_runtime.h>
#include <stddef.h>
...
...
paddle/platform/variant.h
浏览文件 @
f985700a
...
...
@@ -16,7 +16,7 @@
#include <boost/config.hpp>
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
// Because boost's variadic templates has bug on nvcc, boost will disable
// variadic template support when GPU enabled on nvcc.
...
...
paddle/pserver/test/SocketTest.cpp
浏览文件 @
f985700a
...
...
@@ -215,7 +215,7 @@ int main(int argc, char** argv) {
uint64_t
dataSize
=
FLAGS_dim
*
sizeof
(
real
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
GpuVector
gpuParam
(
FLAGS_dim
);
GpuVector
gpuGrad
(
FLAGS_dim
);
#else
...
...
paddle/pserver/test/test_ProtoServer.cpp
浏览文件 @
f985700a
...
...
@@ -99,7 +99,7 @@ TEST(ProtoServer, regular) {
}
TEST
(
ProtoServer
,
extended
)
{
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
ProtoClient
*
client
;
if
(
FLAGS_rdma_tcp
==
"rdma"
)
client
=
new
ProtoClient
(
FLAGS_server_addr
,
FLAGS_port
,
F_RDMA
);
...
...
paddle/pybind/pybind.cc
浏览文件 @
f985700a
...
...
@@ -34,7 +34,7 @@ static size_t UniqueIntegerGenerator() {
}
bool
IsCompileGPU
()
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
return
false
;
#else
return
true
;
...
...
@@ -78,7 +78,7 @@ PYBIND11_PLUGIN(core) {
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
double
>
)
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
double
>
)
...
...
@@ -96,7 +96,7 @@ PYBIND11_PLUGIN(core) {
.
def
(
"__init__"
,
[](
LoDTensor
&
instance
,
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
lod
)
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
new
(
&
instance
)
LoDTensor
(
lod
);
#else
LoD
new_lod
;
...
...
@@ -107,7 +107,7 @@ PYBIND11_PLUGIN(core) {
})
.
def
(
"set_lod"
,
[](
LoDTensor
&
self
,
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
lod
)
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
self
.
set_lod
(
lod
);
#else
LoD
new_lod
;
...
...
@@ -117,7 +117,7 @@ PYBIND11_PLUGIN(core) {
#endif
})
.
def
(
"lod"
,
[](
LoDTensor
&
self
)
->
std
::
vector
<
std
::
vector
<
size_t
>>
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
return
self
.
lod
();
#else
auto
lod
=
self
.
lod
();
...
...
@@ -203,7 +203,7 @@ All parameter, weight, gradient are variables in Paddle.
.
def_static
(
"create"
,
[](
paddle
::
platform
::
GPUPlace
&
place
)
->
paddle
::
platform
::
DeviceContext
*
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
PADDLE_THROW
(
"GPUPlace is not supported in CPU device."
);
#else
return
new
paddle
::
platform
::
CUDADeviceContext
(
place
);
...
...
paddle/pybind/tensor_py.h
浏览文件 @
f985700a
...
...
@@ -106,7 +106,7 @@ void PyCPUTensorSetFromArray(
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
());
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
template
<
typename
T
>
void
PyCUDATensorSetFromArray
(
framework
::
Tensor
&
self
,
...
...
paddle/trainer/MergeModel.cpp
浏览文件 @
f985700a
...
...
@@ -29,7 +29,7 @@ int main(int argc, char** argv) {
initMain
(
argc
,
argv
);
initPython
(
argc
,
argv
);
string
confFile
=
TrainerConfigHelper
::
getConfigNameFromPath
(
FLAGS_model_dir
);
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
FLAGS_use_gpu
=
false
;
#endif
auto
config
=
std
::
make_shared
<
TrainerConfigHelper
>
(
confFile
);
...
...
paddle/trainer/tests/test_Compare.cpp
浏览文件 @
f985700a
...
...
@@ -146,7 +146,7 @@ void compareGradient(comData& comDataCpu, comData& comDataGpu) {
}
int
main
(
int
argc
,
char
**
argv
)
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
exit
(
0
);
#endif
paddle
::
initMain
(
argc
,
argv
);
...
...
paddle/trainer/tests/test_CompareSparse.cpp
浏览文件 @
f985700a
...
...
@@ -174,7 +174,7 @@ TEST(compareSparse, multiGradientMachine) {
FLAGS_local
=
local
;
FLAGS_ports_num_for_sparse
=
5
;
for
(
bool
useGpu
:
{
false
,
true
})
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
if
(
useGpu
)
continue
;
#endif
FLAGS_parallel_nn
=
useGpu
;
...
...
@@ -198,7 +198,7 @@ TEST(compareSparse, NeuralNetwork) {
FLAGS_local
=
local
;
FLAGS_ports_num_for_sparse
=
5
;
for
(
bool
useGpu
:
{
false
,
true
})
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
if
(
useGpu
)
continue
;
#endif
FLAGS_parallel_nn
=
useGpu
;
...
...
paddle/trainer/tests/test_Trainer.cpp
浏览文件 @
f985700a
...
...
@@ -51,7 +51,7 @@ void checkGradientTest(const string& configFile,
TEST
(
checkGradient
,
cpu
)
{
checkGradientTest
(
configFile1
,
false
,
false
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
checkGradient
,
gpu
)
{
checkGradientTest
(
configFile1
,
true
,
false
);
}
TEST
(
checkGradient
,
multiGpu
)
{
...
...
@@ -97,7 +97,7 @@ TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); }
TEST
(
checkGradient
,
chunk
)
{
checkGradientTest
(
configFile3
,
false
,
false
);
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
checkGradientTest
(
configFile3
,
true
,
true
);
#endif
}
...
...
paddle/trainer/tests/test_TrainerOnePass.cpp
浏览文件 @
f985700a
...
...
@@ -79,7 +79,7 @@ void trainerOnePassTest(const string& configFile,
// 1. test trainer (cpu, gpu).
TEST
(
trainerOnePass
,
cpu
)
{
trainerOnePassTest
(
configFile1
,
false
,
false
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
trainerOnePass
,
gpu
)
{
trainerOnePassTest
(
configFile1
,
true
,
false
);
}
TEST
(
trainerOnePass
,
gpu2
)
{
trainerOnePassTest
(
configFile1
,
true
,
false
,
2
);
}
...
...
@@ -94,7 +94,7 @@ TEST(trainerOnePass, parallel) {
#endif
// 2. test average_window.
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
average_window
,
gpu
)
{
trainerOnePassTest
(
configFile1
,
true
,
false
,
4
,
0.01
);
}
...
...
@@ -266,7 +266,7 @@ TEST(checkRemoteUpdater, cpuTrainerOldUpdater) {
checkRemoteParameterUpdaterTest
(
configFile1
,
false
,
false
,
1
,
true
);
}
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
TEST
(
checkRemoteUpdater
,
gpuTrainer
)
{
checkRemoteParameterUpdaterTest
(
configFile1
,
true
,
false
);
}
...
...
paddle/trainer/tests/test_recurrent_machine_generation.cpp
浏览文件 @
f985700a
...
...
@@ -113,7 +113,7 @@ void testGeneration(const string& configFile,
#ifndef PADDLE_TYPE_DOUBLE
TEST
(
RecurrentGradientMachine
,
test_generation
)
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
const
auto
useGpuConfs
=
{
false
};
#else
const
auto
useGpuConfs
=
{
true
,
false
};
...
...
paddle/utils/Flags.cpp
浏览文件 @
f985700a
...
...
@@ -14,7 +14,7 @@ limitations under the License. */
#include "Flags.h"
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
DEFINE_bool
(
use_gpu
,
false
,
"Only support CPU training"
);
#else
DEFINE_bool
(
use_gpu
,
true
,
"Whether to use GPU for training"
);
...
...
paddle/utils/Util.h
浏览文件 @
f985700a
...
...
@@ -218,7 +218,7 @@ protected:
* *d2* is peer device to enable direct access to by the d1 device.
*/
inline
void
enablePeerAccess
(
int
d1
,
int
d2
)
{
#if
ndef PADDLE_ONLY_C
PU
#if
def PADDLE_WITH_G
PU
if
(
hl_device_can_access_peer
(
d1
,
d2
))
{
SetDevice
dev
(
d1
);
hl_device_enable_peer_access
(
d2
);
...
...
paddle/utils/Version.h
浏览文件 @
f985700a
...
...
@@ -48,7 +48,7 @@ void printVersion(std::ostream& os);
* @return return true if paddle compiled with GPU
*/
constexpr
bool
isWithGpu
()
{
#if
def PADDLE_ONLY_C
PU
#if
ndef PADDLE_WITH_G
PU
return
false
;
#else
return
true
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录