Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
8ad67da9
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
8ad67da9
编写于
10月 12, 2017
作者:
C
chengduoZH
浏览文件
操作
浏览文件
下载
差异文件
fix conflict
上级
6ef2da2e
2daba040
变更
13
显示空白变更内容
内联
并排
Showing
13 changed file
with
515 addition
and
117 deletion
+515
-117
paddle/api/CMakeLists.txt
paddle/api/CMakeLists.txt
+1
-1
paddle/framework/CMakeLists.txt
paddle/framework/CMakeLists.txt
+8
-6
paddle/framework/executor_test.cc
paddle/framework/executor_test.cc
+10
-0
paddle/framework/operator.h
paddle/framework/operator.h
+9
-0
paddle/operators/conv2d_op.cc
paddle/operators/conv2d_op.cc
+73
-93
paddle/operators/conv2d_op.cu
paddle/operators/conv2d_op.cu
+1
-1
paddle/operators/conv2d_op.h
paddle/operators/conv2d_op.h
+32
-1
paddle/operators/conv_cudnn_op.cc
paddle/operators/conv_cudnn_op.cc
+47
-0
paddle/operators/conv_cudnn_op.cu
paddle/operators/conv_cudnn_op.cu
+277
-0
paddle/platform/cudnn_helper.h
paddle/platform/cudnn_helper.h
+31
-11
paddle/pybind/CMakeLists.txt
paddle/pybind/CMakeLists.txt
+1
-1
python/paddle/v2/framework/tests/test_conv2d_op.py
python/paddle/v2/framework/tests/test_conv2d_op.py
+23
-3
python/paddle/v2/framework/tests/test_seq_concat_op.py
python/paddle/v2/framework/tests/test_seq_concat_op.py
+2
-0
未找到文件。
paddle/api/CMakeLists.txt
浏览文件 @
8ad67da9
...
...
@@ -26,7 +26,7 @@ FILE(GLOB PY_PADDLE_PYTHON_FILES ${PADDLE_SOURCE_DIR}/paddle/py_paddle/*.py)
SET_SOURCE_FILES_PROPERTIES
(
Paddle.i PROPERTIES CPLUSPLUS ON
)
SET
(
CMAKE_SWIG_OUTDIR
${
CMAKE_CURRENT_BINARY_DIR
}
)
SET
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-parentheses-equality -Wno-missing-field-initializers -Wno-self-assign"
)
SET
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-parentheses-equality -Wno-missing-field-initializers -Wno-self-assign
-ftls-model=global-dynamic
"
)
SET
(
SWIG_MODULE_swig_paddle_EXTRA_DEPS
paddle_parameter
...
...
paddle/framework/CMakeLists.txt
浏览文件 @
8ad67da9
...
...
@@ -42,12 +42,14 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
cc_library
(
backward SRCS backward.cc DEPS net_op
)
cc_test
(
backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward
${
GLOB_OP_LIB
}
)
#if(WITH_GPU)
# nv_test(executor_test SRCS executor_test.cc DEPS executor)
#else()
# cc_test(executor_test SRCS executor_test.cc DEPS executor)
#endif()
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward
)
set
(
EXECUTOR_TEST_OP elementwise_add_op gaussian_random_op feed_op fetch_op
mul_op sum_op squared_l2_distance_op fill_constant_op sgd_op
)
if
(
WITH_GPU
)
nv_test
(
executor_test SRCS executor_test.cc DEPS executor
${
EXECUTOR_TEST_OP
}
)
else
()
cc_test
(
executor_test SRCS executor_test.cc DEPS executor
${
EXECUTOR_TEST_OP
}
)
endif
()
cc_library
(
tensor_array SRCS tensor_array.cc DEPS lod_tensor
)
cc_test
(
tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place
)
paddle/framework/executor_test.cc
浏览文件 @
8ad67da9
...
...
@@ -25,6 +25,16 @@ limitations under the License. */
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
// Operators referenced by this test. The list mirrors EXECUTOR_TEST_OP in
// paddle/framework/CMakeLists.txt.
// NOTE(review): USE_OP presumably forces each operator's registration code to
// be linked into the test binary — confirm against op_registry.h.
USE_OP(elementwise_add);
USE_OP(gaussian_random);
USE_OP(feed);
USE_OP(fetch);
USE_OP(mul);
USE_OP(sum);
USE_OP(squared_l2_distance);
USE_OP(fill_constant);
USE_OP(sgd);
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
framework
;
...
...
paddle/framework/operator.h
浏览文件 @
8ad67da9
...
...
@@ -289,6 +289,15 @@ class ExecutionContext {
return
device_context_
;
}
#ifdef PADDLE_WITH_CUDA
// Returns this execution context's device context viewed as a
// CUDADeviceContext. The place of the stored context is checked with
// is_gpu_place() first; the reference is then reinterpreted without any
// further type check.
const platform::CUDADeviceContext& cuda_device_context() const {
  PADDLE_ENFORCE(platform::is_gpu_place(device_context_.GetPlace()));
  return *reinterpret_cast<const platform::CUDADeviceContext*>(
      &device_context_);
}
#endif
private:
const
OperatorBase
&
op_
;
const
Scope
&
scope_
;
...
...
paddle/operators/conv2d_op.cc
浏览文件 @
8ad67da9
...
...
@@ -12,22 +12,12 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/
gemm_
conv2d_op.h"
#include "paddle/operators/conv2d_op.h"
namespace
paddle
{
namespace
operators
{
// Returns the output extent of a convolution along one axis:
// (input_size - filter_size + 2 * padding) / stride + 1, using integer
// division.
int outputSize(int input_size, int filter_size, int padding, int stride) {
  return (input_size - filter_size + 2 * padding) / stride + 1;
}
class
Conv2DOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
Conv2DOp
::
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Input"
),
"Input(Input) of Conv2DOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Filter"
),
...
...
@@ -53,25 +43,22 @@ class Conv2DOp : public framework::OperatorWithKernel {
"The number of output channels should be divided by groups."
);
auto
output_height
=
o
utputSize
(
in_dims
[
2
],
filter_dims
[
2
],
paddings
[
0
],
strides
[
0
]);
O
utputSize
(
in_dims
[
2
],
filter_dims
[
2
],
paddings
[
0
],
strides
[
0
]);
auto
output_width
=
outputSize
(
in_dims
[
3
],
filter_dims
[
3
],
paddings
[
1
],
strides
[
1
]);
ctx
->
SetOutputDim
(
"Output"
,
{
in_dims
[
0
],
filter_dims
[
0
],
output_height
,
output_width
});
}
};
OutputSize
(
in_dims
[
3
],
filter_dims
[
3
],
paddings
[
1
],
strides
[
1
]);
ctx
->
SetOutputDim
(
"Output"
,
{
in_dims
[
0
],
filter_dims
[
0
],
output_height
,
output_width
});
}
class
Conv2DOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
Conv2DOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
Conv2DOpMaker
::
Conv2DOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"Input"
,
"The input tensor of convolution operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the "
"number of channels, H and W is the height and width of image."
);
AddInput
(
"Filter"
,
AddInput
(
"Filter"
,
"The filter tensor of convolution operator."
"The format of the filter tensor is MCHW, where M is the number of "
"output image channels, C is the number of input image channels, "
...
...
@@ -98,15 +85,9 @@ The convolution operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
)DOC"
);
}
};
class
Conv2DOpGrad
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
}
protected:
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
Conv2DOpGrad
::
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
{
auto
in_dims
=
ctx
->
GetInputDim
(
"Input"
);
auto
filter_dims
=
ctx
->
GetInputDim
(
"Filter"
);
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"Input"
)))
{
...
...
@@ -115,8 +96,7 @@ class Conv2DOpGrad : public framework::OperatorWithKernel {
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"Filter"
)))
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Filter"
),
filter_dims
);
}
}
};
}
}
// namespace operators
}
// namespace paddle
...
...
paddle/operators/conv2d_op.cu
浏览文件 @
8ad67da9
...
...
@@ -12,7 +12,7 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/
gemm_
conv2d_op.h"
#include "paddle/operators/conv2d_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/operators/
gemm_
conv2d_op.h
→
paddle/operators/conv2d_op.h
浏览文件 @
8ad67da9
...
...
@@ -24,6 +24,38 @@ namespace operators {
using
Tensor
=
framework
::
Tensor
;
// Base convolution operator definitions for other conv
// like operators to reuse the implementation.
// Returns the output extent of a convolution along one axis:
// (input_size - filter_size + 2 * padding) / stride + 1, using integer
// division.
inline int OutputSize(int input_size, int filter_size, int padding,
                      int stride) {
  return (input_size - filter_size + 2 * padding) / stride + 1;
}
// Define Op classes in .h file so that other conv
// operator implementations can reuse the code.
// Proto/attribute maker for the 2-D convolution operator. Only the
// constructor is declared here; its body is not part of this header.
class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  // proto:      operator proto handed on to OpProtoAndCheckerMaker.
  // op_checker: attribute checker handed on to OpProtoAndCheckerMaker.
  Conv2DOpMaker(framework::OpProto* proto,
                framework::OpAttrChecker* op_checker);
};
// Forward 2-D convolution operator. Inherits the constructors of
// OperatorWithKernel and overrides InferShape, which is only declared here.
class Conv2DOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  // Shape inference hook; the definition is not part of this header.
  void InferShape(framework::InferShapeContext* ctx) const override;
};
// Gradient operator for 2-D convolution. Inherits the constructors of
// OperatorWithKernel and overrides InferShape, which is only declared here.
class Conv2DOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  // Shape inference hook; the definition is not part of this header.
  void InferShape(framework::InferShapeContext* ctx) const override;
};
template
<
typename
Place
,
typename
T
>
class
GemmConv2DKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -74,7 +106,6 @@ class GemmConv2DKernel : public framework::OpKernel<T> {
framework
::
DDim
output_matrix_shape
=
{
output_channels
,
output_height
*
output_width
};
// convolution operator: im2col + gemm
int
in_step
=
input_channels
/
groups
;
int
out_step
=
output_channels
/
groups
;
...
...
paddle/operators/conv_cudnn_op.cc
0 → 100644
浏览文件 @
8ad67da9
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/conv2d_op.h"
namespace
paddle
{
namespace
operators
{
// Operator maker for the cuDNN convolution op. It delegates to
// Conv2DOpMaker for everything that maker registers and then adds two more
// attributes: "dilations" and "workspace_size_MB".
class CudnnConvOpMaker : public Conv2DOpMaker {
 public:
  CudnnConvOpMaker(framework::OpProto* proto,
                   framework::OpAttrChecker* op_checker)
      : Conv2DOpMaker(proto, op_checker) {
    // Two-element dilation vector, defaulting to {1, 1}.
    const std::vector<int> default_dilations{1, 1};
    AddAttr<std::vector<int>>("dilations",
                              "dilations of convolution operator.")
        .SetDefault(default_dilations);

    // Workspace budget in megabytes, defaulting to 4096.
    const int default_workspace_mb = 4096;
    AddAttr<int>("workspace_size_MB",
                 "workspace size for cudnn, in MB, "
                 "workspace is a section of GPU memory which will be "
                 "allocated/freed each time the operator runs, larger "
                 "workspace size can increase performance but also requires "
                 "better hardward. This size should be carefully setted.")
        .SetDefault(default_workspace_mb);
  }
};
}
// namespace operators
}
// namespace paddle
namespace ops = paddle::operators;
// Register conv_cudnn / conv_cudnn_grad using the shared Conv2DOp and
// Conv2DOpGrad operator classes together with the cuDNN-specific maker.
REGISTER_OP(conv_cudnn, ops::Conv2DOp, ops::CudnnConvOpMaker, conv_cudnn_grad,
            ops::Conv2DOpGrad);
// On CPUPlace both ops are served by the GEMM-based kernels (float only).
REGISTER_OP_CPU_KERNEL(
    conv_cudnn, ops::GemmConv2DKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
    conv_cudnn_grad,
    ops::GemmConvGrad2DKernel<paddle::platform::CPUPlace, float>);
paddle/operators/conv_cudnn_op.cu
0 → 100644
浏览文件 @
8ad67da9
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/memory/memory.h"
#include "paddle/operators/conv2d_op.h"
#include "paddle/platform/assert.h"
#include "paddle/platform/cudnn_helper.h"
namespace
paddle
{
namespace
operators
{
// Short local names for the framework/platform types used by the kernels in
// this file.
using Tensor = framework::Tensor;
using ScopedTensorDescriptor = platform::ScopedTensorDescriptor;
using ScopedFilterDescriptor = platform::ScopedFilterDescriptor;
using ScopedConvolutionDescriptor = platform::ScopedConvolutionDescriptor;
using DataLayout = platform::DataLayout;
using CUDADeviceContext = platform::CUDADeviceContext;

// Workspace limit in bytes (1024 * 1024 * 1024) that the kernels use when the
// "workspace_size_MB" attribute is not positive.
static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES = 1024 * 1024 * 1024;
// Copies every extent of `dims`, in order, into a std::vector<int>.
//
// NOTE: framework::vectorize converts to type int64_t
// which does not fit cudnn inputs.
std::vector<int> Dims2Vector(const framework::DDim& dims) {
  const int rank = dims.size();
  std::vector<int> ret;
  // The final size is known up front, so allocate once.
  ret.reserve(rank);
  for (int i = 0; i < rank; ++i) {
    // Spell out the conversion to int (the extents are presumably int64_t,
    // per the note above — confirm against DDim).
    ret.push_back(static_cast<int>(dims[i]));
  }
  return ret;
}
// Forward kernel of the conv_cudnn operator.
//
// Compute() reads the "Input" and "Filter" tensors and the convolution
// attributes, builds the cuDNN descriptors, asks cuDNN for a forward algorithm
// under a workspace limit, allocates that workspace, and calls
// cudnnConvolutionForward once per group, writing into "Output".
template <typename T>
class CudnnConvOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                   "It must use GPUPlace.");
    auto* input = ctx.Input<Tensor>("Input");
    auto* filter = ctx.Input<Tensor>("Filter");
    auto* output = ctx.Output<Tensor>("Output");

    std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
    int groups = ctx.Attr<int>("groups");
    int user_workspace_size = ctx.Attr<int>("workspace_size_MB");

    const T* input_data = input->data<T>();
    const T* filter_data = filter->data<T>();
    T* output_data = output->mutable_data<T>(ctx.GetPlace());

    // ------------------- cudnn descriptors ---------------------
    ScopedTensorDescriptor input_desc;
    ScopedTensorDescriptor output_desc;
    ScopedFilterDescriptor filter_desc;
    ScopedConvolutionDescriptor conv_desc;
    DataLayout layout = DataLayout::kNCHW;

    cudnnTensorDescriptor_t cudnn_input_desc =
        input_desc.descriptor<T>(layout, Dims2Vector(input->dims()), groups);
    cudnnTensorDescriptor_t cudnn_output_desc =
        output_desc.descriptor<T>(layout, Dims2Vector(output->dims()), groups);
    cudnnFilterDescriptor_t cudnn_filter_desc =
        filter_desc.descriptor<T>(layout, Dims2Vector(filter->dims()), groups);
    cudnnConvolutionDescriptor_t cudnn_conv_desc =
        conv_desc.descriptor<T>(paddings, strides, dilations);

    int input_channels = input->dims()[1];
    int input_height = input->dims()[2];
    int input_width = input->dims()[3];
    int output_channels = output->dims()[1];
    int output_height = output->dims()[2];
    int output_width = output->dims()[3];

    // Element offsets between consecutive groups in each buffer.
    int group_offset_in = input_channels / groups * input_height * input_width;
    int group_offset_out =
        output_channels / groups * output_height * output_width;
    int group_offset_filter = filter->numel() / groups;

    // ------------------- cudnn conv workspace ---------------------
    void* cudnn_workspace = nullptr;
    // Final workspace size to allocate; filled in by cuDNN below.
    size_t workspace_size_in_bytes = 0;
    size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES;
    if (user_workspace_size > 0) {
      // Widen to size_t before multiplying. In plain int arithmetic a value
      // such as 4096 (MB) * 1024 * 1024 overflows a 32-bit int.
      workspace_size_limit =
          static_cast<size_t>(user_workspace_size) * 1024 * 1024;
    }

    // ------------------- cudnn conv algorithm ---------------------
    cudnnConvolutionFwdAlgo_t algo;
    auto handle = ctx.cuda_device_context().cudnn_handle();

    PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
        handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
        cudnn_output_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
        workspace_size_limit, &algo));
    // Get the workspace size the chosen algorithm needs.
    PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardWorkspaceSize(
        handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
        cudnn_output_desc, algo, &workspace_size_in_bytes));

    // Allocate on GPU memory.
    // NOTE(review): the workspace is released only by the Free() call at the
    // end of this function; if PADDLE_ENFORCE exits early on failure the
    // buffer is presumably not released — confirm.
    platform::GPUPlace gpu = boost::get<platform::GPUPlace>(ctx.GetPlace());
    cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);

    // ------------------- cudnn conv forward ---------------------
    T alpha = 1.0f, beta = 0.0f;
    for (int i = 0; i < groups; i++) {
      PADDLE_ENFORCE(platform::dynload::cudnnConvolutionForward(
          handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in,
          cudnn_filter_desc, filter_data + i * group_offset_filter,
          cudnn_conv_desc, algo, cudnn_workspace, workspace_size_in_bytes,
          &beta, cudnn_output_desc, output_data + i * group_offset_out));
    }

    // Release the cudnn workspace
    paddle::memory::Free(gpu, cudnn_workspace);
  }
};
// Backward kernel of the conv_cudnn operator.
//
// Compute() reads "Input", "Filter" and the gradient of "Output", and for
// whichever of the "Input" / "Filter" gradient outputs is non-null it picks a
// cuDNN backward algorithm under a workspace limit, zero-fills the gradient
// tensor, and calls the matching cuDNN backward routine once per group. One
// workspace, sized to the larger of the two requirements, is shared by both
// passes.
template <typename T>
class CudnnConvGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                   "It must use GPUPlace.");
    auto input = ctx.Input<Tensor>("Input");
    auto filter = ctx.Input<Tensor>("Filter");
    auto output_grad = ctx.Input<Tensor>(framework::GradVarName("Output"));
    auto input_grad = ctx.Output<Tensor>(framework::GradVarName("Input"));
    auto filter_grad = ctx.Output<Tensor>(framework::GradVarName("Filter"));

    const T* input_data = input->data<T>();
    const T* output_grad_data = output_grad->data<T>();
    const T* filter_data = filter->data<T>();

    std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
    int groups = ctx.Attr<int>("groups");
    int user_workspace_size = ctx.Attr<int>("workspace_size_MB");

    // ------------------- cudnn descriptors ---------------------
    ScopedTensorDescriptor input_desc;
    ScopedTensorDescriptor output_grad_desc;
    ScopedTensorDescriptor input_grad_desc;

    ScopedFilterDescriptor filter_desc;
    ScopedFilterDescriptor filter_grad_desc;
    ScopedConvolutionDescriptor conv_desc;
    DataLayout layout = DataLayout::kNCHW;

    cudnnTensorDescriptor_t cudnn_input_desc =
        input_desc.descriptor<T>(layout, Dims2Vector(input->dims()), groups);
    cudnnTensorDescriptor_t cudnn_output_grad_desc =
        output_grad_desc.descriptor<T>(layout,
                                       Dims2Vector(output_grad->dims()),
                                       groups);
    cudnnFilterDescriptor_t cudnn_filter_desc =
        filter_desc.descriptor<T>(layout, Dims2Vector(filter->dims()), groups);
    // Created further down, and only for the gradients that were requested.
    cudnnTensorDescriptor_t cudnn_input_grad_desc = nullptr;
    cudnnFilterDescriptor_t cudnn_filter_grad_desc = nullptr;

    cudnnConvolutionDescriptor_t cudnn_conv_desc =
        conv_desc.descriptor<T>(paddings, strides, dilations);

    int input_channels = input->dims()[1];
    int input_height = input->dims()[2];
    int input_width = input->dims()[3];
    int output_grad_channels = filter->dims()[0];
    int output_grad_height = output_grad->dims()[2];
    int output_grad_width = output_grad->dims()[3];

    // Element offsets between consecutive groups in each buffer.
    int group_offset_in = input_channels / groups * input_height * input_width;
    int group_offset_out =
        output_grad_channels / groups * output_grad_height * output_grad_width;
    int group_offset_filter = filter->numel() / groups;

    // ------------------- cudnn backward algorithm ---------------------
    cudnnConvolutionBwdDataAlgo_t data_algo;
    cudnnConvolutionBwdFilterAlgo_t filter_algo;
    size_t workspace_size_in_bytes = 0, tmp_size = 0;
    size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES;
    if (user_workspace_size > 0) {
      // Widen to size_t before multiplying. In plain int arithmetic a value
      // such as 4096 (MB) * 1024 * 1024 overflows a 32-bit int.
      workspace_size_limit =
          static_cast<size_t>(user_workspace_size) * 1024 * 1024;
    }

    auto handle = ctx.cuda_device_context().cudnn_handle();
    if (input_grad) {
      cudnn_input_grad_desc = input_grad_desc.descriptor<T>(
          layout, Dims2Vector(input_grad->dims()), groups);
      PADDLE_ENFORCE(
          platform::dynload::cudnnGetConvolutionBackwardDataAlgorithm(
              handle, cudnn_filter_desc,
              // dyDesc: Handle to the previously initialized input differential
              // tensor descriptor.
              cudnn_output_grad_desc, cudnn_conv_desc,
              // dxDesc: Handle to the previously initialized output tensor
              // descriptor.
              cudnn_input_grad_desc,
              CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
              workspace_size_limit, &data_algo));
      PADDLE_ENFORCE(
          platform::dynload::cudnnGetConvolutionBackwardDataWorkspaceSize(
              handle, cudnn_filter_desc, cudnn_output_grad_desc,
              cudnn_conv_desc, cudnn_input_grad_desc, data_algo, &tmp_size));
      workspace_size_in_bytes = std::max(workspace_size_in_bytes, tmp_size);
    }

    if (filter_grad) {
      cudnn_filter_grad_desc = filter_grad_desc.descriptor<T>(
          layout, Dims2Vector(filter_grad->dims()), groups);
      PADDLE_ENFORCE(
          platform::dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
              handle, cudnn_input_desc, cudnn_output_grad_desc,
              cudnn_conv_desc, cudnn_filter_desc,
              CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
              workspace_size_limit, &filter_algo));

      PADDLE_ENFORCE(
          platform::dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize(
              handle, cudnn_input_desc, cudnn_output_grad_desc,
              cudnn_conv_desc, cudnn_filter_desc, filter_algo, &tmp_size));
      workspace_size_in_bytes = std::max(workspace_size_in_bytes, tmp_size);
    }

    // ------------------- cudnn conv workspace ---------------------
    // Already on GPU
    // NOTE(review): the workspace is released only by the Free() call at the
    // end of this function; if PADDLE_ENFORCE exits early on failure the
    // buffer is presumably not released — confirm.
    void* cudnn_workspace = nullptr;
    platform::GPUPlace gpu = boost::get<platform::GPUPlace>(ctx.GetPlace());
    cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);

    // ------------------- cudnn conv backward data ---------------------
    // FIXME(typhoonzero): template type T may not be the same as cudnn call.
    T alpha = 1.0f, beta = 0.0f;
    if (input_grad) {
      T* input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
      // Zero-fill the gradient buffer before cuDNN writes into it.
      auto t = framework::EigenVector<T>::Flatten(*input_grad);
      t.device(ctx.GetEigenDevice<platform::GPUPlace>()) =
          t.constant(static_cast<T>(0));
      for (int i = 0; i < groups; i++) {
        PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardData(
            handle, &alpha, cudnn_filter_desc,
            filter_data + i * group_offset_filter, cudnn_output_grad_desc,
            output_grad_data + i * group_offset_out, cudnn_conv_desc,
            data_algo, cudnn_workspace, workspace_size_in_bytes, &beta,
            cudnn_input_grad_desc, input_grad_data + i * group_offset_in));
      }
    }

    // ------------------- cudnn conv backward filter ---------------------
    if (filter_grad) {
      T* filter_grad_data = filter_grad->mutable_data<T>(ctx.GetPlace());
      // Zero-fill the gradient buffer before cuDNN writes into it.
      auto t = framework::EigenVector<T>::Flatten(*filter_grad);
      t.device(ctx.GetEigenDevice<platform::GPUPlace>()) =
          t.constant(static_cast<T>(0));
      for (int i = 0; i < groups; i++) {
        PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter(
            handle, &alpha, cudnn_input_desc,
            input_data + i * group_offset_in, cudnn_output_grad_desc,
            output_grad_data + i * group_offset_out, cudnn_conv_desc,
            filter_algo, cudnn_workspace, workspace_size_in_bytes, &beta,
            cudnn_filter_grad_desc,
            filter_grad_data + i * group_offset_filter));
      }
    }

    // Release the cudnn workspace
    paddle::memory::Free(gpu, cudnn_workspace);
  }
};
}
// namespace operators
}
// namespace paddle
// GPU kernels for conv_cudnn and its gradient; only float is registered.
REGISTER_OP_GPU_KERNEL(conv_cudnn, paddle::operators::CudnnConvOpKernel<float>);
REGISTER_OP_GPU_KERNEL(conv_cudnn_grad,
                       paddle::operators::CudnnConvGradOpKernel<float>);
paddle/platform/cudnn_helper.h
浏览文件 @
8ad67da9
...
...
@@ -71,23 +71,32 @@ class ScopedTensorDescriptor {
inline
cudnnTensorDescriptor_t
descriptor
(
const
cudnnTensorFormat_t
format
,
const
cudnnDataType_t
type
,
const
std
::
vector
<
int
>&
dims
)
{
// the format is not used now, but it maybe useful feature
const
std
::
vector
<
int
>&
dims
,
const
int
groups
=
1
)
{
// the format is not used now, will add later
std
::
vector
<
int
>
strides
(
dims
.
size
());
strides
[
dims
.
size
()
-
1
]
=
1
;
for
(
int
i
=
dims
.
size
()
-
2
;
i
>=
0
;
i
--
)
{
strides
[
i
]
=
dims
[
i
+
1
]
*
strides
[
i
+
1
];
}
// Update tensor descriptor dims setting if groups > 1
// FIXME(typhoonzero): Assume using NCHW order
std
::
vector
<
int
>
dims_with_group
(
dims
.
begin
(),
dims
.
end
());
// copy
if
(
groups
>
1
)
{
dims_with_group
[
1
]
=
dims_with_group
[
1
]
/
groups
;
}
PADDLE_ENFORCE
(
dynload
::
cudnnSetTensorNdDescriptor
(
desc_
,
type
,
dims
.
size
(),
dims
.
data
(),
strides
.
data
()));
desc_
,
type
,
dims_with_group
.
size
(),
dims_with_group
.
data
(),
strides
.
data
()));
return
desc_
;
}
template
<
typename
T
>
inline
cudnnTensorDescriptor_t
descriptor
(
const
DataLayout
&
order
,
const
std
::
vector
<
int
>&
dims
)
{
return
descriptor
(
GetCudnnTensorFormat
(
order
),
CudnnDataType
<
T
>::
type
,
dims
);
const
std
::
vector
<
int
>&
dims
,
const
int
groups
=
1
)
{
return
descriptor
(
GetCudnnTensorFormat
(
order
),
CudnnDataType
<
T
>::
type
,
dims
,
groups
);
}
private:
...
...
@@ -106,18 +115,29 @@ class ScopedFilterDescriptor {
inline
cudnnFilterDescriptor_t
descriptor
(
const
cudnnTensorFormat_t
format
,
const
cudnnDataType_t
type
,
const
std
::
vector
<
int
>&
kernel
)
{
// filter layout: output input spatial_dim_y spatial_dim_x
const
std
::
vector
<
int
>&
kernel
,
const
int
groups
=
1
)
{
// filter layout: MCHW, where M is the number of
// output image channels, C is the number of input image channels,
// H and W is height and width of filter.
std
::
vector
<
int
>
kernel_with_group
(
kernel
.
begin
(),
kernel
.
end
());
if
(
groups
>
1
)
{
// M /= groups
kernel_with_group
[
0
]
/=
groups
;
// NOTE: input filter(C) of the filter is already asserted to be C/groups.
}
PADDLE_ENFORCE
(
dynload
::
cudnnSetFilterNdDescriptor
(
desc_
,
type
,
format
,
kernel
.
size
(),
kernel
.
data
()));
desc_
,
type
,
format
,
kernel_with_group
.
size
(),
kernel_with_group
.
data
()));
return
desc_
;
}
template
<
typename
T
>
inline
cudnnFilterDescriptor_t
descriptor
(
const
DataLayout
&
order
,
const
std
::
vector
<
int
>&
kernel
)
{
const
std
::
vector
<
int
>&
kernel
,
const
int
groups
=
1
)
{
return
descriptor
(
GetCudnnTensorFormat
(
order
),
CudnnDataType
<
T
>::
type
,
kernel
);
kernel
,
groups
);
}
private:
...
...
paddle/pybind/CMakeLists.txt
浏览文件 @
8ad67da9
if
(
WITH_PYTHON
)
cc_library
(
paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc
DEPS pybind python backward proto_desc tensor_array
DEPS pybind python backward proto_desc tensor_array
paddle_memory
${
GLOB_OP_LIB
}
)
endif
(
WITH_PYTHON
)
python/paddle/v2/framework/tests/test_conv2d_op.py
浏览文件 @
8ad67da9
...
...
@@ -36,7 +36,7 @@ def conv2d_forward_naive(input, filter, group, conv_param):
class
TestConv2dOp
(
OpTest
):
def
setUp
(
self
):
self
.
init_groups
()
self
.
op_type
=
"conv2d"
self
.
init_optype
()
pad
=
[
0
,
0
]
stride
=
[
1
,
1
]
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
...
...
@@ -45,14 +45,18 @@ class TestConv2dOp(OpTest):
filter_size
=
[
6
,
f_c
,
3
,
3
]
conv2d_param
=
{
'stride'
:
stride
,
'pad'
:
pad
}
input
=
np
.
random
.
random
(
input_size
).
astype
(
"float32"
)
filter
=
np
.
random
.
random
(
filter_size
).
astype
(
"float32"
)
output
=
conv2d_forward_naive
(
input
,
filter
,
self
.
groups
,
conv2d_param
)
self
.
inputs
=
{
'Input'
:
input
,
'Filter'
:
filter
}
self
.
attrs
=
{
'strides'
:
stride
,
'paddings'
:
pad
,
'groups'
:
self
.
groups
}
self
.
attrs
=
{
'strides'
:
stride
,
'paddings'
:
pad
,
'groups'
:
self
.
groups
,
'dilations'
:
[
1
,
1
]
}
self
.
outputs
=
{
'Output'
:
output
}
def
test_check_output
(
self
):
...
...
@@ -79,11 +83,27 @@ class TestConv2dOp(OpTest):
def init_groups(self):
    # Called from setUp(); subclasses override this to change the group count.
    self.groups = 1
def init_optype(self):
    # Called from setUp(); subclasses override this to test another operator.
    self.op_type = "conv2d"
class TestWithGroup(TestConv2dOp):
    """TestConv2dOp case with ``groups`` set to 3."""

    def init_groups(self):
        self.groups = 3
class TestCudnn2d(TestConv2dOp):
    """TestConv2dOp case with the op type switched to ``conv_cudnn``."""

    def init_optype(self):
        self.op_type = "conv_cudnn"
class TestCudnn2dWithGroup(TestConv2dOp):
    """TestConv2dOp case for ``conv_cudnn`` with ``groups`` set to 3."""

    def init_optype(self):
        self.op_type = "conv_cudnn"

    def init_groups(self):
        self.groups = 3
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/v2/framework/tests/test_seq_concat_op.py
浏览文件 @
8ad67da9
import
unittest
import
numpy
as
np
import
sys
from
op_test
import
OpTest
...
...
@@ -74,4 +75,5 @@ class TestConcatOpLevelZero(TestConcatOp):
if __name__ == '__main__':
    # NOTE(review): sys.exit(0) runs first, so unittest.main() below is never
    # reached when this file is executed directly and the run reports success
    # without executing any test. Confirm this is an intentional, temporary
    # skip.
    sys.exit(0)
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录