Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
06db7038
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
694
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
You need to sign in or sign up before continuing.
提交
06db7038
编写于
1月 23, 2018
作者:
X
xzl
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
../../../../../paddle/api
上级
3772d27d
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
19 addition
and
21 deletion
+19
-21
paddle/operators/CMakeLists.txt
paddle/operators/CMakeLists.txt
+2
-1
paddle/operators/conv_op.cc
paddle/operators/conv_op.cc
+8
-3
paddle/operators/conv_op.h
paddle/operators/conv_op.h
+2
-5
paddle/operators/math/CMakeLists.txt
paddle/operators/math/CMakeLists.txt
+1
-0
paddle/operators/math/depthwise_conv.cu
paddle/operators/math/depthwise_conv.cu
+6
-12
未找到文件。
paddle/operators/CMakeLists.txt
浏览文件 @
06db7038
...
@@ -155,7 +155,8 @@ op_library(parallel_do_op DEPS executor)
...
@@ -155,7 +155,8 @@ op_library(parallel_do_op DEPS executor)
# Regist multiple Kernel to pybind
# Regist multiple Kernel to pybind
if
(
WITH_GPU
)
if
(
WITH_GPU
)
op_library
(
conv_op SRCS conv_op.cc conv_op.cu.cc conv_cudnn_op.cu.cc DEPS vol2col
)
op_library
(
conv_op SRCS conv_op.cc conv_op.cu.cc conv_cudnn_op.cu.cc DEPS
vol2col depthwise_conv
)
op_library
(
pool_op SRCS pool_op.cc pool_op.cu.cc pool_cudnn_op.cu.cc DEPS pooling
)
op_library
(
pool_op SRCS pool_op.cc pool_op.cu.cc pool_cudnn_op.cu.cc DEPS pooling
)
op_library
(
conv_transpose_op SRCS conv_transpose_op.cc conv_transpose_op.cu.cc
op_library
(
conv_transpose_op SRCS conv_transpose_op.cc conv_transpose_op.cu.cc
conv_transpose_cudnn_op.cu.cc DEPS vol2col
)
conv_transpose_cudnn_op.cu.cc DEPS vol2col
)
...
...
paddle/operators/conv_op.cc
浏览文件 @
06db7038
...
@@ -318,15 +318,20 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
...
@@ -318,15 +318,20 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP
(
conv2d
,
ops
::
ConvOp
,
ops
::
Conv2DOpMaker
,
conv2d_grad
,
REGISTER_OP
(
conv2d
,
ops
::
ConvOp
,
ops
::
Conv2DOpMaker
,
conv2d_grad
,
ops
::
ConvOpGrad
);
ops
::
ConvOpGrad
);
REGISTER_OP
(
depthwiseConv
,
ops
::
ConvOp
,
ops
::
Conv2DOpMaker
,
conv2d
_grad
,
REGISTER_OP
(
depthwiseConv
,
ops
::
ConvOp
,
ops
::
Conv2DOpMaker
,
depthwiseConv
_grad
,
ops
::
ConvOpGrad
);
ops
::
ConvOpGrad
);
REGISTER_OP
(
conv3d
,
ops
::
ConvOp
,
ops
::
Conv3DOpMaker
,
conv3d_grad
,
REGISTER_OP
(
conv3d
,
ops
::
ConvOp
,
ops
::
Conv3DOpMaker
,
conv3d_grad
,
ops
::
ConvOpGrad
);
ops
::
ConvOpGrad
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
depthwiseConv
,
depthwiseConv
,
ops
::
DepthwiseConvKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
DepthwiseConvKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
depthwiseConv_grad
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
conv2d
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
conv2d
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
...
...
paddle/operators/conv_op.h
浏览文件 @
06db7038
...
@@ -364,18 +364,15 @@ class DepthwiseConvKernel : public framework::OpKernel<T> {
...
@@ -364,18 +364,15 @@ class DepthwiseConvKernel : public framework::OpKernel<T> {
Tensor
*
output
=
context
.
Output
<
Tensor
>
(
"Output"
);
Tensor
*
output
=
context
.
Output
<
Tensor
>
(
"Output"
);
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
std
::
vector
<
int
>
ksize
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"ksize"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
dilations
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"dilations"
);
std
::
vector
<
int
>
dilations
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"dilations"
);
framework
::
DDim
filter_matrix_shape
=
{
filter
.
dims
()[
0
],
filter
.
numel
()
/
filter
.
dims
()[
0
]};
filter
.
Resize
(
filter_matrix_shape
);
math
::
DepthwiseConvFunctor
<
DeviceContext
,
T
>
depthwiseConv
;
math
::
DepthwiseConvFunctor
<
DeviceContext
,
T
>
depthwiseConv
;
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
depthwiseConv
(
dev_ctx
,
input
,
filter
,
filter_shape_vec
,
strides
,
paddings
,
depthwiseConv
(
dev_ctx
,
*
input
,
filter
,
ksize
,
strides
,
paddings
,
output
);
output
);
}
}
};
};
...
...
paddle/operators/math/CMakeLists.txt
浏览文件 @
06db7038
...
@@ -8,6 +8,7 @@ if(WITH_GPU)
...
@@ -8,6 +8,7 @@ if(WITH_GPU)
nv_library
(
softmax SRCS softmax.cc softmax.cu DEPS device_context
)
nv_library
(
softmax SRCS softmax.cc softmax.cu DEPS device_context
)
nv_library
(
cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS device_context
)
nv_library
(
cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS device_context
)
nv_library
(
pooling SRCS pooling.cc pooling.cu DEPS device_context
)
nv_library
(
pooling SRCS pooling.cc pooling.cu DEPS device_context
)
nv_library
(
depthwise_conv SRCS depthwise_conv.cu DEPS device_context
)
nv_library
(
sequence_pooling SRCS sequence_pooling.cc sequence_pooling.cu DEPS device_context math_function
)
nv_library
(
sequence_pooling SRCS sequence_pooling.cc sequence_pooling.cu DEPS device_context math_function
)
nv_library
(
vol2col SRCS vol2col.cc vol2col.cu DEPS device_context tensor
)
nv_library
(
vol2col SRCS vol2col.cc vol2col.cu DEPS device_context tensor
)
nv_library
(
context_project SRCS context_project.cc context_project.cu DEPS device_context math_function
)
nv_library
(
context_project SRCS context_project.cc context_project.cu DEPS device_context math_function
)
...
...
paddle/operators/math/depthwise_conv.cu
浏览文件 @
06db7038
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/operators/math/
pooling
.h"
#include "paddle/operators/math/
depthwise_conv
.h"
#include "paddle/platform/cuda_helper.h"
#include "paddle/platform/cuda_helper.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -195,7 +195,7 @@ __global__ void KernelDepthwiseConvFilterGrad(const int num_i,
...
@@ -195,7 +195,7 @@ __global__ void KernelDepthwiseConvFilterGrad(const int num_i,
* Ksize, strides, paddings are two elements. These two elements represent
* Ksize, strides, paddings are two elements. These two elements represent
* height and width, respectively.
* height and width, respectively.
*/
*/
template
<
typename
T
>
template
<
class
T
>
class
DepthwiseConvFunctor
<
platform
::
CUDADeviceContext
,
T
>
{
class
DepthwiseConvFunctor
<
platform
::
CUDADeviceContext
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
CUDADeviceContext
&
context
,
void
operator
()(
const
platform
::
CUDADeviceContext
&
context
,
...
@@ -226,7 +226,7 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T> {
...
@@ -226,7 +226,7 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T> {
dim3
threads
(
1024
,
1
);
dim3
threads
(
1024
,
1
);
dim3
grid
(
blocks
,
1
);
dim3
grid
(
blocks
,
1
);
KernelDepthwiseConv
<
T
><<<
grid
,
threads
,
0
,
STREAM_DEFAULT
>>>
(
KernelDepthwiseConv
<
T
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
nthreads
,
input_data
,
filter_data
,
batch_size
,
output_channels
,
nthreads
,
input_data
,
filter_data
,
batch_size
,
output_channels
,
output_height
,
output_width
,
input_channels
,
input_height
,
input_width
,
output_height
,
output_width
,
input_channels
,
input_height
,
input_width
,
output_channels
/
input_channels
,
ksize_height
,
ksize_width
,
output_channels
/
input_channels
,
ksize_height
,
ksize_width
,
...
@@ -236,7 +236,6 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T> {
...
@@ -236,7 +236,6 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T> {
};
};
/*
/*
template <typename T>
template <typename T>
class DepthwiseConvInputGradFunctor<platform::CUDADeviceContext, PoolProcess, T>
class DepthwiseConvInputGradFunctor<platform::CUDADeviceContext, PoolProcess, T>
{
{
...
@@ -254,8 +253,7 @@ class DepthwiseConvInputGradFunctor<platform::CUDADeviceContext, PoolProcess, T>
...
@@ -254,8 +253,7 @@ class DepthwiseConvInputGradFunctor<platform::CUDADeviceContext, PoolProcess, T>
const int output_height = output.dims()[2];
const int output_height = output.dims()[2];
const int output_width = output.dims()[3];
const int output_width = output.dims()[3];
const int ksize_height = ksize[0];
const int ksize_height = ksize[0];
const int ksize_width = ksize[1];
const int ksize_width = ksize[1]; const int stride_height = strides[0];
const int stride_height = strides[0];
const int stride_width = strides[1];
const int stride_width = strides[1];
const int padding_height = paddings[0];
const int padding_height = paddings[0];
const int padding_width = paddings[1];
const int padding_width = paddings[1];
...
@@ -321,24 +319,20 @@ class DepthwiseConvdFilterGradFunctor<platform::CUDADeviceContext, T> {
...
@@ -321,24 +319,20 @@ class DepthwiseConvdFilterGradFunctor<platform::CUDADeviceContext, T> {
*/
*/
template
class
DepthwiseConvFunctor
<
platform
::
CUDADeviceContext
,
template
class
DepthwiseConvFunctor
<
platform
::
CUDADeviceContext
,
paddle
::
operators
::
math
::
MaxPool
<
float
>,
float
>;
float
>;
template
class
DepthwiseConvFunctor
<
platform
::
CUDADeviceContext
,
double
>;
/*
/*
template class DepthwiseConvInputGradFunctor<platform::CUDADeviceContext,
template class DepthwiseConvInputGradFunctor<platform::CUDADeviceContext,
paddle::operators::math::MaxPoolGrad<float>,
float>;
float>;
template class DepthwiseConvFilterGradFunctor<platform::CUDADeviceContext,
template class DepthwiseConvFilterGradFunctor<platform::CUDADeviceContext,
paddle::operators::math::MaxPoolGrad<float>,
float>;
float>;
template class DepthwiseConvFunctor<platform::CUDADeviceContext,
template class DepthwiseConvFunctor<platform::CUDADeviceContext,
paddle::operators::math::MaxPool<double>, double>;
template class DepthwiseConvInputGradFunctor<platform::CUDADeviceContext,
template class DepthwiseConvInputGradFunctor<platform::CUDADeviceContext,
paddle::operators::math::MaxPoolGrad<double>,
double>;
double>;
template class DepthwiseConvFilterGradFunctor<platform::CUDADeviceContext,
template class DepthwiseConvFilterGradFunctor<platform::CUDADeviceContext,
paddle::operators::math::MaxPoolGrad<double>,
double>;
double>;
*/
*/
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录