Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
380cc6bb
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
380cc6bb
编写于
10月 12, 2017
作者:
H
hedaoyuan
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/baidu/Paddle
into inference
上级
773d064a
d3b8bffa
变更
18
隐藏空白更改
内联
并排
Showing
18 changed file
with
758 addition
and
119 deletion
+758
-119
paddle/api/CMakeLists.txt
paddle/api/CMakeLists.txt
+1
-1
paddle/framework/CMakeLists.txt
paddle/framework/CMakeLists.txt
+8
-6
paddle/framework/executor_test.cc
paddle/framework/executor_test.cc
+10
-0
paddle/framework/operator.h
paddle/framework/operator.h
+9
-0
paddle/operators/conv2d_op.cc
paddle/operators/conv2d_op.cc
+73
-93
paddle/operators/conv2d_op.cu
paddle/operators/conv2d_op.cu
+1
-1
paddle/operators/conv2d_op.h
paddle/operators/conv2d_op.h
+32
-1
paddle/operators/conv_cudnn_op.cc
paddle/operators/conv_cudnn_op.cc
+47
-0
paddle/operators/conv_cudnn_op.cu
paddle/operators/conv_cudnn_op.cu
+277
-0
paddle/operators/decayed_adagrad_op.cc
paddle/operators/decayed_adagrad_op.cc
+96
-0
paddle/operators/decayed_adagrad_op.cu
paddle/operators/decayed_adagrad_op.cu
+21
-0
paddle/operators/decayed_adagrad_op.h
paddle/operators/decayed_adagrad_op.h
+56
-0
paddle/operators/math/vol2col_test.cc
paddle/operators/math/vol2col_test.cc
+4
-4
paddle/platform/cudnn_helper.h
paddle/platform/cudnn_helper.h
+31
-11
paddle/pybind/CMakeLists.txt
paddle/pybind/CMakeLists.txt
+1
-1
python/paddle/v2/framework/tests/test_conv2d_op.py
python/paddle/v2/framework/tests/test_conv2d_op.py
+18
-1
python/paddle/v2/framework/tests/test_decayed_adagrad_op.py
python/paddle/v2/framework/tests/test_decayed_adagrad_op.py
+71
-0
python/paddle/v2/framework/tests/test_seq_concat_op.py
python/paddle/v2/framework/tests/test_seq_concat_op.py
+2
-0
未找到文件。
paddle/api/CMakeLists.txt
浏览文件 @
380cc6bb
...
...
@@ -26,7 +26,7 @@ FILE(GLOB PY_PADDLE_PYTHON_FILES ${PADDLE_SOURCE_DIR}/paddle/py_paddle/*.py)
SET_SOURCE_FILES_PROPERTIES
(
Paddle.i PROPERTIES CPLUSPLUS ON
)
SET
(
CMAKE_SWIG_OUTDIR
${
CMAKE_CURRENT_BINARY_DIR
}
)
SET
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-parentheses-equality -Wno-missing-field-initializers -Wno-self-assign"
)
SET
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-parentheses-equality -Wno-missing-field-initializers -Wno-self-assign
-ftls-model=global-dynamic
"
)
SET
(
SWIG_MODULE_swig_paddle_EXTRA_DEPS
paddle_parameter
...
...
paddle/framework/CMakeLists.txt
浏览文件 @
380cc6bb
...
...
@@ -42,12 +42,14 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
cc_library
(
backward SRCS backward.cc DEPS net_op
)
cc_test
(
backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward
${
GLOB_OP_LIB
}
)
#if(WITH_GPU)
# nv_test(executor_test SRCS executor_test.cc DEPS executor)
#else()
# cc_test(executor_test SRCS executor_test.cc DEPS executor)
#endif()
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward
)
set
(
EXECUTOR_TEST_OP elementwise_add_op gaussian_random_op feed_op fetch_op
mul_op sum_op squared_l2_distance_op fill_constant_op sgd_op
)
if
(
WITH_GPU
)
nv_test
(
executor_test SRCS executor_test.cc DEPS executor
${
EXECUTOR_TEST_OP
}
)
else
()
cc_test
(
executor_test SRCS executor_test.cc DEPS executor
${
EXECUTOR_TEST_OP
}
)
endif
()
cc_library
(
tensor_array SRCS tensor_array.cc DEPS lod_tensor
)
cc_test
(
tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place
)
paddle/framework/executor_test.cc
浏览文件 @
380cc6bb
...
...
@@ -25,6 +25,16 @@ limitations under the License. */
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
USE_OP
(
elementwise_add
);
USE_OP
(
gaussian_random
);
USE_OP
(
feed
);
USE_OP
(
fetch
);
USE_OP
(
mul
);
USE_OP
(
sum
);
USE_OP
(
squared_l2_distance
);
USE_OP
(
fill_constant
);
USE_OP
(
sgd
);
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
framework
;
...
...
paddle/framework/operator.h
浏览文件 @
380cc6bb
...
...
@@ -289,6 +289,15 @@ class ExecutionContext {
return
device_context_
;
}
#ifdef PADDLE_WITH_CUDA
// Returns this execution context's device context viewed as a
// platform::CUDADeviceContext.
//
// PADDLE_ENFORCE checks that the device context's place is a GPU place
// before the stored reference is reinterpreted.
//
// NOTE(review): reinterpret_cast assumes the CUDADeviceContext object starts
// at the same address as the stored device context. A static_cast would be
// the safer downcast if the class hierarchy permits it -- confirm.
const platform::CUDADeviceContext& cuda_device_context() const {
  PADDLE_ENFORCE(platform::is_gpu_place(device_context_.GetPlace()));
  return *reinterpret_cast<const platform::CUDADeviceContext*>(
      &device_context_);
}
#endif
private:
const
OperatorBase
&
op_
;
const
Scope
&
scope_
;
...
...
paddle/operators/conv2d_op.cc
浏览文件 @
380cc6bb
...
...
@@ -12,111 +12,91 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/
gemm_
conv2d_op.h"
#include "paddle/operators/conv2d_op.h"
namespace
paddle
{
namespace
operators
{
int
outputSize
(
int
input_size
,
int
filter_size
,
int
padding
,
int
stride
)
{
int
output_size
=
(
input_size
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
return
output_size
;
void
Conv2DOp
::
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Input"
),
"Input(Input) of Conv2DOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Filter"
),
"Input(Filter) of Conv2DOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Output"
),
"Output(Output) of Conv2DOp should not be null."
);
auto
in_dims
=
ctx
->
GetInputDim
(
"Input"
);
auto
filter_dims
=
ctx
->
GetInputDim
(
"Filter"
);
std
::
vector
<
int
>
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
int
groups
=
ctx
->
Attrs
().
Get
<
int
>
(
"groups"
);
int
input_channels
=
in_dims
[
1
];
int
output_channels
=
filter_dims
[
0
];
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
4
,
"Conv2DOp input should be 4-D."
);
PADDLE_ENFORCE_EQ
(
filter_dims
.
size
(),
4
,
"Conv2DOp filter should be 4-D."
);
PADDLE_ENFORCE_EQ
(
input_channels
,
filter_dims
[
1
]
*
groups
,
"The number of input channels should be equal to filter "
"channels * groups."
);
PADDLE_ENFORCE_EQ
(
output_channels
%
groups
,
0
,
"The number of output channels should be divided by groups."
);
auto
output_height
=
OutputSize
(
in_dims
[
2
],
filter_dims
[
2
],
paddings
[
0
],
strides
[
0
]);
auto
output_width
=
OutputSize
(
in_dims
[
3
],
filter_dims
[
3
],
paddings
[
1
],
strides
[
1
]);
ctx
->
SetOutputDim
(
"Output"
,
{
in_dims
[
0
],
filter_dims
[
0
],
output_height
,
output_width
});
}
class
Conv2DOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Input"
),
"Input(Input) of Conv2DOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Filter"
),
"Input(Filter) of Conv2DOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Output"
),
"Output(Output) of Conv2DOp should not be null."
);
auto
in_dims
=
ctx
->
GetInputDim
(
"Input"
);
auto
filter_dims
=
ctx
->
GetInputDim
(
"Filter"
);
std
::
vector
<
int
>
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
int
groups
=
ctx
->
Attrs
().
Get
<
int
>
(
"groups"
);
int
input_channels
=
in_dims
[
1
];
int
output_channels
=
filter_dims
[
0
];
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
4
,
"Conv2DOp input should be 4-D."
);
PADDLE_ENFORCE_EQ
(
filter_dims
.
size
(),
4
,
"Conv2DOp filter should be 4-D."
);
PADDLE_ENFORCE_EQ
(
input_channels
,
filter_dims
[
1
]
*
groups
,
"The number of input channels should be equal to filter "
"channels * groups."
);
PADDLE_ENFORCE_EQ
(
output_channels
%
groups
,
0
,
"The number of output channels should be divided by groups."
);
auto
output_height
=
outputSize
(
in_dims
[
2
],
filter_dims
[
2
],
paddings
[
0
],
strides
[
0
]);
auto
output_width
=
outputSize
(
in_dims
[
3
],
filter_dims
[
3
],
paddings
[
1
],
strides
[
1
]);
ctx
->
SetOutputDim
(
"Output"
,
{
in_dims
[
0
],
filter_dims
[
0
],
output_height
,
output_width
});
}
};
class
Conv2DOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
Conv2DOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"Input"
,
"The input tensor of convolution operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the "
"number of channels, H and W is the height and width of image."
);
AddInput
(
"Filter"
,
"The filter tensor of convolution operator."
"The format of the filter tensor is MCHW, where M is the number of "
"output image channels, C is the number of input image channels, "
"H and W is height and width of filter. "
"If the groups attribute is greater than 1, C equal the number of "
"input image channels divided by the groups."
);
AddOutput
(
"Output"
,
"The output tensor of convolution operator."
"The format of output tensor is also NCHW."
);
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"strides of convolution operator."
)
.
SetDefault
({
1
,
1
});
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"paddings of convolution operator."
)
.
SetDefault
({
0
,
0
});
AddAttr
<
int
>
(
"groups"
,
"group size of convolution operator. "
"Refer to grouped convolution in Alex Krizhevsky's paper: "
"when group=2, the first half of the filters are only connected to the "
"first half of the input channels, and the second half only connected "
"to the second half."
)
.
SetDefault
(
1
);
AddComment
(
R"DOC(
Conv2DOpMaker
::
Conv2DOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"Input"
,
"The input tensor of convolution operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the "
"number of channels, H and W is the height and width of image."
);
AddInput
(
"Filter"
,
"The filter tensor of convolution operator."
"The format of the filter tensor is MCHW, where M is the number of "
"output image channels, C is the number of input image channels, "
"H and W is height and width of filter. "
"If the groups attribute is greater than 1, C equal the number of "
"input image channels divided by the groups."
);
AddOutput
(
"Output"
,
"The output tensor of convolution operator."
"The format of output tensor is also NCHW."
);
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"strides of convolution operator."
)
.
SetDefault
({
1
,
1
});
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"paddings of convolution operator."
)
.
SetDefault
({
0
,
0
});
AddAttr
<
int
>
(
"groups"
,
"group size of convolution operator. "
"Refer to grouped convolution in Alex Krizhevsky's paper: "
"when group=2, the first half of the filters are only connected to the "
"first half of the input channels, and the second half only connected "
"to the second half."
)
.
SetDefault
(
1
);
AddComment
(
R"DOC(
The convolution operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
)DOC"
);
}
};
class
Conv2DOpGrad
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
}
protected:
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
auto
in_dims
=
ctx
->
GetInputDim
(
"Input"
);
auto
filter_dims
=
ctx
->
GetInputDim
(
"Filter"
);
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"Input"
)))
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Input"
),
in_dims
);
}
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"Filter"
)))
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Filter"
),
filter_dims
);
}
void
Conv2DOpGrad
::
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
{
auto
in_dims
=
ctx
->
GetInputDim
(
"Input"
);
auto
filter_dims
=
ctx
->
GetInputDim
(
"Filter"
);
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"Input"
)))
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Input"
),
in_dims
);
}
};
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"Filter"
)))
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Filter"
),
filter_dims
);
}
}
}
// namespace operators
}
// namespace paddle
...
...
paddle/operators/conv2d_op.cu
浏览文件 @
380cc6bb
...
...
@@ -12,7 +12,7 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/
gemm_
conv2d_op.h"
#include "paddle/operators/conv2d_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/operators/
gemm_
conv2d_op.h
→
paddle/operators/conv2d_op.h
浏览文件 @
380cc6bb
...
...
@@ -24,6 +24,38 @@ namespace operators {
using
Tensor
=
framework
::
Tensor
;
// Base convolution operator definitions, kept in the header so that other
// conv-like operators can reuse the implementation.
//
// Computes the output extent of a convolution along one spatial dimension:
//   (input_size - filter_size + 2 * padding) / stride + 1
// The division is integer division. `stride` must be non-zero; no check is
// performed here.
inline int OutputSize(int input_size, int filter_size, int padding,
                      int stride) {
  const int padded_span = input_size - filter_size + 2 * padding;
  return padded_span / stride + 1;
}
// Define Op classes in .h file so that other conv
// operator implementations can reuse the code.
// Proto-and-checker maker for the conv2d operator, derived from
// framework::OpProtoAndCheckerMaker.
// Only the constructor is declared here (taking the OpProto and the
// OpAttrChecker to populate); its body is defined out of line.
class
Conv2DOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
Conv2DOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
);
};
// Forward conv2d operator class, derived from framework::OperatorWithKernel.
// It inherits the base-class constructors and declares an InferShape
// override whose body is defined out of line.
class
Conv2DOp
:
public
framework
::
OperatorWithKernel
{
public:
// Reuse the constructors of OperatorWithKernel.
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
// Shape inference hook; declared only, defined in a source file.
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
};
// Gradient conv2d operator class, derived from framework::OperatorWithKernel.
// It inherits the base-class constructors and declares an InferShape
// override whose body is defined out of line.
class
Conv2DOpGrad
:
public
framework
::
OperatorWithKernel
{
public:
// Reuse the constructors of OperatorWithKernel.
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
// Shape inference hook; declared only, defined in a source file.
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
};
template
<
typename
Place
,
typename
T
>
class
GemmConv2DKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -74,7 +106,6 @@ class GemmConv2DKernel : public framework::OpKernel<T> {
framework
::
DDim
output_matrix_shape
=
{
output_channels
,
output_height
*
output_width
};
// convolution operator: im2col + gemm
int
in_step
=
input_channels
/
groups
;
int
out_step
=
output_channels
/
groups
;
...
...
paddle/operators/conv_cudnn_op.cc
0 → 100644
浏览文件 @
380cc6bb
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/conv2d_op.h"
namespace
paddle
{
namespace
operators
{
// Op maker for the cudnn convolution operator.
//
// Everything registered by Conv2DOpMaker is inherited through the base-class
// constructor; this class then adds two attributes:
//   - "dilations": std::vector<int>, default {1, 1}
//   - "workspace_size_MB": int, default 4096
class CudnnConvOpMaker : public Conv2DOpMaker {
 public:
  CudnnConvOpMaker(framework::OpProto* proto,
                   framework::OpAttrChecker* op_checker)
      : Conv2DOpMaker(proto, op_checker) {
    // Default dilation of 1 in both entries.
    const std::vector<int> default_dilations{1, 1};
    AddAttr<std::vector<int>>("dilations",
                              "dilations of convolution operator.")
        .SetDefault(default_dilations);

    AddAttr<int>("workspace_size_MB",
                 "workspace size for cudnn, in MB, "
                 "workspace is a section of GPU memory which will be "
                 "allocated/freed each time the operator runs, larger "
                 "workspace size can increase performance but also requires "
                 "better hardward. This size should be carefully setted.")
        .SetDefault(4096);
  }
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP
(
conv_cudnn
,
ops
::
Conv2DOp
,
ops
::
CudnnConvOpMaker
,
conv_cudnn_grad
,
ops
::
Conv2DOpGrad
);
REGISTER_OP_CPU_KERNEL
(
conv_cudnn
,
ops
::
GemmConv2DKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
conv_cudnn_grad
,
ops
::
GemmConvGrad2DKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
paddle/operators/conv_cudnn_op.cu
0 → 100644
浏览文件 @
380cc6bb
/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/memory/memory.h"
#include "paddle/operators/conv2d_op.h"
#include "paddle/platform/assert.h"
#include "paddle/platform/cudnn_helper.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
ScopedTensorDescriptor
=
platform
::
ScopedTensorDescriptor
;
using
ScopedFilterDescriptor
=
platform
::
ScopedFilterDescriptor
;
using
ScopedConvolutionDescriptor
=
platform
::
ScopedConvolutionDescriptor
;
using
DataLayout
=
platform
::
DataLayout
;
using
CUDADeviceContext
=
platform
::
CUDADeviceContext
;
static
constexpr
size_t
kCONV_CUDNN_WORKSPACE_LIMIT_BYTES
=
1024
*
1024
*
1024
;
// Copies every extent of `dims`, in order, into a std::vector<int>.
//
// NOTE: framework::vectorize converts to type int64_t
// which does not fit cudnn inputs.
//
// The conversion to int is written as an explicit cast so the narrowing is
// visible at the call site; callers are expected to pass extents that fit in
// an int.
std::vector<int> Dims2Vector(const framework::DDim& dims) {
  // Query the rank once and size the buffer up front to avoid reallocations.
  const int rank = static_cast<int>(dims.size());
  std::vector<int> ret;
  ret.reserve(static_cast<size_t>(rank));
  for (int i = 0; i < rank; ++i) {
    ret.push_back(static_cast<int>(dims[i]));
  }
  return ret;
}
// GPU forward kernel for the cudnn convolution operator.
//
// Compute() reads the "Input" and "Filter" tensors, builds cudnn descriptors
// for them and for "Output", asks cudnn for a forward algorithm that respects
// the workspace limit, allocates that workspace, and then issues one
// cudnnConvolutionForward call per group, offsetting the data pointers by the
// per-group element counts computed below.
template <typename T>
class CudnnConvOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                   "It must use GPUPlace.");
    auto* input = ctx.Input<Tensor>("Input");
    auto* filter = ctx.Input<Tensor>("Filter");
    auto* output = ctx.Output<Tensor>("Output");

    std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
    int groups = ctx.Attr<int>("groups");
    int user_workspace_size = ctx.Attr<int>("workspace_size_MB");

    const T* input_data = input->data<T>();
    const T* filter_data = filter->data<T>();
    T* output_data = output->mutable_data<T>(ctx.GetPlace());

    // ------------------- cudnn descriptors ---------------------
    ScopedTensorDescriptor input_desc;
    ScopedTensorDescriptor output_desc;
    ScopedFilterDescriptor filter_desc;
    ScopedConvolutionDescriptor conv_desc;
    DataLayout layout = DataLayout::kNCHW;

    cudnnTensorDescriptor_t cudnn_input_desc =
        input_desc.descriptor<T>(layout, Dims2Vector(input->dims()), groups);
    cudnnTensorDescriptor_t cudnn_output_desc =
        output_desc.descriptor<T>(layout, Dims2Vector(output->dims()), groups);
    cudnnFilterDescriptor_t cudnn_filter_desc =
        filter_desc.descriptor<T>(layout, Dims2Vector(filter->dims()), groups);
    cudnnConvolutionDescriptor_t cudnn_conv_desc =
        conv_desc.descriptor<T>(paddings, strides, dilations);

    int input_channels = input->dims()[1];
    int input_height = input->dims()[2];
    int input_width = input->dims()[3];
    int output_channels = output->dims()[1];
    int output_height = output->dims()[2];
    int output_width = output->dims()[3];

    // Number of elements to advance each data pointer per group.
    int group_offset_in = input_channels / groups * input_height * input_width;
    int group_offset_out =
        output_channels / groups * output_height * output_width;
    int group_offset_filter = filter->numel() / groups;

    // ------------------- cudnn conv workspace ---------------------
    void* cudnn_workspace = nullptr;
    // Final workspace size to allocate; filled in by cudnn below.
    size_t workspace_size_in_bytes = 0;
    size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES;
    if (user_workspace_size > 0) {
      // Widen before multiplying: in plain int arithmetic any value of
      // 2048 MB or more (e.g. 4096) overflows when converted to bytes.
      workspace_size_limit =
          static_cast<size_t>(user_workspace_size) * 1024 * 1024;
    }

    // ------------------- cudnn conv algorithm ---------------------
    cudnnConvolutionFwdAlgo_t algo;
    auto handle = ctx.cuda_device_context().cudnn_handle();

    PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
        handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
        cudnn_output_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
        workspace_size_limit, &algo));
    // get workspace size able to allocate
    PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardWorkspaceSize(
        handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
        cudnn_output_desc, algo, &workspace_size_in_bytes));

    // Allocate on GPU memory
    // NOTE(review): if PADDLE_ENFORCE can leave this function early, the
    // workspace below would not reach the Free call -- confirm and consider
    // a scoped guard.
    platform::GPUPlace gpu = boost::get<platform::GPUPlace>(ctx.GetPlace());
    cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);

    // ------------------- cudnn conv forward ---------------------
    T alpha = 1.0f, beta = 0.0f;
    for (int i = 0; i < groups; i++) {
      PADDLE_ENFORCE(platform::dynload::cudnnConvolutionForward(
          handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in,
          cudnn_filter_desc, filter_data + i * group_offset_filter,
          cudnn_conv_desc, algo, cudnn_workspace, workspace_size_in_bytes,
          &beta, cudnn_output_desc, output_data + i * group_offset_out));
    }

    // Release the cudnn workspace
    paddle::memory::Free(gpu, cudnn_workspace);
  }
};
// GPU backward kernel for the cudnn convolution operator.
//
// Compute() reads "Input", "Filter" and the gradient of "Output", and writes
// the gradients of "Input" and/or "Filter" when those outputs are present.
// For each requested gradient it asks cudnn for a backward algorithm within
// the workspace limit and tracks the larger of the two workspace sizes; a
// single workspace of that size is allocated and shared by both passes. Each
// gradient tensor is zero-filled and then computed with one cudnn call per
// group.
template <typename T>
class CudnnConvGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                   "It must use GPUPlace.");
    auto input = ctx.Input<Tensor>("Input");
    auto filter = ctx.Input<Tensor>("Filter");
    auto output_grad = ctx.Input<Tensor>(framework::GradVarName("Output"));
    auto input_grad = ctx.Output<Tensor>(framework::GradVarName("Input"));
    auto filter_grad = ctx.Output<Tensor>(framework::GradVarName("Filter"));

    const T* input_data = input->data<T>();
    const T* output_grad_data = output_grad->data<T>();
    const T* filter_data = filter->data<T>();

    std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
    int groups = ctx.Attr<int>("groups");
    int user_workspace_size = ctx.Attr<int>("workspace_size_MB");

    // ------------------- cudnn descriptors ---------------------
    ScopedTensorDescriptor input_desc;
    ScopedTensorDescriptor output_grad_desc;
    ScopedTensorDescriptor input_grad_desc;

    ScopedFilterDescriptor filter_desc;
    ScopedFilterDescriptor filter_grad_desc;
    ScopedConvolutionDescriptor conv_desc;
    DataLayout layout = DataLayout::kNCHW;

    cudnnTensorDescriptor_t cudnn_input_desc =
        input_desc.descriptor<T>(layout, Dims2Vector(input->dims()), groups);
    cudnnTensorDescriptor_t cudnn_output_grad_desc =
        output_grad_desc.descriptor<T>(
            layout, Dims2Vector(output_grad->dims()), groups);
    cudnnFilterDescriptor_t cudnn_filter_desc =
        filter_desc.descriptor<T>(layout, Dims2Vector(filter->dims()), groups);
    // Only created when the corresponding gradient output is requested.
    cudnnTensorDescriptor_t cudnn_input_grad_desc = nullptr;
    cudnnFilterDescriptor_t cudnn_filter_grad_desc = nullptr;

    cudnnConvolutionDescriptor_t cudnn_conv_desc =
        conv_desc.descriptor<T>(paddings, strides, dilations);

    int input_channels = input->dims()[1];
    int input_height = input->dims()[2];
    int input_width = input->dims()[3];
    int output_grad_channels = filter->dims()[0];
    int output_grad_height = output_grad->dims()[2];
    int output_grad_width = output_grad->dims()[3];

    // Number of elements to advance each data pointer per group.
    int group_offset_in = input_channels / groups * input_height * input_width;
    int group_offset_out =
        output_grad_channels / groups * output_grad_height * output_grad_width;
    int group_offset_filter = filter->numel() / groups;

    // ------------------- cudnn backward algorithm ---------------------
    cudnnConvolutionBwdDataAlgo_t data_algo;
    cudnnConvolutionBwdFilterAlgo_t filter_algo;
    size_t workspace_size_in_bytes = 0, tmp_size = 0;
    size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES;
    if (user_workspace_size > 0) {
      // Widen before multiplying: in plain int arithmetic any value of
      // 2048 MB or more (e.g. 4096) overflows when converted to bytes.
      workspace_size_limit =
          static_cast<size_t>(user_workspace_size) * 1024 * 1024;
    }

    auto handle = ctx.cuda_device_context().cudnn_handle();
    if (input_grad) {
      cudnn_input_grad_desc = input_grad_desc.descriptor<T>(
          layout, Dims2Vector(input_grad->dims()), groups);
      PADDLE_ENFORCE(
          platform::dynload::cudnnGetConvolutionBackwardDataAlgorithm(
              handle, cudnn_filter_desc,
              // dyDesc: Handle to the previously initialized input
              // differential tensor descriptor.
              cudnn_output_grad_desc, cudnn_conv_desc,
              // dxDesc: Handle to the previously initialized output tensor
              // descriptor.
              cudnn_input_grad_desc,
              CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
              workspace_size_limit, &data_algo));
      PADDLE_ENFORCE(
          platform::dynload::cudnnGetConvolutionBackwardDataWorkspaceSize(
              handle, cudnn_filter_desc, cudnn_output_grad_desc,
              cudnn_conv_desc, cudnn_input_grad_desc, data_algo, &tmp_size));
      workspace_size_in_bytes = std::max(workspace_size_in_bytes, tmp_size);
    }

    if (filter_grad) {
      cudnn_filter_grad_desc = filter_grad_desc.descriptor<T>(
          layout, Dims2Vector(filter_grad->dims()), groups);
      PADDLE_ENFORCE(
          platform::dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
              handle, cudnn_input_desc, cudnn_output_grad_desc,
              cudnn_conv_desc, cudnn_filter_desc,
              CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
              workspace_size_limit, &filter_algo));
      PADDLE_ENFORCE(
          platform::dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize(
              handle, cudnn_input_desc, cudnn_output_grad_desc,
              cudnn_conv_desc, cudnn_filter_desc, filter_algo, &tmp_size));
      workspace_size_in_bytes = std::max(workspace_size_in_bytes, tmp_size);
    }

    // ------------------- cudnn conv workspace ---------------------
    // Already on GPU
    // NOTE(review): if PADDLE_ENFORCE can leave this function early, the
    // workspace below would not reach the Free call -- confirm and consider
    // a scoped guard.
    void* cudnn_workspace = nullptr;
    platform::GPUPlace gpu = boost::get<platform::GPUPlace>(ctx.GetPlace());
    cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);

    // ------------------- cudnn conv backward data ---------------------
    // FIXME(typhoonzero): template type T may not be the same as cudnn call.
    T alpha = 1.0f, beta = 0.0f;
    if (input_grad) {
      T* input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
      // Zero-fill the gradient buffer before the per-group cudnn calls.
      auto t = framework::EigenVector<T>::Flatten(*input_grad);
      t.device(ctx.GetEigenDevice<platform::GPUPlace>()) =
          t.constant(static_cast<T>(0));
      for (int i = 0; i < groups; i++) {
        PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardData(
            handle, &alpha, cudnn_filter_desc,
            filter_data + i * group_offset_filter, cudnn_output_grad_desc,
            output_grad_data + i * group_offset_out, cudnn_conv_desc,
            data_algo, cudnn_workspace, workspace_size_in_bytes, &beta,
            cudnn_input_grad_desc, input_grad_data + i * group_offset_in));
      }
    }

    // ------------------- cudnn conv backward filter ---------------------
    if (filter_grad) {
      T* filter_grad_data = filter_grad->mutable_data<T>(ctx.GetPlace());
      // Zero-fill the gradient buffer before the per-group cudnn calls.
      auto t = framework::EigenVector<T>::Flatten(*filter_grad);
      t.device(ctx.GetEigenDevice<platform::GPUPlace>()) =
          t.constant(static_cast<T>(0));
      for (int i = 0; i < groups; i++) {
        PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter(
            handle, &alpha, cudnn_input_desc,
            input_data + i * group_offset_in, cudnn_output_grad_desc,
            output_grad_data + i * group_offset_out, cudnn_conv_desc,
            filter_algo, cudnn_workspace, workspace_size_in_bytes, &beta,
            cudnn_filter_grad_desc,
            filter_grad_data + i * group_offset_filter));
      }
    }

    // Release the cudnn workspace
    paddle::memory::Free(gpu, cudnn_workspace);
  }
};
}
// namespace operators
}
// namespace paddle
REGISTER_OP_GPU_KERNEL
(
conv_cudnn
,
paddle
::
operators
::
CudnnConvOpKernel
<
float
>
);
REGISTER_OP_GPU_KERNEL
(
conv_cudnn_grad
,
paddle
::
operators
::
CudnnConvGradOpKernel
<
float
>
);
paddle/operators/decayed_adagrad_op.cc
0 → 100644
浏览文件 @
380cc6bb
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/decayed_adagrad_op.h"
namespace
paddle
{
namespace
operators
{
// Operator class for decayed Adagrad.
//
// InferShape checks that every input (Param, Grad, Moment, LearningRate) and
// every output (ParamOut, MomentOut) is present, that LearningRate holds
// exactly one element, and that Grad and Moment have the same dimensions as
// Param. Both outputs are then given Param's dimensions.
//
// NOTE(review): this override takes framework::InferShapeContextBase*, while
// the convolution operators in the same change use
// framework::InferShapeContext* -- confirm which name the framework exposes.
class DecayedAdagradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContextBase* ctx) const override {
    // Presence checks, inputs first and then outputs.
    PADDLE_ENFORCE(ctx->HasInput("Param"),
                   "Input(Param) of DecayedAdagradOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Grad"),
                   "Input(Grad) of DecayedAdagradOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Moment"),
                   "Input(Moment) of DecayedAdagradOp should not be null.");
    PADDLE_ENFORCE(
        ctx->HasInput("LearningRate"),
        "Input(LearningRate) of DecayedAdagradOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
                   "Output(ParamOut) of DecayedAdagradOp should not be null.");
    PADDLE_ENFORCE(
        ctx->HasOutput("MomentOut"),
        "Output(MomentOut) of DecayedAdagradOp should not be null.");

    // The learning rate must contain a single element.
    auto learning_rate_dims = ctx->GetInputDim("LearningRate");
    PADDLE_ENFORCE_EQ(framework::product(learning_rate_dims), 1,
                      "LearningRate should have one element");

    // Grad and Moment must match Param's dimensions.
    auto dims = ctx->GetInputDim("Param");
    PADDLE_ENFORCE_EQ(dims, ctx->GetInputDim("Grad"),
                      "Param and Grad input of DecayedAdagradOp should have "
                      "the same dimension.");
    PADDLE_ENFORCE_EQ(dims, ctx->GetInputDim("Moment"),
                      "Param and Moment input of DecayedAdagradOp should have "
                      "the same dimension.");

    ctx->SetOutputDim("ParamOut", dims);
    ctx->SetOutputDim("MomentOut", dims);
  }
};
// Proto-and-checker maker for the decayed Adagrad operator.
//
// Registers four inputs (Param, Grad, Moment, LearningRate), two outputs
// (ParamOut, MomentOut) and two float attributes: "decay" with default 0.95
// and "epsilon" with default 1.0e-6. The operator comment carries the update
// formulas.
class DecayedAdagradOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  DecayedAdagradOpMaker(framework::OpProto* proto,
                        framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    // Inputs.
    AddInput("Param", "(Tensor) Input parameter");
    AddInput("Grad", "(Tensor) Input gradient");
    AddInput("Moment", "(Tensor) Second moment");
    AddInput("LearningRate", "(Tensor) Learning rate");

    // Outputs.
    AddOutput("ParamOut", "(Tensor) Output parameter");
    AddOutput("MomentOut", "(Tensor) Output second moment");

    // Attributes.
    AddAttr<float>("decay",
                   "(float, default 0.95) "
                   "Discounting factor for coming gradient")
        .SetDefault(0.95);
    AddAttr<float>("epsilon",
                   "(float, default 1.0e-6) "
                   "Constant for numerical stability")
        .SetDefault(1.0e-6f);

    AddComment(R"DOC(
Decayed Adagrad
moment_out = decay * moment + (1 - decay) * grad * grad
param_out = param - learning_rate * grad / (sqrt(moment_out) + epsilon)
)DOC");
  }
};
}
// namespace operators
}
// namespace paddle
// Operator registration for decayed_adagrad.
// `ops` is a shorthand alias for the paddle::operators namespace.
// REGISTER_OP_WITHOUT_GRADIENT ties the op name to DecayedAdagradOp and
// DecayedAdagradOpMaker; REGISTER_OP_CPU_KERNEL registers the kernel
// instantiated for CPUPlace with element type float.
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_WITHOUT_GRADIENT
(
decayed_adagrad
,
ops
::
DecayedAdagradOp
,
ops
::
DecayedAdagradOpMaker
);
REGISTER_OP_CPU_KERNEL
(
decayed_adagrad
,
ops
::
DecayedAdagradOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
paddle/operators/decayed_adagrad_op.cu
0 → 100644
浏览文件 @
380cc6bb
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/decayed_adagrad_op.h"
// GPU kernel registration for decayed_adagrad.
// `ops` is a shorthand alias for the paddle::operators namespace.
// The kernel template is instantiated for GPUPlace with element type float.
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_GPU_KERNEL
(
decayed_adagrad
,
ops
::
DecayedAdagradOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
paddle/operators/decayed_adagrad_op.h
0 → 100644
浏览文件 @
380cc6bb
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace
paddle
{
namespace
operators
{
// Kernel for the decayed_adagrad operator.
//
// Template parameters:
//   Place - device tag passed to ExecutionContext::GetEigenDevice.
//   T     - element type of the tensors.
//
// Compute() evaluates, element-wise over the flattened tensors:
//   moment_out = decay * moment + (1 - decay) * grad * grad
//   param_out  = param - lr * grad / (sqrt(moment_out) + epsilon)
template <typename Place, typename T>
class DecayedAdagradOpKernel : public framework::OpKernel<T> {
 public:
  // Reads inputs Param, Grad, Moment, LearningRate and attributes decay and
  // epsilon from `ctx`, and writes outputs ParamOut and MomentOut.
  void Compute(const framework::ExecutionContext& ctx) const override {
    using Tensor = framework::Tensor;
    using FlatVector = framework::EigenVector<T>;

    auto* param_dst = ctx.Output<Tensor>("ParamOut");
    auto* moment_dst = ctx.Output<Tensor>("MomentOut");

    // Request storage of type T at the execution place for both outputs
    // before any Eigen view is created on them.
    param_dst->mutable_data<T>(ctx.GetPlace());
    moment_dst->mutable_data<T>(ctx.GetPlace());

    const float decay_rate = ctx.Attr<float>("decay");
    const float eps = ctx.Attr<float>("epsilon");

    // 1-D Eigen views over the inputs.
    auto param_in = FlatVector::Flatten(*ctx.Input<Tensor>("Param"));
    auto grad_in = FlatVector::Flatten(*ctx.Input<Tensor>("Grad"));
    auto moment_in = FlatVector::Flatten(*ctx.Input<Tensor>("Moment"));
    auto learning_rate =
        FlatVector::Flatten(*ctx.Input<Tensor>("LearningRate"));

    // 1-D Eigen views over the outputs.
    auto param_result = FlatVector::Flatten(*param_dst);
    auto moment_result = FlatVector::Flatten(*moment_dst);

    auto device = ctx.GetEigenDevice<Place>();

    // Update the decayed accumulator of squared gradients.
    const float grad_weight = 1 - decay_rate;
    moment_result.device(device) =
        decay_rate * moment_in + grad_weight * grad_in * grad_in;

    // The learning-rate vector is broadcast by a factor of numel(MomentOut)
    // so that it can be combined element-wise with the gradient.
    Eigen::DSizes<int, 1> lr_repeat(moment_dst->numel());
    param_result.device(device) =
        param_in -
        learning_rate.broadcast(lr_repeat) * grad_in /
            (moment_result.sqrt() + eps);
  }
};
}
// namespace operators
}
// namespace paddle
paddle/operators/math/vol2col_test.cc
浏览文件 @
380cc6bb
...
...
@@ -78,7 +78,7 @@ void testVol2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
input
=
input_tmp
;
}
else
{
input
.
CopyFrom
<
float
>
(
input_tmp
,
*
place
);
input
.
CopyFrom
<
float
>
(
input_tmp
,
*
place
,
*
context
);
}
output
.
mutable_data
<
float
>
({
1
,
filter_size
,
filter_size
,
filter_size
,
output_depth
,
output_height
,
output_width
},
...
...
@@ -93,7 +93,7 @@ void testVol2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
out_cfo_ptr
=
output
.
data
<
float
>
();
}
else
{
output_tmp
.
CopyFrom
<
float
>
(
output
,
paddle
::
platform
::
CPUPlace
());
output_tmp
.
CopyFrom
<
float
>
(
output
,
paddle
::
platform
::
CPUPlace
()
,
*
context
);
out_cfo_ptr
=
output_tmp
.
data
<
float
>
();
}
...
...
@@ -107,7 +107,7 @@ void testVol2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
input
=
input_tmp
;
}
else
{
input
.
CopyFrom
<
float
>
(
input_tmp
,
*
place
);
input
.
CopyFrom
<
float
>
(
input_tmp
,
*
place
,
*
context
);
}
paddle
::
operators
::
math
::
Col2VolFunctor
<
Place
,
float
>
col2vol
;
...
...
@@ -118,7 +118,7 @@ void testVol2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
in_ptr
=
input
.
data
<
float
>
();
}
else
{
input_tmp
.
CopyFrom
<
float
>
(
input
,
paddle
::
platform
::
CPUPlace
());
input_tmp
.
CopyFrom
<
float
>
(
input
,
paddle
::
platform
::
CPUPlace
()
,
*
context
);
in_ptr
=
input_tmp
.
data
<
float
>
();
}
...
...
paddle/platform/cudnn_helper.h
浏览文件 @
380cc6bb
...
...
@@ -71,23 +71,32 @@ class ScopedTensorDescriptor {
inline
cudnnTensorDescriptor_t
descriptor
(
const
cudnnTensorFormat_t
format
,
const
cudnnDataType_t
type
,
const
std
::
vector
<
int
>&
dims
)
{
// the format is not used now, but it maybe useful feature
const
std
::
vector
<
int
>&
dims
,
const
int
groups
=
1
)
{
// the format is not used now, will add later
std
::
vector
<
int
>
strides
(
dims
.
size
());
strides
[
dims
.
size
()
-
1
]
=
1
;
for
(
int
i
=
dims
.
size
()
-
2
;
i
>=
0
;
i
--
)
{
strides
[
i
]
=
dims
[
i
+
1
]
*
strides
[
i
+
1
];
}
// Update tensor descriptor dims setting if groups > 1
// FIXME(typhoonzero): Assume using NCHW order
std
::
vector
<
int
>
dims_with_group
(
dims
.
begin
(),
dims
.
end
());
// copy
if
(
groups
>
1
)
{
dims_with_group
[
1
]
=
dims_with_group
[
1
]
/
groups
;
}
PADDLE_ENFORCE
(
dynload
::
cudnnSetTensorNdDescriptor
(
desc_
,
type
,
dims
.
size
(),
dims
.
data
(),
strides
.
data
()));
desc_
,
type
,
dims_with_group
.
size
(),
dims_with_group
.
data
(),
strides
.
data
()));
return
desc_
;
}
template
<
typename
T
>
inline
cudnnTensorDescriptor_t
descriptor
(
const
DataLayout
&
order
,
const
std
::
vector
<
int
>&
dims
)
{
return
descriptor
(
GetCudnnTensorFormat
(
order
),
CudnnDataType
<
T
>::
type
,
dims
);
const
std
::
vector
<
int
>&
dims
,
const
int
groups
=
1
)
{
return
descriptor
(
GetCudnnTensorFormat
(
order
),
CudnnDataType
<
T
>::
type
,
dims
,
groups
);
}
private:
...
...
@@ -106,18 +115,29 @@ class ScopedFilterDescriptor {
inline
cudnnFilterDescriptor_t
descriptor
(
const
cudnnTensorFormat_t
format
,
const
cudnnDataType_t
type
,
const
std
::
vector
<
int
>&
kernel
)
{
// filter layout: output input spatial_dim_y spatial_dim_x
const
std
::
vector
<
int
>&
kernel
,
const
int
groups
=
1
)
{
// filter layout: MCHW, where M is the number of
// output image channels, C is the number of input image channels,
// H and W is height and width of filter.
std
::
vector
<
int
>
kernel_with_group
(
kernel
.
begin
(),
kernel
.
end
());
if
(
groups
>
1
)
{
// M /= groups
kernel_with_group
[
0
]
/=
groups
;
// NOTE: input filter(C) of the filter is already asserted to be C/groups.
}
PADDLE_ENFORCE
(
dynload
::
cudnnSetFilterNdDescriptor
(
desc_
,
type
,
format
,
kernel
.
size
(),
kernel
.
data
()));
desc_
,
type
,
format
,
kernel_with_group
.
size
(),
kernel_with_group
.
data
()));
return
desc_
;
}
template
<
typename
T
>
inline
cudnnFilterDescriptor_t
descriptor
(
const
DataLayout
&
order
,
const
std
::
vector
<
int
>&
kernel
)
{
const
std
::
vector
<
int
>&
kernel
,
const
int
groups
=
1
)
{
return
descriptor
(
GetCudnnTensorFormat
(
order
),
CudnnDataType
<
T
>::
type
,
kernel
);
kernel
,
groups
);
}
private:
...
...
paddle/pybind/CMakeLists.txt
浏览文件 @
380cc6bb
if
(
WITH_PYTHON
)
cc_library
(
paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc
DEPS pybind python backward proto_desc tensor_array
DEPS pybind python backward proto_desc tensor_array
paddle_memory
${
GLOB_OP_LIB
}
)
endif
(
WITH_PYTHON
)
python/paddle/v2/framework/tests/test_conv2d_op.py
浏览文件 @
380cc6bb
...
...
@@ -6,7 +6,7 @@ from op_test import OpTest
class
TestConv2dOp
(
OpTest
):
def
setUp
(
self
):
self
.
init_groups
()
self
.
op_type
=
"conv2d"
self
.
init_optype
()
batch_size
=
2
input_channels
=
3
input_height
=
5
...
...
@@ -32,6 +32,7 @@ class TestConv2dOp(OpTest):
self
.
attrs
=
{
'strides'
:
[
1
,
1
],
'paddings'
:
[
0
,
0
],
'dilations'
:
[
1
,
1
],
'groups'
:
self
.
groups
}
...
...
@@ -93,11 +94,27 @@ class TestConv2dOp(OpTest):
# Hook called from setUp: sets self.groups, which setUp forwards as the
# operator's 'groups' attribute. The base test uses 1; subclasses override it.
def
init_groups
(
self
):
self
.
groups
=
1
# Hook called from setUp: selects the operator type under test.
# The base test uses "conv2d"; subclasses override it.
def
init_optype
(
self
):
self
.
op_type
=
"conv2d"
# Re-runs the TestConv2dOp cases with groups set to 3 instead of 1.
class
TestWithGroup
(
TestConv2dOp
):
def
init_groups
(
self
):
self
.
groups
=
3
# Re-runs the TestConv2dOp cases against the "conv_cudnn" operator type.
class
TestCudnn2d
(
TestConv2dOp
):
def
init_optype
(
self
):
self
.
op_type
=
"conv_cudnn"
# Re-runs the TestConv2dOp cases against the "conv_cudnn" operator type
# with groups set to 3.
class
TestCudnn2dWithGroup
(
TestConv2dOp
):
def
init_optype
(
self
):
self
.
op_type
=
"conv_cudnn"
def
init_groups
(
self
):
self
.
groups
=
3
# Run all test cases in this module when the file is executed as a script.
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/v2/framework/tests/test_decayed_adagrad_op.py
0 → 100644
浏览文件 @
380cc6bb
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
class TestDecayedAdagradOp1(OpTest):
    '''Checks the decayed_adagrad operator with explicitly chosen
    attribute values (decay=0.80, epsilon=1e-8).
    '''

    def setUp(self):
        self.op_type = "decayed_adagrad"

        shape = (123, 321)

        # Random parameter and gradient; the moment accumulator starts at zero.
        param_in = np.random.random(shape).astype("float32")
        grad_in = np.random.random(shape).astype("float32")
        moment_in = np.zeros(shape).astype("float32")

        learning_rate = 0.01
        decay_rate = 0.80
        eps = 1e-8

        self.inputs = {
            'Param': param_in,
            'Grad': grad_in,
            'Moment': moment_in,
            'LearningRate': np.array([learning_rate]).astype("float32")
        }

        self.attrs = {'decay': decay_rate, 'epsilon': eps}

        # NumPy reference for the expected operator outputs.
        expected_moment = (decay_rate * moment_in +
                           (1 - decay_rate) * grad_in * grad_in)
        denominator = np.sqrt(expected_moment) + eps
        expected_param = param_in - learning_rate * grad_in / denominator

        self.outputs = {
            'ParamOut': expected_param,
            'MomentOut': expected_moment
        }

    def test_check_output(self):
        self.check_output()
class TestDecayedAdagradOp2(OpTest):
    '''Checks the decayed_adagrad operator with decay=0.95 and epsilon=1e-6.

    These values are still passed explicitly through self.attrs.
    '''

    def setUp(self):
        self.op_type = "decayed_adagrad"

        shape = (123, 321)

        # Random parameter and gradient; the moment accumulator starts at zero.
        param_in = np.random.random(shape).astype("float32")
        grad_in = np.random.random(shape).astype("float32")
        moment_in = np.zeros(shape).astype("float32")

        learning_rate = 0.01
        decay_rate = 0.95
        eps = 1e-6

        self.inputs = {
            'Param': param_in,
            'Grad': grad_in,
            'Moment': moment_in,
            'LearningRate': np.array([learning_rate]).astype("float32")
        }

        self.attrs = {'decay': decay_rate, 'epsilon': eps}

        # NumPy reference for the expected operator outputs.
        expected_moment = (decay_rate * moment_in +
                           (1 - decay_rate) * grad_in * grad_in)
        denominator = np.sqrt(expected_moment) + eps
        expected_param = param_in - learning_rate * grad_in / denominator

        self.outputs = {
            'ParamOut': expected_param,
            'MomentOut': expected_moment
        }

    def test_check_output(self):
        self.check_output()
# Run all test cases in this module when the file is executed as a script.
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/v2/framework/tests/test_seq_concat_op.py
浏览文件 @
380cc6bb
import
unittest
import
numpy
as
np
import
sys
from
op_test
import
OpTest
...
...
@@ -74,4 +75,5 @@ class TestConcatOpLevelZero(TestConcatOp):
# NOTE(review): sys.exit(0) runs first, so unittest.main() below is never
# reached and no test in this file executes when it is run as a script.
# This looks like a temporary disable; confirm it is intentional.
if
__name__
==
'__main__'
:
sys
.
exit
(
0
)
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录