Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
68a75344
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
68a75344
编写于
4月 03, 2018
作者:
Y
Yi Wang
提交者:
GitHub
4月 03, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into fix_cpplint_errors_operators_detail
上级
97eac501
a98a3fdc
变更
22
隐藏空白更改
内联
并排
Showing
22 changed file
with
1428 addition
and
49 deletion
+1428
-49
paddle/cuda/include/hl_cnn.h
paddle/cuda/include/hl_cnn.h
+44
-0
paddle/cuda/include/stub/hl_cnn_stub.h
paddle/cuda/include/stub/hl_cnn_stub.h
+20
-0
paddle/cuda/src/hl_cuda_cnn.cu
paddle/cuda/src/hl_cuda_cnn.cu
+76
-0
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+15
-0
paddle/fluid/framework/executor.h
paddle/fluid/framework/executor.h
+3
-0
paddle/fluid/operators/detail/variable_response.cc
paddle/fluid/operators/detail/variable_response.cc
+0
-2
paddle/fluid/operators/fc_mkldnn_op.cc
paddle/fluid/operators/fc_mkldnn_op.cc
+303
-0
paddle/fluid/operators/fc_op.cc
paddle/fluid/operators/fc_op.cc
+102
-0
paddle/fluid/operators/fc_op.h
paddle/fluid/operators/fc_op.h
+52
-0
paddle/fluid/operators/listen_and_serv_op.cc
paddle/fluid/operators/listen_and_serv_op.cc
+29
-17
paddle/fluid/operators/split_ids_op.h
paddle/fluid/operators/split_ids_op.h
+3
-6
paddle/gserver/layers/UpsampleLayer.cpp
paddle/gserver/layers/UpsampleLayer.cpp
+108
-0
paddle/gserver/layers/UpsampleLayer.h
paddle/gserver/layers/UpsampleLayer.h
+53
-0
paddle/gserver/tests/CMakeLists.txt
paddle/gserver/tests/CMakeLists.txt
+1
-0
paddle/gserver/tests/test_Upsample.cpp
paddle/gserver/tests/test_Upsample.cpp
+152
-0
paddle/math/Matrix.cpp
paddle/math/Matrix.cpp
+126
-0
paddle/math/Matrix.h
paddle/math/Matrix.h
+52
-0
proto/ModelConfig.proto
proto/ModelConfig.proto
+11
-0
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+52
-24
python/paddle/fluid/tests/unittests/test_fc_op.py
python/paddle/fluid/tests/unittests/test_fc_op.py
+99
-0
python/paddle/trainer/config_parser.py
python/paddle/trainer/config_parser.py
+48
-0
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+79
-0
未找到文件。
paddle/cuda/include/hl_cnn.h
浏览文件 @
68a75344
...
@@ -370,4 +370,48 @@ extern void hl_maxout_backward(real* inGrad,
...
@@ -370,4 +370,48 @@ extern void hl_maxout_backward(real* inGrad,
size_t
featLen
,
size_t
featLen
,
size_t
groups
);
size_t
groups
);
/**
* @brief Upsample forward.
* @param[in] inputData input data.
* @param[in] maskData the mask data from MaxPoolWithMaskLayer.
* @param[in] batchSize the batch size of the input.
* @param[in] imgSizeH image height.
* @param[in] imgSizeW image width.
* @param[in] channels the input channels.
* @param[in] outputH the output height.
* @param[in] outputW the output width.
* @param[out] outputData output data.
*/
extern
void
hl_upsample_forward
(
real
*
inputData
,
real
*
maskData
,
size_t
batchSize
,
size_t
imgSizeH
,
size_t
imgSizeW
,
size_t
channels
,
size_t
outputH
,
size_t
outputW
,
real
*
outputData
);
/**
* @brief Upsample backward.
* @param[in] outputGradData the output grad data.
* @param[in] maskData the mask data from MaxPoolWithMaskLayer.
* @param[in] batchSize the batch size of the input.
* @param[in] imgSizeH image height.
* @param[in] imgSizeW image width.
* @param[in] channels the input channels.
* @param[in] outputH the output height.
* @param[in] outputW the output width.
* @param[out] inputGradData the input grad data.
*/
extern
void
hl_upsample_backward
(
real
*
outputGradData
,
real
*
maskData
,
size_t
batchSize
,
size_t
imgSizeH
,
size_t
imgSizeW
,
size_t
channels
,
size_t
outputH
,
size_t
outputW
,
real
*
inputGradData
);
#endif // HL_CNN_H_
#endif // HL_CNN_H_
paddle/cuda/include/stub/hl_cnn_stub.h
浏览文件 @
68a75344
...
@@ -224,4 +224,24 @@ inline void hl_maxout_backward(real* inGrad,
...
@@ -224,4 +224,24 @@ inline void hl_maxout_backward(real* inGrad,
size_t
featLen
,
size_t
featLen
,
size_t
group
)
{}
size_t
group
)
{}
/**
 * Stub of hl_upsample_forward: takes the full argument list and performs
 * no work (empty body).
 */
inline void hl_upsample_forward(real* inputData,
                                real* maskData,
                                size_t batchSize,
                                size_t imgSizeH,
                                size_t imgSizeW,
                                size_t channels,
                                size_t outputH,
                                size_t outputW,
                                real* outputData) {
}
/**
 * Stub of hl_upsample_backward: takes the full argument list and performs
 * no work (empty body).
 */
inline void hl_upsample_backward(real* outputGradData,
                                 real* maskData,
                                 size_t batchSize,
                                 size_t imgSizeH,
                                 size_t imgSizeW,
                                 size_t channels,
                                 size_t outputH,
                                 size_t outputW,
                                 real* inputGradData) {
}
#endif // HL_CNN_STUB_H_
#endif // HL_CNN_STUB_H_
paddle/cuda/src/hl_cuda_cnn.cu
浏览文件 @
68a75344
...
@@ -1028,3 +1028,79 @@ void hl_maxout_backward(real* inGrad,
...
@@ -1028,3 +1028,79 @@ void hl_maxout_backward(real* inGrad,
num_kernels
,
inGrad
,
outGrad
,
idData
,
size
,
featLen
,
groups
);
num_kernels
,
inGrad
,
outGrad
,
idData
,
size
,
featLen
,
groups
);
CHECK_SYNC
(
"hl_maxout_backward failed"
);
CHECK_SYNC
(
"hl_maxout_backward failed"
);
}
}
/**
 * Upsample forward kernel: one thread per input element.
 *
 * Thread `index` copies input_data[index] to
 * output_data[plane * out_h * out_w + mask_data[index]], where
 * plane = index / (in_h * in_w).
 *
 * @param input_data  source values, read at [0, nthreads).
 * @param mask_data   per-element position inside the output plane, stored as
 *                    real and truncated to int.
 *                    NOTE(review): assumed to be non-negative and smaller
 *                    than out_h * out_w; the kernel does not check this.
 * @param nthreads    number of input elements to process.
 * @param in_h, in_w  input plane size.
 * @param out_h, out_w output plane size.
 * @param output_data destination buffer; only the masked positions are
 *                    written here.
 */
__global__ void upsampleForwardCompute(real* input_data,
                                       real* mask_data,
                                       size_t nthreads,
                                       size_t in_h,
                                       size_t in_w,
                                       size_t out_h,
                                       size_t out_w,
                                       real* output_data) {
  // Keep all index arithmetic in size_t: the previous int index/offset were
  // compared against an unsigned count and could overflow on large tensors.
  const size_t index =
      static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  if (index >= nthreads) {
    return;
  }

  const size_t offset = index / (in_w * in_h) * out_h * out_w;
  const int upsample_idx = static_cast<int>(mask_data[index]);
  output_data[offset + upsample_idx] = input_data[index];
}
/**
 * Upsample backward kernel: one thread per input-gradient element.
 *
 * Thread `index` reads
 * out_grad[plane * out_h * out_w + mask_data[index]] into input_grad[index],
 * where plane = index / (in_h * in_w).
 *
 * @param out_grad    gradient with respect to the upsampled output.
 * @param mask_data   per-element position inside the output plane, stored as
 *                    real and truncated to int.
 *                    NOTE(review): assumed to be non-negative and smaller
 *                    than out_h * out_w; the kernel does not check this.
 * @param nthreads    number of input-gradient elements to produce.
 * @param in_h, in_w  input plane size.
 * @param out_h, out_w output plane size.
 * @param input_grad  destination buffer, written at [0, nthreads).
 */
__global__ void upsampleBackwardCompute(real* out_grad,
                                        real* mask_data,
                                        size_t nthreads,
                                        size_t in_h,
                                        size_t in_w,
                                        size_t out_h,
                                        size_t out_w,
                                        real* input_grad) {
  // Keep all index arithmetic in size_t: the previous int index/offset were
  // compared against an unsigned count and could overflow on large tensors.
  const size_t index =
      static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  if (index >= nthreads) {
    return;
  }

  const size_t offset = index / (in_w * in_h) * out_h * out_w;
  const int upsample_idx = static_cast<int>(mask_data[index]);
  input_grad[index] = out_grad[offset + upsample_idx];
}
/**
 * Launches upsampleForwardCompute with one thread per input element
 * (batchSize * imgSizeH * imgSizeW * channels), 1024 threads per block, on
 * STREAM_DEFAULT, then runs CHECK_SYNC.
 *
 * @param inputData  input values.
 * @param maskData   per-element target position inside each output plane.
 * @param batchSize  batch size.
 * @param imgSizeH   input height.
 * @param imgSizeW   input width.
 * @param channels   number of channels.
 * @param outputH    output height.
 * @param outputW    output width.
 * @param outputData output buffer written by the kernel.
 */
void hl_upsample_forward(real* inputData,
                         real* maskData,
                         size_t batchSize,
                         size_t imgSizeH,
                         size_t imgSizeW,
                         size_t channels,
                         size_t outputH,
                         size_t outputW,
                         real* outputData) {
  // Element count stays in size_t; it used to be narrowed to int.
  const size_t num_kernels = batchSize * imgSizeH * imgSizeW * channels;
  if (num_kernels == 0) {
    // Nothing to copy, and a grid with zero blocks is not a valid launch.
    return;
  }

  const size_t kThreadsPerBlock = 1024;
  const unsigned int blocks = static_cast<unsigned int>(
      (num_kernels + kThreadsPerBlock - 1) / kThreadsPerBlock);

  upsampleForwardCompute<<<blocks, 1024, 0, STREAM_DEFAULT>>>(inputData,
                                                              maskData,
                                                              num_kernels,
                                                              imgSizeH,
                                                              imgSizeW,
                                                              outputH,
                                                              outputW,
                                                              outputData);
  CHECK_SYNC("hl_upsample_forward failed");
}
/**
 * Launches upsampleBackwardCompute with one thread per input-gradient
 * element (batchSize * imgSizeH * imgSizeW * channels), 1024 threads per
 * block, on STREAM_DEFAULT, then runs CHECK_SYNC.
 *
 * @param outputGradData gradient with respect to the upsampled output.
 * @param maskData       per-element source position inside each output plane.
 * @param batchSize      batch size.
 * @param imgSizeH       input height.
 * @param imgSizeW       input width.
 * @param channels       number of channels.
 * @param outputH        output height.
 * @param outputW        output width.
 * @param inputGradData  input-gradient buffer written by the kernel.
 */
void hl_upsample_backward(real* outputGradData,
                          real* maskData,
                          size_t batchSize,
                          size_t imgSizeH,
                          size_t imgSizeW,
                          size_t channels,
                          size_t outputH,
                          size_t outputW,
                          real* inputGradData) {
  // Element count stays in size_t; it used to be narrowed to int.
  const size_t num_kernels = batchSize * imgSizeH * imgSizeW * channels;
  if (num_kernels == 0) {
    // Nothing to copy, and a grid with zero blocks is not a valid launch.
    return;
  }

  const size_t kThreadsPerBlock = 1024;
  const unsigned int blocks = static_cast<unsigned int>(
      (num_kernels + kThreadsPerBlock - 1) / kThreadsPerBlock);

  upsampleBackwardCompute<<<blocks, 1024, 0, STREAM_DEFAULT>>>(outputGradData,
                                                               maskData,
                                                               num_kernels,
                                                               imgSizeH,
                                                               imgSizeW,
                                                               outputH,
                                                               outputW,
                                                               inputGradData);
  CHECK_SYNC("hl_upsample_backward failed");
}
paddle/fluid/framework/executor.cc
浏览文件 @
68a75344
...
@@ -279,6 +279,21 @@ std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
...
@@ -279,6 +279,21 @@ std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
return
std
::
unique_ptr
<
ExecutorPrepareContext
>
(
ctx
);
return
std
::
unique_ptr
<
ExecutorPrepareContext
>
(
ctx
);
}
}
std
::
vector
<
std
::
shared_ptr
<
ExecutorPrepareContext
>>
Executor
::
Prepare
(
const
ProgramDesc
&
program
,
const
std
::
vector
<
int
>&
block_ids
)
{
std
::
vector
<
std
::
shared_ptr
<
ExecutorPrepareContext
>>
result
;
for
(
auto
&
bid
:
block_ids
)
{
auto
*
ctx
=
new
ExecutorPrepareContext
(
program
,
bid
);
PADDLE_ENFORCE_LT
(
static_cast
<
size_t
>
(
bid
),
program
.
Size
());
auto
&
block
=
program
.
Block
(
bid
);
for
(
auto
&
op_desc
:
block
.
AllOps
())
{
ctx
->
ops_
.
push_back
(
OpRegistry
::
CreateOp
(
*
op_desc
));
}
result
.
push_back
(
std
::
shared_ptr
<
ExecutorPrepareContext
>
(
ctx
));
}
return
result
;
}
void
Executor
::
RunPreparedContext
(
ExecutorPrepareContext
*
ctx
,
Scope
*
scope
,
void
Executor
::
RunPreparedContext
(
ExecutorPrepareContext
*
ctx
,
Scope
*
scope
,
bool
create_local_scope
,
bool
create_vars
)
{
bool
create_local_scope
,
bool
create_vars
)
{
auto
&
block
=
ctx
->
prog_
.
Block
(
ctx
->
block_id_
);
auto
&
block
=
ctx
->
prog_
.
Block
(
ctx
->
block_id_
);
...
...
paddle/fluid/framework/executor.h
浏览文件 @
68a75344
...
@@ -61,6 +61,9 @@ class Executor {
...
@@ -61,6 +61,9 @@ class Executor {
static
std
::
unique_ptr
<
ExecutorPrepareContext
>
Prepare
(
static
std
::
unique_ptr
<
ExecutorPrepareContext
>
Prepare
(
const
ProgramDesc
&
program
,
int
block_id
);
const
ProgramDesc
&
program
,
int
block_id
);
static
std
::
vector
<
std
::
shared_ptr
<
ExecutorPrepareContext
>>
Prepare
(
const
ProgramDesc
&
program
,
const
std
::
vector
<
int
>&
block_ids
);
void
RunPreparedContext
(
ExecutorPrepareContext
*
ctx
,
Scope
*
scope
,
void
RunPreparedContext
(
ExecutorPrepareContext
*
ctx
,
Scope
*
scope
,
bool
create_local_scope
=
true
,
bool
create_local_scope
=
true
,
bool
create_vars
=
true
);
bool
create_vars
=
true
);
...
...
paddle/fluid/operators/detail/variable_response.cc
浏览文件 @
68a75344
...
@@ -14,8 +14,6 @@
...
@@ -14,8 +14,6 @@
#include "paddle/fluid/operators/detail/variable_response.h"
#include "paddle/fluid/operators/detail/variable_response.h"
#include <string.h>
#include <string>
#include <string>
#include <utility>
#include <utility>
#include <vector>
#include <vector>
...
...
paddle/fluid/operators/fc_mkldnn_op.cc
0 → 100644
浏览文件 @
68a75344
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/fc_op.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace
paddle
{
namespace
operators
{
using
paddle
::
framework
::
Tensor
;
using
paddle
::
platform
::
MKLDNNDeviceContext
;
/**
 * Builds the MKLDNN memory descriptors of a fully connected op from the
 * dimensions of its input and weight tensors.
 *
 * All descriptors use data type f32. A 4-D input is described as nchw with
 * oihw weights; any other input is described as nc with oi weights.
 *
 * @tparam T tensor-like type exposing dims().
 */
template <typename T>
class MKLDNNMD {
 public:
  /**
   * @param in   input tensor; only its dims are read.
   * @param w    weight tensor; only its dims are read.
   * @param bias whether bias() should describe a bias of w[1] elements.
   */
  explicit MKLDNNMD(const T* in, const T* w, bool bias)
      : in{paddle::framework::vectorize2int(in->dims())},
        w{paddle::framework::vectorize2int(w->dims())},
        with_bias_{bias} {}

  /** Output descriptor: {in[0], w[1]}, format nc. */
  mkldnn::memory::desc dst() const {
    return platform::MKLDNNMemDesc({in[0], w[1]},
                                   mkldnn::memory::data_type::f32,
                                   mkldnn::memory::format::nc);
  }

  /** Input descriptor: all four dims as nchw, or {in[0], in[1]} as nc. */
  mkldnn::memory::desc src() const {
    return is_spatial()
               ? platform::MKLDNNMemDesc({in[0], in[1], in[2], in[3]},
                                         mkldnn::memory::data_type::f32,
                                         mkldnn::memory::format::nchw)
               : platform::MKLDNNMemDesc({in[0], in[1]},
                                         mkldnn::memory::data_type::f32,
                                         mkldnn::memory::format::nc);
  }

  /**
   * Weight descriptor: {w[1], in[1], in[2], in[3]} as oihw, or
   * {w[1], in[1]} as oi.
   */
  mkldnn::memory::desc weights() const {
    return is_spatial()
               ? platform::MKLDNNMemDesc({w[1], in[1], in[2], in[3]},
                                         mkldnn::memory::data_type::f32,
                                         mkldnn::memory::format::oihw)
               : platform::MKLDNNMemDesc({w[1], in[1]},
                                         mkldnn::memory::data_type::f32,
                                         mkldnn::memory::format::oi);
  }

  /**
   * Bias descriptor: {w[1]} when a bias was requested, otherwise a
   * descriptor with empty dims. The format is format_undef in both cases.
   */
  mkldnn::memory::desc bias() const {
    return with_bias_
               ? platform::MKLDNNMemDesc({w[1]},
                                         mkldnn::memory::data_type::f32,
                                         mkldnn::memory::format::format_undef)
               : platform::MKLDNNMemDesc({},
                                         mkldnn::memory::data_type::f32,
                                         mkldnn::memory::format::format_undef);
  }

 private:
  // Only a 4-D input has the in[2]/in[3] entries that the nchw/oihw
  // descriptors index. The old "size() > 1" test was also true for 2-D
  // inputs and made src()/weights() read past the end of `in`.
  bool is_spatial() const { return in.size() == 4; }

  std::vector<int> in;  // dims of the input tensor
  std::vector<int> w;   // dims of the weight tensor
  bool with_bias_;
};
/**
 * Wraps raw buffers into mkldnn::memory objects, pairing each buffer with the
 * matching descriptor from an MKLDNNMD<Tensor> and a fixed engine.
 *
 * The object keeps a pointer to the descriptor provider and a reference to
 * the engine; neither is copied.
 */
class MKLDNNMemory {
 public:
  MKLDNNMemory(MKLDNNMD<Tensor>* t, const mkldnn::engine& e)
      : md_(t), engine_(e) {}
  virtual ~MKLDNNMemory() = default;

  /** Memory over a read-only output-shaped buffer (constness is cast away). */
  template <typename Output>
  mkldnn::memory dst(const Output* out) {
    float* buffer = const_cast<float*>(out);
    const mkldnn::memory::primitive_desc mem_pd(md_->dst(), engine_);
    return mkldnn::memory(mem_pd, static_cast<void*>(buffer));
  }

  /** Memory over a writable output-shaped buffer. */
  template <typename Output>
  mkldnn::memory dst(Output* out) {
    const mkldnn::memory::primitive_desc mem_pd(md_->dst(), engine_);
    return mkldnn::memory(mem_pd, out);
  }

  /** Memory over an input-shaped buffer (constness is cast away). */
  template <typename Input>
  mkldnn::memory src(const Input* in) {
    float* buffer = const_cast<float*>(in);
    const mkldnn::memory::primitive_desc mem_pd(md_->src(), engine_);
    return mkldnn::memory(mem_pd, static_cast<void*>(buffer));
  }

  /** Memory over a weight-shaped buffer (constness is cast away). */
  template <typename Weight>
  mkldnn::memory weights(const Weight* w) {
    float* buffer = const_cast<float*>(w);
    const mkldnn::memory::primitive_desc mem_pd(md_->weights(), engine_);
    return mkldnn::memory(mem_pd, static_cast<void*>(buffer));
  }

  /** Memory for the bias descriptor; no caller buffer is supplied. */
  mkldnn::memory bias() {
    const mkldnn::memory::primitive_desc mem_pd(md_->bias(), engine_);
    return mkldnn::memory(mem_pd);
  }

 private:
  MKLDNNMD<Tensor>* md_;
  const mkldnn::engine& engine_;
};
/**
 * Forward kernel of the fully connected op, implemented with
 * mkldnn::inner_product_forward.
 *
 * Besides computing "Out", Compute stores the forward primitive descriptor in
 * the device context under the key "<Out variable name>@fc_pd".
 */
template <typename T>
class FCMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 public:  // Compute is public here as it is in FCMKLDNNGradOpKernel.
  /**
   * Runs the inner product for inputs "Input" and "W" and writes "Out".
   *
   * Requires a CPU place and 2-D or 4-D "Input" and "W". The attribute
   * "bias_attr" selects whether the primitive is built with a bias memory.
   */
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                   "It must use CPUPlace.");

    auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
    const auto& mkldnn_engine = dev_ctx.GetEngine();

    auto input = ctx.Input<Tensor>("Input");
    auto w = ctx.Input<Tensor>("W");

    PADDLE_ENFORCE(input->dims().size() == 2 || input->dims().size() == 4,
                   "Input must be with 2 or 4 dimensions, i.e. NCHW");
    PADDLE_ENFORCE(w->dims().size() == 2 || w->dims().size() == 4,
                   "Weights must be with 2 or 4 dimensions, i.e. OI or OIHW");

    bool with_bias = ctx.Attr<bool>("bias_attr");

    MKLDNNMD<Tensor> md(input, w, with_bias);

    std::shared_ptr<mkldnn::inner_product_forward::primitive_desc> pd =
        FcFwdPrimitiveDesc(md.src(), md.weights(), md.dst(), md.bias(),
                           with_bias, mkldnn_engine);

    // Publish the descriptor under a key derived from the output variable
    // name so it can be looked up again from the device context.
    const std::string key = ctx.op().Output("Out");
    const std::string key_fc_pd = key + "@fc_pd";

    dev_ctx.SetBlob(key_fc_pd, pd);

    MKLDNNMemory mem(&md, mkldnn_engine);

    const T* input_data = input->data<T>();
    const T* w_data = w->data<T>();

    auto output = ctx.Output<Tensor>("Out");
    T* output_data = output->mutable_data<T>(ctx.GetPlace());

    auto dst_memory = mem.dst(output_data);
    auto src_memory = mem.src(input_data);
    auto weights_memory = mem.weights(w_data);
    auto bias_memory = mem.bias();

    auto forward = with_bias ? mkldnn::inner_product_forward(
                                   *pd, src_memory, weights_memory,
                                   bias_memory, dst_memory)
                             : mkldnn::inner_product_forward(
                                   *pd, src_memory, weights_memory,
                                   dst_memory);

    std::vector<mkldnn::primitive> pipeline = {forward};
    mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
  }

 private:
  /**
   * Creates the forward primitive descriptor for the given memory
   * descriptors; `bias` is only used when with_bias is true.
   */
  std::unique_ptr<mkldnn::inner_product_forward::primitive_desc>
  FcFwdPrimitiveDesc(const mkldnn::memory::desc& src,
                     const mkldnn::memory::desc& weights,
                     const mkldnn::memory::desc& dst,
                     const mkldnn::memory::desc& bias,
                     const bool with_bias,
                     const mkldnn::engine& engine) const {
    auto desc = with_bias
                    ? mkldnn::inner_product_forward::desc(
                          mkldnn::prop_kind::forward, src, weights, bias, dst)
                    : mkldnn::inner_product_forward::desc(
                          mkldnn::prop_kind::forward, src, weights, dst);

    // Hand the allocation straight to the owner; no raw owning local.
    return std::unique_ptr<mkldnn::inner_product_forward::primitive_desc>(
        new mkldnn::inner_product_forward::primitive_desc(desc, engine));
  }
};
/**
 * Backward kernel of the fully connected op, implemented with
 * mkldnn::inner_product_backward_weights and
 * mkldnn::inner_product_backward_data.
 *
 * Compute looks up the forward primitive descriptor in the device context
 * under the key "<Out variable name>@fc_pd" and fails if it is missing.
 */
template <typename T>
class FCMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
 public:
  /**
   * Computes the gradients of "W" and "Input" from the gradient of "Out".
   * Each gradient is only computed when its output tensor is present.
   */
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                   "It must use CPUPlace.");

    auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
    const auto& mkldnn_engine = dev_ctx.GetEngine();

    T* input_grad_data = nullptr;
    T* w_grad_data = nullptr;

    Tensor* input_grad = ctx.Output<Tensor>(framework::GradVarName("Input"));
    Tensor* w_grad = ctx.Output<Tensor>(framework::GradVarName("W"));

    if (input_grad) {
      input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
    }
    if (w_grad) {
      w_grad_data = w_grad->mutable_data<T>(ctx.GetPlace());
    }

    const Tensor* input = ctx.Input<Tensor>("Input");
    const T* input_data = input->data<T>();

    const Tensor* w = ctx.Input<Tensor>("W");
    const T* w_data = w->data<T>();

    const Tensor* out_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
    const T* out_grad_data = out_grad->data<T>();

    bool with_bias = ctx.Attr<bool>("bias_attr");

    MKLDNNMD<Tensor> md(input, w, with_bias);
    MKLDNNMemory mem(&md, mkldnn_engine);

    auto dst_memory = mem.dst(out_grad_data);
    auto src_memory = mem.src(input_data);
    auto weights_memory = mem.weights(w_data);
    auto bias_memory = mem.bias();

    // The key is derived from the "Out" variable name with the "@fc_pd"
    // suffix.
    const std::string key = ctx.op().Input("Out");
    const std::string key_fc_pd = key + "@fc_pd";

    auto pd =
        std::static_pointer_cast<mkldnn::inner_product_forward::primitive_desc>(
            dev_ctx.GetBlob(key_fc_pd));

    PADDLE_ENFORCE(pd != nullptr, "Fail to find key_fc_pd in device context");

    if (w_grad) {
      auto weights_grad_memory = mem.weights(w_grad_data);

      mkldnn::inner_product_backward_weights::primitive_desc bwd_weight_pd =
          FcBwdWeightsPrimitiveDesc(md.src(), md.weights(), md.dst(),
                                    md.bias(), with_bias, *pd, mkldnn_engine);

      // Pass the bias memory only when the descriptor was built with a bias,
      // the same way the forward kernel builds its primitive.
      auto bwd_weights_prim =
          with_bias ? mkldnn::inner_product_backward_weights(
                          bwd_weight_pd, src_memory, dst_memory,
                          weights_grad_memory, bias_memory)
                    : mkldnn::inner_product_backward_weights(
                          bwd_weight_pd, src_memory, dst_memory,
                          weights_grad_memory);

      std::vector<mkldnn::primitive> pipeline{bwd_weights_prim};
      mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
    }

    if (input_grad) {
      auto src_grad_memory = mem.src(input_grad_data);

      mkldnn::inner_product_backward_data::primitive_desc bwd_data_pd =
          FcBwdDataPrimitiveDesc(md.src(), md.weights(), md.dst(), *pd,
                                 mkldnn_engine);

      auto bwd_data_prim = mkldnn::inner_product_backward_data(
          bwd_data_pd, dst_memory, weights_memory, src_grad_memory);

      std::vector<mkldnn::primitive> pipeline{bwd_data_prim};
      mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
    }
  }

 private:
  /**
   * Creates the backward-weights primitive descriptor, using `pd` as the
   * forward hint. `bias` is only part of the descriptor when with_bias is
   * true.
   */
  mkldnn::inner_product_backward_weights::primitive_desc
  FcBwdWeightsPrimitiveDesc(
      const mkldnn::memory::desc& src,
      const mkldnn::memory::desc& diff_weights,
      const mkldnn::memory::desc& diff_dst,
      const mkldnn::memory::desc& bias,
      const bool with_bias,
      const mkldnn::inner_product_forward::primitive_desc& pd,
      const mkldnn::engine& engine) const {
    // The two branches used to be identical (both passed `bias`), so the
    // flag had no effect; the no-bias branch now omits the bias descriptor.
    auto bwd_weight_desc =
        with_bias ? mkldnn::inner_product_backward_weights::desc(
                        src, diff_weights, bias, diff_dst)
                  : mkldnn::inner_product_backward_weights::desc(
                        src, diff_weights, diff_dst);

    return mkldnn::inner_product_backward_weights::primitive_desc(
        bwd_weight_desc, engine, pd);
  }

  /**
   * Creates the backward-data primitive descriptor, using `pd` as the
   * forward hint.
   */
  mkldnn::inner_product_backward_data::primitive_desc FcBwdDataPrimitiveDesc(
      const mkldnn::memory::desc& diff_src,
      const mkldnn::memory::desc& weights,
      const mkldnn::memory::desc& diff_dst,
      const mkldnn::inner_product_forward::primitive_desc& pd,
      const mkldnn::engine& engine) const {
    auto bwd_data_desc =
        mkldnn::inner_product_backward_data::desc(diff_src, weights, diff_dst);
    return mkldnn::inner_product_backward_data::primitive_desc(bwd_data_desc,
                                                               engine, pd);
  }
};
}
// namespace operators
}
// namespace paddle
// Register the float MKLDNN kernels of fc and fc_grad for CPUPlace.
REGISTER_OP_KERNEL(fc, MKLDNN, ::paddle::platform::CPUPlace,
                   paddle::operators::FCMKLDNNOpKernel<float>);

REGISTER_OP_KERNEL(fc_grad, MKLDNN, ::paddle::platform::CPUPlace,
                   paddle::operators::FCMKLDNNGradOpKernel<float>);
paddle/fluid/operators/fc_op.cc
0 → 100644
浏览文件 @
68a75344
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fc_op.h"
#include <vector>
namespace
paddle
{
namespace
operators
{
/**
 * Checks the inputs of the fc op and sets the shape of "Out".
 *
 * "Input", "W" and "Out" must be present, and "Input" and "W" must each be
 * 2-D or 4-D. "Out" gets the shape {Input.dims[0], W.dims[1]} and shares the
 * LoD of "Input".
 */
void FCOp::InferShape(framework::InferShapeContext* ctx) const {
  PADDLE_ENFORCE(ctx->HasInput("Input"),
                 "X(Input) of Fully Connected should not be null.");
  PADDLE_ENFORCE(ctx->HasOutput("Out"),
                 "Out(Output) of Fully Connected should not be null.");
  PADDLE_ENFORCE(ctx->HasInput("W"),
                 "W(Input) of Fully Connected should not be null.");

  auto in_dims = ctx->GetInputDim("Input");
  auto w_dims = ctx->GetInputDim("W");

  // Check the ranks before indexing into the dims; the output shape used to
  // be built from in_dims[0] / w_dims[1] before either rank was validated.
  PADDLE_ENFORCE(in_dims.size() == 2 || in_dims.size() == 4,
                 "Fully Connected input should be 2-D or 4-D tensor.");
  // This message used to say "input" although it checks the weights.
  PADDLE_ENFORCE(w_dims.size() == 2 || w_dims.size() == 4,
                 "Fully Connected weights should be 2-D or 4-D tensor.");

  std::vector<int64_t> output_shape({in_dims[0], w_dims[1]});

  ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
  ctx->ShareLoD("Input", "Out");
}
/**
 * Selects the kernel for the fc op: the data type of "Input", the place of
 * the execution context, any layout, and the MKLDNN library.
 */
framework::OpKernelType FCOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  const auto input_type =
      framework::ToDataType(ctx.Input<Tensor>("Input")->type());
  return framework::OpKernelType(input_type,
                                 ctx.GetPlace(),
                                 framework::DataLayout::kAnyLayout,
                                 framework::LibraryType::kMKLDNN);
}
/**
 * Gives each gradient output that exists the shape of its forward tensor:
 * the gradient of "Input" gets the dims of "Input", and the gradient of "W"
 * gets the dims of "W".
 */
void FCOpGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto input_shape = ctx->GetInputDim("Input");
  auto weight_shape = ctx->GetInputDim("W");

  const auto input_grad_name = framework::GradVarName("Input");
  if (ctx->HasOutput(input_grad_name)) {
    ctx->SetOutputDim(input_grad_name, input_shape);
  }

  const auto weight_grad_name = framework::GradVarName("W");
  if (ctx->HasOutput(weight_grad_name)) {
    ctx->SetOutputDim(weight_grad_name, weight_shape);
  }
}
/**
 * Selects the kernel for the fc_grad op: the data type of "Input", the place
 * of the execution context, any layout, and the MKLDNN library.
 */
framework::OpKernelType FCOpGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  const auto input_type =
      framework::ToDataType(ctx.Input<Tensor>("Input")->type());
  return framework::OpKernelType(input_type,
                                 ctx.GetPlace(),
                                 framework::DataLayout::kAnyLayout,
                                 framework::LibraryType::kMKLDNN);
}
/**
 * Declares the interface of the fc op: inputs "Input" and "W", output "Out",
 * and the boolean attributes "use_mkldnn" and "bias_attr" (both default to
 * false), followed by the operator comment.
 */
FCOpMaker::FCOpMaker(OpProto* proto, OpAttrChecker* op_checker)
    : OpProtoAndCheckerMaker(proto, op_checker) {
  // Tensors.
  AddInput("Input", "(Tensor) The input tensor of fully connected operator. ");
  AddInput("W", "(Tensor), The second input tensor of fc op.");
  AddOutput("Out", "(Tensor) The output tensor of fully connected operator. ");

  // Attributes.
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<bool>("bias_attr",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);

  // Operator comment; the raw string is kept exactly as written.
  AddComment(R"DOC(
Fully Connected Operator.
The fully connected operation calculates the output based on the input, weights and bias attribute.
The size of each dimension of the parameters checked in the infer-shape.
The matrix of bias is generated by the mkldnn framework, when the bias_attr is True.
Additional parametrs are use_mkldnn and bias_attr.
The input(X) size and output(Out) size may be diffrent.
The fully connected layer only supports MKLDNN version
)DOC");
}
}
// namespace operators
}
// namespace paddle
// Register fc with its maker, and fc_grad as its gradient op.
REGISTER_OP(fc,
            paddle::operators::FCOp,
            paddle::operators::FCOpMaker,
            fc_grad,
            paddle::operators::FCOpGrad);
paddle/fluid/operators/fc_op.h
0 → 100644
浏览文件 @
68a75344
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/op_registry.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
/**
 * Fully connected operator. Inherits the constructors of
 * framework::OperatorWithKernel and overrides shape inference and kernel
 * selection.
 */
class FCOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  /** Infers the shape of the op's outputs through the given context. */
  void InferShape(framework::InferShapeContext* ctx) const override;

 protected:
  /** Returns the kernel type to run for the given execution context. */
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override;
};
/**
 * Gradient operator of the fully connected operator. Inherits the
 * constructors of framework::OperatorWithKernel and overrides shape
 * inference and kernel selection.
 */
class FCOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  /** Infers the shape of the op's outputs through the given context. */
  void InferShape(framework::InferShapeContext* ctx) const override;

 protected:
  /** Returns the kernel type to run for the given execution context. */
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override;
};
/**
 * Proto and attribute-checker maker for the fully connected operator; the
 * constructor is defined out of line.
 */
class FCOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  FCOpMaker(OpProto* proto, OpAttrChecker* op_checker);
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/listen_and_serv_op.cc
浏览文件 @
68a75344
...
@@ -45,20 +45,23 @@ static void CreateTensorFromMessageType(framework::Variable *var,
...
@@ -45,20 +45,23 @@ static void CreateTensorFromMessageType(framework::Variable *var,
}
}
}
}
static
void
ParallelExecuteBlocks
(
const
std
::
vector
<
size_t
>
&
parallel_blkids
,
static
void
ParallelExecuteBlocks
(
framework
::
Executor
*
executor
,
const
std
::
vector
<
size_t
>
&
parallel_blkids
,
framework
::
Executor
*
executor
,
framework
::
ProgramDesc
*
program
,
const
std
::
vector
<
std
::
shared_ptr
<
framework
::
ExecutorPrepareContext
>>
framework
::
Scope
*
scope
)
{
&
prepared
,
framework
::
ProgramDesc
*
program
,
framework
::
Scope
*
scope
)
{
std
::
vector
<
std
::
future
<
void
>>
fs
;
std
::
vector
<
std
::
future
<
void
>>
fs
;
for
(
size_t
idx
:
parallel_blkids
)
{
for
(
size_t
idx
:
parallel_blkids
)
{
fs
.
push_back
(
framework
::
Async
([
&
executor
,
&
program
,
&
scope
,
idx
]()
{
fs
.
push_back
(
int
run_block
=
idx
;
// thread local
framework
::
Async
([
&
executor
,
&
prepared
,
&
program
,
&
scope
,
idx
]()
{
try
{
int
run_block
=
idx
;
// thread local
executor
->
Run
(
*
program
,
scope
,
run_block
,
false
,
false
);
try
{
}
catch
(
std
::
exception
&
e
)
{
executor
->
RunPreparedContext
(
prepared
[
run_block
].
get
(),
scope
,
LOG
(
ERROR
)
<<
"run sub program error "
<<
e
.
what
();
false
,
false
);
}
}
catch
(
std
::
exception
&
e
)
{
}));
LOG
(
ERROR
)
<<
"run sub program error "
<<
e
.
what
();
}
}));
}
}
for
(
size_t
i
=
0
;
i
<
fs
.
size
();
++
i
)
fs
[
i
].
wait
();
for
(
size_t
i
=
0
;
i
<
fs
.
size
();
++
i
)
fs
[
i
].
wait
();
}
}
...
@@ -96,11 +99,18 @@ class ListenAndServOp : public framework::OperatorBase {
...
@@ -96,11 +99,18 @@ class ListenAndServOp : public framework::OperatorBase {
auto
*
block
=
Attr
<
framework
::
BlockDesc
*>
(
kOptimizeBlock
);
auto
*
block
=
Attr
<
framework
::
BlockDesc
*>
(
kOptimizeBlock
);
auto
*
program
=
block
->
Program
();
auto
*
program
=
block
->
Program
();
in
t
num_blocks
=
program
->
Size
();
size_
t
num_blocks
=
program
->
Size
();
PADDLE_ENFORCE_GE
(
num_blocks
,
2
,
PADDLE_ENFORCE_GE
(
num_blocks
,
2
,
"server program should have at least 2 blocks"
);
"server program should have at least 2 blocks"
);
framework
::
Executor
executor
(
dev_place
);
framework
::
Executor
executor
(
dev_place
);
std
::
vector
<
int
>
block_list
;
for
(
size_t
blkid
=
1
;
blkid
<
num_blocks
;
++
blkid
)
block_list
.
push_back
(
blkid
);
auto
prepared
=
executor
.
Prepare
(
*
program
,
block_list
);
prepared
.
insert
(
prepared
.
begin
(),
std
::
shared_ptr
<
framework
::
ExecutorPrepareContext
>
(
nullptr
));
// TODO(qiao) set proper fields for table lookup and update
// TODO(qiao) set proper fields for table lookup and update
rpc_service_
->
SetExecutor
(
&
executor
);
rpc_service_
->
SetExecutor
(
&
executor
);
...
@@ -153,21 +163,22 @@ class ListenAndServOp : public framework::OperatorBase {
...
@@ -153,21 +163,22 @@ class ListenAndServOp : public framework::OperatorBase {
// The optimize blocks which have the same parent ID would run parallel
// The optimize blocks which have the same parent ID would run parallel
// TODO(Yancey1989): need to use ParallelExecutor for future
// TODO(Yancey1989): need to use ParallelExecutor for future
size
_t
last_parent_blkid
=
program
->
Block
(
1
).
Parent
();
int32
_t
last_parent_blkid
=
program
->
Block
(
1
).
Parent
();
std
::
vector
<
size_t
>
parallel_blkids
;
std
::
vector
<
size_t
>
parallel_blkids
;
parallel_blkids
.
push_back
(
1
);
parallel_blkids
.
push_back
(
1
);
double
ts
=
detail
::
GetTimestamp
();
double
ts
=
detail
::
GetTimestamp
();
for
(
size_t
blkid
=
2
;
blkid
<
num_blocks
;
++
blkid
)
{
for
(
size_t
blkid
=
2
;
blkid
<
num_blocks
;
++
blkid
)
{
if
(
program
->
Block
(
blkid
).
Parent
()
!=
last_parent_blkid
)
{
if
(
program
->
Block
(
blkid
).
Parent
()
!=
last_parent_blkid
)
{
for
(
size_t
idx
:
parallel_blkids
)
VLOG
(
3
)
<<
idx
;
for
(
size_t
idx
:
parallel_blkids
)
VLOG
(
3
)
<<
idx
;
ParallelExecuteBlocks
(
parallel_blkids
,
&
executor
,
program
,
ParallelExecuteBlocks
(
parallel_blkids
,
&
executor
,
pr
epared
,
pr
ogram
,
&
recv_scope
);
&
recv_scope
);
parallel_blkids
.
clear
();
parallel_blkids
.
clear
();
last_parent_blkid
=
program
->
Block
(
blkid
).
Parent
();
last_parent_blkid
=
program
->
Block
(
blkid
).
Parent
();
}
}
parallel_blkids
.
push_back
(
blkid
);
parallel_blkids
.
push_back
(
blkid
);
}
}
ParallelExecuteBlocks
(
parallel_blkids
,
&
executor
,
program
,
&
recv_scope
);
ParallelExecuteBlocks
(
parallel_blkids
,
&
executor
,
prepared
,
program
,
&
recv_scope
);
VLOG
(
3
)
<<
"run all blocks spent "
<<
detail
::
GetTimestamp
()
-
ts
VLOG
(
3
)
<<
"run all blocks spent "
<<
detail
::
GetTimestamp
()
-
ts
<<
"(ms)"
;
<<
"(ms)"
;
...
@@ -181,7 +192,8 @@ class ListenAndServOp : public framework::OperatorBase {
...
@@ -181,7 +192,8 @@ class ListenAndServOp : public framework::OperatorBase {
var
->
GetMutable
<
framework
::
SelectedRows
>
()
->
mutable_rows
()
->
clear
();
var
->
GetMutable
<
framework
::
SelectedRows
>
()
->
mutable_rows
()
->
clear
();
}
}
rpc_service_
->
SetCond
(
1
);
rpc_service_
->
SetCond
(
1
);
// FIXME(typhoonzero): use another condition to sync wait clients get.
// NOTE: does not consider barrier request retry in here, we may use
// global barrier id to resolve this.
rpc_service_
->
WaitClientGet
(
fan_in
);
rpc_service_
->
WaitClientGet
(
fan_in
);
sparse_vars
.
clear
();
sparse_vars
.
clear
();
}
// while(true)
}
// while(true)
...
...
paddle/fluid/operators/split_ids_op.h
浏览文件 @
68a75344
...
@@ -30,19 +30,16 @@ class SplitIdsOpKernel : public framework::OpKernel<T> {
...
@@ -30,19 +30,16 @@ class SplitIdsOpKernel : public framework::OpKernel<T> {
PADDLE_THROW
(
"SplitIds do not support GPU kernel"
);
PADDLE_THROW
(
"SplitIds do not support GPU kernel"
);
}
}
const
auto
*
ids_t
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Ids"
);
auto
&
ids_dims
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Ids"
)
->
dims
(
);
auto
&
ids_dims
=
ids_t
->
dims
();
const
T
*
ids
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Ids"
)
->
data
<
T
>
();
auto
outs
=
ctx
.
MultiOutput
<
framework
::
LoDTensor
>
(
"Out"
);
auto
outs
=
ctx
.
MultiOutput
<
framework
::
LoDTensor
>
(
"Out"
);
const
T
*
ids
=
ids_t
->
data
<
T
>
();
const
size_t
shard_num
=
outs
.
size
();
const
size_t
shard_num
=
outs
.
size
();
std
::
vector
<
std
::
vector
<
T
>>
out_ids
;
std
::
vector
<
std
::
vector
<
T
>>
out_ids
;
out_ids
.
resize
(
outs
.
size
());
out_ids
.
resize
(
outs
.
size
());
// split id by their shard_num.
// split id by their shard_num.
for
(
size_
t
i
=
0
;
i
<
ids_dims
[
0
];
++
i
)
{
for
(
in
t
i
=
0
;
i
<
ids_dims
[
0
];
++
i
)
{
T
id
=
ids
[
i
];
T
id
=
ids
[
i
];
size_t
shard_id
=
static_cast
<
size_t
>
(
id
)
%
shard_num
;
size_t
shard_id
=
static_cast
<
size_t
>
(
id
)
%
shard_num
;
out_ids
[
shard_id
].
push_back
(
id
);
out_ids
[
shard_id
].
push_back
(
id
);
...
...
paddle/gserver/layers/UpsampleLayer.cpp
0 → 100644
浏览文件 @
68a75344
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "UpsampleLayer.h"
#include "iostream"
namespace
paddle
{
REGISTER_LAYER
(
upsample
,
UpsampleLayer
);
/**
 * Number of values in one output sample:
 * upsampleSize_ * upsampleSizeY_ * channels_.
 *
 * While upsampleSize_ is still 0 the two output extents are first derived
 * from the input extents and scale factors (minus the pad flag, converted
 * through int) and stored in the members, so later calls reuse them.
 */
size_t UpsampleLayer::getOutputSize() {
  const bool extentsUnset = (upsampleSize_ == 0);
  if (extentsUnset) {
    const int trimX = static_cast<int>(padOutX_);
    const int trimY = static_cast<int>(padOutY_);
    upsampleSize_ = imgSize_ * scale_ - trimX;
    upsampleSizeY_ = imgSizeY_ * scaleY_ - trimY;
  }
  const size_t planeSize = upsampleSize_ * upsampleSizeY_;
  return planeSize * channels_;
}
/**
 * Reads the upsample configuration of input 0 into the layer members.
 *
 * Requires exactly two inputs. Either upsample_size or scale must be set:
 *  - upsample_size set: the output extents are taken from upsample_size /
 *    upsample_size_y (y falls back to x when absent);
 *  - otherwise: the scale factors and pad flags are stored, and the output
 *    extents are left at 0 so that getOutputSize() derives them later.
 *
 * @return always true (failed checks abort via CHECK).
 */
bool UpsampleLayer::init(const LayerMap& layerMap,
                         const ParameterMap& parameterMap) {
  Layer::init(layerMap, parameterMap);

  CHECK_EQ(inputLayers_.size(), 2U);
  CHECK_EQ(config_.inputs_size(), 2);
  const auto& conf = config_.inputs(0).upsample_conf();
  const auto& img_conf = conf.image_conf();

  // img_size_y is optional; a missing value means a square image.
  imgSizeY_ =
      img_conf.has_img_size_y() ? img_conf.img_size_y() : img_conf.img_size();
  imgSize_ = img_conf.img_size();
  channels_ = img_conf.channels();

  CHECK((conf.has_upsample_size()) || (conf.has_scale()))
      << "scale or upsample_size is required.";

  if (conf.has_upsample_size()) {
    upsampleSize_ = conf.upsample_size();
    upsampleSizeY_ = upsampleSize_;
    if (conf.has_upsample_size_y()) {
      upsampleSizeY_ = conf.upsample_size_y();
    }
  } else {
    if (!conf.has_scale_y()) {
      // Only scale was given: use it for both axes. (Reading scale_y() here
      // would return the proto default rather than the configured scale.)
      scale_ = scaleY_ = conf.scale();
      CHECK_GT(static_cast<int>(scale_), 1);
    } else {
      scale_ = conf.scale();
      scaleY_ = conf.scale_y();
    }
    padOutX_ = conf.pad_out_x();
    padOutY_ = conf.pad_out_y();
    // The X members describe the width axis, the Y members the height axis.
    CHECK(!padOutX_ || scale_ == 2)
        << "Output width padding compensation requires scale_ == 2";
    CHECK(!padOutY_ || scaleY_ == 2)
        << "Output height padding compensation requires scaleY_ == 2";
    // 0 marks "not computed yet" for getOutputSize().
    upsampleSize_ = upsampleSizeY_ = 0;
  }
  return true;
}
/**
 * Forward pass: takes the value of input 0 and the "mask" output of input
 * layer 1, verifies that both have the same width and batch height, resizes
 * the output to batchSize x getOutputSize() and delegates to
 * Matrix::upsampleForward.
 *
 * NOTE(review): imgSize_/imgSizeY_ are passed in the callee's
 * imgSizeH/imgSizeW slots -- confirm the intended axis order.
 */
void UpsampleLayer::forward(PassType passType) {
  Layer::forward(passType);

  const MatrixPtr input = getInputValue(0);
  const MatrixPtr mask = inputLayers_[1]->getOutput("mask").value;

  const size_t batchSize = input->getHeight();
  // Must run before upsampleSize_/upsampleSizeY_ are read below: it fills
  // them in when they are still 0.
  const size_t elementsPerSample = getOutputSize();

  CHECK_EQ(input->getWidth(), mask->getWidth());
  CHECK_EQ(mask->getHeight(), batchSize);
  resetOutput(batchSize, elementsPerSample);

  getOutputValue()->upsampleForward(*input,
                                    *mask,
                                    imgSize_,
                                    imgSizeY_,
                                    channels_,
                                    upsampleSize_,
                                    upsampleSizeY_);
}
void
UpsampleLayer
::
backward
(
const
UpdateCallback
&
callback
)
{
MatrixPtr
mask
=
inputLayers_
[
1
]
->
getOutput
(
"mask"
).
value
;
MatrixPtr
inputGrad
=
getInputGrad
(
0
);
MatrixPtr
outputGrad
=
getOutputGrad
();
inputGrad
->
upsampleBackward
(
*
outputGrad
,
*
mask
,
imgSize_
,
imgSizeY_
,
channels_
,
upsampleSize_
,
upsampleSizeY_
);
}
}
// namespace paddle
paddle/gserver/layers/UpsampleLayer.h
0 → 100644
浏览文件 @
68a75344
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace
paddle
{
/**
 * This layer transposes the pooling process.
 * It takes two inputs: the first input is the input data, and
 * the second is the mask data from the max-pool-with-mask layer.
 *
 * All size members start at 0 so that none of them is ever read while
 * indeterminate; init() overwrites the ones that apply to the configuration.
 */
class UpsampleLayer : public Layer {
public:
  explicit UpsampleLayer(const LayerConfig& config) : Layer(config) {}
  ~UpsampleLayer() {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forward(PassType passType) override;
  void backward(const UpdateCallback& callback) override;

  /// Number of values in one output sample.
  size_t getOutputSize();

protected:
  /// Scale factors along x and y.
  size_t scale_ = 0;
  size_t scaleY_ = 0;
  /// Output extents along x and y; 0 means "not computed yet".
  size_t upsampleSize_ = 0;
  size_t upsampleSizeY_ = 0;
  /// Pad flags (0 or 1) subtracted from the scaled output extent.
  size_t padOutX_ = 0;
  size_t padOutY_ = 0;
  /// Input image extents along x and y.
  size_t imgSize_ = 0;
  size_t imgSizeY_ = 0;
  /// Number of image channels.
  size_t channels_ = 0;
};
}
// namespace paddle
paddle/gserver/tests/CMakeLists.txt
浏览文件 @
68a75344
...
@@ -27,6 +27,7 @@ gserver_test(test_BatchNorm)
...
@@ -27,6 +27,7 @@ gserver_test(test_BatchNorm)
gserver_test
(
test_KmaxSeqScore
)
gserver_test
(
test_KmaxSeqScore
)
gserver_test
(
test_Expand
)
gserver_test
(
test_Expand
)
gserver_test
(
test_MaxPoolingWithMaskOutput
)
gserver_test
(
test_MaxPoolingWithMaskOutput
)
gserver_test
(
test_Upsample
)
set
(
PYTHON_PATH
set
(
PYTHON_PATH
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
...
...
paddle/gserver/tests/test_Upsample.cpp
0 → 100644
浏览文件 @
68a75344
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "LayerGradUtil.h"
#include "paddle/math/MathUtils.h"
#include "paddle/testing/TestUtil.h"
using
namespace
paddle
;
/**
 * Fills a pooling test configuration: a bias-free "pool" layer with one
 * filter and two channels, a 2x2 window, stride 2 and no padding. The output
 * extents are computed with outputSize() (caffeMode = false) from the image
 * extents already stored in *pool.
 */
void setPoolConfig(TestConfig* config,
                   PoolConfig* pool,
                   const string& poolType) {
  config->biasSize = 0;
  config->layerConfig.set_type("pool");
  config->layerConfig.set_num_filters(1);

  const int kernelW = 2;
  const int kernelH = 2;
  const int padW = 0;
  const int padH = 0;
  const int strideW = 2;
  const int strideH = 2;

  pool->set_pool_type(poolType);
  pool->set_channels(2);
  pool->set_size_x(kernelW);
  pool->set_size_y(kernelH);
  pool->set_start(0);
  pool->set_padding(padW);
  pool->set_padding_y(padH);
  pool->set_stride(strideW);
  pool->set_stride_y(strideH);

  const int outW = outputSize(
      pool->img_size(), kernelW, padW, strideW, /* caffeMode */ false);
  const int outH = outputSize(
      pool->img_size_y(), kernelH, padH, strideH, /* caffeMode */ false);
  pool->set_output_x(outW);
  pool->set_output_y(outH);
}
/**
 * Builds a pooling layer of type poolType on an 8x8, 2-channel input, runs it
 * forward, then builds an "upsample" layer (scale 2, 4x4x2 input) whose two
 * inputs both name that pooling layer, runs it forward and backward, and
 * returns the upsample layer.
 *
 * @param inputMat      data copied into the data layer (128 values).
 * @param poolType      pool type given to the pooling layer; the upsample
 *                      layer reads that layer's "mask" output.
 * @param use_gpu       also assigned to FLAGS_use_gpu.
 * @param tempGradData  128 values copied into the upsample output gradient.
 */
LayerPtr doOneUpsampleTest(MatrixPtr& inputMat,
                           const string& poolType,
                           bool use_gpu,
                           real* tempGradData) {
  /* prepare maxPoolWithMaskLayer */
  TestConfig config;
  config.inputDefs.push_back({INPUT_DATA, "layer_0", 128, 0});
  LayerInputConfig* input = config.layerConfig.add_inputs();
  PoolConfig* pool = input->mutable_pool_conf();

  pool->set_img_size(8);
  pool->set_img_size_y(8);
  // Honour the caller's pool type instead of a hard-coded literal.
  setPoolConfig(&config, pool, poolType);
  config.layerConfig.set_size(pool->output_x() * pool->output_y() *
                              pool->channels());

  config.layerConfig.set_name("MaxPoolWithMask");

  std::vector<DataLayerPtr> dataLayers;
  LayerMap layerMap;
  vector<Argument> datas;

  initDataLayer(config,
                &dataLayers,
                &datas,
                &layerMap,
                "MaxPoolWithMask",
                1,
                false,
                use_gpu);

  dataLayers[0]->getOutputValue()->copyFrom(*inputMat);

  FLAGS_use_gpu = use_gpu;
  std::vector<ParameterPtr> parameters;
  LayerPtr maxPoolingWithMaskOutputLayer;
  initTestLayer(config, &layerMap, &parameters, &maxPoolingWithMaskOutputLayer);
  maxPoolingWithMaskOutputLayer->forward(PASS_GC);

  /* prepare the upsample layer */
  LayerConfig upsampleLayerConfig;
  upsampleLayerConfig.set_type("upsample");
  LayerInputConfig* input1 = upsampleLayerConfig.add_inputs();
  upsampleLayerConfig.add_inputs();

  UpsampleConfig* upsampleConfig = input1->mutable_upsample_conf();
  upsampleConfig->set_scale(2);
  ImageConfig* imageConfig = upsampleConfig->mutable_image_conf();
  imageConfig->set_channels(2);
  imageConfig->set_img_size(4);
  imageConfig->set_img_size_y(4);
  upsampleLayerConfig.set_size(2 * 8 * 8);
  upsampleLayerConfig.set_name("upsample");

  // Both inputs (data and mask) come from the same pooling layer.
  for (size_t i = 0; i < 2; i++) {
    LayerInputConfig& inputTemp = *(upsampleLayerConfig.mutable_inputs(i));
    inputTemp.set_input_layer_name("MaxPoolWithMask");
  }

  LayerPtr upsampleLayer;
  ParameterMap parameterMap;
  upsampleLayer = Layer::create(upsampleLayerConfig);
  layerMap[upsampleLayerConfig.name()] = upsampleLayer;
  upsampleLayer->init(layerMap, parameterMap);
  upsampleLayer->setNeedGradient(true);
  upsampleLayer->forward(PASS_GC);
  upsampleLayer->getOutputGrad()->copyFrom(tempGradData, 128);
  upsampleLayer->backward();

  return upsampleLayer;
}
/*
 * Runs the upsample pipeline on the CPU with a random 1x128 input and a
 * random 1x128 output gradient. When built with PADDLE_WITH_CUDA the same
 * data is run on the GPU and both the layer output and the gradient of the
 * previous layer are compared against the CPU results.
 */
TEST(Layer, maxPoolingWithMaskOutputLayerFwd) {
  bool useGpu = false;

  MatrixPtr inputMat = Matrix::create(1, 128, false, useGpu);
  inputMat->randomizeUniform();

  MatrixPtr tempGradMat = Matrix::create(1, 128, false, useGpu);
  tempGradMat->randomizeUniform();

  real* data = inputMat->getData();
  real* tempGradData = tempGradMat->getData();

  LayerPtr upsampleLayerCPU =
      doOneUpsampleTest(inputMat, "max-pool-with-mask", useGpu, tempGradData);

#ifdef PADDLE_WITH_CUDA
  useGpu = true;
  MatrixPtr inputGPUMat = Matrix::create(1, 128, false, useGpu);
  inputGPUMat->copyFrom(data, 128);

  LayerPtr upsampleLayerGPU = doOneUpsampleTest(
      inputGPUMat, "max-pool-with-mask", useGpu, tempGradData);

  checkMatrixEqual(upsampleLayerCPU->getOutput("").value,
                   upsampleLayerGPU->getOutput("").value);
  checkMatrixEqual(upsampleLayerCPU->getPrev(0)->getOutputGrad(),
                   upsampleLayerGPU->getPrev(0)->getOutputGrad());
#endif
}
paddle/math/Matrix.cpp
浏览文件 @
68a75344
...
@@ -1024,6 +1024,66 @@ void GpuMatrix::check(std::ostream& os, Matrix& refMat, bool printDiff) {
...
@@ -1024,6 +1024,66 @@ void GpuMatrix::check(std::ostream& os, Matrix& refMat, bool printDiff) {
LOG
(
INFO
)
<<
"the diffCnt is "
<<
diffCnt
;
LOG
(
INFO
)
<<
"the diffCnt is "
<<
diffCnt
;
}
}
/**
 * GPU upsample forward: checks that input and mask are GPU matrices and that
 * the operand shapes agree with the given extents, then hands the raw
 * buffers to hl_upsample_forward with this matrix as the destination.
 */
void GpuMatrix::upsampleForward(Matrix& input,
                                Matrix& mask,
                                size_t imgSizeH,
                                size_t imgSizeW,
                                size_t channels,
                                size_t outputH,
                                size_t outputW) {
  CHECK(input.useGpu_ == true) << "Matrix type are not equal";
  CHECK(mask.useGpu_ == true) << "Matrix type are not equal";

  const size_t batch = input.getHeight();

  CHECK(imgSizeH * imgSizeW * channels == input.getWidth());
  CHECK(imgSizeH * imgSizeW * channels == mask.getWidth());
  CHECK_EQ(batch, this->getHeight());
  CHECK(width_ == outputH * outputW * channels);

  real* const src = input.getData();
  real* const indices = mask.getData();
  real* const dst = data_;
  hl_upsample_forward(src,
                      indices,
                      batch,
                      imgSizeH,
                      imgSizeW,
                      channels,
                      outputH,
                      outputW,
                      dst);
}
/**
 * GPU upsample backward: checks that outputGrad and mask are GPU matrices
 * and that the shapes agree with the given extents, then hands the raw
 * buffers to hl_upsample_backward with this matrix as the input gradient.
 */
void GpuMatrix::upsampleBackward(Matrix& outputGrad,
                                 Matrix& mask,
                                 size_t imgSizeH,
                                 size_t imgSizeW,
                                 size_t channels,
                                 size_t outputH,
                                 size_t outputW) {
  CHECK(outputGrad.useGpu_ == true) << "Matrix type are not equal";
  CHECK(mask.useGpu_ == true) << "Matrix type are not equal";

  const size_t batch = outputGrad.getHeight();

  CHECK(imgSizeH * imgSizeW == this->getWidth() / channels);
  CHECK_EQ(batch, this->getHeight());
  CHECK_EQ(channels * outputH * outputW, outputGrad.getWidth());

  real* const gradOut = outputGrad.getData();
  real* const indices = mask.getData();
  real* const gradIn = data_;
  hl_upsample_backward(gradOut,
                       indices,
                       batch,
                       imgSizeH,
                       imgSizeW,
                       channels,
                       outputH,
                       outputW,
                       gradIn);
}
void
GpuMatrix
::
maxPoolForward
(
Matrix
&
inputMat
,
void
GpuMatrix
::
maxPoolForward
(
Matrix
&
inputMat
,
size_t
imgSizeH
,
size_t
imgSizeH
,
size_t
imgSizeW
,
size_t
imgSizeW
,
...
@@ -1986,6 +2046,72 @@ void CpuMatrix::inverse(MatrixPtr& matInv, bool memAlloc) {
...
@@ -1986,6 +2046,72 @@ void CpuMatrix::inverse(MatrixPtr& matInv, bool memAlloc) {
CHECK_EQ
(
info
,
0
);
CHECK_EQ
(
info
,
0
);
}
}
/**
 * CPU upsample forward. For every (sample, channel) plane, each input value
 * at position i is written to the output plane at the index stored in
 * mask[i]; an index outside [0, outputH * outputW) is fatal. Output
 * positions that no mask entry names are left untouched.
 */
void CpuMatrix::upsampleForward(Matrix& input,
                                Matrix& mask,
                                size_t imgSizeH,
                                size_t imgSizeW,
                                size_t channels,
                                size_t outputH,
                                size_t outputW) {
  const size_t inLength = imgSizeH * imgSizeW;
  const size_t outLength = outputH * outputW;
  const size_t batch = input.getHeight();

  CHECK(inLength == input.getWidth() / channels);
  CHECK_EQ(batch, this->getHeight());
  CHECK_EQ(channels * outLength, this->getWidth());

  const real* planeIn = input.getData();
  const real* planeMask = mask.getData();
  real* planeOut = data_;

  // Samples and channels are stored back to back, so walk them as one run
  // of planes.
  const size_t planeCount = batch * channels;
  for (size_t plane = 0; plane < planeCount; ++plane) {
    for (size_t pos = 0; pos < inLength; ++pos) {
      // Going through int makes a negative mask value wrap to a huge
      // size_t, which the range check below rejects.
      const size_t out_index = static_cast<int>(planeMask[pos]);
      if (out_index >= outLength) {
        LOG(FATAL) << "upsample index " << out_index << " out of range.";
      }
      planeOut[out_index] = planeIn[pos];
    }
    planeIn += inLength;
    planeMask += inLength;
    planeOut += outLength;
  }
}
/**
 * CPU upsample backward. For every (sample, channel) plane, the input
 * gradient at position i is overwritten with the output gradient found at
 * the index stored in mask[i]; an index outside [0, outputH * outputW) is
 * fatal.
 */
void CpuMatrix::upsampleBackward(Matrix& outputGrad,
                                 Matrix& mask,
                                 size_t imgSizeH,
                                 size_t imgSizeW,
                                 size_t channels,
                                 size_t outputH,
                                 size_t outputW) {
  const size_t inLength = imgSizeH * imgSizeW;
  const size_t outLength = outputH * outputW;
  const size_t batch = outputGrad.getHeight();

  CHECK(inLength == this->getWidth() / channels);
  CHECK_EQ(batch, this->getHeight());
  CHECK_EQ(channels * outLength, outputGrad.getWidth());

  const real* planeGradOut = outputGrad.getData();
  const real* planeMask = mask.getData();
  real* planeGradIn = data_;

  // Samples and channels are stored back to back, so walk them as one run
  // of planes.
  const size_t planeCount = batch * channels;
  for (size_t plane = 0; plane < planeCount; ++plane) {
    for (size_t pos = 0; pos < inLength; ++pos) {
      // Going through int makes a negative mask value wrap to a huge
      // size_t, which the range check below rejects.
      const size_t out_index = static_cast<int>(planeMask[pos]);
      if (out_index >= outLength) {
        LOG(FATAL) << "upsample index " << out_index << " out of range.";
      }
      planeGradIn[pos] = planeGradOut[out_index];
    }
    planeGradIn += inLength;
    planeMask += inLength;
    planeGradOut += outLength;
  }
}
void
CpuMatrix
::
maxPoolForward
(
Matrix
&
inputMat
,
void
CpuMatrix
::
maxPoolForward
(
Matrix
&
inputMat
,
size_t
imgSizeH
,
size_t
imgSizeH
,
size_t
imgSizeW
,
size_t
imgSizeW
,
...
...
paddle/math/Matrix.h
浏览文件 @
68a75344
...
@@ -859,6 +859,26 @@ public:
...
@@ -859,6 +859,26 @@ public:
LOG
(
FATAL
)
<<
"Not implemented"
;
LOG
(
FATAL
)
<<
"Not implemented"
;
}
}
/**
 * Upsample forward operation. The base implementation only reports that the
 * operation is not implemented; subclasses provide the real one.
 */
virtual void upsampleForward(Matrix& input,
                             Matrix& mask,
                             size_t imgSizeH,
                             size_t imgSizeW,
                             size_t channels,
                             size_t outputH,
                             size_t outputW) {
  LOG(FATAL) << "Not implemented";
}
/**
 * Upsample backward operation. The base implementation only reports that the
 * operation is not implemented; subclasses provide the real one.
 */
virtual void upsampleBackward(Matrix& outputGrad,
                              Matrix& mask,
                              size_t imgSizeH,
                              size_t imgSizeW,
                              size_t channels,
                              size_t outputH,
                              size_t outputW) {
  LOG(FATAL) << "Not implemented";
}
/**
/**
* Pooling forward operation, pick out the largest element
* Pooling forward operation, pick out the largest element
* in the sizeX of value, if the maskMatP is not NULL, it will
* in the sizeX of value, if the maskMatP is not NULL, it will
...
@@ -1420,6 +1440,22 @@ public:
...
@@ -1420,6 +1440,22 @@ public:
void
classificationError
(
Matrix
&
output
,
IVector
&
label
,
size_t
topkSize
=
1
);
void
classificationError
(
Matrix
&
output
,
IVector
&
label
,
size_t
topkSize
=
1
);
void
upsampleForward
(
Matrix
&
input
,
Matrix
&
mask
,
size_t
imgSizeH
,
size_t
imgSizeW
,
size_t
channels
,
size_t
outputH
,
size_t
outputW
);
void
upsampleBackward
(
Matrix
&
outputGrad
,
Matrix
&
mask
,
size_t
imgSizeH
,
size_t
imgSizeW
,
size_t
channels
,
size_t
outputH
,
size_t
outputW
);
void
maxPoolForward
(
Matrix
&
inputMat
,
void
maxPoolForward
(
Matrix
&
inputMat
,
size_t
imgSizeH
,
size_t
imgSizeH
,
size_t
imgSizeW
,
size_t
imgSizeW
,
...
@@ -1694,6 +1730,22 @@ public:
...
@@ -1694,6 +1730,22 @@ public:
MatrixPtr
clone
(
size_t
height
,
size_t
width
,
bool
useGpu
=
false
);
MatrixPtr
clone
(
size_t
height
,
size_t
width
,
bool
useGpu
=
false
);
void
upsampleForward
(
Matrix
&
input
,
Matrix
&
mask
,
size_t
imgSizeH
,
size_t
imgSizeW
,
size_t
channels
,
size_t
outputH
,
size_t
outputW
);
void
upsampleBackward
(
Matrix
&
outputGrad
,
Matrix
&
mask
,
size_t
imgSizeH
,
size_t
imgSizeW
,
size_t
channels
,
size_t
outputH
,
size_t
outputW
);
void
maxPoolForward
(
Matrix
&
inputMat
,
void
maxPoolForward
(
Matrix
&
inputMat
,
size_t
imgSizeH
,
size_t
imgSizeH
,
size_t
imgSizeW
,
size_t
imgSizeW
,
...
...
proto/ModelConfig.proto
浏览文件 @
68a75344
...
@@ -323,6 +323,16 @@ message ClipConfig {
...
@@ -323,6 +323,16 @@ message ClipConfig {
required
double
max
=
2
;
required
double
max
=
2
;
}
}
// Per-input settings of the "upsample" layer.
message UpsampleConfig {
  // Description of the input image (size and channels).
  required ImageConfig image_conf = 1;
  // Scale factor along x; reads as 2 when unset.
  optional uint32 scale = 2 [ default = 2 ];
  // Scale factor along y; reads as 2 when unset.
  optional uint32 scale_y = 3 [ default = 2 ];
  // Pad flags along x and y; both read as false when unset.
  optional bool pad_out_x = 4 [ default = false ];
  optional bool pad_out_y = 5 [ default = false ];
  // Explicit output size along x and y; no default value.
  optional uint32 upsample_size = 6;
  optional uint32 upsample_size_y = 7;
}
message
ROIPoolConfig
{
message
ROIPoolConfig
{
required
uint32
pooled_width
=
1
;
required
uint32
pooled_width
=
1
;
required
uint32
pooled_height
=
2
;
required
uint32
pooled_height
=
2
;
...
@@ -359,6 +369,7 @@ message LayerInputConfig {
...
@@ -359,6 +369,7 @@ message LayerInputConfig {
optional
ClipConfig
clip_conf
=
18
;
optional
ClipConfig
clip_conf
=
18
;
optional
ScaleSubRegionConfig
scale_sub_region_conf
=
19
;
optional
ScaleSubRegionConfig
scale_sub_region_conf
=
19
;
optional
ROIPoolConfig
roi_pool_conf
=
20
;
optional
ROIPoolConfig
roi_pool_conf
=
20
;
optional
UpsampleConfig
upsample_conf
=
21
;
}
}
message
LayerConfig
{
message
LayerConfig
{
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
68a75344
...
@@ -133,6 +133,8 @@ def fc(input,
...
@@ -133,6 +133,8 @@ def fc(input,
bias_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for the bias
bias_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for the bias
of this layer. If it is set to None, no bias will be added to the output units.
of this layer. If it is set to None, no bias will be added to the output units.
act (str, default None): Activation to be applied to the output of this layer.
act (str, default None): Activation to be applied to the output of this layer.
use_mkldnn(bool): Use mkldnn kernel or not, it is valid only when the mkldnn
library is installed. Default: False
name (str, default None): The name of this layer.
name (str, default None): The name of this layer.
Returns:
Returns:
...
@@ -153,38 +155,64 @@ def fc(input,
...
@@ -153,38 +155,64 @@ def fc(input,
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
mul_results
=
[]
mul_results
=
[]
for
input_var
,
param_attr
in
helper
.
iter_inputs_and_params
():
if
use_mkldnn
:
input_shape
=
input_var
.
shape
tmp
=
helper
.
create_tmp_variable
(
dtype
)
input_shape
=
input
.
shape
param_shape
=
[
param_shape
=
[
reduce
(
lambda
a
,
b
:
a
*
b
,
input_shape
[
num_flatten_dims
:],
1
)
reduce
(
lambda
a
,
b
:
a
*
b
,
input_shape
[
num_flatten_dims
:],
1
)
]
+
[
size
]
]
+
[
size
]
w
=
helper
.
create_parameter
(
w
=
helper
.
create_parameter
(
attr
=
param_attr
,
shape
=
param_shape
,
dtype
=
dtype
,
is_bias
=
False
)
attr
=
helper
.
param_attr
,
tmp
=
helper
.
create_tmp_variable
(
dtype
)
shape
=
param_shape
,
dtype
=
dtype
,
is_bias
=
False
)
if
bias_attr
is
None
or
bias_attr
is
False
:
bias_attr
=
False
else
:
bias_attr
=
True
helper
.
append_op
(
helper
.
append_op
(
type
=
"
mul
"
,
type
=
"
fc
"
,
inputs
=
{
"
X"
:
input_var
,
inputs
=
{
"
Input"
:
input
,
"
Y
"
:
w
},
"
W
"
:
w
},
outputs
=
{
"Out"
:
tmp
},
outputs
=
{
"Out"
:
tmp
},
attrs
=
{
attrs
=
{
"use_mkldnn"
:
use_mkldnn
,
"x_num_col_dims"
:
num_flatten_dims
,
"bias_attr"
:
bias_attr
})
"y_num_col_dims"
:
1
,
return
helper
.
append_activation
(
tmp
)
'use_mkldnn'
:
use_mkldnn
})
mul_results
.
append
(
tmp
)
# sum
if
len
(
mul_results
)
==
1
:
pre_bias
=
mul_results
[
0
]
else
:
else
:
pre_bias
=
helper
.
create_tmp_variable
(
dtype
)
for
input_var
,
param_attr
in
helper
.
iter_inputs_and_params
():
helper
.
append_op
(
input_shape
=
input_var
.
shape
type
=
"sum"
,
inputs
=
{
"X"
:
mul_results
},
outputs
=
{
"Out"
:
pre_bias
})
param_shape
=
[
# add bias
reduce
(
lambda
a
,
b
:
a
*
b
,
input_shape
[
num_flatten_dims
:],
1
)
pre_activation
=
helper
.
append_bias_op
(
pre_bias
,
dim_start
=
num_flatten_dims
)
]
+
[
size
]
# add activation
return
helper
.
append_activation
(
pre_activation
)
w
=
helper
.
create_parameter
(
attr
=
param_attr
,
shape
=
param_shape
,
dtype
=
dtype
,
is_bias
=
False
)
tmp
=
helper
.
create_tmp_variable
(
dtype
)
helper
.
append_op
(
type
=
"mul"
,
inputs
=
{
"X"
:
input_var
,
"Y"
:
w
},
outputs
=
{
"Out"
:
tmp
},
attrs
=
{
"x_num_col_dims"
:
num_flatten_dims
,
"y_num_col_dims"
:
1
,
})
mul_results
.
append
(
tmp
)
if
len
(
mul_results
)
==
1
:
pre_bias
=
mul_results
[
0
]
else
:
pre_bias
=
helper
.
create_tmp_variable
(
dtype
)
helper
.
append_op
(
type
=
"sum"
,
inputs
=
{
"X"
:
mul_results
},
outputs
=
{
"Out"
:
pre_bias
})
# add bias
pre_activation
=
helper
.
append_bias_op
(
pre_bias
,
dim_start
=
num_flatten_dims
)
# add activation
return
helper
.
append_activation
(
pre_activation
)
def
embedding
(
input
,
def
embedding
(
input
,
...
...
python/paddle/fluid/tests/unittests/test_fc_op.py
0 → 100644
浏览文件 @
68a75344
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
def fully_connected_naive(input, weights, bias_data=None):
    """Reference fully-connected computation used to build expected outputs.

    Args:
        input: 4-D array of shape (n, c, h, w); flattened to (n, c*h*w).
        weights: 2-D array of shape (w_h, w_c); reshaped to (w_c, c*h*w) and
            then transposed before the product.
        bias_data: optional bias added to the product. ``None`` means no
            bias.

    Returns:
        Array of shape (n, w_c).
    """
    in_n, in_c, in_h, in_w = input.shape
    w_h, w_c = weights.shape

    x_data = np.reshape(input, [in_n, in_c * in_h * in_w])
    w_data = np.transpose(np.reshape(weights, (w_c, in_c * in_h * in_w)))

    result = np.dot(x_data, w_data)
    # Compare against None explicitly: the truth value of a multi-element
    # ndarray is ambiguous and `not bias_data` would raise ValueError.
    if bias_data is not None:
        result = result + bias_data

    return result
class MatrixGenerate:
    """Random float32 test data: `input` and `weights` arrays.

    `input` has shape (mb, ic, h, w); `weights` has shape (ic * h * w, oc).
    """

    def __init__(self, mb, ic, oc, h, w):
        input_shape = (mb, ic, h, w)
        weights_shape = (ic * h * w, oc)
        # Draw the input first, then the weights.
        self.input = np.random.random(input_shape).astype("float32")
        self.weights = np.random.random(weights_shape).astype("float32")
class TestFCMKLDNNOp(OpTest):
    """Output and gradient checks for the "fc" op with use_mkldnn enabled.

    NOTE(review): setUp never calls init_op_type(), so overriding that method
    has no effect unless OpTest invokes it -- confirm.
    """

    def setUp(self):
        self.op_type = "fc"
        self.use_mkldnn = True
        self.with_bias = True
        self.matrix = MatrixGenerate(1, 10, 15, 3, 3)

        data = self.matrix.input
        weights = self.matrix.weights

        self.inputs = {'Input': data, 'W': weights}
        self.attrs = {
            'use_mkldnn': self.use_mkldnn,
            'with_bias': self.with_bias
        }
        # The expected output is computed without a bias term.
        expected = fully_connected_naive(data, weights)
        self.outputs = {'Out': expected}

    def test_check_output(self):
        self.check_output()

    def test_check_grad_normal(self):
        checked = {'Input', 'W'}
        self.check_grad(checked, 'Out', max_relative_error=0.9)

    def test_check_grad_no_weight(self):
        skipped = {'W'}
        self.check_grad(
            ['Input'], 'Out', max_relative_error=0.5, no_grad_set=skipped)
# Shape variants of TestFCMKLDNNOp. Each one overrides init_op_type() to pick
# different MatrixGenerate(mb, ic, oc, h, w) dimensions.
# NOTE(review): the setUp shown in this file does not call init_op_type();
# confirm that OpTest does, otherwise these variants all run the base shape.
class TestFCMKLDNNOp1(TestFCMKLDNNOp):
    def init_op_type(self):
        self.matrix = MatrixGenerate(2, 15, 48, 2, 2)


class TestFCMKLDNNOp2(TestFCMKLDNNOp):
    def init_op_type(self):
        self.matrix = MatrixGenerate(2, 32, 40, 1, 1)


class TestFCMKLDNNOp3(TestFCMKLDNNOp):
    def init_op_type(self):
        self.matrix = MatrixGenerate(2, 2, 4, 1, 1)


class TestFCMKLDNNOp4(TestFCMKLDNNOp):
    def init_op_type(self):
        self.with_bias = False
        self.matrix = MatrixGenerate(2, 32, 48, 2, 2)


# Previously also named TestFCMKLDNNOp4, which silently replaced the class
# above so that only one of the two was ever collected.
class TestFCMKLDNNOp5(TestFCMKLDNNOp):
    def init_op_type(self):
        self.with_bias = False
        self.matrix = MatrixGenerate(2, 32, 1000, 6, 6)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/trainer/config_parser.py
浏览文件 @
68a75344
...
@@ -471,6 +471,7 @@ class Input(Cfg):
...
@@ -471,6 +471,7 @@ class Input(Cfg):
maxout
=
None
,
maxout
=
None
,
spp
=
None
,
spp
=
None
,
pad
=
None
,
pad
=
None
,
upsample
=
None
,
format
=
None
,
format
=
None
,
nnz
=
None
,
nnz
=
None
,
is_static
=
None
,
is_static
=
None
,
...
@@ -983,6 +984,13 @@ class Pad(Cfg):
...
@@ -983,6 +984,13 @@ class Pad(Cfg):
self
.
add_keys
(
locals
())
self
.
add_keys
(
locals
())
# Settings holder for the upsample layer input: the scale factors, pad flags
# and explicit output sizes passed to the constructor.
@config_class
class Upsample(Cfg):
    def __init__(self, scale, scale_y, pad_out_x, pad_out_y, upsample_size,
                 upsample_size_y):
        # locals() is exactly `self` plus the six arguments, so do not add
        # local variables to this method.
        # Presumably add_keys stores them as attributes -- confirm in Cfg.
        self.add_keys(locals())
@
config_class
@
config_class
class
Norm
(
Cfg
):
class
Norm
(
Cfg
):
def
__init__
(
self
,
def
__init__
(
self
,
...
@@ -2380,6 +2388,46 @@ class SpatialPyramidPoolLayer(LayerBase):
...
@@ -2380,6 +2388,46 @@ class SpatialPyramidPoolLayer(LayerBase):
self
.
set_cnn_layer
(
name
,
1
,
output_x
,
spp_conf
.
image_conf
.
channels
)
self
.
set_cnn_layer
(
name
,
1
,
output_x
,
spp_conf
.
image_conf
.
channels
)
@config_layer('upsample')
class UpsampleLayer(LayerBase):
    """Config-side description of the 'upsample' layer.

    Copies the image geometry of input 0 and the Upsample settings attached
    to that input into the layer's upsample_conf, then sets the layer's
    height/width, depth and size from the resulting output extents.
    """

    def __init__(self, name, inputs, **xargs):
        super(UpsampleLayer, self).__init__(
            name, 'upsample', 0, inputs=inputs, **xargs)

        input_layer = self.get_input_layer(0)
        upsample_conf = self.config.inputs[0].upsample_conf
        image_conf = upsample_conf.image_conf

        image_conf.img_size = input_layer.width
        image_conf.img_size_y = input_layer.height
        plane = input_layer.width * input_layer.height
        image_conf.channels = input_layer.size / plane

        upsample = self.inputs[0].upsample
        output_x, output_y = 0, 0

        if upsample.scale:
            upsample_conf.scale = upsample.scale
            upsample_conf.scale_y = upsample.scale_y
            output_x = input_layer.width * upsample.scale
            output_y = input_layer.height * upsample.scale_y

        upsample_conf.pad_out_x = upsample.pad_out_x
        upsample_conf.pad_out_y = upsample.pad_out_y

        # An explicit size, when given, overrides the scaled extents.
        if upsample.upsample_size:
            upsample_conf.upsample_size = upsample.upsample_size
            upsample_conf.upsample_size_y = upsample.upsample_size_y
            output_x = upsample.upsample_size
            output_y = upsample.upsample_size_y

        output_size = image_conf.channels * output_x * output_y

        self.set_layer_height_width(output_y, output_x)
        self.set_layer_depth(input_layer.depth)
        self.set_layer_size(output_size)
@
config_layer
(
'pad'
)
@
config_layer
(
'pad'
)
class
PadLayer
(
LayerBase
):
class
PadLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
**
xargs
):
def
__init__
(
self
,
name
,
inputs
,
**
xargs
):
...
...
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
68a75344
...
@@ -148,6 +148,7 @@ __all__ = [
...
@@ -148,6 +148,7 @@ __all__ = [
'resize_layer'
,
'resize_layer'
,
'sub_seq_layer'
,
'sub_seq_layer'
,
'scale_sub_region_layer'
,
'scale_sub_region_layer'
,
'upsample_layer'
,
'factorization_machine'
,
'factorization_machine'
,
]
]
...
@@ -166,6 +167,7 @@ class LayerType(object):
...
@@ -166,6 +167,7 @@ class LayerType(object):
SEQUENCE_RESHAPE
=
'seqreshape'
SEQUENCE_RESHAPE
=
'seqreshape'
POOLING_MAX
=
'max'
POOLING_MAX
=
'max'
POOLING_AVG
=
'average'
POOLING_AVG
=
'average'
UPSAMPLE_LAYER
=
'upsample'
FC_LAYER
=
'fc'
FC_LAYER
=
'fc'
COST
=
'cost'
COST
=
'cost'
COSINE_SIM_VEC
=
'cos_vm'
COSINE_SIM_VEC
=
'cos_vm'
...
@@ -3014,6 +3016,83 @@ def img_pool3d_layer(input,
...
@@ -3014,6 +3016,83 @@ def img_pool3d_layer(input,
size
=
l
.
config
.
size
)
size
=
l
.
config
.
size
)
@wrap_name_default("upsample")
@layer_support()
def upsample_layer(input,
                   name=None,
                   scale=None,
                   scale_y=None,
                   upsample_size=None,
                   upsample_size_y=None,
                   pad_out_x=False,
                   pad_out_y=False,
                   layer_attr=None):
    """
    The DePooling process.
    Inputs should be a list of length 2. The first input is a layer,
    and the second input should be the MaxWithMaskPoolingLayer.

    The example usage is:

    ..  code-block:: python

        pool1 = paddle.v2.layer.img_pool(input=input, pool_size=2, stride=2,
                                        pool_type=paddle.pooling.MaxWithMask())
        upsample = paddle.v2.layer.upsample(input=[layer1, pool1])

    :param name: The name of this layer. It is optional.
    :type name: basestring
    :param input: contains an input layer and a MaxWithMaskPoolingLayer
    :type input: list | tuple | collections.Sequence
    :param scale: outputSize = scale * inputSize
    :type scale: int
    :param scale_y: scale factor of the y dimension. It will be equal to
                    scale if its value is None.
    :type scale_y: int | None
    :param upsample_size: specify the outputSize.
    :type upsample_size: int
    :param upsample_size_y: specify the y dimension outputSize. It will be
                            equal to upsample_size if its value is None.
    :type upsample_size_y: int | None
    :param pad_out_x: specify exact x dimension size. This parameter only
                      works when scale is 2
    :type pad_out_x: bool
    :param pad_out_y: specify exact y dimension size. This parameter only
                      works when scale is 2
    :type pad_out_y: bool
    :param layer_attr: Extra Layer Attribute.
    :type layer_attr: ExtraLayerAttribute
    :return: LayerOutput object.
    :rtype: LayerOutput
    """

    assert (scale is not None) or (upsample_size is not None), \
        'scale or upsample_size, there must be one to be designated'

    assert len(input) == 2, 'layer input size must be 2'

    assert input[1].layer_type == LayerType.POOL_LAYER, \
        'the second input should be the MaxPoolWithMaskLayer'

    # Fill in the y values from the x values only when the caller left them
    # unset; explicitly supplied y values are kept.
    if scale_y is None:
        scale_y = scale
    if upsample_size_y is None:
        upsample_size_y = upsample_size

    layer_type = LayerType.UPSAMPLE_LAYER

    layer = Layer(
        name=name,
        type=layer_type,
        inputs=[
            Input(
                input[0].name,
                upsample=Upsample(scale, scale_y, pad_out_x, pad_out_y,
                                  upsample_size, upsample_size_y)),
            Input(input[1].name)
        ],
        **ExtraLayerAttribute.to_kwargs(layer_attr))

    sz = layer.config.size

    return LayerOutput(name, layer_type=layer_type, parents=input, size=sz)
@
wrap_name_default
(
"spp"
)
@
wrap_name_default
(
"spp"
)
@
layer_support
()
@
layer_support
()
def
spp_layer
(
input
,
def
spp_layer
(
input
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录