Commit 847bb6a2
Authored on Mar 07, 2019 by jerrywgz

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fpn_ops

Parents: f0177a1e, 893789a0

Showing 70 changed files with 2946 additions and 690 deletions (+2946 -690)
paddle/fluid/API.spec  +5 -3
paddle/fluid/framework/details/build_strategy.h  +3 -2
paddle/fluid/framework/details/memory_optimize_helper.cc  +7 -5
paddle/fluid/framework/operator.cc  +0 -41
paddle/fluid/framework/operator.h  +2 -78
paddle/fluid/inference/api/analysis_predictor.cc  +3 -0
paddle/fluid/inference/api/api_impl.cc  +3 -0
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc  +6 -4
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc  +6 -4
paddle/fluid/inference/tests/api/tester_helper.h  +8 -4
paddle/fluid/operators/detection/CMakeLists.txt  +1 -0
paddle/fluid/operators/detection/box_decoder_and_assign_op.cc  +169 -0
paddle/fluid/operators/detection/box_decoder_and_assign_op.cu  +147 -0
paddle/fluid/operators/detection/box_decoder_and_assign_op.h  +103 -0
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h  +10 -13
paddle/fluid/operators/jit/benchmark.cc  +23 -0
paddle/fluid/operators/jit/gen/CMakeLists.txt  +1 -0
paddle/fluid/operators/jit/gen/vbroadcast.cc  +91 -0
paddle/fluid/operators/jit/gen/vbroadcast.h  +53 -0
paddle/fluid/operators/jit/helper.cc  +2 -0
paddle/fluid/operators/jit/kernel_base.h  +9 -0
paddle/fluid/operators/jit/kernel_key.cc  +5 -0
paddle/fluid/operators/jit/more/mkl/CMakeLists.txt  +2 -0
paddle/fluid/operators/jit/more/mkl/mkl.cc  +18 -0
paddle/fluid/operators/jit/more/mkl/mkl.h  +10 -0
paddle/fluid/operators/jit/refer/CMakeLists.txt  +2 -0
paddle/fluid/operators/jit/refer/refer.cc  +3 -0
paddle/fluid/operators/jit/refer/refer.h  +17 -0
paddle/fluid/operators/jit/test.cc  +67 -24
paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc  +94 -0
paddle/fluid/operators/ngraph/ops/activation_op.h  +1 -1
paddle/fluid/operators/recurrent_op.cc  +29 -16
paddle/fluid/operators/requantize_op.cc  +46 -0
paddle/fluid/operators/requantize_op.h  +47 -0
paddle/fluid/operators/reshape_op.cc  +4 -1
paddle/fluid/operators/sequence_ops/sequence_erase_op.cu  +10 -9
paddle/fluid/operators/sequence_ops/sequence_erase_op.h  +10 -8
paddle/fluid/operators/spectral_norm_op.cc  +197 -0
paddle/fluid/operators/spectral_norm_op.cu  +22 -0
paddle/fluid/operators/spectral_norm_op.h  +273 -0
python/paddle/fluid/__init__.py  +2 -1
python/paddle/fluid/compiler.py  +6 -6
python/paddle/fluid/imperative/layer_object_helper.py  +220 -0
python/paddle/fluid/imperative/layers.py  +48 -1
python/paddle/fluid/imperative/nn.py  +32 -53
python/paddle/fluid/initializer.py  +8 -9
python/paddle/fluid/layer_helper.py  +14 -309
python/paddle/fluid/layer_helper_base.py  +381 -0
python/paddle/fluid/layers/control_flow.py  +2 -2
python/paddle/fluid/layers/detection.py  +66 -3
python/paddle/fluid/layers/nn.py  +96 -7
python/paddle/fluid/optimizer.py  +1 -1
python/paddle/fluid/parallel_executor.py  +12 -7
python/paddle/fluid/tests/unittests/mkldnn/mkldnn_op_test.py  +14 -0
python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py  +1 -14
python/paddle/fluid/tests/unittests/mkldnn/test_requantize_mkldnn_op.py  +93 -0
python/paddle/fluid/tests/unittests/test_base_layer.py  +14 -24
python/paddle/fluid/tests/unittests/test_box_decoder_and_assign_op.py  +96 -0
python/paddle/fluid/tests/unittests/test_dist_base.py  +3 -0
python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py  +5 -0
python/paddle/fluid/tests/unittests/test_imperative_basic.py  +26 -26
python/paddle/fluid/tests/unittests/test_imperative_optimizer.py  +1 -1
python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py  +11 -12
python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py  +123 -0
python/paddle/fluid/tests/unittests/test_layers.py  +13 -0
python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py  +5 -1
python/paddle/fluid/tests/unittests/test_pass_builder.py  +3 -0
python/paddle/fluid/tests/unittests/test_py_func_op.py  +4 -0
python/paddle/fluid/tests/unittests/test_sequence_erase_op.py  +15 -0
python/paddle/fluid/tests/unittests/test_spectral_norm_op.py  +122 -0
paddle/fluid/API.spec (view file @ 847bb6a2)

@@ -128,6 +128,7 @@ paddle.fluid.layers.row_conv (ArgSpec(args=['input', 'future_context_size', 'par
 paddle.fluid.layers.multiplex (ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None), ('document', '013795af319e2e86d3506741941078ee'))
 paddle.fluid.layers.layer_norm (ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None)), ('document', 'de6a906950bae9f3c245cb744d22b94e'))
 paddle.fluid.layers.group_norm (ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None)), ('document', '419c3a24a83cc89219a029cf4092788b'))
+paddle.fluid.layers.spectral_norm (ArgSpec(args=['weight', 'dim', 'power_iters', 'eps', 'name'], varargs=None, keywords=None, defaults=(0, 1, 1e-12, None)), ('document', '3f536aafba30d793287b52d231baff1b'))
 paddle.fluid.layers.softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, True, False)), ('document', 'bce1b75e3d95b75cacd1099655cbb3c3'))
 paddle.fluid.layers.smooth_l1 (ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'c6b175d253c55baf4b9c0eca9b1dda88'))
 paddle.fluid.layers.one_hot (ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None), ('document', '6148b6a555cbfb62fdcd030d8982c18c'))

@@ -261,7 +262,7 @@ paddle.fluid.layers.Switch.default (ArgSpec(args=['self'], varargs=None, keyword
 paddle.fluid.layers.increment (ArgSpec(args=['x', 'value', 'in_place'], varargs=None, keywords=None, defaults=(1.0, True)), ('document', '73bb96ec4783ec1a11e760e8851b0e77'))
 paddle.fluid.layers.array_write (ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,)), ('document', '40b6d15f4c86b2b09df340d7778ad713'))
 paddle.fluid.layers.create_array (ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None), ('document', '2d4f20087080ba5105b55205ad5c5b6a'))
-paddle.fluid.layers.less_than (ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None)), ('document', '067bbc799c66289ca8b8924c26b6673f'))
+paddle.fluid.layers.less_than (ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords=None, defaults=(None, None)), ('document', '067bbc799c66289ca8b8924c26b6673f'))
 paddle.fluid.layers.equal (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '80c29b1dc64718f0116de90d1ac88a77'))
 paddle.fluid.layers.array_read (ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None), ('document', '0275133f1dde2aed528b4d3230edf823'))
 paddle.fluid.layers.array_length (ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None), ('document', 'ffb8b9578ec66db565b223d313aa82a2'))

@@ -286,7 +287,7 @@ paddle.fluid.layers.StaticRNN.step_output (ArgSpec(args=['self', 'o'], varargs=N
 paddle.fluid.layers.StaticRNN.update_memory (ArgSpec(args=['self', 'mem', 'var'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.layers.reorder_lod_tensor_by_rank (ArgSpec(args=['x', 'rank_table'], varargs=None, keywords=None, defaults=None), ('document', '3545f529ef04e8f6ecb76b47fa3df01a'))
 paddle.fluid.layers.Print (ArgSpec(args=['input', 'first_n', 'message', 'summarize', 'print_tensor_name', 'print_tensor_type', 'print_tensor_shape', 'print_tensor_lod', 'print_phase'], varargs=None, keywords=None, defaults=(-1, None, -1, True, True, True, True, 'both')), ('document', '5fef91b0e21c93610785f2b1f7161732'))
-paddle.fluid.layers.is_empty (ArgSpec(args=['x', 'cond'], varargs=None, keywords='ignored', defaults=(None,)), ('document', 'bbe578dbb49ad13e15b014e98c22b519'))
+paddle.fluid.layers.is_empty (ArgSpec(args=['x', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', 'bbe578dbb49ad13e15b014e98c22b519'))
 paddle.fluid.layers.sigmoid (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '29a25ba78de79152076cacfc5443137d'))
 paddle.fluid.layers.logsigmoid (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '81ccb7acafd06c7728e11581f5d342e3'))
 paddle.fluid.layers.exp (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e6b3e769413d96aab4176f96db25984b'))

@@ -328,7 +329,8 @@ paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varar
 paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '991e934c3e09abf0edec7c9c978b4691'))
 paddle.fluid.layers.box_clip (ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '397e9e02b451d99c56e20f268fa03f2e'))
 paddle.fluid.layers.multiclass_nms (ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)), ('document', 'ca7d1107b6c5d2d6d8221039a220fde0'))
-paddle.fluid.layers.distribute_fpn_proposals (ArgSpec(args=['fpn_rois', 'min_level', 'max_level', 'refer_level', 'refer_scale', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'fdffe52577f7e74c090b030867fefc11'))
+paddle.fluid.layers.distribute_fpn_proposals (ArgSpec(args=['fpn_rois', 'min_level', 'max_level', 'refer_level', 'refer_scale', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '7bb011ec26bace2bc23235aa4a17647d'))
+paddle.fluid.layers.box_decoder_and_assign (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'box_score', 'box_clip', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '005a5ae47d6c8fff721931d69d072b9f'))
 paddle.fluid.layers.accuracy (ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)), ('document', '9808534c12c5e739a10f73ebb0b4eafd'))
 paddle.fluid.layers.auc (ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1)), ('document', 'e0e95334fce92d16c2d9db6e7caffc47'))
 paddle.fluid.layers.exponential_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)), ('document', '98a5050bee8522fcea81aa795adaba51'))
paddle/fluid/framework/details/build_strategy.h (view file @ 847bb6a2)

@@ -14,6 +14,7 @@
 #pragma once
 #include <memory>
 #include <string>
+#include <vector>

@@ -76,11 +77,11 @@ struct BuildStrategy {
   bool fuse_relu_depthwise_conv_{false};
-  bool memory_optimize_{false};
+  bool memory_optimize_{true};
   // TODO(dzhwinter):
   // make enable_inplace, memory_optimize_
   // memory_early_delete_ true by default
-  bool enable_inplace_{false};
+  bool enable_inplace_{true};
   bool enable_sequential_execution_{false};
paddle/fluid/framework/details/memory_optimize_helper.cc (view file @ 847bb6a2)

@@ -20,6 +20,9 @@
 #include <numeric>
 #include <sstream>
 #include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/var_desc.h"
 #include "paddle/fluid/platform/cpu_info.h"

@@ -302,7 +305,10 @@ std::string OrderedSet::ToString() const {
 bool NodeCanReused(ir::Node* node) {
   // valid the node is a var node
-  if (node == nullptr || !node->IsVar() || node->IsCtrlVar()) return false;
+  // vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad
+  if (node == nullptr || !node->IsVar() || node->IsCtrlVar() ||
+      node->Name() == kEmptyVarName)
+    return false;

   bool flag = true;
   // op output force generated in cpu, can not be reused.

@@ -348,10 +354,6 @@ bool NodeCanReused(const VarDesc& node) {
   if (shape.empty() || size < MinChunkSize()) {
     return false;
   }
-  // vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad
-  std::string name = node.Name();
-  if (!name.empty() && name[0] == '@' && name[name.size() - 1] == '@')
-    return false;
   return true;
 }
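To make the new NodeCanReused guard concrete, here is a minimal Python sketch of the predicate under illustrative names (the real check operates on ir::Node and the C++ kEmptyVarName constant):

import typing

def node_can_reused(node: typing.Any, empty_var_name: str = "@EMPTY@") -> bool:
    # Reject null, non-variable, and control variables, and additionally
    # (the new clause) the empty placeholder name, which must never be
    # picked as a memory-reuse target.
    if node is None or not node.is_var() or node.is_ctrl_var():
        return False
    if node.name() == empty_var_name:
        return False
    return True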
paddle/fluid/framework/operator.cc (view file @ 847bb6a2)

@@ -467,12 +467,6 @@ const Variable* ExecutionContext::InputVar(const std::string& name) const {
   return it->second.empty() ? nullptr : it->second[0];
 }

-const Variable* ExecutionContext::LegacyInputVar(
-    const std::string& name) const {
-  auto ipt = op_.Input(name);
-  return ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt);
-}
-
 Variable* ExecutionContext::OutputVar(const std::string& name) const {
   auto it = ctx_.outputs.find(name);
   if (it == ctx_.outputs.end()) return nullptr;

@@ -483,22 +477,11 @@ Variable* ExecutionContext::OutputVar(const std::string& name) const {
   return it->second.empty() ? nullptr : it->second[0];
 }

-Variable* ExecutionContext::LegacyOutputVar(const std::string& name) const {
-  auto opt = op_.Output(name);
-  return opt == kEmptyVarName ? nullptr : scope_.FindVar(opt);
-}
-
 template <>
 const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const {
   return Input<LoDTensor>(name);
 }

-template <>
-const Tensor* ExecutionContext::LegacyInput<Tensor>(
-    const std::string& name) const {
-  return LegacyInput<LoDTensor>(name);
-}
-
 template <>
 const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
     const std::string& name) const {

@@ -521,35 +504,11 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
   return res;
 }

-template <>
-const std::vector<const Tensor*> ExecutionContext::LegacyMultiInput<Tensor>(
-    const std::string& name) const {
-  auto names = op().Inputs(name);
-  std::vector<const Tensor*> res;
-  res.reserve(names.size());
-  std::transform(names.begin(), names.end(), std::back_inserter(res),
-                 [&](const std::string& sub_name) -> const Tensor* {
-                   auto var = scope_.FindVar(sub_name);
-                   if (var == nullptr) return nullptr;
-                   PADDLE_ENFORCE(
-                       var->IsType<LoDTensor>(),
-                       "%s should be LoDTensor, but the received type is %s",
-                       sub_name, ToTypeName(var->Type()));
-                   return &(var->Get<LoDTensor>());
-                 });
-  return res;
-}
-
 template <>
 Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const {
   return Output<LoDTensor>(name);
 }

-template <>
-Tensor* ExecutionContext::LegacyOutput<Tensor>(const std::string& name) const {
-  return LegacyOutput<LoDTensor>(name);
-}
-
 template <>
 std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
     const std::string& name) const {
paddle/fluid/framework/operator.h (view file @ 847bb6a2)

@@ -16,9 +16,11 @@ limitations under the License. */
 #include <algorithm>
 #include <atomic>
+#include <memory>
 #include <string>
 #include <tuple>
 #include <unordered_map>
+#include <utility>
 #include <vector>
 #include "glog/logging.h"  // For VLOG

@@ -253,31 +255,6 @@ class ExecutionContext {
     return it->second;
   }

-  const std::vector<Variable*> LegacyMultiInputVar(
-      const std::string& name) const {
-    auto names = op_.Inputs(name);
-    std::vector<Variable*> res;
-    res.reserve(names.size());
-    std::transform(names.begin(), names.end(), std::back_inserter(res),
-                   [this](const std::string& name) {
-                     return name == kEmptyVarName ? nullptr
-                                                  : scope_.FindVar(name);
-                   });
-    return res;
-  }
-
-  std::vector<Variable*> LegacyMultiOutputVar(const std::string& name) const {
-    auto names = op_.Outputs(name);
-    std::vector<Variable*> res;
-    res.reserve(names.size());
-    std::transform(names.begin(), names.end(), std::back_inserter(res),
-                   [this](const std::string& name) {
-                     return name == kEmptyVarName ? nullptr
-                                                  : scope_.FindVar(name);
-                   });
-    return res;
-  }
-
   template <typename T>
   const T* Input(const std::string& name) const {
     auto* var = InputVar(name);

@@ -290,22 +267,6 @@ class ExecutionContext {
     return var == nullptr ? nullptr : var->GetMutable<T>();
   }

-  template <typename T>
-  const T* LegacyInput(const std::string& name) const {
-    auto* var = LegacyInputVar(name);
-    return var == nullptr ? nullptr : &var->Get<T>();
-  }
-
-  template <typename T>
-  T* LegacyOutput(const std::string& name) const {
-    auto var = LegacyOutputVar(name);
-    return var == nullptr ? nullptr : var->GetMutable<T>();
-  }
-
-  const Variable* LegacyInputVar(const std::string& name) const;
-
-  Variable* LegacyOutputVar(const std::string& name) const;
-
   template <typename T>
   const std::vector<const T*> MultiInput(const std::string& name) const {
     auto it = ctx_.inputs.find(name);

@@ -338,32 +299,6 @@ class ExecutionContext {
     return res;
   }

-  template <typename T>
-  const std::vector<const T*> LegacyMultiInput(const std::string& name) const {
-    auto names = op_.Inputs(name);
-    std::vector<const T*> res;
-    res.reserve(names.size());
-    std::transform(names.begin(), names.end(), std::back_inserter(res),
-                   [&](const std::string& sub_name) -> const T* {
-                     auto var = scope_.FindVar(sub_name);
-                     return var == nullptr ? nullptr : &var->Get<T>();
-                   });
-    return res;
-  }
-
-  template <typename T>
-  std::vector<T*> LegacyMultiOutput(const std::string& name) const {
-    auto names = op_.Outputs(name);
-    std::vector<T*> res;
-    res.reserve(names.size());
-    std::transform(names.begin(), names.end(), std::back_inserter(res),
-                   [&](const std::string& sub_name) -> T* {
-                     auto var = scope_.FindVar(sub_name);
-                     return var == nullptr ? nullptr : var->GetMutable<T>();
-                   });
-    return res;
-  }
-
   platform::Place GetPlace() const { return device_context_.GetPlace(); }

   template <typename DeviceContextType>

@@ -436,24 +371,13 @@ class ExecutionContext {
 template <>
 const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const;

-template <>
-const Tensor* ExecutionContext::LegacyInput<Tensor>(
-    const std::string& name) const;
-
 template <>
 const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
     const std::string& name) const;

-template <>
-const std::vector<const Tensor*> ExecutionContext::LegacyMultiInput<Tensor>(
-    const std::string& name) const;
-
 template <>
 Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const;

-template <>
-Tensor* ExecutionContext::LegacyOutput<Tensor>(const std::string& name) const;
-
 template <>
 std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
     const std::string& name) const;
paddle/fluid/inference/api/analysis_predictor.cc (view file @ 847bb6a2)

@@ -183,6 +183,9 @@ void AnalysisPredictor::SetMkldnnThreadID(int tid) {
 bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
                             std::vector<PaddleTensor> *output_data,
                             int batch_size) {
+  if (UNLIKELY(config_.cpu_math_library_num_threads() > 1)) {
+    paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
+  }
   VLOG(3) << "Predictor::predict";
   inference::Timer timer;
   timer.tic();
paddle/fluid/inference/api/api_impl.cc (view file @ 847bb6a2)

@@ -131,6 +131,9 @@ NativePaddlePredictor::~NativePaddlePredictor() {
 bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
                                 std::vector<PaddleTensor> *output_data,
                                 int batch_size) {
+  if (UNLIKELY(config_.cpu_math_library_num_threads() > 1)) {
+    paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
+  }
   VLOG(3) << "Predictor::predict";
   Timer timer;
   timer.tic();
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc (view file @ 847bb6a2)

@@ -366,15 +366,17 @@ TEST(Analyzer_rnn1, ZeroCopyMultiThread) {
 #define NEW_TENSOR(name__) \
   auto name__##_tensor = predictor->GetInputTensor(#name__);

-  auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
+  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
+  predictors.emplace_back(CreatePaddlePredictor<AnalysisConfig>(config));
+  for (int tid = 1; tid < FLAGS_num_threads; tid++) {
+    predictors.emplace_back(predictors.front()->Clone());
+  }
   double total_time_of_threads{0};
   std::vector<std::thread> threads;
   for (int tid = 0; tid < FLAGS_num_threads; tid++) {
     threads.emplace_back([&, tid] {
-      // To ensure the thread binding correctly,
-      // please clone inside the threadpool.
-      auto predictor = base_predictor->Clone();
+      auto &predictor = predictors[tid];
       NEW_TENSOR(data_lod_attention);
       NEW_TENSOR(cell_init);
       NEW_TENSOR(data);
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc (view file @ 847bb6a2)

@@ -266,15 +266,17 @@ TEST(Analyzer_seq_pool1, zerocopy_profile_threads) {
   SetConfig(&config);
   config.SwitchUseFeedFetchOps(false);

-  auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
+  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
+  predictors.emplace_back(CreatePaddlePredictor<AnalysisConfig>(config));
+  for (int tid = 1; tid < FLAGS_num_threads; tid++) {
+    predictors.emplace_back(predictors.front()->Clone());
+  }
   double total_time_of_threads{0};
   std::vector<std::thread> threads;
   for (int tid = 0; tid < FLAGS_num_threads; tid++) {
     threads.emplace_back([&, tid] {
-      // To ensure the thread binding correctly,
-      // please clone inside the threadpool.
-      auto predictor = base_predictor->Clone();
+      auto &predictor = predictors[tid];
       std::vector<std::unique_ptr<ZeroCopyTensor>> inputs;
       PrepareZeroCopyInputs(predictor, &inputs);
       auto output_tensor = predictor->GetOutputTensor(out_var_name);
paddle/fluid/inference/tests/api/tester_helper.h (view file @ 847bb6a2)

@@ -17,8 +17,10 @@
 #include <gtest/gtest.h>
 #include <algorithm>
+#include <memory>
 #include <string>
 #include <thread>  // NOLINT
+#include <unordered_map>
 #include <vector>
 #ifdef WITH_GPERFTOOLS
 #include <gperftools/profiler.h>

@@ -252,7 +254,11 @@ void TestMultiThreadPrediction(
   int batch_size = FLAGS_batch_size;
   int num_times = FLAGS_repeat;
   std::vector<std::thread> threads;
-  auto main_predictor = CreateTestPredictor(config, use_analysis);
+  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
+  predictors.emplace_back(CreateTestPredictor(config, use_analysis));
+  for (int tid = 1; tid < num_threads; tid++) {
+    predictors.emplace_back(predictors.front()->Clone());
+  }

   size_t total_time{0};
   for (int tid = 0; tid < num_threads; ++tid) {

@@ -260,9 +266,7 @@ void TestMultiThreadPrediction(
       // Each thread should have local inputs and outputs.
       // The inputs of each thread are all the same.
       std::vector<PaddleTensor> outputs_tid;
-      // To ensure the thread binding correctly,
-      // please clone inside the threadpool.
-      auto predictor = main_predictor->Clone();
+      auto &predictor = predictors[tid];
 #ifdef PADDLE_WITH_MKLDNN
       if (use_analysis) {
         static_cast<AnalysisPredictor *>(predictor.get())
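All three test changes above replace "clone inside the worker thread" with "clone everything up front, then index by thread id". A hedged Python sketch of the pattern (create_predictor and run_one are illustrative stand-ins, not Paddle APIs):

import threading

def run_multi_thread(create_predictor, num_threads, run_one):
    # Pre-create one predictor per thread on the main thread ...
    predictors = [create_predictor()]
    for _ in range(1, num_threads):
        predictors.append(predictors[0].clone())
    # ... then each worker only touches its own instance.
    threads = [threading.Thread(target=run_one, args=(predictors[tid],))
               for tid in range(num_threads)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()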
paddle/fluid/operators/detection/CMakeLists.txt (view file @ 847bb6a2)

@@ -33,6 +33,7 @@ detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc)
 detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc)
 detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu)
 detection_library(yolov3_loss_op SRCS yolov3_loss_op.cc)
+detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc box_decoder_and_assign_op.cu)

 if(WITH_GPU)
   detection_library(generate_proposals_op SRCS generate_proposals_op.cc generate_proposals_op.cu DEPS memory cub)
paddle/fluid/operators/detection/box_decoder_and_assign_op.cc (new file, mode 100644, view file @ 847bb6a2)

/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/detection/box_decoder_and_assign_op.h"

namespace paddle {
namespace operators {

using LoDTensor = framework::LoDTensor;

class BoxDecoderAndAssignOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(
        ctx->HasInput("PriorBox"),
        "Input(PriorBox) of BoxDecoderAndAssignOp should not be null.");
    PADDLE_ENFORCE(
        ctx->HasInput("PriorBoxVar"),
        "Input(PriorBoxVar) of BoxDecoderAndAssignOp should not be null.");
    PADDLE_ENFORCE(
        ctx->HasInput("TargetBox"),
        "Input(TargetBox) of BoxDecoderAndAssignOp should not be null.");
    PADDLE_ENFORCE(
        ctx->HasInput("BoxScore"),
        "Input(BoxScore) of BoxDecoderAndAssignOp should not be null.");
    PADDLE_ENFORCE(
        ctx->HasOutput("DecodeBox"),
        "Output(DecodeBox) of BoxDecoderAndAssignOp should not be null.");
    PADDLE_ENFORCE(
        ctx->HasOutput("OutputAssignBox"),
        "Output(OutputAssignBox) of BoxDecoderAndAssignOp should not be null.");

    auto prior_box_dims = ctx->GetInputDim("PriorBox");
    auto prior_box_var_dims = ctx->GetInputDim("PriorBoxVar");
    auto target_box_dims = ctx->GetInputDim("TargetBox");
    auto box_score_dims = ctx->GetInputDim("BoxScore");

    PADDLE_ENFORCE_EQ(prior_box_dims.size(), 2,
                      "The rank of Input of PriorBox must be 2");
    PADDLE_ENFORCE_EQ(prior_box_dims[1], 4, "The shape of PriorBox is [N, 4]");
    PADDLE_ENFORCE_EQ(prior_box_var_dims.size(), 1,
                      "The rank of Input of PriorBoxVar must be 1");
    PADDLE_ENFORCE_EQ(prior_box_var_dims[0], 4,
                      "The shape of PriorBoxVar is [4]");
    PADDLE_ENFORCE_EQ(target_box_dims.size(), 2,
                      "The rank of Input of TargetBox must be 2");
    PADDLE_ENFORCE_EQ(box_score_dims.size(), 2,
                      "The rank of Input of BoxScore must be 2");
    PADDLE_ENFORCE_EQ(prior_box_dims[0], target_box_dims[0],
                      "The first dim of prior_box and target_box is roi nums "
                      "and should be same!");
    PADDLE_ENFORCE_EQ(prior_box_dims[0], box_score_dims[0],
                      "The first dim of prior_box and box_score is roi nums "
                      "and should be same!");
    PADDLE_ENFORCE_EQ(target_box_dims[1],
                      box_score_dims[1] * prior_box_dims[1],
                      "The shape of target_box is [N, classnum * 4], The shape "
                      "of box_score is [N, classnum], The shape of prior_box "
                      "is [N, 4]");

    ctx->SetOutputDim("DecodeBox", framework::make_ddim({target_box_dims[0],
                                                         target_box_dims[1]}));
    ctx->ShareLoD("TargetBox", /*->*/ "DecodeBox");
    ctx->SetOutputDim(
        "OutputAssignBox",
        framework::make_ddim({prior_box_dims[0], prior_box_dims[1]}));
    ctx->ShareLoD("PriorBox", /*->*/ "OutputAssignBox");
  }
};

class BoxDecoderAndAssignOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput(
        "PriorBox",
        "(Tensor, default Tensor<float>) "
        "Box list PriorBox is a 2-D Tensor with shape [N, 4] which holds N "
        "boxes and each box is represented as [xmin, ymin, xmax, ymax], "
        "[xmin, ymin] is the left top coordinate of the anchor box, "
        "if the input is image feature map, they are close to the origin "
        "of the coordinate system. [xmax, ymax] is the right bottom "
        "coordinate of the anchor box.");
    AddInput("PriorBoxVar",
             "(Tensor, default Tensor<float>, optional) "
             "PriorBoxVar is a 2-D Tensor with shape [N, 4] which holds N "
             "group of variance. PriorBoxVar will set all elements to 1 by "
             "default.")
        .AsDispensable();
    AddInput("TargetBox",
             "(LoDTensor or Tensor) "
             "This input can be a 2-D LoDTensor with shape "
             "[N, classnum*4]. It holds N targets for N boxes.");
    AddInput("BoxScore",
             "(LoDTensor or Tensor) "
             "This input can be a 2-D LoDTensor with shape "
             "[N, classnum], each box is represented as [classnum] which is "
             "the classification probabilities.");
    AddAttr<float>("box_clip",
                   "(float, default 4.135, np.log(1000. / 16.)) "
                   "clip box to prevent overflowing")
        .SetDefault(4.135f);
    AddOutput("DecodeBox",
              "(LoDTensor or Tensor) "
              "the output tensor of op with shape [N, classnum * 4] "
              "representing the result of N target boxes decoded with "
              "M Prior boxes and variances for each class.");
    AddOutput("OutputAssignBox",
              "(LoDTensor or Tensor) "
              "the output tensor of op with shape [N, 4] "
              "representing the result of N target boxes decoded with "
              "M Prior boxes and variances with the best non-background class "
              "by BoxScore.");
    AddComment(R"DOC(

Bounding Box Coder.

Decode the target bounding box with the prior_box information.

The Decoding schema is described below:

    $$
    ox = (pw \\times pxv \\times tx + px) - \\frac{tw}{2}
    $$
    $$
    oy = (ph \\times pyv \\times ty + py) - \\frac{th}{2}
    $$
    $$
    ow = \\exp (pwv \\times tw) \\times pw + \\frac{tw}{2}
    $$
    $$
    oh = \\exp (phv \\times th) \\times ph + \\frac{th}{2}
    $$

where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates, width
and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote the
prior_box's (anchor) center coordinates, width and height. `pxv`, `pyv`, `pwv`,
`phv` denote the variance of the prior_box and `ox`, `oy`, `ow`, `oh` denote the
decoded coordinates, width and height in decode_box.

decode_box is obtained after box decode, then assigning schema is described below:

For each prior_box, use the best non-background class's decoded values to
update the prior_box locations and get output_assign_box. So, the shape of
output_assign_box is the same as PriorBox.
)DOC");
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(box_decoder_and_assign, ops::BoxDecoderAndAssignOp,
                  ops::BoxDecoderAndAssignOpMaker,
                  paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(
    box_decoder_and_assign,
    ops::BoxDecoderAndAssignKernel<paddle::platform::CPUDeviceContext, float>,
    ops::BoxDecoderAndAssignKernel<paddle::platform::CPUDeviceContext, double>);
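The decoding-and-assigning schema in the DOC comment can be checked against a minimal NumPy sketch (the function name and the assumption class_num > 1 are illustrative; class 0 is treated as background, matching the kernels):

import numpy as np

def box_decoder_and_assign_ref(prior_box, prior_box_var, target_box,
                               box_score, box_clip=4.135):
    # prior_box [N,4], prior_box_var [4], target_box [N,C*4], box_score [N,C]
    n, c = box_score.shape
    pw = prior_box[:, 2] - prior_box[:, 0] + 1
    ph = prior_box[:, 3] - prior_box[:, 1] + 1
    px = prior_box[:, 0] + pw / 2
    py = prior_box[:, 1] + ph / 2

    t = target_box.reshape(n, c, 4)
    dw = np.minimum(prior_box_var[2] * t[:, :, 2], box_clip)
    dh = np.minimum(prior_box_var[3] * t[:, :, 3], box_clip)
    cx = prior_box_var[0] * t[:, :, 0] * pw[:, None] + px[:, None]
    cy = prior_box_var[1] * t[:, :, 1] * ph[:, None] + py[:, None]
    ow = np.exp(dw) * pw[:, None]
    oh = np.exp(dh) * ph[:, None]

    decode_box = np.stack([cx - ow / 2, cy - oh / 2,
                           cx + ow / 2 - 1, cy + oh / 2 - 1], axis=-1)

    # Assign: best non-background class per ROI (the kernels fall back to
    # the prior box when no foreground class exists).
    best = box_score[:, 1:].argmax(axis=1) + 1
    assign_box = decode_box[np.arange(n), best]
    return decode_box.reshape(n, c * 4), assign_box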
paddle/fluid/operators/detection/box_decoder_and_assign_op.cu (new file, mode 100644, view file @ 847bb6a2)

/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/operators/detection/box_decoder_and_assign_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {

template <typename T>
__global__ void DecodeBoxKernel(const T* prior_box_data,
                                const T* prior_box_var_data,
                                const T* target_box_data, const int roi_num,
                                const int class_num, const T box_clip,
                                T* output_box_data) {
  const int idx = threadIdx.x + blockIdx.x * blockDim.x;
  if (idx < roi_num * class_num) {
    int i = idx / class_num;
    int j = idx % class_num;
    T prior_box_width = prior_box_data[i * 4 + 2] - prior_box_data[i * 4] + 1;
    T prior_box_height =
        prior_box_data[i * 4 + 3] - prior_box_data[i * 4 + 1] + 1;
    T prior_box_center_x = prior_box_data[i * 4] + prior_box_width / 2;
    T prior_box_center_y = prior_box_data[i * 4 + 1] + prior_box_height / 2;

    int offset = i * class_num * 4 + j * 4;
    T dw = prior_box_var_data[2] * target_box_data[offset + 2];
    T dh = prior_box_var_data[3] * target_box_data[offset + 3];
    if (dw > box_clip) {
      dw = box_clip;
    }
    if (dh > box_clip) {
      dh = box_clip;
    }
    T target_box_center_x = 0, target_box_center_y = 0;
    T target_box_width = 0, target_box_height = 0;
    target_box_center_x =
        prior_box_var_data[0] * target_box_data[offset] * prior_box_width +
        prior_box_center_x;
    target_box_center_y =
        prior_box_var_data[1] * target_box_data[offset + 1] * prior_box_height +
        prior_box_center_y;
    target_box_width = expf(dw) * prior_box_width;
    target_box_height = expf(dh) * prior_box_height;

    output_box_data[offset] = target_box_center_x - target_box_width / 2;
    output_box_data[offset + 1] = target_box_center_y - target_box_height / 2;
    output_box_data[offset + 2] =
        target_box_center_x + target_box_width / 2 - 1;
    output_box_data[offset + 3] =
        target_box_center_y + target_box_height / 2 - 1;
  }
}

template <typename T>
__global__ void AssignBoxKernel(const T* prior_box_data,
                                const T* box_score_data, T* output_box_data,
                                const int roi_num, const int class_num,
                                T* output_assign_box_data) {
  const int idx = threadIdx.x + blockIdx.x * blockDim.x;
  if (idx < roi_num) {
    int i = idx;
    T max_score = -1;
    int max_j = -1;
    for (int j = 0; j < class_num; ++j) {
      T score = box_score_data[i * class_num + j];
      if (score > max_score && j > 0) {
        max_score = score;
        max_j = j;
      }
    }
    if (max_j > 0) {
      for (int pno = 0; pno < 4; pno++) {
        output_assign_box_data[i * 4 + pno] =
            output_box_data[i * class_num * 4 + max_j * 4 + pno];
      }
    } else {
      for (int pno = 0; pno < 4; pno++) {
        output_assign_box_data[i * 4 + pno] = prior_box_data[i * 4 + pno];
      }
    }
  }
}

template <typename DeviceContext, typename T>
class BoxDecoderAndAssignCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(context.GetPlace()),
                   "This kernel only runs on GPU device.");
    auto* prior_box = context.Input<framework::LoDTensor>("PriorBox");
    auto* prior_box_var = context.Input<framework::Tensor>("PriorBoxVar");
    auto* target_box = context.Input<framework::LoDTensor>("TargetBox");
    auto* box_score = context.Input<framework::LoDTensor>("BoxScore");
    auto* output_box = context.Output<framework::Tensor>("DecodeBox");
    auto* output_assign_box =
        context.Output<framework::Tensor>("OutputAssignBox");

    auto roi_num = target_box->dims()[0];
    auto class_num = box_score->dims()[1];
    auto* target_box_data = target_box->data<T>();
    auto* prior_box_data = prior_box->data<T>();
    auto* prior_box_var_data = prior_box_var->data<T>();
    auto* box_score_data = box_score->data<T>();
    output_box->mutable_data<T>({roi_num, class_num * 4}, context.GetPlace());
    output_assign_box->mutable_data<T>({roi_num, 4}, context.GetPlace());
    T* output_box_data = output_box->data<T>();
    T* output_assign_box_data = output_assign_box->data<T>();

    int block = 512;
    int grid = (roi_num * class_num + block - 1) / block;
    auto& device_ctx = context.cuda_device_context();

    const T box_clip = context.Attr<T>("box_clip");

    DecodeBoxKernel<T><<<grid, block, 0, device_ctx.stream()>>>(
        prior_box_data, prior_box_var_data, target_box_data, roi_num,
        class_num, box_clip, output_box_data);

    context.device_context().Wait();
    int assign_grid = (roi_num + block - 1) / block;
    AssignBoxKernel<T><<<assign_grid, block, 0, device_ctx.stream()>>>(
        prior_box_data, box_score_data, output_box_data, roi_num, class_num,
        output_assign_box_data);
    context.device_context().Wait();
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
    box_decoder_and_assign,
    ops::BoxDecoderAndAssignCUDAKernel<paddle::platform::CUDADeviceContext,
                                       float>,
    ops::BoxDecoderAndAssignCUDAKernel<paddle::platform::CUDADeviceContext,
                                       double>);
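Both launches size the grid with ceiling division so each (roi, class) pair (respectively each roi) gets one thread. A quick sketch of the arithmetic with illustrative sizes:

def ceil_div(a, b):
    # same idiom as (x + block - 1) / block in the kernel launches above
    return (a + b - 1) // b

block = 512
roi_num, class_num = 1000, 81
grid = ceil_div(roi_num * class_num, block)   # DecodeBoxKernel -> 159
assign_grid = ceil_div(roi_num, block)        # AssignBoxKernel -> 2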
paddle/fluid/operators/detection/box_decoder_and_assign_op.h (new file, mode 100644, view file @ 847bb6a2)

/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"

namespace paddle {
namespace operators {

template <typename DeviceContext, typename T>
class BoxDecoderAndAssignKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* prior_box = context.Input<framework::LoDTensor>("PriorBox");
    auto* prior_box_var = context.Input<framework::Tensor>("PriorBoxVar");
    auto* target_box = context.Input<framework::LoDTensor>("TargetBox");
    auto* box_score = context.Input<framework::LoDTensor>("BoxScore");
    auto* output_box = context.Output<framework::Tensor>("DecodeBox");
    auto* output_assign_box =
        context.Output<framework::Tensor>("OutputAssignBox");
    int roi_num = target_box->dims()[0];
    int class_num = box_score->dims()[1];
    auto* target_box_data = target_box->data<T>();
    auto* prior_box_data = prior_box->data<T>();
    auto* prior_box_var_data = prior_box_var->data<T>();
    auto* box_score_data = box_score->data<T>();
    output_box->mutable_data<T>({roi_num, class_num * 4}, context.GetPlace());
    output_assign_box->mutable_data<T>({roi_num, 4}, context.GetPlace());
    T* output_box_data = output_box->data<T>();
    T* output_assign_box_data = output_assign_box->data<T>();
    const T bbox_clip = context.Attr<T>("box_clip");

    for (int i = 0; i < roi_num; ++i) {
      T prior_box_width = prior_box_data[i * 4 + 2] - prior_box_data[i * 4] + 1;
      T prior_box_height =
          prior_box_data[i * 4 + 3] - prior_box_data[i * 4 + 1] + 1;
      T prior_box_center_x = prior_box_data[i * 4] + prior_box_width / 2;
      T prior_box_center_y = prior_box_data[i * 4 + 1] + prior_box_height / 2;
      for (int j = 0; j < class_num; ++j) {
        int64_t offset = i * class_num * 4 + j * 4;
        T dw = std::min(prior_box_var_data[2] * target_box_data[offset + 2],
                        bbox_clip);
        T dh = std::min(prior_box_var_data[3] * target_box_data[offset + 3],
                        bbox_clip);
        T target_box_center_x = 0, target_box_center_y = 0;
        T target_box_width = 0, target_box_height = 0;
        target_box_center_x =
            prior_box_var_data[0] * target_box_data[offset] * prior_box_width +
            prior_box_center_x;
        target_box_center_y = prior_box_var_data[1] *
                                  target_box_data[offset + 1] *
                                  prior_box_height +
                              prior_box_center_y;
        target_box_width = std::exp(dw) * prior_box_width;
        target_box_height = std::exp(dh) * prior_box_height;

        output_box_data[offset] = target_box_center_x - target_box_width / 2;
        output_box_data[offset + 1] =
            target_box_center_y - target_box_height / 2;
        output_box_data[offset + 2] =
            target_box_center_x + target_box_width / 2 - 1;
        output_box_data[offset + 3] =
            target_box_center_y + target_box_height / 2 - 1;
      }

      T max_score = -1;
      int max_j = -1;
      for (int j = 0; j < class_num; ++j) {
        T score = box_score_data[i * class_num + j];
        if (score > max_score && j > 0) {
          max_score = score;
          max_j = j;
        }
      }

      if (max_j > 0) {
        for (int pno = 0; pno < 4; pno++) {
          output_assign_box_data[i * 4 + pno] =
              output_box_data[i * class_num * 4 + max_j * 4 + pno];
        }
      } else {
        for (int pno = 0; pno < 4; pno++) {
          output_assign_box_data[i * 4 + pno] = prior_box_data[i * 4 + pno];
        }
      }
    }
  }
};

}  // namespace operators
}  // namespace paddle
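On the Python side the op surfaces as paddle.fluid.layers.box_decoder_and_assign (see the API.spec hunk above). A usage sketch, assuming the layer returns the op's two outputs in order (DecodeBox, OutputAssignBox); shapes are illustrative:

import paddle.fluid as fluid

prior_box = fluid.layers.data(name='prior_box', shape=[4], dtype='float32')
prior_box_var = fluid.layers.data(name='prior_box_var', shape=[4],
                                  dtype='float32', append_batch_size=False)
target_box = fluid.layers.data(name='target_box', shape=[81 * 4],
                               dtype='float32')
box_score = fluid.layers.data(name='box_score', shape=[81], dtype='float32')

decode_box, assign_box = fluid.layers.box_decoder_and_assign(
    prior_box, prior_box_var, target_box, box_score, box_clip=4.135)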
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h (view file @ 847bb6a2)

@@ -22,7 +22,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/operators/jit/kernels.h"
-#include "paddle/fluid/operators/math/blas.h"

 namespace paddle {
 namespace operators {

@@ -47,7 +46,7 @@ struct EmbeddingVSumFunctor {
     auto *output = output_t->mutable_data<T>(context.GetPlace());

     PADDLE_ENFORCE_LE(table_width * idx_width, out_width);
-    PADDLE_ENFORCE_GT(ids_lod.size(), 1UL);
+    PADDLE_ENFORCE_GT(ids_lod.size(), 1UL, "The LoD[0] could NOT be empty");

     jit::emb_seq_pool_attr_t attr(table_height, table_width, 0, idx_width,
                                   out_width, jit::SeqPoolType::kSum);

@@ -83,11 +82,11 @@ class FusedEmbeddingSeqPoolKernel : public framework::OpKernel<T> {
         FusedEmbeddingSeqPoolLastDim(table_var->dims(), ids_t->dims());
     const auto &ids_lod = ids_t->lod();
     // in run time, the LoD of ids must be 1
-    PADDLE_ENFORCE(ids_lod.size(), 1u, "The LoD level of Input(Ids) must be 1");
-    PADDLE_ENFORCE_GE(ids_lod[0].size(), 1u, "The LoD could NOT be empty");
+    PADDLE_ENFORCE(ids_lod.size(), 1UL, "The LoD level of Input(Ids) must be 1");
     int64_t batch_size = ids_lod[0].size() - 1;
     // in run time, the shape from Ids -> output
-    // should be [seq_length, 1] -> [batch_size, embedding_size]
+    // should be [seq_length, 1] -> [batch_size, last_dim]
     output_t->Resize({batch_size, last_dim});

     if (combiner_type == "sum") {

@@ -125,7 +124,7 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
       auto *ids_data = ids->data<int64_t>();
       int64_t ids_num = ids->numel();
       auto lod = ids->lod()[0];
-      int64_t row_width = d_output->dims()[1];
+      int64_t out_width = d_output->dims()[1];

       framework::Vector<int64_t> *new_rows = d_table->mutable_rows();
       new_rows->resize(ids_num);

@@ -136,15 +135,13 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
       T *d_table_data = d_table_value->mutable_data<T>(context.GetPlace());
       const T *d_output_data = d_output->data<T>();

-      auto blas = math::GetBlas<platform::CPUDeviceContext, T>(context);
+      auto vbroadcast = jit::Get<jit::kVBroadcast, jit::VBroadcastTuples<T>,
+                                 platform::CPUPlace>(out_width);
       for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
         int64_t h = static_cast<int64_t>(lod[i + 1] - lod[i]);
-        int64_t in_offset = lod[i] * row_width;
-        const T *out_pos = d_output_data + i * row_width;
-        T *in_pos = d_table_data + in_offset;
-        for (int r = 0; r != h; ++r) {
-          blas.VCOPY(row_width, out_pos, in_pos + r * row_width);
-        }
+        const T *src = d_output_data + i * out_width;
+        T *dst = d_table_data + lod[i] * out_width;
+        vbroadcast(src, dst, h, out_width);
       }
     } else {
       LOG(ERROR) << "Dense is not supported in fused_embedding_seq_pool_op now";
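The replaced VCOPY loop and the new vbroadcast call are equivalent: the gradient row of each sequence is replicated across that sequence's rows of the table gradient. A NumPy sketch of the equivalence (shapes illustrative):

import numpy as np

lod = [0, 2, 5]                    # two sequences: rows [0,2) and [2,5)
out_width = 8
d_output = np.random.rand(len(lod) - 1, out_width)
d_table = np.zeros((lod[-1], out_width))

for i in range(len(lod) - 1):
    h = lod[i + 1] - lod[i]
    # vbroadcast(src, dst, h, out_width): tile one row h times
    d_table[lod[i]:lod[i] + h] = np.tile(d_output[i], (h, 1))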
paddle/fluid/operators/jit/benchmark.cc (view file @ 847bb6a2)

@@ -474,6 +474,23 @@ void BenchCRFDecodingKernel() {
   }
 }

+template <jit::KernelType KT, typename T, typename PlaceType>
+void BenchVBroadcastKernel() {
+  for (int64_t w : {1, 16, 64, 100, 256}) {
+    Tensor x;
+    x.Resize({w});
+    RandomVec<T>(w, x.mutable_data<T>(PlaceType()));
+    const T* x_data = x.data<T>();
+    for (int h : TestSizes()) {
+      Tensor y;
+      y.Resize({h * w});
+      T* y_data = y.mutable_data<T>(PlaceType());
+      BenchAllImpls<KT, jit::VBroadcastTuples<T>, PlaceType>(
+          w, x_data, y_data, static_cast<int64_t>(h), w);
+    }
+  }
+}
+
 using T = float;
 using CPUPlace = paddle::platform::CPUPlace;

@@ -498,6 +515,7 @@ BENCH_FP32_CPU(kVSquare) { BenchXYNKernel<jit::kVSquare, T, CPUPlace>(); }
 BENCH_FP32_CPU(kVExp) { BenchXYNKernel<jit::kVExp, T, CPUPlace>(); }
 BENCH_FP32_CPU(kVSigmoid) { BenchXYNKernel<jit::kVSigmoid, T, CPUPlace>(); }
 BENCH_FP32_CPU(kVTanh) { BenchXYNKernel<jit::kVTanh, T, CPUPlace>(); }
+BENCH_FP32_CPU(kVCopy) { BenchXYNKernel<jit::kVCopy, T, CPUPlace>(); }

 // lstm and peephole
 BENCH_FP32_CPU(kLSTMCtHt) { BenchLSTMKernel<jit::kLSTMCtHt, T, CPUPlace>(); }

@@ -535,6 +553,11 @@ BENCH_FP32_CPU(kCRFDecoding) {
   BenchCRFDecodingKernel<jit::kCRFDecoding, T, CPUPlace>();
 }

+// vbroadcast function
+BENCH_FP32_CPU(kVBroadcast) {
+  BenchVBroadcastKernel<jit::kVBroadcast, T, CPUPlace>();
+}
+
 // Benchmark all jit kernels including jitcode, mkl and refer.
 // To use this tool, run command: ./benchmark [options...]
 // Options:
paddle/fluid/operators/jit/gen/CMakeLists.txt (view file @ 847bb6a2)

@@ -33,3 +33,4 @@ USE_JITKERNEL_GEN(kHMax)
 USE_JITKERNEL_GEN(kHSum)
 USE_JITKERNEL_GEN(kEmbSeqPool)
 USE_JITKERNEL_GEN(kSgd)
+USE_JITKERNEL_GEN(kVBroadcast)
paddle/fluid/operators/jit/gen/vbroadcast.cc (new file, mode 100644, view file @ 847bb6a2)

/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. */

#include "paddle/fluid/operators/jit/gen/vbroadcast.h"
#include <memory>
#include <vector>
#include "paddle/fluid/operators/jit/registry.h"
#include "paddle/fluid/platform/cpu_info.h"

namespace paddle {
namespace operators {
namespace jit {
namespace gen {

void VBroadcastJitCode::genCode() {
  preCode();
  constexpr int block = YMM_FLOAT_BLOCK;
  constexpr int max_num_regs = 16;
  const int num_block = w_ / block;
  const int num_groups = num_block / max_num_regs;
  const size_t block_size = sizeof(float) * block;
  std::vector<int> groups(num_groups, max_num_regs);
  int rest_num_regs = num_block % max_num_regs;
  if (rest_num_regs > 0) {
    groups.push_back(rest_num_regs);
  }

  // protect param_h
  mov(reg_height, param_h);
  Label l_next_h;
  xor_(reg_h_i, reg_h_i);
  mov(reg_ptr_dst_i, param_dst);
  L(l_next_h);
  {
    mov(reg_ptr_src_i, param_src);
    for (int num_regs : groups) {
      size_t w_offset = 0;
      for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
        vmovups(ymm_t(reg_i), ptr[reg_ptr_src_i + w_offset]);
        w_offset += block_size;
      }
      add(reg_ptr_src_i, num_regs * block_size);

      w_offset = 0;
      for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
        vmovups(ptr[reg_ptr_dst_i + w_offset], ymm_t(reg_i));
        w_offset += block_size;
      }
      add(reg_ptr_dst_i, num_regs * block_size);
    }  // end of groups
    inc(reg_h_i);
    cmp(reg_h_i, reg_height);
    jl(l_next_h, T_NEAR);
  }  // end of l_next_h
  postCode();
}

class VBroadcastCreator : public JitCodeCreator<int64_t> {
 public:
  bool UseMe(const int64_t& w) const override {
    return platform::MayIUse(platform::avx) && w % YMM_FLOAT_BLOCK == 0;
  }
  size_t CodeSize(const int64_t& w) const override {
    return 96 + (w / YMM_FLOAT_BLOCK) * 16 * 8;
  }
  std::unique_ptr<GenBase> CreateJitCode(const int64_t& w) const override {
    PADDLE_ENFORCE_GT(w, 0);
    return make_unique<VBroadcastJitCode>(w, CodeSize(w));
  }
};

}  // namespace gen
}  // namespace jit
}  // namespace operators
}  // namespace paddle

namespace gen = paddle::operators::jit::gen;

REGISTER_JITKERNEL_GEN(kVBroadcast, gen::VBroadcastCreator);
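genCode streams each row through at most 16 ymm registers (8 floats each) per load/store group. A Python sketch of the grouping logic mirrored from the code above (constants as in the source):

YMM_FLOAT_BLOCK = 8   # floats per 256-bit ymm register
MAX_NUM_REGS = 16

def reg_groups(w):
    # w must be a multiple of 8 (see VBroadcastCreator::UseMe)
    num_block = w // YMM_FLOAT_BLOCK
    groups = [MAX_NUM_REGS] * (num_block // MAX_NUM_REGS)
    rest = num_block % MAX_NUM_REGS
    if rest > 0:
        groups.append(rest)
    return groups

print(reg_groups(256))  # [16, 16]
print(reg_groups(200))  # [16, 9]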
paddle/fluid/operators/jit/gen/vbroadcast.h (new file, mode 100644, view file @ 847bb6a2)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. */

#pragma once

#include <string>
#include "glog/logging.h"
#include "paddle/fluid/operators/jit/gen/jitcode.h"

namespace paddle {
namespace operators {
namespace jit {
namespace gen {

class VBroadcastJitCode : public JitCode {
 public:
  explicit VBroadcastJitCode(const int64_t& w, size_t code_size = 256 * 1024,
                             void* code_ptr = nullptr)
      : JitCode(code_size, code_ptr), w_(w) {
    this->genCode();
  }

  DECLARE_JIT_CODE(VBroadcastJitCode);
  void genCode() override;

 private:
  int w_;
  reg64_t param_src{abi_param1};
  reg64_t param_dst{abi_param2};
  reg64_t param_h{abi_param3};
  reg64_t param_w{abi_param4};

  reg64_t reg_height{r9};
  reg64_t reg_h_i{r10};
  reg64_t reg_ptr_src_i{r11};
  reg64_t reg_ptr_dst_i{r12};
};

}  // namespace gen
}  // namespace jit
}  // namespace operators
}  // namespace paddle
paddle/fluid/operators/jit/helper.cc (view file @ 847bb6a2)

@@ -36,6 +36,8 @@ const char* to_string(KernelType kt) {
     ONE_CASE(kVScal);
     ONE_CASE(kVAddBias);
     ONE_CASE(kVRelu);
+    ONE_CASE(kVBroadcast);
+    ONE_CASE(kVCopy);
     ONE_CASE(kVIdentity);
     ONE_CASE(kVExp);
     ONE_CASE(kVSquare);
paddle/fluid/operators/jit/kernel_base.h (view file @ 847bb6a2)

@@ -41,6 +41,8 @@ typedef enum {
   kVAdd,
   kVAddBias,
   kVAddRelu,
+  kVBroadcast,
+  kVCopy,
   kVExp,
   kVIdentity,
   kVMul,

@@ -133,6 +135,13 @@ struct GRUTuples {
   typedef void (*func_type)(gru_t*, const gru_attr_t*);
 };

+template <typename T>
+struct VBroadcastTuples {
+  typedef T data_type;
+  typedef int64_t attr_type;
+  typedef void (*func_type)(const T*, T*, int64_t, int64_t);
+};
+
 typedef struct seq_pool_attr_s {
   int h, w;  // h should always be the first one
   SeqPoolType type;
paddle/fluid/operators/jit/kernel_key.cc (view file @ 847bb6a2)

@@ -24,6 +24,11 @@ size_t JitCodeKey<int>(const int& d) {
   return d;
 }

+template <>
+size_t JitCodeKey<int64_t>(const int64_t& d) {
+  return d;
+}
+
 // TODO(TJ): refine and benchmark JitCodeKey generatation
 constexpr int act_type_shift = 3;  // suppot 2^3 act types
 static inline int act_type_convert(KernelType type) {
paddle/fluid/operators/jit/more/mkl/CMakeLists.txt (view file @ 847bb6a2)

@@ -9,9 +9,11 @@ USE_JITKERNEL_MORE(kVAdd, mkl)
 USE_JITKERNEL_MORE(kVScal, mkl)
 USE_JITKERNEL_MORE(kVExp, mkl)
 USE_JITKERNEL_MORE(kVSquare, mkl)
+USE_JITKERNEL_MORE(kVCopy, mkl)
 USE_JITKERNEL_MORE(kVSigmoid, mkl)
 USE_JITKERNEL_MORE(kVTanh, mkl)
 USE_JITKERNEL_MORE(kSeqPool, mkl)
 USE_JITKERNEL_MORE(kSoftmax, mkl)
 USE_JITKERNEL_MORE(kEmbSeqPool, mkl)
 USE_JITKERNEL_MORE(kSgd, mkl)
+USE_JITKERNEL_MORE(kVBroadcast, mkl)
paddle/fluid/operators/jit/more/mkl/mkl.cc
浏览文件 @
847bb6a2
...
...
@@ -154,6 +154,21 @@ bool VSquareKernel<float>::UseMe(const int& d) const {
return
d
>
7
;
}
template
<
>
bool
VCopyKernel
<
float
>::
UseMe
(
const
int
&
d
)
const
{
return
d
>
15
;
}
template
<
>
bool
VBroadcastKernel
<
float
>::
UseMe
(
const
int64_t
&
d
)
const
{
return
d
>
127
;
}
template
<
>
bool
VBroadcastKernel
<
double
>::
UseMe
(
const
int64_t
&
attr
)
const
{
return
true
;
}
template
<
>
bool
VSigmoidKernel
<
float
>::
UseMe
(
const
int
&
d
)
const
{
return
d
>
7
;
...
...
@@ -223,6 +238,7 @@ AWALYS_USE_ME_WITH_DOUBLE(VExp);
AWALYS_USE_ME_WITH_DOUBLE
(
VSigmoid
);
AWALYS_USE_ME_WITH_DOUBLE
(
VTanh
);
AWALYS_USE_ME_WITH_DOUBLE
(
VSquare
);
AWALYS_USE_ME_WITH_DOUBLE
(
VCopy
);
AWALYS_USE_ME_WITH_DOUBLE
(
Softmax
);
#undef AWALYS_USE_ME_WITH_DOUBLE
...
...
@@ -244,6 +260,8 @@ REGISTER_MKL_KERNEL(kVAdd, VAdd);
REGISTER_MKL_KERNEL(kVScal, VScal);
REGISTER_MKL_KERNEL(kVExp, VExp);
REGISTER_MKL_KERNEL(kVSquare, VSquare);
REGISTER_MKL_KERNEL(kVCopy, VCopy);
REGISTER_MKL_KERNEL(kVBroadcast, VBroadcast);
REGISTER_MKL_KERNEL(kVSigmoid, VSigmoid);
REGISTER_MKL_KERNEL(kVTanh, VTanh);
REGISTER_MKL_KERNEL(kSeqPool, SeqPool);
...
...
paddle/fluid/operators/jit/more/mkl/mkl.h
...
...
@@ -50,6 +50,13 @@ void VCopy(const T* x, T* y, int n);
template <typename T>
void VAXPY(T a, const T* x, T* y, int n);

template <typename T>
void VBroadcast(const T* x, T* y, int64_t y_h, int64_t x_len) {
  for (int64_t h = 0; h < y_h; ++h) {
    VCopy(x, y + h * x_len, x_len);
  }
}

template <typename T>
void VSigmoid(const T* x, T* y, int n) {
  const T min = SIGMOID_THRESHOLD_MIN;
...
...
@@ -192,6 +199,7 @@ DECLARE_MKL_KERNEL(VExp, XYNTuples);
DECLARE_MKL_KERNEL(VSigmoid, XYNTuples);
DECLARE_MKL_KERNEL(VTanh, XYNTuples);
DECLARE_MKL_KERNEL(VSquare, XYNTuples);
DECLARE_MKL_KERNEL(VCopy, XYNTuples);

DECLARE_MKL_KERNEL(SeqPool, SeqPoolTuples);
...
...
@@ -201,6 +209,8 @@ DECLARE_MKL_KERNEL(Softmax, SoftmaxTuples);
DECLARE_MKL_KERNEL(Sgd, SgdTuples);
DECLARE_MKL_KERNEL(VBroadcast, VBroadcastTuples);

#undef DECLARE_MKL_KERNEL

}  // namespace mkl
...
...
paddle/fluid/operators/jit/refer/CMakeLists.txt
...
...
@@ -13,6 +13,7 @@ USE_JITKERNEL_REFER(kVAddRelu)
USE_JITKERNEL_REFER(kVSub)
USE_JITKERNEL_REFER(kVScal)
USE_JITKERNEL_REFER(kVAddBias)
USE_JITKERNEL_REFER(kVCopy)
USE_JITKERNEL_REFER(kVRelu)
USE_JITKERNEL_REFER(kVIdentity)
USE_JITKERNEL_REFER(kVExp)
...
...
@@ -34,3 +35,4 @@ USE_JITKERNEL_REFER(kHMax)
USE_JITKERNEL_REFER(kSoftmax)
USE_JITKERNEL_REFER(kEmbSeqPool)
USE_JITKERNEL_REFER(kSgd)
USE_JITKERNEL_REFER(kVBroadcast)
paddle/fluid/operators/jit/refer/refer.cc
...
...
@@ -30,6 +30,7 @@ REGISTER_REFER_KERNEL(kVScal, VScal);
REGISTER_REFER_KERNEL(kVAddBias, VAddBias);
REGISTER_REFER_KERNEL(kVRelu, VRelu);
REGISTER_REFER_KERNEL(kVCopy, VCopy);
REGISTER_REFER_KERNEL(kVIdentity, VIdentity);
REGISTER_REFER_KERNEL(kVSquare, VSquare);
REGISTER_REFER_KERNEL(kVExp, VExp);
...
...
@@ -61,4 +62,6 @@ REGISTER_REFER_KERNEL(kEmbSeqPool, EmbSeqPool);
REGISTER_REFER_KERNEL(kSgd, Sgd);
REGISTER_REFER_KERNEL(kVBroadcast, VBroadcast);

#undef REGISTER_REFER_KERNEL
paddle/fluid/operators/jit/refer/refer.h
...
...
@@ -70,6 +70,20 @@ void VAddBias(const T* a, const T* x, T* y, int n) {
  }
}

template <typename T>
void VCopy(const T* x, T* y, int n) {
  std::memcpy(y, x, n * sizeof(T));
}

// x shape: (x_len)
// y shape: (h, x_len)
template <typename T>
void VBroadcast(const T* x, T* y, int64_t y_h, int64_t x_len) {
  for (int64_t h = 0; h < y_h; ++h) {
    VCopy(x, y + h * x_len, x_len);
  }
}

template <typename T>
void VRelu(const T* x, T* y, int n) {
  for (int i = 0; i < n; ++i) {
...
...
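The reference implementation above is small enough to exercise standalone. A self-contained sketch of VBroadcast's contract with concrete numbers (the templates are duplicated here so the snippet compiles on its own):

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

template <typename T>
void VCopy(const T* x, T* y, int n) {
  std::memcpy(y, x, n * sizeof(T));
}

template <typename T>
void VBroadcast(const T* x, T* y, int64_t y_h, int64_t x_len) {
  for (int64_t h = 0; h < y_h; ++h) {
    VCopy(x, y + h * x_len, static_cast<int>(x_len));
  }
}

int main() {
  std::vector<float> x = {1.f, 2.f, 3.f};
  std::vector<float> y(2 * x.size());
  VBroadcast(x.data(), y.data(), 2, static_cast<int64_t>(x.size()));
  for (float v : y) std::cout << v << ' ';  // prints: 1 2 3 1 2 3
  return 0;
}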
@@ -500,6 +514,7 @@ DECLARE_REFER_KERNEL(VExp, XYNTuples);
DECLARE_REFER_KERNEL(VSigmoid, XYNTuples);
DECLARE_REFER_KERNEL(VTanh, XYNTuples);
DECLARE_REFER_KERNEL(VSquare, XYNTuples);
DECLARE_REFER_KERNEL(VCopy, XYNTuples);

// lstm_t*, const lstm_attr_t*
DECLARE_REFER_KERNEL(LSTMCtHt, LSTMTuples);
...
...
@@ -528,6 +543,8 @@ DECLARE_REFER_KERNEL(EmbSeqPool, EmbSeqPoolTuples);
DECLARE_REFER_KERNEL(Sgd, SgdTuples);
DECLARE_REFER_KERNEL(VBroadcast, VBroadcastTuples);

#undef DECLARE_REFER_KERNEL

}  // namespace refer
...
...
paddle/fluid/operators/jit/test.cc
...
...
@@ -26,8 +26,8 @@ limitations under the License. */
DEFINE_double(acc, 1e-5, "Test accuracy threshold.");

template <typename T>
-void RandomVec(const int n, T* a, const T lower = static_cast<T>(-20.f),
-               const T upper = static_cast<T>(20.f)) {
+void RandomVec(const int n, T* a, const T lower = static_cast<T>(-2.f),
+               const T upper = static_cast<T>(2.f)) {
  static unsigned int seed = 100;
  std::mt19937 rng(seed++);
  std::uniform_real_distribution<double> uniform_dist(0, 1);
...
...
@@ -157,6 +157,26 @@ struct TestFuncWithRefer<jit::XRNTuples<T>, std::vector<T>, T> {
  }
};

template <typename T>
struct TestFuncWithRefer<jit::VBroadcastTuples<T>, std::vector<T>,
                         std::vector<T>, int64_t,
                         typename jit::VBroadcastTuples<T>::attr_type> {
  void operator()(const typename jit::VBroadcastTuples<T>::func_type tgt,
                  const std::vector<T>& x, const std::vector<T>& yref,
                  int64_t h,
                  const typename jit::VBroadcastTuples<T>::attr_type& attr) {
    EXPECT_TRUE(tgt != nullptr);
    EXPECT_EQ(x.size(), static_cast<size_t>(attr));
    EXPECT_EQ(yref.size(), x.size() * h);
    std::vector<T> y(yref.size());
    const T* x_data = x.data();
    const T* yref_data = yref.data();
    T* y_data = y.data();
    tgt(x_data, y_data, h, attr);
    ExpectEQ<T>(y_data, yref_data, yref.size());
  }
};

template <typename T>
struct TestFuncWithRefer<jit::XYNTuples<T>, std::vector<T>, std::vector<T>> {
  void operator()(const typename jit::XYNTuples<T>::func_type tgt,
...
...
@@ -514,7 +534,7 @@ void TestKernelXRNTuples() {
  auto ref = jit::GetRefer<KT, jit::XRNTuples<T>>();
  EXPECT_TRUE(ref != nullptr);
  std::vector<T> x(d);
-  RandomVec<T>(d, x.data(), -2.f, 2.f);
+  RandomVec<T>(d, x.data());
  T ref_res;
  ref(x.data(), &ref_res, d);
  TestAllImpls<KT, jit::XRNTuples<T>, PlaceType, std::vector<T>, T>(d, x,
...
...
@@ -532,7 +552,7 @@ void TestKernelXYNTuples() {
  std::vector<T> x(d), yref(d);
  std::vector<T> xinp(d);  // inplace test
-  RandomVec<T>(d, x.data(), -2.f, 2.f);
+  RandomVec<T>(d, x.data());
  std::copy(x.begin(), x.end(), xinp.begin());
  const T* x_data = x.data();
...
...
@@ -566,7 +586,7 @@ void TestKernelLSTMTuples() {
  EXPECT_TRUE(ref != nullptr);
  std::vector<T> xsrc(4 * d), wp(3 * d), ct_1(d);
  std::vector<T> ct_ref(d), ht_ref(d), checked(2 * d);
-  RandomVec<T>(4 * d, xsrc.data(), -2.f, 2.f);
+  RandomVec<T>(4 * d, xsrc.data());
  RandomVec<T>(3 * d, wp.data(), -1.f, 1.f);
  RandomVec<T>(d, ct_1.data(), -1.f, 1.f);
  // x could be changed after compute, so copy to save src
...
...
@@ -614,8 +634,8 @@ void TestKernelGRUTuples() {
  auto ref = jit::GetRefer<KT, jit::GRUTuples<T>>();
  EXPECT_TRUE(ref != nullptr);
  std::vector<T> xsrc(3 * d), ht_1(d), ht_ref(d);
-  RandomVec<T>(3 * d, xsrc.data(), -2.f, 2.f);
-  RandomVec<T>(d, ht_1.data(), -2.f, 2.f);
+  RandomVec<T>(3 * d, xsrc.data());
+  RandomVec<T>(d, ht_1.data());
  // x could be changed after compute, so copy to save src
  std::vector<T> x(xsrc.size());
  std::copy(xsrc.begin(), xsrc.end(), x.begin());
...
...
@@ -651,7 +671,7 @@ void TestKernelSeqPoolTuples() {
  auto ref = jit::GetRefer<KT, jit::SeqPoolTuples<T>>();
  EXPECT_TRUE(ref != nullptr);
  std::vector<T> x(h * w), yref(w);
-  RandomVec<T>(h * w, x.data(), -2.f, 2.f);
+  RandomVec<T>(h * w, x.data());
  const T* x_data = x.data();
  T* yref_data = yref.data();
  ref(x_data, yref_data, &attr);
...
...
@@ -676,8 +696,8 @@ void TestKernelMatMulTuples() {
  auto ref = jit::GetRefer<KT, jit::MatMulTuples<T>>();
  EXPECT_TRUE(ref != nullptr);
  std::vector<T> a(m * k), b(k * n), c(m * n);
-  RandomVec<T>(m * k, a.data(), -2.f, 2.f);
-  RandomVec<T>(k * n, b.data(), -2.f, 2.f);
+  RandomVec<T>(m * k, a.data());
+  RandomVec<T>(k * n, b.data());
  const T* a_data = a.data();
  const T* b_data = b.data();
  T* c_data = c.data();
...
...
@@ -699,7 +719,7 @@ void TestKernelSoftmaxTuples() {
  auto ref = jit::GetRefer<KT, jit::SoftmaxTuples<T>>();
  EXPECT_TRUE(ref != nullptr);
  std::vector<T> x(bs * n), y(bs * n);
-  RandomVec<T>(bs * n, x.data(), -2.f, 2.f);
+  RandomVec<T>(bs * n, x.data());
  const T* x_data = x.data();
  T* y_data = y.data();
...
...
@@ -726,7 +746,7 @@ void TestKernelEmbSeqPoolTuples() {
  test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
  for (int tbl_w : test_sizes) {
    std::vector<T> table(tbl_h * tbl_w);
-    RandomVec<T>(tbl_h * tbl_w, table.data(), -2.f, 2.f);
+    RandomVec<T>(tbl_h * tbl_w, table.data());
    const T* table_data = table.data();
    for (auto type : pool_types) {
      for (int idx_w : {1, 2, 10, 16}) {
...
...
@@ -772,14 +792,14 @@ void TestKernelSgdTuples() {
  for (int grad_w : TestSizes()) {
    std::vector<T> param(param_h * grad_w);
    std::vector<T> param_out(param_h * grad_w);
-    RandomVec<T>(param_h * grad_w, param.data(), -2.f, 2.f);
+    RandomVec<T>(param_h * grad_w, param.data());
    const T* param_data = param.data();
    T* out_data = param_out.data();
    for (int rows_size = 1; rows_size <= param_h; ++rows_size) {
      std::vector<T> grad(rows_size * grad_w);
      std::vector<int64_t> rows =
          UnDuplicatedRandomVec(rows_size, 0, rows_size - 1);
-      RandomVec<T>(rows_size * grad_w, grad.data(), -2.f, 2.f);
+      RandomVec<T>(rows_size * grad_w, grad.data());
      const int64_t* rows_data = rows.data();
      const T* grad_data = grad.data();
      auto ref = jit::GetRefer<KT, jit::SgdTuples<T>>();
...
...
@@ -815,8 +835,8 @@ void TestKernelNCHW16CMulNCTuples() {
  int sz = n * c * h * w;
  std::vector<T> x(sz), y(n * c), zref(sz);
  std::vector<T> ztgt(sz), zjit(sz);
-  RandomVec<T>(sz, x.data(), -2.f, 2.f);
-  RandomVec<T>(n * c, y.data(), -2.f, 2.f);
+  RandomVec<T>(sz, x.data());
+  RandomVec<T>(n * c, y.data());
  const T* x_data = x.data();
  const T* y_data = y.data();
...
...
@@ -873,11 +893,11 @@ void TestKernelLayerNormTuples() {
  int sz = left * right;
  std::vector<T> x(sz), mean(left), var(left), scale(right), bias(right),
      outref(sz);
-  RandomVec<T>(sz, x.data(), -2.f, 2.f);
-  RandomVec<T>(left, mean.data(), -2.f, 2.f);
-  RandomVec<T>(left, var.data(), -2.f, 2.f);
-  RandomVec<T>(right, scale.data(), -2.f, 2.f);
-  RandomVec<T>(right, bias.data(), -2.f, 2.f);
+  RandomVec<T>(sz, x.data());
+  RandomVec<T>(left, mean.data());
+  RandomVec<T>(left, var.data());
+  RandomVec<T>(right, scale.data());
+  RandomVec<T>(right, bias.data());
  const T* scale_data = scale.data();
  const T* bias_data = bias.data();
...
...
@@ -903,7 +923,7 @@ void TestKernelCRFDecodingTuples() {
  VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
  constexpr int state_trans_base_idx = 2;
  auto test_sizes = TestSizes();
-  test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
+  test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 2000));
  for (int seq_len : {1, 11, 17, 50}) {
    for (int tag_num : test_sizes) {
      auto ref = jit::GetRefer<KT, jit::CRFDecodingTuples<T>>();
...
...
@@ -912,8 +932,8 @@ void TestKernelCRFDecodingTuples() {
      int w_sz = (tag_num + state_trans_base_idx) * tag_num;
      std::vector<T> x(x_sz), w(w_sz), alpharef(x_sz);
      std::vector<int> trackref(x_sz);
-      RandomVec<T>(x_sz, x.data(), -2.f, 2.f);
-      RandomVec<T>(w_sz, w.data(), -2.f, 2.f);
+      RandomVec<T>(x_sz, x.data());
+      RandomVec<T>(w_sz, w.data());
      ref(seq_len, (const T*)x.data(), (const T*)w.data(), alpharef.data(),
          trackref.data(), tag_num);
...
...
@@ -926,6 +946,27 @@ void TestKernelCRFDecodingTuples() {
  }
}

template <jit::KernelType KT, typename T, typename PlaceType>
void TestKernelVBroadcastTuples() {
  VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
  for (int w : TestSizes()) {
    std::vector<T> x(w);
    RandomVec<T>(w, x.data());
    const T* x_data = x.data();
    for (int64_t h : {1, 2, 6}) {
      auto ref = jit::GetRefer<KT, jit::VBroadcastTuples<T>>();
      EXPECT_TRUE(ref != nullptr);
      std::vector<T> y(w * h);
      T* y_data = y.data();
      ref(x_data, y_data, h, w);

      TestAllImpls<KT, jit::VBroadcastTuples<T>, PlaceType, std::vector<T>,
                   std::vector<T>, int64_t>(static_cast<int64_t>(w), x, y, h,
                                            static_cast<int64_t>(w));
    }
  }
}
#define TEST_CPU_KERNEL(test_tuple, kernel_type) \
TEST(JITKernel, kernel_type) { \
TestKernel##test_tuple<jit::kernel_type, float, CPUPlace>(); \
...
...
@@ -949,6 +990,7 @@ TEST_CPU_KERNEL(XYNTuples, kVSquare);
TEST_CPU_KERNEL(XYNTuples, kVExp);
TEST_CPU_KERNEL(XYNTuples, kVSigmoid);
TEST_CPU_KERNEL(XYNTuples, kVTanh);
TEST_CPU_KERNEL(XYNTuples, kVCopy);

TEST_CPU_KERNEL(LSTMTuples, kLSTMCtHt);
TEST_CPU_KERNEL(LSTMTuples, kLSTMC1H1);
...
...
@@ -966,6 +1008,7 @@ TEST_CPU_KERNEL(EmbSeqPoolTuples, kEmbSeqPool);
TEST_CPU_KERNEL(SgdTuples, kSgd);
TEST_CPU_KERNEL(LayerNormTuples, kLayerNorm);
TEST_CPU_KERNEL(CRFDecodingTuples, kCRFDecoding);
TEST_CPU_KERNEL(VBroadcastTuples, kVBroadcast);

TEST(JITKernel_key, lstm) {
  jit::lstm_attr_t attr1(8, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh);
...
...
paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc
0 → 100644
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "mkldnn.hpp"
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/requantize_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace operators {

using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::reorder;
using platform::to_void_cast;
using Tensor = framework::Tensor;
using framework::DataLayout;
using mkldnn::stream;
using platform::GetMKLDNNFormat;

template <typename T>
class ReQuantOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<Tensor>("Input");
    auto scale_in = ctx.Attr<float>("Scale_in");
    auto scale_out = ctx.Attr<float>("Scale_out");
    auto* output = ctx.Output<Tensor>("Output");
    auto& dev_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();
    const auto& engine = dev_ctx.GetEngine();

    std::vector<primitive> pipeline;
    std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
    std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
    mkldnn::memory::data_type src_dt =
        paddle::framework::ToMKLDNNDataType(input->type());
    mkldnn::memory::data_type dst_dt = src_dt;  // TODO(Xiaoli) support
                                                // requantize from different
                                                // data type (e.g., s8 to u8)
    mkldnn::memory::format src_fmt = memory::format::nhwc;
    mkldnn::memory::format dst_fmt = memory::format::nhwc;

    const T* input_data = input->data<T>();
    T* output_data = output->mutable_data<T>(ctx.GetPlace());
    float scale_shift = scale_out / scale_in;

    mkldnn::primitive_attr attri;
    int mask = 0;
    attri.set_output_scales(mask, {scale_shift});

    auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
    auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
    auto src_memory =
        std::make_shared<mkldnn::memory>(src_pd, to_void_cast<T>(input_data));
    std::shared_ptr<primitive::at> src_memory_p =
        std::shared_ptr<primitive::at>(new primitive::at(*src_memory));

    auto dst_md = platform::MKLDNNMemDesc({dst_tz}, dst_dt, dst_fmt);
    auto dst_pd = mkldnn::memory::primitive_desc(dst_md, engine);
    auto dst_memory = mkldnn::memory(dst_pd, to_void_cast<T>(output_data));

    auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
        new reorder::primitive_desc(src_pd, dst_pd, attri));
    auto reorder_p = std::shared_ptr<reorder>(
        new reorder(*reorder_pd, *src_memory_p, dst_memory));
    pipeline.push_back(*reorder_p);
    stream(stream::kind::eager).submit(pipeline).wait();

    output->set_layout(DataLayout::kMKLDNN);
    output->set_format(GetMKLDNNFormat(dst_memory));
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_KERNEL(requantize, MKLDNN, ::paddle::platform::CPUPlace,
                   ops::ReQuantOpKernel<int8_t>,
                   ops::ReQuantOpKernel<uint8_t>);
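Numerically, the reorder above scales every int8 value by scale_shift = scale_out / scale_in. A hedged scalar sketch of that arithmetic (the rounding and saturation behavior is an assumption about the mkldnn reorder, not something this diff specifies):

#include <algorithm>
#include <cmath>
#include <cstdint>

int8_t RequantizeOne(int8_t v, float scale_in, float scale_out) {
  float r = std::round(static_cast<float>(v) * scale_out / scale_in);
  r = std::min(127.f, std::max(-128.f, r));  // saturate to the int8 range
  return static_cast<int8_t>(r);
}
// e.g. RequantizeOne(100, /*scale_in=*/2.f, /*scale_out=*/1.f) == 50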
paddle/fluid/operators/ngraph/ops/activation_op.h
...
...
@@ -55,4 +55,4 @@ void BuildTanhGradNode(
}  // namespace paddle

REGISTER_NG_OP(relu_grad, BuildReluGradNode);
-REGISTER_NG_OP(than_grad, BuildTanhGradNode);
+REGISTER_NG_OP(tanh_grad, BuildTanhGradNode);
paddle/fluid/operators/recurrent_op.cc
...
...
@@ -157,11 +157,13 @@ class RecurrentBase : public framework::OperatorBase {
                                     const std::vector<std::string>& src_vars,
                                     framework::Scope* dst_scope,
                                     const std::vector<std::string>& dst_vars,
-                                     Callback callback) {
+                                     Callback callback,
+                                     bool is_backward = false) {
    PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size());
    for (size_t i = 0; i < dst_vars.size(); ++i) {
      VLOG(10) << "Link " << src_vars[i] << " to " << dst_vars[i];
-      AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback);
+      AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback,
+                   is_backward);
    }
  }
...
...
@@ -173,11 +175,13 @@ class RecurrentBase : public framework::OperatorBase {
                                     const std::vector<std::string>& src_vars,
                                     const framework::Scope& dst_scope,
                                     const std::vector<std::string>& dst_vars,
-                                     Callback callback) {
+                                     Callback callback,
+                                     bool is_backward = false) {
    PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size());
    for (size_t i = 0; i < dst_vars.size(); ++i) {
      VLOG(10) << "Link " << src_vars[i] << " to " << dst_vars[i];
-      AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback);
+      AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback,
+                   is_backward);
    }
  }
...
...
@@ -194,9 +198,13 @@ class RecurrentBase : public framework::OperatorBase {
  static void AccessTensor(const framework::Scope& src_scope,
                           const std::string& src_var_name,
                           framework::Scope* dst_scope,
-                           const std::string& dst_var_name, Callback callback) {
+                           const std::string& dst_var_name, Callback callback,
+                           bool is_backward = false) {
    auto* src_var = src_scope.FindVar(src_var_name);
-    PADDLE_ENFORCE(src_var != nullptr);
+    if (is_backward && src_var == nullptr) {
+      return;
+    }
+    PADDLE_ENFORCE(src_var != nullptr, "%s is not found.", src_var_name);
    auto& src_tensor = src_var->Get<framework::LoDTensor>();
    auto* dst_var = dst_scope->Var(dst_var_name);
...
...
@@ -208,12 +216,16 @@ class RecurrentBase : public framework::OperatorBase {
  static void AccessTensor(const framework::Scope& src_scope,
                           const std::string& src_var_name,
                           const framework::Scope& dst_scope,
-                           const std::string& dst_var_name, Callback callback) {
+                           const std::string& dst_var_name, Callback callback,
+                           bool is_backward = false) {
+    auto* dst_var = dst_scope.FindVar(dst_var_name);
+    if (is_backward && dst_var == nullptr) {
+      return;
+    }
    auto* src_var = src_scope.FindVar(src_var_name);
-    PADDLE_ENFORCE(src_var != nullptr);
+    PADDLE_ENFORCE(src_var != nullptr, "%s is not found.", src_var_name);
    auto& src_tensor = src_var->Get<framework::LoDTensor>();
-    auto* dst_var = dst_scope.FindVar(dst_var_name);
-    PADDLE_ENFORCE(dst_var != nullptr);
+    PADDLE_ENFORCE(dst_var != nullptr, "%s is not found.", dst_var_name);
    auto* dst_tensor = dst_var->GetMutable<framework::LoDTensor>();
    callback(src_tensor, dst_tensor);
  }
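The net effect of the new flag: forward links still require every variable to exist, while backward links quietly skip variables whose gradient was never created. A sketch of a call site, with hypothetical scope and name-list variables (the callback shape matches the lambdas RecurrentGradOp passes below):

// src_scope, src_names, dst_scope, dst_names are placeholders here.
LinkTensorWithCallback(
    src_scope, src_names, &dst_scope, dst_names,
    [&](const framework::LoDTensor& src, framework::LoDTensor* dst) {
      dst->ShareDataWith(src);  // hypothetical callback body
    },
    true /*is_backward*/);  // missing gradient vars are skipped, not enforced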
...
...
@@ -345,7 +357,8 @@ class RecurrentGradOp : public RecurrentBase {
          auto dims = framework::vectorize(inside->dims());
          dims.erase(dims.begin());
          inside->Resize(framework::make_ddim(dims));
-        });
+        },
+        true /*is_backward*/);
    auto og_set = List2Set(Inputs(kOutputGrads));

    if (VLOG_IS_ON(10)) {
...
...
@@ -454,7 +467,8 @@ class RecurrentGradOp : public RecurrentBase {
            auto dst = outside->Slice(seq_offset, seq_offset + 1);
            framework::TensorCopy(inside, place, dev_ctx, &dst);
-          });
+          },
+          true /*is_backward*/);
      VLOG(5) << "Link outside gradient finished ";

      if (step_id + 1 == seq_len) {  // at_end
...
...
@@ -467,7 +481,8 @@ class RecurrentGradOp : public RecurrentBase {
              outside->Resize(inside.dims());
              outside->mutable_data(place, inside.type());
              framework::TensorCopy(inside, place, dev_ctx, outside);
-            });
+            },
+            true /*is_backward*/);
        VLOG(5) << "Link initialize state gradient finished ";
      }
      scopes.Next();
...
...
@@ -608,10 +623,8 @@ class RecurrentGradOpShapeInference : public framework::InferShapeBase {
    std::vector<std::string> input{kInputs, kInitialStates};
    std::vector<std::string> output{kOutputs};
    for (auto& s : input) {
+      // NOTE(zcd): In some case, some of kInputs doesn't have gradient.
      PADDLE_ENFORCE(ctx->HasInputs(s));
-      PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName(s)),
-                     "Cannot find the gradient variable %s",
-                     framework::GradVarName(s));
    }
    for (auto& s : output) {
      PADDLE_ENFORCE(ctx->HasInputs(s));
...
...
paddle/fluid/operators/requantize_op.cc
0 → 100644
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#include "paddle/fluid/operators/requantize_op.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle {
namespace operators {

framework::OpKernelType ReQuantOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  framework::LibraryType library_ = framework::LibraryType::kMKLDNN;
  framework::DataLayout layout_ = framework::DataLayout::kMKLDNN;

  return framework::OpKernelType(ctx.Input<Tensor>("Input")->type(),
                                 ctx.GetPlace(), layout_, library_);
}

void ReQuantOpMaker::Make() {
  AddInput("Input", "input data");
  AddOutput("Output", "output data");
  AddAttr<float>("Scale_in", "scale in data").SetDefault({1.0f});
  AddAttr<float>("Scale_out", "scale out data").SetDefault({1.0f});
  AddComment(
      R"DOC(This op will re-quantize data from INT8 with scale_in to INT8 with scale_out)DOC");
}

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OPERATOR(requantize, ops::ReQuantOp, ops::ReQuantOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);
paddle/fluid/operators/requantize_op.h
0 → 100644
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {

using framework::OpKernelType;
using framework::Tensor;

class ReQuantOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    ctx->SetOutputDim("Output", ctx->GetInputDim("Input"));
    ctx->ShareLoD("Input", /*->*/ "Output");
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override;
};

class ReQuantOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override;
};

}  // namespace operators
}  // namespace paddle
paddle/fluid/operators/reshape_op.cc
...
...
@@ -56,6 +56,9 @@ class ReshapeOp : public framework::OperatorWithKernel {
  static framework::DDim ValidateShape(const std::vector<int> shape,
                                       const framework::DDim& in_dims) {
    const int64_t in_size = framework::product(in_dims);
+    auto in_dims_vec = framework::vectorize(in_dims);
+    bool all_positive = std::all_of(in_dims_vec.cbegin(), in_dims_vec.cend(),
+                                    [](int64_t i) { return i > 0; });
    // only one dimension can be set to -1, whose size will be automatically
    // inferred.
    const int64_t unk_dim_val = -1;
...
@@ -88,7 +91,7 @@ class ReshapeOp : public framework::OperatorWithKernel {
}
if
(
unk_dim_idx
!=
-
1
)
{
if
(
in_size
>
0
)
{
if
(
all_positive
)
{
// in_size < 0 and is un-determinate in compile time, skip the check,
// for example, in_dims = [-1, 8, 1, 1], shape = [-1, 3, 8],
// capacity = -24, in_size = -8, output_shape[0] = 0
...
...
paddle/fluid/operators/sequence_ops/sequence_erase_op.cu
...
...
@@ -64,8 +64,7 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel<T> {
    auto* out = ctx.Output<LoDTensor>("Out");
    auto lod = in->lod();
-    PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
-    PADDLE_ENFORCE_EQ(lod[0].back(), (size_t)in->numel(),
+    PADDLE_ENFORCE_EQ(lod[lod.size() - 1].back(), (size_t)in->numel(),
                      "The actual size mismatches with the LoD information.");
    auto tokens = ctx.Attr<std::vector<int>>("tokens");
    auto in_len = in->numel();
...
...
@@ -85,10 +84,9 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel<T> {
                           num_erased.begin() + 1);
    // Copy LoD to GPU
-    auto lod0 = lod[0];
-    auto lod_len = lod0.size();
-    const size_t* dev_in_lod_ptr = lod0.CUDAData(ctx.GetPlace());
+    auto last_lod = lod[lod.size() - 1];
+    auto lod_len = last_lod.size();
+    const size_t* dev_in_lod_ptr = last_lod.CUDAData(ctx.GetPlace());

    // Calc output LoD
    thrust::device_vector<size_t> dev_out_lod(lod_len);
    size_t* dev_out_lod_ptr = thrust::raw_pointer_cast(dev_out_lod.data());
...
...
@@ -96,13 +94,16 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel<T> {
                 PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
        num_erased_ptr, dev_in_lod_ptr, lod_len, dev_out_lod_ptr);

    // Set LoD for output
-    std::vector<size_t> out_lod0(dev_out_lod.begin(), dev_out_lod.end());
+    std::vector<size_t> out_last_lod(dev_out_lod.begin(), dev_out_lod.end());
    framework::LoD out_lod;
-    out_lod.push_back(out_lod0);
+    for (size_t i = 0; i < lod.size() - 1; ++i) {
+      out_lod.push_back(lod[i]);
+    }
+    out_lod.push_back(out_last_lod);
    out->set_lod(out_lod);

    // Set output
-    out->Resize({static_cast<int64_t>(out_lod0.back()), 1});
+    out->Resize({static_cast<int64_t>(out_last_lod.back()), 1});
    auto out_dat = out->mutable_data<T>(ctx.GetPlace());
    SetOutput<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1,
                PADDLE_CUDA_NUM_THREADS, 0, stream>>>(in_dat, in_len,
...
paddle/fluid/operators/sequence_ops/sequence_erase_op.h
...
...
@@ -28,19 +28,18 @@ class SequenceEraseKernel : public framework::OpKernel<T> {
    auto* out = ctx.Output<framework::LoDTensor>("Out");
    auto lod = in->lod();
-    PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
-    PADDLE_ENFORCE_EQ(lod[0].back(), (size_t)in->numel(),
+    PADDLE_ENFORCE_EQ(lod[lod.size() - 1].back(), (size_t)in->numel(),
                      "The actual size mismatches with the LoD information.");
    auto tokens = ctx.Attr<std::vector<int>>("tokens");
    auto in_len = in->numel();
    auto in_dat = in->data<T>();
-    auto lod0 = lod[0];
+    auto last_lod = lod[lod.size() - 1];

    std::vector<size_t> num_erased(in_len + 1, 0);
-    std::vector<size_t> out_lod0(1, 0);
-    for (size_t i = 0; i < lod0.size() - 1; ++i) {
+    std::vector<size_t> out_last_lod(1, 0);
+    for (size_t i = 0; i < last_lod.size() - 1; ++i) {
      size_t num_out = 0;
-      for (auto j = lod0[i] + 1; j <= lod0[i + 1]; ++j) {
+      for (auto j = last_lod[i] + 1; j <= last_lod[i + 1]; ++j) {
        num_erased[j] = num_erased[j - 1];
        if (std::find(tokens.begin(), tokens.end(), in_dat[j - 1]) !=
            tokens.end()) {
...
...
@@ -49,7 +48,7 @@ class SequenceEraseKernel : public framework::OpKernel<T> {
          num_out += 1;
        }
      }
-      out_lod0.push_back(out_lod0.back() + num_out);
+      out_last_lod.push_back(out_last_lod.back() + num_out);
    }

    auto out_len = in_len - num_erased[in_len];
...
...
@@ -62,7 +61,10 @@ class SequenceEraseKernel : public framework::OpKernel<T> {
      }
    }
    framework::LoD out_lod;
-    out_lod.push_back(out_lod0);
+    for (size_t i = 0; i < lod.size() - 1; ++i) {
+      out_lod.push_back(lod[i]);
+    }
+    out_lod.push_back(out_last_lod);
    out->set_lod(out_lod);
  }
};
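A worked trace of the bookkeeping above: with tokens = {0}, input [2, 0, 3 | 0, 4] and last-level LoD {0, 3, 5}, the expected output is [2, 3 | 4] with LoD {0, 2, 3}. A self-contained sketch of just the counting loop:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> tokens = {0};
  std::vector<int> in = {2, 0, 3, 0, 4};
  std::vector<size_t> lod = {0, 3, 5};  // one LoD level, two sequences
  std::vector<size_t> num_erased(in.size() + 1, 0);
  std::vector<size_t> out_lod(1, 0);
  for (size_t i = 0; i < lod.size() - 1; ++i) {
    size_t num_out = 0;
    for (size_t j = lod[i] + 1; j <= lod[i + 1]; ++j) {
      num_erased[j] = num_erased[j - 1];
      if (std::find(tokens.begin(), tokens.end(), in[j - 1]) != tokens.end()) {
        num_erased[j] += 1;
      } else {
        num_out += 1;
      }
    }
    out_lod.push_back(out_lod.back() + num_out);
  }
  // num_erased == {0, 0, 1, 1, 2, 2}; out_lod == {0, 2, 3};
  // output length == in.size() - num_erased.back() == 3.
  std::cout << out_lod[1] << ' ' << out_lod[2] << '\n';  // prints: 2 3
}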
...
...
paddle/fluid/operators/spectral_norm_op.cc
0 → 100644
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/spectral_norm_op.h"
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {

using framework::Tensor;

class SpectralNormOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Weight"),
                   "Input(Weight) of SpectralNormOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("U"),
                   "Input(U) of SpectralNormOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("V"),
                   "Input(V) of SpectralNormOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of SpectralNormOp should not be null.");

    auto dim_weight = ctx->GetInputDim("Weight");
    auto rank_weight = dim_weight.size();
    PADDLE_ENFORCE(rank_weight >= 2 && rank_weight <= 5,
                   "The rank of Input(Weights) can only be 2, 3, "
                   "4, 5 for fc, conv1d, conv2d, conv3d layers.");

    int dim = ctx->Attrs().Get<int>("dim");
    int power_iters = ctx->Attrs().Get<int>("power_iters");
    PADDLE_ENFORCE(dim == 0 || dim == 1, "Attr(dim) can only be 0 or 1");
    PADDLE_ENFORCE(power_iters >= 0,
                   "Attr(power_iters) should be no less than 0");

    int h = dim_weight[dim];
    int w = 1;
    for (int i = 0; i < rank_weight; i++) {
      if (i != dim) {
        w *= dim_weight[i];
      }
    }
    auto dim_u = ctx->GetInputDim("U");
    auto dim_v = ctx->GetInputDim("V");
    PADDLE_ENFORCE_EQ(dim_u[0], h,
                      "Input(U) dims[0] should be equal to "
                      "Input(Weight) dims[Attr(dim)]");
    PADDLE_ENFORCE_EQ(
        dim_v[0], w,
        "Input(V) dims[0] should be equal to "
        "the product of Input(Weight) dims except dims[Attr(dim)]");

    ctx->SetOutputDim("Out", dim_weight);
    ctx->ShareLoD("Weight", /*->*/ "Out");
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(ctx.Input<Tensor>("Weight")->type(),
                                   ctx.GetPlace());
  }
};

class SpectralNormOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("Weight",
             "The input weight tensor of spectral_norm operator. "
             "This can be a 2-D, 3-D, 4-D, 5-D tensor which is the "
             "weights of fc, conv1d, conv2d, conv3d layer.");
    AddInput("U",
             "The weight_u tensor of spectral_norm operator. "
             "This can be a 1-D tensor in shape [H, 1], where "
             "H is the 1st dimension of Weight after reshaping "
             "corresponding to Attr(dim). For Attr(dim) = 1 "
             "in a conv2d layer with weight shape [M, C, K1, K2], "
             "Weight will be reshaped to [C, M*K1*K2] and U will "
             "be in shape [C, 1].");
    AddInput("V",
             "The weight_v tensor of spectral_norm operator. "
             "This can be a 1-D tensor in shape [W, 1], where "
             "W is the 2nd dimension of Weight after reshaping "
             "corresponding to Attr(dim). For Attr(dim) = 1 "
             "in a conv2d layer with weight shape [M, C, K1, K2], "
             "Weight will be reshaped to [C, M*K1*K2] and V will "
             "be in shape [M*K1*K2, 1].");
    AddOutput("Out",
              "The output weight tensor of spectral_norm operator. "
              "This tensor has the same shape as Input(Weight).");

    AddAttr<int>("dim",
                 "The index of the dimension which should be permuted "
                 "to the first before reshaping Input(Weight) to a "
                 "matrix; it should be set as 0 if Input(Weight) is "
                 "the weight of an fc layer, and should be set as 1 if "
                 "Input(Weight) is the weight of a conv layer. "
                 "Default 0.")
        .SetDefault(0);
    AddAttr<int>("power_iters",
                 "number of power iterations to calculate "
                 "spectral norm, default 1.")
        .SetDefault(1);
    AddAttr<float>("eps",
                   "epsilon for numerical stability in "
                   "calculating norms")
        .SetDefault(1e-12);

    AddComment(R"DOC(
          This layer calculates the spectral normalization value of the weight of
          fc, conv1d, conv2d, conv3d layers, which should be a 2-D, 3-D, 4-D, or
          5-D tensor.

          Spectral normalization stabilizes the training of the critic in GANs
          (Generative Adversarial Networks). This layer rescales the weight tensor
          with its spectral norm value.

          For spectral normalization, we rescale the weight
          tensor by :math:`\sigma`, where :math:`\sigma(\mathbf{W})` is

            $$\sigma(\mathbf{W}) = \max_{\mathbf{h}: \mathbf{h} \ne 0} \\frac{\|\mathbf{W} \mathbf{h}\|_2}{\|\mathbf{h}\|_2}$$

          We calculate :math:`\sigma(\mathbf{W})` through power iteration:

            $$
            \mathbf{v} = \mathbf{W}^{T} \mathbf{u}
            $$
            $$
            \mathbf{v} = \\frac{\mathbf{v}}{\|\mathbf{v}\|_2}
            $$
            $$
            \mathbf{u} = \mathbf{W} \mathbf{v}
            $$
            $$
            \mathbf{u} = \\frac{\mathbf{u}}{\|\mathbf{u}\|_2}
            $$

          and :math:`\sigma` is then

            $$\sigma(\mathbf{W}) = \mathbf{u}^{T} \mathbf{W} \mathbf{v}$$

          For details of spectral normalization, please refer to the paper:
          `Spectral Normalization <https://arxiv.org/abs/1802.05957>`_ .
         )DOC");
  }
};

class SpectralNormOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Weight"),
                   "Input(Weight) should not be null");
    PADDLE_ENFORCE(ctx->HasInput("U"), "Input(U) should not be null");
    PADDLE_ENFORCE(ctx->HasInput("V"), "Input(V) should not be null");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) should not be null");
    auto dim_x = ctx->GetInputDim("Weight");
    if (ctx->HasOutput(framework::GradVarName("Weight"))) {
      ctx->SetOutputDim(framework::GradVarName("Weight"), dim_x);
    }
  }

  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(ctx.Input<Tensor>("Weight")->type(),
                                   ctx.GetPlace());
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(spectral_norm, ops::SpectralNormOp, ops::SpectralNormOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(spectral_norm_grad, ops::SpectralNormOpGrad);
REGISTER_OP_CPU_KERNEL(
    spectral_norm,
    ops::SpectralNormKernel<paddle::platform::CPUDeviceContext, float>,
    ops::SpectralNormKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    spectral_norm_grad,
    ops::SpectralNormGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::SpectralNormGradKernel<paddle::platform::CPUDeviceContext, double>);
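To make the DOC's power iteration concrete, here is a dependency-free sketch of what CalcMatrixSigmaAndNormWeight (defined in spectral_norm_op.h below) computes for a dense h x w row-major W; the epsilon placement mirrors the kernel, which adds eps to the norm before dividing:

#include <cmath>
#include <vector>

// Returns sigma(W) ~= u^T W v after `power_iters` rounds of
// v <- W^T u / (||W^T u|| + eps), u <- W v / (||W v|| + eps).
float SpectralNormSketch(const std::vector<float>& W, int h, int w,
                         std::vector<float>* u, std::vector<float>* v,
                         int power_iters, float eps) {
  for (int it = 0; it < power_iters; ++it) {
    for (int j = 0; j < w; ++j) {  // v = W^T u
      float s = 0.f;
      for (int i = 0; i < h; ++i) s += W[i * w + j] * (*u)[i];
      (*v)[j] = s;
    }
    float vn = 0.f;
    for (float x : *v) vn += x * x;
    vn = std::sqrt(vn) + eps;
    for (float& x : *v) x /= vn;
    for (int i = 0; i < h; ++i) {  // u = W v
      float s = 0.f;
      for (int j = 0; j < w; ++j) s += W[i * w + j] * (*v)[j];
      (*u)[i] = s;
    }
    float un = 0.f;
    for (float x : *u) un += x * x;
    un = std::sqrt(un) + eps;
    for (float& x : *u) x /= un;
  }
  float sigma = 0.f;  // sigma = u^T W v
  for (int i = 0; i < h; ++i) {
    float wv = 0.f;
    for (int j = 0; j < w; ++j) wv += W[i * w + j] * (*v)[j];
    sigma += (*u)[i] * wv;
  }
  return sigma;
}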
paddle/fluid/operators/spectral_norm_op.cu
0 → 100644
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/spectral_norm_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
    spectral_norm,
    ops::SpectralNormKernel<paddle::platform::CUDADeviceContext, float>,
    ops::SpectralNormKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
    spectral_norm_grad,
    ops::SpectralNormGradKernel<paddle::platform::CUDADeviceContext, float>,
    ops::SpectralNormGradKernel<paddle::platform::CUDADeviceContext, double>);
paddle/fluid/operators/spectral_norm_op.h
0 → 100644
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace paddle {
namespace operators {

template <typename T, size_t D, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
using Tensor = framework::Tensor;

using Array1 = Eigen::DSizes<int64_t, 1>;
using Array2 = Eigen::DSizes<int64_t, 2>;
using IndexPair = Eigen::IndexPair<int>;

template <typename DeviceContext, typename T>
static inline void TransCompute(const int rank, const Tensor& in, Tensor* out,
                                const std::vector<int>& perm,
                                const DeviceContext& dev_ctx) {
  if (rank <= 1 || rank > 5) {
    PADDLE_THROW("Invalid weight rank.");
  }

  switch (rank) {
    case 2:
      math::Transpose<DeviceContext, T, 2> trans2;
      trans2(dev_ctx, in, out, perm);
      break;
    case 3:
      math::Transpose<DeviceContext, T, 3> trans3;
      trans3(dev_ctx, in, out, perm);
      break;
    case 4:
      math::Transpose<DeviceContext, T, 4> trans4;
      trans4(dev_ctx, in, out, perm);
      break;
    case 5:
      math::Transpose<DeviceContext, T, 5> trans5;
      trans5(dev_ctx, in, out, perm);
      break;
    default:
      break;
  }
}

template <typename DeviceContext, typename T>
static inline void CalcMatrixSigmaAndNormWeight(
    Tensor* sigma, Tensor* u, Tensor* v, Tensor* weight, const int power_iters,
    const float eps, const framework::ExecutionContext& ctx) {
  auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
  auto blas = math::GetBlas<DeviceContext, T>(ctx);
  auto sigma_t = EigenTensor<T, 2>::From(*sigma);
  auto weight_t = EigenTensor<T, 2>::From(*weight);
  auto u_t = EigenTensor<T, 2>::From(*u);
  auto v_t = EigenTensor<T, 2>::From(*v);

  const int h = weight->dims()[0];
  const int w = weight->dims()[1];

  for (int i = 0; i < power_iters; i++) {
    // V = W^T * U / ||W^T * U||_2
    blas.MatMul(*weight, true, *u, false, T(1), v, T(0));
    auto v_t_norm =
        v_t.square().sum().sqrt().eval().reshape(Array1(1)).broadcast(
            Array1(w));
    v_t.device(place) = v_t / (v_t_norm + v_t_norm.constant(eps));
    // U = W * V / ||W * V||_2
    blas.MatMul(*weight, false, *v, false, T(1), u, T(0));
    auto u_t_norm =
        u_t.square().sum().sqrt().eval().reshape(Array1(1)).broadcast(
            Array1(h));
    u_t.device(place) = u_t / (u_t_norm + u_t_norm.constant(eps));
  }
  Tensor weight_v;
  weight_v.mutable_data<T>({h, 1}, ctx.GetPlace());
  blas.MatMul(*weight, false, *v, false, T(1), &weight_v, T(0));
  auto weight_v_t = EigenTensor<T, 2>::From(weight_v);
  sigma_t.device(place) = (u_t * weight_v_t)
                              .sum()
                              .eval()
                              .reshape(Array2(1, 1))
                              .broadcast(Array2(h, w));
  weight_t.device(place) = weight_t / sigma_t;
}

template <typename DeviceContext, typename T>
class SpectralNormKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto& dev_ctx = ctx.template device_context<DeviceContext>();
    auto weight = ctx.Input<Tensor>("Weight");
    auto u = ctx.Input<Tensor>("U");
    auto v = ctx.Input<Tensor>("V");
    auto out = ctx.Output<Tensor>("Out");

    int dim = ctx.Attr<int>("dim");
    int power_iters = ctx.Attr<int>("power_iters");
    float eps = ctx.Attr<float>("eps");

    const int h = u->dims()[0];
    const int w = v->dims()[0];

    Tensor weight_mat;
    auto dims = weight->dims();
    const int rank = dims.size();
    std::vector<int> real_dims;
    if (dim != 0) {
      std::vector<int> perm;
      perm.push_back(dim);
      real_dims.push_back(dims[dim]);
      for (int i = 0; i < rank; i++) {
        if (i != dim) {
          perm.push_back(i);
          real_dims.push_back(dims[i]);
        }
      }
      weight_mat.mutable_data<T>(framework::make_ddim(real_dims),
                                 ctx.GetPlace());
      TransCompute<DeviceContext, T>(rank, *weight, &weight_mat, perm,
                                     dev_ctx);
    } else {
      for (int i = 0; i < rank; i++) {
        real_dims.push_back(i);
      }
      TensorCopySync(*weight, ctx.GetPlace(), &weight_mat);
    }
    weight_mat = weight_mat.Resize({h, w});

    Tensor sigma;
    sigma.mutable_data<T>(weight_mat.dims(), ctx.GetPlace());
    Tensor uu, vv;
    TensorCopySync(*u, ctx.GetPlace(), &uu);
    TensorCopySync(*v, ctx.GetPlace(), &vv);
    CalcMatrixSigmaAndNormWeight<DeviceContext, T>(
        &sigma, &(uu.Resize({h, 1})), &(vv.Resize({w, 1})), &weight_mat,
        power_iters, eps, ctx);

    if (dim != 0) {
      std::vector<int> perm;
      for (int i = 0; i < rank; i++) {
        if (i < dim) {
          perm.push_back(i + 1);
        } else if (i == dim) {
          perm.push_back(0);
        } else {
          perm.push_back(i);
        }
      }
      out->mutable_data<T>(dims, ctx.GetPlace());
      TransCompute<DeviceContext, T>(
          rank, weight_mat.Resize(framework::make_ddim(real_dims)), out, perm,
          dev_ctx);
    } else {
      TensorCopySync(weight_mat.Resize(dims), ctx.GetPlace(), out);
    }
  }
};

template <typename DeviceContext, typename T>
class SpectralNormGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
    auto& dev_ctx = ctx.template device_context<DeviceContext>();
    auto blas = math::GetBlas<DeviceContext, T>(ctx);
    auto weight = ctx.Input<Tensor>("Weight");
    auto u = ctx.Input<Tensor>("U");
    auto v = ctx.Input<Tensor>("V");
    auto out_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto weight_grad = ctx.Output<Tensor>(framework::GradVarName("Weight"));

    int dim = ctx.Attr<int>("dim");
    int power_iters = ctx.Attr<int>("power_iters");
    float eps = ctx.Attr<float>("eps");

    const int h = u->dims()[0];
    const int w = v->dims()[0];

    Tensor weight_mat, out_grad_mat;
    auto dims = weight->dims();
    const int rank = dims.size();
    std::vector<int> real_dims;
    if (dim != 0) {
      std::vector<int> perm;
      perm.push_back(dim);
      real_dims.push_back(dims[dim]);
      for (int i = 0; i < rank; i++) {
        if (i != dim) {
          perm.push_back(i);
          real_dims.push_back(dims[i]);
        }
      }
      weight_mat.mutable_data<T>(framework::make_ddim(real_dims),
                                 ctx.GetPlace());
      out_grad_mat.mutable_data<T>(framework::make_ddim(real_dims),
                                   ctx.GetPlace());
      TransCompute<DeviceContext, T>(rank, *weight, &weight_mat, perm,
                                     dev_ctx);
      TransCompute<DeviceContext, T>(rank, *out_grad, &out_grad_mat, perm,
                                     dev_ctx);
    } else {
      for (int i = 0; i < rank; i++) {
        real_dims.push_back(i);
      }
      TensorCopySync(*weight, ctx.GetPlace(), &weight_mat);
      TensorCopySync(*out_grad, ctx.GetPlace(), &out_grad_mat);
    }
    weight_mat = weight_mat.Resize({h, w});
    out_grad_mat = out_grad_mat.Resize({h, w});

    Tensor sigma;
    sigma.mutable_data<T>(weight_mat.dims(), ctx.GetPlace());
    Tensor uu, vv;
    TensorCopySync(*u, ctx.GetPlace(), &uu);
    TensorCopySync(*v, ctx.GetPlace(), &vv);
    CalcMatrixSigmaAndNormWeight<DeviceContext, T>(
        &sigma, &(uu.Resize({h, 1})), &(vv.Resize({w, 1})), &weight_mat,
        power_iters, eps, ctx);

    Tensor uv;
    uv.mutable_data<T>({h, w}, ctx.GetPlace());
    blas.MatMul(uu.Resize({h, 1}), false, vv.Resize({w, 1}), false, T(1), &uv,
                T(0));

    Tensor weight_grad_mat;
    weight_grad_mat.mutable_data<T>({h, w}, ctx.GetPlace());
    auto weight_grad_mat_t = EigenTensor<T, 2>::From(weight_grad_mat);
    auto weight_mat_t = EigenTensor<T, 2>::From(weight_mat);
    auto out_grad_mat_t = EigenTensor<T, 2>::From(out_grad_mat);
    auto sigma_t = EigenTensor<T, 2>::From(sigma);
    auto uv_t = EigenTensor<T, 2>::From(uv);
    weight_mat_t.device(place) =
        weight_mat_t.sum().eval().reshape(Array2(1, 1)).broadcast(
            Array2(h, w));
    weight_grad_mat_t.device(place) =
        out_grad_mat_t * (out_grad_mat_t.constant(1.0) - uv_t * weight_mat_t) /
        sigma_t;

    if (dim != 0) {
      std::vector<int> perm;
      for (int i = 0; i < rank; i++) {
        if (i < dim) {
          perm.push_back(i + 1);
        } else if (i == dim) {
          perm.push_back(0);
        } else {
          perm.push_back(i);
        }
      }
      weight_grad->mutable_data<T>(dims, ctx.GetPlace());
      TransCompute<DeviceContext, T>(
          rank, weight_grad_mat.Resize(framework::make_ddim(real_dims)),
          weight_grad, perm, dev_ctx);
    } else {
      TensorCopySync(weight_grad_mat.Resize(dims), ctx.GetPlace(),
                     weight_grad);
    }
  }
};

}  // namespace operators
}  // namespace paddle
python/paddle/fluid/__init__.py
...
...
@@ -131,7 +131,8 @@ def __bootstrap__():
        'fast_eager_deletion_mode', 'allocator_strategy',
        'reader_queue_speed_test_mode', 'print_sub_graph_dir',
        'pe_profile_fname', 'warpctc_dir', 'inner_op_parallelism',
-        'enable_parallel_graph', 'multiple_of_cupti_buffer_size'
+        'enable_parallel_graph', 'multiple_of_cupti_buffer_size',
+        'enable_subgraph_optimize'
    ]
    if 'Darwin' not in sysstr:
        read_env_flags.append('use_pinned_memory')
...
...
python/paddle/fluid/compiler.py
...
...
@@ -206,12 +206,12 @@ class CompiledProgram(object):
        # FIXME(dzhwinter): enable_inplace should be after memory_optimize
        # if turn on python memory optimize, turn off the inplace_pass.
-        if self._build_strategy.memory_optimize is None:
-            self._build_strategy.memory_optimize = False \
-                if self._program and self._program._is_mem_optimized else True
-        if self._build_strategy.enable_inplace is None:
-            self._build_strategy.enable_inplace = False \
-                if self._program and self._program._is_mem_optimized else True
+        # memory_optimize and enable_inplace default are True, but we can disable them on purpose
+        if self._program and self._program._is_mem_optimized:
+            self._build_strategy.memory_optimize = False
+        if self._program and self._program._is_mem_optimized:
+            self._build_strategy.enable_inplace = False
        # TODO(wuyi): trainer endpoints should be passed in through
        # build_strategy, not program.xxx.
...
...
python/paddle/fluid/imperative/layer_object_helper.py
0 → 100644
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import copy
import six

from ..framework import Parameter, _in_imperative_mode
from ..param_attr import ParamAttr
from .. import core
from six.moves import zip
from ..layer_helper_base import LayerHelperBase


class LayerObjectHelper(LayerHelperBase):
    def __init__(self, name):
        super(LayerObjectHelper, self).__init__(name, layer_type=name)

    def append_op(self,
                  type=None,
                  inputs=None,
                  outputs=None,
                  attrs=None,
                  stop_gradient=None):
        """Append an operator for this layer object.

           Args:
               type: operator type
               inputs: input variables of the operator
               outputs: output variables of the operator
               attrs: attributes of the operator
               stop_gradient: whether to stop gradient propagation

        Returns the appended operator.
        """
        return self.main_program.current_block().append_op(
            type=type,
            inputs=inputs,
            outputs=outputs,
            attrs=attrs,
            stop_gradient=stop_gradient)

    def _multiple_input(self, inputs_in):
        inputs = inputs_in
        ret = []
        if isinstance(inputs, (list, tuple)):
            for inp in inputs:
                ret.append(self.to_variable(inp))
        else:
            ret.append(self.to_variable(inputs))
        return ret

    # TODO: make it public when we need it
    def _input(self, inputs_in):
        inputs = self._multiple_input(inputs_in)
        if len(inputs) != 1:
            raise ValueError("{0} layer only takes one input".format(
                self.layer_type))
        return inputs[0]

    def _multiple_param_attr(self, length, param_attr_in=None):
        param_attr = param_attr_in
        if isinstance(param_attr, ParamAttr):
            param_attr = [param_attr]

        if len(param_attr) != 1 and len(param_attr) != length:
            raise ValueError("parameter number mismatch")
        elif len(param_attr) == 1 and length != 1:
            tmp = [None] * length
            for i in six.moves.range(length):
                tmp[i] = copy.deepcopy(param_attr[0])
            param_attr = tmp
        return param_attr

    def iter_inputs_and_params(self, inputs_in, param_attr_in=None):
        """Access all inputs and params one by one

           Args:
               inputs_in: inputs to be iterated over
               param_attr_in: param_attr to be iterated over

        Returns input, param_attr
        """
        inputs = inputs_in if (inputs_in is not None) else []
        inputs = self._multiple_input(inputs)
        param_attrs = self._multiple_param_attr(len(inputs), param_attr_in)
        for ipt, param_attr in zip(inputs, param_attrs):
            yield ipt, param_attr

    def input_dtype(self, inputs_in):
        """Get input data type

           Args:
               inputs_in: inputs whose data type is wanted

        Returns dtype of the input
        """
        inputs = self._multiple_input(inputs_in)
        dtype = None
        for each in inputs:
            if dtype is None:
                dtype = each.dtype
            elif dtype != each.dtype:
                raise ValueError("Data Type mismatch: %d to %d" %
                                 (dtype, each.dtype))
        return dtype

    def get_parameter(self, name):
        """Get a specific parameter

           Args:
               name: parameter's name

        Returns target parameter
        """
        param = self.main_program.global_block().var(name)
        if not isinstance(param, Parameter):
            raise ValueError("no Parameter name %s found" % name)
        return param

    def append_bias_op(self,
                       input_var,
                       dim_start=1,
                       dim_end=None,
                       bias_attr=None):
        """Append a bias operator and return its output. If the user does not
           set bias_attr, append_bias_op will return input_var.

           Args:
               input_var: the input variable. len(input_var.shape) is
                          larger than or equal to 2.
               dim_start, dim_end: the shape of the bias is
                          input_var.shape[dim_start:dim_end]
               bias_attr: the bias_attr of the bias parameter

        Return the Variable after the bias op is appended.
        """
        size = list(input_var.shape[dim_start:dim_end])
        bias_attr = bias_attr
        if not bias_attr:
            return input_var

        b = self.create_parameter(
            attr=bias_attr, shape=size, dtype=input_var.dtype, is_bias=True)
        tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
        self.append_op(
            type='elementwise_add',
            inputs={'X': [input_var],
                    'Y': [b]},
            outputs={'Out': [tmp]},
            attrs={'axis': dim_start})
        return tmp

    # TODO: this should not be called anymore after all activation func move to Layers
    def append_activation(self,
                          input_var,
                          act=None,
                          use_cudnn=None,
                          use_mkl_dnn=None):
        """Append activation

           Args:
               input_var: the input variable. len(input_var.shape) is
                          larger than or equal to 2.
               act: activation type
               use_mkl_dnn: whether to use mkldnn
               use_cudnn: whether to use cudnn

        Return the Variable after the activation is appended.
        """
        act = act
        if act is None:
            return input_var
        if isinstance(act, six.string_types):
            act = {'type': act}
        else:
            raise TypeError(str(act) + " should be unicode or str")

        if (use_cudnn is not None) and use_cudnn:
            act['use_cudnn'] = use_cudnn
        if (use_mkl_dnn is not None) and use_mkl_dnn:
            act['use_mkldnn'] = use_mkl_dnn
        act_type = act.pop('type')

        tmp = input_var
        # NOTE(dzhwinter): some activation support inplace computation.
        # NOTE(minqiyang): currently, we don't support inplace in imperative mode
        if not _in_imperative_mode() and core.IsInplace(act_type):
            tmp = input_var
        else:
            tmp = self.create_variable_for_type_inference(
                dtype=input_var.dtype)
        self.append_op(
            type=act_type,
            inputs={"X": [input_var]},
            outputs={"Out": [tmp]},
            attrs=act)
        return tmp

    def is_instance(self, param, cls):
        """Check if the input parameter is an instance of the input class

           Args:
               param: parameter to be checked
               cls: class of the parameter

        Return result of the check (True or False)
        """
        param = param
        if not isinstance(param, cls):
            raise TypeError(
                "The input {0} parameter of method {1} must be {2}".format(
                    param, self.layer_type, cls.__name__))
python/paddle/fluid/imperative/layers.py
...
...
@@ -19,8 +19,8 @@ import numpy as np
import collections
from .. import unique_name
from paddle.fluid import core
from .layer_object_helper import LayerObjectHelper
from paddle.fluid import framework
from paddle.fluid.imperative import base

__all__ = ['Layer', 'PyLayer']
...
...
@@ -44,6 +44,8 @@ class Layer(core.Layer):
        self._parameters = collections.OrderedDict()
        self._sub_layers = collections.OrderedDict()
        self._helper = LayerObjectHelper(self._full_name)

    def full_name(self):
        """Full name for this layers.
...
...
@@ -53,6 +55,51 @@ class Layer(core.Layer):
"""
return
self
.
_full_name
def
create_parameter
(
self
,
attr
,
shape
,
dtype
,
is_bias
=
False
,
default_initializer
=
None
):
"""Create parameters for this layers.
Args:
attr: [ParamAttr] should be the parameter attribute for this parameter
shape: shape of the paramter
dtype: data type of this parameter
is_bias: if this is a bias parameter
default_initializer: set the default initializer for this parameter
Returns created parameter Variable.
"""
return
self
.
_helper
.
create_parameter
(
attr
,
shape
,
dtype
,
is_bias
,
default_initializer
)
# TODO: Add more parameter list when we need them
def
create_variable
(
self
,
name
=
None
,
persistable
=
None
,
dtype
=
None
,
type
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
):
"""Create Variable for this layers.
Args:
name: name of the variable
persistable: if set this variable persistable
dtype: data type of data in the variable
type: type of the variable
Returns created Variable.
"""
if
name
is
not
None
:
var_name
=
"."
.
join
([
self
.
_full_name
,
name
])
else
:
var_name
=
unique_name
.
generate
(
"."
.
join
(
[
self
.
_full_name
,
"_generated_var"
]))
return
self
.
_helper
.
main_program
.
current_block
().
create_var
(
name
=
var_name
,
persistable
=
persistable
,
dtype
=
dtype
,
type
=
type
)
def
parameters
(
self
,
include_sublayers
=
True
):
"""Returns a list of Parameters from current and sub-layers.
...
...
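
For orientation (a sketch under assumed names, not part of the diff): after this change a custom imperative Layer can request parameters from the base class directly, without building a LayerHelper itself.

    import numpy as np
    import paddle.fluid as fluid

    class Scale(fluid.imperative.Layer):
        def __init__(self, name_scope):
            super(Scale, self).__init__(name_scope)
            # routed through the layer's LayerObjectHelper internally
            self._w = self.create_parameter(
                attr=fluid.ParamAttr(), shape=[2, 2], dtype='float32')

        def forward(self, x):
            return fluid.layers.elementwise_mul(x, self._w)

    with fluid.imperative.guard():
        s = Scale('scale')
        out = s(fluid.imperative.base.to_variable(
            np.ones([2, 2], dtype='float32')))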
python/paddle/fluid/imperative/nn.py
...
@@ -41,21 +41,12 @@ class Conv2D(layers.Layer):
                  bias_attr=None,
                  dtype=core.VarDesc.VarType.FP32):
         assert param_attr is not False, "param_attr should not be False here."
-        super(Conv2D, self).__init__(name_scope, dtype=dtype)
-        # TODO(minqiyang): Move this to the top.
-        from ..layer_helper import LayerHelper
-        self._helper = LayerHelper(
-            self.full_name(),
-            param_attr=param_attr,
-            bias_attr=bias_attr,
-            dtype=dtype,
-            act=act)
+        super(Conv2D, self).__init__(name_scope)
         self._groups = groups
         self._stride = utils.convert_to_list(stride, 2, 'stride')
         self._padding = utils.convert_to_list(padding, 2, 'padding')
         self._dilation = utils.convert_to_list(dilation, 2, 'dilation')
+        self._act = act
         if not isinstance(use_cudnn, bool):
             raise ValueError("use_cudnn should be True or False")
         self._use_cudnn = use_cudnn
...
@@ -80,28 +71,28 @@ class Conv2D(layers.Layer):
             std = (2.0 / filter_elem_num)**0.5
             return Normal(0.0, std, 0)

-        self._filter_param = self._helper.create_parameter(
-            attr=self._helper.param_attr,
+        self._filter_param = self.create_parameter(
+            attr=param_attr,
             shape=filter_shape,
             dtype=self._dtype,
             default_initializer=_get_default_param_initializer())

         if self._use_cudnn:
-            self._helper.create_variable(
+            self.create_variable(
                 name="kCUDNNFwdAlgoCache",
                 persistable=True,
                 type=core.VarDesc.VarType.RAW)
-            self._helper.create_variable(
+            self.create_variable(
                 name="kCUDNNBwdDataAlgoCache",
                 persistable=True,
                 type=core.VarDesc.VarType.RAW)
-            self._helper.create_variable(
+            self.create_variable(
                 name="kCUDNNBwdFilterAlgoCache",
                 persistable=True,
                 type=core.VarDesc.VarType.RAW)

-        self._bias_param = self._helper.create_parameter(
-            attr=self._helper.bias_attr,
+        self._bias_param = self.create_parameter(
+            attr=bias_attr,
             shape=[num_filters],
             dtype=self._dtype,
             is_bias=True)
...
@@ -137,7 +128,7 @@ class Conv2D(layers.Layer):
             attrs={'axis': 1})

         # Currently, we don't support inplace in imperative mode
-        return self._helper.append_activation(pre_act)
+        return self._helper.append_activation(pre_act, act=self._act)


 class Pool2D(layers.Layer):
...
@@ -167,9 +158,6 @@ class Pool2D(layers.Layer):
         super(Pool2D, self).__init__(name_scope, dtype=dtype)

-        from ..layer_helper import LayerHelper
-        self._helper = LayerHelper(self.full_name(), dtype=dtype)
-
         self._pool_type = pool_type
         self._pool_size = utils.convert_to_list(pool_size, 2, 'pool_size')
         self._pool_padding = utils.convert_to_list(pool_padding, 2,
...
@@ -216,28 +204,25 @@ class FC(layers.Layer):
         self._size = size
         self._num_flatten_dims = num_flatten_dims
         self._dtype = dtype
-        from ..layer_helper import LayerHelper
-        self._helper = LayerHelper(
-            self.full_name(),
-            param_attr=param_attr,
-            bias_attr=bias_attr,
-            act=act)
+        self._param_attr = param_attr
+        self._bias_attr = param_attr
+        self._act = act

     def _build_once(self, input):
         input_shape = input.shape
         param_shape = [
             reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:], 1)
         ] + [self._size]
-        self._w = self._helper.create_parameter(
-            attr=self._helper.param_attr,
+        self._w = self.create_parameter(
+            attr=self._param_attr,
             shape=param_shape,
             dtype=self._dtype,
             is_bias=False)

-        if self._helper.bias_attr:
+        if self._param_attr:
             size = list([self._size])
-            self._b = self._helper.create_parameter(
-                attr=self._helper.bias_attr,
+            self._b = self.create_parameter(
+                attr=self._param_attr,
                 shape=size,
                 dtype=self._dtype,
                 is_bias=True)
...
@@ -275,7 +260,7 @@ class FC(layers.Layer):
         else:
             pre_activation = pre_bias
         # Currently, we don't support inplace in imperative mode
-        return self._helper.append_activation(pre_activation)
+        return self._helper.append_activation(pre_activation, act=self._act)


 class BatchNorm(layers.Layer):
...
@@ -297,16 +282,12 @@ class BatchNorm(layers.Layer):
                  fuse_with_relu=False,
                  use_global_stats=False):
         super(BatchNorm, self).__init__(name_scope)
+        self._param_attr = param_attr
+        self._param_attr = bias_attr
+        self._act = act

         assert bias_attr is not False, "bias_attr should not be False in batch_norm."

-        from ..layer_helper import LayerHelper
-        self._helper = LayerHelper(
-            self.full_name(),
-            param_attr=param_attr,
-            bias_attr=bias_attr,
-            act=act)
-
         if dtype == core.VarDesc.VarType.FP16:
             self._dtype = core.VarDesc.VarType.FP32
         else:
...
@@ -315,23 +296,23 @@ class BatchNorm(layers.Layer):
         param_shape = [num_channels]

         # create parameter
-        self._scale = self._helper.create_parameter(
-            attr=self._helper.param_attr,
+        self._scale = self.create_parameter(
+            attr=self._param_attr,
             shape=param_shape,
             dtype=self._dtype,
             default_initializer=Constant(1.0))
-        if use_global_stats and self._helper.param_attr.learning_rate == 0.:
+        if use_global_stats and self._param_attr.learning_rate == 0.:
             self._scale._stop_gradient = True

-        self._bias = self._helper.create_parameter(
-            attr=self._helper.bias_attr,
+        self._bias = self.create_parameter(
+            attr=self._param_attr,
             shape=param_shape,
             dtype=self._dtype,
             is_bias=True)
-        if use_global_stats and self._helper.bias_attr.learning_rate == 0.:
+        if use_global_stats and self._param_attr.learning_rate == 0.:
             self._bias._stop_gradient = True

-        self._mean = self._helper.create_parameter(
+        self._mean = self.create_parameter(
             attr=ParamAttr(
                 name=moving_mean_name,
                 initializer=Constant(0.0),
...
@@ -341,7 +322,7 @@ class BatchNorm(layers.Layer):
             dtype=self._dtype)
         self._mean._stop_gradient = True

-        self._variance = self._helper.create_parameter(
+        self._variance = self.create_parameter(
             attr=ParamAttr(
                 name=moving_variance_name,
                 initializer=Constant(1.0),
...
@@ -401,7 +382,7 @@ class BatchNorm(layers.Layer):
             })

         # Currently, we don't support inplace in imperative mode
-        return self._helper.append_activation(batch_norm_out)
+        return self._helper.append_activation(batch_norm_out, self._act)


 class Embedding(layers.Layer):
...
@@ -466,9 +447,7 @@ class Embedding(layers.Layer):
         if self._remote_prefetch:
             assert self._is_sparse is True and self._is_distributed is False

-        from ..layer_helper import LayerHelper
-        self._helper = LayerHelper(self.full_name(), param_attr=param_attr)
-        self._w = self._helper.create_parameter(
+        self._w = self.create_parameter(
             attr=self._param_attr,
             shape=self._size,
             dtype=self._dtype,
...
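
A quick orientation sketch (names and sizes assumed, not part of the diff): with this refactor the imperative layers keep their attrs on self and create parameters through Layer.create_parameter, so constructing one no longer builds a LayerHelper.

    import paddle.fluid as fluid
    from paddle.fluid.imperative.nn import FC

    with fluid.imperative.guard():
        # hypothetical FC: name_scope, output size, then param/bias attrs
        fc = FC('fc', 64,
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(value=0.1)))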
python/paddle/fluid/initializer.py
...
@@ -19,7 +19,6 @@ import numpy as np
 from .wrapped_decorator import signature_safe_contextmanager
 from .core import VarDesc
 from . import unique_name
-from .imperative import base as imperative_base

 __all__ = [
     'Constant', 'Uniform', 'Normal', 'TruncatedNormal', 'Xavier', 'Bilinear',
...
@@ -166,7 +165,7 @@ class ConstantInitializer(Initializer):
                 'force_cpu': self._force_cpu or force_init_on_cpu()
             },
             stop_gradient=True)
-        if not imperative_base.enabled():
+        if not framework._in_imperative_mode():
             var.op = op
         return op
...
@@ -246,7 +245,7 @@ class UniformInitializer(Initializer):
                 attrs={"in_dtype": out_var.dtype,
                        "out_dtype": var.dtype})
-        if not imperative_base.enabled():
+        if not framework._in_imperative_mode():
             var.op = op
         return op
...
@@ -325,7 +324,7 @@ class NormalInitializer(Initializer):
                 outputs={"Out": var},
                 attrs={"in_dtype": out_var.dtype,
                        "out_dtype": var.dtype})
-        if not imperative_base.enabled():
+        if not framework._in_imperative_mode():
             var.op = op
         return op
...
@@ -404,7 +403,7 @@ class TruncatedNormalInitializer(Initializer):
                 outputs={"Out": var},
                 attrs={"in_dtype": out_var.dtype,
                        "out_dtype": var.dtype})
-        if not imperative_base.enabled():
+        if not framework._in_imperative_mode():
             var.op = op
         return op
...
@@ -510,7 +509,7 @@ class XavierInitializer(Initializer):
                 "seed": self._seed
             },
             stop_gradient=True)
-        if not imperative_base.enabled():
+        if not framework._in_imperative_mode():
             var.op = op
         return op
...
@@ -611,7 +610,7 @@ class MSRAInitializer(Initializer):
                 "seed": self._seed
             },
             stop_gradient=True)
-        if not imperative_base.enabled():
+        if not framework._in_imperative_mode():
             var.op = op
         return op
...
@@ -710,7 +709,7 @@ class BilinearInitializer(Initializer):
                 'shape': list(shape),
                 value_name: values
             })
-        if not imperative_base.enabled():
+        if not framework._in_imperative_mode():
             var.op = op
         return op
...
@@ -769,7 +768,7 @@ class NumpyArrayInitializer(Initializer):
                 value_name: values
             },
             stop_gradient=True)
-        if not imperative_base.enabled():
+        if not framework._in_imperative_mode():
             var.op = op
         return op
...
python/paddle/fluid/layer_helper.py
...
@@ -15,45 +15,29 @@
 from __future__ import print_function

 import copy
-import itertools
 import six
-import sys
-import numpy as np

-from .framework import Variable, Parameter, default_main_program, default_startup_program, dtype_is_floating, _in_imperative_mode
+from .framework import Parameter, dtype_is_floating, _in_imperative_mode
 from . import unique_name
-from paddle.fluid.imperative import base as imperative_base
 from paddle.fluid.initializer import Constant, Xavier
-from .param_attr import ParamAttr, WeightNormParamAttr
+from .param_attr import ParamAttr
 from . import core
 from six.moves import zip
+from .layer_helper_base import LayerHelperBase


-class LayerHelper(object):
+class LayerHelper(LayerHelperBase):
     def __init__(self, layer_type, **kwargs):
         self.kwargs = kwargs
-        self.layer_type = layer_type
         name = self.kwargs.get('name', None)
         # TODO(panyx0718, minqiyang): imperative mode
         # can not use both `layer_type` and `name`. Deprecate LayerHelper
         # and write a Helper for imperative mode.
         if name is None:
-            self.kwargs['name'] = unique_name.generate(self.layer_type)
-
-    @property
-    def name(self):
-        return self.kwargs['name']
-
-    @property
-    def main_program(self):
-        return default_main_program()
-
-    @property
-    def startup_program(self):
-        return default_startup_program()
-
-    def to_variable(self, x):
-        return imperative_base.to_variable(x,
-                                           self.main_program.current_block())
+            self.kwargs['name'] = unique_name.generate(layer_type)
+
+        super(LayerHelper, self).__init__(
+            self.kwargs['name'], layer_type=layer_type)

     def append_op(self, *args, **kwargs):
         return self.main_program.current_block().append_op(*args, **kwargs)
...
@@ -82,6 +66,7 @@ class LayerHelper(object):
     def bias_attr(self):
         return ParamAttr._to_attr(self.kwargs.get('bias_attr', None))

+    #TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of param_attr
     def multiple_param_attr(self, length):
         param_attr = self.param_attr
         if isinstance(param_attr, ParamAttr):
...
@@ -113,297 +98,13 @@ class LayerHelper(object):
                           (dtype, each.dtype))
         return dtype

(The roughly 285 lines removed by this hunk, _create_weight_normalize with its
__norm_op/__reshape_op/__transpose_op/__norm_except_dim/__weight_normalize
internals plus create_parameter, get_parameter,
create_variable_for_type_inference, create_variable, create_global_variable,
create_or_get_global_variable and set_variable_initializer, move verbatim into
the new layer_helper_base.py shown later in this diff; only their signatures
are kept below.)

-    def _create_weight_normalize(self, attr, shape, dtype):
-        ...
-    def create_parameter(self,
-                         attr,
-                         shape,
-                         dtype,
-                         is_bias=False,
-                         default_initializer=None):
-        ...
-    def get_parameter(self, name):
-        ...
-    def create_variable_for_type_inference(self, dtype, stop_gradient=False):
-        ...
-    def create_variable(self, *args, **kwargs):
-        ...
-    def create_global_variable(self, persistable=False, *args, **kwargs):
-        ...
-    def create_or_get_global_variable(self, name, *args, **kwargs):
-        ...
-    def set_variable_initializer(self, var, initializer):
-        ...

+    #TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of bias_attr
     def append_bias_op(self, input_var, dim_start=1, dim_end=None):
         """
         Append bias operator and return its output. If the user does not set
...
@@ -434,6 +135,7 @@ class LayerHelper(object):
             attrs={'axis': dim_start})
         return tmp

+    #TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of act
     def append_activation(self, input_var):
         act = self.kwargs.get('act', None)
         if act is None:
...
@@ -448,10 +150,11 @@ class LayerHelper(object):
         if 'use_mkldnn' in self.kwargs:
             act['use_mkldnn'] = self.kwargs.get('use_mkldnn')
         act_type = act.pop('type')
         tmp = input_var
         # NOTE(dzhwinter): some activations support inplace computation.
         # NOTE(minqiyang): currently, we don't support inplace in imperative mode
-        if not imperative_base.enabled() and core.IsInplace(act_type):
+        if not _in_imperative_mode() and core.IsInplace(act_type):
             tmp = input_var
         else:
             tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
...
@@ -462,6 +165,7 @@ class LayerHelper(object):
             attrs=act)
         return tmp

+    #TODO (jiabin): should we remove this since it has never been used
     def _get_default_initializer(self, dtype):
         if dtype is None or dtype_is_floating(dtype) is True:
             return Xavier()
...
@@ -469,6 +173,7 @@ class LayerHelper(object):
         # For integer and boolean types, initialize with all zeros
         return Constant()

+    #TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of kwargs
     def is_instance(self, param_name, cls):
         param = self.kwargs.get(param_name, None)
         if not isinstance(param, cls):
...
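
Context sketch (illustrative, not part of the diff): the weight-norm machinery that moves into LayerHelperBase backs WeightNormParamAttr, which reparameterizes a weight as w = g * v / ||v|| (arXiv:1602.07868). A typical trigger, with made-up names:

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[32], dtype='float32')
    # passing a WeightNormParamAttr makes create_parameter insert the
    # g/v parameters and norm ops sketched in the moved code
    y = fluid.layers.fc(input=x,
                        size=10,
                        param_attr=fluid.WeightNormParamAttr(
                            dim=None, name='fc_w'))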
python/paddle/fluid/layer_helper_base.py
0 → 100644
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import copy
import numpy as np

from .framework import Variable, default_main_program, default_startup_program, _in_imperative_mode, _current_expected_place
from . import unique_name
from .param_attr import ParamAttr, WeightNormParamAttr
from . import core


class LayerHelperBase(object):
    def __init__(self, name, layer_type):
        self._layer_type = layer_type
        self._name = name

    @property
    def name(self):
        return self._name

    @property
    def layer_type(self):
        return self._layer_type

    @property
    def main_program(self):
        return default_main_program()

    @property
    def startup_program(self):
        return default_startup_program()

    def to_variable(self, value, block=None):
        """convert value to variable
        Args:
            value: the value to be converted
            block: the block of the variable
        Return Variable constructed from value
        """
        if isinstance(value, np.ndarray):
            assert _in_imperative_mode(
            ), "to_variable could only be called in imperative mode"
            if not block:
                block = default_main_program().current_block()
            py_var = Variable(
                block,
                type=core.VarDesc.VarType.LOD_TENSOR,
                name=None,
                shape=value.shape,
                dtype=value.dtype)
            var = py_var._ivar.value()
            tensor = var.get_tensor()
            tensor.set(value, _current_expected_place())
            return py_var
        elif isinstance(value, Variable):
            return value

    def _create_weight_normalize(self, attr, shape, dtype):
        from .layers import elementwise_mul, elementwise_div, reshape

        # Remove these ops when LayerHelper and layers support indicating
        # program and block.
        def __norm_op(x,
                      out=None,
                      p=2,
                      dim=None,
                      keep_dim=False,
                      block=self.startup_program.global_block()):
            if out is None:
                out = block.create_var(
                    name=unique_name.generate(".".join(
                        [self.name, 'weight_norm_norm'])),
                    dtype=dtype,
                    persistable=False)
            abs_out = block.create_var(
                name=unique_name.generate(".".join(
                    [self.name, 'weight_norm_abs'])),
                dtype=dtype,
                persistable=False)
            block.append_op(
                type='abs', inputs={'X': x}, outputs={'Out': abs_out})
            pow_out = block.create_var(
                name=unique_name.generate(".".join(
                    [self.name, 'weight_norm_pow'])),
                dtype=dtype,
                persistable=False)
            block.append_op(
                type='pow',
                inputs={'X': abs_out},
                outputs={'Out': pow_out},
                attrs={'factor': float(p)})
            sum_out = block.create_var(
                name=unique_name.generate(".".join(
                    [self.name, 'weight_norm_sum'])),
                dtype=dtype,
                persistable=False)
            block.append_op(
                type='reduce_sum',
                inputs={'X': pow_out},
                outputs={'Out': sum_out},
                attrs={
                    'dim': dim,
                    'keep_dim': keep_dim,
                    'reduce_all': True if dim is None else False
                })
            block.append_op(
                type='pow',
                inputs={'X': sum_out},
                outputs={'Out': out},
                attrs={'factor': 1. / p})
            return out

        def __reshape_op(x,
                         shape,
                         out=None,
                         block=self.startup_program.global_block()):
            if out is None:
                out = block.create_var(
                    name=unique_name.generate(".".join(
                        [self.name, 'weight_norm_reshape'])),
                    dtype=dtype,
                    persistable=False)
            block.append_op(
                type='reshape',
                inputs={'X': x},
                outputs={'Out': out},
                attrs={'shape': shape})
            return out

        def __transpose_op(x,
                           axis,
                           out=None,
                           block=self.startup_program.global_block()):
            if out is None:
                out = block.create_var(
                    name=unique_name.generate(".".join(
                        [self.name, 'weight_norm_transpose'])),
                    dtype=dtype,
                    persistable=False)
            block.append_op(
                type='transpose',
                inputs={'X': x},
                outputs={'Out': out},
                attrs={'axis': axis})
            return out

        def __norm_except_dim(x,
                              out=None,
                              dim=None,
                              block=self.startup_program.global_block()):
            """Computes the norm over all dimensions except dim"""
            if out is None:
                out = block.create_var(
                    name=unique_name.generate(".".join(
                        [self.name, 'weight_norm_norm'])),
                    dtype=dtype,
                    persistable=False)
            if dim is None:
                __norm_op(x, out, dim=dim, block=block)
            elif dim == 0:
                out_shape = [x.shape[0]] + [1] * (len(x.shape) - 1)
                reshape = __reshape_op(x, shape=[x.shape[0], -1], block=block)
                norm = __norm_op(reshape, dim=1, block=block)
                __reshape_op(norm, out=out, shape=out_shape, block=block)
            elif dim == len(x.shape) - 1:
                out_shape = [1] * (len(x.shape) - 1) + [x.shape[-1]]
                reshape = __reshape_op(x, shape=[-1, x.shape[-1]], block=block)
                norm = __norm_op(reshape, dim=0, block=block)
                __reshape_op(norm, out=out, shape=out_shape, block=block)
            else:
                perm = list(range(len(x.shape)))
                perm[0], perm[dim] = dim, 0
                transpose = __transpose_op(x, perm, block=block)
                norm = __norm_op(transpose, dim=0, block=block)
                __transpose_op(norm, perm, out=out, block=block)
            return out

        def __weight_normalize(g, v, dim):
            """Calculations for weight normalization"""
            norm = __norm_except_dim(
                v, dim=dim, block=self.main_program.current_block())
            scale = elementwise_div(x=g, y=norm)
            # The shapes of g and norm are the same.
            # Currently, elementwise_mul only support broadcast when the shape
            # of y is a subset of the shape of x. Thus, we reshape y to squeeze
            # to achieve the subset.
            w = elementwise_mul(
                x=v,
                y=scale if dim is None else reshape(
                    x=scale, shape=[v.shape[dim]]),
                axis=-1 if dim is None else dim)
            # To serialize the original parameter for inference, maybe a
            # parameter rather than a variable should be returned.
            return w

        g_param_attr = copy.deepcopy(attr)
        g_param_attr.name = attr.name + '_g'
        g_param_shape = [1] * len(shape)
        if attr.dim is not None:
            g_param_shape[attr.dim] = shape[attr.dim]
        v_param_attr = copy.deepcopy(attr)
        v_param_attr.name = attr.name + '_v'
        v_param_shape = shape

        # Add to startup_program to initialize g and v.
        # Try to reconstruct the initializer of w by initializing g and v.
        # Set the initializers of g and v as below, then the distribution
        # of w is the same as initializing w with the given initializer.
        # For Data-Dependent Initialization, please compute the init-values
        # of g and v in external and then feed the values to g and v by
        # executing an extra program.
        g_param = self.startup_program.global_block().create_parameter(
            dtype=dtype,
            shape=g_param_shape,
            **g_param_attr._to_kwargs(with_initializer=False))
        v_param = self.startup_program.global_block().create_parameter(
            dtype=dtype,
            shape=v_param_shape,
            **v_param_attr._to_kwargs(with_initializer=True))
        __norm_except_dim(
            x=v_param,
            out=g_param,
            dim=attr.dim,
            block=self.startup_program.global_block())

        # Add weight normalization to main_program
        g_param = self.main_program.global_block().create_parameter(
            dtype=dtype, shape=g_param_shape, **g_param_attr._to_kwargs())
        v_param = self.main_program.global_block().create_parameter(
            dtype=dtype, shape=v_param_shape, **v_param_attr._to_kwargs())
        w_param = __weight_normalize(g_param, v_param, dim=attr.dim)
        return w_param

    # TODO: hide the func after we move the layers to Layers
    def create_parameter(self,
                         attr,
                         shape,
                         dtype,
                         is_bias=False,
                         default_initializer=None):
        """Create parameters for this layers.
        Args:
            attr: [ParamAttr] should be the parameter attribute for this parameter
            shape: shape of the parameter
            dtype: data type of this parameter
            is_bias: if this is a bias parameter
            default_initializer: set the default initializer for this parameter
        Returns created parameter Variable.
        """
        # Deepcopy the attr so that parameters can be shared in program
        attr = copy.deepcopy(attr)
        if attr is None:
            attr = ParamAttr._to_attr(attr)
        assert isinstance(attr, ParamAttr)
        suffix = 'b' if is_bias else 'w'
        if attr.name is None:
            attr.name = unique_name.generate(".".join([self.name, suffix]))

        if default_initializer is None and attr.initializer is None:
            if isinstance(dtype, core.VarDesc.VarType):
                if dtype != core.VarDesc.VarType.FP32 and \
                        dtype != core.VarDesc.VarType.FP64 and \
                        dtype != core.VarDesc.VarType.FP16:
                    raise TypeError(
                        "Can not create parameter with default initializer when dtype is not float type. Set default_initializer to fit the parameter dtype!"
                    )
            else:
                if not (dtype.startswith("float") or dtype == "double"):
                    raise TypeError(
                        "Can not create parameter with default initializer when dtype is not float type. Set default_initializer to fit the parameter dtype!"
                    )
            if is_bias:
                attr._set_default_bias_initializer()
            else:
                attr._set_default_param_initializer()
        else:
            attr._set_default_initializer(default_initializer)

        # If weight normalization is set, insert extra parameters and ops.
        # Refer to https://arxiv.org/pdf/1602.07868.pdf
        if isinstance(attr, WeightNormParamAttr):
            param = self._create_weight_normalize(attr, shape, dtype)
            WeightNormParamAttr.params_with_weight_norm.append(param)
            return param
        if _in_imperative_mode():
            # In imperative mode, we want the returned parameter to be
            # initialized so that it can be used imperatively.
            return self.main_program.global_block().create_parameter(
                dtype=dtype,
                shape=shape,
                **attr._to_kwargs(with_initializer=True))
        else:
            self.startup_program.global_block().create_parameter(
                dtype=dtype,
                shape=shape,
                **attr._to_kwargs(with_initializer=True))
            return self.main_program.global_block().create_parameter(
                dtype=dtype, shape=shape, **attr._to_kwargs())

    def create_variable_for_type_inference(self, dtype, stop_gradient=False):
        """Create a temporary variable whose type will be inferred later.

        Note:
            The default type will be set to LOD_TENSOR. However, when
            the var is used as operator output, its type will be updated
            based on operator's `VarTypeInference` implementation in
            infer_var_type.
        """
        return self.main_program.current_block().create_var(
            name=unique_name.generate(".".join([self.name, 'tmp'])),
            dtype=dtype,
            type=core.VarDesc.VarType.LOD_TENSOR,
            persistable=False,
            stop_gradient=stop_gradient)

    def create_variable(self, *args, **kwargs):
        """Create Variable for this layers.
        Returns created Variable.
        """
        return self.main_program.current_block().create_var(*args, **kwargs)

    def create_global_variable(self, persistable=False, *args, **kwargs):
        """
        create global variable, note that there is no initializer for this global variable.
        Args:
            persistable(bool): True if it is a checkpoint value.
            *args: See create_var's documentation
            **kwargs: See create_var's documentation
        Returns(Variable): the created variable.
        """
        return self.main_program.global_block().create_var(
            *args, persistable=persistable, **kwargs)

    def create_or_get_global_variable(self, name, *args, **kwargs):
        """
        Creates a global variable if not exists and returns the variable and
        a boolean flag which is true when it is a new variable.
        """
        if self.main_program.global_block().has_var(name):
            return self.main_program.global_block().var(name), False
        else:
            return self.create_global_variable(name=name, *args, **kwargs), True

    def set_variable_initializer(self, var, initializer):
        """Set target Variable's initializer
        Args:
            var: target Variable
            initializer: initializer to use
        """
        assert isinstance(var, Variable)
        if _in_imperative_mode():
            initializer(var, var.block)
        else:
            self.startup_program.global_block().create_var(
                name=var.name,
                type=var.type,
                dtype=var.dtype,
                shape=var.shape,
                persistable=True,
                initializer=initializer)
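
Illustrative note (a sketch, not part of the diff): inside a Layer running under fluid.imperative.guard(), the new to_variable helper converts numpy arrays on demand.

    # within a Layer subclass method; self._helper is a LayerHelperBase
    import numpy as np
    var = self._helper.to_variable(np.ones([2, 2], dtype='float32'))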
python/paddle/fluid/layers/control_flow.py
...
@@ -848,7 +848,7 @@ def create_array(dtype):

 @templatedoc()
-def less_than(x, y, force_cpu=None, cond=None, **ignored):
+def less_than(x, y, force_cpu=None, cond=None):
     """
     ${comment}
...
@@ -1800,7 +1800,7 @@ def reorder_lod_tensor_by_rank(x, rank_table):
     return out


-def is_empty(x, cond=None, **ignored):
+def is_empty(x, cond=None):
     """
     Test whether a Variable is empty.
...
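
A side effect worth noting (sketch only): dropping **ignored means stray keyword arguments now raise TypeError instead of being silently swallowed.

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[1], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    cond = fluid.layers.less_than(x=x, y=y)       # fine
    # fluid.layers.less_than(x=x, y=y, foo=1)     # now a TypeError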
python/paddle/fluid/layers/detection.py
...
@@ -52,6 +52,7 @@ __all__ = [
     'box_clip',
     'multiclass_nms',
     'distribute_fpn_proposals',
+    'box_decoder_and_assign',
 ]
...
@@ -2269,9 +2270,9 @@ def distribute_fpn_proposals(fpn_rois,
             fpn_rois = fluid.layers.data(
                 name='data', shape=[4], dtype='float32', lod_level=1)
             multi_rois, restore_ind = fluid.layers.distribute_fpn_proposals(
-                fpn_rois=fpn_rois,
-                min_level=2,
-                max_level=5,
+                fpn_rois=fpn_rois,
+                min_level=2,
+                max_level=5,
                 refer_level=4,
                 refer_scale=224)
     """
...
@@ -2295,3 +2296,65 @@ def distribute_fpn_proposals(fpn_rois,
             'refer_scale': refer_scale
         })
     return multi_rois, restore_ind


+@templatedoc()
+def box_decoder_and_assign(prior_box,
+                           prior_box_var,
+                           target_box,
+                           box_score,
+                           box_clip,
+                           name=None):
+    """
+    ${comment}
+    Args:
+        prior_box(${prior_box_type}): ${prior_box_comment}
+        prior_box_var(${prior_box_var_type}): ${prior_box_var_comment}
+        target_box(${target_box_type}): ${target_box_comment}
+        box_score(${box_score_type}): ${box_score_comment}
+        box_clip(${box_clip_type}): ${box_clip_comment}
+        name(str|None): The name of this operator
+    Returns:
+        decode_box(Variable), output_assign_box(Variable):
+        two variables:
+        - decode_box(${decode_box_type}): ${decode_box_comment}
+        - output_assign_box(${output_assign_box_type}): ${output_assign_box_comment}
+    Examples:
+        .. code-block:: python
+
+            pb = fluid.layers.data(
+                name='prior_box', shape=[20, 4], dtype='float32')
+            pbv = fluid.layers.data(
+                name='prior_box_var', shape=[1, 4], dtype='float32')
+            loc = fluid.layers.data(
+                name='target_box', shape=[20, 4*81], dtype='float32')
+            scores = fluid.layers.data(
+                name='scores', shape=[20, 81], dtype='float32')
+            decoded_box, output_assign_box = fluid.layers.box_decoder_and_assign(
+                pb, pbv, loc, scores, 4.135)
+    """
+    helper = LayerHelper("box_decoder_and_assign", **locals())
+
+    decoded_box = helper.create_variable_for_type_inference(
+        dtype=prior_box.dtype)
+    output_assign_box = helper.create_variable_for_type_inference(
+        dtype=prior_box.dtype)
+
+    helper.append_op(
+        type="box_decoder_and_assign",
+        inputs={
+            "PriorBox": prior_box,
+            "PriorBoxVar": prior_box_var,
+            "TargetBox": target_box,
+            "BoxScore": box_score
+        },
+        attrs={"box_clip": box_clip},
+        outputs={
+            "DecodeBox": decoded_box,
+            "OutputAssignBox": output_assign_box
+        })
+    return decoded_box, output_assign_box
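
For intuition (a sketch, not part of the diff): the decode step applies the standard R-CNN box transform per class. With a prior box of width w, height h and center (cx, cy), variances (vx, vy, vw, vh) and a predicted delta (dx, dy, dw, dh), the decoded center and size are roughly

    ox = dx * vx * w + cx        ow = exp(min(dw * vw, box_clip)) * w
    oy = dy * vy * h + cy        oh = exp(min(dh * vh, box_clip)) * h

and the assign step keeps, per prior box, the decoded box of the highest-scoring non-background class (see the reference implementation in the new unit test later in this diff).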
python/paddle/fluid/layers/nn.py
...
@@ -94,6 +94,7 @@ __all__ = [
     'multiplex',
     'layer_norm',
     'group_norm',
+    'spectral_norm',
     'softmax_with_cross_entropy',
     'smooth_l1',
     'one_hot',
...
@@ -3346,6 +3347,98 @@ def group_norm(input,
     return helper.append_activation(group_norm_out)
+@templatedoc()
+def spectral_norm(weight, dim=0, power_iters=1, eps=1e-12, name=None):
+    """
+    **Spectral Normalization Layer**
+
+    This layer calculates the spectral normalization value of the weight
+    parameters of fc, conv1d, conv2d and conv3d layers, which should be
+    2-D, 3-D, 4-D or 5-D Parameters. The calculation is as follows.
+
+    Step 1:
+    Generate a vector U of shape [H] and a vector V of shape [W],
+    where H is the :attr:`dim`-th dimension of the input weights
+    and W is the product of the remaining dimensions.
+
+    Step 2:
+    :attr:`power_iters` should be a positive integer; perform the following
+    calculations with U and V for :attr:`power_iters` rounds.
+
+    .. math::
+
+        \mathbf{v} := \\frac{\mathbf{W}^{T} \mathbf{u}}{\|\mathbf{W}^{T} \mathbf{u}\|_2}
+
+        \mathbf{u} := \\frac{\mathbf{W} \mathbf{v}}{\|\mathbf{W} \mathbf{v}\|_2}
+
+    Step 3:
+    Calculate :math:`\sigma(\mathbf{W})` and normalize the weight values.
+
+    .. math::
+
+        \sigma(\mathbf{W}) = \mathbf{u}^{T} \mathbf{W} \mathbf{v}
+
+        \mathbf{W} = \\frac{\mathbf{W}}{\sigma(\mathbf{W})}
+
+    Refer to `Spectral Normalization <https://arxiv.org/abs/1802.05957>`_ .
+
+    Args:
+        weight(${weight_type}): ${weight_comment}
+        dim(int): ${dim_comment}
+        power_iters(int): ${power_iters_comment}
+        eps(float): ${eps_comment}
+        name (str): The name of this layer. It is optional.
+
+    Returns:
+        Variable: A tensor variable of weight parameters after spectral normalization.
+
+    Examples:
+
+        >>> weight = fluid.layers.data(name='weight', shape=[8, 32, 32],
+        >>>                            dtype='float32')
+        >>> x = fluid.layers.spectral_norm(weight=weight, dim=1, power_iters=2)
+    """
+    helper = LayerHelper('spectral_norm', **locals())
+    dtype = weight.dtype
+
+    # create input and parameters
+    inputs = {'Weight': weight}
+    input_shape = weight.shape
+    h = input_shape[dim]
+    w = np.prod(input_shape) // h
+
+    u = helper.create_parameter(
+        attr=ParamAttr(),
+        shape=[h],
+        dtype=dtype,
+        default_initializer=Normal(0., 1.))
+    u.stop_gradient = True
+    inputs['U'] = u
+    v = helper.create_parameter(
+        attr=ParamAttr(),
+        shape=[w],
+        dtype=dtype,
+        default_initializer=Normal(0., 1.))
+    inputs['V'] = v
+    v.stop_gradient = True
+
+    # create output
+    out = helper.create_variable(dtype=dtype)
+
+    helper.append_op(
+        type="spectral_norm",
+        inputs=inputs,
+        outputs={"Out": out, },
+        attrs={
+            "dim": dim,
+            "power_iters": power_iters,
+            "eps": eps,
+        })
+
+    return out
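
A minimal numpy sketch of the three steps above (shapes assumed; this mirrors the docstring math, not the op's internals):

    import numpy as np

    W = np.random.randn(8, 32 * 32).astype('float32')  # dim-th axis first
    u = np.random.randn(8).astype('float32')
    eps = 1e-12
    for _ in range(2):                                 # power_iters rounds
        v = W.T.dot(u); v /= (np.linalg.norm(v) + eps)
        u = W.dot(v);   u /= (np.linalg.norm(u) + eps)
    sigma = u.dot(W).dot(v)       # approx. largest singular value of W
    W_sn = W / sigma              # spectrally normalized weight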
 def conv2d_transpose(input,
                      num_filters,
                      output_size=None,
...
@@ -4740,11 +4833,6 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None):
     """

     def __check_input(x, y):
-        if len(y.shape) > len(x.shape):
-            raise ValueError(
-                "Invalid inputs for matmul. "
-                "x's rank should be always greater than or equal to y'rank.")
         x_shape = list(x.shape)
         y_shape = list(y.shape)
         if len(x_shape) == 1:
...
@@ -4760,10 +4848,11 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None):
         if x_shape[-1] != y_shape[-2]:
             raise ValueError("Invalid inputs for matmul.")

-        if len(y_shape) > 2:
+        if len(y_shape) > 2 and len(x_shape) > 2:
             for i, dim_x in enumerate(x_shape[:-2]):
                 if dim_x != y_shape[i]:
-                    raise ValueError("Invalid inputs for matmul.")
+                    raise ValueError("Invalid inputs for matmul. x(%s), y(%s)" %
+                                     (x.shape, y.shape))

     __check_input(x, y)
...
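
Illustration (a sketch under assumed shapes): the removed rank check means a lower-rank x against a higher-rank y is no longer rejected up front, and batch-dim mismatches now report the offending shapes.

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[4], dtype='float32',
                          append_batch_size=False)
    y = fluid.layers.data(name='y', shape=[4, 5], dtype='float32',
                          append_batch_size=False)
    out = fluid.layers.matmul(x, y)   # rank(y) > rank(x): formerly a ValueError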
python/paddle/fluid/optimizer.py
...
@@ -379,7 +379,7 @@ class Optimizer(object):
         self._dtype = loss.dtype
         program = loss.block.program
         optimize_ops = []
-        if imperative_base.enabled():
+        if framework._in_imperative_mode():
             if parameter_list is not None:
                 parameters = parameter_list
             else:
...
python/paddle/fluid/parallel_executor.py
...
@@ -106,13 +106,18 @@ class ParallelExecutor(object):
             else framework.default_main_program()

         self._compiled_program = compiler.CompiledProgram(main_program)
+        if share_vars_from:
+            assert isinstance(
+                share_vars_from, ParallelExecutor
+            ), "The share_vars_from should be ParallelExecutor."
         self._compiled_program.with_data_parallel(
             loss_name=loss_name,
             build_strategy=build_strategy,
             exec_strategy=exec_strategy,
-            share_vars_from=share_vars_from)
+            share_vars_from=share_vars_from._compiled_program
+            if share_vars_from else None)
         self._place = core.CUDAPlace(0) if use_cuda else core.CPUPlace()
-        self._executor = executor.Executor(self._place)
+        self._exe = executor.Executor(self._place)
         self._compiled_program._compile(place=self._place, scope=self._scope)

     def run(self, fetch_list, feed=None, feed_dict=None, return_numpy=True):
...
@@ -180,11 +185,11 @@ class ParallelExecutor(object):
               loss = pe.run(feed=feeder.feed(cur_batch),
                             fetch_list=[avg_cost.name]))
         """
-        return self._executor.run(program=self._compiled_program,
-                                  scope=self._scope,
-                                  feed=feed,
-                                  fetch_list=fetch_list,
-                                  return_numpy=return_numpy)
+        return self._exe.run(program=self._compiled_program,
+                             scope=self._scope,
+                             feed=feed,
+                             fetch_list=fetch_list,
+                             return_numpy=return_numpy)

     @property
     def device_count(self):
...
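
Sketch of the delegation path after this change (avg_cost and feeder assumed from a surrounding training program, as in the docstring snippet above): a ParallelExecutor now wraps a CompiledProgram and forwards run() to a plain Executor.

    import paddle.fluid as fluid

    pe = fluid.ParallelExecutor(use_cuda=False, loss_name=avg_cost.name)
    loss, = pe.run(feed=feeder.feed(cur_batch), fetch_list=[avg_cost.name])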
python/paddle/fluid/tests/unittests/mkldnn/mkldnn_op_test.py
...
@@ -70,3 +70,17 @@ def check_if_mkldnn_primitives_exist_in_bwd(test_case, op_type, x, out,
             fetch_list=['x@GRAD', 'out'])
         __assert_close(x_grad, out[0], 'x@GRAD')


+def format_reorder(out, size):
+    in_n = size[0]
+    out_h = size[2]
+    out_w = size[3]
+    out_c = size[1]
+    out_tmp = np.zeros((in_n, out_h, out_w, out_c))
+    for n in range(in_n):
+        for i in range(out_h):
+            for j in range(out_w):
+                for m in range(out_c):
+                    out_tmp[n, i, j, m] = out[n, m, i, j]
+    return out_tmp.reshape(in_n, out_c, out_h, out_w)
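
Note (an equivalence sketch, not part of the diff): format_reorder permutes an NCHW array into NHWC element order and then reinterprets the buffer with the original NCHW shape; a vectorized equivalent would be

    import numpy as np

    def format_reorder_fast(out, size):
        # same result as the loop version above
        return out.transpose(0, 2, 3, 1).reshape(size)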
python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py
...
@@ -20,6 +20,7 @@ import numpy as np
 import paddle.fluid.core as core
 from paddle.fluid.tests.unittests.op_test import OpTest
 from paddle.fluid.tests.unittests.test_conv2d_op import conv2d_forward_naive, TestConv2dOp
+from mkldnn_op_test import format_reorder


 def conv2d_forward_refer(input, filter, group, conv_param):
...
@@ -29,20 +30,6 @@ def conv2d_forward_refer(input, filter, group, conv_param):
     return format_reorder(out, size)


-def format_reorder(out, size):
-    in_n = size[0]
-    out_h = size[2]
-    out_w = size[3]
-    out_c = size[1]
-    out_tmp = np.zeros((in_n, out_h, out_w, out_c))
-    for n in range(in_n):
-        for i in range(out_h):
-            for j in range(out_w):
-                for m in range(out_c):
-                    out_tmp[n, i, j, m] = out[n, m, i, j]
-    return out_tmp.reshape(in_n, out_c, out_h, out_w)
-
-
 class TestConv2dInt8Op(TestConv2dOp):
     def setUp(self):
         self.op_type = "conv2d"
...
python/paddle/fluid/tests/unittests/mkldnn/test_requantize_mkldnn_op.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import numpy as np
from paddle.fluid.tests.unittests.op_test import OpTest
from mkldnn_op_test import format_reorder


class TestReQuantizeOp(OpTest):
    def setUp(self):
        self.op_type = 'requantize'
        self.scale_in = 2.0
        self.scale_out = 1.5
        self.input_size = [1, 1, 5, 5]
        self.data_type = 'int8'
        self.set_scale()
        self.set_data_type()

        scale_shift = self.scale_out / self.scale_in

        if self.data_type == 'int8':
            input = (np.random.randint(0, 100, self.input_size) - 50
                     ).astype(self.data_type)
            output_tmp = np.round(input.astype('float32') *
                                  scale_shift).astype('int8')
        else:
            input = (np.random.randint(
                0, 100, self.input_size)).astype(self.data_type)
            output_tmp = np.round(input.astype('float32') *
                                  scale_shift).astype('uint8')

        output = format_reorder(output_tmp, self.input_size)

        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(input)}
        self.outputs = {'Output': output}
        self.attrs = {'Scale_in': self.scale_in, 'Scale_out': self.scale_out}

    def test_check_output(self):
        self.check_output()

    def set_scale(self):
        pass

    def set_data_type(OpTest):
        pass


#--------------------test requantize with s8 input--------------------


class TestReQuantizeOp1(TestReQuantizeOp):
    def set_scale(self):
        self.scale_in = 1.5
        self.scale_out = 1.5


class TestReQuantizeOp2(TestReQuantizeOp):
    def set_scale(self):
        self.scale_in = 0.1
        self.scale_out = 0.2


#--------------------test requantize with u8 input--------------------


class TestReQuantizeOp3(TestReQuantizeOp1):
    def set_data_type(self):
        self.data_type = 'uint8'


class TestReQuantizeOp4(TestReQuantizeOp2):
    def set_data_type(self):
        self.data_type = 'uint8'


if __name__ == '__main__':
    unittest.main()
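
The expected output built in setUp follows the usual requantization rule (a sketch restating the test's own math; `input` refers to the tensor generated above):

    # re-scale an int8/uint8 tensor from one quantization scale to another
    scale_shift = scale_out / scale_in
    output = np.round(input.astype('float32') * scale_shift).astype('int8')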
python/paddle/fluid/tests/unittests/test_base_layer.py
...
@@ -16,27 +16,17 @@ import unittest
 import numpy as np

 import paddle.fluid as fluid
-from paddle.fluid.layer_helper import LayerHelper


 class L1(fluid.imperative.Layer):
     def __init__(self, prefix):
         super(L1, self).__init__(prefix)
-        self._helper = LayerHelper(
-            self.full_name(),
-            param_attr=fluid.ParamAttr(
-                initializer=fluid.initializer.Constant(value=0.1)))
-
-        self.w1 = self._helper.create_parameter(
-            attr=self._helper.param_attr,
-            shape=[2, 2],
-            dtype='float32',
-            is_bias=False)
-        self.w2 = self._helper.create_parameter(
-            attr=self._helper.param_attr,
-            shape=[2, 2],
-            dtype='float32',
-            is_bias=False)
+        self._param_attr = fluid.ParamAttr(
+            initializer=fluid.initializer.Constant(value=0.1))
+        self.w1 = self.create_parameter(
+            attr=self._param_attr, shape=[2, 2], dtype='float32', is_bias=False)
+        self.w2 = self.create_parameter(
+            attr=self._param_attr, shape=[2, 2], dtype='float32', is_bias=False)

     def forward(self):
         return self.w1 + self.w2
...
@@ -67,8 +57,8 @@ class TestBaseLayer(unittest.TestCase):
         with fluid.imperative.guard():
             l = L1('test_one_level')
             ret = l()
-            self.assertEqual(l.w1.name, "test_one_level/L1_0_0.w_0")
-            self.assertEqual(l.w2.name, "test_one_level/L1_0_0.w_1")
+            self.assertEqual(l.w1.name, "test_one_level/L1_0.w_0")
+            self.assertEqual(l.w2.name, "test_one_level/L1_0.w_1")
             self.assertTrue(np.allclose(ret._numpy(), 0.2 * np.ones([2, 2])))

     def test_three_level(self):
...
@@ -76,12 +66,12 @@ class TestBaseLayer(unittest.TestCase):
             l = L3('test_three_level')
             names = [p.name for p in l.parameters()]
             ret = l()
-            self.assertEqual(names[0], "test_three_level/L3_0/L2_0/L1_0_0.w_0")
-            self.assertEqual(names[1], "test_three_level/L3_0/L2_0/L1_0_0.w_1")
-            self.assertEqual(names[2], "test_three_level/L3_0/L2_0/L1_1_0.w_0")
-            self.assertEqual(names[3], "test_three_level/L3_0/L2_0/L1_1_0.w_1")
-            self.assertEqual(names[4], "test_three_level/L3_0/L2_1/L1_0_0.w_0")
-            self.assertEqual(names[5], "test_three_level/L3_0/L2_1/L1_0_0.w_1")
+            self.assertEqual(names[0], "test_three_level/L3_0/L2_0/L1_0.w_0")
+            self.assertEqual(names[1], "test_three_level/L3_0/L2_0/L1_0.w_1")
+            self.assertEqual(names[2], "test_three_level/L3_0/L2_0/L1_1.w_0")
+            self.assertEqual(names[3], "test_three_level/L3_0/L2_0/L1_1.w_1")
+            self.assertEqual(names[4], "test_three_level/L3_0/L2_1/L1_0.w_0")
+            self.assertEqual(names[5], "test_three_level/L3_0/L2_1/L1_0.w_1")
             self.assertTrue(np.allclose(ret._numpy(), 0.8 * np.ones([2, 2])))
...
python/paddle/fluid/tests/unittests/test_box_decoder_and_assign_op.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import numpy as np
import sys
import math
from op_test import OpTest


def box_decoder_and_assign(deltas, weights, boxes, box_score, box_clip):
    boxes = boxes.astype(deltas.dtype, copy=False)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    wx, wy, ww, wh = weights
    dx = deltas[:, 0::4] * wx
    dy = deltas[:, 1::4] * wy
    dw = deltas[:, 2::4] * ww
    dh = deltas[:, 3::4] * wh
    # Prevent sending too large values into np.exp()
    dw = np.minimum(dw, box_clip)
    dh = np.minimum(dh, box_clip)
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]
    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    # x1
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
    # y1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
    # x2 (note: "- 1" is correct; don't be fooled by the asymmetry)
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1
    # y2 (note: "- 1" is correct; don't be fooled by the asymmetry)
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1

    output_assign_box = []
    for ino in range(len(pred_boxes)):
        rank = np.argsort(-box_score[ino])
        maxidx = rank[0]
        # class 0 is the background; fall back to the next-best class
        if maxidx == 0:
            maxidx = rank[1]
        beg_pos = maxidx * 4
        end_pos = maxidx * 4 + 4
        output_assign_box.append(pred_boxes[ino, beg_pos:end_pos])
    output_assign_box = np.array(output_assign_box)

    return pred_boxes, output_assign_box


class TestBoxDecoderAndAssignOpWithLoD(OpTest):
    def test_check_output(self):
        self.check_output()

    def setUp(self):
        self.op_type = "box_decoder_and_assign"
        lod = [[4, 8, 8]]
        num_classes = 10
        prior_box = np.random.random((20, 4)).astype('float32')
        prior_box_var = np.array([0.1, 0.1, 0.2, 0.2], dtype=np.float32)
        target_box = np.random.random((20, 4 * num_classes)).astype('float32')
        box_score = np.random.random((20, num_classes)).astype('float32')
        box_clip = 4.135
        output_box, output_assign_box = box_decoder_and_assign(
            target_box, prior_box_var, prior_box, box_score, box_clip)

        self.inputs = {
            'PriorBox': (prior_box, lod),
            'PriorBoxVar': prior_box_var,
            'TargetBox': (target_box, lod),
            'BoxScore': (box_score, lod),
        }
        self.attrs = {'box_clip': box_clip}
        self.outputs = {
            'DecodeBox': output_box,
            'OutputAssignBox': output_assign_box
        }


if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/tests/unittests/test_dist_base.py
...
@@ -115,6 +115,9 @@ class TestDistRunnerBase(object):
             strategy.allow_op_delay = False
         build_stra = fluid.BuildStrategy()

+        # FIXME force disable enable_inplace and memory_optimize
+        build_stra.enable_inplace = False
+        build_stra.memory_optimize = False

         if args.use_reduce:
             build_stra.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
...
python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py
...
@@ -123,6 +123,9 @@ class TestMNIST(TestParallelExecutorBase):
         # NOTE(dzh):
         # need to make it compatible with elewise fuse act
+        # FIXME (liuwei12)
+        # the new memory optimize strategy will crash this unittest
+        # add enable_inplace=False here to force pass the unittest
         not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence(
             model,
             feed_dict={"image": img,
...
@@ -131,6 +134,7 @@ class TestMNIST(TestParallelExecutorBase):
             fuse_elewise_add_act_ops=False,
             memory_opt=False,
             use_ir_memory_optimize=False,
+            enable_inplace=False,
             optimizer=_optimizer)
         fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence(
             model,
...
@@ -140,6 +144,7 @@ class TestMNIST(TestParallelExecutorBase):
             fuse_elewise_add_act_ops=True,
             memory_opt=False,
             use_ir_memory_optimize=False,
+            enable_inplace=False,
             optimizer=_optimizer)

         for loss in zip(not_fuse_op_first_loss, fuse_op_first_loss):
...
python/paddle/fluid/tests/unittests/test_imperative_basic.py (view file @ 847bb6a2)

@@ -53,11 +53,15 @@ class MLP(fluid.imperative.Layer):
         super(MLP, self).__init__(name_scope)
-        self._fc1 = FC(self.full_name(),
-                       3,
-                       fluid.ParamAttr(
-                           initializer=fluid.initializer.Constant(value=0.1)))
-        self._fc2 = FC(self.full_name(),
-                       4,
-                       fluid.ParamAttr(
-                           initializer=fluid.initializer.Constant(value=0.1)))
+        self._fc1 = FC(self.full_name(),
+                       3,
+                       param_attr=fluid.ParamAttr(
+                           initializer=fluid.initializer.Constant(value=0.1)),
+                       bias_attr=fluid.ParamAttr(
+                           initializer=fluid.initializer.Constant(value=0.1)))
+        self._fc2 = FC(self.full_name(),
+                       4,
+                       param_attr=fluid.ParamAttr(
+                           initializer=fluid.initializer.Constant(value=0.1)),
+                       bias_attr=fluid.ParamAttr(
+                           initializer=fluid.initializer.Constant(value=0.1)))

     def forward(self, inputs):
...
@@ -74,41 +78,37 @@ class SimpleRNNCell(fluid.imperative.Layer):
         self.step_input_size = step_input_size
         self.hidden_size = hidden_size
         self.output_size = output_size
-        self._dype = core.VarDesc.VarType.FP32
-        from paddle.fluid.layer_helper import LayerHelper
-        self._helper = LayerHelper('SimpleRNNCell', act="tanh", param_attr=param_attr)
+        self._dtype = core.VarDesc.VarType.FP32
+        self.param_attr = param_attr

     def _build_once(self, inputs, pre_hidden):
         i2h_param_shape = [self.step_input_size, self.hidden_size]
         h2h_param_shape = [self.hidden_size, self.hidden_size]
         h2o_param_shape = [self.output_size, self.hidden_size]
-        self._i2h_w = self._helper.create_parameter(
-            attr=self._helper.param_attr,
+        self._i2h_w = self.create_parameter(
+            attr=self.param_attr,
             shape=i2h_param_shape,
             dtype=self._dtype,
             is_bias=False)
-        self._h2h_w = self._helper.create_parameter(
-            attr=self._helper.param_attr,
+        self._h2h_w = self.create_parameter(
+            attr=self.param_attr,
             shape=h2h_param_shape,
             dtype=self._dtype,
             is_bias=False)
-        self._h2o_w = self._helper.create_parameter(
-            attr=self._helper.param_attr,
+        self._h2o_w = self.create_parameter(
+            attr=self.param_attr,
             shape=h2o_param_shape,
             dtype=self._dtype,
             is_bias=False)

     def forward(self, input, pre_hidden):
-        tmp_i2h = self._helper.create_variable_for_type_inference(self._dtype)
-        tmp_h2h = self._helper.create_variable_for_type_inference(self._dtype)
-        hidden = self._helper.create_variable_for_type_inference(self._dype)
-        out = self._helper.create_variable_for_type_inference(self._dype)
-        softmax_out = self._helper.create_variable_for_type_inference(self._dtype)
-        reduce_out = self._helper.create_variable_for_type_inference(self._dtype)
+        tmp_i2h = self.create_variable(dtype=self._dtype)
+        tmp_h2h = self.create_variable(dtype=self._dtype)
+        hidden = self.create_variable(dtype=self._dtype)
+        out = self.create_variable(dtype=self._dtype)
+        softmax_out = self.create_variable(dtype=self._dtype)
+        reduce_out = self.create_variable(dtype=self._dtype)
         self._helper.append_op(
             type="mul",
             inputs={"X": input,
...
@@ -132,7 +132,7 @@ class SimpleRNNCell(fluid.imperative.Layer):
             outputs={'Out': hidden},
             attrs={'axis': -1,
                    'use_mkldnn': False})
-        hidden = self._helper.append_activation(hidden)
+        hidden = self._helper.append_activation(hidden, act='tanh')
         self._helper.append_op(
             type="mul",
...
@@ -174,7 +174,7 @@ class SimpleRNN(fluid.imperative.Layer):
         outs = list()
         pre_hiddens = list()

-        init_hidden = fluid.layers.tensor.create_parameter(
+        init_hidden = self.create_parameter(
             attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(value=0.1)),
             shape=[1, 3],
...
@@ -337,10 +337,10 @@ class TestImperative(unittest.TestCase):
         self.assertTrue(np.allclose(dy_grad, static_grad))

         params = mlp.parameters(True)
-        self.assertEqual("mlp/MLP_0/FC_0_0.w_0", params[0].name)
-        self.assertEqual("mlp/MLP_0/FC_0_0.b_0", params[1].name)
-        self.assertEqual("mlp/MLP_0/FC_1_0.w_0", params[2].name)
-        self.assertEqual("mlp/MLP_0/FC_1_0.b_0", params[3].name)
+        self.assertEqual("mlp/MLP_0/FC_0.w_0", params[0].name)
+        self.assertEqual("mlp/MLP_0/FC_0.b_0", params[1].name)
+        self.assertEqual("mlp/MLP_0/FC_1.w_0", params[2].name)
+        self.assertEqual("mlp/MLP_0/FC_1.b_0", params[3].name)
         self.assertEqual(len(params), 4)

         sublayers = mlp.sublayers(True)
...
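The substance of this file's changes (and of test_imperative_ptb_rnn.py below) is the migration away from hand-built LayerHelper instances to methods that fluid.imperative.Layer now provides itself: create_parameter and create_variable. A minimal sketch of the new pattern; the class name and shape are hypothetical:

import paddle.fluid as fluid

class TinyLayer(fluid.imperative.Layer):
    def __init__(self, name_scope):
        super(TinyLayer, self).__init__(name_scope)
        self.param_attr = fluid.ParamAttr()

    def _build_once(self, inputs):
        # Parameters and intermediate variables now come from Layer itself,
        # mirroring the replacements made in the diff above.
        self._w = self.create_parameter(
            attr=self.param_attr,
            shape=[4, 4],          # hypothetical shape for the sketch
            dtype='float32',
            is_bias=False)
        self._tmp = self.create_variable(dtype='float32')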
python/paddle/fluid/tests/unittests/test_imperative_optimizer.py (view file @ 847bb6a2)

@@ -78,7 +78,7 @@ class SimpleImgConvPool(fluid.imperative.Layer):

 class MNIST(fluid.imperative.Layer):
-    def __init__(self, name_scope, param_attr=None, bias_attr=None):
+    def __init__(self, name_scope):
         super(MNIST, self).__init__(name_scope)

         self._simple_img_conv_pool_1 = SimpleImgConvPool(
...
python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py (view file @ 847bb6a2)

@@ -41,19 +41,17 @@ class SimpleLSTMRNN(fluid.imperative.Layer):
         self._dropout = dropout
         self._input = None
         self._num_steps = num_steps
-        from paddle.fluid.layer_helper import LayerHelper
-        self._helper = LayerHelper('SimpleLSTMRNN', act="tanh")
+        self.cell_array = []
+        self.hidden_array = []

     def _build_once(self, input_embedding, init_hidden=None, init_cell=None):
         self.weight_1_arr = []
         self.weight_2_arr = []
         self.bias_arr = []
-        self.hidden_array = []
-        self.cell_array = []
         self.mask_array = []

         for i in range(self._num_layers):
-            weight_1 = self._helper.create_parameter(
+            weight_1 = self.create_parameter(
                 attr=fluid.ParamAttr(
                     initializer=fluid.initializer.UniformInitializer(
                         low=-self._init_scale, high=self._init_scale)),
...
@@ -62,7 +60,7 @@ class SimpleLSTMRNN(fluid.imperative.Layer):
                 default_initializer=fluid.initializer.UniformInitializer(
                     low=-self._init_scale, high=self._init_scale))
             self.weight_1_arr.append(weight_1)
-            bias_1 = self._helper.create_parameter(
+            bias_1 = self.create_parameter(
                 attr=fluid.ParamAttr(
                     initializer=fluid.initializer.UniformInitializer(
                         low=-self._init_scale, high=self._init_scale)),
...
@@ -71,6 +69,11 @@ class SimpleLSTMRNN(fluid.imperative.Layer):
                 default_initializer=fluid.initializer.Constant(0.0))
             self.bias_arr.append(bias_1)

+    def forward(self, input_embedding, init_hidden=None, init_cell=None):
+        self.cell_array = []
+        self.hidden_array = []
+
         for i in range(self._num_layers):
             pre_hidden = fluid.layers.slice(
                 init_hidden, axes=[0], starts=[i], ends=[i + 1])
             pre_cell = fluid.layers.slice(
...
@@ -82,7 +85,6 @@ class SimpleLSTMRNN(fluid.imperative.Layer):
             self.hidden_array.append(pre_hidden)
             self.cell_array.append(pre_cell)

-    def forward(self, input_embedding, init_hidden=None, init_cell=None):
         res = []
         for index in range(self._num_steps):
             self._input = fluid.layers.slice(
...
@@ -145,8 +147,6 @@ class PtbModel(fluid.imperative.Layer):
         self.num_layers = num_layers
         self.num_steps = num_steps
         self.dropout = dropout
-        from paddle.fluid.layer_helper import LayerHelper
-        self._helper = LayerHelper('PtbModel', act="tanh")
         self.simple_lstm_rnn = SimpleLSTMRNN(
             self.full_name(),
             hidden_size,
...
@@ -163,13 +163,13 @@ class PtbModel(fluid.imperative.Layer):
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale)))
-        self.softmax_weight = self._helper.create_parameter(
+        self.softmax_weight = self.create_parameter(
             attr=fluid.ParamAttr(),
             shape=[self.hidden_size, self.vocab_size],
             dtype="float32",
             default_initializer=fluid.initializer.UniformInitializer(
                 low=-self.init_scale, high=self.init_scale))
-        self.softmax_bias = self._helper.create_parameter(
+        self.softmax_bias = self.create_parameter(
             attr=fluid.ParamAttr(),
             shape=[self.vocab_size],
             dtype="float32",
...
@@ -180,7 +180,6 @@ class PtbModel(fluid.imperative.Layer):
         pass

     def forward(self, input, label, init_hidden, init_cell):
         init_h = fluid.layers.reshape(
             init_hidden, shape=[self.num_layers, -1, self.hidden_size])
...
python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py (new file, 0 → 100644, view file @ 847bb6a2)

# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# nlp model stack of op operate on lod. It's a classical test case in optimize pass.

from __future__ import print_function

import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import unittest
import paddle.fluid.core as core

from paddle.fluid import compiler, Program, program_guard
from paddle.fluid.executor import Executor
from paddle.fluid.backward import append_backward
from paddle.fluid.optimizer import MomentumOptimizer
from ir_memory_optimize_net_base import TestIrMemOptBase


class TestIrMemoryOptimizeIfElseOp(unittest.TestCase):
    def check_network_convergence(self, use_cuda=True, py_opt=False,
                                  iter_num=5):
        prog = Program()
        startup_prog = Program()
        prog.random_seed = 100
        startup_prog.random_seed = 100
        with program_guard(prog, startup_prog):
            image = layers.data(name='x', shape=[784], dtype='float32')
            label = layers.data(name='y', shape=[1], dtype='int64')
            limit = layers.fill_constant(shape=[1], dtype='int64', value=5)
            cond = layers.less_than(x=label, y=limit)
            ie = layers.IfElse(cond)
            with ie.true_block():
                true_image = ie.input(image)
                hidden = layers.fc(input=true_image, size=100, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                ie.output(prob)

            with ie.false_block():
                false_image = ie.input(image)
                hidden = layers.fc(input=false_image, size=200, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                ie.output(prob)

            prob = ie()
            loss = layers.cross_entropy(input=prob[0], label=label)
            avg_loss = layers.mean(loss)

            optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
            optimizer.minimize(avg_loss, startup_prog)

            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=200)

            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
            exe = Executor(place)

            exec_strategy = fluid.ExecutionStrategy()
            exec_strategy.use_cuda = use_cuda

            if py_opt:
                fluid.memory_optimize(fluid.default_main_program())
            train_cp = compiler.CompiledProgram(fluid.default_main_program())
            train_cp = train_cp.with_data_parallel(
                loss_name=avg_loss.name, exec_strategy=exec_strategy)
            fetch_list = [avg_loss.name]

            exe.run(startup_prog)
            PASS_NUM = 100
            loop = 0
            ret = []
            for pass_id in range(PASS_NUM):
                for data in train_reader():
                    x_data = np.array([x[0] for x in data]).astype("float32")
                    y_data = np.array([x[1] for x in data]).astype("int64")
                    y_data = y_data.reshape((y_data.shape[0], 1))

                    outs = exe.run(train_cp,
                                   feed={'x': x_data,
                                         'y': y_data},
                                   fetch_list=[avg_loss])

                    loop += 1
                    ret.append(outs[0])
                    if iter_num == loop:
                        return ret
            return ret

    def test_ifelse(self):
        ret1 = self.check_network_convergence(False, True)
        print(ret1)
        ret2 = self.check_network_convergence(False, False)
        print(ret2)
        self.assertTrue(np.allclose(ret1, ret2))

        if fluid.core.is_compiled_with_cuda():
            ret1 = self.check_network_convergence(True, True)
            print(ret1)
            ret2 = self.check_network_convergence(True, False)
            print(ret2)
            self.assertTrue(np.allclose(ret1, ret2))
            #self.assertEqual(ret1, ret2)


if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/test_layers.py (view file @ 847bb6a2)

@@ -1035,6 +1035,19 @@ class TestBook(unittest.TestCase):
             print(str(program))

+    def test_spectral_norm(self):
+        program = Program()
+        with program_guard(program):
+            weight = layers.data(
+                name='weight',
+                shape=[2, 3, 32, 32],
+                dtype="float32",
+                append_batch_size=False)
+            out = layers.spectral_norm(weight, dim=1, power_iters=1)
+            self.assertIsNotNone(out)
+            print(str(program))
+
     def test_shuffle_channel(self):
         program = Program()
         with program_guard(program):
...
python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py (view file @ 847bb6a2)

@@ -59,8 +59,12 @@ class TestFetchAndFeed(unittest.TestCase):
         exe = fluid.Executor(place)
         exe.run(startup)

+        #FIXME force disable enable_inplace and memory_optimize to pass the unittest
+        build_strategy = fluid.BuildStrategy()
+        build_strategy.enable_inplace = False
+        build_strategy.memory_optimize = False
         train_cp = compiler.CompiledProgram(main_program).with_data_parallel(
-            loss_name=loss.name)
+            loss_name=loss.name, build_strategy=build_strategy)

         run_parallel_exe(train_cp, exe, use_cuda, data, label, loss)
...
python/paddle/fluid/tests/unittests/test_pass_builder.py (view file @ 847bb6a2)

@@ -96,6 +96,9 @@ class TestPassBuilder(unittest.TestCase):
         build_strategy = fluid.BuildStrategy()
         self.assertFalse(build_strategy.fuse_elewise_add_act_ops)
         build_strategy.fuse_elewise_add_act_ops = True
+        #FIXME: currently fuse_elewise_add_act_ops not compatible with below options
+        build_strategy.enable_inplace = False
+        build_strategy.memory_optimize = False
         pass_builder = build_strategy._finalize_strategy_and_create_passes()
         self.assertTrue("fuse_elewise_add_act_pass" in
                         [p.type() for p in pass_builder.all_passes()])
...
python/paddle/fluid/tests/unittests/test_py_func_op.py (view file @ 847bb6a2)

@@ -142,6 +142,10 @@ def test_main(use_cuda, use_py_func_op, use_parallel_executor):
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())

+    #FIXME force use old memory optimize strategy here to pass the unittest,
+    #since enabling the new strategy will crash the unittest
+    fluid.memory_optimize(fluid.default_main_program())
+
     train_cp = compiler.CompiledProgram(fluid.default_main_program())
     if use_parallel_executor:
         train_cp = train_cp.with_data_parallel(loss_name=loss.name)
...
python/paddle/fluid/tests/unittests/test_sequence_erase_op.py (view file @ 847bb6a2)

@@ -49,6 +49,21 @@ class TestSequenceEraseOpInt32(OpTest):
         self.check_output()


+class TestSequenceEraseOpInt32LoD2(OpTest):
+    def setUp(self):
+        self.op_type = "sequence_erase"
+        in_seq = np.random.randint(0, 10, (30, 1)).astype("int32")
+        lod = [[1, 3], [9, 4, 11, 6]]
+        tokens = [2, 3, 5]
+        out_seq, new_lod0 = sequence_erase(in_seq, lod[-1], tokens)
+        self.attrs = {'tokens': tokens}
+        self.inputs = {'X': (in_seq, lod)}
+        self.outputs = {'Out': (out_seq, lod[:-1] + [new_lod0])}
+
+    def test_check_output(self):
+        self.check_output()
+
+
 class TestSequenceEraseOpInt64(OpTest):
     def setUp(self):
         self.op_type = "sequence_erase"
...
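The new case feeds a 2-level LoD; only the last (finest) level describes the token sub-sequences, which is why the reference helper is called with lod[-1] and only new_lod0 is recomputed while the outer level is kept. The real sequence_erase helper is defined earlier in this file; the following reimplementation is only an illustrative sketch of the semantics being tested:

import numpy as np

def sequence_erase_sketch(in_seq, lod0, tokens):
    # Remove every element of `tokens` from each sub-sequence described
    # by the last-level LoD, and return the new sequence lengths.
    out_seq, new_lod0, offset = [], [], 0
    for seq_len in lod0:
        kept = [x for x in in_seq[offset:offset + seq_len].flatten()
                if x not in tokens]
        out_seq.extend(kept)
        new_lod0.append(len(kept))
        offset += seq_len
    return np.array(out_seq).reshape([-1, 1]), new_lod0

out, new_lod0 = sequence_erase_sketch(
    np.random.randint(0, 10, (30, 1)).astype("int32"), [9, 4, 11, 6], [2, 3, 5])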
python/paddle/fluid/tests/unittests/test_spectral_norm_op.py (new file, 0 → 100644, view file @ 847bb6a2)

# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import division

import unittest
import numpy as np
from op_test import OpTest
from paddle.fluid import core


def spectral_norm(weight, u, v, dim, power_iters, eps):
    shape = weight.shape
    weight_mat = weight.copy()
    h = shape[dim]
    w = np.prod(shape) // h
    if dim != 0:
        perm = [dim] + [d for d in range(len(shape)) if d != dim]
        weight_mat = weight_mat.transpose(perm)
    weight_mat = weight_mat.reshape((h, w))

    u = u.reshape((h, 1))
    v = v.reshape((w, 1))
    for i in range(power_iters):
        v = np.matmul(weight_mat.T, u)
        v_norm = np.sqrt((v * v).sum())
        v = v / (v_norm + eps)
        u = np.matmul(weight_mat, v)
        u_norm = np.sqrt((u * u).sum())
        u = u / (u_norm + eps)

    sigma = (u * np.matmul(weight_mat, v)).sum()
    return weight / sigma
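The loop above is plain power iteration: sigma converges to the largest singular value of the reshaped weight matrix, so the returned tensor has spectral norm close to 1. A quick sanity check of the reference function against NumPy's SVD (standalone, not part of the test file; the seed, iteration count, and tolerance are arbitrary):

import numpy as np

np.random.seed(0)
w = np.random.random((4, 5)).astype('float32')
u0 = np.random.normal(0., 1., (4, )).astype('float32')
v0 = np.random.normal(0., 1., (5, )).astype('float32')

# Uses the spectral_norm reference above; 50 iterations is plenty here.
normalized = spectral_norm(w, u0, v0, dim=0, power_iters=50, eps=1e-12)
sigma_svd = np.linalg.svd(w, compute_uv=False)[0]
assert np.allclose(w / sigma_svd, normalized, atol=1e-3)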
class TestSpectralNormOpNoGrad(OpTest):
    def setUp(self):
        self.initTestCase()
        self.op_type = 'spectral_norm'
        weight = np.random.random(self.weight_shape).astype('float32')
        u = np.random.normal(0., 1., self.u_shape).astype('float32')
        v = np.random.normal(0., 1., self.v_shape).astype('float32')

        self.attrs = {
            "dim": self.dim,
            "power_iters": self.power_iters,
            "eps": self.eps,
        }

        self.inputs = {
            "Weight": weight,
            "U": u,
            "V": v,
        }

        output = spectral_norm(weight, u, v, self.dim, self.power_iters,
                               self.eps)
        self.outputs = {"Out": output}

    def test_check_output(self):
        self.check_output()

    def initTestCase(self):
        self.weight_shape = (2, 3)
        self.u_shape = (2, )
        self.v_shape = (3, )
        self.dim = 0
        self.power_iters = 5
        self.eps = 1e-12


class TestSpectralNormOpNoGrad2(TestSpectralNormOpNoGrad):
    def initTestCase(self):
        self.weight_shape = (2, 3, 3, 3)
        self.u_shape = (3, )
        self.v_shape = (18, )
        self.dim = 1
        self.power_iters = 10
        self.eps = 1e-12


class TestSpectralNormOp(TestSpectralNormOpNoGrad):
    def test_check_grad_ignore_uv(self):
        self.check_grad(
            ['Weight'],
            'Out',
            no_grad_set=set(["U", "V"]),
            max_relative_error=0.1)

    def initTestCase(self):
        self.weight_shape = (2, 3)
        self.u_shape = (2, )
        self.v_shape = (3, )
        self.dim = 0
        self.power_iters = 0
        self.eps = 1e-12


class TestSpectralNormOp2(TestSpectralNormOp):
    def initTestCase(self):
        self.weight_shape = (2, 3, 3, 3)
        self.u_shape = (3, )
        self.v_shape = (18, )
        self.dim = 1
        self.power_iters = 0
        self.eps = 1e-12


if __name__ == "__main__":
    unittest.main()