Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
40dbd97f
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
694
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
40dbd97f
编写于
9月 06, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'ups/develop' into refine/op/peephole
上级
3eb55f06
b8057515
变更
27
隐藏空白更改
内联
并排
Showing
27 changed file
with
868 addition
and
279 deletion
+868
-279
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
+0
-3
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+1
-1
paddle/fluid/framework/ir/graph_viz_pass.cc
paddle/fluid/framework/ir/graph_viz_pass.cc
+42
-17
paddle/fluid/inference/analysis/analyzer.cc
paddle/fluid/inference/analysis/analyzer.cc
+0
-1
paddle/fluid/inference/analysis/analyzer_tester.cc
paddle/fluid/inference/analysis/analyzer_tester.cc
+84
-80
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+0
-1
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+41
-0
paddle/fluid/operators/fake_quantize_op.cu
paddle/fluid/operators/fake_quantize_op.cu
+2
-1
paddle/fluid/operators/flatten_op.cc
paddle/fluid/operators/flatten_op.cc
+115
-0
paddle/fluid/operators/layer_norm_op.cu
paddle/fluid/operators/layer_norm_op.cu
+7
-7
paddle/fluid/operators/reshape_op.cc
paddle/fluid/operators/reshape_op.cc
+100
-0
paddle/fluid/operators/squeeze_op.cc
paddle/fluid/operators/squeeze_op.cc
+119
-7
paddle/fluid/operators/transpose_op.cc
paddle/fluid/operators/transpose_op.cc
+103
-3
paddle/fluid/operators/transpose_op.cu.cc
paddle/fluid/operators/transpose_op.cu.cc
+7
-0
paddle/fluid/operators/unsqueeze_op.cc
paddle/fluid/operators/unsqueeze_op.cc
+117
-6
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+2
-0
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+21
-11
python/paddle/fluid/tests/unittests/dist_transformer.py
python/paddle/fluid/tests/unittests/dist_transformer.py
+20
-14
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+13
-5
python/paddle/fluid/tests/unittests/test_flatten_op.py
python/paddle/fluid/tests/unittests/test_flatten_op.py
+6
-3
python/paddle/fluid/tests/unittests/test_prelu_op.py
python/paddle/fluid/tests/unittests/test_prelu_op.py
+13
-9
python/paddle/fluid/tests/unittests/test_reshape_op.py
python/paddle/fluid/tests/unittests/test_reshape_op.py
+30
-94
python/paddle/fluid/tests/unittests/test_squeeze_op.py
python/paddle/fluid/tests/unittests/test_squeeze_op.py
+6
-3
python/paddle/fluid/tests/unittests/test_transpose_op.py
python/paddle/fluid/tests/unittests/test_transpose_op.py
+7
-4
python/paddle/fluid/tests/unittests/test_unsqueeze_op.py
python/paddle/fluid/tests/unittests/test_unsqueeze_op.py
+6
-3
python/paddle/fluid/transpiler/details/program_utils.py
python/paddle/fluid/transpiler/details/program_utils.py
+1
-1
python/paddle/fluid/transpiler/distribute_transpiler.py
python/paddle/fluid/transpiler/distribute_transpiler.py
+5
-5
未找到文件。
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
浏览文件 @
40dbd97f
...
...
@@ -13,13 +13,10 @@
// limitations under the License.
#include "paddle/fluid/framework/ir/attention_lstm_fuse_pass.h"
#include <string>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/api/helper.h"
namespace
paddle
{
namespace
framework
{
...
...
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
40dbd97f
...
...
@@ -85,7 +85,7 @@ void GraphPatternDetector::operator()(Graph* graph,
LOG
(
INFO
)
<<
"detect "
<<
subgraphs
.
size
()
<<
" subgraph matches the pattern"
;
int
id
=
0
;
for
(
auto
&
g
:
subgraphs
)
{
LOG
(
INFO
)
<<
"optimizing #"
<<
id
++
<<
" subgraph"
;
VLOG
(
3
)
<<
"optimizing #"
<<
id
++
<<
" subgraph"
;
handler
(
g
,
graph
);
}
}
...
...
paddle/fluid/framework/ir/graph_viz_pass.cc
浏览文件 @
40dbd97f
...
...
@@ -50,20 +50,37 @@ std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl(
Dot
dot
;
std
::
vector
<
Dot
::
Attr
>
op_attrs
({
Dot
::
Attr
(
"style"
,
"filled"
),
Dot
::
Attr
(
"shape"
,
"box"
),
Dot
::
Attr
(
"fillcolor"
,
"red"
)});
std
::
vector
<
Dot
::
Attr
>
var_attrs
({
Dot
::
Attr
(
"style"
,
"filled,rounded"
),
// Dot::Attr("shape", "diamond"),
Dot
::
Attr
(
"fillcolor"
,
"yellow"
)});
std
::
vector
<
Dot
::
Attr
>
marked_op_attrs
({
Dot
::
Attr
(
"style"
,
"filled"
),
Dot
::
Attr
(
"shape"
,
"box"
),
Dot
::
Attr
(
"fillcolor"
,
"lightgray"
)});
std
::
vector
<
Dot
::
Attr
>
marked_var_attrs
(
{
Dot
::
Attr
(
"style"
,
"filled,rounded"
),
// Dot::Attr("shape", "diamond"),
Dot
::
Attr
(
"fillcolor"
,
"lightgray"
)});
const
std
::
vector
<
Dot
::
Attr
>
op_attrs
({
Dot
::
Attr
(
"style"
,
"rounded,filled,bold"
),
//
Dot
::
Attr
(
"shape"
,
"box"
),
//
Dot
::
Attr
(
"color"
,
"#303A3A"
),
//
Dot
::
Attr
(
"fontcolor"
,
"#ffffff"
),
//
Dot
::
Attr
(
"width"
,
"1.3"
),
//
Dot
::
Attr
(
"height"
,
"0.84"
),
//
Dot
::
Attr
(
"fontname"
,
"Arial"
),
//
});
const
std
::
vector
<
Dot
::
Attr
>
arg_attrs
({
Dot
::
Attr
(
"shape"
,
"box"
),
//
Dot
::
Attr
(
"style"
,
"rounded,filled,bold"
),
//
Dot
::
Attr
(
"fontname"
,
"Arial"
),
//
Dot
::
Attr
(
"fillcolor"
,
"#999999"
),
//
Dot
::
Attr
(
"color"
,
"#dddddd"
),
//
});
const
std
::
vector
<
Dot
::
Attr
>
param_attrs
({
Dot
::
Attr
(
"shape"
,
"box"
),
//
Dot
::
Attr
(
"style"
,
"rounded,filled,bold"
),
//
Dot
::
Attr
(
"fontname"
,
"Arial"
),
//
Dot
::
Attr
(
"color"
,
"#148b97"
),
//
Dot
::
Attr
(
"fontcolor"
,
"#ffffff"
),
//
});
const
std
::
vector
<
Dot
::
Attr
>
marked_op_attrs
(
{
Dot
::
Attr
(
"style"
,
"rounded,filled,bold"
),
Dot
::
Attr
(
"shape"
,
"box"
),
Dot
::
Attr
(
"fillcolor"
,
"yellow"
)});
const
std
::
vector
<
Dot
::
Attr
>
marked_var_attrs
(
{
Dot
::
Attr
(
"style"
,
"filled,rounded"
),
Dot
::
Attr
(
"shape"
,
"box"
),
Dot
::
Attr
(
"fillcolor"
,
"yellow"
)});
auto
marked_nodes
=
ConsumeMarkedNodes
(
graph
.
get
());
// Create nodes
...
...
@@ -74,9 +91,17 @@ std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl(
marked_nodes
.
count
(
n
)
?
marked_op_attrs
:
op_attrs
;
dot
.
AddNode
(
node_id
,
attr
,
node_id
);
}
else
if
(
n
->
IsVar
())
{
decltype
(
op_attrs
)
attr
=
marked_nodes
.
count
(
n
)
?
marked_var_attrs
:
var_attrs
;
dot
.
AddNode
(
node_id
,
attr
,
node_id
);
decltype
(
op_attrs
)
*
attr
;
if
(
marked_nodes
.
count
(
n
))
{
attr
=
&
marked_var_attrs
;
}
else
if
(
const_cast
<
Node
*>
(
n
)
->
Var
()
&&
const_cast
<
Node
*>
(
n
)
->
Var
()
->
Persistable
())
{
attr
=
&
param_attrs
;
}
else
{
attr
=
&
arg_attrs
;
}
dot
.
AddNode
(
node_id
,
*
attr
,
node_id
);
}
node2dot
[
n
]
=
node_id
;
}
...
...
paddle/fluid/inference/analysis/analyzer.cc
浏览文件 @
40dbd97f
...
...
@@ -106,7 +106,6 @@ void Analyzer::Run(Argument* argument) {
}
}
passes
.
push_back
(
"graph_viz_pass"
);
// Ugly support fluid-to-ir-pass
argument
->
Set
(
kFluidToIrPassesAttr
,
new
std
::
vector
<
std
::
string
>
(
passes
));
for
(
auto
&
x
:
data_
)
{
...
...
paddle/fluid/inference/analysis/analyzer_tester.cc
浏览文件 @
40dbd97f
...
...
@@ -16,6 +16,7 @@
#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
#include <thread> // NOLINT
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
...
...
@@ -24,12 +25,12 @@
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/profiler.h"
DEFINE_string
(
infer_ditu_rnn_model
,
""
,
"model path for ditu RNN"
);
DEFINE_string
(
infer_ditu_rnn_data
,
""
,
"data path for ditu RNN"
);
DEFINE_int32
(
batch_size
,
10
,
"batch size."
);
DEFINE_int32
(
repeat
,
1
,
"Running the inference program repeat times."
);
DEFINE_int32
(
num_threads
,
1
,
"Running the inference program in multi-threads."
);
namespace
paddle
{
namespace
inference
{
...
...
@@ -220,39 +221,6 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
}
}
std
::
string
DescribeTensor
(
const
PaddleTensor
&
tensor
)
{
std
::
stringstream
os
;
os
<<
"Tensor ["
<<
tensor
.
name
<<
"]
\n
"
;
os
<<
" - type: "
;
switch
(
tensor
.
dtype
)
{
case
PaddleDType
::
FLOAT32
:
os
<<
"float32"
;
break
;
case
PaddleDType
::
INT64
:
os
<<
"int64"
;
break
;
default:
os
<<
"unset"
;
}
os
<<
'\n'
;
os
<<
" - shape: "
<<
to_string
(
tensor
.
shape
)
<<
'\n'
;
os
<<
" - lod: "
;
for
(
auto
&
l
:
tensor
.
lod
)
{
os
<<
to_string
(
l
)
<<
"; "
;
}
os
<<
"
\n
"
;
os
<<
" - data: "
;
int
dim
=
std
::
accumulate
(
tensor
.
shape
.
begin
(),
tensor
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
for
(
int
i
=
0
;
i
<
dim
;
i
++
)
{
os
<<
static_cast
<
float
*>
(
tensor
.
data
.
data
())[
i
]
<<
" "
;
}
os
<<
'\n'
;
return
os
.
str
();
}
}
// namespace
const
float
ditu_rnn_target_data
[]
=
{
...
...
@@ -266,11 +234,29 @@ const float ditu_rnn_target_data[] = {
10.7286
,
12.0595
,
10.6672
,
0
,
0
,
0
,
0
,
0
,
93.5771
,
3.84641
,
0
,
0
,
0
,
0
,
0
,
0
,
169.426
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
void
CompareResult
(
const
std
::
vector
<
PaddleTensor
>
&
outputs
,
const
std
::
vector
<
PaddleTensor
>
&
base_outputs
)
{
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
base_outputs
.
size
());
for
(
size_t
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
auto
&
out
=
outputs
[
i
];
auto
&
base_out
=
base_outputs
[
i
];
size_t
size
=
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
size_t
size1
=
std
::
accumulate
(
base_out
.
shape
.
begin
(),
base_out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
PADDLE_ENFORCE_EQ
(
size
,
size1
);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
data
=
static_cast
<
float
*>
(
out
.
data
.
data
());
float
*
base_data
=
static_cast
<
float
*>
(
base_out
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
EXPECT_NEAR
(
data
[
i
],
base_data
[
i
],
1e-3
);
}
}
}
// Test with a really complicate model.
void
TestDituRNNPrediction
(
const
std
::
string
&
model_path
,
const
std
::
string
&
data_path
,
int
batch_size
,
bool
use_analysis
,
bool
activate_ir
,
int
num_times
=
1
)
{
void
TestDituRNNPrediction
(
bool
use_analysis
,
bool
activate_ir
,
int
num_threads
)
{
AnalysisConfig
config
;
config
.
prog_file
=
FLAGS_infer_ditu_rnn_model
+
"/__model__"
;
config
.
param_file
=
FLAGS_infer_ditu_rnn_model
+
"/param"
;
...
...
@@ -281,6 +267,8 @@ void TestDituRNNPrediction(const std::string &model_path,
PADDLE_ENFORCE
(
config
.
ir_mode
==
AnalysisConfig
::
IrPassMode
::
kExclude
);
// default
config
.
ir_passes
.
clear
();
// Do not exclude any pass.
int
batch_size
=
FLAGS_batch_size
;
int
num_times
=
FLAGS_repeat
;
auto
base_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
...
...
@@ -288,40 +276,55 @@ void TestDituRNNPrediction(const std::string &model_path,
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
std
::
vector
<
PaddleTensor
>
input_slots
;
DataRecord
data
(
data_path
,
batch_size
);
DataRecord
data
(
FLAGS_infer_ditu_rnn_data
,
batch_size
);
// Prepare inputs.
PrepareInputs
(
&
input_slots
,
&
data
,
batch_size
);
std
::
vector
<
PaddleTensor
>
outputs
,
base_outputs
;
base_predictor
->
Run
(
input_slots
,
&
base_outputs
);
Timer
timer
;
timer
.
tic
();
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs
);
}
LOG
(
INFO
)
<<
"===========profile result==========="
;
LOG
(
INFO
)
<<
"batch_size: "
<<
batch_size
<<
", repeat: "
<<
num_times
<<
", latency: "
<<
timer
.
toc
()
/
num_times
<<
"ms"
;
LOG
(
INFO
)
<<
"====================================="
;
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
base_outputs
.
size
());
for
(
size_t
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
auto
&
out
=
outputs
[
i
];
auto
&
base_out
=
base_outputs
[
i
];
size_t
size
=
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
size_t
size1
=
std
::
accumulate
(
base_out
.
shape
.
begin
(),
base_out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
PADDLE_ENFORCE_EQ
(
size
,
size1
);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
data
=
static_cast
<
float
*>
(
out
.
data
.
data
());
float
*
base_data
=
static_cast
<
float
*>
(
base_out
.
data
.
data
());
for
(
size_t
j
=
0
;
j
<
size
;
j
++
)
{
EXPECT_NEAR
(
data
[
j
],
base_data
[
j
],
1e-3
);
if
(
num_threads
==
1
)
{
// Prepare inputs.
Timer
timer
;
timer
.
tic
();
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs
);
}
PrintTime
(
batch_size
,
num_times
,
1
,
0
,
timer
.
toc
()
/
num_times
);
CompareResult
(
outputs
,
base_outputs
);
}
else
{
std
::
vector
<
std
::
thread
>
threads
;
std
::
vector
<
std
::
unique_ptr
<
PaddlePredictor
>>
predictors
;
// TODO(yanchunwei): Bug here, the analyzer phase can't be parallelled
// because AttentionLSTM's hard code nodeid will be damanged.
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
predictors
.
emplace_back
(
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
));
}
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
threads
.
emplace_back
([
&
,
tid
]()
{
// Each thread should have local input_slots and outputs.
std
::
vector
<
PaddleTensor
>
input_slots
;
DataRecord
data
(
FLAGS_infer_ditu_rnn_data
,
batch_size
);
PrepareInputs
(
&
input_slots
,
&
data
,
batch_size
);
std
::
vector
<
PaddleTensor
>
outputs
;
Timer
timer
;
timer
.
tic
();
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
predictors
[
tid
]
->
Run
(
input_slots
,
&
outputs
);
}
PrintTime
(
batch_size
,
num_times
,
num_threads
,
tid
,
timer
.
toc
()
/
num_times
);
CompareResult
(
outputs
,
base_outputs
);
});
}
for
(
int
i
=
0
;
i
<
num_threads
;
++
i
)
{
threads
[
i
].
join
();
}
}
LOG
(
INFO
)
<<
"====================================="
;
if
(
use_analysis
&&
activate_ir
)
{
AnalysisPredictor
*
analysis_predictor
=
...
...
@@ -350,25 +353,26 @@ void TestDituRNNPrediction(const std::string &model_path,
}
}
// Directly infer with the original model.
TEST
(
Analyzer
,
DituRNN_without_analysis
)
{
TestDituRNNPrediction
(
FLAGS_infer_ditu_rnn_model
,
FLAGS_infer_ditu_rnn_data
,
FLAGS_batch_size
,
false
,
false
,
FLAGS_repeat
);
// Inference with analysis and IR, easy for profiling independently.
TEST
(
Analyzer
,
DituRNN
)
{
TestDituRNNPrediction
(
true
,
true
,
FLAGS_num_threads
);
}
// Inference with the original model with the analysis turned on, the analysis
// module will transform the program to a data flow graph.
TEST
(
Analyzer
,
DituRNN_with_analysis
)
{
LOG
(
INFO
)
<<
"ditu rnn with analysis"
;
TestDituRNNPrediction
(
FLAGS_infer_ditu_rnn_model
,
FLAGS_infer_ditu_rnn_data
,
FLAGS_batch_size
,
true
,
false
,
FLAGS_repeat
);
}
// Inference with analysis and IR. The IR module will fuse some large kernels.
TEST
(
Analyzer
,
DituRNN_with_analysis_with_IR
)
{
LOG
(
INFO
)
<<
"ditu rnn with analysis and IR fuse"
;
TestDituRNNPrediction
(
FLAGS_infer_ditu_rnn_model
,
FLAGS_infer_ditu_rnn_data
,
FLAGS_batch_size
,
true
,
true
,
FLAGS_repeat
);
// Other unit-tests of DituRNN, test different options of use_analysis,
// activate_ir and multi-threads.
TEST
(
Analyzer
,
DituRNN_tests
)
{
int
num_threads
[
2
]
=
{
1
,
4
};
for
(
auto
i
:
num_threads
)
{
// Directly infer with the original model.
TestDituRNNPrediction
(
false
,
false
,
i
);
// Inference with the original model with the analysis turned on, the
// analysis
// module will transform the program to a data flow graph.
TestDituRNNPrediction
(
true
,
false
,
i
);
// Inference with analysis and IR. The IR module will fuse some large
// kernels.
TestDituRNNPrediction
(
true
,
true
,
i
);
}
}
}
// namespace analysis
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
40dbd97f
...
...
@@ -35,7 +35,6 @@ bool AnalysisPredictor::Init(
}
else
{
place_
=
paddle
::
platform
::
CPUPlace
();
}
PADDLE_ENFORCE
(
!
parent_scope
);
if
(
parent_scope
)
{
scope_
=
parent_scope
;
sub_scope_
=
&
(
parent_scope
->
NewScope
());
...
...
paddle/fluid/inference/api/helper.h
浏览文件 @
40dbd97f
...
...
@@ -14,6 +14,7 @@
#pragma once
#include <glog/logging.h>
#include <sys/time.h>
#include <algorithm>
#include <numeric>
...
...
@@ -88,5 +89,45 @@ static void TensorAssignData(PaddleTensor *tensor,
}
}
std
::
string
DescribeTensor
(
const
PaddleTensor
&
tensor
)
{
std
::
stringstream
os
;
os
<<
"Tensor ["
<<
tensor
.
name
<<
"]
\n
"
;
os
<<
" - type: "
;
switch
(
tensor
.
dtype
)
{
case
PaddleDType
::
FLOAT32
:
os
<<
"float32"
;
break
;
case
PaddleDType
::
INT64
:
os
<<
"int64"
;
break
;
default:
os
<<
"unset"
;
}
os
<<
'\n'
;
os
<<
" - shape: "
<<
to_string
(
tensor
.
shape
)
<<
'\n'
;
os
<<
" - lod: "
;
for
(
auto
&
l
:
tensor
.
lod
)
{
os
<<
to_string
(
l
)
<<
"; "
;
}
os
<<
"
\n
"
;
os
<<
" - data: "
;
int
dim
=
std
::
accumulate
(
tensor
.
shape
.
begin
(),
tensor
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
for
(
int
i
=
0
;
i
<
dim
;
i
++
)
{
os
<<
static_cast
<
float
*>
(
tensor
.
data
.
data
())[
i
]
<<
" "
;
}
os
<<
'\n'
;
return
os
.
str
();
}
void
PrintTime
(
int
batch_size
,
int
repeat
,
int
num_threads
,
int
tid
,
double
latency
)
{
LOG
(
INFO
)
<<
"batch_size: "
<<
batch_size
<<
", repeat: "
<<
repeat
<<
", threads: "
<<
num_threads
<<
", thread id: "
<<
tid
<<
", latency: "
<<
latency
<<
"ms"
;
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/operators/fake_quantize_op.cu
浏览文件 @
40dbd97f
...
...
@@ -119,7 +119,8 @@ struct FindRangeAbsMaxFunctor<platform::CUDADeviceContext, T> {
const
framework
::
Tensor
&
last_scale
,
const
framework
::
Tensor
&
iter
,
const
int
window_size
,
framework
::
Tensor
*
scales_arr
,
framework
::
Tensor
*
out_scale
)
{
auto
&
gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
const
auto
gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
T
*
scale_arr
=
scales_arr
->
mutable_data
<
T
>
(
gpu_place
);
T
*
out_scale_data
=
out_scale
->
mutable_data
<
T
>
(
gpu_place
);
...
...
paddle/fluid/operators/flatten_op.cc
浏览文件 @
40dbd97f
...
...
@@ -157,6 +157,116 @@ class FlattenGradOp : public framework::OperatorBase {
}
};
// FIXME(zcd): flatten2 adds an intermediate output(XShape) based on flatten,
// the XShape is used to carry the shape and lod of X which will be used in
// flatten_grad, in this way, the framework can reuse the memory of X
// immediately the flatten2_op is finished.
// Considering compatibility issues, we could not fix flatten2_op
class
Flatten2OpInferShape
:
public
FlattenOpInferShape
{
public:
void
operator
()(
framework
::
InferShapeContext
*
ctx
)
const
override
{
FlattenOpInferShape
::
operator
()(
ctx
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"XShape"
),
"Output (XShape) of Flatten op should not be null."
);
const
auto
&
in_dims
=
ctx
->
GetInputDim
(
"X"
);
std
::
vector
<
int64_t
>
xshape_dims
(
in_dims
.
size
()
+
1
);
xshape_dims
[
0
]
=
0
;
for
(
int
i
=
0
;
i
<
in_dims
.
size
();
++
i
)
{
xshape_dims
[
i
+
1
]
=
in_dims
[
i
];
}
ctx
->
SetOutputDim
(
"XShape"
,
framework
::
make_ddim
(
xshape_dims
));
ctx
->
ShareLoD
(
"X"
,
"XShape"
);
}
};
class
Flatten2Op
:
public
framework
::
OperatorBase
{
public:
using
OperatorBase
::
OperatorBase
;
private:
void
RunImpl
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
override
{
auto
&
axis
=
Attr
<
int
>
(
"axis"
);
auto
in_dims
=
scope
.
FindVar
(
Input
(
"X"
))
->
Get
<
framework
::
LoDTensor
>
().
dims
();
const
auto
&
out_dims
=
FlattenOpInferShape
::
GetOutputShape
(
axis
,
in_dims
);
framework
::
AttributeMap
attrs
;
attrs
[
"shape"
]
=
out_dims
;
attrs
[
"inplace"
]
=
false
;
// Invoke Reshape Op
auto
reshape_op
=
framework
::
OpRegistry
::
CreateOp
(
"reshape2"
,
{{
"X"
,
{
Input
(
"X"
)}},
{
"Shape"
,
{}}},
{{
"Out"
,
{
Output
(
"Out"
)}},
{
"XShape"
,
{
Output
(
"XShape"
)}}},
attrs
);
reshape_op
->
Run
(
scope
,
place
);
}
};
class
Flatten2OpMaker
:
public
FlattenOpMaker
{
public:
void
Make
()
override
{
FlattenOpMaker
::
Make
();
AddOutput
(
"XShape"
,
"XShape is just used to store the shape and lod of X, which will "
"be used in FlattenGradOp."
)
.
AsIntermediate
();
}
};
class
Flatten2GradOpMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
auto
*
grad_op
=
new
framework
::
OpDesc
();
grad_op
->
SetType
(
"flatten2_grad"
);
grad_op
->
SetInput
(
"XShape"
,
Output
(
"XShape"
));
grad_op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
grad_op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
grad_op
->
SetAttrMap
(
Attrs
());
return
std
::
unique_ptr
<
framework
::
OpDesc
>
(
grad_op
);
}
};
class
Flatten2GradInferShape
:
public
framework
::
InferShapeBase
{
public:
void
operator
()(
framework
::
InferShapeContext
*
context
)
const
override
{
PADDLE_ENFORCE
(
context
->
HasInput
(
"XShape"
),
"Input(XShape) shouldn't be null."
);
PADDLE_ENFORCE
(
context
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) shouldn't be null."
);
auto
xshape_dims
=
context
->
GetInputDim
(
"XShape"
);
auto
x_dims
=
framework
::
slice_ddim
(
xshape_dims
,
1
,
xshape_dims
.
size
());
context
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
x_dims
);
context
->
ShareLoD
(
"XShape"
,
framework
::
GradVarName
(
"X"
));
}
};
class
Flatten2GradOp
:
public
framework
::
OperatorBase
{
public:
using
OperatorBase
::
OperatorBase
;
private:
void
RunImpl
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
override
{
auto
dx_name
=
Output
(
framework
::
GradVarName
(
"X"
));
auto
dout_name
=
Input
(
framework
::
GradVarName
(
"Out"
));
auto
xshape_name
=
Input
(
"XShape"
);
auto
xshape_dims
=
scope
.
FindVar
(
xshape_name
)
->
Get
<
framework
::
LoDTensor
>
().
dims
();
auto
x_dims
=
framework
::
slice_ddim
(
xshape_dims
,
1
,
xshape_dims
.
size
());
framework
::
AttributeMap
attrs
;
attrs
[
"shape"
]
=
framework
::
vectorize2int
(
x_dims
);
attrs
[
"inplace"
]
=
false
;
auto
reshape_op
=
framework
::
OpRegistry
::
CreateOp
(
"reshape2"
,
{{
"X"
,
{
dout_name
}},
{
"Shape"
,
{}}},
{{
"Out"
,
{
dx_name
}},
{
"XShape"
,
{
xshape_name
}}},
attrs
);
reshape_op
->
Run
(
scope
,
place
);
}
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -167,3 +277,8 @@ REGISTER_OPERATOR(flatten, ops::FlattenOp, ops::FlattenOpMaker,
ops
::
FlattenOpInferShape
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
flatten_grad
,
ops
::
FlattenGradOp
,
ops
::
FlattenGradInferShape
);
REGISTER_OPERATOR
(
flatten2
,
ops
::
Flatten2Op
,
ops
::
Flatten2OpMaker
,
ops
::
Flatten2OpInferShape
,
ops
::
Flatten2GradOpMaker
);
REGISTER_OPERATOR
(
flatten2_grad
,
ops
::
Flatten2GradOp
,
ops
::
Flatten2GradInferShape
);
paddle/fluid/operators/layer_norm_op.cu
浏览文件 @
40dbd97f
...
...
@@ -67,27 +67,27 @@ template <typename T, int BlockDim>
__global__
void
LayerNormForward
(
const
T
*
x
,
const
T
*
scale
,
const
T
*
bias
,
T
*
y
,
T
*
mean
,
T
*
var
,
float
epsilon
,
int
feature_size
)
{
using
BlockReduce
=
cub
::
BlockReduce
<
PairForLayerNorm
<
T
>
,
BlockDim
>
;
using
BlockReduce
=
cub
::
BlockReduce
<
PairForLayerNorm
<
double
>
,
BlockDim
>
;
__shared__
typename
BlockReduce
::
TempStorage
temp_storage
;
int
beg_idx
=
blockIdx
.
x
*
feature_size
+
threadIdx
.
x
;
int
end_idx
=
(
blockIdx
.
x
+
1
)
*
feature_size
;
// Step 1: Reduce to calculate mean and var
T
mean_val
=
static_cast
<
T
>
(
0
)
;
T
var_val
=
static_cast
<
T
>
(
0
)
;
double
mean_val
=
0
;
double
var_val
=
0
;
for
(
int
i
=
beg_idx
;
i
<
end_idx
;
i
+=
BlockDim
)
{
T
tmp
=
x
[
i
];
mean_val
+=
tmp
;
var_val
+=
(
tmp
*
tmp
);
}
auto
pair
=
BlockReduce
(
temp_storage
)
.
Reduce
(
PairForLayerNorm
<
T
>
(
mean_val
,
var_val
),
PairForLayerNormAddFunctor
<
T
>
());
.
Reduce
(
PairForLayerNorm
<
double
>
(
mean_val
,
var_val
),
PairForLayerNormAddFunctor
<
double
>
());
if
(
threadIdx
.
x
==
0
)
{
auto
tmp
=
pair
.
first_
/
feature_size
;
mean
[
blockIdx
.
x
]
=
tmp
;
var
[
blockIdx
.
x
]
=
pair
.
second_
/
feature_size
-
tmp
*
tmp
;
mean
[
blockIdx
.
x
]
=
static_cast
<
T
>
(
tmp
)
;
var
[
blockIdx
.
x
]
=
static_cast
<
T
>
(
pair
.
second_
/
feature_size
-
tmp
*
tmp
)
;
}
__syncthreads
();
mean_val
=
mean
[
blockIdx
.
x
];
...
...
paddle/fluid/operators/reshape_op.cc
浏览文件 @
40dbd97f
...
...
@@ -246,6 +246,88 @@ class ReshapeGradKernel {
}
};
// FIXME(zcd): reshape2 adds an intermediate output(XShape) based on reshape,
// the XShape is used to carry the shape and lod of X which will be used in
// reshape_grad, in this way, the framework can reuse the memory of X
// immediately the reshape_op is finished.
// Considering compatibility issues, we could not fix reshape_op
class
Reshape2Op
:
public
ReshapeOp
{
public:
Reshape2Op
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
)
:
ReshapeOp
(
type
,
inputs
,
outputs
,
attrs
)
{}
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
ReshapeOp
::
InferShape
(
ctx
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"XShape"
),
"Output(XShape) of ReshapeOp should not be null."
);
const
auto
&
x_dims
=
ctx
->
GetInputDim
(
"X"
);
std
::
vector
<
int64_t
>
xshape_dims
(
x_dims
.
size
()
+
1
);
xshape_dims
[
0
]
=
0
;
for
(
int
i
=
0
;
i
<
x_dims
.
size
();
++
i
)
{
xshape_dims
[
i
+
1
]
=
x_dims
[
i
];
}
ctx
->
SetOutputDim
(
"XShape"
,
framework
::
make_ddim
(
xshape_dims
));
ctx
->
ShareLoD
(
"X"
,
/*->*/
"XShape"
);
}
};
class
Reshape2OpMaker
:
public
ReshapeOpMaker
{
public:
void
Make
()
override
{
ReshapeOpMaker
::
Make
();
AddOutput
(
"XShape"
,
"XShape is just used to store the shape and lod of X, which will "
"be used in FlattenGradOp."
)
.
AsIntermediate
();
}
};
class
Reshape2GradMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
auto
*
grad_op
=
new
framework
::
OpDesc
();
grad_op
->
SetType
(
"reshape2_grad"
);
grad_op
->
SetInput
(
"XShape"
,
Output
(
"XShape"
));
grad_op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
grad_op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
grad_op
->
SetAttrMap
(
Attrs
());
return
std
::
unique_ptr
<
framework
::
OpDesc
>
(
grad_op
);
}
};
class
Reshape2GradOp
:
public
framework
::
OperatorWithKernel
{
public:
Reshape2GradOp
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
)
:
OperatorWithKernel
(
type
,
inputs
,
outputs
,
attrs
)
{}
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"XShape"
),
"Input(XShape) shouldn't be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) shouldn't be null."
);
auto
xshape_dims
=
ctx
->
GetInputDim
(
"XShape"
);
auto
x_dims
=
framework
::
slice_ddim
(
xshape_dims
,
1
,
xshape_dims
.
size
());
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
x_dims
);
ctx
->
ShareLoD
(
"XShape"
,
framework
::
GradVarName
(
"X"
));
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
))
->
type
()),
ctx
.
device_context
());
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
...
...
@@ -261,6 +343,17 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel,
ops
::
ReshapeGradKernel
,
int64_t
,
ops
::
ReshapeGradKernel
);
REGISTER_OPERATOR
(
reshape2
,
ops
::
Reshape2Op
,
ops
::
Reshape2OpMaker
,
ops
::
Reshape2GradMaker
);
REGISTER_OPERATOR
(
reshape2_grad
,
ops
::
Reshape2GradOp
);
REGISTER_OP_CPU_KERNEL_FUNCTOR
(
reshape2
,
float
,
ops
::
ReshapeKernel
,
double
,
ops
::
ReshapeKernel
,
int
,
ops
::
ReshapeKernel
,
int64_t
,
ops
::
ReshapeKernel
);
REGISTER_OP_CPU_KERNEL_FUNCTOR
(
reshape2_grad
,
float
,
ops
::
ReshapeGradKernel
,
double
,
ops
::
ReshapeGradKernel
,
int
,
ops
::
ReshapeGradKernel
,
int64_t
,
ops
::
ReshapeGradKernel
);
#ifdef PADDLE_WITH_CUDA
REGISTER_OP_CUDA_KERNEL_FUNCTOR
(
reshape
,
float
,
ops
::
ReshapeKernel
,
double
,
ops
::
ReshapeKernel
,
int
,
ops
::
ReshapeKernel
,
...
...
@@ -269,4 +362,11 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel,
double
,
ops
::
ReshapeGradKernel
,
int
,
ops
::
ReshapeGradKernel
,
int64_t
,
ops
::
ReshapeGradKernel
);
REGISTER_OP_CUDA_KERNEL_FUNCTOR
(
reshape2
,
float
,
ops
::
ReshapeKernel
,
double
,
ops
::
ReshapeKernel
,
int
,
ops
::
ReshapeKernel
,
int64_t
,
ops
::
ReshapeKernel
);
REGISTER_OP_CUDA_KERNEL_FUNCTOR
(
reshape2_grad
,
float
,
ops
::
ReshapeGradKernel
,
double
,
ops
::
ReshapeGradKernel
,
int
,
ops
::
ReshapeGradKernel
,
int64_t
,
ops
::
ReshapeGradKernel
);
#endif
paddle/fluid/operators/squeeze_op.cc
浏览文件 @
40dbd97f
...
...
@@ -126,15 +126,15 @@ class SqueezeOpMaker : public framework::OpProtoAndCheckerMaker {
.
SetDefault
({});
AddComment
(
R"DOC(
Squeeze Operator.
Remove single-dimensional entries from the shape of a tensor.
Takes a parameter axes with a list of axes to squeeze.
If axes is not provided, all the single dimensions will be removed from the shape.
Remove single-dimensional entries from the shape of a tensor.
Takes a parameter axes with a list of axes to squeeze.
If axes is not provided, all the single dimensions will be removed from the shape.
If an axis is selected with shape entry not equal to one, an error is raised.
Examples:
Case 1:
Given
Given
X.shape = (1, 3, 1, 5)
and
axes = [0]
...
...
@@ -144,7 +144,7 @@ class SqueezeOpMaker : public framework::OpProtoAndCheckerMaker {
Case 2:
Given
X.shape = (1, 3, 1, 5)
and
and
axes = []
we get:
Out.shape = (3, 5)
...
...
@@ -181,6 +181,113 @@ class SqueezeGradOp : public framework::OperatorBase {
}
};
// FIXME(zcd): squeeze2 adds an intermediate output(XShape) based on squeeze,
// the XShape is used to carry the shape and lod of X which will be used in
// squeeze_grad, in this way, the framework can reuse the memory of X
// immediately the squeeze2_op is finished.
// Considering compatibility issues, we could not fix squeeze2_op
class
Squeeze2OpMaker
:
public
SqueezeOpMaker
{
public:
void
Make
()
override
{
SqueezeOpMaker
::
Make
();
AddOutput
(
"XShape"
,
"XShape is just used to store the shape and lod of X, which will "
"be used in SqueezeGradOp."
)
.
AsIntermediate
();
}
};
class
Squeeze2OpInferShape
:
public
SqueezeOpInferShape
{
public:
void
operator
()(
framework
::
InferShapeContext
*
ctx
)
const
override
{
SqueezeOpInferShape
::
operator
()(
ctx
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"XShape"
),
"Output(XShape) of Squeeze operator should not be null."
);
const
auto
&
x_dims
=
ctx
->
GetInputDim
(
"X"
);
std
::
vector
<
int64_t
>
xshape_dims
(
x_dims
.
size
()
+
1
);
xshape_dims
[
0
]
=
0
;
for
(
int
i
=
0
;
i
<
x_dims
.
size
();
++
i
)
{
xshape_dims
[
i
+
1
]
=
x_dims
[
i
];
}
ctx
->
SetOutputDim
(
"XShape"
,
framework
::
make_ddim
(
xshape_dims
));
ctx
->
ShareLoD
(
"X"
,
/*->*/
"XShape"
);
}
};
class
Squeeze2Op
:
public
framework
::
OperatorBase
{
public:
using
OperatorBase
::
OperatorBase
;
private:
void
RunImpl
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
override
{
auto
&
axes
=
Attr
<
std
::
vector
<
int
>>
(
"axes"
);
auto
x_dims
=
scope
.
FindVar
(
Input
(
"X"
))
->
Get
<
framework
::
LoDTensor
>
().
dims
();
auto
out_dims
=
Squeeze2OpInferShape
::
GetOutputShape
(
axes
,
x_dims
);
framework
::
AttributeMap
attrs
;
attrs
[
"shape"
]
=
framework
::
vectorize2int
(
out_dims
);
// Invoke Reshape Op
auto
reshape_op
=
framework
::
OpRegistry
::
CreateOp
(
"reshape2"
,
{{
"X"
,
{
Input
(
"X"
)}},
{
"Shape"
,
{}}},
{{
"Out"
,
{
Output
(
"Out"
)}},
{
"XShape"
,
{
Output
(
"XShape"
)}}},
attrs
);
reshape_op
->
Run
(
scope
,
place
);
}
};
class
Squeeze2GradOpMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
auto
*
grad_op
=
new
framework
::
OpDesc
();
grad_op
->
SetType
(
"squeeze2_grad"
);
grad_op
->
SetInput
(
"XShape"
,
Output
(
"XShape"
));
grad_op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
grad_op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
grad_op
->
SetAttrMap
(
Attrs
());
return
std
::
unique_ptr
<
framework
::
OpDesc
>
(
grad_op
);
}
};
class
Squeeze2GradInferShape
:
public
framework
::
InferShapeBase
{
public:
void
operator
()(
framework
::
InferShapeContext
*
context
)
const
override
{
PADDLE_ENFORCE
(
context
->
HasInput
(
"XShape"
),
"Input(XShape) shouldn't be null."
);
PADDLE_ENFORCE
(
context
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) shouldn't be null."
);
auto
xshape_dims
=
context
->
GetInputDim
(
"XShape"
);
auto
x_dims
=
framework
::
slice_ddim
(
xshape_dims
,
1
,
xshape_dims
.
size
());
context
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
x_dims
);
context
->
ShareLoD
(
"XShape"
,
framework
::
GradVarName
(
"X"
));
}
};
class
Squeeze2GradOp
:
public
framework
::
OperatorBase
{
public:
using
OperatorBase
::
OperatorBase
;
private:
void
RunImpl
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
override
{
auto
dx_name
=
Output
(
framework
::
GradVarName
(
"X"
));
auto
dout_name
=
Input
(
framework
::
GradVarName
(
"Out"
));
auto
xshape_name
=
Input
(
"XShape"
);
auto
xshape_dims
=
scope
.
FindVar
(
xshape_name
)
->
Get
<
framework
::
LoDTensor
>
().
dims
();
auto
x_dims
=
framework
::
slice_ddim
(
xshape_dims
,
1
,
xshape_dims
.
size
());
framework
::
AttributeMap
attrs
;
attrs
[
"shape"
]
=
framework
::
vectorize2int
(
x_dims
);
auto
reshape_op
=
framework
::
OpRegistry
::
CreateOp
(
"reshape2"
,
{{
"X"
,
{
dout_name
}},
{
"Shape"
,
{}}},
{{
"Out"
,
{
dx_name
}},
{
"XShape"
,
{
xshape_name
}}},
attrs
);
reshape_op
->
Run
(
scope
,
place
);
}
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -192,3 +299,8 @@ REGISTER_OPERATOR(squeeze, ops::SqueezeOp, ops::SqueezeOpMaker,
ops
::
SqueezeOpInferShape
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
squeeze_grad
,
ops
::
SqueezeGradOp
,
ops
::
SqueezeGradInferShape
);
REGISTER_OPERATOR
(
squeeze2
,
ops
::
Squeeze2Op
,
ops
::
Squeeze2OpMaker
,
ops
::
Squeeze2OpInferShape
,
ops
::
Squeeze2GradOpMaker
);
REGISTER_OPERATOR
(
squeeze2_grad
,
ops
::
Squeeze2GradOp
,
ops
::
Squeeze2GradInferShape
);
paddle/fluid/operators/transpose_op.cc
浏览文件 @
40dbd97f
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/transpose_op.h"
#include <string>
#include <vector>
namespace
paddle
{
...
...
@@ -24,7 +25,7 @@ class TransposeOp : public framework::OperatorWithKernel {
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) should not be null"
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
...
...
@@ -90,7 +91,7 @@ The behavior of this operator is similar to how `numpy.transpose` works.
2 &5
\end{pmatrix}$$
- Given a input tensor with shape $(N, C, H, W)$ and the `axes` is
- Given a input tensor with shape $(N, C, H, W)$ and the `axes` is
$[0, 2, 3, 1]$, then shape of the output tensor will be: $(N, H, W, C)$.
)DOC"
);
...
...
@@ -101,7 +102,7 @@ class TransposeOpGrad : public framework::OperatorWithKernel {
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) should not be null"
);
...
...
@@ -113,6 +114,93 @@ class TransposeOpGrad : public framework::OperatorWithKernel {
}
};
// FIXME(zcd): transpose2 adds an intermediate output(XShape) based on
// transpose, the XShape is used to carry the shape and lod of X which
// will be used in transpose_grad, in this way, the framework can reuse
// the memory of X immediately the transpose2_op is finished.
// Considering compatibility issues, we could not fix transpose2_op
class
Transpose2Op
:
public
TransposeOp
{
public:
Transpose2Op
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
)
:
TransposeOp
(
type
,
inputs
,
outputs
,
attrs
)
{}
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
TransposeOp
::
InferShape
(
ctx
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"XShape"
),
"Output(XShape) should not be null"
);
const
auto
&
in_dims
=
ctx
->
GetInputDim
(
"X"
);
std
::
vector
<
int64_t
>
x_shape_dim
(
in_dims
.
size
()
+
1
);
x_shape_dim
[
0
]
=
0
;
for
(
int
i
=
0
;
i
<
in_dims
.
size
();
++
i
)
{
x_shape_dim
[
i
+
1
]
=
in_dims
[
i
];
}
ctx
->
SetOutputDim
(
"XShape"
,
framework
::
make_ddim
(
x_shape_dim
));
ctx
->
ShareLoD
(
"X"
,
/*->*/
"XShape"
);
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
)
->
type
()),
ctx
.
device_context
());
}
};
class
Transpose2OpMaker
:
public
TransposeOpMaker
{
public:
void
Make
()
override
{
TransposeOpMaker
::
Make
();
AddOutput
(
"XShape"
,
"(Tensor)The output tensor."
).
AsIntermediate
();
}
};
class
Transpose2GradMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
auto
*
grad_op
=
new
framework
::
OpDesc
();
grad_op
->
SetType
(
"transpose2_grad"
);
grad_op
->
SetInput
(
"XShape"
,
Output
(
"XShape"
));
grad_op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
grad_op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
grad_op
->
SetAttrMap
(
Attrs
());
return
std
::
unique_ptr
<
framework
::
OpDesc
>
(
grad_op
);
}
};
class
Transpose2OpGrad
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"XShape"
),
"Input(XShape) should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) should not be null"
);
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"X"
)))
{
auto
xshape_dim
=
ctx
->
GetInputDim
(
"XShape"
);
auto
x_shape_dim
=
framework
::
slice_ddim
(
xshape_dim
,
1
,
xshape_dim
.
size
());
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
x_shape_dim
);
ctx
->
ShareLoD
(
"XShape"
,
framework
::
GradVarName
(
"X"
));
}
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
))
->
type
()),
ctx
.
device_context
());
}
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -120,8 +208,20 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR
(
transpose
,
ops
::
TransposeOp
,
ops
::
TransposeOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
transpose_grad
,
ops
::
TransposeOpGrad
);
REGISTER_OP_CPU_KERNEL
(
transpose
,
ops
::
TransposeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
transpose_grad
,
ops
::
TransposeGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
REGISTER_OPERATOR
(
transpose2
,
ops
::
Transpose2Op
,
ops
::
Transpose2OpMaker
,
ops
::
Transpose2GradMaker
);
REGISTER_OPERATOR
(
transpose2_grad
,
ops
::
Transpose2OpGrad
);
REGISTER_OP_CPU_KERNEL
(
transpose2
,
ops
::
TransposeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
transpose2_grad
,
ops
::
TransposeGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
paddle/fluid/operators/transpose_op.cu.cc
浏览文件 @
40dbd97f
...
...
@@ -21,3 +21,10 @@ REGISTER_OP_CUDA_KERNEL(
REGISTER_OP_CUDA_KERNEL
(
transpose_grad
,
ops
::
TransposeGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
);
REGISTER_OP_CUDA_KERNEL
(
transpose2
,
ops
::
TransposeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
);
REGISTER_OP_CUDA_KERNEL
(
transpose2_grad
,
ops
::
TransposeGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
);
paddle/fluid/operators/unsqueeze_op.cc
浏览文件 @
40dbd97f
...
...
@@ -127,13 +127,13 @@ class UnsqueezeOpMaker : public framework::OpProtoAndCheckerMaker {
});
AddComment
(
R"DOC(
Unsqueeze Operator.
Insert single-dimensional entries to the shape of a tensor.
Takes one required argument axes, a list of dimensions that will be inserted.
Dimension indices in axes are as seen in the output tensor.
For example:
Given a tensor such that tensor with shape [3, 4, 5],
Insert single-dimensional entries to the shape of a tensor.
Takes one required argument axes, a list of dimensions that will be inserted.
Dimension indices in axes are as seen in the output tensor.
For example:
Given a tensor such that tensor with shape [3, 4, 5],
then Unsqueeze(tensor, axes=[0, 4]) has shape [1, 3, 4, 5, 1]
)DOC"
);
}
...
...
@@ -168,6 +168,112 @@ class UnsqueezeGradOp : public framework::OperatorBase {
}
};
// FIXME(zcd): unsqueeze2 adds an intermediate output(XShape) based on
// unsqueeze, the XShape is used to carry the shape and lod of X which
// will be used in unsqueeze_grad, in this way, the framework can reuse
// the memory of X immediately the unsqueeze2_op is finished.
// Considering compatibility issues, we could not fix unsqueeze2_op
class
Unsqueeze2OpInferShape
:
public
UnsqueezeOpInferShape
{
public:
void
operator
()(
framework
::
InferShapeContext
*
ctx
)
const
override
{
UnsqueezeOpInferShape
::
operator
()(
ctx
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"XShape"
),
"Output(XShape) of Unsqueeze operator should not be null."
);
const
auto
&
x_dims
=
ctx
->
GetInputDim
(
"X"
);
std
::
vector
<
int64_t
>
xshape_dims
(
x_dims
.
size
()
+
1
);
xshape_dims
[
0
]
=
0
;
for
(
int
i
=
0
;
i
<
x_dims
.
size
();
++
i
)
{
xshape_dims
[
i
+
1
]
=
x_dims
[
i
];
}
ctx
->
SetOutputDim
(
"XShape"
,
framework
::
make_ddim
(
xshape_dims
));
ctx
->
ShareLoD
(
"X"
,
/*->*/
"XShape"
);
}
};
class
Unsqueeze2OpMaker
:
public
UnsqueezeOpMaker
{
public:
void
Make
()
override
{
UnsqueezeOpMaker
::
Make
();
AddOutput
(
"XShape"
,
"XShape is just used to store the shape and lod of X, which will "
"be used in UnsqueezeGradOp."
)
.
AsIntermediate
();
}
};
class
Unsqueeze2Op
:
public
framework
::
OperatorBase
{
public:
using
OperatorBase
::
OperatorBase
;
private:
void
RunImpl
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
override
{
auto
&
axes
=
Attr
<
std
::
vector
<
int
>>
(
"axes"
);
auto
x_dims
=
scope
.
FindVar
(
Input
(
"X"
))
->
Get
<
framework
::
LoDTensor
>
().
dims
();
auto
out_dims
=
Unsqueeze2OpInferShape
::
GetOutputShape
(
axes
,
x_dims
);
framework
::
AttributeMap
attrs
;
attrs
[
"shape"
]
=
framework
::
vectorize2int
(
out_dims
);
// Invoke Reshape op.
auto
reshape_op
=
framework
::
OpRegistry
::
CreateOp
(
"reshape2"
,
{{
"X"
,
{
Input
(
"X"
)}},
{
"Shape"
,
{}}},
{{
"Out"
,
{
Output
(
"Out"
)}},
{
"XShape"
,
{
Output
(
"XShape"
)}}},
attrs
);
reshape_op
->
Run
(
scope
,
place
);
}
};
class
Unsqueeze2GradOpMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
auto
*
grad_op
=
new
framework
::
OpDesc
();
grad_op
->
SetType
(
"unsqueeze2_grad"
);
grad_op
->
SetInput
(
"XShape"
,
Output
(
"XShape"
));
grad_op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
grad_op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
grad_op
->
SetAttrMap
(
Attrs
());
return
std
::
unique_ptr
<
framework
::
OpDesc
>
(
grad_op
);
}
};
class
Unsqueeze2GradInferShape
:
public
framework
::
InferShapeBase
{
public:
void
operator
()(
framework
::
InferShapeContext
*
context
)
const
override
{
PADDLE_ENFORCE
(
context
->
HasInput
(
"XShape"
),
"Input(XShape) shouldn't be null."
);
PADDLE_ENFORCE
(
context
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) shouldn't be null."
);
auto
xshape_dims
=
context
->
GetInputDim
(
"XShape"
);
auto
x_dims
=
framework
::
slice_ddim
(
xshape_dims
,
1
,
xshape_dims
.
size
());
context
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
x_dims
);
context
->
ShareLoD
(
"XShape"
,
framework
::
GradVarName
(
"X"
));
}
};
class
Unsqueeze2GradOp
:
public
framework
::
OperatorBase
{
public:
using
OperatorBase
::
OperatorBase
;
private:
void
RunImpl
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
override
{
auto
dx_name
=
Output
(
framework
::
GradVarName
(
"X"
));
auto
dout_name
=
Input
(
framework
::
GradVarName
(
"Out"
));
auto
xshape_name
=
Input
(
"XShape"
);
auto
xshape_dims
=
scope
.
FindVar
(
xshape_name
)
->
Get
<
framework
::
LoDTensor
>
().
dims
();
auto
x_dims
=
framework
::
slice_ddim
(
xshape_dims
,
1
,
xshape_dims
.
size
());
framework
::
AttributeMap
attrs
;
attrs
[
"shape"
]
=
framework
::
vectorize2int
(
x_dims
);
auto
reshape_op
=
framework
::
OpRegistry
::
CreateOp
(
"reshape2"
,
{{
"X"
,
{
dout_name
}},
{
"Shape"
,
{}}},
{{
"Out"
,
{
dx_name
}},
{
"XShape"
,
{
xshape_name
}}},
attrs
);
reshape_op
->
Run
(
scope
,
place
);
}
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -180,3 +286,8 @@ REGISTER_OPERATOR(unsqueeze, ops::UnsqueezeOp, ops::UnsqueezeOpMaker,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
unsqueeze_grad
,
ops
::
UnsqueezeGradOp
,
ops
::
UnsqueezeGradInferShape
);
REGISTER_OPERATOR
(
unsqueeze2
,
ops
::
Unsqueeze2Op
,
ops
::
Unsqueeze2OpMaker
,
ops
::
Unsqueeze2OpInferShape
,
ops
::
Unsqueeze2GradOpMaker
);
REGISTER_OPERATOR
(
unsqueeze2_grad
,
ops
::
Unsqueeze2GradOp
,
ops
::
Unsqueeze2GradInferShape
);
paddle/scripts/paddle_build.sh
浏览文件 @
40dbd97f
...
...
@@ -115,6 +115,7 @@ function cmake_gen() {
-DWITH_FLUID_ONLY=
${
WITH_FLUID_ONLY
:-
OFF
}
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
-DWITH_CONTRIB=
${
WITH_CONTRIB
:-
ON
}
-DWITH_INFERENCE=
${
WITH_INFERENCE
:-
ON
}
-DWITH_ANAKIN=
${
WITH_ANAKIN
:-
OFF
}
-DPY_VERSION=
${
PY_VERSION
:-
2
.7
}
========================================
...
...
@@ -144,6 +145,7 @@ EOF
-DWITH_FLUID_ONLY
=
${
WITH_FLUID_ONLY
:-
OFF
}
\
-DCMAKE_EXPORT_COMPILE_COMMANDS
=
ON
\
-DWITH_CONTRIB
=
${
WITH_CONTRIB
:-
ON
}
\
-DWITH_INFERENCE
=
${
WITH_INFERENCE
:-
ON
}
\
-DWITH_ANAKIN
=
${
WITH_ANAKIN
:-
OFF
}
\
-DPY_VERSION
=
${
PY_VERSION
:-
2
.7
}
}
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
40dbd97f
...
...
@@ -4025,10 +4025,12 @@ def transpose(x, perm, name=None):
helper
=
LayerHelper
(
'transpose'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
x
.
dtype
)
x_shape
=
helper
.
create_tmp_variable
(
x
.
dtype
)
helper
.
append_op
(
type
=
'transpose'
,
type
=
'transpose
2
'
,
inputs
=
{
'X'
:
[
x
]},
outputs
=
{
'Out'
:
[
out
]},
outputs
=
{
'Out'
:
[
out
],
'XShape'
:
[
x_shape
]},
attrs
=
{
'axis'
:
perm
})
return
out
...
...
@@ -4520,13 +4522,15 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
"Each dimension size given in shape must not be negtive "
"except one unknown dimension."
)
helper
=
LayerHelper
(
"reshape"
,
**
locals
())
helper
=
LayerHelper
(
"reshape
2
"
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
x_shape
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
type
=
"reshape"
,
type
=
"reshape
2
"
,
inputs
=
inputs
,
attrs
=
{
"shape"
:
shape
},
outputs
=
{
"Out"
:
out
})
outputs
=
{
"Out"
:
out
,
"XShape"
:
x_shape
})
return
helper
.
append_activation
(
out
)
...
...
@@ -4570,11 +4574,13 @@ def squeeze(input, axes, name=None):
"""
helper
=
LayerHelper
(
"squeeze"
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
input
.
dtype
)
x_shape
=
helper
.
create_tmp_variable
(
dtype
=
input
.
dtype
)
helper
.
append_op
(
type
=
"squeeze"
,
type
=
"squeeze
2
"
,
inputs
=
{
"X"
:
input
},
attrs
=
{
"axes"
:
axes
},
outputs
=
{
"Out"
:
out
})
outputs
=
{
"Out"
:
out
,
"XShape"
:
x_shape
})
return
out
...
...
@@ -4605,11 +4611,13 @@ def unsqueeze(input, axes, name=None):
"""
helper
=
LayerHelper
(
"unsqueeze"
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
input
.
dtype
)
x_shape
=
helper
.
create_tmp_variable
(
dtype
=
input
.
dtype
)
helper
.
append_op
(
type
=
"unsqueeze"
,
type
=
"unsqueeze
2
"
,
inputs
=
{
"X"
:
input
},
attrs
=
{
"axes"
:
axes
},
outputs
=
{
"Out"
:
out
})
outputs
=
{
"Out"
:
out
,
"XShape"
:
x_shape
})
return
out
...
...
@@ -5811,10 +5819,12 @@ def flatten(x, axis=1, name=None):
raise
ValueError
(
"The axis should be a int, and in range [0, rank(x)]"
)
out
=
helper
.
create_tmp_variable
(
x
.
dtype
)
x_shape
=
helper
.
create_tmp_variable
(
x
.
dtype
)
helper
.
append_op
(
type
=
'flatten'
,
type
=
'flatten
2
'
,
inputs
=
{
"X"
:
x
},
outputs
=
{
'Out'
:
out
},
outputs
=
{
'Out'
:
out
,
'XShape'
:
x_shape
},
attrs
=
{
"axis"
:
axis
})
return
out
...
...
python/paddle/fluid/tests/unittests/dist_transformer.py
浏览文件 @
40dbd97f
...
...
@@ -36,6 +36,7 @@ import paddle.fluid as fluid
import
paddle.fluid.layers
as
layers
from
paddle.fluid
import
core
from
test_dist_base
import
TestDistRunnerBase
,
runtime_main
import
paddle.compat
as
cpt
from
paddle.compat
import
long_type
import
hashlib
...
...
@@ -315,8 +316,9 @@ def pad_batch_data(insts,
"""
return_list
=
[]
max_len
=
max
(
len
(
inst
)
for
inst
in
insts
)
num_token
=
reduce
(
lambda
x
,
y
:
x
+
y
,
[
len
(
inst
)
for
inst
in
insts
])
if
return_num_token
else
0
num_token
=
six
.
moves
.
reduce
(
lambda
x
,
y
:
x
+
y
,
[
len
(
inst
)
for
inst
in
insts
])
if
return_num_token
else
0
# Any token included in dict can be used to pad, since the paddings' loss
# will be masked out by weights and make no effect on parameter gradients.
inst_data
=
np
.
array
(
...
...
@@ -328,7 +330,7 @@ def pad_batch_data(insts,
return_list
+=
[
inst_weight
.
astype
(
"float32"
).
reshape
([
-
1
,
1
])]
else
:
# position data
inst_pos
=
np
.
array
([
range
(
1
,
len
(
inst
)
+
1
)
+
[
0
]
*
(
max_len
-
len
(
inst
))
list
(
range
(
1
,
len
(
inst
)
+
1
)
)
+
[
0
]
*
(
max_len
-
len
(
inst
))
for
inst
in
insts
])
return_list
+=
[
inst_pos
.
astype
(
"int64"
).
reshape
([
-
1
,
1
])]
...
...
@@ -385,10 +387,11 @@ def prepare_batch_input(insts, data_input_names, src_pad_idx, trg_pad_idx,
return_num_token
=
True
)
data_input_dict
=
dict
(
zip
(
data_input_names
,
[
src_word
,
src_pos
,
src_slf_attn_bias
,
trg_word
,
trg_pos
,
trg_slf_attn_bias
,
trg_src_attn_bias
,
lbl_word
,
lbl_weight
]))
list
(
zip
(
data_input_names
,
[
src_word
,
src_pos
,
src_slf_attn_bias
,
trg_word
,
trg_pos
,
trg_slf_attn_bias
,
trg_src_attn_bias
,
lbl_word
,
lbl_weight
])))
return
data_input_dict
,
np
.
asarray
([
num_token
],
dtype
=
"float32"
)
...
...
@@ -561,7 +564,7 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
np
.
log
(
TrainTaskConfig
.
label_smooth_eps
/
(
ModelHyperParams
.
trg_vocab_size
-
1
)
+
1e-20
))
init
=
False
for
pass_id
in
xrange
(
TrainTaskConfig
.
pass_num
):
for
pass_id
in
six
.
moves
.
xrange
(
TrainTaskConfig
.
pass_num
):
pass_start_time
=
time
.
time
()
for
batch_id
,
data
in
enumerate
(
train_data
()):
if
batch_id
>=
5
:
...
...
@@ -587,11 +590,11 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
ModelHyperParams
.
eos_idx
,
ModelHyperParams
.
n_head
,
ModelHyperParams
.
d_model
)
total_num_token
+=
num_token
feed_kv_pairs
=
data_input_dict
.
items
(
)
feed_kv_pairs
=
list
(
data_input_dict
.
items
()
)
if
TrainTaskConfig
.
local
:
feed_kv_pairs
+=
{
feed_kv_pairs
+=
list
(
{
lr_scheduler
.
learning_rate
.
name
:
lr_rate
}.
items
()
}.
items
()
)
feed_list
.
append
(
dict
(
feed_kv_pairs
))
if
not
init
:
...
...
@@ -873,6 +876,7 @@ class DataReader(object):
f
=
tarfile
.
open
(
fpaths
[
0
],
"r"
)
for
line
in
f
.
extractfile
(
tar_fname
):
line
=
cpt
.
to_text
(
line
)
fields
=
line
.
strip
(
"
\n
"
).
split
(
self
.
_field_delimiter
)
if
(
not
self
.
_only_src
and
len
(
fields
)
==
2
)
or
(
self
.
_only_src
and
len
(
fields
)
==
1
):
...
...
@@ -882,8 +886,9 @@ class DataReader(object):
if
not
os
.
path
.
isfile
(
fpath
):
raise
IOError
(
"Invalid file: %s"
%
fpath
)
with
open
(
fpath
,
"r"
)
as
f
:
with
open
(
fpath
,
"r
b
"
)
as
f
:
for
line
in
f
:
line
=
cpt
.
to_text
(
line
)
fields
=
line
.
strip
(
"
\n
"
).
split
(
self
.
_field_delimiter
)
if
(
not
self
.
_only_src
and
len
(
fields
)
==
2
)
or
(
self
.
_only_src
and
len
(
fields
)
==
1
):
...
...
@@ -892,8 +897,9 @@ class DataReader(object):
@
staticmethod
def
load_dict
(
dict_path
,
reverse
=
False
):
word_dict
=
{}
with
open
(
dict_path
,
"r"
)
as
fdict
:
with
open
(
dict_path
,
"r
b
"
)
as
fdict
:
for
idx
,
line
in
enumerate
(
fdict
):
line
=
cpt
.
to_text
(
line
)
if
reverse
:
word_dict
[
idx
]
=
line
.
strip
(
"
\n
"
)
else
:
...
...
@@ -1034,7 +1040,7 @@ def multi_head_attention(queries,
# size of the input as the output dimension size.
return
layers
.
reshape
(
x
=
trans_x
,
shape
=
map
(
int
,
[
0
,
0
,
trans_x
.
shape
[
2
]
*
trans_x
.
shape
[
3
]]
))
shape
=
list
(
map
(
int
,
[
0
,
0
,
trans_x
.
shape
[
2
]
*
trans_x
.
shape
[
3
]])
))
def
scaled_dot_product_attention
(
q
,
k
,
v
,
attn_bias
,
d_model
,
dropout_rate
):
"""
...
...
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
40dbd97f
...
...
@@ -249,7 +249,7 @@ class OpTest(unittest.TestCase):
outs
,
_
=
self
.
_calc_output
(
place
)
return
outs
def
_calc_output
(
self
,
place
,
parallel
=
False
):
def
_calc_output
(
self
,
place
,
parallel
=
False
,
no_check_set
=
None
):
program
=
Program
()
block
=
program
.
global_block
()
...
...
@@ -273,6 +273,8 @@ class OpTest(unittest.TestCase):
# if not, fill the fetch_list by the user configured outputs in test.
if
len
(
fetch_list
)
==
0
:
for
var_name
,
var
in
six
.
iteritems
(
outputs
):
if
no_check_set
is
not
None
and
var_name
in
no_check_set
:
continue
if
isinstance
(
var
,
list
):
for
v
in
var
:
fetch_list
.
append
(
v
)
...
...
@@ -291,11 +293,17 @@ class OpTest(unittest.TestCase):
return_numpy
=
False
)
return
outs
,
fetch_list
def
check_output_with_place
(
self
,
place
,
atol
,
equal_nan
=
False
):
outs
,
fetch_list
=
self
.
_calc_output
(
place
)
def
check_output_with_place
(
self
,
place
,
atol
,
no_check_set
=
None
,
equal_nan
=
False
):
outs
,
fetch_list
=
self
.
_calc_output
(
place
,
no_check_set
=
no_check_set
)
for
out_name
,
out_dup
in
Operator
.
get_op_outputs
(
self
.
op_type
):
if
out_name
not
in
self
.
outputs
:
continue
if
no_check_set
is
not
None
and
out_name
in
no_check_set
:
continue
def
find_actual
(
target_name
,
fetch_list
):
found
=
[
...
...
@@ -360,10 +368,10 @@ class OpTest(unittest.TestCase):
places
.
append
(
core
.
CUDAPlace
(
0
))
return
places
def
check_output
(
self
,
atol
=
1e-5
,
equal_nan
=
False
):
def
check_output
(
self
,
atol
=
1e-5
,
no_check_set
=
None
,
equal_nan
=
False
):
places
=
self
.
_get_places
()
for
place
in
places
:
self
.
check_output_with_place
(
place
,
atol
,
equal_nan
)
self
.
check_output_with_place
(
place
,
atol
,
no_check_set
,
equal_nan
)
def
check_output_customized
(
self
,
checker
):
places
=
self
.
_get_places
()
...
...
python/paddle/fluid/tests/unittests/test_flatten_op.py
浏览文件 @
40dbd97f
...
...
@@ -22,14 +22,17 @@ from op_test import OpTest
class
TestFlattenOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"flatten"
self
.
op_type
=
"flatten
2
"
self
.
init_test_case
()
self
.
inputs
=
{
"X"
:
np
.
random
.
random
(
self
.
in_shape
).
astype
(
"float32"
)}
self
.
init_attrs
()
self
.
outputs
=
{
"Out"
:
self
.
inputs
[
"X"
].
reshape
(
self
.
new_shape
)}
self
.
outputs
=
{
"Out"
:
self
.
inputs
[
"X"
].
reshape
(
self
.
new_shape
),
"XShape"
:
np
.
random
.
random
(
self
.
in_shape
).
astype
(
"float32"
)
}
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
(
no_check_set
=
[
"XShape"
]
)
def
test_check_grad
(
self
):
self
.
check_grad
([
"X"
],
"Out"
)
...
...
python/paddle/fluid/tests/unittests/test_prelu_op.py
浏览文件 @
40dbd97f
...
...
@@ -16,6 +16,7 @@ from __future__ import print_function
import
unittest
import
numpy
as
np
import
six
from
op_test
import
OpTest
...
...
@@ -62,17 +63,20 @@ class PReluTest(OpTest):
# TODO(minqiyang): Resume these test cases after fixing Python3 CI job issues
# class TestCase1(PReluTest):
# def initTestCase(self):
# self.attrs = {'mode': "all"}
if
six
.
PY2
:
# class TestCase2(PReluTest):
# def initTestCase(self):
# self.attrs = {'mode': "channel"}
class
TestCase1
(
PReluTest
):
def
initTestCase
(
self
):
self
.
attrs
=
{
'mode'
:
"all"
}
class
TestCase2
(
PReluTest
):
def
initTestCase
(
self
):
self
.
attrs
=
{
'mode'
:
"channel"
}
class
TestCase3
(
PReluTest
):
def
initTestCase
(
self
):
self
.
attrs
=
{
'mode'
:
"element"
}
# class TestCase3(PReluTest):
# def initTestCase(self):
# self.attrs = {'mode': "element"}
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_reshape_op.py
浏览文件 @
40dbd97f
...
...
@@ -22,106 +22,39 @@ from op_test import OpTest
class
TestReshapeOp
(
OpTest
):
def
setUp
(
self
):
ori_shape
=
(
2
,
25
)
new_shape
=
(
5
,
10
)
self
.
op_type
=
"reshape"
self
.
inputs
=
{
"X"
:
np
.
random
.
random
(
ori_shape
).
astype
(
"float32"
)}
self
.
attrs
=
{
"shape"
:
new_shape
}
self
.
outputs
=
{
"Out"
:
self
.
inputs
[
"X"
].
reshape
(
new_shape
)}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
"X"
],
"Out"
)
class
TestReshapeOpDimInfer1
(
OpTest
):
def
setUp
(
self
):
ori_shape
=
(
5
,
10
)
new_shape
=
(
5
,
-
1
,
5
)
self
.
op_type
=
"reshape"
self
.
inputs
=
{
"X"
:
np
.
random
.
random
(
ori_shape
).
astype
(
"float32"
)}
self
.
attrs
=
{
"shape"
:
new_shape
}
self
.
outputs
=
{
"Out"
:
self
.
inputs
[
"X"
].
reshape
(
self
.
attrs
[
"shape"
])}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
"X"
],
"Out"
)
class
TestReshapeOpDimInfer2
(
OpTest
):
def
setUp
(
self
):
ori_shape
=
(
2
,
2
,
6
)
new_shape
=
(
2
,
0
,
3
,
-
1
)
infered_shape
=
(
2
,
2
,
3
,
-
1
)
self
.
op_type
=
"reshape"
self
.
inputs
=
{
"X"
:
np
.
random
.
random
(
ori_shape
).
astype
(
"float32"
)}
self
.
attrs
=
{
"shape"
:
new_shape
}
self
.
outputs
=
{
"Out"
:
self
.
inputs
[
"X"
].
reshape
(
infered_shape
)}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
"X"
],
"Out"
)
class
TestReshapeOpInplace
(
OpTest
):
def
setUp
(
self
):
ori_shape
=
(
2
,
25
)
new_shape
=
(
5
,
10
)
self
.
op_type
=
"reshape"
self
.
inputs
=
{
"X"
:
np
.
random
.
random
(
ori_shape
).
astype
(
"float32"
)}
self
.
attrs
=
{
"shape"
:
new_shape
}
self
.
outputs
=
{
"Out"
:
self
.
inputs
[
"X"
].
reshape
(
new_shape
)}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
"X"
],
"Out"
)
class
TestReshapeOpDimInferInplace1
(
OpTest
):
def
setUp
(
self
):
ori_shape
=
(
5
,
10
)
new_shape
=
(
5
,
-
1
,
5
)
self
.
init_data
()
self
.
op_type
=
"reshape2"
self
.
inputs
=
{
"X"
:
np
.
random
.
random
(
self
.
ori_shape
).
astype
(
"float32"
)}
self
.
attrs
=
{
"shape"
:
self
.
new_shape
}
self
.
outputs
=
{
"Out"
:
self
.
inputs
[
"X"
].
reshape
(
self
.
infered_shape
),
'XShape'
:
np
.
random
.
random
(
self
.
ori_shape
).
astype
(
"float32"
)
}
self
.
op_type
=
"reshape"
self
.
inputs
=
{
"X"
:
np
.
random
.
random
(
ori_shape
).
astype
(
"float32"
)}
self
.
attrs
=
{
"shape"
:
new_shape
}
self
.
outputs
=
{
"Out"
:
self
.
inputs
[
"X"
].
reshape
(
new_shape
)}
def
init_data
(
self
):
self
.
ori_shape
=
(
2
,
25
)
self
.
new_shape
=
(
5
,
10
)
self
.
infered_shape
=
(
5
,
10
)
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
(
no_check_set
=
[
'XShape'
]
)
def
test_check_grad
(
self
):
self
.
check_grad
([
"X"
],
"Out"
)
class
TestReshapeOpDimInferInplace2
(
OpTest
):
def
setUp
(
self
):
ori_shape
=
(
2
,
2
,
6
)
new_shape
=
(
2
,
0
,
3
,
-
1
)
infered_shape
=
(
2
,
2
,
3
,
-
1
)
self
.
op_type
=
"reshape"
self
.
inputs
=
{
"X"
:
np
.
random
.
random
(
ori_shape
).
astype
(
"float32"
)}
self
.
attrs
=
{
"shape"
:
new_shape
}
self
.
outputs
=
{
"Out"
:
self
.
inputs
[
"X"
].
reshape
(
infered_shape
)}
class
TestReshapeOpDimInfer1
(
TestReshapeOp
):
def
init_data
(
self
):
self
.
ori_shape
=
(
5
,
10
)
self
.
new_shape
=
(
5
,
-
1
,
5
)
self
.
infered_shape
=
(
5
,
-
1
,
5
)
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
"X"
],
"Out"
)
class
TestReshapeOpDimInfer2
(
TestReshapeOp
):
def
init_data
(
self
):
self
.
ori_shape
=
(
2
,
2
,
6
)
self
.
new_shape
=
(
2
,
0
,
3
,
-
1
)
self
.
infered_shape
=
(
2
,
2
,
3
,
-
1
)
class
TestReshapeOpWithInputShape
(
OpTest
):
...
...
@@ -130,20 +63,23 @@ class TestReshapeOpWithInputShape(OpTest):
new_shape
=
(
0
,
-
1
,
5
)
actual_shape
=
(
2
,
3
,
5
)
self
.
op_type
=
"reshape"
self
.
op_type
=
"reshape
2
"
self
.
inputs
=
{
"X"
:
np
.
random
.
random
(
ori_shape
).
astype
(
"float32"
),
"Shape"
:
np
.
array
(
actual_shape
,
dtype
=
"int32"
)
}
self
.
attrs
=
{
"shape"
:
new_shape
}
self
.
outputs
=
{
"Out"
:
self
.
inputs
[
"X"
].
reshape
(
actual_shape
)}
self
.
outputs
=
{
"Out"
:
self
.
inputs
[
"X"
].
reshape
(
actual_shape
),
'XShape'
:
np
.
random
.
random
(
ori_shape
).
astype
(
"float32"
)
}
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
(
no_check_set
=
[
'XShape'
]
)
def
test_check_grad
(
self
):
self
.
check_grad
([
"X"
],
"Out"
)
self
.
check_grad
([
"X"
],
"Out"
,
sum_outputs
=
[
"Out"
]
)
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/test_squeeze_op.py
浏览文件 @
40dbd97f
...
...
@@ -23,14 +23,17 @@ from op_test import OpTest
# Correct: General.
class
TestSqueezeOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"squeeze"
self
.
op_type
=
"squeeze
2
"
self
.
init_test_case
()
self
.
inputs
=
{
"X"
:
np
.
random
.
random
(
self
.
ori_shape
).
astype
(
"float32"
)}
self
.
init_attrs
()
self
.
outputs
=
{
"Out"
:
self
.
inputs
[
"X"
].
reshape
(
self
.
new_shape
)}
self
.
outputs
=
{
"Out"
:
self
.
inputs
[
"X"
].
reshape
(
self
.
new_shape
),
"XShape"
:
np
.
random
.
random
(
self
.
ori_shape
).
astype
(
"float32"
)
}
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
(
no_check_set
=
[
'XShape'
]
)
def
test_check_grad
(
self
):
self
.
check_grad
([
"X"
],
"Out"
)
...
...
python/paddle/fluid/tests/unittests/test_transpose_op.py
浏览文件 @
40dbd97f
...
...
@@ -22,16 +22,19 @@ from op_test import OpTest
class
TestTransposeOp
(
OpTest
):
def
setUp
(
self
):
self
.
initTestCase
()
self
.
op_type
=
"transpose"
self
.
op_type
=
"transpose
2
"
self
.
inputs
=
{
'X'
:
np
.
random
.
random
(
self
.
shape
).
astype
(
"float32"
)}
self
.
attrs
=
{
'axis'
:
list
(
self
.
axis
)}
self
.
outputs
=
{
'Out'
:
self
.
inputs
[
'X'
].
transpose
(
self
.
axis
)}
self
.
outputs
=
{
'XShape'
:
np
.
random
.
random
(
self
.
shape
).
astype
(
"float32"
),
'Out'
:
self
.
inputs
[
'X'
].
transpose
(
self
.
axis
)
}
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
(
no_check_set
=
[
'XShape'
]
)
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'Out'
)
self
.
check_grad
([
'X'
],
'Out'
,
sum_outputs
=
[
'Out'
]
)
def
initTestCase
(
self
):
self
.
shape
=
(
3
,
4
)
...
...
python/paddle/fluid/tests/unittests/test_unsqueeze_op.py
浏览文件 @
40dbd97f
...
...
@@ -24,13 +24,16 @@ from op_test import OpTest
class
TestUnsqueezeOp
(
OpTest
):
def
setUp
(
self
):
self
.
init_test_case
()
self
.
op_type
=
"unsqueeze"
self
.
op_type
=
"unsqueeze
2
"
self
.
inputs
=
{
"X"
:
np
.
random
.
random
(
self
.
ori_shape
).
astype
(
"float32"
)}
self
.
init_attrs
()
self
.
outputs
=
{
"Out"
:
self
.
inputs
[
"X"
].
reshape
(
self
.
new_shape
)}
self
.
outputs
=
{
"Out"
:
self
.
inputs
[
"X"
].
reshape
(
self
.
new_shape
),
"XShape"
:
np
.
random
.
random
(
self
.
ori_shape
).
astype
(
"float32"
)
}
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
(
no_check_set
=
[
"XShape"
]
)
def
test_check_grad
(
self
):
self
.
check_grad
([
"X"
],
"Out"
)
...
...
python/paddle/fluid/transpiler/details/program_utils.py
浏览文件 @
40dbd97f
...
...
@@ -153,7 +153,7 @@ def block_to_code(block, block_idx):
indent
+=
1
# sort all vars
all_vars
=
sorted
(
block
.
vars
.
iteritems
(
),
key
=
lambda
x
:
x
[
0
])
all_vars
=
sorted
(
six
.
iteritems
(
block
.
vars
),
key
=
lambda
x
:
x
[
0
])
for
var
in
all_vars
:
print
(
"{}{}"
.
format
(
get_indent_space
(
indent
),
variable_to_code
(
var
[
1
])))
...
...
python/paddle/fluid/transpiler/distribute_transpiler.py
浏览文件 @
40dbd97f
...
...
@@ -300,7 +300,7 @@ class DistributeTranspiler(object):
input_deps
=
grad_name_to_send_dummy_out
.
values
()
program
.
global_block
().
append_op
(
type
=
"send_barrier"
,
inputs
=
{
"X"
:
input_deps
},
inputs
=
{
"X"
:
list
(
input_deps
)
},
outputs
=
{
"Out"
:
send_barrier_out
},
attrs
=
{
"endpoints"
:
pserver_endpoints
,
...
...
@@ -401,7 +401,7 @@ class DistributeTranspiler(object):
Args:
recv_vars (list): Variable list to recv for current trainer_id
eplist (list): A list of strings indicating
eplist (list): A list of strings indicating
Returns:
Program: trainer side startup program.
...
...
@@ -455,7 +455,7 @@ class DistributeTranspiler(object):
if
len
(
splited_var
)
<=
1
:
continue
# NOTE: if enable memory optimization, origin vars maybe removed.
if
startup_program
.
global_block
().
vars
.
has_key
(
varname
)
:
if
varname
in
startup_program
.
global_block
().
vars
:
orig_param
=
startup_program
.
global_block
().
vars
[
varname
]
else
:
origin_param_var
=
self
.
origin_program
.
global_block
().
vars
[
...
...
@@ -690,7 +690,7 @@ class DistributeTranspiler(object):
Args:
endpoint (str): current pserver endpoint.
Returns:
tuple: (main_program, startup_program), of type "Program"
"""
...
...
@@ -713,7 +713,7 @@ class DistributeTranspiler(object):
endpoint (str): current pserver endpoint.
pserver_program (Program): deprecated, call get_pserver_program first.
startup_program (Program): deprecated, should pass startup_program
when initalizing
when initalizing
Returns:
Program: parameter server side startup program.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录