BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit 40dbd97f
Authored Sep 06, 2018 by tensor-tang

Merge remote-tracking branch 'ups/develop' into refine/op/peephole

Parents: 3eb55f06, b8057515
Showing 27 changed files with 868 additions and 279 deletions (+868 −279)
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc        +0   −3
paddle/fluid/framework/ir/graph_pattern_detector.cc          +1   −1
paddle/fluid/framework/ir/graph_viz_pass.cc                  +42  −17
paddle/fluid/inference/analysis/analyzer.cc                  +0   −1
paddle/fluid/inference/analysis/analyzer_tester.cc           +84  −80
paddle/fluid/inference/api/analysis_predictor.cc             +0   −1
paddle/fluid/inference/api/helper.h                          +41  −0
paddle/fluid/operators/fake_quantize_op.cu                   +2   −1
paddle/fluid/operators/flatten_op.cc                         +115 −0
paddle/fluid/operators/layer_norm_op.cu                      +7   −7
paddle/fluid/operators/reshape_op.cc                         +100 −0
paddle/fluid/operators/squeeze_op.cc                         +119 −7
paddle/fluid/operators/transpose_op.cc                       +103 −3
paddle/fluid/operators/transpose_op.cu.cc                    +7   −0
paddle/fluid/operators/unsqueeze_op.cc                       +117 −6
paddle/scripts/paddle_build.sh                               +2   −0
python/paddle/fluid/layers/nn.py                             +21  −11
python/paddle/fluid/tests/unittests/dist_transformer.py      +20  −14
python/paddle/fluid/tests/unittests/op_test.py               +13  −5
python/paddle/fluid/tests/unittests/test_flatten_op.py       +6   −3
python/paddle/fluid/tests/unittests/test_prelu_op.py         +13  −9
python/paddle/fluid/tests/unittests/test_reshape_op.py       +30  −94
python/paddle/fluid/tests/unittests/test_squeeze_op.py       +6   −3
python/paddle/fluid/tests/unittests/test_transpose_op.py     +7   −4
python/paddle/fluid/tests/unittests/test_unsqueeze_op.py     +6   −3
python/paddle/fluid/transpiler/details/program_utils.py      +1   −1
python/paddle/fluid/transpiler/distribute_transpiler.py      +5   −5
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc

@@ -13,13 +13,10 @@
 // limitations under the License.

 #include "paddle/fluid/framework/ir/attention_lstm_fuse_pass.h"
 #include <string>
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/inference/api/helper.h"

 namespace paddle {
 namespace framework {
...
paddle/fluid/framework/ir/graph_pattern_detector.cc

@@ -85,7 +85,7 @@ void GraphPatternDetector::operator()(Graph* graph,
   LOG(INFO) << "detect " << subgraphs.size()
             << " subgraph matches the pattern";
   int id = 0;
   for (auto& g : subgraphs) {
-    LOG(INFO) << "optimizing #" << id++ << " subgraph";
+    VLOG(3) << "optimizing #" << id++ << " subgraph";
     handler(g, graph);
   }
 }
...
paddle/fluid/framework/ir/graph_viz_pass.cc

@@ -50,20 +50,37 @@ std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl(
   Dot dot;
-  std::vector<Dot::Attr> op_attrs({Dot::Attr("style", "filled"),
-                                   Dot::Attr("shape", "box"),
-                                   Dot::Attr("fillcolor", "red")});
-  std::vector<Dot::Attr> var_attrs({Dot::Attr("style", "filled,rounded"),
-                                    // Dot::Attr("shape", "diamond"),
+  const std::vector<Dot::Attr> op_attrs({
+      Dot::Attr("style", "rounded,filled,bold"),  //
+      Dot::Attr("shape", "box"),                  //
+      Dot::Attr("color", "#303A3A"),              //
+      Dot::Attr("fontcolor", "#ffffff"),          //
+      Dot::Attr("width", "1.3"),                  //
+      Dot::Attr("height", "0.84"),                //
+      Dot::Attr("fontname", "Arial"),             //
+  });
+  const std::vector<Dot::Attr> arg_attrs({
+      Dot::Attr("shape", "box"),                  //
+      Dot::Attr("style", "rounded,filled,bold"),  //
+      Dot::Attr("fontname", "Arial"),             //
+      Dot::Attr("fillcolor", "#999999"),          //
+      Dot::Attr("color", "#dddddd"),              //
+  });
+  const std::vector<Dot::Attr> param_attrs({
+      Dot::Attr("shape", "box"),                  //
+      Dot::Attr("style", "rounded,filled,bold"),  //
+      Dot::Attr("fontname", "Arial"),             //
+      Dot::Attr("color", "#148b97"),              //
+      Dot::Attr("fontcolor", "#ffffff"),          //
+  });
+  const std::vector<Dot::Attr> marked_op_attrs(
+      {Dot::Attr("style", "rounded,filled,bold"), Dot::Attr("shape", "box"),
+       Dot::Attr("fillcolor", "yellow")});
+  const std::vector<Dot::Attr> marked_var_attrs(
+      {Dot::Attr("style", "filled,rounded"), Dot::Attr("shape", "box"),
+       Dot::Attr("fillcolor", "yellow")});
-  std::vector<Dot::Attr> marked_op_attrs({Dot::Attr("style", "filled"),
-                                          Dot::Attr("shape", "box"),
-                                          Dot::Attr("fillcolor", "lightgray")});
-  std::vector<Dot::Attr> marked_var_attrs(
-      {Dot::Attr("style", "filled,rounded"),
-       // Dot::Attr("shape", "diamond"),
-       Dot::Attr("fillcolor", "lightgray")});

   auto marked_nodes = ConsumeMarkedNodes(graph.get());
   // Create nodes
...
@@ -74,9 +91,17 @@ std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl(
           marked_nodes.count(n) ? marked_op_attrs : op_attrs;
       dot.AddNode(node_id, attr, node_id);
     } else if (n->IsVar()) {
-      decltype(op_attrs) attr =
-          marked_nodes.count(n) ? marked_var_attrs : var_attrs;
-      dot.AddNode(node_id, attr, node_id);
+      decltype(op_attrs)* attr;
+      if (marked_nodes.count(n)) {
+        attr = &marked_var_attrs;
+      } else if (const_cast<Node*>(n)->Var() &&
+                 const_cast<Node*>(n)->Var()->Persistable()) {
+        attr = &param_attrs;
+      } else {
+        attr = &arg_attrs;
+      }
+      dot.AddNode(node_id, *attr, node_id);
     }
     node2dot[n] = node_id;
   }
...
paddle/fluid/inference/analysis/analyzer.cc

@@ -106,7 +106,6 @@ void Analyzer::Run(Argument* argument) {
       }
     }
-    passes.push_back("graph_viz_pass");
     // Ugly support fluid-to-ir-pass
     argument->Set(kFluidToIrPassesAttr, new std::vector<std::string>(passes));

   for (auto& x : data_) {
...
paddle/fluid/inference/analysis/analyzer_tester.cc

@@ -16,6 +16,7 @@
 #include <google/protobuf/text_format.h>
 #include <gtest/gtest.h>
 #include <thread>  // NOLINT
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/pass.h"
 #include "paddle/fluid/inference/analysis/ut_helper.h"
...
@@ -24,12 +25,12 @@
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_inference_pass.h"
 #include "paddle/fluid/inference/utils/singleton.h"
+#include "paddle/fluid/platform/profiler.h"

 DEFINE_string(infer_ditu_rnn_model, "", "model path for ditu RNN");
 DEFINE_string(infer_ditu_rnn_data, "", "data path for ditu RNN");
 DEFINE_int32(batch_size, 10, "batch size.");
 DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
+DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");

 namespace paddle {
 namespace inference {
...
@@ -220,39 +221,6 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   }
 }

-std::string DescribeTensor(const PaddleTensor &tensor) {
-  std::stringstream os;
-  os << "Tensor [" << tensor.name << "]\n";
-  os << " - type: ";
-  switch (tensor.dtype) {
-    case PaddleDType::FLOAT32:
-      os << "float32";
-      break;
-    case PaddleDType::INT64:
-      os << "int64";
-      break;
-    default:
-      os << "unset";
-  }
-  os << '\n';
-  os << " - shape: " << to_string(tensor.shape) << '\n';
-  os << " - lod: ";
-  for (auto &l : tensor.lod) {
-    os << to_string(l) << "; ";
-  }
-  os << "\n";
-  os << " - data: ";
-  int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1,
-                            [](int a, int b) { return a * b; });
-  for (int i = 0; i < dim; i++) {
-    os << static_cast<float *>(tensor.data.data())[i] << " ";
-  }
-  os << '\n';
-  return os.str();
-}

 }  // namespace

 const float ditu_rnn_target_data[] = {
...
@@ -266,11 +234,29 @@ const float ditu_rnn_target_data[] = {
     10.7286, 12.0595, 10.6672, 0,       0,       0,       0,       0,
     93.5771, 3.84641, 0,       0,       0,       0,       0,       0,
     169.426, 0,       0,       0,       0,       0,       0,       0};

+void CompareResult(const std::vector<PaddleTensor> &outputs,
+                   const std::vector<PaddleTensor> &base_outputs) {
+  PADDLE_ENFORCE_GT(outputs.size(), 0);
+  PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
+  for (size_t i = 0; i < outputs.size(); i++) {
+    auto &out = outputs[i];
+    auto &base_out = base_outputs[i];
+    size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
+                                  [](int a, int b) { return a * b; });
+    size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
+                                   1, [](int a, int b) { return a * b; });
+    PADDLE_ENFORCE_EQ(size, size1);
+    PADDLE_ENFORCE_GT(size, 0);
+    float *data = static_cast<float *>(out.data.data());
+    float *base_data = static_cast<float *>(base_out.data.data());
+    for (size_t i = 0; i < size; i++) {
+      EXPECT_NEAR(data[i], base_data[i], 1e-3);
+    }
+  }
+}
 // Test with a really complicate model.
-void TestDituRNNPrediction(const std::string &model_path,
-                           const std::string &data_path, int batch_size,
-                           bool use_analysis, bool activate_ir,
-                           int num_times = 1) {
+void TestDituRNNPrediction(bool use_analysis, bool activate_ir,
+                           int num_threads) {
   AnalysisConfig config;
   config.prog_file = FLAGS_infer_ditu_rnn_model + "/__model__";
   config.param_file = FLAGS_infer_ditu_rnn_model + "/param";
...
@@ -281,6 +267,8 @@ void TestDituRNNPrediction(const std::string &model_path,
   PADDLE_ENFORCE(config.ir_mode ==
                  AnalysisConfig::IrPassMode::kExclude);  // default
   config.ir_passes.clear();  // Do not exclude any pass.

+  int batch_size = FLAGS_batch_size;
+  int num_times = FLAGS_repeat;

   auto base_predictor =
       CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
...
@@ -288,40 +276,55 @@ void TestDituRNNPrediction(const std::string &model_path,
       CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
           config);
   std::vector<PaddleTensor> input_slots;
-  DataRecord data(data_path, batch_size);
+  DataRecord data(FLAGS_infer_ditu_rnn_data, batch_size);
   // Prepare inputs.
   PrepareInputs(&input_slots, &data, batch_size);
   std::vector<PaddleTensor> outputs, base_outputs;

   base_predictor->Run(input_slots, &base_outputs);

+  LOG(INFO) << "===========profile result===========";
+  if (num_threads == 1) {
+    // Prepare inputs.
     Timer timer;
     timer.tic();
     for (int i = 0; i < num_times; i++) {
       predictor->Run(input_slots, &outputs);
     }
-  LOG(INFO) << "===========profile result===========";
-  LOG(INFO) << "batch_size: " << batch_size << ", repeat: " << num_times
-            << ", latency: " << timer.toc() / num_times << "ms";
-  LOG(INFO) << "=====================================";
-
-  PADDLE_ENFORCE_GT(outputs.size(), 0);
-  PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
-  for (size_t i = 0; i < outputs.size(); i++) {
-    auto &out = outputs[i];
-    auto &base_out = base_outputs[i];
-    size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
-                                  [](int a, int b) { return a * b; });
-    size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
-                                   1, [](int a, int b) { return a * b; });
-    PADDLE_ENFORCE_EQ(size, size1);
-    PADDLE_ENFORCE_GT(size, 0);
-    float *data = static_cast<float *>(out.data.data());
-    float *base_data = static_cast<float *>(base_out.data.data());
-    for (size_t j = 0; j < size; j++) {
-      EXPECT_NEAR(data[j], base_data[j], 1e-3);
-    }
-  }
+    PrintTime(batch_size, num_times, 1, 0, timer.toc() / num_times);
+    CompareResult(outputs, base_outputs);
+  } else {
+    std::vector<std::thread> threads;
+    std::vector<std::unique_ptr<PaddlePredictor>> predictors;
+    // TODO(yanchunwei): Bug here, the analyzer phase can't be parallelled
+    // because AttentionLSTM's hard code nodeid will be damanged.
+    for (int tid = 0; tid < num_threads; ++tid) {
+      predictors.emplace_back(
+          CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
+              config));
+    }
+    for (int tid = 0; tid < num_threads; ++tid) {
+      threads.emplace_back([&, tid]() {
+        // Each thread should have local input_slots and outputs.
+        std::vector<PaddleTensor> input_slots;
+        DataRecord data(FLAGS_infer_ditu_rnn_data, batch_size);
+        PrepareInputs(&input_slots, &data, batch_size);
+        std::vector<PaddleTensor> outputs;
+        Timer timer;
+        timer.tic();
+        for (int i = 0; i < num_times; i++) {
+          predictors[tid]->Run(input_slots, &outputs);
+        }
+        PrintTime(batch_size, num_times, num_threads, tid,
+                  timer.toc() / num_times);
+        CompareResult(outputs, base_outputs);
+      });
+    }
+    for (int i = 0; i < num_threads; ++i) {
+      threads[i].join();
+    }
+  }
+  LOG(INFO) << "=====================================";

   if (use_analysis && activate_ir) {
     AnalysisPredictor *analysis_predictor =
...
@@ -350,25 +353,26 @@ void TestDituRNNPrediction(const std::string &model_path,
   }
 }

-// Directly infer with the original model.
-TEST(Analyzer, DituRNN_without_analysis) {
-  TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                        FLAGS_batch_size, false, false, FLAGS_repeat);
-}
-
-// Inference with the original model with the analysis turned on, the analysis
-// module will transform the program to a data flow graph.
-TEST(Analyzer, DituRNN_with_analysis) {
-  LOG(INFO) << "ditu rnn with analysis";
-  TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                        FLAGS_batch_size, true, false, FLAGS_repeat);
-}
-
-// Inference with analysis and IR. The IR module will fuse some large kernels.
-TEST(Analyzer, DituRNN_with_analysis_with_IR) {
-  LOG(INFO) << "ditu rnn with analysis and IR fuse";
-  TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                        FLAGS_batch_size, true, true, FLAGS_repeat);
-}
+// Inference with analysis and IR, easy for profiling independently.
+TEST(Analyzer, DituRNN) {
+  TestDituRNNPrediction(true, true, FLAGS_num_threads);
+}
+
+// Other unit-tests of DituRNN, test different options of use_analysis,
+// activate_ir and multi-threads.
+TEST(Analyzer, DituRNN_tests) {
+  int num_threads[2] = {1, 4};
+  for (auto i : num_threads) {
+    // Directly infer with the original model.
+    TestDituRNNPrediction(false, false, i);
+    // Inference with the original model with the analysis turned on, the
+    // analysis
+    // module will transform the program to a data flow graph.
+    TestDituRNNPrediction(true, false, i);
+    // Inference with analysis and IR. The IR module will fuse some large
+    // kernels.
+    TestDituRNNPrediction(true, true, i);
+  }
+}

 }  // namespace analysis
...
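The multi-threaded branch above is worth restating: the shared base_outputs are read-only, while every thread gets its own predictor, input_slots, and outputs. A minimal Python sketch of the same benchmarking pattern (all names here are hypothetical, not Paddle APIs):

import threading
import time

def bench(predictors, make_inputs, num_times):
    # One latency slot per thread, mirroring PrintTime(batch, repeat, tid, ...).
    latencies = [None] * len(predictors)

    def worker(tid):
        inputs = make_inputs()           # thread-local inputs
        start = time.time()
        for _ in range(num_times):
            predictors[tid](inputs)      # thread-local predictor
        latencies[tid] = (time.time() - start) / num_times

    threads = [threading.Thread(target=worker, args=(t,))
               for t in range(len(predictors))]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return latencies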
paddle/fluid/inference/api/analysis_predictor.cc

@@ -35,7 +35,6 @@ bool AnalysisPredictor::Init(
   } else {
     place_ = paddle::platform::CPUPlace();
   }
-  PADDLE_ENFORCE(!parent_scope);
   if (parent_scope) {
     scope_ = parent_scope;
     sub_scope_ = &(parent_scope->NewScope());
...
paddle/fluid/inference/api/helper.h

@@ -14,6 +14,7 @@
 #pragma once

 #include <glog/logging.h>
 #include <sys/time.h>
+#include <algorithm>
 #include <numeric>
...
@@ -88,5 +89,45 @@ static void TensorAssignData(PaddleTensor *tensor,
   }
 }

+std::string DescribeTensor(const PaddleTensor &tensor) {
+  std::stringstream os;
+  os << "Tensor [" << tensor.name << "]\n";
+  os << " - type: ";
+  switch (tensor.dtype) {
+    case PaddleDType::FLOAT32:
+      os << "float32";
+      break;
+    case PaddleDType::INT64:
+      os << "int64";
+      break;
+    default:
+      os << "unset";
+  }
+  os << '\n';
+  os << " - shape: " << to_string(tensor.shape) << '\n';
+  os << " - lod: ";
+  for (auto &l : tensor.lod) {
+    os << to_string(l) << "; ";
+  }
+  os << "\n";
+  os << " - data: ";
+  int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1,
+                            [](int a, int b) { return a * b; });
+  for (int i = 0; i < dim; i++) {
+    os << static_cast<float *>(tensor.data.data())[i] << " ";
+  }
+  os << '\n';
+  return os.str();
+}
+
+void PrintTime(int batch_size, int repeat, int num_threads, int tid,
+               double latency) {
+  LOG(INFO) << "batch_size: " << batch_size << ", repeat: " << repeat
+            << ", threads: " << num_threads << ", thread id: " << tid
+            << ", latency: " << latency << "ms";
+}

 }  // namespace inference
 }  // namespace paddle
paddle/fluid/operators/fake_quantize_op.cu

@@ -119,7 +119,8 @@ struct FindRangeAbsMaxFunctor<platform::CUDADeviceContext, T> {
                   const framework::Tensor& last_scale,
                   const framework::Tensor& iter, const int window_size,
                   framework::Tensor* scales_arr, framework::Tensor* out_scale) {
-    auto& gpu_place = boost::get<platform::CUDAPlace>(ctx.GetPlace());
+    const auto gpu_place = boost::get<platform::CUDAPlace>(ctx.GetPlace());
+
     T* scale_arr = scales_arr->mutable_data<T>(gpu_place);
     T* out_scale_data = out_scale->mutable_data<T>(gpu_place);
...
paddle/fluid/operators/flatten_op.cc

@@ -157,6 +157,116 @@ class FlattenGradOp : public framework::OperatorBase {
  }
};

// FIXME(zcd): flatten2 adds an intermediate output(XShape) based on flatten,
// the XShape is used to carry the shape and lod of X which will be used in
// flatten_grad, in this way, the framework can reuse the memory of X
// immediately the flatten2_op is finished.
// Considering compatibility issues, we could not fix flatten2_op
class Flatten2OpInferShape : public FlattenOpInferShape {
 public:
  void operator()(framework::InferShapeContext *ctx) const override {
    FlattenOpInferShape::operator()(ctx);
    PADDLE_ENFORCE(ctx->HasOutput("XShape"),
                   "Output (XShape) of Flatten op should not be null.");
    const auto &in_dims = ctx->GetInputDim("X");
    std::vector<int64_t> xshape_dims(in_dims.size() + 1);
    xshape_dims[0] = 0;
    for (int i = 0; i < in_dims.size(); ++i) {
      xshape_dims[i + 1] = in_dims[i];
    }
    ctx->SetOutputDim("XShape", framework::make_ddim(xshape_dims));
    ctx->ShareLoD("X", "XShape");
  }
};

class Flatten2Op : public framework::OperatorBase {
 public:
  using OperatorBase::OperatorBase;

 private:
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    auto &axis = Attr<int>("axis");
    auto in_dims =
        scope.FindVar(Input("X"))->Get<framework::LoDTensor>().dims();
    const auto &out_dims = FlattenOpInferShape::GetOutputShape(axis, in_dims);

    framework::AttributeMap attrs;
    attrs["shape"] = out_dims;
    attrs["inplace"] = false;
    // Invoke Reshape Op
    auto reshape_op = framework::OpRegistry::CreateOp(
        "reshape2", {{"X", {Input("X")}}, {"Shape", {}}},
        {{"Out", {Output("Out")}}, {"XShape", {Output("XShape")}}}, attrs);
    reshape_op->Run(scope, place);
  }
};

class Flatten2OpMaker : public FlattenOpMaker {
 public:
  void Make() override {
    FlattenOpMaker::Make();
    AddOutput("XShape",
              "XShape is just used to store the shape and lod of X, which will "
              "be used in FlattenGradOp.")
        .AsIntermediate();
  }
};

class Flatten2GradOpMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto *grad_op = new framework::OpDesc();
    grad_op->SetType("flatten2_grad");
    grad_op->SetInput("XShape", Output("XShape"));
    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_op->SetAttrMap(Attrs());
    return std::unique_ptr<framework::OpDesc>(grad_op);
  }
};

class Flatten2GradInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *context) const override {
    PADDLE_ENFORCE(context->HasInput("XShape"),
                   "Input(XShape) shouldn't be null.");
    PADDLE_ENFORCE(context->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) shouldn't be null.");
    auto xshape_dims = context->GetInputDim("XShape");
    auto x_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size());
    context->SetOutputDim(framework::GradVarName("X"), x_dims);
    context->ShareLoD("XShape", framework::GradVarName("X"));
  }
};

class Flatten2GradOp : public framework::OperatorBase {
 public:
  using OperatorBase::OperatorBase;

 private:
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    auto dx_name = Output(framework::GradVarName("X"));
    auto dout_name = Input(framework::GradVarName("Out"));
    auto xshape_name = Input("XShape");
    auto xshape_dims =
        scope.FindVar(xshape_name)->Get<framework::LoDTensor>().dims();
    auto x_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size());

    framework::AttributeMap attrs;
    attrs["shape"] = framework::vectorize2int(x_dims);
    attrs["inplace"] = false;

    auto reshape_op = framework::OpRegistry::CreateOp(
        "reshape2", {{"X", {dout_name}}, {"Shape", {}}},
        {{"Out", {dx_name}}, {"XShape", {xshape_name}}}, attrs);
    reshape_op->Run(scope, place);
  }
};

}  // namespace operators
}  // namespace paddle
...
@@ -167,3 +277,8 @@ REGISTER_OPERATOR(flatten, ops::FlattenOp, ops::FlattenOpMaker,
                  ops::FlattenOpInferShape,
                  paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(flatten_grad, ops::FlattenGradOp, ops::FlattenGradInferShape);

REGISTER_OPERATOR(flatten2, ops::Flatten2Op, ops::Flatten2OpMaker,
                  ops::Flatten2OpInferShape, ops::Flatten2GradOpMaker);
REGISTER_OPERATOR(flatten2_grad, ops::Flatten2GradOp,
                  ops::Flatten2GradInferShape);
paddle/fluid/operators/layer_norm_op.cu

@@ -67,27 +67,27 @@ template <typename T, int BlockDim>
 __global__ void LayerNormForward(const T *x, const T *scale, const T *bias,
                                  T *y, T *mean, T *var, float epsilon,
                                  int feature_size) {
-  using BlockReduce = cub::BlockReduce<PairForLayerNorm<T>, BlockDim>;
+  using BlockReduce = cub::BlockReduce<PairForLayerNorm<double>, BlockDim>;
   __shared__ typename BlockReduce::TempStorage temp_storage;

   int beg_idx = blockIdx.x * feature_size + threadIdx.x;
   int end_idx = (blockIdx.x + 1) * feature_size;

   // Step 1: Reduce to calculate mean and var
-  T mean_val = static_cast<T>(0);
-  T var_val = static_cast<T>(0);
+  double mean_val = 0;
+  double var_val = 0;
   for (int i = beg_idx; i < end_idx; i += BlockDim) {
     T tmp = x[i];
     mean_val += tmp;
     var_val += (tmp * tmp);
   }
   auto pair = BlockReduce(temp_storage)
-                  .Reduce(PairForLayerNorm<T>(mean_val, var_val),
-                          PairForLayerNormAddFunctor<T>());
+                  .Reduce(PairForLayerNorm<double>(mean_val, var_val),
+                          PairForLayerNormAddFunctor<double>());
   if (threadIdx.x == 0) {
     auto tmp = pair.first_ / feature_size;
-    mean[blockIdx.x] = tmp;
-    var[blockIdx.x] = pair.second_ / feature_size - tmp * tmp;
+    mean[blockIdx.x] = static_cast<T>(tmp);
+    var[blockIdx.x] =
+        static_cast<T>(pair.second_ / feature_size - tmp * tmp);
   }
   __syncthreads();
   mean_val = mean[blockIdx.x];
...
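The reason the reduction type switches from T to double: with float32 accumulators, the running sums of x and x*x lose low-order bits once the sum dwarfs each addend, and E[x^2] − E[x]^2 then cancels catastrophically for small variances. A small demonstration of the effect (my own illustration, not Paddle code):

import numpy as np

x = (np.random.random(1 << 20) + 100.0).astype(np.float32)
mean32 = x.sum(dtype=np.float32) / x.size
var32 = (x * x).sum(dtype=np.float32) / x.size - mean32 * mean32
mean64 = x.sum(dtype=np.float64) / x.size
var64 = (x.astype(np.float64) ** 2).sum() / x.size - mean64 * mean64
# The true variance here is about 1/12; the float32 estimate drifts
# visibly, and a strictly sequential float32 accumulation (what one GPU
# thread performs over its stride) is worse still.
print(var32, var64)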
paddle/fluid/operators/reshape_op.cc

@@ -246,6 +246,88 @@ class ReshapeGradKernel {
  }
};

// FIXME(zcd): reshape2 adds an intermediate output(XShape) based on reshape,
// the XShape is used to carry the shape and lod of X which will be used in
// reshape_grad, in this way, the framework can reuse the memory of X
// immediately the reshape_op is finished.
// Considering compatibility issues, we could not fix reshape_op
class Reshape2Op : public ReshapeOp {
 public:
  Reshape2Op(const std::string &type, const framework::VariableNameMap &inputs,
             const framework::VariableNameMap &outputs,
             const framework::AttributeMap &attrs)
      : ReshapeOp(type, inputs, outputs, attrs) {}

  void InferShape(framework::InferShapeContext *ctx) const override {
    ReshapeOp::InferShape(ctx);
    PADDLE_ENFORCE(ctx->HasOutput("XShape"),
                   "Output(XShape) of ReshapeOp should not be null.");
    const auto &x_dims = ctx->GetInputDim("X");
    std::vector<int64_t> xshape_dims(x_dims.size() + 1);
    xshape_dims[0] = 0;
    for (int i = 0; i < x_dims.size(); ++i) {
      xshape_dims[i + 1] = x_dims[i];
    }
    ctx->SetOutputDim("XShape", framework::make_ddim(xshape_dims));
    ctx->ShareLoD("X", /*->*/ "XShape");
  }
};

class Reshape2OpMaker : public ReshapeOpMaker {
 public:
  void Make() override {
    ReshapeOpMaker::Make();
    AddOutput("XShape",
              "XShape is just used to store the shape and lod of X, which will "
              "be used in FlattenGradOp.")
        .AsIntermediate();
  }
};

class Reshape2GradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto *grad_op = new framework::OpDesc();
    grad_op->SetType("reshape2_grad");
    grad_op->SetInput("XShape", Output("XShape"));
    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_op->SetAttrMap(Attrs());
    return std::unique_ptr<framework::OpDesc>(grad_op);
  }
};

class Reshape2GradOp : public framework::OperatorWithKernel {
 public:
  Reshape2GradOp(const std::string &type,
                 const framework::VariableNameMap &inputs,
                 const framework::VariableNameMap &outputs,
                 const framework::AttributeMap &attrs)
      : OperatorWithKernel(type, inputs, outputs, attrs) {}

  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("XShape"), "Input(XShape) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) shouldn't be null.");
    auto xshape_dims = ctx->GetInputDim("XShape");
    auto x_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size());
    ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
    ctx->ShareLoD("XShape", framework::GradVarName("X"));
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    return framework::OpKernelType(
        framework::ToDataType(
            ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"))
                ->type()),
        ctx.device_context());
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
...
@@ -261,6 +343,17 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel,
                               ops::ReshapeGradKernel, int64_t,
                               ops::ReshapeGradKernel);

REGISTER_OPERATOR(reshape2, ops::Reshape2Op, ops::Reshape2OpMaker,
                  ops::Reshape2GradMaker);
REGISTER_OPERATOR(reshape2_grad, ops::Reshape2GradOp);
REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
                               ops::ReshapeKernel, int, ops::ReshapeKernel,
                               int64_t, ops::ReshapeKernel);
REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2_grad, float, ops::ReshapeGradKernel,
                               double, ops::ReshapeGradKernel, int,
                               ops::ReshapeGradKernel, int64_t,
                               ops::ReshapeGradKernel);

#ifdef PADDLE_WITH_CUDA
REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, double,
                                ops::ReshapeKernel, int, ops::ReshapeKernel,
...
@@ -269,4 +362,11 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel,
                                double, ops::ReshapeGradKernel, int,
                                ops::ReshapeGradKernel, int64_t,
                                ops::ReshapeGradKernel);
REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
                                ops::ReshapeKernel, int, ops::ReshapeKernel,
                                int64_t, ops::ReshapeKernel);
REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2_grad, float, ops::ReshapeGradKernel,
                                double, ops::ReshapeGradKernel, int,
                                ops::ReshapeGradKernel, int64_t,
                                ops::ReshapeGradKernel);
#endif
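How every *2 grad op in this commit recovers X's shape: slice_ddim(xshape_dims, 1, xshape_dims.size()) simply drops the leading sentinel 0 from XShape. In Python terms (illustrative only):

xshape = (0, 2, 25)   # XShape dims as produced by Reshape2Op::InferShape
x_dims = xshape[1:]   # (2, 25) -> becomes the "shape" attr of the
print(x_dims)         # reshape2 call that computes X@GRAD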
paddle/fluid/operators/squeeze_op.cc

@@ -181,6 +181,113 @@ class SqueezeGradOp : public framework::OperatorBase {
  }
};

// FIXME(zcd): squeeze2 adds an intermediate output(XShape) based on squeeze,
// the XShape is used to carry the shape and lod of X which will be used in
// squeeze_grad, in this way, the framework can reuse the memory of X
// immediately the squeeze2_op is finished.
// Considering compatibility issues, we could not fix squeeze2_op
class Squeeze2OpMaker : public SqueezeOpMaker {
 public:
  void Make() override {
    SqueezeOpMaker::Make();
    AddOutput("XShape",
              "XShape is just used to store the shape and lod of X, which will "
              "be used in SqueezeGradOp.")
        .AsIntermediate();
  }
};

class Squeeze2OpInferShape : public SqueezeOpInferShape {
 public:
  void operator()(framework::InferShapeContext *ctx) const override {
    SqueezeOpInferShape::operator()(ctx);
    PADDLE_ENFORCE(ctx->HasOutput("XShape"),
                   "Output(XShape) of Squeeze operator should not be null.");
    const auto &x_dims = ctx->GetInputDim("X");
    std::vector<int64_t> xshape_dims(x_dims.size() + 1);
    xshape_dims[0] = 0;
    for (int i = 0; i < x_dims.size(); ++i) {
      xshape_dims[i + 1] = x_dims[i];
    }
    ctx->SetOutputDim("XShape", framework::make_ddim(xshape_dims));
    ctx->ShareLoD("X", /*->*/ "XShape");
  }
};

class Squeeze2Op : public framework::OperatorBase {
 public:
  using OperatorBase::OperatorBase;

 private:
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    auto &axes = Attr<std::vector<int>>("axes");
    auto x_dims = scope.FindVar(Input("X"))->Get<framework::LoDTensor>().dims();
    auto out_dims = Squeeze2OpInferShape::GetOutputShape(axes, x_dims);

    framework::AttributeMap attrs;
    attrs["shape"] = framework::vectorize2int(out_dims);
    // Invoke Reshape Op
    auto reshape_op = framework::OpRegistry::CreateOp(
        "reshape2", {{"X", {Input("X")}}, {"Shape", {}}},
        {{"Out", {Output("Out")}}, {"XShape", {Output("XShape")}}}, attrs);
    reshape_op->Run(scope, place);
  }
};

class Squeeze2GradOpMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto *grad_op = new framework::OpDesc();
    grad_op->SetType("squeeze2_grad");
    grad_op->SetInput("XShape", Output("XShape"));
    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_op->SetAttrMap(Attrs());
    return std::unique_ptr<framework::OpDesc>(grad_op);
  }
};

class Squeeze2GradInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *context) const override {
    PADDLE_ENFORCE(context->HasInput("XShape"),
                   "Input(XShape) shouldn't be null.");
    PADDLE_ENFORCE(context->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) shouldn't be null.");
    auto xshape_dims = context->GetInputDim("XShape");
    auto x_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size());
    context->SetOutputDim(framework::GradVarName("X"), x_dims);
    context->ShareLoD("XShape", framework::GradVarName("X"));
  }
};

class Squeeze2GradOp : public framework::OperatorBase {
 public:
  using OperatorBase::OperatorBase;

 private:
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    auto dx_name = Output(framework::GradVarName("X"));
    auto dout_name = Input(framework::GradVarName("Out"));
    auto xshape_name = Input("XShape");
    auto xshape_dims =
        scope.FindVar(xshape_name)->Get<framework::LoDTensor>().dims();
    auto x_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size());

    framework::AttributeMap attrs;
    attrs["shape"] = framework::vectorize2int(x_dims);

    auto reshape_op = framework::OpRegistry::CreateOp(
        "reshape2", {{"X", {dout_name}}, {"Shape", {}}},
        {{"Out", {dx_name}}, {"XShape", {xshape_name}}}, attrs);
    reshape_op->Run(scope, place);
  }
};

}  // namespace operators
}  // namespace paddle
...
@@ -192,3 +299,8 @@ REGISTER_OPERATOR(squeeze, ops::SqueezeOp, ops::SqueezeOpMaker,
                  ops::SqueezeOpInferShape,
                  paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(squeeze_grad, ops::SqueezeGradOp, ops::SqueezeGradInferShape);

REGISTER_OPERATOR(squeeze2, ops::Squeeze2Op, ops::Squeeze2OpMaker,
                  ops::Squeeze2OpInferShape, ops::Squeeze2GradOpMaker);
REGISTER_OPERATOR(squeeze2_grad, ops::Squeeze2GradOp,
                  ops::Squeeze2GradInferShape);
paddle/fluid/operators/transpose_op.cc

@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/operators/transpose_op.h"
 #include <string>
 #include <vector>

 namespace paddle {
...
@@ -24,7 +25,7 @@ class TransposeOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
    PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null");
    auto x_dims = ctx->GetInputDim("X");
...
@@ -101,7 +102,7 @@ class TransposeOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) should not be null");
...
@@ -113,6 +114,93 @@ class TransposeOpGrad : public framework::OperatorWithKernel {
  }
};

// FIXME(zcd): transpose2 adds an intermediate output(XShape) based on
// transpose, the XShape is used to carry the shape and lod of X which
// will be used in transpose_grad, in this way, the framework can reuse
// the memory of X immediately the transpose2_op is finished.
// Considering compatibility issues, we could not fix transpose2_op
class Transpose2Op : public TransposeOp {
 public:
  Transpose2Op(const std::string &type,
               const framework::VariableNameMap &inputs,
               const framework::VariableNameMap &outputs,
               const framework::AttributeMap &attrs)
      : TransposeOp(type, inputs, outputs, attrs) {}

  void InferShape(framework::InferShapeContext *ctx) const override {
    TransposeOp::InferShape(ctx);
    PADDLE_ENFORCE(ctx->HasOutput("XShape"),
                   "Output(XShape) should not be null");
    const auto &in_dims = ctx->GetInputDim("X");
    std::vector<int64_t> x_shape_dim(in_dims.size() + 1);
    x_shape_dim[0] = 0;
    for (int i = 0; i < in_dims.size(); ++i) {
      x_shape_dim[i + 1] = in_dims[i];
    }
    ctx->SetOutputDim("XShape", framework::make_ddim(x_shape_dim));
    ctx->ShareLoD("X", /*->*/ "XShape");
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    return framework::OpKernelType(
        framework::ToDataType(ctx.Input<framework::LoDTensor>("X")->type()),
        ctx.device_context());
  }
};

class Transpose2OpMaker : public TransposeOpMaker {
 public:
  void Make() override {
    TransposeOpMaker::Make();
    AddOutput("XShape", "(Tensor)The output tensor.").AsIntermediate();
  }
};

class Transpose2GradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto *grad_op = new framework::OpDesc();
    grad_op->SetType("transpose2_grad");
    grad_op->SetInput("XShape", Output("XShape"));
    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_op->SetAttrMap(Attrs());
    return std::unique_ptr<framework::OpDesc>(grad_op);
  }
};

class Transpose2OpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("XShape"),
                   "Input(XShape) should not be null");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) should not be null");
    if (ctx->HasOutput(framework::GradVarName("X"))) {
      auto xshape_dim = ctx->GetInputDim("XShape");
      auto x_shape_dim =
          framework::slice_ddim(xshape_dim, 1, xshape_dim.size());
      ctx->SetOutputDim(framework::GradVarName("X"), x_shape_dim);
      ctx->ShareLoD("XShape", framework::GradVarName("X"));
    }
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    return framework::OpKernelType(
        framework::ToDataType(
            ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"))
                ->type()),
        ctx.device_context());
  }
};

}  // namespace operators
}  // namespace paddle
...
@@ -120,8 +208,20 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR(transpose, ops::TransposeOp, ops::TransposeOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(transpose_grad, ops::TransposeOpGrad);

REGISTER_OP_CPU_KERNEL(
    transpose, ops::TransposeKernel<paddle::platform::CPUDeviceContext, float>);
REGISTER_OP_CPU_KERNEL(
    transpose_grad,
    ops::TransposeGradKernel<paddle::platform::CPUDeviceContext, float>);

REGISTER_OPERATOR(transpose2, ops::Transpose2Op, ops::Transpose2OpMaker,
                  ops::Transpose2GradMaker);
REGISTER_OPERATOR(transpose2_grad, ops::Transpose2OpGrad);

REGISTER_OP_CPU_KERNEL(
    transpose2,
    ops::TransposeKernel<paddle::platform::CPUDeviceContext, float>);
REGISTER_OP_CPU_KERNEL(
    transpose2_grad,
    ops::TransposeGradKernel<paddle::platform::CPUDeviceContext, float>);
paddle/fluid/operators/transpose_op.cu.cc

@@ -21,3 +21,10 @@ REGISTER_OP_CUDA_KERNEL(
 REGISTER_OP_CUDA_KERNEL(
     transpose_grad,
     ops::TransposeGradKernel<paddle::platform::CUDADeviceContext, float>);
+
+REGISTER_OP_CUDA_KERNEL(
+    transpose2,
+    ops::TransposeKernel<paddle::platform::CUDADeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(
+    transpose2_grad,
+    ops::TransposeGradKernel<paddle::platform::CUDADeviceContext, float>);
paddle/fluid/operators/unsqueeze_op.cc

@@ -168,6 +168,112 @@ class UnsqueezeGradOp : public framework::OperatorBase {
  }
};

// FIXME(zcd): unsqueeze2 adds an intermediate output(XShape) based on
// unsqueeze, the XShape is used to carry the shape and lod of X which
// will be used in unsqueeze_grad, in this way, the framework can reuse
// the memory of X immediately the unsqueeze2_op is finished.
// Considering compatibility issues, we could not fix unsqueeze2_op
class Unsqueeze2OpInferShape : public UnsqueezeOpInferShape {
 public:
  void operator()(framework::InferShapeContext *ctx) const override {
    UnsqueezeOpInferShape::operator()(ctx);
    PADDLE_ENFORCE(ctx->HasOutput("XShape"),
                   "Output(XShape) of Unsqueeze operator should not be null.");
    const auto &x_dims = ctx->GetInputDim("X");
    std::vector<int64_t> xshape_dims(x_dims.size() + 1);
    xshape_dims[0] = 0;
    for (int i = 0; i < x_dims.size(); ++i) {
      xshape_dims[i + 1] = x_dims[i];
    }
    ctx->SetOutputDim("XShape", framework::make_ddim(xshape_dims));
    ctx->ShareLoD("X", /*->*/ "XShape");
  }
};

class Unsqueeze2OpMaker : public UnsqueezeOpMaker {
 public:
  void Make() override {
    UnsqueezeOpMaker::Make();
    AddOutput("XShape",
              "XShape is just used to store the shape and lod of X, which will "
              "be used in UnsqueezeGradOp.")
        .AsIntermediate();
  }
};

class Unsqueeze2Op : public framework::OperatorBase {
 public:
  using OperatorBase::OperatorBase;

 private:
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    auto &axes = Attr<std::vector<int>>("axes");
    auto x_dims = scope.FindVar(Input("X"))->Get<framework::LoDTensor>().dims();
    auto out_dims = Unsqueeze2OpInferShape::GetOutputShape(axes, x_dims);

    framework::AttributeMap attrs;
    attrs["shape"] = framework::vectorize2int(out_dims);
    // Invoke Reshape op.
    auto reshape_op = framework::OpRegistry::CreateOp(
        "reshape2", {{"X", {Input("X")}}, {"Shape", {}}},
        {{"Out", {Output("Out")}}, {"XShape", {Output("XShape")}}}, attrs);
    reshape_op->Run(scope, place);
  }
};

class Unsqueeze2GradOpMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto *grad_op = new framework::OpDesc();
    grad_op->SetType("unsqueeze2_grad");
    grad_op->SetInput("XShape", Output("XShape"));
    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_op->SetAttrMap(Attrs());
    return std::unique_ptr<framework::OpDesc>(grad_op);
  }
};

class Unsqueeze2GradInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *context) const override {
    PADDLE_ENFORCE(context->HasInput("XShape"),
                   "Input(XShape) shouldn't be null.");
    PADDLE_ENFORCE(context->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) shouldn't be null.");
    auto xshape_dims = context->GetInputDim("XShape");
    auto x_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size());
    context->SetOutputDim(framework::GradVarName("X"), x_dims);
    context->ShareLoD("XShape", framework::GradVarName("X"));
  }
};

class Unsqueeze2GradOp : public framework::OperatorBase {
 public:
  using OperatorBase::OperatorBase;

 private:
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    auto dx_name = Output(framework::GradVarName("X"));
    auto dout_name = Input(framework::GradVarName("Out"));
    auto xshape_name = Input("XShape");
    auto xshape_dims =
        scope.FindVar(xshape_name)->Get<framework::LoDTensor>().dims();
    auto x_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size());

    framework::AttributeMap attrs;
    attrs["shape"] = framework::vectorize2int(x_dims);

    auto reshape_op = framework::OpRegistry::CreateOp(
        "reshape2", {{"X", {dout_name}}, {"Shape", {}}},
        {{"Out", {dx_name}}, {"XShape", {xshape_name}}}, attrs);
    reshape_op->Run(scope, place);
  }
};

}  // namespace operators
}  // namespace paddle
...
@@ -180,3 +286,8 @@ REGISTER_OPERATOR(unsqueeze, ops::UnsqueezeOp, ops::UnsqueezeOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(unsqueeze_grad, ops::UnsqueezeGradOp,
                  ops::UnsqueezeGradInferShape);

REGISTER_OPERATOR(unsqueeze2, ops::Unsqueeze2Op, ops::Unsqueeze2OpMaker,
                  ops::Unsqueeze2OpInferShape, ops::Unsqueeze2GradOpMaker);
REGISTER_OPERATOR(unsqueeze2_grad, ops::Unsqueeze2GradOp,
                  ops::Unsqueeze2GradInferShape);
paddle/scripts/paddle_build.sh

@@ -115,6 +115,7 @@ function cmake_gen() {
         -DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF}
         -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
         -DWITH_CONTRIB=${WITH_CONTRIB:-ON}
+        -DWITH_INFERENCE=${WITH_INFERENCE:-ON}
         -DWITH_ANAKIN=${WITH_ANAKIN:-OFF}
         -DPY_VERSION=${PY_VERSION:-2.7}
     ========================================
...
@@ -144,6 +145,7 @@ EOF
         -DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} \
         -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
         -DWITH_CONTRIB=${WITH_CONTRIB:-ON} \
+        -DWITH_INFERENCE=${WITH_INFERENCE:-ON} \
        -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \
        -DPY_VERSION=${PY_VERSION:-2.7}
 }
...
python/paddle/fluid/layers/nn.py

@@ -4025,10 +4025,12 @@ def transpose(x, perm, name=None):
     helper = LayerHelper('transpose', **locals())
     out = helper.create_tmp_variable(x.dtype)
+    x_shape = helper.create_tmp_variable(x.dtype)
     helper.append_op(
-        type='transpose',
+        type='transpose2',
         inputs={'X': [x]},
-        outputs={'Out': [out]},
+        outputs={'Out': [out],
+                 'XShape': [x_shape]},
         attrs={'axis': perm})
     return out
...
@@ -4520,13 +4522,15 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
             "Each dimension size given in shape must not be negtive "
             "except one unknown dimension.")
-    helper = LayerHelper("reshape", **locals())
+    helper = LayerHelper("reshape2", **locals())
     out = helper.create_tmp_variable(dtype=x.dtype)
+    x_shape = helper.create_tmp_variable(dtype=x.dtype)
     helper.append_op(
-        type="reshape",
+        type="reshape2",
         inputs=inputs,
         attrs={"shape": shape},
-        outputs={"Out": out})
+        outputs={"Out": out,
+                 "XShape": x_shape})

     return helper.append_activation(out)
...
@@ -4570,11 +4574,13 @@ def squeeze(input, axes, name=None):
     """
     helper = LayerHelper("squeeze", **locals())
     out = helper.create_tmp_variable(dtype=input.dtype)
+    x_shape = helper.create_tmp_variable(dtype=input.dtype)
     helper.append_op(
-        type="squeeze",
+        type="squeeze2",
         inputs={"X": input},
         attrs={"axes": axes},
-        outputs={"Out": out})
+        outputs={"Out": out,
+                 "XShape": x_shape})

     return out
...
@@ -4605,11 +4611,13 @@ def unsqueeze(input, axes, name=None):
     """
     helper = LayerHelper("unsqueeze", **locals())
     out = helper.create_tmp_variable(dtype=input.dtype)
+    x_shape = helper.create_tmp_variable(dtype=input.dtype)
     helper.append_op(
-        type="unsqueeze",
+        type="unsqueeze2",
         inputs={"X": input},
         attrs={"axes": axes},
-        outputs={"Out": out})
+        outputs={"Out": out,
+                 "XShape": x_shape})

     return out
...
@@ -5811,10 +5819,12 @@ def flatten(x, axis=1, name=None):
         raise ValueError("The axis should be a int, and in range [0, rank(x)]")

     out = helper.create_tmp_variable(x.dtype)
+    x_shape = helper.create_tmp_variable(x.dtype)
     helper.append_op(
-        type='flatten',
+        type='flatten2',
         inputs={"X": x},
-        outputs={'Out': out},
+        outputs={'Out': out,
+                 'XShape': x_shape},
         attrs={"axis": axis})
     return out
...
python/paddle/fluid/tests/unittests/dist_transformer.py

@@ -36,6 +36,7 @@ import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 from paddle.fluid import core
 from test_dist_base import TestDistRunnerBase, runtime_main
+import paddle.compat as cpt
 from paddle.compat import long_type

 import hashlib
...
@@ -315,7 +316,8 @@ def pad_batch_data(insts,
     """
     return_list = []
     max_len = max(len(inst) for inst in insts)
-    num_token = reduce(lambda x, y: x + y,
+    num_token = six.moves.reduce(lambda x, y: x + y,
                        [len(inst) for inst in insts]) if return_num_token else 0
     # Any token included in dict can be used to pad, since the paddings' loss
     # will be masked out by weights and make no effect on parameter gradients.
...
@@ -328,7 +330,7 @@ def pad_batch_data(insts,
         return_list += [inst_weight.astype("float32").reshape([-1, 1])]
     else:  # position data
         inst_pos = np.array([
-            range(1, len(inst) + 1) + [0] * (max_len - len(inst))
+            list(range(1, len(inst) + 1)) + [0] * (max_len - len(inst))
             for inst in insts
         ])
         return_list += [inst_pos.astype("int64").reshape([-1, 1])]
...
@@ -385,10 +387,11 @@ def prepare_batch_input(insts, data_input_names, src_pad_idx, trg_pad_idx,
         return_num_token=True)

     data_input_dict = dict(
         list(
             zip(data_input_names, [
                 src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
                 trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight
             ])))

     return data_input_dict, np.asarray([num_token], dtype="float32")
...
@@ -561,7 +564,7 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
             np.log(TrainTaskConfig.label_smooth_eps / (
                 ModelHyperParams.trg_vocab_size - 1) + 1e-20))
     init = False
-    for pass_id in xrange(TrainTaskConfig.pass_num):
+    for pass_id in six.moves.xrange(TrainTaskConfig.pass_num):
         pass_start_time = time.time()
         for batch_id, data in enumerate(train_data()):
             if batch_id >= 5:
...
@@ -587,11 +590,11 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
                 ModelHyperParams.eos_idx, ModelHyperParams.n_head,
                 ModelHyperParams.d_model)
             total_num_token += num_token
-            feed_kv_pairs = data_input_dict.items()
+            feed_kv_pairs = list(data_input_dict.items())
             if TrainTaskConfig.local:
-                feed_kv_pairs += {
-                    lr_scheduler.learning_rate.name: lr_rate
-                }.items()
+                feed_kv_pairs += list({
+                    lr_scheduler.learning_rate.name: lr_rate
+                }.items())
             feed_list.append(dict(feed_kv_pairs))

             if not init:
...
@@ -873,6 +876,7 @@ class DataReader(object):
             f = tarfile.open(fpaths[0], "r")
             for line in f.extractfile(tar_fname):
+                line = cpt.to_text(line)
                 fields = line.strip("\n").split(self._field_delimiter)
                 if (not self._only_src and len(fields) == 2) or (
                         self._only_src and len(fields) == 1):
...
@@ -882,8 +886,9 @@ class DataReader(object):
                 if not os.path.isfile(fpath):
                     raise IOError("Invalid file: %s" % fpath)

-                with open(fpath, "r") as f:
+                with open(fpath, "rb") as f:
                     for line in f:
+                        line = cpt.to_text(line)
                         fields = line.strip("\n").split(self._field_delimiter)
                         if (not self._only_src and len(fields) == 2) or (
                                 self._only_src and len(fields) == 1):
...
@@ -892,8 +897,9 @@ class DataReader(object):
     @staticmethod
     def load_dict(dict_path, reverse=False):
         word_dict = {}
-        with open(dict_path, "r") as fdict:
+        with open(dict_path, "rb") as fdict:
             for idx, line in enumerate(fdict):
+                line = cpt.to_text(line)
                 if reverse:
                     word_dict[idx] = line.strip("\n")
                 else:
...
@@ -1034,7 +1040,7 @@ def multi_head_attention(queries,
     # size of the input as the output dimension size.
     return layers.reshape(
         x=trans_x,
-        shape=map(int, [0, 0, trans_x.shape[2] * trans_x.shape[3]]))
+        shape=list(
+            map(int, [0, 0, trans_x.shape[2] * trans_x.shape[3]])))


 def scaled_dot_product_attention(q, k, v, attn_bias, d_model, dropout_rate):
     """
...
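The pattern running through this file is Python 2-isms being replaced with portable forms. A minimal sketch of each replacement (cpt is paddle.compat, imported above; the file handling mimics what cpt.to_text does, roughly):

import six

total = six.moves.reduce(lambda a, b: a + b, [1, 2, 3])  # reduce moved to functools in Py3
pos = list(range(1, 4)) + [0] * 2     # Py3 range() is lazy, so listify before +
for i in six.moves.xrange(3):         # xrange spelled portably
    pass
with open(__file__, "rb") as f:       # open in binary on both interpreters,
    line = f.readline().decode("utf-8")  # then decode each line explicitly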
python/paddle/fluid/tests/unittests/op_test.py

@@ -249,7 +249,7 @@ class OpTest(unittest.TestCase):
         outs, _ = self._calc_output(place)
         return outs

-    def _calc_output(self, place, parallel=False):
+    def _calc_output(self, place, parallel=False, no_check_set=None):
         program = Program()
         block = program.global_block()
...
@@ -273,6 +273,8 @@ class OpTest(unittest.TestCase):
         # if not, fill the fetch_list by the user configured outputs in test.
         if len(fetch_list) == 0:
             for var_name, var in six.iteritems(outputs):
+                if no_check_set is not None and var_name in no_check_set:
+                    continue
                 if isinstance(var, list):
                     for v in var:
                         fetch_list.append(v)
...
@@ -291,11 +293,17 @@ class OpTest(unittest.TestCase):
                        return_numpy=False)
         return outs, fetch_list

-    def check_output_with_place(self, place, atol, equal_nan=False):
-        outs, fetch_list = self._calc_output(place)
+    def check_output_with_place(self, place, atol, no_check_set=None,
+                                equal_nan=False):
+        outs, fetch_list = self._calc_output(place, no_check_set=no_check_set)
         for out_name, out_dup in Operator.get_op_outputs(self.op_type):
             if out_name not in self.outputs:
                 continue
+            if no_check_set is not None and out_name in no_check_set:
+                continue

             def find_actual(target_name, fetch_list):
                 found = [
...
@@ -360,10 +368,10 @@ class OpTest(unittest.TestCase):
             places.append(core.CUDAPlace(0))
         return places

-    def check_output(self, atol=1e-5, equal_nan=False):
+    def check_output(self, atol=1e-5, no_check_set=None, equal_nan=False):
         places = self._get_places()
         for place in places:
-            self.check_output_with_place(place, atol, equal_nan)
+            self.check_output_with_place(place, atol, no_check_set, equal_nan)

     def check_output_customized(self, checker):
         places = self._get_places()
...
python/paddle/fluid/tests/unittests/test_flatten_op.py

@@ -22,14 +22,17 @@ from op_test import OpTest

 class TestFlattenOp(OpTest):
     def setUp(self):
-        self.op_type = "flatten"
+        self.op_type = "flatten2"
         self.init_test_case()
         self.inputs = {"X": np.random.random(self.in_shape).astype("float32")}
         self.init_attrs()
-        self.outputs = {"Out": self.inputs["X"].reshape(self.new_shape)}
+        self.outputs = {
+            "Out": self.inputs["X"].reshape(self.new_shape),
+            "XShape": np.random.random(self.in_shape).astype("float32")
+        }

     def test_check_output(self):
-        self.check_output()
+        self.check_output(no_check_set=["XShape"])

     def test_check_grad(self):
         self.check_grad(["X"], "Out")
...
python/paddle/fluid/tests/unittests/test_prelu_op.py

@@ -16,6 +16,7 @@ from __future__ import print_function

 import unittest
 import numpy as np
+import six
 from op_test import OpTest
...
@@ -62,17 +63,20 @@ class PReluTest(OpTest):

 # TODO(minqiyang): Resume these test cases after fixing Python3 CI job issues
-# class TestCase1(PReluTest):
-#     def initTestCase(self):
-#         self.attrs = {'mode': "all"}
-
-# class TestCase2(PReluTest):
-#     def initTestCase(self):
-#         self.attrs = {'mode': "channel"}
-
-# class TestCase3(PReluTest):
-#     def initTestCase(self):
-#         self.attrs = {'mode': "element"}
+if six.PY2:
+
+    class TestCase1(PReluTest):
+        def initTestCase(self):
+            self.attrs = {'mode': "all"}
+
+    class TestCase2(PReluTest):
+        def initTestCase(self):
+            self.attrs = {'mode': "channel"}
+
+    class TestCase3(PReluTest):
+        def initTestCase(self):
+            self.attrs = {'mode': "element"}

 if __name__ == "__main__":
     unittest.main()
python/paddle/fluid/tests/unittests/test_reshape_op.py
浏览文件 @
40dbd97f
...
...
@@ -22,106 +22,39 @@ from op_test import OpTest
 class TestReshapeOp(OpTest):
     def setUp(self):
-        ori_shape = (2, 25)
-        new_shape = (5, 10)
-
-        self.op_type = "reshape"
-        self.inputs = {"X": np.random.random(ori_shape).astype("float32")}
-        self.attrs = {"shape": new_shape}
-        self.outputs = {"Out": self.inputs["X"].reshape(new_shape)}
-
-    def test_check_output(self):
-        self.check_output()
-
-    def test_check_grad(self):
-        self.check_grad(["X"], "Out")
-
-
-class TestReshapeOpDimInfer1(OpTest):
-    def setUp(self):
-        ori_shape = (5, 10)
-        new_shape = (5, -1, 5)
-
-        self.op_type = "reshape"
-        self.inputs = {"X": np.random.random(ori_shape).astype("float32")}
-        self.attrs = {"shape": new_shape}
-        self.outputs = {"Out": self.inputs["X"].reshape(self.attrs["shape"])}
-
-    def test_check_output(self):
-        self.check_output()
-
-    def test_check_grad(self):
-        self.check_grad(["X"], "Out")
-
-
-class TestReshapeOpDimInfer2(OpTest):
-    def setUp(self):
-        ori_shape = (2, 2, 6)
-        new_shape = (2, 0, 3, -1)
-        infered_shape = (2, 2, 3, -1)
-
-        self.op_type = "reshape"
-        self.inputs = {"X": np.random.random(ori_shape).astype("float32")}
-        self.attrs = {"shape": new_shape}
-        self.outputs = {"Out": self.inputs["X"].reshape(infered_shape)}
-
-    def test_check_output(self):
-        self.check_output()
-
-    def test_check_grad(self):
-        self.check_grad(["X"], "Out")
-
-
-class TestReshapeOpInplace(OpTest):
-    def setUp(self):
-        ori_shape = (2, 25)
-        new_shape = (5, 10)
-
-        self.op_type = "reshape"
-        self.inputs = {"X": np.random.random(ori_shape).astype("float32")}
-        self.attrs = {"shape": new_shape}
-        self.outputs = {"Out": self.inputs["X"].reshape(new_shape)}
-
-    def test_check_output(self):
-        self.check_output()
-
-    def test_check_grad(self):
-        self.check_grad(["X"], "Out")
-
-
-class TestReshapeOpDimInferInplace1(OpTest):
-    def setUp(self):
-        ori_shape = (5, 10)
-        new_shape = (5, -1, 5)
-
-        self.op_type = "reshape"
-        self.inputs = {"X": np.random.random(ori_shape).astype("float32")}
-        self.attrs = {"shape": new_shape}
-        self.outputs = {"Out": self.inputs["X"].reshape(new_shape)}
-
-
-class TestReshapeOpDimInferInplace2(OpTest):
-    def setUp(self):
-        ori_shape = (2, 2, 6)
-        new_shape = (2, 0, 3, -1)
-        infered_shape = (2, 2, 3, -1)
-
-        self.op_type = "reshape"
-        self.inputs = {"X": np.random.random(ori_shape).astype("float32")}
-        self.attrs = {"shape": new_shape}
-        self.outputs = {"Out": self.inputs["X"].reshape(infered_shape)}
+        self.init_data()
+        self.op_type = "reshape2"
+        self.inputs = {"X": np.random.random(self.ori_shape).astype("float32")}
+        self.attrs = {"shape": self.new_shape}
+        self.outputs = {
+            "Out": self.inputs["X"].reshape(self.infered_shape),
+            'XShape': np.random.random(self.ori_shape).astype("float32")
+        }
+
+    def init_data(self):
+        self.ori_shape = (2, 25)
+        self.new_shape = (5, 10)
+        self.infered_shape = (5, 10)

     def test_check_output(self):
-        self.check_output()
+        self.check_output(no_check_set=['XShape'])

     def test_check_grad(self):
         self.check_grad(["X"], "Out")
+
+
+class TestReshapeOpDimInfer1(TestReshapeOp):
+    def init_data(self):
+        self.ori_shape = (5, 10)
+        self.new_shape = (5, -1, 5)
+        self.infered_shape = (5, -1, 5)
+
+
+class TestReshapeOpDimInfer2(TestReshapeOp):
+    def init_data(self):
+        self.ori_shape = (2, 2, 6)
+        self.new_shape = (2, 0, 3, -1)
+        self.infered_shape = (2, 2, 3, -1)


 class TestReshapeOpWithInputShape(OpTest):
...
...
@@ -130,20 +63,23 @@ class TestReshapeOpWithInputShape(OpTest):
         new_shape = (0, -1, 5)
         actual_shape = (2, 3, 5)

-        self.op_type = "reshape"
+        self.op_type = "reshape2"
         self.inputs = {
             "X": np.random.random(ori_shape).astype("float32"),
             "Shape": np.array(actual_shape, dtype="int32")
         }
         self.attrs = {"shape": new_shape}
-        self.outputs = {"Out": self.inputs["X"].reshape(actual_shape)}
+        self.outputs = {
+            "Out": self.inputs["X"].reshape(actual_shape),
+            'XShape': np.random.random(ori_shape).astype("float32")
+        }

     def test_check_output(self):
-        self.check_output()
+        self.check_output(no_check_set=['XShape'])

     def test_check_grad(self):
-        self.check_grad(["X"], "Out")
+        self.check_grad(["X"], "Out", sum_outputs=["Out"])


 if __name__ == "__main__":
...
...
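The reshape rewrite replaces six near-duplicate classes with one TestReshapeOp whose shapes come from an overridable init_data, so each extra case shrinks to a three-line subclass. A hypothetical further case in the same style (class name and shapes are mine, placed in the same test_reshape_op.py module):

class TestReshapeOpDimInfer3(TestReshapeOp):  # hypothetical extra case
    def init_data(self):
        self.ori_shape = (4, 6)        # 24 elements in total
        self.new_shape = (-1, 8)       # -1 is left for the op to infer
        self.infered_shape = (3, 8)    # reference shape used by numpy.reshape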
python/paddle/fluid/tests/unittests/test_squeeze_op.py
View file @ 40dbd97f
...
...
@@ -23,14 +23,17 @@ from op_test import OpTest
 # Correct: General.
 class TestSqueezeOp(OpTest):
     def setUp(self):
-        self.op_type = "squeeze"
+        self.op_type = "squeeze2"
         self.init_test_case()
         self.inputs = {"X": np.random.random(self.ori_shape).astype("float32")}
         self.init_attrs()
-        self.outputs = {"Out": self.inputs["X"].reshape(self.new_shape)}
+        self.outputs = {
+            "Out": self.inputs["X"].reshape(self.new_shape),
+            "XShape": np.random.random(self.ori_shape).astype("float32")
+        }

     def test_check_output(self):
-        self.check_output()
+        self.check_output(no_check_set=['XShape'])

     def test_check_grad(self):
         self.check_grad(["X"], "Out")
...
...
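For the squeeze fixtures, new_shape is ori_shape with the size-1 axes named in the axes attribute removed, or every size-1 axis when axes is empty. A helper mirroring that rule as I understand it, not the operator's own shape inference:

def squeeze_infer_shape(ori_shape, axes):
    # Drop the listed axes; with no axes given, drop every size-1 axis.
    if axes:
        return tuple(d for i, d in enumerate(ori_shape) if i not in set(axes))
    return tuple(d for d in ori_shape if d != 1)


print(squeeze_infer_shape((1, 3, 1, 40), axes=(0, 2)))  # -> (3, 40)
print(squeeze_infer_shape((1, 3, 1, 40), axes=()))      # -> (3, 40)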
python/paddle/fluid/tests/unittests/test_transpose_op.py
View file @ 40dbd97f
...
...
@@ -22,16 +22,19 @@ from op_test import OpTest
 class TestTransposeOp(OpTest):
     def setUp(self):
         self.initTestCase()
-        self.op_type = "transpose"
+        self.op_type = "transpose2"
         self.inputs = {'X': np.random.random(self.shape).astype("float32")}
         self.attrs = {'axis': list(self.axis)}
-        self.outputs = {'Out': self.inputs['X'].transpose(self.axis)}
+        self.outputs = {
+            'XShape': np.random.random(self.shape).astype("float32"),
+            'Out': self.inputs['X'].transpose(self.axis)
+        }

     def test_check_output(self):
-        self.check_output()
+        self.check_output(no_check_set=['XShape'])

     def test_check_grad(self):
-        self.check_grad(['X'], 'Out')
+        self.check_grad(['X'], 'Out', sum_outputs=['Out'])

     def initTestCase(self):
         self.shape = (3, 4)
...
...
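Two details in the transpose hunk differ from the flatten/squeeze pattern: Out is computed with numpy's own transpose, and check_grad passes sum_outputs=['Out'] so the gradient check back-propagates only from Out rather than from the metadata-only XShape. The reference computation the fixture relies on, as a standalone snippet:

import numpy as np

x = np.random.random((3, 4, 5)).astype("float32")
axis = (2, 0, 1)

out = x.transpose(axis)        # dim i of out is dim axis[i] of x
assert out.shape == (5, 3, 4)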
python/paddle/fluid/tests/unittests/test_unsqueeze_op.py
View file @ 40dbd97f
...
...
@@ -24,13 +24,16 @@ from op_test import OpTest
 class TestUnsqueezeOp(OpTest):
     def setUp(self):
         self.init_test_case()
-        self.op_type = "unsqueeze"
+        self.op_type = "unsqueeze2"
         self.inputs = {"X": np.random.random(self.ori_shape).astype("float32")}
         self.init_attrs()
-        self.outputs = {"Out": self.inputs["X"].reshape(self.new_shape)}
+        self.outputs = {
+            "Out": self.inputs["X"].reshape(self.new_shape),
+            "XShape": np.random.random(self.ori_shape).astype("float32")
+        }

     def test_check_output(self):
-        self.check_output()
+        self.check_output(no_check_set=["XShape"])

     def test_check_grad(self):
         self.check_grad(["X"], "Out")
...
...
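The unsqueeze fixtures go the other way: new_shape is ori_shape with a size-1 dimension inserted at each target axis, applied in ascending order of output position. Again a sketch of the rule as the tests encode it, not Paddle's InferShape:

def unsqueeze_infer_shape(ori_shape, axes):
    # Insert a size-1 dimension at each target position, lowest first.
    out = list(ori_shape)
    for ax in sorted(axes):
        out.insert(ax, 1)
    return tuple(out)


print(unsqueeze_infer_shape((3, 40), axes=(0, 2)))  # -> (1, 3, 1, 40)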
python/paddle/fluid/transpiler/details/program_utils.py
View file @ 40dbd97f
...
...
@@ -153,7 +153,7 @@ def block_to_code(block, block_idx):
     indent += 1

     # sort all vars
-    all_vars = sorted(block.vars.iteritems(), key=lambda x: x[0])
+    all_vars = sorted(six.iteritems(block.vars), key=lambda x: x[0])
     for var in all_vars:
         print("{}{}".format(get_indent_space(indent), variable_to_code(var[1])))
...
...
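The program_utils change is a straightforward Python 3 portability fix: dict.iteritems() no longer exists on Python 3, while six.iteritems dispatches to iteritems or items as appropriate. A runnable illustration with a stand-in dict (block.vars itself requires a Paddle Program):

import six

block_vars = {"fc_0.b_0": "bias var", "fc_0.w_0": "weight var"}  # stand-in

# dict.iteritems() would raise AttributeError on Python 3; this works on both.
for name, var in sorted(six.iteritems(block_vars), key=lambda x: x[0]):
    print(name, var)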
python/paddle/fluid/transpiler/distribute_transpiler.py
View file @ 40dbd97f
...
...
@@ -300,7 +300,7 @@ class DistributeTranspiler(object):
             input_deps = grad_name_to_send_dummy_out.values()
             program.global_block().append_op(
                 type="send_barrier",
-                inputs={"X": input_deps},
+                inputs={"X": list(input_deps)},
                 outputs={"Out": send_barrier_out},
                 attrs={
                     "endpoints": pserver_endpoints,
...
...
@@ -455,7 +455,7 @@ class DistributeTranspiler(object):
             if len(splited_var) <= 1:
                 continue
             # NOTE: if enable memory optimization, origin vars maybe removed.
-            if startup_program.global_block().vars.has_key(varname):
+            if varname in startup_program.global_block().vars:
                 orig_param = startup_program.global_block().vars[varname]
             else:
                 origin_param_var = self.origin_program.global_block().vars[
...
...
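Both distribute_transpiler hunks are also Python 3 fixes: dict.values() returns a view rather than a list on Python 3, so it is materialized with list() before being handed to append_op, and dict.has_key() was removed entirely in favor of the in operator. Demonstrated on a plain dict:

grads = {"w@GRAD": "dummy_0", "b@GRAD": "dummy_1"}

deps = list(grads.values())      # a real list on both Python 2 and 3
assert isinstance(deps, list)

# grads.has_key("w@GRAD") raises AttributeError on Python 3; use `in`.
assert "w@GRAD" in grads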