Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
c407dfa3
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c407dfa3
编写于
3月 20, 2019
作者:
N
nhzlx
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
cherry-pick from feature/anakin-engine: refine paddle-anakin to new interface. #16276
上级
a25331bc
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
56 addition
and
42 deletion
+56
-42
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+1
-1
paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc
.../fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc
+6
-0
paddle/fluid/inference/anakin/convert/op_converter.h
paddle/fluid/inference/anakin/convert/op_converter.h
+4
-17
paddle/fluid/inference/anakin/engine.cc
paddle/fluid/inference/anakin/engine.cc
+1
-2
paddle/fluid/inference/anakin/engine.h
paddle/fluid/inference/anakin/engine.h
+7
-4
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+1
-1
paddle/fluid/operators/anakin/anakin_engine_op.h
paddle/fluid/operators/anakin/anakin_engine_op.h
+36
-17
未找到文件。
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
c407dfa3
...
@@ -79,7 +79,7 @@ pass_library(anakin_fillconstant_elementwisemul_fuse inference)
...
@@ -79,7 +79,7 @@ pass_library(anakin_fillconstant_elementwisemul_fuse inference)
# be detected by our pass. The index here represents the number of structures in the
# be detected by our pass. The index here represents the number of structures in the
# pattern. We use index 3 ~ 6, because these quantities of structures are
# pattern. We use index 3 ~ 6, because these quantities of structures are
# common in the models.
# common in the models.
foreach
(
index RANGE
3
6
)
foreach
(
index RANGE
2
6
)
file
(
APPEND
${
pass_file
}
"USE_PASS(transpose_flatten
${
index
}
_concat_fuse_pass);
\n
"
)
file
(
APPEND
${
pass_file
}
"USE_PASS(transpose_flatten
${
index
}
_concat_fuse_pass);
\n
"
)
endforeach
()
endforeach
()
...
...
paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc
浏览文件 @
c407dfa3
...
@@ -12,7 +12,9 @@
...
@@ -12,7 +12,9 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include <memory>
#include <string>
#include <string>
#include <unordered_set>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
...
@@ -123,6 +125,7 @@ std::unique_ptr<ir::Graph> TransposeFlattenConcatFusePass<times>::ApplyImpl(
...
@@ -123,6 +125,7 @@ std::unique_ptr<ir::Graph> TransposeFlattenConcatFusePass<times>::ApplyImpl(
}
}
template
class
TransposeFlattenConcatFusePass
<
1
>;
template
class
TransposeFlattenConcatFusePass
<
1
>;
template
class
TransposeFlattenConcatFusePass
<
2
>;
template
class
TransposeFlattenConcatFusePass
<
3
>;
template
class
TransposeFlattenConcatFusePass
<
3
>;
template
class
TransposeFlattenConcatFusePass
<
4
>;
template
class
TransposeFlattenConcatFusePass
<
4
>;
template
class
TransposeFlattenConcatFusePass
<
5
>;
template
class
TransposeFlattenConcatFusePass
<
5
>;
...
@@ -135,6 +138,9 @@ template class TransposeFlattenConcatFusePass<6>;
...
@@ -135,6 +138,9 @@ template class TransposeFlattenConcatFusePass<6>;
REGISTER_PASS
(
transpose_flatten_concat_fuse_pass
,
REGISTER_PASS
(
transpose_flatten_concat_fuse_pass
,
paddle
::
framework
::
ir
::
TransposeFlattenConcatFusePass
<
1
>
);
paddle
::
framework
::
ir
::
TransposeFlattenConcatFusePass
<
1
>
);
REGISTER_PASS
(
transpose_flatten2_concat_fuse_pass
,
paddle
::
framework
::
ir
::
TransposeFlattenConcatFusePass
<
2
>
);
REGISTER_PASS
(
transpose_flatten3_concat_fuse_pass
,
REGISTER_PASS
(
transpose_flatten3_concat_fuse_pass
,
paddle
::
framework
::
ir
::
TransposeFlattenConcatFusePass
<
3
>
);
paddle
::
framework
::
ir
::
TransposeFlattenConcatFusePass
<
3
>
);
...
...
paddle/fluid/inference/anakin/convert/op_converter.h
浏览文件 @
c407dfa3
...
@@ -117,27 +117,14 @@ class AnakinOpConverter {
...
@@ -117,27 +117,14 @@ class AnakinOpConverter {
}
}
temp_max_input_shape
[
input
]
=
input_shape
;
temp_max_input_shape
[
input
]
=
input_shape
;
engine
->
SetInputShape
(
input
,
input_shape
);
engine
->
SetInputShape
(
input
,
input_shape
);
// engine->Graph()->RegistVar(input);
// For share from data.
engine
->
Graph
()
->
RegistVar
(
input
);
// For share from data.
}
}
engine
->
SetMaxInputShape
(
temp_max_input_shape
);
engine
->
SetMaxInputShape
(
temp_max_input_shape
);
engine
->
Optimize
();
engine
->
Optimize
();
// For anakin share with fluid tensor.
engine
->
AllocTmpMem
();
engine
->
InitGraph
();
engine
->
InitGraph
();
/*
for(auto& input : inputs) {
platform::CUDAPlace gpu_place(engine->GetDevice());
auto input_var = scope->Var();
auto input_tensor = input_var->GetMutable<framework::LoDTensor>();
auto input_max_shape = temp_max_input_shape[input];
input_tensor->Resize(framework::make_ddim(input_max_shape));
auto input_data = input_tensor->mutable_data<float>(gpu_place);
auto* anakin_input = engine->Net()->get_in(input);
::anakin::saber::Tensor<::anakin::saber::NV> tmp_anakin_tensor(input_data,
::anakin::saber::NV(), 0, input_max_shape);
anakin_input->share_from(tmp_anakin_tensor);
}
*/
}
}
void
SetEngine
(
AnakinNvEngine
*
engine
)
{
engine_
=
engine
;
}
void
SetEngine
(
AnakinNvEngine
*
engine
)
{
engine_
=
engine
;
}
...
...
paddle/fluid/inference/anakin/engine.cc
浏览文件 @
c407dfa3
...
@@ -97,15 +97,14 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
...
@@ -97,15 +97,14 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
anakin_input = net_->get_in(input.first);
anakin_input = net_->get_in(input.first);
}
}
*/
*/
anakin_input
->
reshape
(
fluid_input_shape
);
anakin_input
->
reshape
(
fluid_input_shape
);
::
anakin
::
saber
::
Tensor
<
TargetT
>
tmp_anakin_tensor
(
data
,
TargetT
(),
0
,
::
anakin
::
saber
::
Tensor
<
TargetT
>
tmp_anakin_tensor
(
data
,
TargetT
(),
0
,
fluid_input_shape
);
fluid_input_shape
);
anakin_input
->
copy_from
(
tmp_anakin_tensor
);
anakin_input
->
copy_from
(
tmp_anakin_tensor
);
}
}
cudaDeviceSynchronize
();
net_
->
prediction
();
net_
->
prediction
();
cudaDeviceSynchronize
();
for
(
const
auto
&
output
:
outputs
)
{
for
(
const
auto
&
output
:
outputs
)
{
platform
::
CUDAPlace
gpu_place
(
device_
);
platform
::
CUDAPlace
gpu_place
(
device_
);
auto
*
tensor
=
output
.
second
;
auto
*
tensor
=
output
.
second
;
...
...
paddle/fluid/inference/anakin/engine.h
浏览文件 @
c407dfa3
...
@@ -84,17 +84,20 @@ class AnakinEngine {
...
@@ -84,17 +84,20 @@ class AnakinEngine {
int
GetMaxBatchSize
()
{
return
max_batch_size_
;
}
int
GetMaxBatchSize
()
{
return
max_batch_size_
;
}
void
Freeze
();
void
Freeze
();
void
Optimize
();
void
Optimize
();
void
AllocTmpMem
()
{
PADDLE_ENFORCE
(
net_
->
alloc_memory_first
(
*
graph_
),
"anakin alloc temp memory first failed"
);
}
void
Save
(
std
::
string
path
)
{
graph_
->
save
(
path
);
}
void
Save
(
std
::
string
path
)
{
graph_
->
save
(
path
);
}
bool
IsInit
()
{
return
initialized_
;
}
int
GetDevice
()
{
return
device_
;
}
int
GetDevice
()
{
return
device_
;
}
// void SaveSerializedData(std::string& data) { graph_->save_to_string(data);
// }
// void LoadSerializedData(const std::string& data) {
// graph_->load_from_string(data); }
void
Execute
(
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
inputs
,
void
Execute
(
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
inputs
,
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
outputs
,
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
outputs
,
cudaStream_t
stream
);
cudaStream_t
stream
);
private:
private:
bool
initialized_
{
false
};
int
max_batch_size_
;
int
max_batch_size_
;
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape_
;
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape_
;
int
device_
;
int
device_
;
...
...
paddle/fluid/inference/api/paddle_pass_builder.cc
浏览文件 @
c407dfa3
...
@@ -99,7 +99,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
...
@@ -99,7 +99,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
#endif
#endif
});
});
for
(
int
i
=
6
;
i
>=
3
;
i
--
)
{
for
(
int
i
=
6
;
i
>=
2
;
i
--
)
{
passes_
.
push_back
(
"transpose_flatten"
+
std
::
to_string
(
i
)
+
passes_
.
push_back
(
"transpose_flatten"
+
std
::
to_string
(
i
)
+
"_concat_fuse_pass"
);
"_concat_fuse_pass"
);
}
}
...
...
paddle/fluid/operators/anakin/anakin_engine_op.h
浏览文件 @
c407dfa3
...
@@ -97,23 +97,7 @@ class AnakinEngineOp : public framework::OperatorBase {
...
@@ -97,23 +97,7 @@ class AnakinEngineOp : public framework::OperatorBase {
if
(
param_names_
.
count
(
x
))
continue
;
if
(
param_names_
.
count
(
x
))
continue
;
auto
&
t
=
auto
&
t
=
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
scope
,
x
);
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
scope
,
x
);
/*
auto t_shape = framework::vectorize(t.dims());
auto *anakin_input = engine->Net()->get_in(x);
auto net_shape = anakin_input->shape();
size_t anakin_net_input_size = net_shape.count() * sizeof(float);
size_t fluid_input_size = t.memory_size();
if (fluid_input_size < anakin_net_input_size) {
framework::LoDTensor temp_t;
auto t_dims = t.dims();
temp_t.Resize(t_dims);
TensorCopySync(t, dev_place, &temp_t);
t.Resize(framework::make_ddim(net_shape));
t.mutable_data<float>(dev_place);
TensorCopySync(temp_t, dev_place, &t);
}
*/
inputs
.
insert
({
x
,
&
t
});
inputs
.
insert
({
x
,
&
t
});
}
}
...
@@ -136,6 +120,41 @@ class AnakinEngineOp : public framework::OperatorBase {
...
@@ -136,6 +120,41 @@ class AnakinEngineOp : public framework::OperatorBase {
inference
::
Singleton
<
inference
::
anakin
::
AnakinEngineManager
>::
Global
()
inference
::
Singleton
<
inference
::
anakin
::
AnakinEngineManager
>::
Global
()
.
Get
(
engine_key_
);
.
Get
(
engine_key_
);
}
}
// BUG here, detect that the tensor data pointer here will change sometime.
// Will fix it later.
/*
// For share with the tensor from fluid, We do the net init in the first net
precit.
if (!anakin_engine_->IsInit()) {
auto temp_max_input_shape = anakin_engine_->GetMaxInputShape();
anakin_engine_->AllocTmpMem();
for(auto& input : Inputs("Xs")) {
if (param_names_.count(input)) continue;
platform::CUDAPlace
gpu_place(boost::get<platform::CUDAPlace>(dev_place).device);
auto *input_var = scope.FindVar(input);
auto input_tensor = input_var->GetMutable<framework::LoDTensor>();
auto input_max_shape = temp_max_input_shape[input];
framework::LoDTensor temp_t;
auto t_dims = input_tensor->dims();
temp_t.Resize(t_dims);
TensorCopySync(*input_tensor, dev_place, &temp_t);
input_tensor->Resize(framework::make_ddim(input_max_shape));
input_tensor->mutable_data<float>(dev_place);
TensorCopySync(temp_t, dev_place, input_tensor);
auto* input_data = input_tensor->mutable_data<float>(gpu_place);
auto* anakin_input = anakin_engine_->Net()->get_in(input);
::anakin::saber::Tensor<::anakin::saber::NV>
tmp_anakin_tensor(input_data,
::anakin::saber::NV(), 0, input_max_shape);
anakin_input->share_from(tmp_anakin_tensor);
}
anakin_engine_->InitGraph();
}
*/
return
anakin_engine_
;
return
anakin_engine_
;
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录