Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
97b54fbe
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
97b54fbe
编写于
7月 10, 2020
作者:
Q
Qi Li
提交者:
GitHub
7月 10, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[NPU] enhance cache offline model, test=develop (#3805)
* [NPU] enhance cache offline model, test=develop
上级
be7cc8f8
变更
17
展开全部
隐藏空白更改
内联
并排
Showing
17 changed file
with
793 addition
and
458 deletion
+793
-458
.gitignore
.gitignore
+3
-0
cmake/cross_compiling/android.cmake
cmake/cross_compiling/android.cmake
+5
-1
lite/api/cxx_api_impl.cc
lite/api/cxx_api_impl.cc
+4
-0
lite/backends/npu/device.cc
lite/backends/npu/device.cc
+103
-77
lite/backends/npu/device.h
lite/backends/npu/device.h
+10
-6
lite/core/mir/subgraph/subgraph_detector.cc
lite/core/mir/subgraph/subgraph_detector.cc
+41
-35
lite/kernels/npu/bridges/engine.cc
lite/kernels/npu/bridges/engine.cc
+74
-40
lite/kernels/npu/bridges/engine.h
lite/kernels/npu/bridges/engine.h
+12
-25
lite/kernels/npu/bridges/graph.h
lite/kernels/npu/bridges/graph.h
+1
-1
lite/kernels/npu/bridges/matmul_op.cc
lite/kernels/npu/bridges/matmul_op.cc
+4
-4
lite/kernels/npu/bridges/utility.h
lite/kernels/npu/bridges/utility.h
+16
-15
lite/kernels/npu/subgraph_compute.cc
lite/kernels/npu/subgraph_compute.cc
+297
-211
lite/kernels/npu/subgraph_compute.h
lite/kernels/npu/subgraph_compute.h
+51
-38
lite/utils/env.h
lite/utils/env.h
+2
-0
lite/utils/io.h
lite/utils/io.h
+35
-0
lite/utils/md5.h
lite/utils/md5.h
+104
-0
lite/utils/string.h
lite/utils/string.h
+31
-5
未找到文件。
.gitignore
浏览文件 @
97b54fbe
...
...
@@ -120,3 +120,6 @@ metal/MobileNetDemo/MobileNetDemo/Resources
lite/model_parser/flatbuffers/framework_generated.h
build*
# hiai libs
ai_ddk_lib*
cmake/cross_compiling/android.cmake
浏览文件 @
97b54fbe
...
...
@@ -35,7 +35,11 @@ endif()
if
(
NOT DEFINED ANDROID_API_LEVEL
)
set
(
ANDROID_API_LEVEL
"23"
)
if
(
ARM_TARGET_ARCH_ABI STREQUAL
"armv7"
)
set
(
ANDROID_API_LEVEL
"22"
)
if
(
LITE_WITH_NPU AND NOT LITE_ON_TINY_PUBLISH
)
set
(
ANDROID_API_LEVEL
"24"
)
# HIAI DDK depends on android-24
else
()
set
(
ANDROID_API_LEVEL
"22"
)
endif
()
endif
()
endif
()
...
...
lite/api/cxx_api_impl.cc
浏览文件 @
97b54fbe
...
...
@@ -73,6 +73,10 @@ void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) {
}
mode_
=
config
.
power_mode
();
threads_
=
config
.
threads
();
#ifdef LITE_WITH_NPU
Context
<
TargetType
::
kNPU
>::
SetSubgraphModelCacheDir
(
config
.
subgraph_model_cache_dir
());
#endif
#if (defined LITE_WITH_X86) && (defined PADDLE_WITH_MKLML) && \
!(defined LITE_ON_MODEL_OPTIMIZE_TOOL)
int
num_threads
=
config
.
x86_math_library_num_threads
();
...
...
lite/backends/npu/device.cc
浏览文件 @
97b54fbe
...
...
@@ -20,96 +20,122 @@ namespace paddle {
namespace
lite
{
namespace
npu
{
bool
WriteToOMFile
(
const
domi
::
ModelBufferData
&
om_model_buff
,
std
::
string
om_file_path
)
{
FILE
*
fp
;
fp
=
fopen
(
om_file_path
.
c_str
(),
"wb"
);
CHECK
(
fp
!=
nullptr
)
<<
om_file_path
<<
" open failed!"
;
uint32_t
write_size
=
(
uint32_t
)
fwrite
(
om_model_buff
.
data
,
1
,
om_model_buff
.
length
,
fp
);
CHECK_EQ
(
write_size
,
om_model_buff
.
length
)
<<
"write om file failed !"
;
fclose
(
fp
);
return
true
;
}
bool
ReadFromOMFile
(
domi
::
ModelBufferData
*
om_model_buff
,
std
::
string
om_file_path
)
{
FILE
*
fp
;
fp
=
fopen
(
om_file_path
.
c_str
(),
"rb"
);
CHECK
(
fp
!=
nullptr
)
<<
om_file_path
<<
" open failed!"
;
fseek
(
fp
,
0
,
SEEK_END
);
uint32_t
model_length
=
(
uint32_t
)
ftell
(
fp
);
fseek
(
fp
,
0
,
SEEK_SET
);
om_model_buff
->
data
=
malloc
(
model_length
);
om_model_buff
->
length
=
model_length
;
uint32_t
read_size
=
(
uint32_t
)
fread
(
om_model_buff
->
data
,
1
,
model_length
,
fp
);
CHECK_EQ
(
read_size
,
model_length
)
<<
"read om file failed !"
;
fclose
(
fp
);
return
true
;
}
std
::
shared_ptr
<
hiai
::
AiModelMngerClient
>
Device
::
Build
(
const
std
::
string
model_name
,
// NOLINT
std
::
vector
<
ge
::
Operator
>&
input_nodes
,
// NOLINT
std
::
vector
<
ge
::
Operator
>&
output_nodes
,
// NOLINT
const
std
::
string
model_cache_full_dir
=
""
// NOLINT
)
{
VLOG
(
3
)
<<
"[NPU] Build model"
;
// Build the HiAI IR graph to the HiAI om model
ge
::
Graph
ir_graph
(
"graph"
);
ir_graph
.
SetInputs
(
input_nodes
).
SetOutputs
(
output_nodes
);
ge
::
Model
om_model
(
"model"
,
"model"
);
om_model
.
SetGraph
(
ir_graph
);
domi
::
HiaiIrBuild
ir_build
;
domi
::
ModelBufferData
om_model_buf
;
if
(
!
model_cache_full_dir
.
empty
()
&&
IsFileExists
(
model_cache_full_dir
))
{
VLOG
(
3
)
<<
"Will read om model from "
<<
model_cache_full_dir
;
ReadFromOMFile
(
&
om_model_buf
,
model_cache_full_dir
);
}
else
{
if
(
!
ir_build
.
CreateModelBuff
(
om_model
,
om_model_buf
))
{
LOG
(
WARNING
)
<<
"[NPU] CreateModelBuff failed!"
;
return
nullptr
;
}
if
(
!
ir_build
.
BuildIRModel
(
om_model
,
om_model_buf
))
{
LOG
(
WARNING
)
<<
"[NPU] BuildIRModel failed!"
;
ir_build
.
ReleaseModelBuff
(
om_model_buf
);
return
nullptr
;
}
if
(
!
model_cache_full_dir
.
empty
())
{
VLOG
(
3
)
<<
"Will write om model to "
<<
model_cache_full_dir
;
WriteToOMFile
(
om_model_buf
,
model_cache_full_dir
);
}
}
std
::
shared_ptr
<
hiai
::
AiModelMngerClient
>
Device
::
Load
(
const
std
::
string
&
model_name
,
std
::
vector
<
char
>*
model_buffer
,
bool
*
model_comp
)
{
// Create a HiAI model manager client to load the HiAI om model
std
::
shared_ptr
<
hiai
::
AiModelMngerClient
>
model_client
(
new
hiai
::
AiModelMngerClient
());
auto
model_client
=
std
::
make_shared
<
hiai
::
AiModelMngerClient
>
();
if
(
model_client
->
Init
(
nullptr
)
!=
hiai
::
AI_SUCCESS
)
{
LOG
(
WARNING
)
<<
"[NPU] AiModelMngerClient init failed)!"
;
ir_build
.
ReleaseModelBuff
(
om_model_buf
);
LOG
(
WARNING
)
<<
"[NPU] Init hiai model client failed!"
;
return
nullptr
;
}
// Check HiAI DDK version
const
char
*
ddk_version
=
model_client
->
GetVersion
();
if
(
ddk_version
)
{
LOG
(
INFO
)
<<
"[NPU] HiAI DDK version: "
<<
ddk_version
;
}
else
{
LOG
(
WARNING
)
<<
"[NPU] Unable to get HiAI DDK version!"
;
}
// Check model compatibility
auto
model_desc
=
std
::
make_shared
<
hiai
::
AiModelDescription
>
(
model_name
,
freq_level
(),
framework_type
(),
model_type
(),
device_type
());
model_desc
->
SetModelBuffer
(
om_model_buf
.
data
,
om_model_buf
.
length
);
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiModelDescription
>>
model_descs
;
model_descs
.
push_back
(
model_desc
);
model_desc
->
SetModelBuffer
(
reinterpret_cast
<
const
void
*>
(
model_buffer
->
data
()),
model_buffer
->
size
());
if
(
!*
model_comp
&&
model_client
->
CheckModelCompatibility
(
*
model_desc
,
*
model_comp
)
!=
hiai
::
AI_SUCCESS
)
{
*
model_comp
=
false
;
VLOG
(
3
)
<<
"[NPU] model is NOT compatiblitiable, setting model_comp to "
<<
*
model_comp
;
}
else
{
*
model_comp
=
true
;
VLOG
(
3
)
<<
"[NPU] model is compatiblitiable, setting model_comp to "
<<
*
model_comp
;
}
// Rebuild and write the data of the compatible model to the model buffer
if
(
!*
model_comp
)
{
std
::
shared_ptr
<
hiai
::
AiModelBuilder
>
model_builder
=
std
::
make_shared
<
hiai
::
AiModelBuilder
>
(
model_client
);
hiai
::
MemBuffer
*
org_model_buffer
=
model_builder
->
InputMemBufferCreate
(
reinterpret_cast
<
void
*>
(
model_buffer
->
data
()),
model_buffer
->
size
());
if
(
org_model_buffer
)
{
std
::
vector
<
hiai
::
MemBuffer
*>
org_model_buffers
;
org_model_buffers
.
push_back
(
org_model_buffer
);
hiai
::
MemBuffer
*
new_model_buffer
=
model_builder
->
OutputMemBufferCreate
(
framework_type
(),
org_model_buffers
);
// VLOG(3) << "[NPU] new model buffer memeory size is " <<
// new_model_buffer->GetMemBufferSize();
if
(
new_model_buffer
)
{
uint32_t
new_model_size
=
0
;
if
(
model_builder
->
BuildModel
(
org_model_buffers
,
new_model_buffer
,
new_model_size
)
==
hiai
::
AI_SUCCESS
)
{
// need to change to new_model_size as GetMemBufferSize is not
// correct.
model_buffer
->
resize
(
new_model_size
);
memcpy
(
reinterpret_cast
<
void
*>
(
model_buffer
->
data
()),
new_model_buffer
->
GetMemBufferData
(),
new_model_size
);
// Reset the model buffer
model_desc
->
SetModelBuffer
(
reinterpret_cast
<
const
void
*>
(
model_buffer
->
data
()),
model_buffer
->
size
());
VLOG
(
3
)
<<
"[NPU] Rebuild the compatible model done."
;
}
else
{
LOG
(
WARNING
)
<<
"[NPU] Rebuild the compatible model failed!"
;
}
model_builder
->
MemBufferDestroy
(
new_model_buffer
);
}
else
{
LOG
(
WARNING
)
<<
"[NPU] OutputMemBufferCreate failed!"
;
}
model_builder
->
MemBufferDestroy
(
org_model_buffer
);
}
else
{
LOG
(
WARNING
)
<<
"[NPU] InputMemBufferCreate failed!"
;
}
}
// Load the compatible model
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiModelDescription
>>
model_descs
{
model_desc
};
if
(
model_client
->
Load
(
model_descs
)
!=
hiai
::
AI_SUCCESS
)
{
LOG
(
WARNING
)
<<
"[NPU] AiModelMngerClient load model failed!"
;
ir_build
.
ReleaseModelBuff
(
om_model_buf
);
return
nullptr
;
}
ir_build
.
ReleaseModelBuff
(
om_model_buf
);
VLOG
(
3
)
<<
"[NPU] Build done"
;
VLOG
(
3
)
<<
"[NPU] Load model done."
;
return
model_client
;
}
bool
Device
::
Build
(
std
::
vector
<
ge
::
Operator
>&
input_nodes
,
// NOLINT
std
::
vector
<
ge
::
Operator
>&
output_nodes
,
// NOLINT
std
::
vector
<
char
>*
model_buffer
)
{
// Convert the HiAI IR graph to the HiAI om model
ge
::
Graph
ir_graph
(
"graph"
);
ir_graph
.
SetInputs
(
input_nodes
).
SetOutputs
(
output_nodes
);
ge
::
Model
om_model
(
"model"
,
"model"
);
om_model
.
SetGraph
(
ir_graph
);
// Build the HiAI om model, serialize and output it to the om buffer
domi
::
HiaiIrBuild
ir_build
;
domi
::
ModelBufferData
om_buffer
;
if
(
!
ir_build
.
CreateModelBuff
(
om_model
,
om_buffer
))
{
LOG
(
WARNING
)
<<
"[NPU] CreateModelBuff failed!"
;
return
false
;
}
if
(
!
ir_build
.
BuildIRModel
(
om_model
,
om_buffer
))
{
LOG
(
WARNING
)
<<
"[NPU] BuildIRModel failed!"
;
ir_build
.
ReleaseModelBuff
(
om_buffer
);
return
false
;
}
model_buffer
->
resize
(
om_buffer
.
length
);
memcpy
(
reinterpret_cast
<
void
*>
(
model_buffer
->
data
()),
reinterpret_cast
<
void
*>
(
om_buffer
.
data
),
om_buffer
.
length
);
ir_build
.
ReleaseModelBuff
(
om_buffer
);
VLOG
(
3
)
<<
"[NPU] Build model done."
;
return
true
;
}
}
// namespace npu
}
// namespace lite
}
// namespace paddle
lite/backends/npu/device.h
浏览文件 @
97b54fbe
...
...
@@ -38,14 +38,18 @@ class Device {
int
model_type
()
{
return
model_type_
;
}
int
device_type
()
{
return
device_type_
;
}
// Load the HiAI om model from buffer, rebuild the model if it's incompatible
// with the current device, then create a HiAI model manager client(from HiAI
// Server) to run inference
std
::
shared_ptr
<
hiai
::
AiModelMngerClient
>
Load
(
const
std
::
string
&
model_name
,
std
::
vector
<
char
>*
model_buffer
,
bool
*
model_comp
);
// Build the HiAI IR graph to om model, return HiAI model manager client to
// load om model and run inference.
std
::
shared_ptr
<
hiai
::
AiModelMngerClient
>
Build
(
const
std
::
string
model_name
,
// NOLINT
std
::
vector
<
ge
::
Operator
>&
input_nodes
,
// NOLINT
std
::
vector
<
ge
::
Operator
>&
output_nodes
,
// NOLINT
const
std
::
string
model_cache_name
// NOLINT
);
// NOLINT
bool
Build
(
std
::
vector
<
ge
::
Operator
>&
input_nodes
,
// NOLINT
std
::
vector
<
ge
::
Operator
>&
output_nodes
,
// NOLINT
std
::
vector
<
char
>*
model_buffer
);
private:
int
freq_level_
{
3
};
...
...
lite/core/mir/subgraph/subgraph_detector.cc
浏览文件 @
97b54fbe
...
...
@@ -425,42 +425,51 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
subgraph_op_desc
.
SetAttr
<
int32_t
>
(
"sub_block"
,
sub_block_idx
);
// Extract input and output nodes from the target subgraph
std
::
set
<
Node
*>
i
nput
_var_nodes
;
std
::
set
<
Node
*>
i
data
_var_nodes
;
std
::
set
<
Node
*>
weight_var_nodes
;
std
::
set
<
Node
*>
o
utput
_var_nodes
;
std
::
set
<
Node
*>
o
data
_var_nodes
;
std
::
set
<
Node
*>
local_var_nodes
;
std
::
set
<
Node
*>
unused_var_nodes
;
ExtractInputsOutputs
(
subgraph_nodes
,
&
i
nput
_var_nodes
,
&
i
data
_var_nodes
,
&
weight_var_nodes
,
&
o
utput
_var_nodes
,
&
o
data
_var_nodes
,
&
local_var_nodes
,
&
unused_var_nodes
);
// A simplified model without the original weight/local/unused nodes on the
// subgraph ops will be saved only if 'SUBGRAPH_DISABLE_ONLINE_MODE' is set to
// true and Predictor->Run(...), Predictor->Save(...) is called.
std
::
set
<
Node
*>
input_var_nodes
(
idata_var_nodes
.
begin
(),
idata_var_nodes
.
end
());
std
::
set
<
Node
*>
output_var_nodes
(
odata_var_nodes
.
begin
(),
odata_var_nodes
.
end
());
if
(
!
GetBoolFromEnv
(
SUBGRAPH_DISABLE_ONLINE_MODE
))
{
input_var_nodes
.
insert
(
weight_var_nodes
.
begin
(),
weight_var_nodes
.
end
());
output_var_nodes
.
insert
(
local_var_nodes
.
begin
(),
local_var_nodes
.
end
());
output_var_nodes
.
insert
(
unused_var_nodes
.
begin
(),
unused_var_nodes
.
end
());
}
// Set input and output name mapping which stores the real inputs and
// outputs
std
::
vector
<
std
::
string
>
i
nput
_var_names
;
std
::
vector
<
std
::
string
>
o
utput
_var_names
;
for
(
auto
&
var_node
:
i
nput
_var_nodes
)
{
i
nput
_var_names
.
push_back
(
var_node
->
AsArg
().
name
);
std
::
vector
<
std
::
string
>
i
data
_var_names
;
std
::
vector
<
std
::
string
>
o
data
_var_names
;
for
(
auto
&
var_node
:
i
data
_var_nodes
)
{
i
data
_var_names
.
push_back
(
var_node
->
AsArg
().
name
);
}
for
(
auto
&
var_node
:
o
utput
_var_nodes
)
{
o
utput
_var_names
.
push_back
(
var_node
->
AsArg
().
name
);
for
(
auto
&
var_node
:
o
data
_var_nodes
)
{
o
data
_var_names
.
push_back
(
var_node
->
AsArg
().
name
);
}
subgraph_op_desc
.
SetAttr
<
std
::
vector
<
std
::
string
>>
(
"input_data_names"
,
i
nput
_var_names
);
i
data
_var_names
);
subgraph_op_desc
.
SetAttr
<
std
::
vector
<
std
::
string
>>
(
"output_data_names"
,
output_var_names
);
odata_var_names
);
// Set all of the inputs and outputs to the target subgraph op
// To prevent vars are removed in RuntimeProgram::UpdateVarsOfProgram()
for
(
auto
&
var_node
:
weight_var_nodes
)
{
std
::
vector
<
std
::
string
>
input_var_names
;
std
::
vector
<
std
::
string
>
output_var_names
;
for
(
auto
&
var_node
:
input_var_nodes
)
{
input_var_names
.
push_back
(
var_node
->
AsArg
().
name
);
}
for
(
auto
&
var_node
:
local_var_nodes
)
{
output_var_names
.
push_back
(
var_node
->
AsArg
().
name
);
}
for
(
auto
&
var_node
:
unused_var_nodes
)
{
for
(
auto
&
var_node
:
output_var_nodes
)
{
output_var_names
.
push_back
(
var_node
->
AsArg
().
name
);
}
subgraph_op_desc
.
SetInput
(
"Inputs"
,
input_var_names
);
...
...
@@ -500,26 +509,13 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
for
(
auto
&
var_node
:
input_var_nodes
)
{
IR_NODE_LINK_TO
(
var_node
,
subgraph_op_node
);
}
for
(
auto
&
var_node
:
weight_var_nodes
)
{
IR_NODE_LINK_TO
(
var_node
,
subgraph_op_node
);
}
for
(
auto
&
var_node
:
output_var_nodes
)
{
IR_OP_VAR_LINK
(
subgraph_op_node
,
var_node
);
}
for
(
auto
&
var_node
:
local_var_nodes
)
{
IR_OP_VAR_LINK
(
subgraph_op_node
,
var_node
);
}
for
(
auto
&
var_node
:
unused_var_nodes
)
{
IR_OP_VAR_LINK
(
subgraph_op_node
,
var_node
);
}
// Remove subgraph nodes and unused var nodes
auto
nodes2rm
=
GetNodes2RM
(
subgraph_nodes
,
{
input_var_nodes
,
weight_var_nodes
,
output_var_nodes
,
local_var_nodes
,
unused_var_nodes
});
auto
nodes2rm
=
GetNodes2RM
(
subgraph_nodes
,
{
input_var_nodes
,
output_var_nodes
});
GraphSafeRemoveNodes
(
graph
,
nodes2rm
);
}
...
...
@@ -594,7 +590,17 @@ std::set<const Node *> GetNodes2RM(
std
::
set
<
const
Node
*>
nodes2rm
(
op_nodes
.
begin
(),
op_nodes
.
end
());
for
(
auto
&
op_node
:
op_nodes
)
{
for
(
auto
&
var_node
:
op_node
->
inlinks
)
{
if
(
!
nodes2rm
.
count
(
var_node
))
{
bool
skip
=
false
;
// skip the var node which is used by any other ops that doesn't belong to
// the subgraph ops.
for
(
auto
&
out_op_node
:
var_node
->
outlinks
)
{
if
(
std
::
find
(
op_nodes
.
begin
(),
op_nodes
.
end
(),
out_op_node
)
!=
op_nodes
.
end
())
{
skip
=
true
;
break
;
}
}
if
(
!
skip
&&
!
nodes2rm
.
count
(
var_node
))
{
nodes2rm
.
insert
(
var_node
);
}
}
...
...
lite/kernels/npu/bridges/engine.cc
浏览文件 @
97b54fbe
...
...
@@ -15,6 +15,7 @@
#include "lite/kernels/npu/bridges/engine.h"
#include <sys/time.h>
#include <time.h>
#include <algorithm>
#include <utility>
#include "lite/kernels/npu/bridges/registry.h"
...
...
@@ -22,11 +23,50 @@ namespace paddle {
namespace
lite
{
namespace
subgraph
{
int
Engine
::
BuildDeviceProgram
()
{
return
FAILED
;
}
Engine
::
Engine
(
KernelContext
*
ctx
,
int
block_idx
,
cpp
::
BlockDesc
*
block_desc
,
const
std
::
vector
<
std
::
string
>
&
input_names
,
const
std
::
vector
<
std
::
string
>
&
output_names
,
lite
::
Scope
*
scope
)
:
ctx_
(
ctx
),
block_idx_
(
block_idx
),
block_desc_
(
block_desc
),
scope_
(
scope
)
{
input_names_
=
input_names
;
output_names_
=
output_names
;
// Sort the name of input and output tensors, it's convenient for us to get
// the info of input and output tensors in the same order from the device
// program, because the result of subgraph division may be different but right
// at each call of the subgraph pass.
std
::
stable_sort
(
input_names_
.
begin
(),
input_names_
.
end
());
std
::
stable_sort
(
output_names_
.
begin
(),
output_names_
.
end
());
}
int
Engine
::
LaunchDeviceProgram
()
{
return
0
;
}
bool
Engine
::
Run
()
{
if
(
is_first_epoch_
)
{
PrepareWorkspaceForDeviceProgram
();
is_first_epoch_
=
false
;
}
if
(
InputShapeChanged
())
{
BuildDeviceProgram
();
}
return
LaunchDeviceProgram
();
}
int
Engine
::
BuildOriginProgram
()
{
bool
Engine
::
PrepareWorkspaceForOriginProgram
()
{
origin_idims_
.
resize
(
input_names_
.
size
());
origin_itensors_
.
resize
(
input_names_
.
size
());
for
(
int
i
=
0
;
i
<
input_names_
.
size
();
i
++
)
{
origin_itensors_
[
i
]
=
scope_
->
FindMutableTensor
(
input_names_
[
i
]);
CHECK
(
origin_itensors_
[
i
]);
}
origin_otensors_
.
resize
(
output_names_
.
size
());
for
(
int
i
=
0
;
i
<
output_names_
.
size
();
i
++
)
{
origin_otensors_
[
i
]
=
scope_
->
FindMutableTensor
(
output_names_
[
i
]);
CHECK
(
origin_otensors_
[
i
]);
}
return
true
;
}
bool
Engine
::
BuildOriginProgram
()
{
// TODO(hong19860320) The block_desc need to be divided into subgraphs during
// the exection time. But only see them as a subgraph now.
origin_program_
.
clear
();
...
...
@@ -34,11 +74,14 @@ int Engine::BuildOriginProgram() {
auto
op_desc
=
block_desc_
->
GetOp
<
cpp
::
OpDesc
>
(
op_idx
);
CHECK
(
op_desc
);
std
::
string
op_type
=
op_desc
->
Type
();
// Create op and pick up the best kernel
auto
op
=
LiteOpRegistry
::
Global
().
Create
(
op_desc
->
Type
());
CHECK
(
op
)
<<
"no Op found for "
<<
op_type
;
op
->
Attach
(
*
op_desc
,
scope_
);
std
::
unique_ptr
<
KernelBase
>
picked_kernel
;
if
(
op_desc
->
HasAttr
(
kKernelTypeAttr
))
{
// Create op and pick up kernel according to the kKernelTypeAttr attribute
// Create op and pick up the best kernel according to the
// kKernelTypeAttr attribute
auto
kernel_type
=
op_desc
->
GetAttr
<
std
::
string
>
(
kKernelTypeAttr
);
std
::
string
alias
;
Place
place
;
...
...
@@ -48,12 +91,14 @@ int Engine::BuildOriginProgram() {
auto
kernels
=
op
->
CreateKernels
({
place
});
CHECK_GT
(
kernels
.
size
(),
0u
)
<<
"No kernels found for "
<<
op_type
;
auto
it
=
std
::
find_if
(
kernels
.
begin
(),
kernels
.
end
(),
[
&
](
std
::
unique_ptr
<
KernelBase
>
&
it
)
{
kernels
.
begin
(),
kernels
.
end
(),
[
&
](
std
::
unique_ptr
<
KernelBase
>
&
it
)
{
return
it
->
alias
()
==
alias
;
});
CHECK
(
it
!=
kernels
.
end
());
picked_kernel
=
std
::
move
(
*
it
);
}
else
{
// TODO(hong19860320) add kernel picking according to the type of input
// and output tensors
VLOG
(
3
)
<<
"The attr '"
<<
kKernelTypeAttr
<<
"' not found, pick the first kernel for "
<<
op_type
;
std
::
vector
<
std
::
unique_ptr
<
KernelBase
>>
kernels
;
...
...
@@ -74,52 +119,41 @@ int Engine::BuildOriginProgram() {
}
origin_program_
.
emplace_back
(
std
::
move
(
op
),
std
::
move
(
picked_kernel
));
}
return
0
;
CHECK
(
!
origin_program_
.
empty
())
<<
"no instructions"
;
return
true
;
}
int
Engine
::
LaunchOriginProgram
()
{
for
(
auto
&
inst
:
origin_program_
)
{
auto
op_type
=
inst
.
op
()
->
op_info
()
->
Type
();
if
(
op_type
==
"feed"
||
op_type
==
"fetch"
)
continue
;
inst
.
Run
();
bool
Engine
::
LaunchOriginProgram
()
{
if
(
origin_program_
.
empty
())
{
BuildOriginProgram
();
}
if
(
!
origin_program_
.
empty
())
{
for
(
auto
&
inst
:
origin_program_
)
{
auto
op_type
=
inst
.
op
()
->
op_info
()
->
Type
();
if
(
op_type
==
"feed"
||
op_type
==
"fetch"
)
continue
;
inst
.
Run
();
}
return
true
;
}
return
0
;
return
false
;
}
int
Engine
::
Build
()
{
// In order to attach all of the ops of the block desc, we need to build the
// original program firstly.
BuildOriginProgram
();
// Run InferShape() of all of ops, and convert Paddle ops to NPU/XPU IR graph
build_device_program_status_
=
BuildDeviceProgram
();
return
build_device_program_status_
;
bool
Engine
::
PrepareWorkspaceForDeviceProgram
()
{
return
PrepareWorkspaceForOriginProgram
();
}
void
Engine
::
InitDeviceTensor
()
{
return
;
}
bool
Engine
::
BuildDeviceProgram
()
{
return
BuildOriginProgram
();
}
bool
Engine
::
LaunchDeviceProgram
()
{
return
LaunchOriginProgram
();
}
bool
Engine
::
InputShapeChanged
()
{
bool
changed
=
false
;
for
(
size_t
i
=
0
;
i
<
origin_itensors_
.
size
();
i
++
)
{
if
(
origin_itensors_
[
i
]
->
dims
()
!=
origin_idims_
[
i
])
{
return
true
;
}
}
return
false
;
}
int
Engine
::
Launch
()
{
// Rebuild device program when the shapes of input tensors have been changed.
if
(
CHECK_SUCCESS
(
build_device_program_status_
)
&&
CHECK_REBUILD_WHEN_SHAPE_CHANGED
(
build_device_program_status_
)
&&
InputShapeChanged
())
{
Build
();
InitDeviceTensor
();
}
if
(
CHECK_FAILED
(
build_device_program_status_
))
{
LaunchOriginProgram
();
}
else
{
LaunchDeviceProgram
();
auto
origin_idim
=
origin_itensors_
[
i
]
->
dims
().
Vectorize
();
changed
|=
origin_idim
!=
origin_idims_
[
i
];
origin_idims_
[
i
]
=
origin_idim
;
}
return
0
;
return
changed
;
}
}
// namespace subgraph
...
...
lite/kernels/npu/bridges/engine.h
浏览文件 @
97b54fbe
...
...
@@ -33,49 +33,36 @@ class Engine {
cpp
::
BlockDesc
*
block_desc
,
const
std
::
vector
<
std
::
string
>
&
input_names
,
const
std
::
vector
<
std
::
string
>
&
output_names
,
lite
::
Scope
*
scope
,
std
::
string
model_cache_dir
=
""
)
:
ctx_
(
ctx
),
block_idx_
(
block_idx
),
block_desc_
(
block_desc
),
input_names_
(
input_names
),
output_names_
(
output_names
),
scope_
(
scope
),
model_cache_dir_
(
model_cache_dir
)
{}
lite
::
Scope
*
scope
);
virtual
~
Engine
()
=
default
;
virtual
int
Build
();
virtual
int
Launch
();
virtual
bool
Run
();
private:
Engine
(
const
Engine
&
)
=
delete
;
protected:
virtual
int
BuildDeviceProgram
();
virtual
int
LaunchDeviceProgram
();
virtual
bool
PrepareWorkspaceForOriginProgram
();
virtual
bool
BuildOriginProgram
();
virtual
bool
LaunchOriginProgram
();
virtual
int
BuildOriginProgram
();
virtual
int
LaunchOriginProgram
();
virtual
bool
PrepareWorkspaceForDeviceProgram
();
virtual
bool
BuildDeviceProgram
();
virtual
bool
LaunchDeviceProgram
();
virtual
void
InitDeviceTensor
();
virtual
bool
InputShapeChanged
();
KernelContext
*
ctx_
{
nullptr
};
int
block_idx_
;
cpp
::
BlockDesc
*
block_desc_
;
int
block_idx_
{
-
1
}
;
cpp
::
BlockDesc
*
block_desc_
{
nullptr
}
;
std
::
vector
<
std
::
string
>
input_names_
;
std
::
vector
<
std
::
string
>
output_names_
;
Scope
*
scope_
{
nullptr
};
// SUCCESS: device program build successed. FAILED: device program build
// failed. REBUILD_WHEN_SHAPE_CHANGED: device program build successed but need
// to rebuild when input shape changed.
int
build_device_program_status_
{
0
};
std
::
vector
<
DDim
>
origin_idims_
;
std
::
vector
<
DDim
>
origin_odims_
;
bool
is_first_epoch_
{
true
};
std
::
vector
<
std
::
vector
<
int64_t
>>
origin_idims_
;
std
::
vector
<
Tensor
*>
origin_itensors_
;
std
::
vector
<
Tensor
*>
origin_otensors_
;
std
::
vector
<
Instruction
>
origin_program_
;
std
::
string
model_cache_dir_
{
""
};
};
}
// namespace subgraph
...
...
lite/kernels/npu/bridges/graph.h
浏览文件 @
97b54fbe
...
...
@@ -19,7 +19,7 @@
#include <string>
#include <utility>
#include <vector>
#include "graph/
op
/all_ops.h"
#include "graph/
compatible
/all_ops.h"
#include "lite/core/op_lite.h"
#include "lite/core/tensor.h"
...
...
lite/kernels/npu/bridges/matmul_op.cc
浏览文件 @
97b54fbe
...
...
@@ -94,10 +94,10 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
}
else
{
matmul_node
=
graph
->
Add
<
ge
::
op
::
BatchMatMul
>
(
out_name
);
auto
matmul_op
=
matmul_node
->
data
<
ge
::
op
::
BatchMatMul
>
();
matmul_op
->
set_input_x
(
*
x_node
->
data
());
matmul_op
->
set_input_
y
(
*
y_node
->
data
());
matmul_op
->
set_attr_adj_x
(
transpose_x
);
matmul_op
->
set_attr_adj_
y
(
transpose_y
);
matmul_op
->
set_input_x
1
(
*
x_node
->
data
());
matmul_op
->
set_input_
x2
(
*
y_node
->
data
());
matmul_op
->
set_attr_adj_x
1
(
transpose_x
);
matmul_op
->
set_attr_adj_
x2
(
transpose_y
);
}
if
(
fabs
(
alpha
-
1.
f
)
>
1e-6
f
)
{
...
...
lite/kernels/npu/bridges/utility.h
浏览文件 @
97b54fbe
...
...
@@ -20,11 +20,11 @@
#include <string>
#include <vector>
#include "graph/buffer.h"
#include "graph/compatible/operator_reg.h"
#include "graph/graph.h"
#include "graph/model.h"
#include "graph/op/all_ops.h"
#include "graph/operator.h"
#include "graph/operator_reg.h"
#include "lite/core/op_lite.h"
#include "lite/utils/macros.h"
...
...
@@ -97,25 +97,26 @@ REG_OP(Pad)
/*
* Multiplies slices of two tensors in batches.
* <Input>
*
x
: The input tensor
*
y
: The input tensor
*
x1
: The input tensor
*
x2
: The input tensor
* <Output>
*
z
: The output tensor
*
y
: The output tensor
* <Attr>
* adj_x : adj_x is true, the input tensor x is transposed, otherwise
* it will not be transposed. Default is false (The current version only
* supports false).
* adj_y : adj_y is true, the input tensor y is transposed, otherwise
* it will not be transposed. Default is false.
* adj_x1 : adj_x1 is true, the input tensor x1 is transposed,
* otherwise it will not be transposed.
* Default is false (The current version only supports false).
* adj_x2 : adj_x2 is true, the input tensor x2 is transposed,
* otherwise it will not be transposed.
* Default is false.
* <Added in HiAI version>
*
100.320.010.010
* 100.320.010.010
*/
REG_OP
(
BatchMatMul
)
.
INPUT
(
x
,
TensorType
({
DT_FLOAT
}))
.
INPUT
(
y
,
TensorType
({
DT_FLOAT
}))
.
OUTPUT
(
z
,
TensorType
({
DT_FLOAT
}))
.
ATTR
(
adj_x
,
AttrValue
::
BOOL
{
false
})
.
ATTR
(
adj_
y
,
AttrValue
::
BOOL
{
false
})
.
INPUT
(
x
1
,
TensorType
({
DT_FLOAT
}))
.
INPUT
(
x2
,
TensorType
({
DT_FLOAT
}))
.
OUTPUT
(
y
,
TensorType
({
DT_FLOAT
}))
.
ATTR
(
adj_x
1
,
AttrValue
::
BOOL
{
false
})
.
ATTR
(
adj_
x2
,
AttrValue
::
BOOL
{
false
})
.
OP_END
()
}
// namespace ge
...
...
lite/kernels/npu/subgraph_compute.cc
浏览文件 @
97b54fbe
此差异已折叠。
点击以展开。
lite/kernels/npu/subgraph_compute.h
浏览文件 @
97b54fbe
...
...
@@ -28,52 +28,65 @@ namespace lite {
namespace
kernels
{
namespace
npu
{
class
SubgraphEngine
:
public
subgraph
::
Engine
{
class
DeviceProgram
{
public:
SubgraphEngine
(
KernelContext
*
ctx
,
int
block_idx
,
cpp
::
BlockDesc
*
block_desc
,
const
std
::
vector
<
std
::
string
>
&
input_names
,
const
std
::
vector
<
std
::
string
>
&
output_names
,
Scope
*
scope
,
std
::
string
model_cache_dir
=
""
)
:
subgraph
::
Engine
(
ctx
,
block_idx
,
block_desc
,
input_names
,
output_names
,
scope
,
model_cache_dir
)
{}
DeviceProgram
()
{}
~
DeviceProgram
()
{}
std
::
string
GenerateModelName
(
const
std
::
vector
<
std
::
string
>&
input_names
,
const
std
::
vector
<
std
::
string
>&
output_names
,
const
std
::
vector
<
std
::
vector
<
int64_t
>>&
origin_idims
);
bool
LoadFromCacheFile
(
const
std
::
vector
<
std
::
string
>&
input_names
,
const
std
::
vector
<
std
::
string
>&
output_names
,
const
std
::
vector
<
std
::
vector
<
int64_t
>>&
origin_idims
,
const
std
::
string
&
model_cache_dir
);
bool
BuildGraphAndCacheToFile
(
const
std
::
vector
<
Instruction
>&
origin_program
,
const
std
::
vector
<
std
::
string
>&
input_names
,
const
std
::
vector
<
std
::
string
>&
output_names
,
const
std
::
vector
<
std
::
vector
<
int64_t
>>&
origin_idims
,
const
std
::
vector
<
Tensor
*>&
origin_otensors
,
const
std
::
string
&
model_cache_dir
);
bool
ShareBufferWithOriginTensors
(
const
std
::
vector
<
std
::
string
>&
input_names
,
const
std
::
vector
<
std
::
string
>&
output_names
,
std
::
vector
<
Tensor
*>*
origin_itensors
,
std
::
vector
<
Tensor
*>*
origin_otensors
,
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>*
device_itensors
,
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>*
device_otensors
);
bool
ZeroCopyRun
(
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>*
device_itensors
,
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>*
device_otensors
);
struct
device_program_t
{
explicit
device_program_t
(
std
::
shared_ptr
<
hiai
::
AiModelMngerClient
>
_client
)
:
client
(
_client
)
{}
std
::
shared_ptr
<
hiai
::
AiModelMngerClient
>
client
{
nullptr
};
std
::
vector
<
DDim
>
origin_idims
{};
std
::
vector
<
DDim
>
origin_odims
{};
std
::
vector
<
hiai
::
TensorDimension
>
device_idims
{};
std
::
vector
<
hiai
::
TensorDimension
>
device_odims
{};
};
public:
std
::
string
model_name_
{
""
};
std
::
shared_ptr
<
hiai
::
AiModelMngerClient
>
model_client_
{
nullptr
};
std
::
vector
<
std
::
vector
<
int64_t
>>
origin_odims_
;
std
::
vector
<
PrecisionType
>
origin_otypes_
;
std
::
vector
<
hiai
::
TensorDimension
>
device_idims_
{};
std
::
vector
<
hiai
::
TensorDimension
>
device_odims_
{};
};
int
Build
()
override
;
class
SubgraphEngine
:
public
subgraph
::
Engine
{
public:
SubgraphEngine
(
KernelContext
*
ctx
,
int
block_idx
,
cpp
::
BlockDesc
*
block_desc
,
const
std
::
vector
<
std
::
string
>&
input_names
,
const
std
::
vector
<
std
::
string
>&
output_names
,
Scope
*
scope
)
:
subgraph
::
Engine
(
ctx
,
block_idx
,
block_desc
,
input_names
,
output_names
,
scope
)
{}
protected:
int
BuildDeviceProgram
()
override
;
int
LaunchDeviceProgram
()
override
;
void
InitDeviceTensor
()
override
;
bool
InputShapeChanged
()
override
;
std
::
string
GenerateModelCacheName
()
const
;
bool
PrepareWorkspaceForDeviceProgram
()
override
;
bool
BuildDeviceProgram
()
override
;
bool
LaunchDeviceProgram
()
override
;
std
::
string
model_name_
{
"model.om"
};
std
::
vector
<
std
::
vector
<
int64_t
>>
inputs_shape_
{};
std
::
map
<
std
::
vector
<
std
::
vector
<
int64_t
>>
,
std
::
shared_ptr
<
device_program_t
>>
device_program_map_
{};
std
::
vector
<
std
::
string
>
device_inames_
{};
std
::
vector
<
std
::
string
>
device_onames_
{};
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>
device_itensors_
{};
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>
device_otensors_
{};
std
::
map
<
std
::
vector
<
std
::
vector
<
int64_t
>>
,
std
::
shared_ptr
<
DeviceProgram
>>
device_programs_
;
};
class
SubgraphCompute
:
public
KernelLite
<
TARGET
(
kNPU
),
PRECISION
(
kAny
)
>
{
...
...
lite/utils/env.h
浏览文件 @
97b54fbe
...
...
@@ -22,6 +22,8 @@
#define SUBGRAPH_CUSTOM_PARTITION_CONFIG_FILE \
"SUBGRAPH_CUSTOM_PARTITION_CONFIG_FILE"
#define SUBGRAPH_DISABLE_ONLINE_MODE "SUBGRAPH_DISABLE_ONLINE_MODE"
namespace
paddle
{
namespace
lite
{
...
...
lite/utils/io.h
浏览文件 @
97b54fbe
...
...
@@ -120,5 +120,40 @@ static std::vector<std::string> ListDir(const std::string& path,
return
paths
;
}
static
bool
ReadFile
(
const
std
::
string
&
filename
,
std
::
vector
<
char
>*
contents
)
{
FILE
*
fp
=
fopen
(
filename
.
c_str
(),
"rb"
);
if
(
!
fp
)
return
false
;
fseek
(
fp
,
0
,
SEEK_END
);
size_t
size
=
ftell
(
fp
);
fseek
(
fp
,
0
,
SEEK_SET
);
contents
->
clear
();
contents
->
resize
(
size
);
size_t
offset
=
0
;
char
*
ptr
=
reinterpret_cast
<
char
*>
(
&
(
contents
->
at
(
0
)));
while
(
offset
<
size
)
{
size_t
already_read
=
fread
(
ptr
,
1
,
size
-
offset
,
fp
);
offset
+=
already_read
;
ptr
+=
already_read
;
}
fclose
(
fp
);
return
true
;
}
static
bool
WriteFile
(
const
std
::
string
&
filename
,
const
std
::
vector
<
char
>&
contents
)
{
FILE
*
fp
=
fopen
(
filename
.
c_str
(),
"wb"
);
if
(
!
fp
)
return
false
;
size_t
size
=
contents
.
size
();
size_t
offset
=
0
;
const
char
*
ptr
=
reinterpret_cast
<
const
char
*>
(
&
(
contents
.
at
(
0
)));
while
(
offset
<
size
)
{
size_t
already_written
=
fwrite
(
ptr
,
1
,
size
-
offset
,
fp
);
offset
+=
already_written
;
ptr
+=
already_written
;
}
fclose
(
fp
);
return
true
;
}
}
// namespace lite
}
// namespace paddle
lite/utils/md5.h
0 → 100644
浏览文件 @
97b54fbe
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
namespace
paddle
{
namespace
lite
{
std
::
string
MD5
(
std
::
string
message
)
{
const
uint32_t
shiftAmounts
[]
=
{
7
,
12
,
17
,
22
,
7
,
12
,
17
,
22
,
7
,
12
,
17
,
22
,
7
,
12
,
17
,
22
,
5
,
9
,
14
,
20
,
5
,
9
,
14
,
20
,
5
,
9
,
14
,
20
,
5
,
9
,
14
,
20
,
4
,
11
,
16
,
23
,
4
,
11
,
16
,
23
,
4
,
11
,
16
,
23
,
4
,
11
,
16
,
23
,
6
,
10
,
15
,
21
,
6
,
10
,
15
,
21
,
6
,
10
,
15
,
21
,
6
,
10
,
15
,
21
};
const
uint32_t
partsOfSines
[]
=
{
0xd76aa478
,
0xe8c7b756
,
0x242070db
,
0xc1bdceee
,
0xf57c0faf
,
0x4787c62a
,
0xa8304613
,
0xfd469501
,
0x698098d8
,
0x8b44f7af
,
0xffff5bb1
,
0x895cd7be
,
0x6b901122
,
0xfd987193
,
0xa679438e
,
0x49b40821
,
0xf61e2562
,
0xc040b340
,
0x265e5a51
,
0xe9b6c7aa
,
0xd62f105d
,
0x02441453
,
0xd8a1e681
,
0xe7d3fbc8
,
0x21e1cde6
,
0xc33707d6
,
0xf4d50d87
,
0x455a14ed
,
0xa9e3e905
,
0xfcefa3f8
,
0x676f02d9
,
0x8d2a4c8a
,
0xfffa3942
,
0x8771f681
,
0x6d9d6122
,
0xfde5380c
,
0xa4beea44
,
0x4bdecfa9
,
0xf6bb4b60
,
0xbebfbc70
,
0x289b7ec6
,
0xeaa127fa
,
0xd4ef3085
,
0x04881d05
,
0xd9d4d039
,
0xe6db99e5
,
0x1fa27cf8
,
0xc4ac5665
,
0xf4292244
,
0x432aff97
,
0xab9423a7
,
0xfc93a039
,
0x655b59c3
,
0x8f0ccc92
,
0xffeff47d
,
0x85845dd1
,
0x6fa87e4f
,
0xfe2ce6e0
,
0xa3014314
,
0x4e0811a1
,
0xf7537e82
,
0xbd3af235
,
0x2ad7d2bb
,
0xeb86d391
};
uint32_t
state
[
4
];
state
[
0
]
=
0x67452301
;
state
[
1
]
=
0xefcdab89
;
state
[
2
]
=
0x98badcfe
;
state
[
3
]
=
0x10325476
;
// Pad with zeros
int
size
=
((((
message
.
length
()
+
8
)
/
64
)
+
1
)
*
64
)
-
8
;
uint8_t
*
buf
=
reinterpret_cast
<
uint8_t
*>
(
calloc
(
size
+
64
,
1
));
memcpy
(
buf
,
message
.
c_str
(),
message
.
length
());
buf
[
message
.
length
()]
=
128
;
uint32_t
bits
=
8
*
message
.
length
();
memcpy
(
buf
+
size
,
&
bits
,
4
);
// Process at each 512-bit(64 bytes) chunk
#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))
for
(
int
offset
=
0
;
offset
<
size
;
offset
+=
64
)
{
uint32_t
A
=
state
[
0
];
uint32_t
B
=
state
[
1
];
uint32_t
C
=
state
[
2
];
uint32_t
D
=
state
[
3
];
uint32_t
*
W
=
reinterpret_cast
<
uint32_t
*>
(
buf
+
offset
);
for
(
uint32_t
i
=
0
;
i
<
64
;
i
++
)
{
uint32_t
F
,
g
;
if
(
i
<
16
)
{
F
=
(
B
&
C
)
|
((
~
B
)
&
D
);
g
=
i
;
}
else
if
(
i
<
32
)
{
F
=
(
D
&
B
)
|
((
~
D
)
&
C
);
g
=
(
5
*
i
+
1
)
%
16
;
}
else
if
(
i
<
48
)
{
F
=
B
^
C
^
D
;
g
=
(
3
*
i
+
5
)
%
16
;
}
else
{
F
=
C
^
(
B
|
(
~
D
));
g
=
(
7
*
i
)
%
16
;
}
uint32_t
T
=
D
;
D
=
C
;
C
=
B
;
B
=
B
+
LEFTROTATE
((
A
+
F
+
partsOfSines
[
i
]
+
W
[
g
]),
shiftAmounts
[
i
]);
A
=
T
;
}
state
[
0
]
+=
A
;
state
[
1
]
+=
B
;
state
[
2
]
+=
C
;
state
[
3
]
+=
D
;
}
#undef LEFTROTATE
free
(
buf
);
// Convert digest to string
std
::
string
res
;
res
.
reserve
(
16
<<
1
);
const
uint8_t
*
digest
=
reinterpret_cast
<
uint8_t
*>
(
state
);
char
hex
[
3
];
for
(
size_t
i
=
0
;
i
<
16
;
i
++
)
{
snprintf
(
hex
,
sizeof
(
hex
),
"%02x"
,
digest
[
i
]);
res
.
append
(
hex
);
}
return
res
;
}
}
// namespace lite
}
// namespace paddle
lite/utils/string.h
浏览文件 @
97b54fbe
...
...
@@ -67,6 +67,31 @@ static std::string to_string(int index) {
return
std
::
string
(
buffer
);
}
template
<
typename
T
=
std
::
string
>
static
T
parse_string
(
const
std
::
string
&
v
)
{
return
v
;
}
template
<
>
int32_t
parse_string
<
int32_t
>
(
const
std
::
string
&
v
)
{
return
std
::
stoi
(
v
);
}
template
<
>
int64_t
parse_string
<
int64_t
>
(
const
std
::
string
&
v
)
{
return
std
::
stoll
(
v
);
}
template
<
>
float
parse_string
<
float
>
(
const
std
::
string
&
v
)
{
return
std
::
stof
(
v
);
}
template
<
>
double
parse_string
<
double
>
(
const
std
::
string
&
v
)
{
return
std
::
stod
(
v
);
}
template
<
typename
T
>
std
::
string
Join
(
const
std
::
vector
<
T
>&
vec
,
const
std
::
string
&
delim
)
{
if
(
vec
.
empty
())
return
""
;
...
...
@@ -91,19 +116,20 @@ static std::string Repr(const std::vector<std::string>& v) {
return
"{"
+
Join
(
tmp
,
","
)
+
"}"
;
}
static
std
::
vector
<
std
::
string
>
Split
(
const
std
::
string
&
original
,
const
std
::
string
&
separator
)
{
std
::
vector
<
std
::
string
>
results
;
template
<
class
T
=
std
::
string
>
static
std
::
vector
<
T
>
Split
(
const
std
::
string
&
original
,
const
std
::
string
&
separator
)
{
std
::
vector
<
T
>
results
;
std
::
string
::
size_type
pos1
,
pos2
;
pos2
=
original
.
find
(
separator
);
pos1
=
0
;
while
(
std
::
string
::
npos
!=
pos2
)
{
results
.
push_back
(
original
.
substr
(
pos1
,
pos2
-
pos1
));
results
.
push_back
(
parse_string
<
T
>
(
original
.
substr
(
pos1
,
pos2
-
pos1
)
));
pos1
=
pos2
+
separator
.
size
();
pos2
=
original
.
find
(
separator
,
pos1
);
}
if
(
pos1
!=
original
.
length
())
{
results
.
push_back
(
original
.
substr
(
pos1
));
results
.
push_back
(
parse_string
<
T
>
(
original
.
substr
(
pos1
)
));
}
return
results
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录