Commit 5566db12
Authored Apr 13, 2020 by wanghaoshuang

Merge branch 'bert' of https://github.com/wanghaoshuang/PaddleSlim into bert

Parents: b40815f4, fa01827d

Showing 1 changed file with 245 additions and 0 deletions.

paddleslim/teachers/bert/utils/init.py (new file, mode 100644) +245 −0
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import os
import six
import ast
import copy

import numpy as np
import paddle.fluid as fluid


def cast_fp32_to_fp16(exe, main_program):
    print("Cast parameters to float16 data format.")
    for param in main_program.global_block().all_parameters():
        if not param.name.endswith(".master"):
            param_t = fluid.global_scope().find_var(param.name).get_tensor()
            data = np.array(param_t)
            if param.name.find("layer_norm") == -1:
                param_t.set(np.float16(data).view(np.uint16), exe.place)
            master_param_var = fluid.global_scope().find_var(param.name +
                                                             ".master")
            if master_param_var is not None:
                master_param_var.get_tensor().set(data, exe.place)


def init_checkpoint(exe, init_checkpoint_path, main_program, use_fp16=False):
    assert os.path.exists(
        init_checkpoint_path), "[%s] cann't be found." % init_checkpoint_path

    def existed_persitables(var):
        if not fluid.io.is_persistable(var):
            return False
        return os.path.exists(os.path.join(init_checkpoint_path, var.name))

    fluid.io.load_vars(
        exe,
        init_checkpoint_path,
        main_program=main_program,
        predicate=existed_persitables)
    print("Load model from {}".format(init_checkpoint_path))

    if use_fp16:
        cast_fp32_to_fp16(exe, main_program)


def init_pretraining_params(exe,
                            pretraining_params_path,
                            main_program,
                            use_fp16=False):
    assert os.path.exists(
        pretraining_params_path
    ), "[%s] cann't be found." % pretraining_params_path

    def existed_params(var):
        if not isinstance(var, fluid.framework.Parameter):
            return False
        return os.path.exists(os.path.join(pretraining_params_path, var.name))

    fluid.io.load_vars(
        exe,
        pretraining_params_path,
        main_program=main_program,
        predicate=existed_params)
    print("Load pretraining parameters from {}.".format(
        pretraining_params_path))

    if use_fp16:
        cast_fp32_to_fp16(exe, main_program)


def init_from_static_model(dir_path, cls_model, bert_config):
    def load_numpy_weight(file_name):
        if six.PY2:
            res = np.load(
                os.path.join(dir_path, file_name), allow_pickle=True)
        else:
            res = np.load(
                os.path.join(dir_path, file_name),
                allow_pickle=True,
                encoding='latin1')
        assert res is not None
        return res

    # load word embedding
    _param = load_numpy_weight("word_embedding")
    cls_model.bert_layer._src_emb.set_dict({"weight": _param})
    print("INIT word embedding")

    _param = load_numpy_weight("pos_embedding")
    cls_model.bert_layer._pos_emb.set_dict({"weight": _param})
    print("INIT pos embedding")

    _param = load_numpy_weight("sent_embedding")
    cls_model.bert_layer._sent_emb.set_dict({"weight": _param})
    print("INIT sent embedding")

    _param0 = load_numpy_weight("pooled_fc.w_0")
    _param1 = load_numpy_weight("pooled_fc.b_0")
    cls_model.bert_layer.pooled_fc.set_dict({
        "weight": _param0,
        "bias": _param1
    })
    print("INIT pooled_fc")

    _param0 = load_numpy_weight("pre_encoder_layer_norm_scale")
    _param1 = load_numpy_weight("pre_encoder_layer_norm_bias")
    cls_model.bert_layer.pre_process_layer._sub_layers[
        "layer_norm_0"].set_dict({
            "weight": _param0,
            "bias": _param1
        })
    print("INIT pre_encoder layer norm")

    for _i in range(bert_config["num_hidden_layers"]):
        _param_weight = "encoder_layer_%d_multi_head_att_query_fc.w_0" % _i
        _param_bias = "encoder_layer_%d_multi_head_att_query_fc.b_0" % _i
        _param_weight = load_numpy_weight(_param_weight)
        _param_bias = load_numpy_weight(_param_bias)
        cls_model.bert_layer._encoder._sub_layers[
            "esl_%d" % _i]._multihead_attention_layer._q_fc.set_dict({
                "weight": _param_weight,
                "bias": _param_bias
            })
        print("INIT multi_head_att_query_fc %d" % _i)

        _param_weight = "encoder_layer_%d_multi_head_att_key_fc.w_0" % _i
        _param_bias = "encoder_layer_%d_multi_head_att_key_fc.b_0" % _i
        _param_weight = load_numpy_weight(_param_weight)
        _param_bias = load_numpy_weight(_param_bias)
        cls_model.bert_layer._encoder._sub_layers[
            "esl_%d" % _i]._multihead_attention_layer._k_fc.set_dict({
                "weight": _param_weight,
                "bias": _param_bias
            })
        print("INIT multi_head_att_key_fc %d" % _i)

        _param_weight = "encoder_layer_%d_multi_head_att_value_fc.w_0" % _i
        _param_bias = "encoder_layer_%d_multi_head_att_value_fc.b_0" % _i
        _param_weight = load_numpy_weight(_param_weight)
        _param_bias = load_numpy_weight(_param_bias)
        cls_model.bert_layer._encoder._sub_layers[
            "esl_%d" % _i]._multihead_attention_layer._v_fc.set_dict({
                "weight": _param_weight,
                "bias": _param_bias
            })
        print("INIT multi_head_att_value_fc %d" % _i)

        # init output fc
        _param_weight = "encoder_layer_%d_multi_head_att_output_fc.w_0" % _i
        _param_bias = "encoder_layer_%d_multi_head_att_output_fc.b_0" % _i
        _param_weight = load_numpy_weight(_param_weight)
        _param_bias = load_numpy_weight(_param_bias)
        cls_model.bert_layer._encoder._sub_layers[
            "esl_%d" % _i]._multihead_attention_layer._proj_fc.set_dict({
                "weight": _param_weight,
                "bias": _param_bias
            })
        print("INIT multi_head_att_output_fc %d" % _i)

        # init layer_norm 1
        _param_weight = "encoder_layer_%d_post_att_layer_norm_scale" % _i
        _param_bias = "encoder_layer_%d_post_att_layer_norm_bias" % _i
        _param_weight = load_numpy_weight(_param_weight)
        _param_bias = load_numpy_weight(_param_bias)
        cls_model.bert_layer._encoder._sub_layers[
            "esl_%d" % _i]._postprocess_layer.layer_norm_0.set_dict({
                "weight": _param_weight,
                "bias": _param_bias
            })
        print("INIT layer norm in attention at %d layer" % _i)

        # init layer_norm 2
        _param_weight = "encoder_layer_%d_post_ffn_layer_norm_scale" % _i
        _param_bias = "encoder_layer_%d_post_ffn_layer_norm_bias" % _i
        _param_weight = load_numpy_weight(_param_weight)
        _param_bias = load_numpy_weight(_param_bias)
        cls_model.bert_layer._encoder._sub_layers[
            "esl_%d" % _i]._postprocess_layer2.layer_norm_0.set_dict({
                "weight": _param_weight,
                "bias": _param_bias
            })
        print("INIT layer norm in FFN at %d layer" % _i)

        # init FFN 1
        _param_weight = "encoder_layer_%d_ffn_fc_0.w_0" % _i
        _param_bias = "encoder_layer_%d_ffn_fc_0.b_0" % _i
        _param_weight = load_numpy_weight(_param_weight)
        _param_bias = load_numpy_weight(_param_bias)
        cls_model.bert_layer._encoder._sub_layers[
            "esl_%d" % _i]._positionwise_feed_forward._i2h.set_dict({
                "weight": _param_weight,
                "bias": _param_bias
            })
        print("INIT FFN-1 at %d layer" % _i)

        # init FFN 2
        _param_weight = "encoder_layer_%d_ffn_fc_1.w_0" % _i
        _param_bias = "encoder_layer_%d_ffn_fc_1.b_0" % _i
        _param_weight = load_numpy_weight(_param_weight)
        _param_bias = load_numpy_weight(_param_bias)
        cls_model.bert_layer._encoder._sub_layers[
            "esl_%d" % _i]._positionwise_feed_forward._h2o.set_dict({
                "weight": _param_weight,
                "bias": _param_bias
            })
        print("INIT FFN-2 at %d layer" % _i)

    # init cls fc
    #_param_weight = "cls_out_w"
    #_param_bias = "cls_out_b"
    #_param_weight = load_numpy_weight(_param_weight)
    #_param_bias = load_numpy_weight(_param_bias)
    #cls_model.cls_fc.set_dict({"weight":_param_weight, "bias":_param_bias})
    #print("INIT CLS FC layer")

    return True
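
For context, below is a minimal usage sketch of how these helpers might be called from a training script. It is not part of this commit: the import path is inferred from the file location, and the checkpoint/parameter directories, the `cls_model` instance, and the `bert_config` dict are placeholder assumptions.

# Minimal usage sketch (assumed paths and objects, not from this commit).
import paddle.fluid as fluid

from paddleslim.teachers.bert.utils.init import (
    init_checkpoint, init_pretraining_params, init_from_static_model)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
main_program = fluid.default_main_program()
exe.run(fluid.default_startup_program())

# Resume every persistable variable from a previously saved checkpoint
# directory (hypothetical path).
init_checkpoint(exe, "./checkpoints/step_100", main_program, use_fp16=False)

# Or: initialize only model parameters from a pre-trained BERT directory
# (hypothetical path); non-parameter variables keep their startup values.
init_pretraining_params(exe, "./pretrained_bert/params", main_program)

# For a dygraph classifier that wraps a BERT layer, copy weights exported as
# per-parameter numpy files from a static-graph model (cls_model/bert_config
# are assumed to exist in the caller's script).
# init_from_static_model("./static_bert_params", cls_model, bert_config)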