Commit e3558f53 authored by SYSU_BOND, committed by bbking

update PaddleNLP lexical_analysis for Release/1.6 (#3664)

* update for paddle 1.6

* update optimize op in paddle 1.6

* fix ernie based in paddle 1.6

* fix coding for windows
Parent 0f134803
# -*- encoding: UTF8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
......
@@ -26,12 +26,20 @@ Lexical Analysis of Chinese (LAC) is a joint lexical analysis model
git clone https://github.com/PaddlePaddle/models.git
cd models/PaddleNLP/lexical_analysis
```
#### 3. Environment dependencies
Python version requirements for PaddlePaddle: 2.7.15+ for Python 2, and 3.5.1+/3.6/3.7 for Python 3. The LAC code itself supports both Python 2 and Python 3 with no further version restriction.
### Data Preparation
#### 1. Quick download
The **datasets** and **pretrained models** used in this project can all be downloaded quickly by running the script below. If you only need part of the data, follow the instructions in the sections below to download the individual pieces.
```bash
python download.py all
```
Or, in an environment that can run shell scripts, execute:
```bash
sh download.sh
```
@@ -40,34 +48,18 @@ sh download.sh
Download the dataset files; after extraction the `./data/` directory is created
```bash
python download.py dataset
```
#### 3. Pretrained models
We have open-sourced lexical analysis models trained on our in-house dataset. They are ready for direct use and can be downloaded as follows:

```bash
# download baseline model
python download.py lac

# download ERNIE finetuned model
python download.py finetuned
```
Note: to run ERNIE finetuning, you must download the open-sourced [ERNIE](https://baidu-nlp.bj.bcebos.com/ERNIE_stable-1.0.1.tar.gz) model yourself from [https://baidu-nlp.bj.bcebos.com/ERNIE_stable-1.0.1.tar.gz](https://baidu-nlp.bj.bcebos.com/ERNIE_stable-1.0.1.tar.gz) and extract it into the `./pretrained/` directory.
@@ -189,6 +181,7 @@ python inference_model.py \
├── compare.py          # script comparing LAC against other open-source word segmenters
├── creator.py          # script that creates the network and the data readers
├── data/               # directory holding the datasets
├── downloads.py        # script for downloading data and models
├── downloads.sh        # script for downloading data and models
├── eval.py             # evaluation script for lexical analysis
├── inference_model.py  # script that saves an inference_model, used to prepare the deployment environment
......
# -*- coding: utf-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -11,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
evaluate wordseg for LAC and other open-source wordseg tools
"""
......
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -11,9 +12,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Define functions that create the lexical analysis model and its data readers
"""
import sys
import os
@@ -24,22 +25,24 @@ import paddle.fluid as fluid
from paddle.fluid.initializer import NormalInitializer

from reader import Dataset
from ernie_reader import SequenceLabelReader

sys.path.append("..")
from models.sequence_labeling import nets
from models.representation.ernie import ernie_encoder, ernie_pyreader
def create_model(args, vocab_size, num_labels, mode='train'):
    """create lac model"""

    # model's input data
    words = fluid.data(name='words', shape=[-1, 1], dtype='int64', lod_level=1)
    targets = fluid.data(name='targets', shape=[-1, 1], dtype='int64', lod_level=1)

    # for inference process
    if mode == 'infer':
        crf_decode = nets.lex_net(words, args, vocab_size, num_labels, for_infer=True, target=None)
        return {"feed_list": [words], "words": words, "crf_decode": crf_decode}

    # for test or train process
    avg_cost, crf_decode = nets.lex_net(words, args, vocab_size, num_labels, for_infer=False, target=targets)
@@ -54,10 +57,10 @@ def create_model(args, vocab_size, num_labels, mode='train'):
    chunk_evaluator.reset()

    ret = {
        "feed_list": [words, targets],
        "words": words,
        "targets": targets,
        "avg_cost": avg_cost,
        "crf_decode": crf_decode,
        "precision": precision,
        "recall": recall,
@@ -67,26 +70,28 @@ def create_model(args, vocab_size, num_labels, mode='train'):
        "num_label_chunks": num_label_chunks,
        "num_correct_chunks": num_correct_chunks
    }
    return ret
def create_pyreader(args, file_name, feed_list, place, model='lac', reader=None, return_reader=False, mode='train'):
    # init reader

    if model == 'lac':
        pyreader = fluid.io.PyReader(
            feed_list=feed_list,
            capacity=50,
            use_double_buffer=True,
            iterable=True
        )

        if reader == None:
            reader = Dataset(args)

        # create lac pyreader
        if mode == 'train':
            pyreader.decorate_sample_list_generator(
                fluid.io.batch(
                    fluid.io.shuffle(
                        reader.file_reader(file_name),
                        buf_size=args.traindata_shuffle_buffer
                    ),
@@ -96,7 +101,7 @@ def create_pyreader(args, file_name, feed_list, place, model='lac', reader=None,
            )
        else:
            pyreader.decorate_sample_list_generator(
                fluid.io.batch(
                    reader.file_reader(file_name, mode=mode),
                    batch_size=args.batch_size
                ),
@@ -105,49 +110,58 @@ def create_pyreader(args, file_name, feed_list, place, model='lac', reader=None,
    elif model == 'ernie':
        # create ernie pyreader
        pyreader = fluid.io.DataLoader.from_generator(
            feed_list=feed_list,
            capacity=50,
            use_double_buffer=True,
            iterable=True
        )

        if reader == None:
            reader = SequenceLabelReader(
                vocab_path=args.vocab_path,
                label_map_config=args.label_map_config,
                max_seq_len=args.max_seq_len,
                do_lower_case=args.do_lower_case,
                random_seed=args.random_seed)

        if mode == 'train':
            pyreader.set_batch_generator(
                reader.data_generator(
                    file_name, args.batch_size, args.epoch, shuffle=True, phase="train"
                ),
                places=place
            )
        else:
            pyreader.set_batch_generator(
                reader.data_generator(
                    file_name, args.batch_size, epoch=1, shuffle=False, phase=mode
                ),
                places=place
            )

    if return_reader:
        return pyreader, reader
    else:
        return pyreader
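In train mode, `create_pyreader` wraps the file reader in a buffered shuffle and then fixed-size batching. The two wrappers can be sketched in plain Python (hypothetical mini versions of `fluid.io.shuffle` and `fluid.io.batch`, for illustration only; not Paddle's actual implementations):

```python
import random

def shuffle(reader, buf_size):
    # Buffered shuffle: collect buf_size samples, shuffle the buffer,
    # then emit them one by one (mirrors fluid.io.shuffle).
    def gen():
        buf = []
        for sample in reader():
            buf.append(sample)
            if len(buf) >= buf_size:
                random.shuffle(buf)
                for s in buf:
                    yield s
                buf = []
        random.shuffle(buf)
        for s in buf:
            yield s
    return gen

def batch(reader, batch_size):
    # Group samples from a generator into fixed-size batches;
    # the last batch may be smaller (mirrors fluid.io.batch).
    def gen():
        buf = []
        for sample in reader():
            buf.append(sample)
            if len(buf) == batch_size:
                yield buf
                buf = []
        if buf:
            yield buf
    return gen

samples = lambda: iter(range(10))
batches = list(batch(shuffle(samples, buf_size=4), batch_size=3)())
print([len(b) for b in batches])  # [3, 3, 3, 1]
```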
def create_ernie_model(args, ernie_config):
    """
    Create Model for LAC based on ERNIE encoder
    """
    # ERNIE's input data

    src_ids = fluid.data(name='src_ids', shape=[-1, args.max_seq_len, 1], dtype='int64')
    sent_ids = fluid.data(name='sent_ids', shape=[-1, args.max_seq_len, 1], dtype='int64')
    pos_ids = fluid.data(name='pos_ids', shape=[-1, args.max_seq_len, 1], dtype='int64')
    input_mask = fluid.data(name='input_mask', shape=[-1, args.max_seq_len, 1], dtype='float32')
    padded_labels = fluid.data(name='padded_labels', shape=[-1, args.max_seq_len, 1], dtype='int64')
    seq_lens = fluid.data(name='seq_lens', shape=[-1], dtype='int64', lod_level=0)

    squeeze_labels = fluid.layers.squeeze(padded_labels, axes=[-1])

    # ernie_pyreader
    ernie_inputs = {
        "src_ids": src_ids,
        "sent_ids": sent_ids,
@@ -176,9 +190,10 @@ def create_ernie_model(args, ernie_config):
            name='crfw',
            learning_rate=args.crf_learning_rate),
        length=seq_lens)

    avg_cost = fluid.layers.mean(x=crf_cost)
    crf_decode = fluid.layers.crf_decoding(
        input=emission, param_attr=fluid.ParamAttr(name='crfw'), length=seq_lens)

    (precision, recall, f1_score, num_infer_chunks, num_label_chunks,
     num_correct_chunks) = fluid.layers.chunk_eval(
@@ -192,17 +207,17 @@ def create_ernie_model(args, ernie_config):
    ret = {
        "feed_list": [src_ids, sent_ids, pos_ids, input_mask, padded_labels, seq_lens],
        "words": src_ids,
        "labels": padded_labels,
        "avg_cost": avg_cost,
        "crf_decode": crf_decode,
        "precision": precision,
        "recall": recall,
        "f1_score": f1_score,
        "chunk_evaluator": chunk_evaluator,
        "num_infer_chunks": num_infer_chunks,
        "num_label_chunks": num_label_chunks,
        "num_correct_chunks": num_correct_chunks
    }
    return ret
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module provides reader for ernie model
"""
import sys
from collections import namedtuple
import numpy as np
sys.path.append("..")
from preprocess.ernie.task_reader import BaseReader, tokenization
def pad_batch_data(insts,
                   pad_idx=0,
                   max_len=128,
                   return_pos=False,
                   return_input_mask=False,
                   return_max_len=False,
                   return_num_token=False,
                   return_seq_lens=False):
    """
    Pad the instances to the max sequence length in batch, and generate the
    corresponding position data and input mask.
    """
    return_list = []
    # max_len = max(len(inst) for inst in insts)

    # Any token included in dict can be used to pad, since the paddings' loss
    # will be masked out by weights and make no effect on parameter gradients.
    inst_data = np.array(
        [inst + list([pad_idx] * (max_len - len(inst))) for inst in insts])
    return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]

    # position data
    if return_pos:
        inst_pos = np.array([
            list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst))
            for inst in insts
        ])
        return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]

    if return_input_mask:
        # This is used to avoid attention on paddings.
        input_mask_data = np.array([[1] * len(inst) + [0] *
                                    (max_len - len(inst)) for inst in insts])
        input_mask_data = np.expand_dims(input_mask_data, axis=-1)
        return_list += [input_mask_data.astype("float32")]

    if return_max_len:
        return_list += [max_len]

    if return_num_token:
        num_token = 0
        for inst in insts:
            num_token += len(inst)
        return_list += [num_token]

    if return_seq_lens:
        seq_lens = np.array([len(inst) for inst in insts])
        return_list += [seq_lens.astype("int64").reshape([-1])]

    return return_list if len(return_list) > 1 else return_list[0]
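A quick standalone illustration of the shapes `pad_batch_data` produces (numpy only; this inlines the padding logic for two example sequences rather than importing the function):

```python
import numpy as np

# Two token-id sequences padded to max_len=6 with pad_idx=0, plus the
# float attention mask and the true sequence lengths, matching the
# reshape([-1, max_len, 1]) layout used above.
insts = [[5, 9, 2], [7, 3, 8, 1, 4]]
max_len, pad_idx = 6, 0

padded = np.array(
    [inst + [pad_idx] * (max_len - len(inst)) for inst in insts]
).astype("int64").reshape([-1, max_len, 1])

input_mask = np.expand_dims(
    np.array([[1] * len(inst) + [0] * (max_len - len(inst)) for inst in insts]),
    axis=-1).astype("float32")

seq_lens = np.array([len(inst) for inst in insts]).astype("int64").reshape([-1])

print(padded.shape)       # (2, 6, 1)
print(seq_lens.tolist())  # [3, 5]
```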
class SequenceLabelReader(BaseReader):
    """SequenceLabelReader"""

    def _pad_batch_records(self, batch_records):
        batch_token_ids = [record.token_ids for record in batch_records]
        batch_text_type_ids = [record.text_type_ids for record in batch_records]
        batch_position_ids = [record.position_ids for record in batch_records]
        batch_label_ids = [record.label_ids for record in batch_records]

        # padding
        padded_token_ids, input_mask, batch_seq_lens = pad_batch_data(
            batch_token_ids,
            max_len=self.max_seq_len,
            pad_idx=self.pad_id,
            return_input_mask=True,
            return_seq_lens=True)
        padded_text_type_ids = pad_batch_data(
            batch_text_type_ids, max_len=self.max_seq_len, pad_idx=self.pad_id)
        padded_position_ids = pad_batch_data(
            batch_position_ids, max_len=self.max_seq_len, pad_idx=self.pad_id)
        padded_label_ids = pad_batch_data(
            batch_label_ids, max_len=self.max_seq_len, pad_idx=len(self.label_map) - 1)

        return_list = [
            padded_token_ids, padded_text_type_ids, padded_position_ids,
            input_mask, padded_label_ids, batch_seq_lens
        ]
        return return_list
    def _reseg_token_label(self, tokens, labels, tokenizer):
        assert len(tokens) == len(labels)
        ret_tokens = []
        ret_labels = []
        for token, label in zip(tokens, labels):
            sub_token = tokenizer.tokenize(token)
            if len(sub_token) == 0:
                continue
            ret_tokens.extend(sub_token)
            ret_labels.append(label)
            if len(sub_token) < 2:
                continue
            sub_label = label
            if label.startswith("B-"):
                sub_label = "I-" + label[2:]
            ret_labels.extend([sub_label] * (len(sub_token) - 1))

        assert len(ret_tokens) == len(ret_labels)
        return ret_tokens, ret_labels
    def _convert_example_to_record(self, example, max_seq_length, tokenizer):
        tokens = tokenization.convert_to_unicode(example.text_a).split(u"\2")
        labels = tokenization.convert_to_unicode(example.label).split(u"\2")
        tokens, labels = self._reseg_token_label(tokens, labels, tokenizer)

        if len(tokens) > max_seq_length - 2:
            tokens = tokens[0:(max_seq_length - 2)]
            labels = labels[0:(max_seq_length - 2)]

        tokens = ["[CLS]"] + tokens + ["[SEP]"]
        token_ids = tokenizer.convert_tokens_to_ids(tokens)
        position_ids = list(range(len(token_ids)))
        text_type_ids = [0] * len(token_ids)
        no_entity_id = len(self.label_map) - 1
        labels = [
            label if label in self.label_map else u"O" for label in labels
        ]
        label_ids = [no_entity_id] + [
            self.label_map[label] for label in labels
        ] + [no_entity_id]

        Record = namedtuple(
            'Record',
            ['token_ids', 'text_type_ids', 'position_ids', 'label_ids'])
        record = Record(
            token_ids=token_ids,
            text_type_ids=text_type_ids,
            position_ids=position_ids,
            label_ids=label_ids)
        return record
\ No newline at end of file
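The `_reseg_token_label` rule above can be exercised in isolation. A hypothetical sketch with a character-level stand-in for the tokenizer (`tokenize` is just `list` here); it mirrors the method's logic rather than calling it:

```python
def reseg(tokens, labels, tokenize):
    # When the tokenizer splits a token into sub-tokens, the first
    # sub-token keeps the original label and the remaining ones get
    # "I-<type>" if the original label was "B-<type>".
    ret_tokens, ret_labels = [], []
    for token, label in zip(tokens, labels):
        sub_token = tokenize(token)
        if not sub_token:
            continue
        ret_tokens.extend(sub_token)
        ret_labels.append(label)
        if len(sub_token) < 2:
            continue
        sub_label = "I-" + label[2:] if label.startswith("B-") else label
        ret_labels.extend([sub_label] * (len(sub_token) - 1))
    return ret_tokens, ret_labels

tokens, labels = reseg(["ab", "c"], ["B-LOC", "O"], list)
print(tokens)  # ['a', 'b', 'c']
print(labels)  # ['B-LOC', 'I-LOC', 'O']
```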
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -11,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import time
@@ -25,6 +26,7 @@ import reader
import creator
sys.path.append('../models/')
from model_check import check_cuda
from model_check import check_version

parser = argparse.ArgumentParser(__doc__)
# 1. model parameters
@@ -109,4 +111,5 @@ def test_process(exe, program, reader, test_ret):
if __name__ == '__main__':
    args = parser.parse_args()
    check_cuda(args.use_cuda)
    check_version()
    do_eval(args)
@@ -12,6 +12,7 @@ import reader
import utils
sys.path.append('../models/')
from model_check import check_cuda
from model_check import check_version

def save_inference_model(args):
@@ -101,6 +102,7 @@ if __name__=="__main__":
    utils.load_yaml(parser, 'conf/args.yaml')
    args = parser.parse_args()
    check_cuda(args.use_cuda)
    check_version()
    print("save inference model")
    save_inference_model(args)
......
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -11,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import time
@@ -25,6 +25,7 @@ import reader
import creator
sys.path.append('../models/')
from model_check import check_cuda
from model_check import check_version

parser = argparse.ArgumentParser(__doc__)
# 1. model parameters
@@ -120,4 +121,5 @@ def infer_process(exe, program, reader, fetch_vars, dataset):
if __name__ == "__main__":
    args = parser.parse_args()
    check_cuda(args.use_cuda)
    check_version()
    do_infer(args)
@@ -37,6 +37,7 @@ import utils
sys.path.append("..")
from models.representation.ernie import ErnieConfig
from models.model_check import check_cuda
from models.model_check import check_version

def evaluate(exe, test_program, test_pyreader, test_ret):
    """
@@ -160,6 +161,7 @@ def do_train(args):
        fetch_list = []
        start_time = time.time()
        outputs = exe.run(program=compiled_prog, feed=data[0], fetch_list=fetch_list)

        end_time = time.time()
        if steps % args.print_steps == 0:
@@ -271,6 +273,7 @@ if __name__ == "__main__":
    utils.load_yaml(parser, './conf/ernie_args.yaml')
    args = parser.parse_args()
    check_cuda(args.use_cuda)
    check_version()
    utils.print_arguments(args)

    if args.mode == 'train':
......
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -11,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
@@ -31,6 +31,7 @@ import creator
from eval import test_process
sys.path.append('../models/')
from model_check import check_cuda
from model_check import check_version

# the function to train model
def do_train(args):
@@ -86,9 +87,9 @@ def do_train(args):
    print("%d %s are used to train model" % (dev_count, device))

    # multi cpu/gpu config
    exec_strategy = fluid.ExecutionStrategy()
    # exec_strategy.num_threads = dev_count * 6
    build_strategy = fluid.compiler.BuildStrategy()
    # build_strategy.enable_inplace = True

    compiled_prog = fluid.compiler.CompiledProgram(train_program).with_data_parallel(
        loss_name=train_ret['avg_cost'].name,
@@ -191,6 +192,7 @@ if __name__ == "__main__":
    args = parser.parse_args()
    check_cuda(args.use_cuda)
    check_version()
    print(args)
......
@@ -50,7 +50,7 @@ class ArgumentGroup(object):
def load_yaml(parser, file_name, **kwargs):
    with open(file_name) as f:
        args = yaml.load(f, Loader=yaml.FullLoader)
        for title in args:
            group = parser.add_argument_group(title=title, description='')
            for name in args[title]:
......
@@ -85,7 +85,7 @@ def lex_net(word, args, vocab_size, num_labels, for_infer=True, target=None):
    """
    Configure the network
    """
    word_embedding = fluid.embedding(
        input=word,
        size=[vocab_size, word_emb_dim],
        dtype='float32',
......