Commit 3eda4ade authored by xujinanne

add lac

Parent 9ce0dcb3
model:
word_emb_dim:
val: 128
meaning: "The dimension in which a word is embedded."
grnn_hidden_dim:
val: 128
meaning: "The number of hidden nodes in the GRNN layer."
bigru_num:
val: 2
meaning: "The number of bi_gru layers in the network."
init_checkpoint:
val: ""
meaning: "Path to init model"
inference_save_dir:
val: ""
meaning: "Path to save inference model"
train:
random_seed:
val: 0
meaning: "Random seed for training"
print_steps:
val: 1
meaning: "Print training metrics every print_steps batches."
save_steps:
val: 10
meaning: "Save the model every save_steps batches."
validation_steps:
val: 10
meaning: "Run validation every validation_steps batches."
batch_size:
val: 100
meaning: "The number of sequences contained in a mini-batch"
epoch:
val: 10
meaning: "Corpus iteration num"
use_cuda:
val: True
meaning: "If set, use GPU for training."
traindata_shuffle_buffer:
val: 20000
meaning: "The buffer size used in shuffle the training data."
base_learning_rate:
val: 0.001
meaning: "The basic learning rate that affects the entire network."
emb_learning_rate:
val: 2
meaning: "The real learning rate of the embedding layer will be (emb_learning_rate * base_learning_rate)."
crf_learning_rate:
val: 0.2
meaning: "The real learning rate of the CRF layer will be (crf_learning_rate * base_learning_rate)."
enable_ce:
val: false
meaning: 'If set, run the task with continuous evaluation logs.'
cpu_num:
val: 10
meaning: "The number of CPU cores used to train the model; this argument is ignored when use_cuda=True."
data:
word_dict_path:
val: "./conf/word.dic"
meaning: "The path of the word dictionary."
label_dict_path:
val: "./conf/tag.dic"
meaning: "The path of the label dictionary."
word_rep_dict_path:
val: "./conf/q2b.dic"
meaning: "The path of the word replacement dictionary."
train_data:
val: "./data/train.tsv"
meaning: "The path of the training data file."
test_data:
val: "./data/test.tsv"
meaning: "The path of the test data file."
infer_data:
val: "./data/infer.tsv"
meaning: "The path of the inference data file."
model_save_dir:
val: "./models"
meaning: "The model will be saved in this path."
max_seq_lens:
val: 65
meaning: "The maximum sentence length of the data."
model:
word_emb_dim:
val: 128
meaning: "The dimension in which a word is embedded."
grnn_hidden_dim:
val: 128
meaning: "The number of hidden nodes in the GRNN layer."
bigru_num:
val: 2
meaning: "The number of bi_gru layers in the network."
init_checkpoint:
val: ""
meaning: "Path to init model"
inference_save_dir:
val: ""
meaning: "Path to save inference model"
train:
random_seed:
val: 0
meaning: "Random seed for training"
print_steps:
val: 1
meaning: "Print training metrics every print_steps batches."
save_steps:
val: 10
meaning: "Save the model every save_steps batches."
validation_steps:
val: 10
meaning: "Run validation every validation_steps batches."
batch_size:
val: 300
meaning: "The number of sequences contained in a mini-batch"
epoch:
val: 10
meaning: "Corpus iteration num"
use_cuda:
val: False
meaning: "If set, use GPU for training."
traindata_shuffle_buffer:
val: 20000
meaning: "The buffer size used in shuffle the training data."
base_learning_rate:
val: 0.001
meaning: "The basic learning rate that affects the entire network."
emb_learning_rate:
val: 2
meaning: "The real learning rate of the embedding layer will be (emb_learning_rate * base_learning_rate)."
crf_learning_rate:
val: 0.2
meaning: "The real learning rate of the CRF layer will be (crf_learning_rate * base_learning_rate)."
enable_ce:
val: false
meaning: 'If set, run the task with continuous evaluation logs.'
cpu_num:
val: 10
meaning: "The number of CPU cores used to train the model; this argument is ignored when use_cuda=True."
data:
word_dict_path:
val: "./conf/word.dic"
meaning: "The path of the word dictionary."
label_dict_path:
val: "./conf/tag.dic"
meaning: "The path of the label dictionary."
word_rep_dict_path:
val: "./conf/q2b.dic"
meaning: "The path of the word replacement dictionary."
train_data:
val: "./data/train.tsv"
meaning: "The path of the training data file."
test_data:
val: "./data/test.tsv"
meaning: "The path of the test data file."
infer_data:
val: "./data/infer.tsv"
meaning: "The path of the inference data file."
model_save_dir:
val: "./models"
meaning: "The model will be saved in this path."
model:
ernie_config_path:
val: "../LARK/ERNIE/config/ernie_config.json"
meaning: "Path to the json file for ernie model config."
init_checkpoint:
val: ""
meaning: "Path to init model"
mode:
val: "train"
meaning: "Set to train, eval, or infer."
init_pretraining_params:
val: "pretrained/params/"
meaning: "Path to the pre-trained parameters to fine-tune from. If 'init_checkpoint' is set, this argument is ignored."
train:
random_seed:
val: 0
meaning: "Random seed for training"
batch_size:
val: 10
meaning: "The number of sequences contained in a mini-batch"
epoch:
val: 10
meaning: "Corpus iteration num"
use_cuda:
val: True
meaning: "If set, use GPU for training."
base_learning_rate:
val: 0.0002
meaning: "The basic learning rate that affects the entire network."
init_bound:
val: 0.1
meaning: "The bound of the uniform distribution used for parameter initialization."
crf_learning_rate:
val: 0.2
meaning: "The real learning rate of the CRF layer will be (crf_learning_rate * base_learning_rate)."
cpu_num:
val: 10
meaning: "The number of CPU cores used to train the model; only effective when use_cuda=False."
print_steps:
val: 1
meaning: "Print training metrics every print_steps batches."
save_steps:
val: 10
meaning: "Save the model every save_steps batches."
validation_steps:
val: 5
meaning: "Run validation every validation_steps batches."
data:
vocab_path:
val: "../LARK/ERNIE/config/vocab.txt"
meaning: "The path of the vocabulary."
label_map_config:
val: "./conf/label_map.json"
meaning: "The path of the label dictionary."
num_labels:
val: 57
meaning: "The number of labels."
max_seq_len:
val: 128
meaning: "The maximum number of tokens in a sequence."
do_lower_case:
val: True
meaning: "Whether to lower case the input text. Should be True for uncased models and False for cased models."
train_data:
val: "./data/train.tsv"
meaning: "The path of the training data file."
test_data:
val: "./data/test.tsv"
meaning: "The path of the test data file."
infer_data:
val: "./data/test.tsv"
meaning: "The path of the inference data file."
model_save_dir:
val: "./ernie_models"
meaning: "The model will be saved in this path."
{"d-B": 8, "c-I": 7, "PER-I": 49, "nr-B": 16, "u-B": 36, "c-B": 6, "nr-I": 17, "an-I": 5, "ns-B": 18, "vn-I": 43, "w-B": 44, "an-B": 4, "PER-B": 48, "vn-B": 42, "ns-I": 19, "a-I": 1, "r-B": 30, "xc-B": 46, "LOC-B": 50, "ad-I": 3, "nz-B": 24, "u-I": 37, "a-B": 0, "ad-B": 2, "vd-I": 41, "nw-B": 22, "m-I": 13, "d-I": 9, "n-B": 14, "nz-I": 25, "vd-B": 40, "nw-I": 23, "n-I": 15, "nt-B": 20, "ORG-I": 53, "nt-I": 21, "ORG-B": 52, "LOC-I": 51, "t-B": 34, "TIME-I": 55, "O": 56, "s-I": 33, "f-I": 11, "TIME-B": 54, "t-I": 35, "f-B": 10, "s-B": 32, "r-I": 31, "q-B": 28, "v-I": 39, "v-B": 38, "w-I": 45, "q-I": 29, "p-B": 26, "xc-I": 47, "m-B": 12, "p-I": 27}
 
、 ,
。 .
— -
~ ~
‖ |
… .
‘ '
’ '
“ "
” "
〔 (
〕 )
〈 <
〉 >
「 '
」 '
『 "
』 "
〖 [
〗 ]
【 [
】 ]
∶ :
$ $
! !
" "
# #
% %
& &
' '
( (
) )
* *
+ +
, ,
- -
. .
/ /
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
: :
; ;
< <
= =
> >
? ?
@ @
A a
B b
C c
D d
E e
F f
G g
H h
I i
J j
K k
L l
M m
N n
O o
P p
Q q
R r
S s
T t
U u
V v
W w
X x
Y y
Z z
[ [
\ \
] ]
^ ^
_ _
` `
a a
b b
c c
d d
e e
f f
g g
h h
i i
j j
k k
l l
m m
n n
o o
p p
q q
r r
s s
t t
u u
v v
w w
x x
y y
z z
{ {
| |
} }
 ̄ ~
〝 "
〞 "
﹐ ,
﹑ ,
﹒ .
﹔ ;
﹕ :
﹖ ?
﹗ !
﹙ (
﹚ )
﹛ {
﹜ }
﹝ [
﹞ ]
﹟ #
﹠ &
﹡ *
﹢ +
﹣ -
﹤ <
﹥ >
﹦ =
﹨ \
﹩ $
﹪ %
﹫ @
,
A a
B b
C c
D d
E e
F f
G g
H h
I i
J j
K k
L l
M m
N n
O o
P p
Q q
R r
S s
T t
U u
V v
W w
X x
Y y
Z z
0 a-B
1 a-I
2 ad-B
3 ad-I
4 an-B
5 an-I
6 c-B
7 c-I
8 d-B
9 d-I
10 f-B
11 f-I
12 m-B
13 m-I
14 n-B
15 n-I
16 nr-B
17 nr-I
18 ns-B
19 ns-I
20 nt-B
21 nt-I
22 nw-B
23 nw-I
24 nz-B
25 nz-I
26 p-B
27 p-I
28 q-B
29 q-I
30 r-B
31 r-I
32 s-B
33 s-I
34 t-B
35 t-I
36 u-B
37 u-I
38 v-B
39 v-I
40 vd-B
41 vd-I
42 vn-B
43 vn-I
44 w-B
45 w-I
46 xc-B
47 xc-I
48 PER-B
49 PER-I
50 LOC-B
51 LOC-I
52 ORG-B
53 ORG-I
54 TIME-B
55 TIME-I
56 O
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import time
import argparse
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
import nets
import reader
import utils
def train(args, place):
with fluid.dygraph.guard(place):
dataset = reader.Dataset(args)
num_train_examples = dataset.get_num_examples(args.train_data)
max_train_steps = args.epoch * num_train_examples // args.batch_size
# define reader
train_processor = reader.LACProcessor(args, args.train_data,
args.word_dict_path)
test_processor = dataset.file_reader(args.test_data, mode="test")
# define network
model = nets.LAC("lac_net", args, dataset.vocab_size,
dataset.num_labels)
sgd_optimizer = fluid.optimizer.Adagrad(
learning_rate=args.base_learning_rate)
steps = 0
total_cost, total_acc, total_num_seqs = [], [], []
for eop in range(args.epoch):
time_begin = time.time()
for data in train_processor.data_generator("train")():
steps += 1
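# pad (or truncate) each word-id sequence to max_seq_lens, using the vocab-size id as the padding value so padded positions can be masked out later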
doc = to_variable(
np.array([
np.pad(x[0][0:args.max_seq_lens], (
0, args.max_seq_lens - len(x[0][
0:args.max_seq_lens])),
'constant',
constant_values=(dataset.vocab_size))
for x in data
]).astype('int64').reshape(-1, 1))
seq_lens = to_variable(
np.array([len(x[0]) for x in data]).astype('int64'))
targets = to_variable(
np.array([
np.pad(x[1][0:args.max_seq_lens], (
0, args.max_seq_lens - len(x[1][
0:args.max_seq_lens])),
'constant',
constant_values=(dataset.num_labels))
for x in data
]).astype('int64'))
model.train()
avg_cost, prediction, acc = model(doc, targets, seq_lens)
avg_cost.backward()
np_mask = (doc.numpy() != dataset.vocab_size).astype('int32')
word_num = np.sum(np_mask)
sgd_optimizer.minimize(avg_cost)
model.clear_gradients()
total_cost.append(avg_cost.numpy() * word_num)
total_acc.append(acc.numpy() * word_num)
total_num_seqs.append(word_num)
if steps % args.print_steps == 0:
time_end = time.time()
used_time = time_end - time_begin
print("step: %d, ave loss: %f, "
"ave acc: %f, speed: %f steps/s" %
(steps, np.sum(total_cost) / np.sum(total_num_seqs),
np.sum(total_acc) / np.sum(total_num_seqs),
args.print_steps / used_time))
total_cost, total_acc, total_num_seqs = [], [], []
time_begin = time.time()
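# periodic evaluation pass: switch the model to eval mode and report token-weighted average loss and accuracy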
if steps % args.validation_steps == 0:
total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
model.eval()
eval_steps = 0
for data in train_processor.data_generator("train")():
steps += 1
eval_doc = to_variable(
np.array([
np.pad(x[0][0:args.max_seq_lens], (
0, args.max_seq_lens - len(x[0][
0:args.max_seq_lens])),
'constant',
constant_values=(dataset.vocab_size))
for x in data
]).astype('int64').reshape(-1, 1))
eval_seq_lens = to_variable(
np.array([len(x[0]) for x in data]).astype('int64')
.reshape(args.batch_size, 1))
eval_targets = to_variable(
np.array([
np.pad(x[1][0:args.max_seq_lens], (
0, args.max_seq_lens - len(x[1][
0:args.max_seq_lens])),
'constant',
constant_values=(dataset.num_labels))
for x in data
]).astype('int64'))
eval_avg_cost, eval_prediction, eval_acc = model(
eval_doc, eval_targets, eval_seq_lens)
eval_np_mask = (
eval_doc.numpy() != dataset.vocab_size).astype('int32')
eval_word_num = np.sum(eval_np_mask)
total_eval_cost.append(eval_avg_cost.numpy() *
eval_word_num)
total_eval_acc.append(eval_acc.numpy() * eval_word_num)
total_eval_num_seqs.append(eval_word_num)
eval_steps += 1
time_end = time.time()
used_time = time_end - time_begin
print("Final validation result: step: %d, ave loss: %f, "
"ave acc: %f, speed: %f steps/s" %
(steps, np.sum(total_eval_cost) /
np.sum(total_eval_num_seqs), np.sum(total_eval_acc) /
np.sum(total_eval_num_seqs), eval_steps / used_time))
time_begin = time.time()
if args.enable_ce:
print("kpis\ttrain_loss\t%0.3f" %
(np.sum(total_eval_cost) /
np.sum(total_eval_num_seqs)))
print("kpis\ttrain_acc\t%0.3f" %
(np.sum(total_eval_acc) /
np.sum(total_eval_num_seqs)))
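# checkpoint the dygraph parameters every save_steps batches via save_dygraph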
if steps % args.save_steps == 0:
save_path = "save_dir_" + str(steps)
print('save model to: ' + save_path)
fluid.dygraph.save_dygraph(model.state_dict(),
save_path)
if __name__ == "__main__":
parser = argparse.ArgumentParser(__doc__)
utils.load_yaml(parser, 'args.yaml')
args = parser.parse_args()
if args.use_cuda:
place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
dev_count = fluid.core.get_cuda_device_count()
else:
place = fluid.CPUPlace()
dev_count = 1
print(args)
train(args, place)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, Embedding
from paddle.fluid.dygraph import GRUUnit
from paddle.fluid.dygraph.base import to_variable
import numpy as np
class DynamicGRU(fluid.dygraph.Layer):
def __init__(self,
scope_name,
size,
param_attr=None,
bias_attr=None,
is_reverse=False,
gate_activation='sigmoid',
candidate_activation='tanh',
h_0=None,
origin_mode=False,
init_size=None):
super(DynamicGRU, self).__init__(scope_name)
self.gru_unit = GRUUnit(
self.full_name(),
size * 3,
param_attr=param_attr,
bias_attr=bias_attr,
activation=candidate_activation,
gate_activation=gate_activation,
origin_mode=origin_mode)
self.size = size
self.h_0 = h_0
self.is_reverse = is_reverse
def forward(self, inputs):
hidden = self.h_0
res = []
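# unroll the GRU over the time axis one step at a time; iterate (and collect outputs) in reverse order when is_reverse is set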
for i in range(inputs.shape[1]):
if self.is_reverse:
i = inputs.shape[1] - 1 - i
input_ = inputs[:, i:i + 1, :]
input_ = fluid.layers.reshape(
input_, [-1, input_.shape[2]], inplace=False)
hidden, reset, gate = self.gru_unit(input_, hidden)
hidden_ = fluid.layers.reshape(
hidden, [-1, 1, hidden.shape[1]], inplace=False)
res.append(hidden_)
if self.is_reverse:
res = res[::-1]
res = fluid.layers.concat(res, axis=1)
return res
class LAC(fluid.dygraph.Layer):
def __init__(self,
name_scope,
args,
vocab_size,
num_labels,
for_infer=True,
target=None):
super(LAC, self).__init__(name_scope)
self.word_emb_dim = args.word_emb_dim
self.dict_dim = vocab_size
self.grnn_hidden_dim = args.grnn_hidden_dim
self.emb_lr = args.emb_learning_rate if 'emb_learning_rate' in dir(
args) else 1.0
self.crf_lr = args.crf_learning_rate if 'crf_learning_rate' in dir(
args) else 1.0
self.bigru_num = args.bigru_num
self.init_bound = 0.1
self.IS_SPARSE = True
self.max_seq_lens = args.max_seq_lens
self.grnn_hidden_dim = args.grnn_hidden_dim
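# sparse word embedding; its learning rate is scaled by emb_learning_rate relative to the base learning rate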
self._word_embedding = Embedding(
self.full_name(),
size=[vocab_size, self.word_emb_dim],
dtype='float32',
is_sparse=self.IS_SPARSE,
param_attr=fluid.ParamAttr(
learning_rate=self.emb_lr,
initializer=fluid.initializer.Uniform(
low=-self.init_bound, high=self.init_bound)))
self._emission_fc = FC(
self.full_name(),
size=num_labels,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-self.init_bound, high=self.init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
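# one bidirectional GRU block: project the input to 3 * hidden size, run a forward and a reverse DynamicGRU, and concatenate their outputs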
def _bigru_layer(self, input_feature, grnn_hidden_dim):
"""
define the bidirectional gru layer
"""
pre_gru = FC(input=input_feature,
size=grnn_hidden_dim * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-self.init_bound, high=self.init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
gru = DynamicGRU(
input=pre_gru,
size=grnn_hidden_dim,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-self.init_bound, high=self.init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
pre_gru_r = FC(input=input_feature,
size=grnn_hidden_dim * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-self.init_bound, high=self.init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
gru_r = DynamicGRU(
input=pre_gru_r,
size=grnn_hidden_dim,
is_reverse=True,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-self.init_bound, high=self.init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
bi_merge = fluid.layers.concat(input=[gru, gru_r], axis=1)
return bi_merge
def forward(self, inputs, targets, seq_lens):
emb = self._word_embedding(inputs)
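# zero out embeddings at padded positions (word id == vocab size)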
o_np_mask = (inputs.numpy() != self.dict_dim).astype('float32')
mask_emb = fluid.layers.expand(
to_variable(o_np_mask), [1, self.word_emb_dim])
emb = emb * mask_emb
emb = fluid.layers.reshape(
emb, shape=[-1, 1, self.max_seq_lens, self.word_emb_dim])
input_feature = emb
for i in range(self.bigru_num):
bigru_output = self._bigru_layer(input_feature, self.grnn_hidden_dim)
input_feature = bigru_output
emission = self._emission_fc(input_feature)
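# with labels: compute the linear-chain CRF loss and Viterbi decoding; without labels: decode only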
if targets is not None:
crf_cost = fluid.layers.linear_chain_crf(
input=emission,
label=targets,
param_attr=fluid.ParamAttr(
name='crfw', learning_rate=self.crf_lr),
length=seq_lens)
avg_cost = fluid.layers.mean(x=crf_cost)
crf_decode = fluid.layers.crf_decoding(
input=emission,
param_attr=fluid.ParamAttr(name='crfw'),
length=seq_lens)
return avg_cost, crf_decode
else:
size = emission.shape[1]
fluid.layers.create_parameter(
shape=[size + 2, size], dtype=emission.dtype, name='crfw')
crf_decode = fluid.layers.crf_decoding(
input=emission,
param_attr=fluid.ParamAttr(name='crfw'),
length=seq_lens)
return crf_decode
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The file_reader converts raw corpus to input.
"""
from __future__ import print_function
import os
import argparse
import io
import glob
import paddle
def load_kv_dict(dict_path,
reverse=False,
delimiter="\t",
key_func=None,
value_func=None):
"""
Load key-value dict from file
"""
result_dict = {}
for line in io.open(dict_path, "r", encoding='utf8'):
terms = line.strip("\n").split(delimiter)
if len(terms) != 2:
continue
if reverse:
value, key = terms
else:
key, value = terms
if key in result_dict:
raise KeyError("key duplicated with [%s]" % (key))
if key_func:
key = key_func(key)
if value_func:
value = value_func(value)
result_dict[key] = value
return result_dict
class Dataset(object):
"""data reader"""
def __init__(self, args, mode="train"):
# read dict
self.word2id_dict = load_kv_dict(
args.word_dict_path, reverse=True, value_func=int)
self.id2word_dict = load_kv_dict(args.word_dict_path)
self.label2id_dict = load_kv_dict(
args.label_dict_path, reverse=True, value_func=int)
self.id2label_dict = load_kv_dict(args.label_dict_path)
self.word_replace_dict = load_kv_dict(args.word_rep_dict_path)
@property
def vocab_size(self):
"""vocabuary size"""
return max(self.word2id_dict.values()) + 1
@property
def num_labels(self):
"""num_labels"""
return max(self.label2id_dict.values()) + 1
def get_num_examples(self, filename):
"""num of line of file"""
return sum(1 for line in io.open(filename, "r", encoding='utf8'))
def word_to_ids(self, words):
"""convert word to word index"""
word_ids = []
for word in words:
word = self.word_replace_dict.get(word, word)
if word not in self.word2id_dict:
word = "OOV"
word_id = self.word2id_dict[word]
word_ids.append(word_id)
return word_ids
def label_to_ids(self, labels):
"""convert label to label index"""
label_ids = []
for label in labels:
if label not in self.label2id_dict:
label = "O"
label_id = self.label2id_dict[label]
label_ids.append(label_id)
return label_ids
def file_reader(self, filename, max_seq_len=64, mode="train"):
"""
yield (word_idx, target_idx) one by one from file,
or yield (word_idx, ) in `infer` mode
"""
def wrapper():
fread = io.open(filename, "r", encoding="utf-8")
if mode == "infer":
for line in fread:
words = line.strip()
word_ids = self.word_to_ids(words)
yield (word_ids[0:max_seq_len], )
else:
headline = next(fread)
headline = headline.strip().split('\t')
assert len(headline) == 2 and headline[
0] == "text_a" and headline[1] == "label"
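# non-infer files are TSVs with a text_a/label header; tokens and labels within a field are joined by the '\002' separator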
for line in fread:
words, labels = line.strip("\n").split("\t")
if len(words) < 1:
continue
word_ids = self.word_to_ids(words.split("\002"))
label_ids = self.label_to_ids(labels.split("\002"))
assert len(word_ids) == len(label_ids)
yield word_ids[0:max_seq_len], label_ids[0:max_seq_len]
fread.close()
return wrapper
class LACProcessor(object):
def __init__(self, args, data_dir, vocab_path, random_seed=None):
self.num_examples = {"train": -1, "dev": -1, "infer": -1}
self.args = args
self.dataset = Dataset(args)
self.data_dir = data_dir
def get_train_examples(self, data_dir):
return self.dataset.file_reader(self.data_dir, 65, mode="train")
def get_dev_examples(self, data_dir):
return self.dataset.file_reader(self.data_dir, 65, mode="dev")
def get_test_examples(self, data_dir):
return self.dataset.file_reader(self.data_dir, 65, mode="test")
def data_generator(self, mode='train', epoch=1, shuffle=True):
if mode == "train":
return paddle.batch(
self.get_train_examples(self.data_dir), 300, drop_last=True)
elif mode == "dev":
return paddle.batch(
self.get_dev_examples(self.data_dir), 300, drop_last=True)
elif mode == "infer":
return paddle.batch(
self.get_test_examples(self.data_dir), 300, drop_last=True)
else:
raise ValueError(
"Unknown phase, which should be in ['train', 'dev', 'infer'].")
if __name__ == "__main__":
parser = argparse.ArgumentParser(__doc__)
parser.add_argument(
"--word_dict_path",
type=str,
default="./conf/word.dic",
help="word dict")
parser.add_argument(
"--label_dict_path",
type=str,
default="./conf/tag.dic",
help="label dict")
parser.add_argument(
"--word_rep_dict_path",
type=str,
default="./conf/q2b.dic",
help="word replace dict")
args = parser.parse_args()
dataset = Dataset(args)
processor = LACProcessor(args, "data/train.tsv", args.word_dict_path)
for data in processor.data_generator("train")():
for xx in data:
print(xx)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
util tools
"""
from __future__ import print_function
import os
import sys
import numpy as np
import paddle.fluid as fluid
import yaml
import io
def str2bool(v):
"""
Convert a string flag to a bool, since argparse does not parse booleans from strings natively.
"""
return v.lower() in ("true", "t", "1")
class ArgumentGroup(object):
"""
Put arguments to one group
"""
def __init__(self, parser, title, des):
"""none"""
self._group = parser.add_argument_group(title=title, description=des)
def add_arg(self, name, type, default, help, **kwargs):
""" Add argument """
type = str2bool if type == bool else type
self._group.add_argument(
"--" + name,
default=default,
type=type,
help=help + ' Default: %(default)s.',
**kwargs)
def load_yaml(parser, file_name, **kwargs):
with io.open(file_name, 'r', encoding='utf8') as f:
args = yaml.safe_load(f)
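# each top-level YAML section becomes an argparse group; every entry adds a --<name> flag with 'val' as the default and 'meaning' as the help text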
for title in args:
group = parser.add_argument_group(title=title, description='')
for name in args[title]:
_type = type(args[title][name]['val'])
_type = str2bool if _type == bool else _type
group.add_argument(
"--" + name,
default=args[title][name]['val'],
type=_type,
help=args[title][name]['meaning'] +
' Default: %(default)s.',
**kwargs)
def print_arguments(args):
"""none"""
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).items()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')