predict.py 2.7 KB
Newer Older
0
0YuanZhang0 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SequenceTagging predict structure
"""

from __future__ import division
from __future__ import print_function

0
fix_bug  
0YuanZhang0 已提交
21
import six
0
0YuanZhang0 已提交
22

23
import paddle
0
0YuanZhang0 已提交
24 25
import paddle.fluid as fluid
from paddle.fluid.layers.utils import flatten
26
from paddle.static import InputSpec as Input
27 28 29 30 31

from sequence_tagging import SeqTagging, LacLoss, ChunkEval
from reader import LacDataset, LacDataLoader
from utils.check import check_gpu, check_version
from utils.configure import PDConfig
0
0YuanZhang0 已提交
32 33 34


def main(args):
    """Run sequence-tagging prediction and write the predicted tags to a file.

    Builds a ``SeqTagging`` network in ``"predict"`` mode wrapped in a
    ``paddle.Model``, loads weights from ``args.init_from_checkpoint``, feeds
    every batch of the predict data loader through the model and writes one
    line per input sequence to ``args.output_file``. Each line holds the
    predicted tags of that sequence joined by the 0x02 control character and
    terminated by a newline.

    Args:
        args: Configuration object. This function reads ``args.device``,
            ``args.dynamic``, ``args.init_from_checkpoint`` and
            ``args.output_file``; the object is also handed to ``LacDataset``,
            ``LacDataLoader`` and ``SeqTagging``.
    """
    place = paddle.set_device(args.device)
    if args.dynamic:
        fluid.enable_dygraph(place)

    # Input specs: a batch of word-id sequences and the length of each one.
    inputs = [
        Input([None, None], 'int64', name='words'),
        Input([None], 'int64', name='length'),
    ]

    dataset = LacDataset(args)
    predict_dataset = LacDataLoader(args, place, phase="predict")

    vocab_size = dataset.vocab_size
    num_labels = dataset.num_labels
    model = paddle.Model(
        SeqTagging(args, vocab_size, num_labels, mode="predict"),
        inputs=inputs)

    model.mode = "test"
    model.prepare()

    model.load(args.init_from_checkpoint, skip_mismatch=True)

    # The context manager guarantees the output file is flushed and closed,
    # even when prediction raises part-way through the data.
    with open(args.output_file, "wb") as f:
        for data in predict_dataset.dataloader:
            # A single-element batch wraps the actual inputs one level deeper.
            input_data = data[0] if len(data) == 1 else data
            results, length = model.test_batch(inputs=flatten(input_data))
            for sequence, word_len in zip(results, length):
                # Keep only the first word_len predictions of this sequence.
                word_ids = sequence[:word_len]
                tags = [
                    dataset.id2label_dict[str(tag_id)] for tag_id in word_ids
                ]
                line = "\002".join(tags) + "\n"
                if six.PY3:
                    # The file is opened in binary mode, so text must be
                    # encoded explicitly on Python 3.
                    line = line.encode("utf8")
                f.write(line)
0
0YuanZhang0 已提交
77

0
seq_tag  
0YuanZhang0 已提交
78 79

if __name__ == '__main__':
    # Build the run configuration from the YAML file and print it.
    args = PDConfig(yaml_file="sequence_tagging.yaml")
    args.build()
    args.Print()

    # Tell check_gpu whether the configured device is the GPU.
    check_gpu(args.device == "gpu")
    # TODO: add check for 2.0.0-alpha0 if fluid.require_version support
    # check_version()
    main(args)