sequence_label.py

#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import


import os
import time
import argparse
import numpy as np
import multiprocessing

import paddle
import logging
import paddle.fluid as fluid

from six.moves import xrange

from model.ernie import ErnieModel

log = logging.getLogger(__name__)

def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    src_ids = fluid.layers.data(name='1', shape=[-1, args.max_seq_len, 1], dtype='int64')
    sent_ids = fluid.layers.data(name='2', shape=[-1, args.max_seq_len, 1], dtype='int64')
    pos_ids = fluid.layers.data(name='3', shape=[-1, args.max_seq_len, 1], dtype='int64')
    task_ids = fluid.layers.data(name='4', shape=[-1, args.max_seq_len, 1], dtype='int64')
    input_mask = fluid.layers.data(name='5', shape=[-1, args.max_seq_len, 1], dtype='float32')
    labels = fluid.layers.data(name='7', shape=[-1, args.max_seq_len, 1], dtype='int64')
    seq_lens = fluid.layers.data(name='8', shape=[-1], dtype='int64')

    pyreader = fluid.io.DataLoader.from_generator(feed_list=[src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, seq_lens], 
            capacity=70,
            iterable=False)

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()
    enc_out = fluid.layers.dropout(
        x=enc_out, dropout_prob=0.1, dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_seq_label_out_b",
            initializer=fluid.initializer.Constant(0.)))
    infers = fluid.layers.argmax(logits, axis=2)

    ret_infers = fluid.layers.reshape(x=infers, shape=[-1, 1])
    lod_labels = fluid.layers.sequence_unpad(labels, seq_lens)
    lod_infers = fluid.layers.sequence_unpad(infers, seq_lens)

    (_, _, _, num_infer, num_label, num_correct) = fluid.layers.chunk_eval(
         input=lod_infers,
         label=lod_labels,
         chunk_scheme=args.chunk_scheme,
         num_chunk_types=((args.num_labels-1)//(len(args.chunk_scheme)-1)))

    labels = fluid.layers.flatten(labels, axis=2)
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=fluid.layers.flatten(
            logits, axis=2),
        label=labels,
        return_softmax=True)
    input_mask = fluid.layers.flatten(input_mask, axis=2)
    ce_loss = ce_loss * input_mask
    loss = fluid.layers.mean(x=ce_loss)

    graph_vars = {
        "inputs": src_ids,
        "loss": loss,
        "probs": probs,
        "seqlen": seq_lens,
        "num_infer": num_infer,
        "num_label": num_label,
        "num_correct": num_correct,
    }

    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars


def calculate_f1(num_label, num_infer, num_correct):

    num_infer = np.sum(num_infer)
    num_label = np.sum(num_label)
    num_correct = np.sum(num_correct)
    
    if num_infer == 0:
        precision = 0.0
    else:
        precision = num_correct * 1.0 / num_infer

    if num_label == 0:
        recall = 0.0
    else:
        recall = num_correct * 1.0 / num_label

    if num_correct == 0:
        f1 = 0.0
    else:
        f1 = 2 * precision * recall / (precision + recall)
    return precision, recall, f1


def evaluate(exe,
             program,
             pyreader,
             graph_vars,
             tag_num,
             dev_count=1):
    fetch_list = [
        graph_vars["num_infer"].name, graph_vars["num_label"].name,
        graph_vars["num_correct"].name
    ]

    total_label, total_infer, total_correct = 0.0, 0.0, 0.0
    time_begin = time.time()
    pyreader.start()
    while True:
        try:
            np_num_infer, np_num_label, np_num_correct = exe.run(program=program,
                                                    fetch_list=fetch_list)
            total_infer += np.sum(np_num_infer)
            total_label += np.sum(np_num_label)
            total_correct += np.sum(np_num_correct)

        except fluid.core.EOFException:
            pyreader.reset()
            break

    precision, recall, f1 = calculate_f1(total_label, total_infer,
                                         total_correct)
    time_end = time.time()
    return  \
        "[evaluation] f1: %f, precision: %f, recall: %f, elapsed time: %f s" \
        % (f1, precision, recall, time_end - time_begin)


def chunk_predict(np_inputs, np_probs, np_lens, dev_count=1):
    inputs = np_inputs.reshape([-1]).astype(np.int32)
    probs = np_probs.reshape([-1, np_probs.shape[-1]])

    all_lens = np_lens.reshape([dev_count, -1]).astype(np.int32).tolist()

    base_index = 0
    out = []
    for dev_index in xrange(dev_count):
        lens = all_lens[dev_index]
        max_len = 0
        for l in lens:
            max_len = max(max_len, l)

        for i in xrange(len(lens)):
            seq_st = base_index + i * max_len + 1
            seq_en = seq_st + (lens[i] - 2)
            prob = probs[seq_st:seq_en, :]
            infers = np.argmax(prob, -1)
            out.append((
                    inputs[seq_st:seq_en].tolist(), 
                    infers.tolist(),
                    prob.tolist()))
        base_index += max_len * len(lens)
    return out


def predict(exe,
            test_program,
            test_pyreader,
            graph_vars,
            dev_count=1):
    fetch_list = [
        graph_vars["inputs"].name,
        graph_vars["probs"].name,
        graph_vars["seqlen"].name,
    ]

    test_pyreader.start()
    res = []
    while True:
        try:
            inputs, probs, np_lens = exe.run(program=test_program,
                                        fetch_list=fetch_list)
            r = chunk_predict(inputs, probs, np_lens, dev_count)
            res += r
        except fluid.core.EOFException:
            test_pyreader.reset()
            break
    return res