# analysis.py

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import math
import os
import random
import sys
import time

import numpy as np
import paddle
from paddleslim.common import load_config as load_slim_config
from paddleslim.quant.analysis import Analysis
from ppfleetx.data import build_dataloader
from ppfleetx.distributed.apis import env

from utils import parse_config


def argsparser():
    """Build the command-line parser for this script.

    Registered options:
        --config_path: required string, path of the compression strategy
            config.
        --save_dir: string, defaults to ``'analysis_results'``.
        --devices: string, defaults to ``'gpu'``.

    Returns:
        The configured ``argparse.ArgumentParser`` (arguments are not parsed
        here).
    """
    # One (flag, add_argument keyword arguments) entry per option, registered
    # in this order.
    option_specs = (
        ('--config_path', {
            'type': str,
            'default': None,
            'help': "path of compression strategy config.",
            'required': True,
        }),
        ('--save_dir', {
            'type': str,
            'default': 'analysis_results',
            'help': "directory to save compressed model.",
        }),
        ('--devices', {
            'type': str,
            'default': 'gpu',
            'help': "which device used to compress.",
        }),
    )

    cli = argparse.ArgumentParser(description=__doc__)
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    return cli


def eval_reader_wrapper(reader):
    """Adapt ``reader`` into a generator factory of evaluation batches.

    Each item of ``reader`` must unpack into six fields, in this order:
    tokens, loss_mask, attention_mask, position_ids, labels, info.

    Args:
        reader: iterable of 6-element batches.

    Returns:
        A zero-argument function. Each call returns a fresh generator that
        yields ``(feed_dict, labels, loss_mask, info)`` per batch, where
        ``feed_dict`` maps ``'tokens'`` to the tokens and ``'ids'`` to the
        position ids. The attention mask is dropped.
    """

    def gen():
        for batch in reader:
            # Unpack all six fields so a malformed batch still fails loudly,
            # even though the attention mask itself is not forwarded.
            tokens, loss_mask, _attention_mask, position_ids, labels, info = batch
            feed = {'tokens': tokens, 'ids': position_ids}
            yield feed, labels, loss_mask, info

    return gen


def eval_function(exe, program, feed_names, fetch_list):
    """Evaluate ``program`` on the evaluation loader and return one metric.

    Relies on the module-level globals ``global_config``, ``gpt_config`` and
    ``eval_loader`` (set up by ``main``). Depending on
    ``global_config['cloze_eval']`` it runs one of two modes:

    * falsy: language-model evaluation. The masked cross-entropy loss of
      each batch is summed, divided by ``num_tokenized_tokens - 1`` and
      accumulated; the metric is ``exp(min(20, accumulated_loss))``.
    * truthy: cloze evaluation. A sample counts as correct when every
      position selected by ``loss_mask`` is predicted correctly; the metric
      is ``number_correct / num_examples``.

    The token/example counts are read from ``info[0]`` of the first batch.

    Args:
        exe: object whose ``run(program=..., feed=..., fetch_list=...,
            return_numpy=False)`` is called once per batch.
        program: forwarded to ``exe.run``.
        feed_names: not used by this function; kept so the signature stays
            unchanged (presumably required by the caller's callback
            contract — verify against ``Analysis``).
        fetch_list: forwarded to ``exe.run``; only the first fetched result
            is used as the predictions.

    Returns:
        Perplexity (LM mode) or accuracy (cloze mode).

    Raises:
        ValueError: if ``eval_loader`` yields no batches, since the metric
            would be undefined.
    """
    log_interval = 10  # print progress every this many steps
    cloze_eval = global_config['cloze_eval']
    score_name = "number correct" if cloze_eval else "loss"

    tic_eval = time.time()
    first_step = True
    total_score = 0
    for eval_step, (data, labels, loss_mask, info) in enumerate(eval_loader()):
        preds = exe.run(
            program=program,
            feed=data,
            fetch_list=fetch_list,
            return_numpy=False)

        # The metric math below uses dynamic-graph ops. Always switch back to
        # static mode afterwards, even on error, so the caller is not left
        # in dynamic mode.
        paddle.disable_static()
        try:
            labels = paddle.to_tensor(labels)
            preds = paddle.to_tensor(preds[0])
            loss_mask = paddle.to_tensor(loss_mask)
            info = paddle.to_tensor(info)

            if not cloze_eval:
                if first_step:
                    first_info = info.numpy()[0]
                    num_original_tokens = first_info[0]
                    num_tokenized_tokens = first_info[1]
                    first_step = False

                masked_lm_loss = paddle.nn.functional.cross_entropy(
                    preds, labels, reduction="none")
                loss = paddle.sum(masked_lm_loss * loss_mask)
                # Accumulate as a plain scalar so the running total can be
                # formatted with "%.9f" and passed to float() safely.
                total_score += float(loss) / (num_tokenized_tokens - 1)
            else:
                if first_step:
                    num_examples = info.numpy()[0][0]
                    first_step = False

                outputs = paddle.argmax(preds, -1)
                acc = paddle.cast(outputs == labels, 'float32')
                # Positions outside the mask are forced to 1 so they never
                # turn the per-sample product into 0.
                acc = paddle.where(
                    paddle.cast(loss_mask, 'bool'), acc, paddle.ones_like(acc))
                acc = paddle.sum(paddle.prod(acc, -1))
                total_score += float(acc)
        finally:
            paddle.enable_static()

        if eval_step != 0 and eval_step % log_interval == 0:
            # The timer spans the steps since the previous report, so the
            # rate is log_interval / elapsed rather than 1 / elapsed.
            elapsed = time.time() - tic_eval
            print("[eval] step: %d, %s: %.9f, speed: %.2f step/s" %
                  (eval_step, score_name, total_score, log_interval / elapsed))
            tic_eval = time.time()

    if first_step:
        # No batch was processed, so the token/example counts were never read.
        raise ValueError(
            "eval_loader yielded no batches; cannot compute an evaluation metric")

    dataset_name = gpt_config['Data']['Eval']['dataset']['name']
    if not cloze_eval:
        total_loss = float(total_score)
        # Cap the exponent at 20 to keep the reported perplexity finite.
        ppl = math.exp(min(20, total_loss))
        token_ratio = (num_tokenized_tokens - 1) / (num_original_tokens - 1)
        adjusted_ppl = math.exp(min(20, total_loss * token_ratio))
        string = ' validation results on {} | '.format(dataset_name)
        string += 'avg loss: {:.4E} | '.format(total_loss)
        string += 'ppl: {:.4E} | '.format(ppl)
        string += 'adjusted ppl: {:.4E} | '.format(adjusted_ppl)
        string += 'token ratio: {} |'.format(token_ratio)
        metric = ppl
    else:
        num_correct = float(total_score)
        acc = float(num_correct / num_examples)
        string = ' validation results on {} | '.format(dataset_name)
        string += 'number correct: {:.4E} | '.format(num_correct)
        string += 'total examples: {:.4E} | '.format(num_examples)
        string += 'avg accuracy: {:.4E}'.format(acc)
        metric = acc

    print(string)
    return metric


def main():
    """Load the configs, build the eval loader and run the quant analysis.

    Reads the module-level ``FLAGS`` (``config_path`` and ``save_dir``) and
    publishes ``all_config``, ``global_config``, ``gpt_config`` and
    ``eval_loader`` as module globals, which ``eval_function`` reads.

    Steps:
        1. Load the slim config and require a ``Global`` section.
        2. Seed ``random``, NumPy, Paddle and the distributed ``env`` with
           ``Global.seed``.
        3. Parse the reader config and select the eval dataset name from
           ``Global.cloze_eval``.
        4. Build the eval dataloader, wrap it, and hand everything to
           ``Analysis``, then call ``metric_error_analyse()``.
    """
    global all_config, global_config, gpt_config, eval_loader

    all_config = load_slim_config(FLAGS.config_path)
    assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format(
        all_config)
    global_config = all_config["Global"]

    # Apply the same seed to every random source, in this order.
    seed = global_config['seed']
    for apply_seed in (random.seed, np.random.seed, paddle.seed, env.set_seed):
        apply_seed(seed)

    gpt_config = parse_config(global_config['reader_config'])

    # The dataset class depends on the evaluation mode.
    if global_config['cloze_eval']:
        dataset_name = "Lambada_Eval_Dataset"
    else:
        dataset_name = "LM_Eval_Dataset"
    gpt_config['Data']['Eval']['dataset']['name'] = dataset_name

    eval_loader = eval_reader_wrapper(
        build_dataloader(gpt_config['Data'], "Eval"))

    analysis_kwargs = {
        'quant_model_dir': global_config["quant_model_dir"],
        'float_model_dir': global_config["float_model_dir"],
        'model_filename': global_config["model_filename"],
        'params_filename': global_config["params_filename"],
        'eval_function': eval_function,
        'data_loader': eval_loader,
        'save_dir': FLAGS.save_dir,
        'quant_config': all_config['quant_config'],
        'resume': global_config['resume'],
    }
    Analysis(**analysis_kwargs).metric_error_analyse()


# Script entry point: parse the CLI, select the device, then run the analysis.
if __name__ == '__main__':
    # Start in static-graph mode before anything else runs.
    paddle.enable_static()

    parser = argsparser()
    # FLAGS is a module-level global that main() reads.
    FLAGS = parser.parse_args()

    supported_devices = ['cpu', 'gpu', 'xpu', 'npu']
    assert FLAGS.devices in supported_devices
    paddle.set_device(FLAGS.devices)

    main()