# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Benchmark Paddle inference latency for image classification models."""

import os
import time
import argparse

import numpy as np
import paddle
from paddle.inference import create_predictor
from paddleslim.common import load_config


def str2bool(v):
    # argparse's type=bool treats any non-empty string (including "False")
    # as True, so parse boolean flags explicitly.
    return str(v).lower() in ("true", "t", "yes", "1")


def argsparser():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--config_path',
        type=str,
        default='./image_classification/configs/infer.yaml',
        help='config file path')
    parser.add_argument(
        '--model_dir',
        type=str,
        default='./MobileNetV1_infer',
        help='model directory')
    parser.add_argument(
        '--use_fp16',
        type=str2bool,
        default=False,
        help='Whether to use fp16')
    parser.add_argument(
        '--use_int8',
        type=str2bool,
        default=False,
        help='Whether to use int8')
    return parser


class Predictor(object):
    def __init__(self, config):
        # Half-precision inference only works when TensorRT is enabled.
        if config['use_fp16'] is True:
            assert config['use_tensorrt'] is True
        self.config = config
        self.paddle_predictor = self.create_paddle_predictor()
        input_names = self.paddle_predictor.get_input_names()
        self.input_tensor = self.paddle_predictor.get_input_handle(
            input_names[0])
        output_names = self.paddle_predictor.get_output_names()
        self.output_tensor = self.paddle_predictor.get_output_handle(
            output_names[0])

    def create_paddle_predictor(self):
        inference_model_dir = self.config['model_dir']
        model_file = os.path.join(inference_model_dir,
                                  self.config['model_filename'])
        params_file = os.path.join(inference_model_dir,
                                   self.config['params_filename'])
        config = paddle.inference.Config(model_file, params_file)

        precision = paddle.inference.Config.Precision.Float32
        if self.config['use_int8']:
            precision = paddle.inference.Config.Precision.Int8
        elif self.config['use_fp16']:
            precision = paddle.inference.Config.Precision.Half

        if self.config['use_gpu']:
            config.enable_use_gpu(self.config['gpu_mem'], 0)
        else:
            config.disable_gpu()
            if self.config['enable_mkldnn']:
                # Cache 10 different input shapes for MKL-DNN to avoid a
                # memory leak from unbounded shape caching.
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
        config.set_cpu_math_library_num_threads(self.config[
            'cpu_num_threads'])

        if self.config['enable_profile']:
            config.enable_profile()
        config.disable_glog_info()
        config.switch_ir_optim(self.config['ir_optim'])  # default true
        if self.config['use_tensorrt']:
            config.enable_tensorrt_engine(
                precision_mode=precision,
                max_batch_size=self.config['batch_size'],
                workspace_size=1 << 30,
                min_subgraph_size=30,
                use_calib_mode=False)

        config.enable_memory_optim()
        # Use zero-copy tensors instead of feed/fetch ops.
        config.switch_use_feed_fetch_ops(False)
        predictor = create_predictor(config)

        return predictor

    def predict(self):
        test_num = 1000
        test_time = 0.0
        for i in range(0, test_num + 10):
            inputs = np.random.rand(
                self.config['batch_size'], 3, self.config['image_size'],
                self.config['image_size']).astype(np.float32)
            start_time = time.time()
            self.input_tensor.copy_from_cpu(inputs)
            self.paddle_predictor.run()
            # Copy the result back to host so the device-to-host transfer
            # is included in the measured time.
            batch_output = self.output_tensor.copy_to_cpu().flatten()
            # The first 10 iterations are warm-up and are not timed.
            if i >= 10:
                test_time += time.time() - start_time
            time.sleep(0.01)  # sleep for T4 GPU

        fp_message = "FP16" if self.config['use_fp16'] else "FP32"
        fp_message = "INT8" if self.config['use_int8'] else fp_message
        trt_msg = "using tensorrt" if self.config[
            'use_tensorrt'] else "not using tensorrt"
        print("{0}\t{1}\tbatch size: {2}\ttime(ms): {3}".format(
            trt_msg, fp_message, self.config['batch_size'],
            1000 * test_time / test_num))
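

# For reference, a minimal infer.yaml sketch covering every key this script
# reads. The values below are illustrative assumptions, not the shipped
# defaults; adjust them to the deployed model.
#
#   model_dir: ./MobileNetV1_infer
#   model_filename: inference.pdmodel
#   params_filename: inference.pdiparams
#   batch_size: 1
#   image_size: 224
#   use_gpu: True
#   gpu_mem: 8000
#   use_tensorrt: False
#   use_fp16: False
#   use_int8: False
#   enable_mkldnn: False
#   cpu_num_threads: 10
#   enable_profile: False
#   ir_optim: True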


if __name__ == "__main__":
    parser = argsparser()
    args = parser.parse_args()
    config = load_config(args.config_path)
    # Command-line flags override the corresponding YAML settings.
    if args.model_dir != config['model_dir']:
        config['model_dir'] = args.model_dir
    if args.use_fp16 != config['use_fp16']:
        config['use_fp16'] = args.use_fp16
    if args.use_int8 != config['use_int8']:
        config['use_int8'] = args.use_int8
    predictor = Predictor(config)
    predictor.predict()
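
# Example invocation (the script filename is assumed; the flags mirror
# argsparser() and override the matching keys in the YAML config):
#
#   python paddle_inference_eval.py \
#       --config_path ./image_classification/configs/infer.yaml \
#       --model_dir ./MobileNetV1_infer \
#       --use_int8 True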