# inference.py

import paddle
import argparse
import numpy as np
import random
import os
from collections import OrderedDict
import sys
import cv2

sys.path.append(".")

from ppgan.utils.config import get_config
from ppgan.datasets.builder import build_dataloader
from ppgan.engine.trainer import IterLoader
from ppgan.utils.visual import save_image
from ppgan.utils.visual import tensor2img
from ppgan.utils.filesystem import makedirs
from ppgan.metrics import build_metric


# Model type names advertised in the ``--model_type`` help text.
# "gfpgan" is listed because main() has a dedicated branch for it.
MODEL_CLASSES = [
    "pix2pix", "cyclegan", "wav2lip", "esrgan", "edvr", "fom", "stylegan2",
    "basicvsr", "msvsr", "singan", "swinir", "invdn", "aotgan", "gfpgan"
]


def parse_args():
    """Parse the command-line options of the inference script.

    Returns:
        argparse.Namespace: the parsed options. ``--model_path`` and
        ``--model_type`` are required; every other option has a default.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_path",
        default=None,
        type=str,
        required=True,
        help="The path prefix of inference model to be used.",
    )
    parser.add_argument("--model_type",
                        default=None,
                        type=str,
                        required=True,
                        help="Model type selected in the list: " +
                        ", ".join(MODEL_CLASSES))
    parser.add_argument(
        "--device",
        default="gpu",
        type=str,
        choices=["cpu", "gpu", "xpu", "npu"],
        help="The device to run inference on, it must be cpu/gpu/xpu/npu.")
    parser.add_argument('-c',
                        '--config-file',
                        metavar="FILE",
                        help='config file path')
    parser.add_argument("--output_path",
                        type=str,
                        default="infer_output",
                        help="output_path")
    # config options
    parser.add_argument("-o",
                        "--opt",
                        nargs='+',
                        help="set configuration options")
    # fix random numbers by setting seed
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='fix random numbers by setting seed.')
    # for tensorRT
    parser.add_argument("--run_mode",
                        default="fluid",
                        type=str,
                        choices=["fluid", "trt_fp32", "trt_fp16"],
                        help="mode of running(fluid/trt_fp32/trt_fp16)")
    parser.add_argument("--trt_min_shape",
                        default=1,
                        type=int,
                        help="trt_min_shape for tensorRT")
    parser.add_argument("--trt_max_shape",
                        default=1280,
                        type=int,
                        help="trt_max_shape for tensorRT")
    parser.add_argument("--trt_opt_shape",
                        default=640,
                        type=int,
                        help="trt_opt_shape for tensorRT")
    parser.add_argument("--min_subgraph_size",
                        default=3,
                        type=int,
                        help="min_subgraph_size for tensorRT")
    parser.add_argument("--batch_size",
                        default=1,
                        type=int,
                        help="batch_size for tensorRT")
    parser.add_argument("--use_dynamic_shape",
                        dest="use_dynamic_shape",
                        action="store_true",
                        help="use_dynamic_shape for tensorRT")
    parser.add_argument("--trt_calib_mode",
                        dest="trt_calib_mode",
                        action="store_true",
                        help="trt_calib_mode for tensorRT")
    return parser.parse_args()


def create_predictor(model_path,
                     device="gpu",
                     run_mode='fluid',
                     batch_size=1,
                     min_subgraph_size=3,
                     use_dynamic_shape=False,
                     trt_min_shape=1,
                     trt_max_shape=1280,
                     trt_opt_shape=640,
                     trt_calib_mode=False):
    """Build a Paddle Inference predictor for an exported model.

    Args:
        model_path: path prefix; ``.pdmodel`` and ``.pdiparams`` are appended
            to locate the model and parameter files.
        device: "gpu", "npu" or "xpu" enable that backend; any other value
            (including "cpu") disables the GPU.
        run_mode: when one of ``trt_int8``/``trt_fp32``/``trt_fp16`` the
            TensorRT engine is enabled with the matching precision; any other
            value leaves TensorRT off.
        batch_size: used as the TensorRT max batch size and as the leading
            dimension of the dynamic-shape entries.
        min_subgraph_size: forwarded to ``enable_tensorrt_engine``.
        use_dynamic_shape: when TensorRT is enabled, also register
            min/max/opt shapes for the input named ``image``.
        trt_min_shape: side length of the square minimum input shape.
        trt_max_shape: side length of the square maximum input shape.
        trt_opt_shape: side length of the square optimal input shape.
        trt_calib_mode: forwarded as ``use_calib_mode``.

    Returns:
        The predictor returned by ``paddle.inference.create_predictor``.
    """
    infer_config = paddle.inference.Config(model_path + ".pdmodel",
                                           model_path + ".pdiparams")

    # "cpu" and any unrecognised device share the disable-GPU fallback.
    if device == "gpu":
        infer_config.enable_use_gpu(100, 0)
    elif device == "npu":
        infer_config.enable_npu()
    elif device == "xpu":
        infer_config.enable_xpu()
    else:
        infer_config.disable_gpu()

    precision_type = paddle.inference.Config.Precision
    trt_precisions = {
        'trt_int8': precision_type.Int8,
        'trt_fp32': precision_type.Float32,
        'trt_fp16': precision_type.Half
    }

    if run_mode in trt_precisions:
        infer_config.enable_tensorrt_engine(
            workspace_size=1 << 25,
            max_batch_size=batch_size,
            min_subgraph_size=min_subgraph_size,
            precision_mode=trt_precisions[run_mode],
            use_static=False,
            use_calib_mode=trt_calib_mode)

        if use_dynamic_shape:

            def square_image_shape(side):
                # Shape entry for the 'image' input: 3 channels, side x side.
                return {'image': [batch_size, 3, side, side]}

            infer_config.set_trt_dynamic_shape_info(
                square_image_shape(trt_min_shape),
                square_image_shape(trt_max_shape),
                square_image_shape(trt_opt_shape))
            print('trt set dynamic shape done!')

    return paddle.inference.create_predictor(infer_config)


def setup_metrics(cfg):
    """Build the metric objects described by a metrics config.

    Args:
        cfg: either a mapping of ``metric name -> metric config`` (detected
            by its first value being a dict) or a single metric config that
            is passed to ``build_metric`` as-is. An empty or ``None`` config
            produces no metrics.

    Returns:
        OrderedDict: built metrics keyed by the configured name, or by the
        metric's class name when a single config was given.
    """
    metrics = OrderedDict()
    # Nothing configured: return early instead of indexing an empty mapping.
    if not cfg:
        return metrics

    first_value = next(iter(cfg.values()))
    if isinstance(first_value, dict):
        for metric_name, cfg_ in cfg.items():
            metrics[metric_name] = build_metric(cfg_)
    else:
        metric = build_metric(cfg)
        metrics[metric.__class__.__name__] = metric

    return metrics


def main():
    """Run an exported model over the configured test set and log metrics.

    Options come from ``parse_args``. For each batch of the test dataloader,
    the branch matching ``--model_type`` feeds the predictor, saves the
    resulting image(s) under ``<output_path>/<model_type>/`` and updates the
    metrics configured under ``validate.metrics`` (if any). Accumulated
    metric values are appended to ``<output_path>/<model_type>/metric.txt``.

    The esrgan, edvr and invdn branches stop after the first batch. The
    wav2lip, gfpgan and aotgan branches only save images and do not update
    metrics.
    """
    args = parse_args()
    # Compare against None so that an explicit ``--seed 0`` is honoured.
    if args.seed is not None:
        paddle.seed(args.seed)
        random.seed(args.seed)
        np.random.seed(args.seed)
    cfg = get_config(args.config_file, args.opt)
    predictor = create_predictor(args.model_path, args.device, args.run_mode,
                                 args.batch_size, args.min_subgraph_size,
                                 args.use_dynamic_shape, args.trt_min_shape,
                                 args.trt_max_shape, args.trt_opt_shape,
                                 args.trt_calib_mode)
    input_handles = [
        predictor.get_input_handle(name)
        for name in predictor.get_input_names()
    ]

    output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
    test_dataloader = build_dataloader(cfg.dataset.test,
                                       is_train=False,
                                       distributed=False)

    max_eval_steps = len(test_dataloader)
    iter_loader = IterLoader(test_dataloader)
    min_max = cfg.get('min_max', None)
    if min_max is None:
        min_max = (-1., 1.)

    model_type = args.model_type
    output_dir = os.path.join(args.output_path, model_type)
    makedirs(output_dir)
    # Defined up front so the final metric dump works for every model type,
    # including branches that never touch metrics and empty dataloaders.
    metric_file = os.path.join(output_dir, "metric.txt")

    validate_cfg = cfg.get('validate', None)
    # An empty mapping (rather than None) keeps the per-branch
    # ``metrics.values()`` loops valid when no metrics are configured.
    metrics = OrderedDict()
    if validate_cfg and 'metrics' in validate_cfg:
        metrics = setup_metrics(validate_cfg['metrics'])
        for metric in metrics.values():
            metric.reset()

    for i in range(max_eval_steps):
        data = next(iter_loader)
        image_path = os.path.join(output_dir, "{}.png".format(i))
        if model_type == "pix2pix":
            # The model input is read from key 'B'; 'A' is the metric target.
            real_A = data['B'].numpy()
            input_handles[0].copy_from_cpu(real_A)
            predictor.run()
            prediction = output_handle.copy_to_cpu()
            prediction = paddle.to_tensor(prediction)
            image_numpy = tensor2img(prediction[0], min_max)
            save_image(image_numpy, image_path)
            real_B = paddle.to_tensor(data['A'])
            for metric in metrics.values():
                metric.update(prediction, real_B)

        elif model_type == "cyclegan":
            real_A = data['A'].numpy()
            input_handles[0].copy_from_cpu(real_A)
            predictor.run()
            prediction = output_handle.copy_to_cpu()
            prediction = paddle.to_tensor(prediction)
            image_numpy = tensor2img(prediction[0], min_max)
            save_image(image_numpy, image_path)
            real_B = paddle.to_tensor(data['B'])
            for metric in metrics.values():
                metric.update(prediction, real_B)

        elif model_type == "wav2lip":
            indiv_mels, x = data['indiv_mels'].numpy()[0], data['x'].numpy()[0]
            x = x.transpose([1, 0, 2, 3])
            input_handles[0].copy_from_cpu(indiv_mels)
            input_handles[1].copy_from_cpu(x)
            predictor.run()
            prediction = output_handle.copy_to_cpu()
            for j in range(prediction.shape[0]):
                # Reverse the first axis of each output item
                # (presumably a channel-order flip -- TODO confirm).
                prediction[j] = prediction[j][::-1, :, :]
                image_numpy = paddle.to_tensor(prediction[j])
                image_numpy = tensor2img(image_numpy, (0, 1))
                # Honour --output_path instead of a hard-coded directory.
                save_image(
                    image_numpy,
                    os.path.join(output_dir, "{}_{}.png".format(i, j)))

        elif model_type == "esrgan":
            lq = data['lq'].numpy()
            input_handles[0].copy_from_cpu(lq)
            predictor.run()
            prediction = output_handle.copy_to_cpu()
            prediction = paddle.to_tensor(prediction[0])
            image_numpy = tensor2img(prediction, min_max)
            gt_numpy = tensor2img(data['gt'][0], min_max)
            save_image(image_numpy, image_path)
            for metric in metrics.values():
                metric.update(image_numpy, gt_numpy)
            # Only the first batch is evaluated for this model type.
            break
        elif model_type == "edvr":
            lq = data['lq'].numpy()
            input_handles[0].copy_from_cpu(lq)
            predictor.run()
            prediction = output_handle.copy_to_cpu()
            prediction = paddle.to_tensor(prediction[0])
            image_numpy = tensor2img(prediction, min_max)
            gt_numpy = tensor2img(data['gt'][0, 0], min_max)
            save_image(image_numpy, image_path)
            for metric in metrics.values():
                metric.update(image_numpy, gt_numpy)
            # Only the first batch is evaluated for this model type.
            break
        elif model_type == "stylegan2":
            noise = paddle.randn([1, 1, 512]).cpu().numpy()
            input_handles[0].copy_from_cpu(noise)
            input_handles[1].copy_from_cpu(np.array([0.7]).astype('float32'))
            predictor.run()
            prediction = output_handle.copy_to_cpu()
            prediction = paddle.to_tensor(prediction)
            image_numpy = tensor2img(prediction[0], min_max)
            save_image(image_numpy, image_path)
            real_img = paddle.to_tensor(data['A'])
            for metric in metrics.values():
                metric.update(prediction, real_img)
        elif model_type in ["basicvsr", "msvsr"]:
            lq = data['lq'].numpy()
            input_handles[0].copy_from_cpu(lq)
            predictor.run()
            # When the model exposes several outputs, read the last one.
            if len(predictor.get_output_names()) > 1:
                output_handle = predictor.get_output_handle(
                    predictor.get_output_names()[-1])
            prediction = output_handle.copy_to_cpu()
            prediction = paddle.to_tensor(prediction)
            _, t, _, _, _ = prediction.shape

            out_img = []
            gt_img = []
            for ti in range(t):
                out_tensor = prediction[0, ti]
                gt_tensor = data['gt'][0, ti]
                out_img.append(tensor2img(out_tensor, (0., 1.)))
                gt_img.append(tensor2img(gt_tensor, (0., 1.)))

            image_numpy = tensor2img(prediction[0], min_max)
            save_image(image_numpy, image_path)

            for metric in metrics.values():
                metric.update(out_img, gt_img, is_seq=True)
        elif model_type == "singan":
            # No input is fed for this model type before running.
            predictor.run()
            prediction = output_handle.copy_to_cpu()
            prediction = paddle.to_tensor(prediction)
            image_numpy = tensor2img(prediction, min_max)
            save_image(image_numpy, image_path)
            for metric in metrics.values():
                metric.update(prediction, data['A'])
        elif model_type == 'gfpgan':
            input_handles[0].copy_from_cpu(data['lq'].numpy())
            predictor.run()
            prediction = output_handle.copy_to_cpu()
            prediction = paddle.to_tensor(prediction)
            image_numpy = tensor2img(prediction, min_max)
            save_image(image_numpy, image_path)
        elif model_type == "swinir":
            lq = data[1].numpy()
            _, _, h_old, w_old = lq.shape
            window_size = 8
            tile = 128
            tile_overlap = 32
            # Extend H and W up to the next multiple of window_size by
            # appending a flipped copy of the image and cropping.
            h_pad = (h_old // window_size + 1) * window_size - h_old
            w_pad = (w_old // window_size + 1) * window_size - w_old
            lq = np.concatenate([lq, np.flip(lq, 2)],
                                axis=2)[:, :, :h_old + h_pad, :]
            lq = np.concatenate([lq, np.flip(lq, 3)],
                                axis=3)[:, :, :, :w_old + w_pad]
            lq = lq.astype("float32")

            b, c, h, w = lq.shape
            tile = min(tile, h, w)
            assert tile % window_size == 0, "tile size should be a multiple of window_size"
            sf = 1  # scale
            stride = tile - tile_overlap
            h_idx_list = list(range(0, h - tile, stride)) + [h - tile]
            w_idx_list = list(range(0, w - tile, stride)) + [w - tile]
            # E accumulates tile outputs, W counts how often each pixel was
            # covered; their ratio averages the overlapping regions.
            E = np.zeros([b, c, h * sf, w * sf], dtype=np.float32)
            W = np.zeros_like(E)

            for h_idx in h_idx_list:
                for w_idx in w_idx_list:
                    in_patch = lq[..., h_idx:h_idx + tile, w_idx:w_idx + tile]
                    input_handles[0].copy_from_cpu(in_patch)
                    predictor.run()
                    out_patch = output_handle.copy_to_cpu()
                    out_patch_mask = np.ones_like(out_patch)

                    E[..., h_idx * sf:(h_idx + tile) * sf,
                      w_idx * sf:(w_idx + tile) * sf] += out_patch
                    W[..., h_idx * sf:(h_idx + tile) * sf,
                      w_idx * sf:(w_idx + tile) * sf] += out_patch_mask

            output = np.true_divide(E, W)
            # Crop the padding off again.
            prediction = output[..., :h_old * sf, :w_old * sf]

            prediction = paddle.to_tensor(prediction)
            target = tensor2img(data[0], (0., 1.))
            prediction = tensor2img(prediction, (0., 1.))

            for metric in metrics.values():
                metric.update(prediction, target)

            lq = tensor2img(data[1], (0., 1.))

            # Save input, prediction and target concatenated along axis 1.
            sample_result = np.concatenate((lq, prediction, target), 1)
            sample = cv2.cvtColor(sample_result, cv2.COLOR_RGB2BGR)
            cv2.imwrite(image_path, sample)
        elif model_type == "invdn":
            noisy = data[0].numpy()
            noise_channel = 3 * 4**(cfg.model.generator.down_num) - 3
            input_handles[0].copy_from_cpu(noisy)
            input_handles[1].copy_from_cpu(
                np.random.randn(noisy.shape[0], noise_channel, noisy.shape[2],
                                noisy.shape[3]).astype(np.float32))
            predictor.run()
            output_handles = [
                predictor.get_output_handle(name)
                for name in predictor.get_output_names()
            ]
            prediction = output_handles[0].copy_to_cpu()
            prediction = paddle.to_tensor(prediction[0])
            image_numpy = tensor2img(prediction, min_max)
            gt_numpy = tensor2img(data[1], min_max)
            save_image(image_numpy, image_path)
            for metric in metrics.values():
                metric.update(image_numpy, gt_numpy)
            # Only the first batch is evaluated for this model type.
            break
        elif model_type == 'aotgan':
            input_data = paddle.concat((data['img'], data['mask']), axis=1).numpy()
            input_handles[0].copy_from_cpu(input_data)
            predictor.run()
            prediction = output_handle.copy_to_cpu()
            prediction = paddle.to_tensor(prediction)
            image_numpy = tensor2img(prediction, min_max)
            save_image(image_numpy, image_path)

    if metrics:
        # Append so results from earlier runs are kept; the context manager
        # closes the file even if accumulate() raises.
        with open(metric_file, 'a') as log_file:
            for metric_name, metric in metrics.items():
                loss_string = "Metric {}: {:.4f}".format(metric_name,
                                                         metric.accumulate())
                print(loss_string, file=log_file)


# Run inference only when executed as a script, not when imported.
if __name__ == '__main__':
    main()