From 233658adb19ab7de1a20287ae53864a30f2351a8 Mon Sep 17 00:00:00 2001
From: wuyefeilin <30919197+wuyefeilin@users.noreply.github.com>
Date: Wed, 2 Aug 2023 10:59:32 +0800
Subject: [PATCH] add dataset alias for PaddleX (#10528)

---
Notes (review commentary; not part of the commit message or the change):
    Hunk 1 binds five new module-level names -- TextDetDataset,
    TextRecDataset, MSTextRecDataset, PubTabTableRecDataset, KieDataset --
    to SimpleDataSet, MultiScaleDataSet and PubTabDataSet, and re-wraps
    __all__ across three lines without changing its four entries.
    PubTabDataSet is imported in the visible context; SimpleDataSet and
    MultiScaleDataSet are presumably imported above the hunk -- confirm.

    Hunk 2 puts one entry per line in support_dict inside build_dataloader,
    keeps the seven existing names, and appends the five alias names so
    they pass the "module_name in support_dict" assertion.

    NOTE(review): how module_name is turned into a class lies outside the
    hunk context; the aliases presumably must be resolvable by name in this
    module for the new support_dict entries to work -- confirm.

 ppocr/data/__init__.py | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/ppocr/data/__init__.py b/ppocr/data/__init__.py
index 71d878b6..48cd8ad8 100644
--- a/ppocr/data/__init__.py
+++ b/ppocr/data/__init__.py
@@ -39,7 +39,16 @@ from ppocr.data.pgnet_dataset import PGDataSet
 from ppocr.data.pubtab_dataset import PubTabDataSet
 from ppocr.data.multi_scale_sampler import MultiScaleSampler
 
-__all__ = ['build_dataloader', 'transform', 'create_operators', 'set_signal_handlers']
+# for PaddleX dataset_type
+TextDetDataset = SimpleDataSet
+TextRecDataset = SimpleDataSet
+MSTextRecDataset = MultiScaleDataSet
+PubTabTableRecDataset = PubTabDataSet
+KieDataset = SimpleDataSet
+
+__all__ = [
+    'build_dataloader', 'transform', 'create_operators', 'set_signal_handlers'
+]
 
 
 def term_mp(sig_num, frame):
@@ -76,8 +85,18 @@ def build_dataloader(config, mode, device, logger, seed=None):
     config = copy.deepcopy(config)
 
     support_dict = [
-        'SimpleDataSet', 'LMDBDataSet', 'PGDataSet', 'PubTabDataSet',
-        'LMDBDataSetSR', 'LMDBDataSetTableMaster', 'MultiScaleDataSet'
+        'SimpleDataSet',
+        'LMDBDataSet',
+        'PGDataSet',
+        'PubTabDataSet',
+        'LMDBDataSetSR',
+        'LMDBDataSetTableMaster',
+        'MultiScaleDataSet',
+        'TextDetDataset',
+        'TextRecDataset',
+        'MSTextRecDataset',
+        'PubTabTableRecDataset',
+        'KieDataset',
     ]
     module_name = config[mode]['dataset']['name']
     assert module_name in support_dict, Exception(
-- 
GitLab