From d97d98fe01dc3dc6e3e42c787269a7a89f96c4c2 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Thu, 10 Dec 2020 11:00:05 +0800 Subject: [PATCH] opt random sample --- ppocr/data/simple_dataset.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/ppocr/data/simple_dataset.py b/ppocr/data/simple_dataset.py index 1099fa44..ab17dd1a 100644 --- a/ppocr/data/simple_dataset.py +++ b/ppocr/data/simple_dataset.py @@ -48,12 +48,6 @@ class SimpleDataSet(Dataset): self.shuffle_data_random() self.ops = create_operators(dataset_config['transforms'], global_config) - def _sample_dataset(self, datas, sample_ratio): - sample_num = round(len(datas) * sample_ratio) - - nums, rem = int(sample_num // len(datas)), int(sample_num % len(datas)) - return list(datas) * nums + random.sample(datas, rem) - def get_image_info_list(self, file_list, ratio_list): if isinstance(file_list, str): file_list = [file_list] @@ -61,7 +55,8 @@ class SimpleDataSet(Dataset): for idx, file in enumerate(file_list): with open(file, "rb") as f: lines = f.readlines() - lines = self._sample_dataset(lines, ratio_list[idx]) + lines = random.sample(lines, + round(len(lines) * ratio_list[idx])) data_lines.extend(lines) return data_lines -- GitLab