From b098ef69a4a45efa8011a2f4b93ee55eca4bcc26 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Wed, 24 May 2017 19:26:39 +0800 Subject: [PATCH] "remove the rar extractfile, prevent small files" --- python/paddle/v2/dataset/__init__.py | 3 ++- python/paddle/v2/dataset/mq2007.py | 6 ++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py index 80ff6295c34..26252d5bbd7 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/v2/dataset/__init__.py @@ -24,8 +24,9 @@ import conll05 import uci_housing import sentiment import wmt14 +import mq2007 __all__ = [ 'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment' - 'uci_housing', 'wmt14' + 'uci_housing', 'wmt14', 'mq2007' ] diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/v2/dataset/mq2007.py index fd71b341662..d8c9918d140 100644 --- a/python/paddle/v2/dataset/mq2007.py +++ b/python/paddle/v2/dataset/mq2007.py @@ -41,9 +41,7 @@ def __initialize_meta_info__(): """ fn = fetch() rar = rarfile.RarFile(fn) - dirpath = os.path.dirname(fn) - rar.extractall(path=dirpath) - return dirpath + return rar class Query(object): @@ -273,7 +271,7 @@ def load_from_text(filepath, shuffle=True, fill_missing=-1): querylists = [] querylist = None fn = __initialize_meta_info__() - with open(os.path.join(fn, filepath)) as f: + with fn.open(os.path.join(fn, filepath)) as f: for line in f: query = Query() query = query._parse_(line) -- GitLab