feature_column.py 1.8 KB
Newer Older
C
chenxuyi 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
C
chenxuyi 已提交
14
"""FeatureColumns and many Column"""
C
chenxuyi 已提交
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
from __future__ import print_function
from __future__ import absolute_import
from __future__ import unicode_literals

import os
import sys
import struct
from six.moves import zip, map
import itertools
import gzip
from functools import partial
import six
import logging

import numpy as np
from glob import glob

M
Meiyim 已提交
32
from propeller.data.feature_column import FeatureColumns as FCBase
C
chenxuyi 已提交
33
from propeller.paddle.data.functional import Dataset
M
Meiyim 已提交
34
import multiprocessing
C
chenxuyi 已提交
35 36 37

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

M
Meiyim 已提交
38
# Public API: only `FeatureColumns` is exported by a star-import.
__all__ = ['FeatureColumns']
C
chenxuyi 已提交
39 40


M
Meiyim 已提交
41
class FeatureColumns(FCBase):
    """Factory that produces Paddle `Dataset` objects.

    Both builders delegate to the base `FeatureColumns` implementation and
    then rebind the class of the returned object to the Paddle `Dataset`.
    """

    def build_dataset(self, *args, **kwargs):
        """
        Build a `Dataset` from `data_dir` or `data_file`.

        If `use_gz` is set, will try to convert the data files to gz format
        and save them to `gz_dir`; if `gz_dir` is not given, one is created.
        All positional and keyword arguments are forwarded unchanged to the
        base class, which does the actual work.

        Returns:
            The object built by the base class, with its class rebound to
            the Paddle `Dataset`.
        """
        base = super(FeatureColumns, self)
        dataset = base.build_dataset(*args, **kwargs)
        # Rebind the class in place instead of copying the object.
        dataset.__class__ = Dataset
        return dataset

    def build_dataset_from_stdin(self, *args, **kwargs):
        """
        Build a `Dataset` through the base class's `build_dataset_from_stdin`.

        All positional and keyword arguments are forwarded unchanged to the
        base class.

        Returns:
            The object built by the base class, with its class rebound to
            the Paddle `Dataset`.
        """
        base = super(FeatureColumns, self)
        dataset = base.build_dataset_from_stdin(*args, **kwargs)
        # Rebind the class in place instead of copying the object.
        dataset.__class__ = Dataset
        return dataset