# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Creator package contains some simple reader creators, which can
be used in user programs.
"""

# Names exported by ``from <this module> import *``. Note that
# recordio_local, which is also defined in this module, is not listed here.
__all__ = ['np_array', 'text_file', "cloud_reader"]


def np_array(x):
    """
    Creates a reader that yields the elements of x if it is a numpy
    vector, the rows of x if it is a numpy matrix, or, more generally,
    the sub-arrays obtained by iterating over the first axis of x.

    A zero-dimensional array cannot be iterated, so it is yielded once,
    unchanged.

    :param x: the numpy array to create reader from.
    :returns: data reader created from x.
    """

    def reader():
        if x.ndim < 1:
            # Iterating a 0-d array raises TypeError, so emit the array
            # itself and stop instead of falling through to the loop.
            yield x
            return

        for e in x:
            yield e

    return reader


def text_file(path):
    """
    Creates a data reader that outputs text line by line from given text file.
    Trailing new line ('\\n') of each line will be removed.

    The file is opened each time the returned reader is called.

    :param path: path of the text file.
    :returns: data reader of text file
    """

    def reader():
        # The context manager closes the file even when the consumer stops
        # iterating early (generator closed) or an error is raised mid-read.
        with open(path, "r") as f:
            for line in f:
                yield line.rstrip('\n')

    return reader


def recordio_local(paths, buf_size=100):
    """
    Creates a data reader from given RecordIO file paths; glob pattern
    is supported.

    :param paths: path(s) of recordio files: either one string in which
        several paths are separated by ",", or a list of path strings.
    :param buf_size: buffer size handed to
        paddle.v2.reader.decorator.buffered, which wraps the reader.
    :returns: data reader of recordio files.
    """

    import recordio as rec
    import paddle.v2.reader.decorator as dec

    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str

    def reader():
        # Joining a plain string would insert "," between every character,
        # so only join when a sequence of paths was given.
        if isinstance(paths, string_types):
            path = paths
        else:
            path = ','.join(paths)
        f = rec.reader(path)
        try:
            while True:
                r = f.read()
                if r is None:
                    # read() returning None marks the end of the records.
                    break
                yield r
        finally:
            # Also runs when the generator is closed before exhaustion or
            # read() raises, so the underlying reader is always released.
            f.close()

    return dec.buffered(reader, buf_size)


# Module-wide counter of passes started by readers created via cloud_reader;
# it is handed to the master client and incremented on every reader() call.
pass_num = 0


def cloud_reader(paths, etcd_endpoints, timeout_sec=5, buf_size=64):
    """
    Create a data reader that yields records one by one from
    the given paths, fetched through the master client.

    :param paths: path of recordio files.
    :param etcd_endpoints: the endpoints for etcd cluster
    :param timeout_sec: forwarded to paddle.v2.master.client.
    :param buf_size: forwarded to paddle.v2.master.client.
    :returns: data reader of recordio files.

    ..  code-block:: python

        from paddle.v2.reader.creator import cloud_reader
        etcd_endpoints = "http://127.0.0.1:2379"
        trainer.train(
            reader=cloud_reader(["/work/dataset/uci_housing/uci_housing*"], etcd_endpoints),
        )
    """
    import sys
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    import paddle.v2.master as master
    c = master.client(etcd_endpoints, timeout_sec, buf_size)
    c.set_dataset(paths)

    def reader():
        global pass_num
        c.paddle_start_get_records(pass_num)
        pass_num += 1

        while True:
            r, e = c.next_record()
            if not r:
                # NOTE(review): -2 looks like the normal "no more records in
                # this pass" code -- confirm against the master client.
                if e != -2:
                    # Same bytes as the Python 2 statement
                    # `print "get record error: ", e` (hence two spaces).
                    sys.stdout.write("get record error:  %s\n" % (e, ))
                break
            # SECURITY: pickle.loads executes arbitrary code embedded in the
            # payload; only use this reader with trusted datasets.
            yield pickle.loads(r)

    return reader