#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import random

Z
zhangjinchao01 已提交
16 17 18
from paddle.trainer.PyDataProvider2 import *


Y
Yu Yang 已提交
19
# NOTE(review): this provider passes ``slots=`` while the other providers in
# this file pass ``input_types=``; presumably deliberate -- confirm before
# changing it.
@provider(slots=[dense_vector(200, seq_type=SequenceType.NO_SEQUENCE)])
def test_dense_no_seq(setting, filename):
    """Yield 200 dense samples, each a list of 200 floats.

    Element ``j`` of sample ``i`` is ``(j - 100) * (i + 1) / 200.0``.
    Neither ``setting`` nor ``filename`` is read by this generator.
    """
    for i in xrange(200):
        yield [(float(j - 100) * float(i + 1)) / 200.0 for j in xrange(200)]


@provider(
    input_types=[integer_value(200, seq_type=SequenceType.NO_SEQUENCE)])
def test_index_no_seq(setting, filename):
    """Yield the integers 0 through 199, one per sample.

    Neither ``setting`` nor ``filename`` is read by this generator.
    """
    for index in xrange(200):
        yield index


def test_init_hooker(setting, value, **kwargs):
    """Store ``value`` on ``setting`` as the attribute ``value``.

    Any additional keyword arguments are accepted and ignored.
    """
    setting.value = value


35 36 37 38
@provider(
    input_types=[dense_vector(
        20, seq_type=SequenceType.NO_SEQUENCE)],
    init_hook=test_init_hooker)
def test_init_hook(setting, filename):
    """Yield ``setting.value`` 200 times.

    ``test_init_hooker`` is registered as ``init_hook``; presumably the
    provider framework calls it with this same ``setting`` object so that
    ``setting.value`` exists before iteration starts -- confirm against
    PyDataProvider2. ``filename`` is not read.
    """
    for i in xrange(200):
        yield setting.value


44 45 46 47
@provider(input_types=[
    sparse_binary_vector(
        30000, seq_type=SequenceType.NO_SEQUENCE)
])
def test_sparse_non_value_no_seq(setting, filename):
    """Yield 200 samples, each a list of 10 integers.

    Entry ``j`` of sample ``i`` is ``(i + 1) * (j + 1)``, so the largest
    value produced is ``200 * 10 = 2000``. Neither ``setting`` nor
    ``filename`` is read by this generator.
    """
    for i in xrange(200):
        yield [(i + 1) * (j + 1) for j in xrange(10)]


53 54 55 56
@provider(input_types=[
    sparse_float_vector(
        30000, seq_type=SequenceType.NO_SEQUENCE)
])
def test_sparse_value_no_seq(setting, filename):
    """Yield 200 samples, each a list of 10 ``(int, float)`` pairs.

    Pair ``j`` of sample ``i`` is
    ``((i + 1) * (j + 1), float(j) / float(i + 1))``. Neither ``setting``
    nor ``filename`` is read by this generator.
    """
    for i in xrange(200):
        yield [((i + 1) * (j + 1), float(j) / float(i + 1)) for j in xrange(10)]


@provider(
    input_types=[integer_value(200, seq_type=SequenceType.SEQUENCE)])
def test_index_seq(setting, filename):
    """Yield 200 samples; the n-th one (counting from 1) is ``range(n)``.

    Neither ``setting`` nor ``filename`` is read by this generator.
    """
    for length in xrange(1, 201):
        yield range(length)


@provider(
    input_types=[index_slot(200, seq_type=SequenceType.SUB_SEQUENCE)])
def test_index_sub_seq(setting, filename):
    """Yield 200 nested samples of growing size.

    Sample ``i`` is a list of ``i + 1`` entries, where entry ``j`` is
    ``range(j + 1)``. Neither ``setting`` nor ``filename`` is read.
    """
    for outer in xrange(200):
        yield [range(inner + 1) for inner in xrange(outer + 1)]
77 78 79 80 81 82 83 84


@provider(
    input_types=[index_slot(100)],
    min_pool_size=1000)
def test_min_pool_size(setting, filename):
    """Yield ``1 << 14`` random integers, each drawn from [0, 99].

    Neither ``setting`` nor ``filename`` is read by this generator.
    """
    sample_count = 1 << 14
    upper_bound = 100 - 1
    for _ in xrange(sample_count):
        yield random.randint(0, upper_bound)


85 86 87 88 89
@provider(
    input_types=[index_slot(
        100, seq_type=SequenceType.SEQUENCE)],
    can_over_batch_size=False,
    # Size of a sample is the length of its first element.
    calc_batch_size=lambda x: len(x[0]))
def test_can_over_batch_size(setting, filename):
    """Yield ``1 << 10`` random-length sequences of random integers.

    Each sequence has between 0 and 99 entries (so it may be empty), and
    every entry is drawn from [0, 99]. Neither ``setting`` nor ``filename``
    is read by this generator.
    """
    for _ in xrange(1 << 10):
        seq_len = random.randint(0, 99)
        yield [random.randint(0, 100 - 1) for _ in xrange(seq_len)]


96
@provider(input_types={'input1': index_slot(10), 'input2': index_slot(10)})
def test_input_order(setting, filename):
    """Yield the same dict sample 1000 times.

    Every sample is ``{'input1': 0, 'input2': 1}``; its keys match the keys
    of the ``input_types`` dict passed to the decorator. Neither ``setting``
    nor ``filename`` is read by this generator.
    """
    for _ in xrange(1000):
        yield {'input1': 0, 'input2': 1}
100 101


102 103 104 105 106
@provider(
    input_types=[index_slot(10)],
    check=True,
    check_fail_continue=True,
    should_shuffle="123")  # also test should shuffle
def test_check(settings, filename):
    """Yield random integers in [0, 100] in rounds of 10000.

    Rounds repeat until at least one value below 10 has been yielded; the
    round in which that happens still runs to completion.

    NOTE(review): presumably values >= 10 are out of range for
    ``index_slot(10)`` and are skipped because of ``check=True`` /
    ``check_fail_continue=True`` -- confirm against PyDataProvider2.
    Neither ``settings`` nor ``filename`` is read by this generator.
    """
    yield_good_value = False

    while not yield_good_value:
        for _ in xrange(10000):
            i = random.randint(0, 100)
            if i < 10:
                # Remember that at least one value below 10 was produced.
                yield_good_value = True
            yield i
Y
Yu Yang 已提交
116 117 118 119 120 121 122 123 124 125


@provider(
    input_types=[index_slot(10)],
    min_pool_size=1000,
    cache=CacheType.CACHE_PASS_IN_MEM)
def test_min_pool_size_with_cache(settings, filename):
    """Yield ``2**20`` random integers, each drawn from [0, 9].

    Neither ``settings`` nor ``filename`` is read by this generator.
    """
    import random

    total_samples = 2**20
    for _ in xrange(total_samples):
        yield random.randint(0, 9)