# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Testing configuration manager
"""
# Standard library imports
import filecmp
import glob
import os

# Third-party imports
import numpy as np

# MindSpore imports
import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as vision
import mindspore.dataset.transforms.vision.py_transforms as py_vision
from mindspore import log as logger

# Shared test fixtures: a 3-image TFRecord file and its JSON schema.
DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"


def test_basic():
    """
    Exercise the basic configuration getters and setters.
    """
    # Snapshot the current settings so they can be restored afterwards.
    saved_workers = ds.config.get_num_parallel_workers()
    saved_prefetch = ds.config.get_prefetch_size()
    saved_seed = ds.config.get_seed()

    ds.config.load('../data/dataset/declient.cfg')

    # The expected values below come from the loaded config file.
    # assert ds.config.get_rows_per_buffer() == 32
    assert ds.config.get_num_parallel_workers() == 4
    # assert ds.config.get_worker_connector_size() == 16
    assert ds.config.get_prefetch_size() == 16
    assert ds.config.get_seed() == 5489

    # Override each setting, then read every value back.
    # ds.config.set_rows_per_buffer(1)
    ds.config.set_num_parallel_workers(2)
    # ds.config.set_worker_connector_size(3)
    ds.config.set_prefetch_size(4)
    ds.config.set_seed(5)

    # assert ds.config.get_rows_per_buffer() == 1
    assert ds.config.get_num_parallel_workers() == 2
    # assert ds.config.get_worker_connector_size() == 3
    assert ds.config.get_prefetch_size() == 4
    assert ds.config.get_seed() == 5

    # Put the configuration back the way we found it.
    ds.config.set_num_parallel_workers(saved_workers)
    ds.config.set_prefetch_size(saved_prefetch)
    ds.config.set_seed(saved_seed)


def test_get_seed():
    """
    Getting the seed without explicitly setting one first should still
    yield an integer (the implicit default).
    """
    seed_value = ds.config.get_seed()
    assert isinstance(seed_value, int)


def test_pipeline():
    """
    Test that our configuration pipeline works when we set parameters at
    different locations in dataset code: serialize two pipelines built under
    different num_parallel_workers settings and compare the output files.
    """
    # Save original configuration values
    num_parallel_workers_original = ds.config.get_num_parallel_workers()

    try:
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
        ds.config.set_num_parallel_workers(2)
        data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True)])
        ds.serialize(data1, "testpipeline.json")

        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
        ds.config.set_num_parallel_workers(4)
        data2 = data2.map(input_columns=["image"], operations=[vision.Decode(True)])
        ds.serialize(data2, "testpipeline2.json")

        # NOTE: intuitively the two files should differ (different
        # num_parallel_workers), but this equality assertion passes today
        # because num_parallel_workers does not get updated in the
        # serialized output.
        assert filecmp.cmp('testpipeline.json', 'testpipeline2.json')
    finally:
        # Remove only the files this test generated.  The previous
        # glob('*.json') deleted every json file in the working directory,
        # including files other tests may rely on.
        for f in ("testpipeline.json", "testpipeline2.json"):
            try:
                os.remove(f)
            except OSError:
                logger.info("Error while deleting: {}".format(f))

        # Restore original configuration values even if the assertion fails
        ds.config.set_num_parallel_workers(num_parallel_workers_original)

def test_deterministic_run_fail():
    """
    Test RandomCrop with seed, expected to fail: the SAME RandomCrop instance
    is reused by both pipelines, so the two pipelines split one random
    sequence between them instead of replaying it.
    """
    logger.info("test_deterministic_run_fail")

    # Save original configuration values
    num_parallel_workers_original = ds.config.get_num_parallel_workers()
    seed_original = ds.config.get_seed()

    # when we set the seed all operations within our dataset should be deterministic
    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    try:
        # First dataset
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        # Assuming we get the same seed on calling constructor, if this op is re-used then result won't be
        # the same in between the two datasets. For example, RandomCrop constructor takes seed (0),
        # outputs a deterministic series of numbers, e.g. "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random
        random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200])
        decode_op = vision.Decode()
        data1 = data1.map(input_columns=["image"], operations=decode_op)
        data1 = data1.map(input_columns=["image"], operations=random_crop_op)

        # Second dataset deliberately reuses the same RandomCrop instance
        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        data2 = data2.map(input_columns=["image"], operations=decode_op)
        data2 = data2.map(input_columns=["image"], operations=random_crop_op)

        try:
            for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
                np.testing.assert_equal(item1["image"], item2["image"])
        except Exception as e:  # narrowed from BaseException: AssertionError is an Exception
            # two datasets split the numbers out of the sequence "a"
            logger.info("Got an exception in DE: {}".format(str(e)))
            assert "Array" in str(e)
    finally:
        # Restore original configuration values even if an assertion fails
        ds.config.set_num_parallel_workers(num_parallel_workers_original)
        ds.config.set_seed(seed_original)


def test_deterministic_run_pass():
    """
    Test deterministic run with setting the seed: two pipelines each
    construct their OWN RandomCrop, and both ops read the same global seed
    at construction time, so their outputs should match.
    """
    logger.info("test_deterministic_run_pass")

    # Save original configuration values
    num_parallel_workers_original = ds.config.get_num_parallel_workers()
    seed_original = ds.config.get_seed()

    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    try:
        # First dataset
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        # We get the seed when the constructor is called
        random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200])
        decode_op = vision.Decode()
        data1 = data1.map(input_columns=["image"], operations=decode_op)
        data1 = data1.map(input_columns=["image"], operations=random_crop_op)

        # Second dataset
        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        data2 = data2.map(input_columns=["image"], operations=decode_op)
        # Since the seed is read on construction, the two ops output the same
        # deterministic sequence "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random
        random_crop_op2 = vision.RandomCrop([512, 512], [200, 200, 200, 200])
        data2 = data2.map(input_columns=["image"], operations=random_crop_op2)

        try:
            for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
                np.testing.assert_equal(item1["image"], item2["image"])
        except Exception as e:  # narrowed from BaseException: AssertionError is an Exception
            # two datasets both use numbers from the generated sequence "a"
            logger.info("Got an exception in DE: {}".format(str(e)))
            assert "Array" in str(e)
    finally:
        # Restore original configuration values even if an assertion fails
        ds.config.set_num_parallel_workers(num_parallel_workers_original)
        ds.config.set_seed(seed_original)


def test_seed_undeterministic():
    """
    Test seed with num parallel workers in c; this test is expected to fail
    some of the time.
    """
    logger.info("test_seed_undeterministic")

    # Remember the current settings so they can be restored at the end.
    workers_backup = ds.config.get_num_parallel_workers()
    seed_backup = ds.config.get_seed()

    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    decode_op = vision.Decode()

    def build_pipeline():
        # Each RandomCrop reads the global seed when it is constructed, so
        # both pipelines should get the same deterministic sequence.
        crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200])
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        data = data.map(input_columns=["image"], operations=decode_op)
        return data.map(input_columns=["image"], operations=crop_op)

    data1 = build_pipeline()
    data2 = build_pipeline()

    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        np.testing.assert_equal(item1["image"], item2["image"])

    # Put the configuration back the way we found it.
    ds.config.set_num_parallel_workers(workers_backup)
    ds.config.set_seed(seed_backup)


def test_deterministic_run_distribution():
    """
    Test deterministic run with the seed being used in a distribution
    (RandomHorizontalFlip with probability 0.1 draws from the seeded RNG).
    """
    logger.info("test_deterministic_run_distribution")

    # Save original configuration values
    num_parallel_workers_original = ds.config.get_num_parallel_workers()
    seed_original = ds.config.get_seed()

    # when we set the seed all operations within our dataset should be deterministic
    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    try:
        # First dataset
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        # renamed from the misleading "random_crop_op" -- this op is a flip, not a crop
        random_flip_op = vision.RandomHorizontalFlip(0.1)
        decode_op = vision.Decode()
        data1 = data1.map(input_columns=["image"], operations=decode_op)
        data1 = data1.map(input_columns=["image"], operations=random_flip_op)

        # Second dataset
        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        data2 = data2.map(input_columns=["image"], operations=decode_op)
        # Seed is read at construction, so both ops should yield the same sequence
        random_flip_op2 = vision.RandomHorizontalFlip(0.1)
        data2 = data2.map(input_columns=["image"], operations=random_flip_op2)

        for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
            np.testing.assert_equal(item1["image"], item2["image"])
    finally:
        # Restore original configuration values even if an assertion fails
        ds.config.set_num_parallel_workers(num_parallel_workers_original)
        ds.config.set_seed(seed_original)


def test_deterministic_python_seed():
    """
    Test deterministic execution with seed in python transforms: resetting
    the seed between two runs of the same ComposeOp pipeline must reproduce
    identical output.
    """
    # fixed: previously logged a copy-pasted, wrong test name
    logger.info("test_deterministic_python_seed")

    # Save original configuration values
    num_parallel_workers_original = ds.config.get_num_parallel_workers()
    seed_original = ds.config.get_seed()

    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    try:
        # First dataset
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

        transforms = [
            py_vision.Decode(),
            py_vision.RandomCrop([512, 512], [200, 200, 200, 200]),
            py_vision.ToTensor(),
        ]
        transform = py_vision.ComposeOp(transforms)
        data1 = data1.map(input_columns=["image"], operations=transform())
        data1_output = []
        # config.set_seed() calls random.seed()
        for data_one in data1.create_dict_iterator():
            data1_output.append(data_one["image"])

        # Second dataset
        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        data2 = data2.map(input_columns=["image"], operations=transform())
        # config.set_seed() calls random.seed(); reset so the second iterator
        # replays the same random sequence as the first
        ds.config.set_seed(0)

        data2_output = []
        for data_two in data2.create_dict_iterator():
            data2_output.append(data_two["image"])

        np.testing.assert_equal(data1_output, data2_output)
    finally:
        # Restore original configuration values even if the assertion fails
        ds.config.set_num_parallel_workers(num_parallel_workers_original)
        ds.config.set_seed(seed_original)


def test_deterministic_python_seed_multi_thread():
    """
    Test deterministic execution with seed in python; this fails with
    multi-thread pyfunc run (python_multiprocessing=True), so a mismatch
    between the two outputs is the expected outcome.
    """
    # fixed: previously logged a copy-pasted, wrong test name
    logger.info("test_deterministic_python_seed_multi_thread")

    # Save original configuration values
    seed_original = ds.config.get_seed()

    ds.config.set_seed(0)
    try:
        # when we set the seed all operations within our dataset should be deterministic
        # First dataset
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        transforms = [
            py_vision.Decode(),
            py_vision.RandomCrop([512, 512], [200, 200, 200, 200]),
            py_vision.ToTensor(),
        ]
        transform = py_vision.ComposeOp(transforms)
        data1 = data1.map(input_columns=["image"], operations=transform(), python_multiprocessing=True)
        data1_output = []
        # config.set_seed() calls random.seed()
        for data_one in data1.create_dict_iterator():
            data1_output.append(data_one["image"])

        # Second dataset
        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        # If seed is set up on constructor
        data2 = data2.map(input_columns=["image"], operations=transform(), python_multiprocessing=True)
        # config.set_seed() calls random.seed()
        ds.config.set_seed(0)

        data2_output = []
        for data_two in data2.create_dict_iterator():
            data2_output.append(data_two["image"])

        try:
            np.testing.assert_equal(data1_output, data2_output)
        except Exception as e:  # narrowed from BaseException: AssertionError is an Exception
            # expect output to not match during multi-threaded execution
            logger.info("Got an exception in DE: {}".format(str(e)))
            assert "Array" in str(e)
    finally:
        # Restore original configuration values even if an assertion fails
        ds.config.set_seed(seed_original)

if __name__ == '__main__':
    # Run every test in the same order the original script used.
    for test_case in (test_basic,
                      test_pipeline,
                      test_deterministic_run_pass,
                      test_deterministic_run_distribution,
                      test_deterministic_run_fail,
                      test_deterministic_python_seed,
                      test_seed_undeterministic,
                      test_get_seed):
        test_case()