# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
"""
At most cases, we have non-stream dataset, which means we can random access it with __getitem__, and we can get the length of the dataset with __len__.
At most cases, we have non-stream dataset, which means we can random access it with __getitem__, and we can get the length of the dataset with __len__.
...
@@ -6,10 +19,10 @@ This suffices for a sampler. We implemente sampler as iterable of valid indices.
...
@@ -6,10 +19,10 @@ This suffices for a sampler. We implemente sampler as iterable of valid indices.
So the sampler is only responsible for generating valid indices.
So the sampler is only responsible for generating valid indices.
"""
"""
# Standard-library imports first, then third-party (PEP 8 grouping).
import random

import numpy as np
class Sampler(object):
    """Base class for all samplers.

    A sampler is an iterable of valid dataset indices; subclasses are
    expected to implement ``__iter__`` (and usually ``__len__``).

    Args:
        data_source: The dataset to sample indices for. The base class
            stores nothing; the parameter exists so subclasses share a
            uniform constructor signature.
    """

    def __init__(self, data_source):
        # Intentionally a no-op: concrete samplers decide what (if
        # anything) to keep from data_source.
        pass
...
@@ -42,12 +55,14 @@ class RandomSampler(Sampler):
...
@@ -42,12 +55,14 @@ class RandomSampler(Sampler):
"replacement={}".format(self.replacement))
"replacement={}".format(self.replacement))
ifself._num_samplesisnotNoneandnotreplacement:
ifself._num_samplesisnotNoneandnotreplacement:
raiseValueError("With replacement=False, num_samples should not be specified, "
raiseValueError(
"With replacement=False, num_samples should not be specified, "
One of the reasons we choose to load data lazily (only load metadata before hand) is memory efficiency.
For deep learning practice, we typically batch examples. So the dataset should come with a method to batch examples. Assuming the record is implemented as a tuple with several items: when an item is represented as a fixed-size array, batching them is trivial — `np.stack` suffices. But for arrays with dynamic size, padding is needed. We decide to implement a batching method for each item. Then batching a record can be implemented by these methods. For a dataset, a `_batch_examples` method should be implemented. But in most cases, you can choose one from `batching.py`.