提交 70fc5682 编写于 作者: M Megvii Engine Team

docs(mge/data): update MNIST dataset docstring

GitOrigin-RevId: 536a46325fed874e8e892c835e4cc2ee3932901b
上级 8fb062df
......@@ -15,50 +15,73 @@ logger = get_logger(__name__)
class MNIST(VisionDataset):
r""":class:`~.Dataset` for MNIST meta data."""
url_path = "http://yann.lecun.com/exdb/mnist/"
"""
Url prefix for downloading raw file.
r"""MNIST dataset.
The MNIST_ database (Modified National Institute of Standards and Technology database)
is a large database of handwritten digits that is commonly used for training various image processing systems.
The database is also widely used for training and testing in the field of machine learning.
It was created by "re-mixing" the samples from `NIST`_'s original datasets.
Furthermore, the black and white images from NIST were normalized to fit into a 28x28 pixel
bounding box and anti-aliased, which introduced grayscale levels.
The MNIST database contains 60,000 training images and 10,000 testing images.
The above introduction comes from `MNIST database - Wikipedia
<https://en.wikipedia.org/wiki/MNIST_database>`_.
Args:
root: Path for MNIST dataset downloading or loading. If it's ``None``,
it will be set to ``~/.cache/megengine`` (the default root path).
train: If ``True``, use the training dataset; otherwise use the test set.
download: If ``True``, downloads the dataset from the internet and puts it in the ``root`` directory.
If the dataset is already downloaded, it is not downloaded again.
Returns:
The MNIST :class:`~.Dataset` that can work with :class:`~.DataLoader`.
Example:
>>> from megengine.data.dataset import MNIST # doctest: +SKIP
>>> mnist = MNIST("/data/datasets/MNIST") # Set the root path # doctest: +SKIP
>>> image, label = mnist[0] # doctest: +SKIP
>>> image.shape # doctest: +SKIP
(28, 28, 1)
.. versionchanged:: 1.11 The original URL has been updated to a mirror URL
*"Please refrain from accessing these files from automated scripts with high frequency. Make copies!"*
As requested by the original provider of the MNIST dataset,
now the dataset will be downloaded from the mirror site:
https://ossci-datasets.s3.amazonaws.com/mnist/
.. seealso::
* MNIST dataset is used in :ref:`megengine-quick-start` tutorial as an example.
* You can find a lot of machine learning projects using MNIST dataset on the internet.
.. _MNIST: http://yann.lecun.com/exdb/mnist/
.. _NIST: https://www.nist.gov/data
"""
url_path = "https://ossci-datasets.s3.amazonaws.com/mnist/"
raw_file_name = [
"train-images-idx3-ubyte.gz",
"train-labels-idx1-ubyte.gz",
"t10k-images-idx3-ubyte.gz",
"t10k-labels-idx1-ubyte.gz",
]
"""
Raw file names of both training set and test set (10k).
"""
raw_file_md5 = [
"f68b3c2dcbeaaa9fbdd348bbdeb94873",
"d53e105ee54ea40749a09fcbcd1e9432",
"9fb629c4189551a2d022fa330f9573f3",
"ec29112dd5afa0611ce80d1b7f02629c",
]
"""
Md5 for checking raw files.
"""
def __init__(
self,
root: str = None,
train: bool = True,
download: bool = True,
timeout: int = 500,
self, root: str = None, train: bool = True, download: bool = True,
):
r"""
:param root: path for mnist dataset downloading or loading, if ``None``,
set ``root`` to the ``_default_root``.
:param train: if ``True``, load the training set; otherwise load the test set.
:param download: if the raw files do not exist and ``download`` is ``True``,
download and process the raw files; otherwise raise ValueError. Default is True.
"""
super().__init__(root, order=("image", "image_category"))
self.timeout = timeout
# process the root path
if root is None:
self.root = self._default_root
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册