docs(mge/data): update MNIST dataset docstring

GitOrigin-RevId: 536a46325fed874e8e892c835e4cc2ee3932901b

docs(mge/data): update MNIST dataset docstring
GitOrigin-RevId: 536a46325fed874e8e892c835e4cc2ee3932901b
70fc5682 · Megvii Engine Team · 8fb062df · 70fc5682
隐藏空白更改
内联并排

Showing 49 additions and 26 deletions

imperative/python/megengine/data/dataset/vision/mnist.py imperative/python/megengine/data/dataset/vision/mnist.py +49 -26

未找到文件。
--- a/imperative/python/megengine/data/dataset/vision/mnist.py
+++ b/imperative/python/megengine/data/dataset/vision/mnist.py
@@ -15,50 +15,73 @@ logger = get_logger(__name__)


 class MNIST(VisionDataset):
-    r""":class:`~.Dataset` for MNIST meta data."""
-
-    url_path = "http://yann.lecun.com/exdb/mnist/"
-    """
-    Url prefix for downloading raw file.
+    r"""MNIST dataset.
+    The MNIST_ database (Modified National Institute of Standards and Technology database)
+    is a large database of handwritten digits that is commonly used for training various image processing systems.
+    The database is also widely used for training and testing in the field of machine learning.
+    It was created by "re-mixing" the samples from `NIST`_'s original datasets.
+    Furthermore, the black and white images from NIST were normalized to fit into a 28x28 pixel
+    bounding box and anti-aliased, which introduced grayscale levels.
+    The MNIST database contains 60,000 training images and 10,000 testing images.
+
+    The above introduction comes from `MNIST database - Wikipedia
+    <https://en.wikipedia.org/wiki/MNIST_database>`_.
+
+    Args:
+        root: Path for MNIST dataset downloading or loading. If it's ``None``,
+            it will be set to ``~/.cache/megengine`` (the default root path).
+        train: If ``True``, use the training set; otherwise use the test set.
+        download: If ``True``, downloads the dataset from the internet and puts it in the ``root`` directory.
+            If the dataset is already downloaded, it is not downloaded again.
+
+    Returns:
+        The MNIST :class:`~.Dataset` that can work with :class:`~.DataLoader`.
+
+    Example:
+
+       >>> from megengine.data.dataset import MNIST   # doctest: +SKIP
+       >>> mnist = MNIST("/data/datasets/MNIST")  # Set the root path   # doctest: +SKIP
+       >>> image, label = mnist[0]  # doctest: +SKIP
+       >>> image.shape   # doctest: +SKIP
+       (28, 28, 1)
+
+    .. versionchanged:: 1.11 The original URL has been updated to a mirror URL
+
+       *"Please refrain from accessing these files from automated scripts with high frequency. Make copies!"*
+       As requested by the original provider of the MNIST dataset,
+       now the dataset will be downloaded from the mirror site:
+       https://ossci-datasets.s3.amazonaws.com/mnist/
+
+    .. seealso::
+
+       * The MNIST dataset is used in the :ref:`megengine-quick-start` tutorial as an example.
+       * You can find a lot of machine learning projects using the MNIST dataset on the internet.
+    
+    .. _MNIST: http://yann.lecun.com/exdb/mnist/
+    .. _NIST: https://www.nist.gov/data
    """
+
+    url_path = "https://ossci-datasets.s3.amazonaws.com/mnist/"
+
    raw_file_name = [
        "train-images-idx3-ubyte.gz",
        "train-labels-idx1-ubyte.gz",
        "t10k-images-idx3-ubyte.gz",
        "t10k-labels-idx1-ubyte.gz",
    ]
-    """
-    Raw file names of both training set and test set (10k).
-    """
+
    raw_file_md5 = [
        "f68b3c2dcbeaaa9fbdd348bbdeb94873",
        "d53e105ee54ea40749a09fcbcd1e9432",
        "9fb629c4189551a2d022fa330f9573f3",
        "ec29112dd5afa0611ce80d1b7f02629c",
    ]
-    """
-    Md5 for checking raw files.
-    """

    def __init__(
-        self,
-        root: str = None,
-        train: bool = True,
-        download: bool = True,
-        timeout: int = 500,
+        self, root: str = None, train: bool = True, download: bool = True,
    ):
-        r"""
-        :param root: path for mnist dataset downloading or loading, if ``None``,
-            set ``root`` to the ``_default_root``.
-        :param train: if ``True``, loading trainingset, else loading test set.
-        :param download: if raw files do not exists and download sets to ``True``,
-            download raw files and process, otherwise raise ValueError, default is True.
-
-        """
        super().__init__(root, order=("image", "image_category"))

-        self.timeout = timeout
-
        # process the root path
        if root is None:
            self.root = self._default_root