diff --git a/docs/normalization/group_norm/index.html b/docs/normalization/group_norm/index.html index 56e8fd462c215bdde2eb4bdb6a6721d8d17684a0..6779f7b6f0e6d5ac5069dffa87766d4587f4d9cf 100644 --- a/docs/normalization/group_norm/index.html +++ b/docs/normalization/group_norm/index.html @@ -133,8 +133,8 @@ $m$ is the size of the set $\mathcal{S}_i$ which is same for all $i$.
Group normalization normalizes values of the same sample and the same group of channels together.
Here’s a CIFAR 10 classification model that uses instance normalization.
+ +87import torch
diff --git a/labml_nn/normalization/group_norm/__init__.py b/labml_nn/normalization/group_norm/__init__.py
index 2f4ce810550dd5266687582c73ab06cff797f240..149683813abbeec664e569ee19c11d0b204a40c8 100644
--- a/labml_nn/normalization/group_norm/__init__.py
+++ b/labml_nn/normalization/group_norm/__init__.py
@@ -79,8 +79,8 @@ Group normalization normalizes values of the same sample and the same group of c
Here's a [CIFAR 10 classification model](experiment.html) that uses instance normalization.
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/group_norm/experiment.ipynb)
-[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/011254fe647011ebbb8e0242ac1c0002)
-[![WandB](https://img.shields.io/badge/wandb-run-yellow)](https://app.labml.ai/run/011254fe647011ebbb8e0242ac1c0002)
+[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/081d950aa4e011eb8f9f0242ac1c0002)
+[![WandB](https://img.shields.io/badge/wandb-run-yellow)](https://wandb.ai/vpj/cifar10/runs/310etthp)
"""
diff --git a/labml_nn/normalization/group_norm/experiment.ipynb b/labml_nn/normalization/group_norm/experiment.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..0bec0875d89b18b9f25ca4110478f92462aa9e83
--- /dev/null
+++ b/labml_nn/normalization/group_norm/experiment.ipynb
@@ -0,0 +1,620 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "Group Norm",
+ "provenance": [],
+ "collapsed_sections": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "accelerator": "GPU",
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "14841e99103e41f69dd9b709301d3204": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "state": {
+ "_view_name": "HBoxView",
+ "_dom_classes": [],
+ "_model_name": "HBoxModel",
+ "_view_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_view_count": null,
+ "_view_module_version": "1.5.0",
+ "box_style": "",
+ "layout": "IPY_MODEL_60f10c7bff5c4eea845a14f3f3075e8d",
+ "_model_module": "@jupyter-widgets/controls",
+ "children": [
+ "IPY_MODEL_cf2f7c0f10454901bc5b48872b364dbf",
+ "IPY_MODEL_8755fc8b0f6b40b3b08822e0a705d403"
+ ]
+ }
+ },
+ "60f10c7bff5c4eea845a14f3f3075e8d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "state": {
+ "_view_name": "LayoutView",
+ "grid_template_rows": null,
+ "right": null,
+ "justify_content": null,
+ "_view_module": "@jupyter-widgets/base",
+ "overflow": null,
+ "_model_module_version": "1.2.0",
+ "_view_count": null,
+ "flex_flow": null,
+ "width": null,
+ "min_width": null,
+ "border": null,
+ "align_items": null,
+ "bottom": null,
+ "_model_module": "@jupyter-widgets/base",
+ "top": null,
+ "grid_column": null,
+ "overflow_y": null,
+ "overflow_x": null,
+ "grid_auto_flow": null,
+ "grid_area": null,
+ "grid_template_columns": null,
+ "flex": null,
+ "_model_name": "LayoutModel",
+ "justify_items": null,
+ "grid_row": null,
+ "max_height": null,
+ "align_content": null,
+ "visibility": null,
+ "align_self": null,
+ "height": null,
+ "min_height": null,
+ "padding": null,
+ "grid_auto_rows": null,
+ "grid_gap": null,
+ "max_width": null,
+ "order": null,
+ "_view_module_version": "1.2.0",
+ "grid_template_areas": null,
+ "object_position": null,
+ "object_fit": null,
+ "grid_auto_columns": null,
+ "margin": null,
+ "display": null,
+ "left": null
+ }
+ },
+ "cf2f7c0f10454901bc5b48872b364dbf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_view_name": "ProgressView",
+ "style": "IPY_MODEL_c6c192b58fa242008fbc2983c7866c5f",
+ "_dom_classes": [],
+ "description": "",
+ "_model_name": "FloatProgressModel",
+ "bar_style": "success",
+ "max": 170498071,
+ "_view_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "value": 170498071,
+ "_view_count": null,
+ "_view_module_version": "1.5.0",
+ "orientation": "horizontal",
+ "min": 0,
+ "description_tooltip": null,
+ "_model_module": "@jupyter-widgets/controls",
+ "layout": "IPY_MODEL_6e5c4becab6b40aaafce1a4575d3199c"
+ }
+ },
+ "8755fc8b0f6b40b3b08822e0a705d403": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "state": {
+ "_view_name": "HTMLView",
+ "style": "IPY_MODEL_328dbbbc3cdb4163896913308059c23c",
+ "_dom_classes": [],
+ "description": "",
+ "_model_name": "HTMLModel",
+ "placeholder": "",
+ "_view_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "value": " 170499072/? [00:03<00:00, 54808451.08it/s]",
+ "_view_count": null,
+ "_view_module_version": "1.5.0",
+ "description_tooltip": null,
+ "_model_module": "@jupyter-widgets/controls",
+ "layout": "IPY_MODEL_3bff44b4205f40119715fae60d4a04a9"
+ }
+ },
+ "c6c192b58fa242008fbc2983c7866c5f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_view_name": "StyleView",
+ "_model_name": "ProgressStyleModel",
+ "description_width": "initial",
+ "_view_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.5.0",
+ "_view_count": null,
+ "_view_module_version": "1.2.0",
+ "bar_color": null,
+ "_model_module": "@jupyter-widgets/controls"
+ }
+ },
+ "6e5c4becab6b40aaafce1a4575d3199c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "state": {
+ "_view_name": "LayoutView",
+ "grid_template_rows": null,
+ "right": null,
+ "justify_content": null,
+ "_view_module": "@jupyter-widgets/base",
+ "overflow": null,
+ "_model_module_version": "1.2.0",
+ "_view_count": null,
+ "flex_flow": null,
+ "width": null,
+ "min_width": null,
+ "border": null,
+ "align_items": null,
+ "bottom": null,
+ "_model_module": "@jupyter-widgets/base",
+ "top": null,
+ "grid_column": null,
+ "overflow_y": null,
+ "overflow_x": null,
+ "grid_auto_flow": null,
+ "grid_area": null,
+ "grid_template_columns": null,
+ "flex": null,
+ "_model_name": "LayoutModel",
+ "justify_items": null,
+ "grid_row": null,
+ "max_height": null,
+ "align_content": null,
+ "visibility": null,
+ "align_self": null,
+ "height": null,
+ "min_height": null,
+ "padding": null,
+ "grid_auto_rows": null,
+ "grid_gap": null,
+ "max_width": null,
+ "order": null,
+ "_view_module_version": "1.2.0",
+ "grid_template_areas": null,
+ "object_position": null,
+ "object_fit": null,
+ "grid_auto_columns": null,
+ "margin": null,
+ "display": null,
+ "left": null
+ }
+ },
+ "328dbbbc3cdb4163896913308059c23c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_view_name": "StyleView",
+ "_model_name": "DescriptionStyleModel",
+ "description_width": "",
+ "_view_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.5.0",
+ "_view_count": null,
+ "_view_module_version": "1.2.0",
+ "_model_module": "@jupyter-widgets/controls"
+ }
+ },
+ "3bff44b4205f40119715fae60d4a04a9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "state": {
+ "_view_name": "LayoutView",
+ "grid_template_rows": null,
+ "right": null,
+ "justify_content": null,
+ "_view_module": "@jupyter-widgets/base",
+ "overflow": null,
+ "_model_module_version": "1.2.0",
+ "_view_count": null,
+ "flex_flow": null,
+ "width": null,
+ "min_width": null,
+ "border": null,
+ "align_items": null,
+ "bottom": null,
+ "_model_module": "@jupyter-widgets/base",
+ "top": null,
+ "grid_column": null,
+ "overflow_y": null,
+ "overflow_x": null,
+ "grid_auto_flow": null,
+ "grid_area": null,
+ "grid_template_columns": null,
+ "flex": null,
+ "_model_name": "LayoutModel",
+ "justify_items": null,
+ "grid_row": null,
+ "max_height": null,
+ "align_content": null,
+ "visibility": null,
+ "align_self": null,
+ "height": null,
+ "min_height": null,
+ "padding": null,
+ "grid_auto_rows": null,
+ "grid_gap": null,
+ "max_width": null,
+ "order": null,
+ "_view_module_version": "1.2.0",
+ "grid_template_areas": null,
+ "object_position": null,
+ "object_fit": null,
+ "grid_auto_columns": null,
+ "margin": null,
+ "display": null,
+ "left": null
+ }
+ }
+ }
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "AYV_dMVDxyc2"
+ },
+ "source": [
+ "[![Github](https://img.shields.io/github/stars/lab-ml/nn?style=social)](https://github.com/lab-ml/nn)\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/group_norm/experiment.ipynb) \n",
+ "\n",
+ "## Group Norm - CIFAR 10\n",
+ "\n",
+ "This is an experiment training a model with group norm to classify the CIFAR-10 dataset."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "AahG_i2y5tY9"
+ },
+ "source": [
+ "Install the `labml-nn` package, and optionally the `wandb` package for experiment stats."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "ZCzmCrAIVg0L"
+ },
+ "source": [
+ "!pip install labml-nn wandb"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "SE2VUQ6L5zxI"
+ },
+ "source": [
+ "Imports"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "0hJXx_g0wS2C"
+ },
+ "source": [
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "\n",
+ "from labml import experiment\n",
+ "from labml_nn.normalization.group_norm.experiment import Configs"
+ ],
+ "execution_count": 6,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Lpggo0wM6qb-"
+ },
+ "source": [
+ "Create an experiment"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "bFcr9k-l4cAg"
+ },
+ "source": [
+ "experiment.create(name=\"cifar10\", comment=\"group norm\")"
+ ],
+ "execution_count": 7,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-OnHLi626tJt"
+ },
+ "source": [
+ "Initialize configurations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "Piz0c5f44hRo"
+ },
+ "source": [
+ "conf = Configs()"
+ ],
+ "execution_count": 8,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "wwMzCqpD6vkL"
+ },
+ "source": [
+ "Set the experiment configurations and pass a dictionary of values to override the defaults"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 17
+ },
+ "id": "e6hmQhTw4nks",
+ "outputId": "50ad9e07-84f4-47cf-9d26-034448eb611b"
+ },
+ "source": [
+ "experiment.configs(conf, {\n",
+ " 'optimizer.optimizer': 'Adam',\n",
+ " 'optimizer.learning_rate': 2.5e-4,\n",
+ "})"
+ ],
+ "execution_count": 9,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "KJZRf8527GxL"
+ },
+ "source": [
+ "Start the experiment and run the training loop."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 882,
+ "referenced_widgets": [
+ "14841e99103e41f69dd9b709301d3204",
+ "60f10c7bff5c4eea845a14f3f3075e8d",
+ "cf2f7c0f10454901bc5b48872b364dbf",
+ "8755fc8b0f6b40b3b08822e0a705d403",
+ "c6c192b58fa242008fbc2983c7866c5f",
+ "6e5c4becab6b40aaafce1a4575d3199c",
+ "328dbbbc3cdb4163896913308059c23c",
+ "3bff44b4205f40119715fae60d4a04a9"
+ ]
+ },
+ "id": "aIAWo7Fw5DR8",
+ "outputId": "2e82cce8-eaad-4cab-88d3-efd5d095b5b7"
+ },
+ "source": [
+ "with experiment.start():\n",
+ " conf.run()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "\n",
+ "cifar10: 081d950aa4e011eb8f9f0242ac1c0002\n",
+ "\tgroup norm\n",
+ "\t[dirty]: \"\"\n",
+ "\n",
+ "--------------------------------------------------\n",
+ "LABML WARNING\n",
+ "LabML App Warning: empty_token: Please create a valid token at https://app.labml.ai.\n",
+ "Click on the experiment link to monitor the experiment and add it to your experiments list.\n",
+ "--------------------------------------------------\n",
+ "Monitor experiment at https://app.labml.ai/run/081d950aa4e011eb8f9f0242ac1c0002\n",
+ "Initialize...\n",
+ " Prepare mode...[DONE]\t4.29ms\n",
+ " Prepare model...\n",
+ " Prepare device...\n",
+ " Prepare device_info...[DONE]\t67.86ms\n",
+ " Prepare device...[DONE]\t79.38ms\n",
+ " Prepare model...[DONE]\t10,940.55ms\n",
+ "Initialize...[DONE]\t11,069.97ms\n",
+ "Prepare validator...\n",
+ " Prepare valid_loader...\n",
+ " Prepare valid_dataset...\n",
+ " Prepare dataset_transforms...[DONE]\t8.51ms\n",
+ " Prepare valid_dataset...[DONE]\t4,699.14ms\n",
+ " Prepare valid_loader...[DONE]\t4,813.97ms\n",
+ "Prepare validator...[DONE]\t4,920.79ms\n",
+ "Prepare trainer...\n",
+ " Prepare train_loader...\n",
+ " Prepare train_dataset...[DONE]\t978.39ms\n",
+ " Prepare train_loader...[DONE]\t1,079.91ms\n",
+ "Prepare trainer...[DONE]\t1,133.44ms\n",
+ "Prepare training_loop...\n",
+ " Prepare loop_count...[DONE]\t47.45ms\n",
+ "Prepare training_loop...[DONE]\t288.33ms\n",
+ " 50,000: Train: 100% 52,404ms Valid: 100% 4,279ms loss.train: 2.34893 accuracy.train: 0.220780 loss.valid: 1.82418 accuracy.valid: 0.224600 57,111ms 0:00m/ 0:08m \n",
+ " 89,168: Train: 78% 57,275ms Valid: 70% 4,571ms loss.train: 1.67616 accuracy.train: 0.300985 loss.valid: 1.66962 accuracy.valid: 0.288923 57,111ms 0:01m/ 0:07m"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "application/javascript": [
+ "\n",
+ " window._wandbApiKey = new Promise((resolve, reject) => {\n",
+ " function loadScript(url) {\n",
+ " return new Promise(function(resolve, reject) {\n",
+ " let newScript = document.createElement(\"script\");\n",
+ " newScript.onerror = reject;\n",
+ " newScript.onload = resolve;\n",
+ " document.body.appendChild(newScript);\n",
+ " newScript.src = url;\n",
+ " });\n",
+ " }\n",
+ " loadScript(\"https://cdn.jsdelivr.net/npm/postmate/build/postmate.min.js\").then(() => {\n",
+ " const iframe = document.createElement('iframe')\n",
+ " iframe.style.cssText = \"width:0;height:0;border:none\"\n",
+ " document.body.appendChild(iframe)\n",
+ " const handshake = new Postmate({\n",
+ " container: iframe,\n",
+ " url: 'https://wandb.ai/authorize'\n",
+ " });\n",
+ " const timeout = setTimeout(() => reject(\"Couldn't auto authenticate\"), 5000)\n",
+ " handshake.then(function(child) {\n",
+ " child.on('authorize', data => {\n",
+ " clearTimeout(timeout)\n",
+ " resolve(data)\n",
+ " });\n",
+ " });\n",
+ " })\n",
+ " });\n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n"
+ ],
+ "name": "stderr"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "wandb: Paste an API key from your profile and hit enter: ··········\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n"
+ ],
+ "name": "stderr"
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "\n",
+ " Tracking run with wandb version 0.10.27<br/>\n",
+ " Syncing run firm-snowflake-1 to Weights & Biases (Documentation).<br/>\n",
+ " Project page: https://wandb.ai/vpj/cifar10<br/>\n",
+ " Run page: https://wandb.ai/vpj/cifar10/runs/310etthp<br/>\n",
+ " Run data is saved locally in /content/logs/cifar10/081d950aa4e011eb8f9f0242ac1c0002/wandb/run-20210424_093315-310etthp<br/>\n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/data/cifar-10-python.tar.gz\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "14841e99103e41f69dd9b709301d3204",
+ "version_minor": 0,
+ "version_major": 2
+ },
+ "text/plain": [
+ "HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Extracting /content/data/cifar-10-python.tar.gz to /content/data\n",
+ "Files already downloaded and verified\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "oBXXlP2b7XZO"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file
diff --git a/labml_nn/normalization/group_norm/readme.md b/labml_nn/normalization/group_norm/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..802f9dccc94210ca25e05a98345ea5747d76e920
--- /dev/null
+++ b/labml_nn/normalization/group_norm/readme.md
@@ -0,0 +1,76 @@
+# [Group Normalization](https://nn.labml.ai/normalization/group_norm/index.html)
+
+This is a [PyTorch](https://pytorch.org) implementation of
+the paper [Group Normalization](https://arxiv.org/abs/1803.08494).
+
+[Batch Normalization](https://nn.labml.ai/normalization/batch_norm/index.html) works well for sufficiently large batch sizes,
+but does not perform well for small batch sizes, because it normalizes across the batch.
+Training large models with large batch sizes is not possible due to the memory capacity of the
+devices.
+
+This paper introduces Group Normalization, which normalizes a set of features together as a group.
+This is based on the observation that classical features such as
+[SIFT](https://en.wikipedia.org/wiki/Scale-invariant_feature_transform) and
+[HOG](https://en.wikipedia.org/wiki/Histogram_of_oriented_gradients) are group-wise features.
+The paper proposes dividing feature channels into groups and then separately normalizing
+all channels within each group.
+
+## Formulation
+
+All normalization layers can be defined by the following computation.
+
+$$\hat{x}_i = \frac{1}{\sigma_i} (x_i - \mu_i)$$
+
+where $x$ is the tensor representing the batch,
+and $i$ is the index of a single value.
+For instance, for 2D images,
+$i = (i_N, i_C, i_H, i_W)$ is a 4-d vector that indexes the
+image within the batch, the feature channel, the vertical coordinate and the horizontal coordinate.
+$\mu_i$ and $\sigma_i$ are the mean and standard deviation.
+
+\begin{align}
+\mu_i &= \frac{1}{m} \sum_{k \in \mathcal{S}_i} x_k \\
+\sigma_i &= \sqrt{\frac{1}{m} \sum_{k \in \mathcal{S}_i} (x_k - \mu_i)^2 + \epsilon}
+\end{align}
+
+$\mathcal{S}_i$ is the set of indexes across which the mean and standard deviation
+are calculated for index $i$.
+$m$ is the size of the set $\mathcal{S}_i$, which is the same for all $i$.
+
+The definition of $\mathcal{S}_i$ is different for
+[Batch normalization](https://nn.labml.ai/normalization/batch_norm/index.html),
+[Layer normalization](https://nn.labml.ai/normalization/layer_norm/index.html), and
+[Instance normalization](https://nn.labml.ai/normalization/instance_norm/index.html).
+
+### [Batch Normalization](https://nn.labml.ai/normalization/batch_norm/index.html)
+
+$$\mathcal{S}_i = \{k | k_C = i_C\}$$
+
+The values that share the same feature channel are normalized together.
+
+### [Layer Normalization](https://nn.labml.ai/normalization/layer_norm/index.html)
+
+$$\mathcal{S}_i = \{k | k_N = i_N\}$$
+
+The values from the same sample in the batch are normalized together.
+
+### [Instance Normalization](https://nn.labml.ai/normalization/instance_norm/index.html)
+
+$$\mathcal{S}_i = \{k | k_N = i_N, k_C = i_C\}$$
+
+The values from the same sample and same feature channel are normalized together.
+
+### Group Normalization
+
+$$\mathcal{S}_i = \{k | k_N = i_N,
+ \bigg \lfloor \frac{k_C}{C/G} \bigg \rfloor = \bigg \lfloor \frac{i_C}{C/G} \bigg \rfloor\}$$
+
+where $G$ is the number of groups and $C$ is the number of channels.
+
+Group normalization normalizes values of the same sample and the same group of channels together.
+
+Here's a [CIFAR 10 classification model](https://nn.labml.ai/normalization/group_norm/experiment.html) that uses group normalization.
+
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/group_norm/experiment.ipynb)
+[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/081d950aa4e011eb8f9f0242ac1c0002)
+[![WandB](https://img.shields.io/badge/wandb-run-yellow)](https://wandb.ai/vpj/cifar10/runs/310etthp)
\ No newline at end of file