diff --git a/labml_nn/transformers/vit/__init__.py b/labml_nn/transformers/vit/__init__.py
index 4d4f8d09cea1f9c28e938a7c758da4be6543a25b..07d0b3db1590f6fe9a83e385caee0699e11aebca 100644
--- a/labml_nn/transformers/vit/__init__.py
+++ b/labml_nn/transformers/vit/__init__.py
@@ -39,7 +39,7 @@ Here's [an experiment](experiment.html) that trains ViT on CIFAR-10.
 This doesn't do very well because it's trained on a small dataset.
 It's a simple experiment that anyone can run and play with ViTs.
 
-[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/8b531d9ce3dc11eb84fc87df6756eb8f)
+[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/afdd5332188b11edbdf543360515b595)
 """
 
 import torch
@@ -114,7 +114,7 @@ class LearnedPositionalEmbeddings(Module):
         * `x` is the patch embeddings of shape `[patches, batch_size, d_model]`
         """
         # Get the positional embeddings for the given patches
-        pe = self.positional_encodings[x.shape[0]]
+        pe = self.positional_encodings[:x.shape[0]]
 
         # Add to patch embeddings and return
         return x + pe
diff --git a/labml_nn/transformers/vit/experiment.py b/labml_nn/transformers/vit/experiment.py
index febcb186f05fec1e2a940b6dfd42742ef5b74e2f..4f85e08c2b220435a59667875f665d09bbc86e6e 100644
--- a/labml_nn/transformers/vit/experiment.py
+++ b/labml_nn/transformers/vit/experiment.py
@@ -7,7 +7,7 @@ summary: >
 
 # Train a [Vision Transformer (ViT)](index.html) on CIFAR 10
 
-[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/8b531d9ce3dc11eb84fc87df6756eb8f)
+[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/afdd5332188b11edbdf543360515b595)
 """
 
 from labml import experiment
@@ -76,7 +76,7 @@ def main():
         'transformer.d_model': 512,
 
         # Training epochs and batch size
-        'epochs': 1000,
+        'epochs': 32,
         'train_batch_size': 64,
 
         # Augment CIFAR 10 images for training
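
The second hunk replaces a single-index lookup into `positional_encodings` with a slice. A minimal sketch of why the shapes differ, assuming the module stores its learned encodings as a `[max_len, 1, d_model]` tensor (implied by the surrounding code, not shown in this diff):

```python
import torch

# Assumed shapes: positional_encodings is [max_len, 1, d_model],
# x is [patches, batch_size, d_model] as stated in the docstring above.
max_len, d_model = 5_000, 512
positional_encodings = torch.zeros(max_len, 1, d_model)

patches, batch_size = 64, 32
x = torch.randn(patches, batch_size, d_model)

# Before the fix: indexing picks the single encoding at row `patches`,
# shape [1, d_model], which broadcasts the same vector onto every patch.
pe_wrong = positional_encodings[x.shape[0]]
print(pe_wrong.shape)  # torch.Size([1, 512])

# After the fix: slicing keeps one encoding per patch, shape [patches, 1, d_model],
# which broadcasts across the batch dimension as intended.
pe_right = positional_encodings[:x.shape[0]]
print(pe_right.shape)            # torch.Size([64, 1, 512])
print((x + pe_right).shape)      # torch.Size([64, 32, 512])
```

With the old indexing every patch received the same encoding, so the learned positions carried no per-patch information; the slice restores a distinct encoding for each patch position.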