From 1b0f8944bded49da536fe9d18eac70154d1c273f Mon Sep 17 00:00:00 2001
From: Varuna Jayasiri
Date: Sat, 5 Sep 2020 13:25:27 +0530
Subject: [PATCH] spelling

---
 labml_nn/transformers/mha.py                 |  6 +++---
 labml_nn/transformers/models.py              | 10 +++++-----
 labml_nn/transformers/positional_encoding.py |  4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/labml_nn/transformers/mha.py b/labml_nn/transformers/mha.py
index 4f635e79..065a4ba6 100644
--- a/labml_nn/transformers/mha.py
+++ b/labml_nn/transformers/mha.py
@@ -9,17 +9,17 @@ from typing import Optional
 
 import torch
 from labml import tracker
+from labml_helpers.module import Module
 from torch import nn as nn
 from torch.nn import functional as F
 
-from labml_helpers.module import Module
-
 
 class PrepareForMultiHeadAttention(Module):
     """
     This module does a linear transformation and splits the vector into given
     number of heads for multi-head attention.
     """
+
     def __init__(self, d_model: int, heads: int, d_k: int, bias: bool):
         super().__init__()
         self.linear = nn.Linear(d_model, heads * d_k, bias=bias)
@@ -38,7 +38,7 @@ class PrepareForMultiHeadAttention(Module):
 
 
 class MultiHeadAttention(Module):
-    def __init__(self, heads: int, d_model: int, dropout_prob: float = 0.1, bias: bool=True):
+    def __init__(self, heads: int, d_model: int, dropout_prob: float = 0.1, bias: bool = True):
         """
         ## Multi-Head Attention Module
 
diff --git a/labml_nn/transformers/models.py b/labml_nn/transformers/models.py
index 361477b4..e2f71f32 100644
--- a/labml_nn/transformers/models.py
+++ b/labml_nn/transformers/models.py
@@ -12,7 +12,7 @@ from .positional_encoding import get_positional_encoding
 
 class EmbeddingsWithPositionalEncoding(Module):
     """
-    ## Embed tokenas and add [fixed positional encoding](positional_encoding.html)
+    ## Embed tokens and add [fixed positional encoding](positional_encoding.html)
     """
     def __init__(self, d_model: int, n_vocab: int, max_len: int = 5000):
         super().__init__()
@@ -27,13 +27,13 @@ class EmbeddingsWithPositionalEncoding(Module):
 
 class EmbeddingsWithLearnedPositionalEncoding(Module):
     """
-    ## Embed tokenas and add parameterized positional encodings
+    ## Embed tokens and add parameterized positional encodings
     """
     def __init__(self, d_model: int, n_vocab: int, max_len: int = 5000):
         super().__init__()
         self.linear = nn.Embedding(n_vocab, d_model)
         self.d_model = d_model
-        self.positional_encodings = nn.Parameter(torch.zeros(max_len, 1, d_model))
+        self.positional_encodings = nn.Parameter(torch.zeros(max_len, 1, d_model), requires_grad=True)
 
     def __call__(self, x: torch.Tensor):
         pe = self.positional_encodings[:x.shape[0]]
@@ -67,7 +67,7 @@ class TransformerLayer(Module):
     in where the layer-normalization is done.
     Here we do a layer normalization before attention and feed-forward networks,
     and add the original residual vectors.
-    Alternative is to do a layer normalzation after adding the residuals.
+    Alternative is to do a layer normalization after adding the residuals.
     But we found this to be less stable when training.
     We found a detailed discussion about this in paper
     [On Layer Normalization in the Transformer Architecture](https://arxiv.org/abs/2002.04745).
@@ -162,7 +162,7 @@ class Generator(Module):
     """
     ## Generator
 
-    This predicts the tokens and gives the lof softmaxes of those.
+    This predicts the tokens and gives the log softmax of those.
     You don't need this if you are using `nn.CrossEntropyLoss`.
     """
     def __init__(self, n_vocab: int, d_model: int):
diff --git a/labml_nn/transformers/positional_encoding.py b/labml_nn/transformers/positional_encoding.py
index 82957b46..d3f4f9f1 100644
--- a/labml_nn/transformers/positional_encoding.py
+++ b/labml_nn/transformers/positional_encoding.py
@@ -9,8 +9,8 @@
 PE_{p,2i} &= sin\Bigg(\frac{p}{10000^{\frac{2i}{d_{model}}}}\Bigg) \\
 PE_{p,2i + 1} &= cos\Bigg(\frac{p}{10000^{\frac{2i}{d_{model}}}}\Bigg)
 \end{align}
-Where $1 \leq 2i, 2i + 1 \leq d_{model}$ are the feature indexes in the encoding,
-and $p$ is the position.
+Where $1 \leq 2i, 2i + 1 \leq d_{model}$
+ are the feature indexes in the encoding, and $p$ is the position.
 """
 
 import math
-- 
GitLab