Commit dace68ac authored by ying

Merge branch 'develop' into multihead_attention

......
@@ -305,9 +305,9 @@ def get_dict(lang, dict_size, reverse=False):
     dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME,
                              "wmt16/%s_%d.dict" % (lang, dict_size))
-    assert (os.path.exists(dict_path), "Word dictionary does not exist. "
-            "Please invoke paddle.dataset.wmt16.train/test/validation "
-            "first to build the dictionary.")
+    assert os.path.exists(dict_path), "Word dictionary does not exist. "
+    "Please invoke paddle.dataset.wmt16.train/test/validation first "
+    "to build the dictionary."
     tar_file = os.path.join(paddle.v2.dataset.common.DATA_HOME, "wmt16.tar.gz")
     return __load_dict(tar_file, dict_size, lang, reverse)
......
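The hunk above fixes a classic Python pitfall: assert (cond, "message") asserts a two-element tuple, which is always truthy, so the dictionary check could never fail. Note that even after the fix, the message fragments on the continuation lines are separate statements unless they are parenthesized together with the first one. A minimal sketch of the fully parenthesized pattern follows; the dict_path value is hypothetical, not the module's actual code:

import os

dict_path = "/tmp/wmt16/en_10000.dict"  # hypothetical path for illustration
# Parenthesize only the message so implicit string concatenation joins all
# three fragments on one logical line; the condition stays outside the parens.
assert os.path.exists(dict_path), (
    "Word dictionary does not exist. "
    "Please invoke paddle.dataset.wmt16.train/test/validation "
    "first to build the dictionary.")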
......
@@ -248,7 +248,8 @@ def scaled_dot_product_attention(queries,
     reshaped = layers.reshape(
         x=x,
         shape=list(x.shape[:-1]) + [num_heads, hidden_size // num_heads])
-    # permuate the original dimensions into:
+    # permute the dimensions into:
+    # [batch_size, num_heads, max_sequence_len, hidden_size_per_head]
     return layers.transpose(x=reshaped, perm=[0, 2, 1, 3])
......
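For context, the comment documents the head-splitting step of multi-head attention: the last dimension of a [batch_size, max_sequence_len, hidden_size] tensor is reshaped into num_heads slices, and the head axis is then moved in front of the sequence axis so each head attends over the full sequence. A minimal NumPy sketch of the same shape manipulation, with NumPy standing in for the paddle layers API:

import numpy as np

batch_size, max_sequence_len, hidden_size, num_heads = 2, 5, 16, 4
x = np.random.rand(batch_size, max_sequence_len, hidden_size)

# reshape to [batch_size, max_sequence_len, num_heads, hidden_size_per_head]
reshaped = x.reshape(list(x.shape[:-1]) + [num_heads, hidden_size // num_heads])

# permute to [batch_size, num_heads, max_sequence_len, hidden_size_per_head]
split_heads = reshaped.transpose(0, 2, 1, 3)
assert split_heads.shape == (batch_size, num_heads,
                             max_sequence_len, hidden_size // num_heads)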