曾经的那一瞬间 / Models

Commit a108f087: update for PR comments
Authored May 30, 2023 by Yufan Zhuang
Parent: 01cf744f

Showing 4 changed files with 57 additions and 16 deletions (+57, -16)
official/projects/lra/exponential_moving_average.py        +2   -1
official/projects/lra/mega_encoder.py                      +1   -0
official/projects/lra/mega_encoder_test.py                 +43  -0
official/projects/lra/moving_average_gated_attention.py    +11  -15
official/projects/lra/exponential_moving_average.py

@@ -31,8 +31,9 @@ class MultiHeadEMA(tf.keras.layers.Layer):
       ndim=2,
       bidirectional=False,
       truncation=None,
+      **kwargs
   ):
-    super().__init__()
+    super().__init__(**kwargs)
     self.embed_dim = embed_dim
     self.ndim = ndim
...
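The change above makes MultiHeadEMA accept and forward standard Keras constructor arguments. Without `super().__init__(**kwargs)`, options such as `name=` or `dtype=` passed by a caller would be silently dropped (or rejected outright if `**kwargs` were absent). A minimal sketch of the pattern, using a hypothetical `Scale` layer rather than the real MultiHeadEMA:

    import tensorflow as tf

    class Scale(tf.keras.layers.Layer):
      """Toy layer showing why **kwargs must reach the base Layer."""

      def __init__(self, factor=2.0, **kwargs):
        super().__init__(**kwargs)  # forwards name=, dtype=, trainable=, ...
        self.factor = factor

      def call(self, x):
        return x * self.factor

    layer = Scale(factor=3.0, name="my_scale")
    assert layer.name == "my_scale"  # autogenerated name if not forwarded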
official/projects/lra/mega_encoder.py

@@ -28,6 +28,7 @@ _Initializer = Union[str, tf.keras.initializers.Initializer]
 _approx_gelu = lambda x: tf.keras.activations.gelu(x, approximate=True)
 
+@tf.keras.utils.register_keras_serializable(package='Text')
 class MegaEncoder(tf.keras.layers.Layer):
   """MegaEncoder.
...
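Registering MegaEncoder under the `Text` package adds it to the global Keras serialization registry, so configs that reference the class can be deserialized without an explicit `custom_objects` mapping. A small sketch of the mechanism with a hypothetical layer (not the real MegaEncoder API):

    import tensorflow as tf

    @tf.keras.utils.register_keras_serializable(package='Text')
    class Doubler(tf.keras.layers.Layer):
      def call(self, x):
        return 2.0 * x

    cfg = tf.keras.layers.serialize(Doubler())
    # cfg['class_name'] is 'Text>Doubler'; the registry resolves it back
    # to the class, so no custom_objects mapping is needed.
    layer = tf.keras.layers.deserialize(cfg)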
official/projects/lra/mega_encoder_test.py (new file, mode 100644, +43)

# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for official.nlp.projects.lra.mega_encoder."""

import numpy as np
import tensorflow as tf

from official.projects.lra import mega_encoder


class MegaEncoderTest(tf.test.TestCase):

  def test_encoder(self):
    sequence_length = 1024
    batch_size = 2
    vocab_size = 1024
    network = mega_encoder.MegaEncoder(
        num_layers=1, vocab_size=1024, max_sequence_length=4096)
    word_id_data = np.random.randint(
        vocab_size, size=(batch_size, sequence_length))
    mask_data = np.random.randint(2, size=(batch_size, sequence_length))
    type_id_data = np.random.randint(2, size=(batch_size, sequence_length))
    outputs = network({
        "input_word_ids": word_id_data,
        "input_mask": mask_data,
        "input_type_ids": type_id_data
    })
    self.assertEqual(outputs["sequence_output"].shape,
                     (batch_size, sequence_length, 128))


if __name__ == "__main__":
  tf.test.main()
\ No newline at end of file
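Assuming a tensorflow/models checkout with the repository root on PYTHONPATH, this test should run directly via `python -m official.projects.lra.mega_encoder_test`; `tf.test.main()` hands off to the standard absl/unittest runner. The asserted output shape `(batch_size, sequence_length, 128)` suggests the encoder's default hidden width is 128, though that default is not visible in this diff.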
official/projects/lra/moving_average_gated_attention.py

@@ -48,8 +48,6 @@ class RelativePositionBias(tf.keras.layers.Layer):
   def call(self, seq_len):
     if seq_len is None:
       seq_len = self.max_positions
-    #import pdb
-    #pdb.set_trace()
     seq_len = tf.get_static_value(seq_len)
     # seq_len * 2 -1
     b = self.rel_pos_bias[(self.max_positions - seq_len):(self.max_positions +
...
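The only change in this hunk is removing leftover pdb debugging lines. The surrounding context relies on `tf.get_static_value`, which attempts to constant-fold a tensor into a concrete Python/NumPy value at trace time (so the slice bounds on `rel_pos_bias` become plain integers) and returns `None` when the value is only known at run time. A quick illustration:

    import tensorflow as tf

    print(tf.get_static_value(tf.constant(7)))  # 7: statically known

    @tf.function(input_signature=[tf.TensorSpec([None], tf.float32)])
    def f(x):
      # With an unknown leading dimension there is no static value.
      print(tf.get_static_value(tf.shape(x)[0]))  # None
      return x

    f(tf.zeros([3]))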
@@ -199,7 +197,8 @@ class MovingAverageGatedAttention(tf.keras.layers.Layer):
     super().build(input_shape)
 
   def get_config(self):
-    config = {
+    base_config = super().get_config()
+    base_config.update({
         "embed_dim": self.embed_dim,
         "zdim":
...
@@ -226,11 +225,10 @@ class MovingAverageGatedAttention(tf.keras.layers.Layer):
             self._attention_axes,
         "return_attention_scores": self.return_attention_scores
-    }
-    base_config = super().get_config()
-    return dict(list(base_config.items()) + list(config.items()))
+    })
+    return base_config
 
-  def softmax_attention(self, q, k):
+  def _softmax_attention(self, q, k):
     slen = k.shape[1]
     # C x C
     if slen is None:
...
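The two get_config hunks above replace the old merge idiom, `dict(list(base_config.items()) + list(config.items()))`, with updating the base config in place and returning it, which is shorter and avoids building two intermediate lists. A minimal sketch of the resulting pattern (the layer and fields here are illustrative, not the full MovingAverageGatedAttention config):

    import tensorflow as tf

    class Gated(tf.keras.layers.Layer):
      def __init__(self, embed_dim=64, zdim=16, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.zdim = zdim

      def get_config(self):
        # Start from the base Layer config (name, dtype, ...) and add
        # this layer's constructor arguments on top.
        base_config = super().get_config()
        base_config.update({
            "embed_dim": self.embed_dim,
            "zdim": self.zdim,
        })
        return base_config

    # Round trip: from_config(get_config()) rebuilds an equivalent layer.
    clone = Gated.from_config(Gated(embed_dim=128).get_config())
    assert clone.embed_dim == 128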
@@ -246,16 +244,14 @@ class MovingAverageGatedAttention(tf.keras.layers.Layer):
     return attn_weights
 
   def call(self, inputs: Any) -> Any:
-    """MEGA encoder block call.
-    Args:
-      inputs: a single tensor or a list of tensors. `input tensor`
-    """
+    """MEGA encoder block call.
+    Args: inputs: a single tensor or a list of tensors. `input tensor`
       as the single sequence of embeddings. [`input tensor`,
       `attention mask`] to have the
       additional attention mask. [`query tensor`, `key value tensor`,
       `attention mask`] to have separate input streams for the query, and
       key/value to the multi-head attention.
     Returns:
       An output tensor with the same dimensions as input/query tensor.
     """
...
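Per the docstring, the layer's call accepts three input forms. Sketching the call sites (variable names and shapes are illustrative; `mega` stands for a constructed MovingAverageGatedAttention instance, whose constructor is not shown in this diff):

    out = mega(embeddings)                          # single sequence of embeddings
    out = mega([embeddings, attention_mask])        # with an additional attention mask
    out = mega([query, key_value, attention_mask])  # separate query and key/value streams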
@@ -310,7 +306,7 @@ class MovingAverageGatedAttention(tf.keras.layers.Layer):
     # L x B x E -> B x L x E
     v = tf.transpose(v, perm=(1, 0, 2))
 
-    attn_weights = self.softmax_attention(q, k)
+    attn_weights = self._softmax_attention(q, k)
     v = self.hidden_dropout(v)
     kernel = tf.squeeze(self.attention_dropout(attn_weights))
     # B x K x C x E -> B x L x E -> L x B x E
...
登录