提交 a6420f1c 编写于 作者: C cuicheng01

Update tnt.py

上级 03a3da16
@@ -114,7 +114,8 @@ class Attention(nn.Layer):
 (2, 0, 3, 1, 4))
 q, k = qk[0], qk[1]
-v = self.v(x).reshape((B, N, self.num_heads, x.shape[-1] // self.num_heads)).transpose(
+v = self.v(x).reshape(
+(B, N, self.num_heads, x.shape[-1] // self.num_heads)).transpose(
 (0, 2, 1, 3))
 attn = paddle.matmul(q, k.transpose((0, 1, 3, 2))) * self.scale
@@ -122,7 +123,8 @@ class Attention(nn.Layer):
 attn = self.attn_drop(attn)
 x = paddle.matmul(attn, v)
-x = x.transpose((0, 2, 1, 3)).reshape((B, N, x.shape[-1] * x.shape[-3]))
+x = x.transpose((0, 2, 1, 3)).reshape(
+(B, N, x.shape[-1] * x.shape[-3]))
 x = self.proj(x)
 x = self.proj_drop(x)
 return x
@@ -183,18 +185,22 @@ class Block(nn.Layer):
 def forward(self, pixel_embed, patch_embed):
 # inner
-pixel_embed = paddle.add(pixel_embed, self.drop_path(
-self.attn_in(self.norm_in(pixel_embed))))
-pixel_embed = paddle.add(pixel_embed, self.drop_path(
-self.mlp_in(self.norm_mlp_in(pixel_embed))))
+pixel_embed = paddle.add(
+pixel_embed,
+self.drop_path(self.attn_in(self.norm_in(pixel_embed))))
+pixel_embed = paddle.add(
+pixel_embed,
+self.drop_path(self.mlp_in(self.norm_mlp_in(pixel_embed))))
 # outer
 B, N, C = patch_embed.shape
-patch_embed[:, 1:] = paddle.add(patch_embed[:, 1:], self.proj(
-self.norm1_proj(pixel_embed).reshape((B, N - 1, -1))))
-patch_embed = paddle.add(patch_embed, self.drop_path(
-self.attn_out(self.norm_out(patch_embed))))
-patch_embed = paddle.add(patch_embed, self.drop_path(
-self.mlp(self.norm_mlp(patch_embed))))
+patch_embed[:, 1:] = paddle.add(
+patch_embed[:, 1:],
+self.proj(self.norm1_proj(pixel_embed).reshape((B, N - 1, -1))))
+patch_embed = paddle.add(
+patch_embed,
+self.drop_path(self.attn_out(self.norm_out(patch_embed))))
+patch_embed = paddle.add(
+patch_embed, self.drop_path(self.mlp(self.norm_mlp(patch_embed))))
 return pixel_embed, patch_embed
@@ -225,8 +231,8 @@ class PixelEmbed(nn.Layer):
 x = x.transpose((0, 2, 1)).reshape(
 (-1, self.in_dim, self.new_patch_size, self.new_patch_size))
 x = x + pixel_pos
-x = x.reshape((-1, self.in_dim, self.new_patch_size * self.new_patch_size)).transpose(
-(0, 2, 1))
+x = x.reshape((-1, self.in_dim, self.new_patch_size *
+self.new_patch_size)).transpose((0, 2, 1))
 return x
@@ -328,7 +334,8 @@ class TNT(nn.Layer):
 patch_embed = self.norm2_proj(
 self.proj(
 self.norm1_proj(
-pixel_embed.reshape((-1, self.num_patches, pixel_embed.shape[-1] * pixel_embed.shape[-2])))))
+pixel_embed.reshape((-1, self.num_patches, pixel_embed.
+shape[-1] * pixel_embed.shape[-2])))))
 patch_embed = paddle.concat(
 (self.cls_token.expand((B, -1, -1)), patch_embed), axis=1)
 patch_embed = patch_embed + self.patch_pos
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册