Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
87960310
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
87960310
编写于
12月 09, 2022
作者:
W
wangxinxin08
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix bugs
上级
e142c6a3
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
30 addition
and
19 deletion
+30
-19
ppdet/modeling/vl/embedder/__init__.py
ppdet/modeling/vl/embedder/__init__.py
+1
-0
ppdet/modeling/vl/embedder/clip/__init__.py
ppdet/modeling/vl/embedder/clip/__init__.py
+1
-1
ppdet/modeling/vl/embedder/clip/clip.py
ppdet/modeling/vl/embedder/clip/clip.py
+1
-1
ppdet/modeling/vl/embedder/clip/layers.py
ppdet/modeling/vl/embedder/clip/layers.py
+5
-5
ppdet/modeling/vl/embedder/clip/models.py
ppdet/modeling/vl/embedder/clip/models.py
+12
-8
ppdet/modeling/vl/head/__init__.py
ppdet/modeling/vl/head/__init__.py
+2
-0
ppdet/modeling/vl/head/owl_vit_head.py
ppdet/modeling/vl/head/owl_vit_head.py
+4
-3
ppdet/modeling/vl/loss/__init__.py
ppdet/modeling/vl/loss/__init__.py
+2
-0
ppdet/modeling/vl/loss/owl_vit_loss.py
ppdet/modeling/vl/loss/owl_vit_loss.py
+1
-1
ppdet/modeling/vl/tokenizer/simple_tokenizer.py
ppdet/modeling/vl/tokenizer/simple_tokenizer.py
+1
-0
未找到文件。
ppdet/modeling/vl/embedder/__init__.py
浏览文件 @
87960310
...
...
@@ -21,6 +21,7 @@ import paddle.nn as nn
import
paddle.nn.functional
as
F
from
ppdet.core.workspace
import
register
from
.clip
import
*
__all__
=
[
'ClipImageTextEmbedder'
]
...
...
ppdet/modeling/vl/embedder/clip/__init__.py
浏览文件 @
87960310
...
...
@@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from
.models
import
ModifiedResNet
,
TextEncoder
,
Vi
sionTransformer
from
.models
import
ModifiedResNet
,
TextEncoder
,
Vi
T
from
.layers
import
LayerNorm
,
QuickGELU
,
AttentionPool2D
from
.clip
import
CLIP
ppdet/modeling/vl/embedder/clip/clip.py
浏览文件 @
87960310
...
...
@@ -31,7 +31,7 @@ from ppdet.modeling.layers import MultiHeadAttention
from
ppdet.modeling.initializer
import
zeros_
,
normal_
from
ppdet.core.workspace
import
register
from
.models
import
ModifiedResNet
,
Vi
sionTransformer
,
TextEncoder
from
.models
import
ModifiedResNet
,
Vi
T
,
TextEncoder
@
register
...
...
ppdet/modeling/vl/embedder/clip/layers.py
浏览文件 @
87960310
...
...
@@ -84,7 +84,7 @@ class Bottleneck(nn.Layer):
return
out
class
AttentionPool2D
(
nn
.
Module
):
class
AttentionPool2D
(
nn
.
Layer
):
def
__init__
(
self
,
spacial_dim
,
embed_dim
,
num_heads
,
output_dim
):
super
().
__init__
()
# TODO: need check whether it is consistent with torch or not
...
...
@@ -151,10 +151,9 @@ class ResidualAttentionBlock(nn.Layer):
self
.
attn
=
MultiHeadAttention
(
d_model
,
n_head
)
self
.
ln_1
=
LayerNorm
(
d_model
)
self
.
mlp
=
nn
.
Sequential
(
OrderedDict
([(
"c_fc"
,
nn
.
Linear
(
d_model
,
d_model
*
4
)),
(
"gelu"
,
QuickGELU
()),
(
"c_proj"
,
nn
.
Linear
(
d_model
*
4
,
d_model
)
)]))
self
.
mlp
=
nn
.
Sequential
((
"c_fc"
,
nn
.
Linear
(
d_model
,
d_model
*
4
)),
(
"gelu"
,
QuickGELU
()),
(
"c_proj"
,
nn
.
Linear
(
d_model
*
4
,
d_model
)))
self
.
ln_2
=
LayerNorm
(
d_model
)
self
.
attn_mask
=
attn_mask
self
.
droplayer_p
=
droplayer_p
...
...
@@ -192,6 +191,7 @@ class Transformer(nn.Layer):
super
().
__init__
()
self
.
width
=
width
self
.
layers
=
layers
self
.
stochastic_droplayer_rate
=
stochastic_droplayer_rate
blocks
=
[]
for
i
in
range
(
self
.
layers
):
droplayer_p
=
(
i
/
max
(
self
.
layers
-
1
,
...
...
ppdet/modeling/vl/embedder/clip/models.py
浏览文件 @
87960310
...
...
@@ -32,7 +32,7 @@ from ppdet.core.workspace import register
from
.layers
import
*
__all__
=
[
'ModifiedResNet'
,
'Vi
sionTransformer
'
,
'TextEncoder'
]
__all__
=
[
'ModifiedResNet'
,
'Vi
T
'
,
'TextEncoder'
]
@
register
...
...
@@ -105,7 +105,7 @@ class ModifiedResNet(nn.Layer):
@
register
class
Vi
sionTransformer
(
nn
.
Layer
):
class
Vi
T
(
nn
.
Layer
):
def
__init__
(
self
,
input_resolution
,
patch_size
,
...
...
@@ -115,6 +115,7 @@ class VisionTransformer(nn.Layer):
output_dim
=
None
,
stochastic_droplayer_rate
=
0.0
):
super
().
__init__
()
self
.
width
=
width
self
.
input_resolution
=
input_resolution
self
.
output_dim
=
output_dim
self
.
conv1
=
nn
.
Conv2D
(
...
...
@@ -122,7 +123,7 @@ class VisionTransformer(nn.Layer):
out_channels
=
width
,
kernel_size
=
patch_size
,
stride
=
patch_size
,
bias
=
False
)
bias
_attr
=
False
)
scale
=
width
**-
0.5
self
.
class_embedding
=
self
.
create_parameter
(
shape
=
[
width
],
attr
=
ParamAttr
(
initializer
=
Normal
(
std
=
scale
)))
...
...
@@ -157,9 +158,14 @@ class VisionTransformer(nn.Layer):
@
register
class
TextEncoder
(
nn
.
Layer
):
def
__init__
(
self
,
context_length
,
vocab_size
,
transformer_width
,
transformer_heads
,
transformer_layers
,
stochastic_droplayer_rate
):
def
__init__
(
self
,
embed_dim
,
context_length
,
vocab_size
,
transformer_width
,
transformer_heads
,
transformer_layers
,
stochastic_droplayer_rate
=
0.0
):
super
().
__init__
()
self
.
context_length
=
context_length
...
...
@@ -178,8 +184,6 @@ class TextEncoder(nn.Layer):
self
.
ln_final
=
LayerNorm
(
transformer_width
)
self
.
text_projection
=
nn
.
Linear
(
transformer_width
,
embed_dim
,
bias_attr
=
False
)
self
.
logit_scale
=
self
.
create_parameter
(
shape
=
[],
attr
=
ParamAttr
(
initializer
=
Constant
(
np
.
log
(
1.
/
0.07
))))
def
build_attention_mask
(
self
):
# lazily create causal attention mask, with full attention between the vision tokens
...
...
ppdet/modeling/vl/head/__init__.py
浏览文件 @
87960310
...
...
@@ -11,3 +11,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.owl_vit_head
import
*
\ No newline at end of file
ppdet/modeling/vl/head/owl_vit_head.py
浏览文件 @
87960310
...
...
@@ -22,6 +22,7 @@ import paddle
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
from
ppdet.modeling.ops
import
get_act_fn
from
ppdet.core.workspace
import
register
from
..utils
import
compute_box_bias
...
...
@@ -46,12 +47,13 @@ class PredictorMLP(nn.Layer):
in_dim
,
out_dim
,
num_layers
,
mlp_dim
,
hidden_activation
,
mlp_dim
=
None
,
hidden_activation
=
'gelu'
,
out_activation
=
None
):
super
().
__init__
()
layers
=
[]
mlp_dim
=
in_dim
if
mlp_dim
is
None
else
mlp_dim
for
_
in
range
(
num_layers
-
1
):
layers
.
append
(
nn
.
Linear
(
in_dim
,
mlp_dim
))
in_dim
=
mlp_dim
...
...
@@ -138,7 +140,6 @@ class OWLViTHead(nn.Layer):
self
.
class_head
=
class_head
self
.
bbox_head
=
bbox_head
self
.
box_bias
=
box_bias
self
.
matcher
=
matcher
self
.
loss
=
loss
def
box_predictor
(
self
,
image_features
,
feature_map
):
...
...
ppdet/modeling/vl/loss/__init__.py
浏览文件 @
87960310
...
...
@@ -11,3 +11,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.owl_vit_loss
import
*
\ No newline at end of file
ppdet/modeling/vl/loss/owl_vit_loss.py
浏览文件 @
87960310
...
...
@@ -32,7 +32,7 @@ class OWLViTLoss(nn.Layer):
__inject__
=
[
'HungarianMatcher'
]
def
__init__
(
self
,
num_classes
,
num_classes
=
80
,
matcher
=
'HungarianMatcher'
,
normalization
=
'per_example'
,
loss_coeff
=
None
,
...
...
ppdet/modeling/vl/tokenizer/simple_tokenizer.py
浏览文件 @
87960310
...
...
@@ -21,6 +21,7 @@ from __future__ import print_function
import
gzip
import
html
import
os
import
functools
from
functools
import
lru_cache
import
ftfy
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录