Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
e527466d
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 2 年 前同步成功
通知
708
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
e527466d
编写于
2月 19, 2021
作者:
W
wangxinxin08
提交者:
GitHub
2月 19, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
unify data transform (#2227)
上级
5b6bebf2
变更
20
显示空白变更内容
内联
并排
Showing
20 changed file
with
1811 addition
and
4948 deletion
+1811
-4948
dygraph/configs/cascade_rcnn/_base_/cascade_fpn_reader.yml
dygraph/configs/cascade_rcnn/_base_/cascade_fpn_reader.yml
+16
-16
dygraph/configs/cascade_rcnn/_base_/cascade_mask_fpn_reader.yml
...h/configs/cascade_rcnn/_base_/cascade_mask_fpn_reader.yml
+16
-16
dygraph/configs/faster_rcnn/_base_/faster_fpn_reader.yml
dygraph/configs/faster_rcnn/_base_/faster_fpn_reader.yml
+16
-16
dygraph/configs/faster_rcnn/_base_/faster_reader.yml
dygraph/configs/faster_rcnn/_base_/faster_reader.yml
+16
-16
dygraph/configs/fcos/_base_/fcos_reader.yml
dygraph/configs/fcos/_base_/fcos_reader.yml
+16
-16
dygraph/configs/mask_rcnn/_base_/mask_fpn_reader.yml
dygraph/configs/mask_rcnn/_base_/mask_fpn_reader.yml
+16
-16
dygraph/configs/mask_rcnn/_base_/mask_reader.yml
dygraph/configs/mask_rcnn/_base_/mask_reader.yml
+16
-16
dygraph/configs/ppyolo/_base_/ppyolo_reader.yml
dygraph/configs/ppyolo/_base_/ppyolo_reader.yml
+21
-21
dygraph/configs/solov2/_base_/solov2_reader.yml
dygraph/configs/solov2/_base_/solov2_reader.yml
+17
-17
dygraph/configs/ssd/_base_/ssd_mobilenet_reader.yml
dygraph/configs/ssd/_base_/ssd_mobilenet_reader.yml
+18
-18
dygraph/configs/ssd/_base_/ssd_reader.yml
dygraph/configs/ssd/_base_/ssd_reader.yml
+18
-18
dygraph/configs/ssd/_base_/ssdlite300_reader.yml
dygraph/configs/ssd/_base_/ssdlite300_reader.yml
+18
-18
dygraph/configs/ssd/_base_/ssdlite320_reader.yml
dygraph/configs/ssd/_base_/ssdlite320_reader.yml
+18
-18
dygraph/configs/ttfnet/_base_/ttfnet_reader.yml
dygraph/configs/ttfnet/_base_/ttfnet_reader.yml
+15
-15
dygraph/configs/yolov3/_base_/yolov3_reader.yml
dygraph/configs/yolov3/_base_/yolov3_reader.yml
+21
-21
dygraph/ppdet/data/transform/__init__.py
dygraph/ppdet/data/transform/__init__.py
+0
-5
dygraph/ppdet/data/transform/batch_operator.py
dygraph/ppdet/data/transform/batch_operator.py
+0
-790
dygraph/ppdet/data/transform/batch_operators.py
dygraph/ppdet/data/transform/batch_operators.py
+258
-107
dygraph/ppdet/data/transform/operator.py
dygraph/ppdet/data/transform/operator.py
+0
-1919
dygraph/ppdet/data/transform/operators.py
dygraph/ppdet/data/transform/operators.py
+1295
-1869
未找到文件。
dygraph/configs/cascade_rcnn/_base_/cascade_fpn_reader.yml
浏览文件 @
e527466d
worker_num
:
2
worker_num
:
2
TrainReader
:
TrainReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
RandomResize
Op
:
{
target_size
:
[[
640
,
1333
],
[
672
,
1333
],
[
704
,
1333
],
[
736
,
1333
],
[
768
,
1333
],
[
800
,
1333
]],
interp
:
2
,
keep_ratio
:
True
}
-
RandomResize
:
{
target_size
:
[[
640
,
1333
],
[
672
,
1333
],
[
704
,
1333
],
[
736
,
1333
],
[
768
,
1333
],
[
800
,
1333
]],
interp
:
2
,
keep_ratio
:
True
}
-
RandomFlip
Op
:
{
prob
:
0.5
}
-
RandomFlip
:
{
prob
:
0.5
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
32
,
pad_gt
:
true
}
-
PadBatch
:
{
pad_to_stride
:
32
,
pad_gt
:
true
}
batch_size
:
1
batch_size
:
1
shuffle
:
true
shuffle
:
true
drop_last
:
true
drop_last
:
true
...
@@ -15,12 +15,12 @@ TrainReader:
...
@@ -15,12 +15,12 @@ TrainReader:
EvalReader
:
EvalReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
32
,
pad_gt
:
false
}
-
PadBatch
:
{
pad_to_stride
:
32
,
pad_gt
:
false
}
batch_size
:
1
batch_size
:
1
shuffle
:
false
shuffle
:
false
drop_last
:
false
drop_last
:
false
...
@@ -29,12 +29,12 @@ EvalReader:
...
@@ -29,12 +29,12 @@ EvalReader:
TestReader
:
TestReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
32
,
pad_gt
:
false
}
-
PadBatch
:
{
pad_to_stride
:
32
,
pad_gt
:
false
}
batch_size
:
1
batch_size
:
1
shuffle
:
false
shuffle
:
false
drop_last
:
false
drop_last
:
false
dygraph/configs/cascade_rcnn/_base_/cascade_mask_fpn_reader.yml
浏览文件 @
e527466d
worker_num
:
2
worker_num
:
2
TrainReader
:
TrainReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
RandomResize
Op
:
{
target_size
:
[[
640
,
1333
],
[
672
,
1333
],
[
704
,
1333
],
[
736
,
1333
],
[
768
,
1333
],
[
800
,
1333
]],
interp
:
2
,
keep_ratio
:
True
}
-
RandomResize
:
{
target_size
:
[[
640
,
1333
],
[
672
,
1333
],
[
704
,
1333
],
[
736
,
1333
],
[
768
,
1333
],
[
800
,
1333
]],
interp
:
2
,
keep_ratio
:
True
}
-
RandomFlip
Op
:
{
prob
:
0.5
}
-
RandomFlip
:
{
prob
:
0.5
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
32
,
pad_gt
:
true
}
-
PadBatch
:
{
pad_to_stride
:
32
,
pad_gt
:
true
}
batch_size
:
1
batch_size
:
1
shuffle
:
true
shuffle
:
true
drop_last
:
true
drop_last
:
true
...
@@ -15,12 +15,12 @@ TrainReader:
...
@@ -15,12 +15,12 @@ TrainReader:
EvalReader
:
EvalReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
32
,
pad_gt
:
false
}
-
PadBatch
:
{
pad_to_stride
:
32
,
pad_gt
:
false
}
batch_size
:
1
batch_size
:
1
shuffle
:
false
shuffle
:
false
drop_last
:
false
drop_last
:
false
...
@@ -29,12 +29,12 @@ EvalReader:
...
@@ -29,12 +29,12 @@ EvalReader:
TestReader
:
TestReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
32
,
pad_gt
:
false
}
-
PadBatch
:
{
pad_to_stride
:
32
,
pad_gt
:
false
}
batch_size
:
1
batch_size
:
1
shuffle
:
false
shuffle
:
false
drop_last
:
false
drop_last
:
false
dygraph/configs/faster_rcnn/_base_/faster_fpn_reader.yml
浏览文件 @
e527466d
worker_num
:
2
worker_num
:
2
TrainReader
:
TrainReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
RandomResize
Op
:
{
target_size
:
[[
640
,
1333
],
[
672
,
1333
],
[
704
,
1333
],
[
736
,
1333
],
[
768
,
1333
],
[
800
,
1333
]],
interp
:
2
,
keep_ratio
:
True
}
-
RandomResize
:
{
target_size
:
[[
640
,
1333
],
[
672
,
1333
],
[
704
,
1333
],
[
736
,
1333
],
[
768
,
1333
],
[
800
,
1333
]],
interp
:
2
,
keep_ratio
:
True
}
-
RandomFlip
Op
:
{
prob
:
0.5
}
-
RandomFlip
:
{
prob
:
0.5
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
32
,
pad_gt
:
true
}
-
PadBatch
:
{
pad_to_stride
:
32
,
pad_gt
:
true
}
batch_size
:
1
batch_size
:
1
shuffle
:
true
shuffle
:
true
drop_last
:
true
drop_last
:
true
...
@@ -15,12 +15,12 @@ TrainReader:
...
@@ -15,12 +15,12 @@ TrainReader:
EvalReader
:
EvalReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
32
,
pad_gt
:
false
}
-
PadBatch
:
{
pad_to_stride
:
32
,
pad_gt
:
false
}
batch_size
:
1
batch_size
:
1
shuffle
:
false
shuffle
:
false
drop_last
:
false
drop_last
:
false
...
@@ -29,12 +29,12 @@ EvalReader:
...
@@ -29,12 +29,12 @@ EvalReader:
TestReader
:
TestReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
32
,
pad_gt
:
false
}
-
PadBatch
:
{
pad_to_stride
:
32
,
pad_gt
:
false
}
batch_size
:
1
batch_size
:
1
shuffle
:
false
shuffle
:
false
drop_last
:
false
drop_last
:
false
dygraph/configs/faster_rcnn/_base_/faster_reader.yml
浏览文件 @
e527466d
worker_num
:
2
worker_num
:
2
TrainReader
:
TrainReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
RandomResize
Op
:
{
target_size
:
[[
640
,
1333
],
[
672
,
1333
],
[
704
,
1333
],
[
736
,
1333
],
[
768
,
1333
],
[
800
,
1333
]],
interp
:
2
,
keep_ratio
:
True
}
-
RandomResize
:
{
target_size
:
[[
640
,
1333
],
[
672
,
1333
],
[
704
,
1333
],
[
736
,
1333
],
[
768
,
1333
],
[
800
,
1333
]],
interp
:
2
,
keep_ratio
:
True
}
-
RandomFlip
Op
:
{
prob
:
0.5
}
-
RandomFlip
:
{
prob
:
0.5
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
-1.
,
pad_gt
:
true
}
-
PadBatch
:
{
pad_to_stride
:
-1.
,
pad_gt
:
true
}
batch_size
:
1
batch_size
:
1
shuffle
:
true
shuffle
:
true
drop_last
:
true
drop_last
:
true
...
@@ -15,12 +15,12 @@ TrainReader:
...
@@ -15,12 +15,12 @@ TrainReader:
EvalReader
:
EvalReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
-1.
,
pad_gt
:
false
}
-
PadBatch
:
{
pad_to_stride
:
-1.
,
pad_gt
:
false
}
batch_size
:
1
batch_size
:
1
shuffle
:
false
shuffle
:
false
drop_last
:
false
drop_last
:
false
...
@@ -29,12 +29,12 @@ EvalReader:
...
@@ -29,12 +29,12 @@ EvalReader:
TestReader
:
TestReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
-1.
,
pad_gt
:
false
}
-
PadBatch
:
{
pad_to_stride
:
-1.
,
pad_gt
:
false
}
batch_size
:
1
batch_size
:
1
shuffle
:
false
shuffle
:
false
drop_last
:
false
drop_last
:
false
dygraph/configs/fcos/_base_/fcos_reader.yml
浏览文件 @
e527466d
worker_num
:
2
worker_num
:
2
TrainReader
:
TrainReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
RandomFlip
Op
:
{
prob
:
0.5
}
-
RandomFlip
:
{
prob
:
0.5
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Resize
Image
:
{
target_size
:
800
,
max_size
:
1333
,
interp
:
1
,
use_cv2
:
true
}
-
Resize
:
{
target_size
:
[
800
,
1333
],
keep_ratio
:
true
,
interp
:
1
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
128
}
-
PadBatch
:
{
pad_to_stride
:
128
}
-
Gt2FCOSTarget
:
-
Gt2FCOSTarget
:
object_sizes_boundary
:
[
64
,
128
,
256
,
512
]
object_sizes_boundary
:
[
64
,
128
,
256
,
512
]
center_sampling_radius
:
1.5
center_sampling_radius
:
1.5
...
@@ -20,23 +20,23 @@ TrainReader:
...
@@ -20,23 +20,23 @@ TrainReader:
EvalReader
:
EvalReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Resize
Op
:
{
interp
:
1
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
1
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
128
}
-
PadBatch
:
{
pad_to_stride
:
128
}
batch_size
:
1
batch_size
:
1
shuffle
:
false
shuffle
:
false
TestReader
:
TestReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Resize
Op
:
{
interp
:
1
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
1
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
128
}
-
PadBatch
:
{
pad_to_stride
:
128
}
batch_size
:
1
batch_size
:
1
shuffle
:
false
shuffle
:
false
dygraph/configs/mask_rcnn/_base_/mask_fpn_reader.yml
浏览文件 @
e527466d
worker_num
:
2
worker_num
:
2
TrainReader
:
TrainReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
RandomResize
Op
:
{
target_size
:
[[
640
,
1333
],
[
672
,
1333
],
[
704
,
1333
],
[
736
,
1333
],
[
768
,
1333
],
[
800
,
1333
]],
interp
:
2
,
keep_ratio
:
True
}
-
RandomResize
:
{
target_size
:
[[
640
,
1333
],
[
672
,
1333
],
[
704
,
1333
],
[
736
,
1333
],
[
768
,
1333
],
[
800
,
1333
]],
interp
:
2
,
keep_ratio
:
True
}
-
RandomFlip
Op
:
{
prob
:
0.5
}
-
RandomFlip
:
{
prob
:
0.5
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
32
,
pad_gt
:
true
}
-
PadBatch
:
{
pad_to_stride
:
32
,
pad_gt
:
true
}
batch_size
:
1
batch_size
:
1
shuffle
:
true
shuffle
:
true
drop_last
:
true
drop_last
:
true
EvalReader
:
EvalReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
32
,
pad_gt
:
false
}
-
PadBatch
:
{
pad_to_stride
:
32
,
pad_gt
:
false
}
batch_size
:
1
batch_size
:
1
shuffle
:
false
shuffle
:
false
drop_last
:
false
drop_last
:
false
...
@@ -28,12 +28,12 @@ EvalReader:
...
@@ -28,12 +28,12 @@ EvalReader:
TestReader
:
TestReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
32
,
pad_gt
:
false
}
-
PadBatch
:
{
pad_to_stride
:
32
,
pad_gt
:
false
}
batch_size
:
1
batch_size
:
1
shuffle
:
false
shuffle
:
false
drop_last
:
false
drop_last
:
false
dygraph/configs/mask_rcnn/_base_/mask_reader.yml
浏览文件 @
e527466d
worker_num
:
2
worker_num
:
2
TrainReader
:
TrainReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
RandomResize
Op
:
{
target_size
:
[[
640
,
1333
],
[
672
,
1333
],
[
704
,
1333
],
[
736
,
1333
],
[
768
,
1333
],
[
800
,
1333
]],
interp
:
2
,
keep_ratio
:
True
}
-
RandomResize
:
{
target_size
:
[[
640
,
1333
],
[
672
,
1333
],
[
704
,
1333
],
[
736
,
1333
],
[
768
,
1333
],
[
800
,
1333
]],
interp
:
2
,
keep_ratio
:
True
}
-
RandomFlip
Op
:
{
prob
:
0.5
}
-
RandomFlip
:
{
prob
:
0.5
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
-1.
,
pad_gt
:
true
}
-
PadBatch
:
{
pad_to_stride
:
-1.
,
pad_gt
:
true
}
batch_size
:
1
batch_size
:
1
shuffle
:
true
shuffle
:
true
drop_last
:
true
drop_last
:
true
...
@@ -15,12 +15,12 @@ TrainReader:
...
@@ -15,12 +15,12 @@ TrainReader:
EvalReader
:
EvalReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
-1.
,
pad_gt
:
false
}
-
PadBatch
:
{
pad_to_stride
:
-1.
,
pad_gt
:
false
}
batch_size
:
1
batch_size
:
1
shuffle
:
false
shuffle
:
false
drop_last
:
false
drop_last
:
false
...
@@ -29,12 +29,12 @@ EvalReader:
...
@@ -29,12 +29,12 @@ EvalReader:
TestReader
:
TestReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
2
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
-1.
,
pad_gt
:
false
}
-
PadBatch
:
{
pad_to_stride
:
-1.
,
pad_gt
:
false
}
batch_size
:
1
batch_size
:
1
shuffle
:
false
shuffle
:
false
drop_last
:
false
drop_last
:
false
...
...
dygraph/configs/ppyolo/_base_/ppyolo_reader.yml
浏览文件 @
e527466d
...
@@ -3,20 +3,20 @@ TrainReader:
...
@@ -3,20 +3,20 @@ TrainReader:
inputs_def
:
inputs_def
:
num_max_boxes
:
50
num_max_boxes
:
50
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Mixup
Op
:
{
alpha
:
1.5
,
beta
:
1.5
}
-
Mixup
:
{
alpha
:
1.5
,
beta
:
1.5
}
-
RandomDistort
Op
:
{}
-
RandomDistort
:
{}
-
RandomExpand
Op
:
{
fill_value
:
[
123.675
,
116.28
,
103.53
]}
-
RandomExpand
:
{
fill_value
:
[
123.675
,
116.28
,
103.53
]}
-
RandomCrop
Op
:
{}
-
RandomCrop
:
{}
-
RandomFlip
Op
:
{}
-
RandomFlip
:
{}
batch_transforms
:
batch_transforms
:
-
BatchRandomResize
Op
:
{
target_size
:
[
320
,
352
,
384
,
416
,
448
,
480
,
512
,
544
,
576
,
608
],
random_size
:
True
,
random_interp
:
True
,
keep_ratio
:
False
}
-
BatchRandomResize
:
{
target_size
:
[
320
,
352
,
384
,
416
,
448
,
480
,
512
,
544
,
576
,
608
],
random_size
:
True
,
random_interp
:
True
,
keep_ratio
:
False
}
-
NormalizeBox
Op
:
{}
-
NormalizeBox
:
{}
-
PadBox
Op
:
{
num_max_boxes
:
50
}
-
PadBox
:
{
num_max_boxes
:
50
}
-
BboxXYXY2XYWH
Op
:
{}
-
BboxXYXY2XYWH
:
{}
-
NormalizeImage
Op
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
True
}
-
NormalizeImage
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
True
}
-
Permute
Op
:
{}
-
Permute
:
{}
-
Gt2YoloTarget
Op
:
{
anchor_masks
:
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]],
anchors
:
[[
10
,
13
],
[
16
,
30
],
[
33
,
23
],
[
30
,
61
],
[
62
,
45
],
[
59
,
119
],
[
116
,
90
],
[
156
,
198
],
[
373
,
326
]],
downsample_ratios
:
[
32
,
16
,
8
]}
-
Gt2YoloTarget
:
{
anchor_masks
:
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]],
anchors
:
[[
10
,
13
],
[
16
,
30
],
[
33
,
23
],
[
30
,
61
],
[
62
,
45
],
[
59
,
119
],
[
116
,
90
],
[
156
,
198
],
[
373
,
326
]],
downsample_ratios
:
[
32
,
16
,
8
]}
batch_size
:
24
batch_size
:
24
shuffle
:
true
shuffle
:
true
drop_last
:
true
drop_last
:
true
...
@@ -25,10 +25,10 @@ TrainReader:
...
@@ -25,10 +25,10 @@ TrainReader:
EvalReader
:
EvalReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
target_size
:
[
608
,
608
],
keep_ratio
:
False
,
interp
:
2
}
-
Resize
:
{
target_size
:
[
608
,
608
],
keep_ratio
:
False
,
interp
:
2
}
-
NormalizeImage
Op
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
True
}
-
NormalizeImage
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
True
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
8
batch_size
:
8
drop_empty
:
false
drop_empty
:
false
...
@@ -36,8 +36,8 @@ TestReader:
...
@@ -36,8 +36,8 @@ TestReader:
inputs_def
:
inputs_def
:
image_shape
:
[
3
,
608
,
608
]
image_shape
:
[
3
,
608
,
608
]
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
target_size
:
[
608
,
608
],
keep_ratio
:
False
,
interp
:
2
}
-
Resize
:
{
target_size
:
[
608
,
608
],
keep_ratio
:
False
,
interp
:
2
}
-
NormalizeImage
Op
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
True
}
-
NormalizeImage
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
True
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
1
batch_size
:
1
dygraph/configs/solov2/_base_/solov2_reader.yml
浏览文件 @
e527466d
worker_num
:
2
worker_num
:
2
TrainReader
:
TrainReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Poly2Mask
:
{}
-
Poly2Mask
:
{}
-
Resize
Op
:
{
interp
:
1
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
1
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
RandomFlip
Op
:
{}
-
RandomFlip
:
{}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
32
}
-
PadBatch
:
{
pad_to_stride
:
32
}
-
Gt2Solov2Target
Op
:
{
num_grids
:
[
40
,
36
,
24
,
16
,
12
],
-
Gt2Solov2Target
:
{
num_grids
:
[
40
,
36
,
24
,
16
,
12
],
scale_ranges
:
[[
1
,
96
],
[
48
,
192
],
[
96
,
384
],
[
192
,
768
],
[
384
,
2048
]],
scale_ranges
:
[[
1
,
96
],
[
48
,
192
],
[
96
,
384
],
[
192
,
768
],
[
384
,
2048
]],
coord_sigma
:
0.2
}
coord_sigma
:
0.2
}
batch_size
:
2
batch_size
:
2
...
@@ -19,12 +19,12 @@ TrainReader:
...
@@ -19,12 +19,12 @@ TrainReader:
EvalReader
:
EvalReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Resize
Op
:
{
interp
:
1
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
1
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
32
}
-
PadBatch
:
{
pad_to_stride
:
32
}
batch_size
:
1
batch_size
:
1
shuffle
:
false
shuffle
:
false
drop_last
:
false
drop_last
:
false
...
@@ -33,12 +33,12 @@ EvalReader:
...
@@ -33,12 +33,12 @@ EvalReader:
TestReader
:
TestReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
NormalizeImage
Op
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
NormalizeImage
:
{
is_scale
:
true
,
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
]}
-
Resize
Op
:
{
interp
:
1
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Resize
:
{
interp
:
1
,
target_size
:
[
800
,
1333
],
keep_ratio
:
True
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
PadBatch
Op
:
{
pad_to_stride
:
32
}
-
PadBatch
:
{
pad_to_stride
:
32
}
batch_size
:
1
batch_size
:
1
shuffle
:
false
shuffle
:
false
drop_last
:
false
drop_last
:
false
dygraph/configs/ssd/_base_/ssd_mobilenet_reader.yml
浏览文件 @
e527466d
...
@@ -3,17 +3,17 @@ TrainReader:
...
@@ -3,17 +3,17 @@ TrainReader:
inputs_def
:
inputs_def
:
num_max_boxes
:
90
num_max_boxes
:
90
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
RandomDistort
Op
:
{
brightness
:
[
0.5
,
1.125
,
0.875
],
random_apply
:
False
}
-
RandomDistort
:
{
brightness
:
[
0.5
,
1.125
,
0.875
],
random_apply
:
False
}
-
RandomExpand
Op
:
{
fill_value
:
[
127.5
,
127.5
,
127.5
]}
-
RandomExpand
:
{
fill_value
:
[
127.5
,
127.5
,
127.5
]}
-
RandomCrop
Op
:
{
allow_no_crop
:
Fasle
}
-
RandomCrop
:
{
allow_no_crop
:
Fasle
}
-
RandomFlip
Op
:
{}
-
RandomFlip
:
{}
-
Resize
Op
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
Resize
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
NormalizeBox
Op
:
{}
-
NormalizeBox
:
{}
-
PadBox
Op
:
{
num_max_boxes
:
90
}
-
PadBox
:
{
num_max_boxes
:
90
}
batch_transforms
:
batch_transforms
:
-
NormalizeImage
Op
:
{
mean
:
[
127.5
,
127.5
,
127.5
],
std
:
[
127.502231
,
127.502231
,
127.502231
],
is_scale
:
false
}
-
NormalizeImage
:
{
mean
:
[
127.5
,
127.5
,
127.5
],
std
:
[
127.502231
,
127.502231
,
127.502231
],
is_scale
:
false
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
32
batch_size
:
32
shuffle
:
true
shuffle
:
true
drop_last
:
true
drop_last
:
true
...
@@ -21,10 +21,10 @@ TrainReader:
...
@@ -21,10 +21,10 @@ TrainReader:
EvalReader
:
EvalReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
Resize
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
NormalizeImage
Op
:
{
mean
:
[
127.5
,
127.5
,
127.5
],
std
:
[
127.502231
,
127.502231
,
127.502231
],
is_scale
:
false
}
-
NormalizeImage
:
{
mean
:
[
127.5
,
127.5
,
127.5
],
std
:
[
127.502231
,
127.502231
,
127.502231
],
is_scale
:
false
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
1
batch_size
:
1
drop_empty
:
false
drop_empty
:
false
...
@@ -33,8 +33,8 @@ TestReader:
...
@@ -33,8 +33,8 @@ TestReader:
inputs_def
:
inputs_def
:
image_shape
:
[
3
,
300
,
300
]
image_shape
:
[
3
,
300
,
300
]
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
Resize
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
NormalizeImage
Op
:
{
mean
:
[
127.5
,
127.5
,
127.5
],
std
:
[
127.502231
,
127.502231
,
127.502231
],
is_scale
:
false
}
-
NormalizeImage
:
{
mean
:
[
127.5
,
127.5
,
127.5
],
std
:
[
127.502231
,
127.502231
,
127.502231
],
is_scale
:
false
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
1
batch_size
:
1
dygraph/configs/ssd/_base_/ssd_reader.yml
浏览文件 @
e527466d
...
@@ -4,18 +4,18 @@ TrainReader:
...
@@ -4,18 +4,18 @@ TrainReader:
num_max_boxes
:
90
num_max_boxes
:
90
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
RandomDistort
Op
:
{
brightness
:
[
0.5
,
1.125
,
0.875
],
random_apply
:
False
}
-
RandomDistort
:
{
brightness
:
[
0.5
,
1.125
,
0.875
],
random_apply
:
False
}
-
RandomExpand
Op
:
{
fill_value
:
[
104.
,
117.
,
123.
]}
-
RandomExpand
:
{
fill_value
:
[
104.
,
117.
,
123.
]}
-
RandomCrop
Op
:
{
allow_no_crop
:
true
}
-
RandomCrop
:
{
allow_no_crop
:
true
}
-
RandomFlip
Op
:
{}
-
RandomFlip
:
{}
-
Resize
Op
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
Resize
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
NormalizeBox
Op
:
{}
-
NormalizeBox
:
{}
-
PadBox
Op
:
{
num_max_boxes
:
90
}
-
PadBox
:
{
num_max_boxes
:
90
}
batch_transforms
:
batch_transforms
:
-
NormalizeImage
Op
:
{
mean
:
[
104.
,
117.
,
123.
],
std
:
[
1.
,
1.
,
1.
],
is_scale
:
false
}
-
NormalizeImage
:
{
mean
:
[
104.
,
117.
,
123.
],
std
:
[
1.
,
1.
,
1.
],
is_scale
:
false
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
8
batch_size
:
8
shuffle
:
true
shuffle
:
true
...
@@ -24,10 +24,10 @@ TrainReader:
...
@@ -24,10 +24,10 @@ TrainReader:
EvalReader
:
EvalReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
Resize
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
NormalizeImage
Op
:
{
mean
:
[
104.
,
117.
,
123.
],
std
:
[
1.
,
1.
,
1.
],
is_scale
:
false
}
-
NormalizeImage
:
{
mean
:
[
104.
,
117.
,
123.
],
std
:
[
1.
,
1.
,
1.
],
is_scale
:
false
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
1
batch_size
:
1
drop_empty
:
false
drop_empty
:
false
...
@@ -35,8 +35,8 @@ TestReader:
...
@@ -35,8 +35,8 @@ TestReader:
inputs_def
:
inputs_def
:
image_shape
:
[
3
,
300
,
300
]
image_shape
:
[
3
,
300
,
300
]
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
Resize
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
NormalizeImage
Op
:
{
mean
:
[
104.
,
117.
,
123.
],
std
:
[
1.
,
1.
,
1.
],
is_scale
:
false
}
-
NormalizeImage
:
{
mean
:
[
104.
,
117.
,
123.
],
std
:
[
1.
,
1.
,
1.
],
is_scale
:
false
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
1
batch_size
:
1
dygraph/configs/ssd/_base_/ssdlite300_reader.yml
浏览文件 @
e527466d
...
@@ -3,17 +3,17 @@ TrainReader:
...
@@ -3,17 +3,17 @@ TrainReader:
inputs_def
:
inputs_def
:
num_max_boxes
:
90
num_max_boxes
:
90
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
RandomDistort
Op
:
{
brightness
:
[
0.5
,
1.125
,
0.875
],
random_apply
:
False
}
-
RandomDistort
:
{
brightness
:
[
0.5
,
1.125
,
0.875
],
random_apply
:
False
}
-
RandomExpand
Op
:
{
fill_value
:
[
123.675
,
116.28
,
103.53
]}
-
RandomExpand
:
{
fill_value
:
[
123.675
,
116.28
,
103.53
]}
-
RandomCrop
Op
:
{
allow_no_crop
:
False
}
-
RandomCrop
:
{
allow_no_crop
:
False
}
-
RandomFlip
Op
:
{}
-
RandomFlip
:
{}
-
Resize
Op
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
Resize
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
NormalizeBox
Op
:
{}
-
NormalizeBox
:
{}
-
PadBox
Op
:
{
num_max_boxes
:
90
}
-
PadBox
:
{
num_max_boxes
:
90
}
batch_transforms
:
batch_transforms
:
-
NormalizeImage
Op
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
true
}
-
NormalizeImage
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
true
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
64
batch_size
:
64
shuffle
:
true
shuffle
:
true
drop_last
:
true
drop_last
:
true
...
@@ -21,10 +21,10 @@ TrainReader:
...
@@ -21,10 +21,10 @@ TrainReader:
EvalReader
:
EvalReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
Resize
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
NormalizeImage
Op
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
true
}
-
NormalizeImage
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
true
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
1
batch_size
:
1
drop_empty
:
false
drop_empty
:
false
...
@@ -33,8 +33,8 @@ TestReader:
...
@@ -33,8 +33,8 @@ TestReader:
inputs_def
:
inputs_def
:
image_shape
:
[
3
,
300
,
300
]
image_shape
:
[
3
,
300
,
300
]
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
Resize
:
{
target_size
:
[
300
,
300
],
keep_ratio
:
False
,
interp
:
1
}
-
NormalizeImage
Op
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
true
}
-
NormalizeImage
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
true
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
1
batch_size
:
1
dygraph/configs/ssd/_base_/ssdlite320_reader.yml
浏览文件 @
e527466d
...
@@ -3,17 +3,17 @@ TrainReader:
...
@@ -3,17 +3,17 @@ TrainReader:
inputs_def
:
inputs_def
:
num_max_boxes
:
90
num_max_boxes
:
90
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
RandomDistort
Op
:
{
brightness
:
[
0.5
,
1.125
,
0.875
],
random_apply
:
False
}
-
RandomDistort
:
{
brightness
:
[
0.5
,
1.125
,
0.875
],
random_apply
:
False
}
-
RandomExpand
Op
:
{
fill_value
:
[
123.675
,
116.28
,
103.53
]}
-
RandomExpand
:
{
fill_value
:
[
123.675
,
116.28
,
103.53
]}
-
RandomCrop
Op
:
{
allow_no_crop
:
False
}
-
RandomCrop
:
{
allow_no_crop
:
False
}
-
RandomFlip
Op
:
{}
-
RandomFlip
:
{}
-
Resize
Op
:
{
target_size
:
[
320
,
320
],
keep_ratio
:
False
,
interp
:
1
}
-
Resize
:
{
target_size
:
[
320
,
320
],
keep_ratio
:
False
,
interp
:
1
}
-
NormalizeBox
Op
:
{}
-
NormalizeBox
:
{}
-
PadBox
Op
:
{
num_max_boxes
:
90
}
-
PadBox
:
{
num_max_boxes
:
90
}
batch_transforms
:
batch_transforms
:
-
NormalizeImage
Op
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
true
}
-
NormalizeImage
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
true
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
64
batch_size
:
64
shuffle
:
true
shuffle
:
true
drop_last
:
true
drop_last
:
true
...
@@ -21,10 +21,10 @@ TrainReader:
...
@@ -21,10 +21,10 @@ TrainReader:
EvalReader
:
EvalReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
target_size
:
[
320
,
320
],
keep_ratio
:
False
,
interp
:
1
}
-
Resize
:
{
target_size
:
[
320
,
320
],
keep_ratio
:
False
,
interp
:
1
}
-
NormalizeImage
Op
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
true
}
-
NormalizeImage
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
true
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
1
batch_size
:
1
drop_empty
:
false
drop_empty
:
false
...
@@ -33,8 +33,8 @@ TestReader:
...
@@ -33,8 +33,8 @@ TestReader:
inputs_def
:
inputs_def
:
image_shape
:
[
3
,
320
,
320
]
image_shape
:
[
3
,
320
,
320
]
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
target_size
:
[
320
,
320
],
keep_ratio
:
False
,
interp
:
1
}
-
Resize
:
{
target_size
:
[
320
,
320
],
keep_ratio
:
False
,
interp
:
1
}
-
NormalizeImage
Op
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
true
}
-
NormalizeImage
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
true
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
1
batch_size
:
1
dygraph/configs/ttfnet/_base_/ttfnet_reader.yml
浏览文件 @
e527466d
worker_num
:
2
worker_num
:
2
TrainReader
:
TrainReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
RandomFlip
Op
:
{
prob
:
0.5
}
-
RandomFlip
:
{
prob
:
0.5
}
-
Resize
Op
:
{
interp
:
1
,
target_size
:
[
512
,
512
],
keep_ratio
:
False
}
-
Resize
:
{
interp
:
1
,
target_size
:
[
512
,
512
],
keep_ratio
:
False
}
-
NormalizeImage
Op
:
{
mean
:
[
123.675
,
116.28
,
103.53
],
std
:
[
58.395
,
57.12
,
57.375
],
is_scale
:
false
}
-
NormalizeImage
:
{
mean
:
[
123.675
,
116.28
,
103.53
],
std
:
[
58.395
,
57.12
,
57.375
],
is_scale
:
false
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_transforms
:
batch_transforms
:
-
Gt2TTFTarget
Op
:
{
down_ratio
:
4
}
-
Gt2TTFTarget
:
{
down_ratio
:
4
}
-
PadBatch
Op
:
{
pad_to_stride
:
32
,
pad_gt
:
true
}
-
PadBatch
:
{
pad_to_stride
:
32
,
pad_gt
:
true
}
batch_size
:
12
batch_size
:
12
shuffle
:
true
shuffle
:
true
drop_last
:
true
drop_last
:
true
EvalReader
:
EvalReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
interp
:
1
,
target_size
:
[
512
,
512
],
keep_ratio
:
False
}
-
Resize
:
{
interp
:
1
,
target_size
:
[
512
,
512
],
keep_ratio
:
False
}
-
NormalizeImage
Op
:
{
is_scale
:
false
,
mean
:
[
123.675
,
116.28
,
103.53
],
std
:
[
58.395
,
57.12
,
57.375
]}
-
NormalizeImage
:
{
is_scale
:
false
,
mean
:
[
123.675
,
116.28
,
103.53
],
std
:
[
58.395
,
57.12
,
57.375
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
1
batch_size
:
1
drop_last
:
false
drop_last
:
false
drop_empty
:
false
drop_empty
:
false
TestReader
:
TestReader
:
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
interp
:
1
,
target_size
:
[
512
,
512
],
keep_ratio
:
False
}
-
Resize
:
{
interp
:
1
,
target_size
:
[
512
,
512
],
keep_ratio
:
False
}
-
NormalizeImage
Op
:
{
is_scale
:
false
,
mean
:
[
123.675
,
116.28
,
103.53
],
std
:
[
58.395
,
57.12
,
57.375
]}
-
NormalizeImage
:
{
is_scale
:
false
,
mean
:
[
123.675
,
116.28
,
103.53
],
std
:
[
58.395
,
57.12
,
57.375
]}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
1
batch_size
:
1
drop_last
:
false
drop_last
:
false
drop_empty
:
false
drop_empty
:
false
dygraph/configs/yolov3/_base_/yolov3_reader.yml
浏览文件 @
e527466d
...
@@ -3,20 +3,20 @@ TrainReader:
...
@@ -3,20 +3,20 @@ TrainReader:
inputs_def
:
inputs_def
:
num_max_boxes
:
50
num_max_boxes
:
50
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Mixup
Op
:
{
alpha
:
1.5
,
beta
:
1.5
}
-
Mixup
:
{
alpha
:
1.5
,
beta
:
1.5
}
-
RandomDistort
Op
:
{}
-
RandomDistort
:
{}
-
RandomExpand
Op
:
{
fill_value
:
[
123.675
,
116.28
,
103.53
]}
-
RandomExpand
:
{
fill_value
:
[
123.675
,
116.28
,
103.53
]}
-
RandomCrop
Op
:
{}
-
RandomCrop
:
{}
-
RandomFlip
Op
:
{}
-
RandomFlip
:
{}
batch_transforms
:
batch_transforms
:
-
BatchRandomResize
Op
:
{
target_size
:
[
320
,
352
,
384
,
416
,
448
,
480
,
512
,
544
,
576
,
608
],
random_size
:
True
,
random_interp
:
True
,
keep_ratio
:
False
}
-
BatchRandomResize
:
{
target_size
:
[
320
,
352
,
384
,
416
,
448
,
480
,
512
,
544
,
576
,
608
],
random_size
:
True
,
random_interp
:
True
,
keep_ratio
:
False
}
-
NormalizeBox
Op
:
{}
-
NormalizeBox
:
{}
-
PadBox
Op
:
{
num_max_boxes
:
50
}
-
PadBox
:
{
num_max_boxes
:
50
}
-
BboxXYXY2XYWH
Op
:
{}
-
BboxXYXY2XYWH
:
{}
-
NormalizeImage
Op
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
True
}
-
NormalizeImage
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
True
}
-
Permute
Op
:
{}
-
Permute
:
{}
-
Gt2YoloTarget
Op
:
{
anchor_masks
:
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]],
anchors
:
[[
10
,
13
],
[
16
,
30
],
[
33
,
23
],
[
30
,
61
],
[
62
,
45
],
[
59
,
119
],
[
116
,
90
],
[
156
,
198
],
[
373
,
326
]],
downsample_ratios
:
[
32
,
16
,
8
]}
-
Gt2YoloTarget
:
{
anchor_masks
:
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]],
anchors
:
[[
10
,
13
],
[
16
,
30
],
[
33
,
23
],
[
30
,
61
],
[
62
,
45
],
[
59
,
119
],
[
116
,
90
],
[
156
,
198
],
[
373
,
326
]],
downsample_ratios
:
[
32
,
16
,
8
]}
batch_size
:
8
batch_size
:
8
shuffle
:
true
shuffle
:
true
drop_last
:
true
drop_last
:
true
...
@@ -27,10 +27,10 @@ EvalReader:
...
@@ -27,10 +27,10 @@ EvalReader:
inputs_def
:
inputs_def
:
num_max_boxes
:
50
num_max_boxes
:
50
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
target_size
:
[
608
,
608
],
keep_ratio
:
False
,
interp
:
2
}
-
Resize
:
{
target_size
:
[
608
,
608
],
keep_ratio
:
False
,
interp
:
2
}
-
NormalizeImage
Op
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
True
}
-
NormalizeImage
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
True
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
1
batch_size
:
1
drop_empty
:
false
drop_empty
:
false
...
@@ -38,8 +38,8 @@ TestReader:
...
@@ -38,8 +38,8 @@ TestReader:
inputs_def
:
inputs_def
:
image_shape
:
[
3
,
608
,
608
]
image_shape
:
[
3
,
608
,
608
]
sample_transforms
:
sample_transforms
:
-
Decode
Op
:
{}
-
Decode
:
{}
-
Resize
Op
:
{
target_size
:
[
608
,
608
],
keep_ratio
:
False
,
interp
:
2
}
-
Resize
:
{
target_size
:
[
608
,
608
],
keep_ratio
:
False
,
interp
:
2
}
-
NormalizeImage
Op
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
True
}
-
NormalizeImage
:
{
mean
:
[
0.485
,
0.456
,
0.406
],
std
:
[
0.229
,
0.224
,
0.225
],
is_scale
:
True
}
-
Permute
Op
:
{}
-
Permute
:
{}
batch_size
:
1
batch_size
:
1
dygraph/ppdet/data/transform/__init__.py
浏览文件 @
e527466d
...
@@ -14,14 +14,9 @@
...
@@ -14,14 +14,9 @@
from
.
import
operators
from
.
import
operators
from
.
import
batch_operators
from
.
import
batch_operators
from
.
import
operator
from
.
import
batch_operator
# TODO: operators and batch_operators will be replaced by operator and batch_operator
from
.operators
import
*
from
.operators
import
*
from
.operator
import
*
from
.batch_operators
import
*
from
.batch_operators
import
*
from
.batch_operator
import
*
__all__
=
[]
__all__
=
[]
__all__
+=
registered_ops
__all__
+=
registered_ops
dygraph/ppdet/data/transform/batch_operator.py
已删除
100644 → 0
浏览文件 @
5b6bebf2
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
try
:
from
collections.abc
import
Sequence
except
Exception
:
from
collections
import
Sequence
import
cv2
import
numpy
as
np
from
.operator
import
register_op
,
BaseOperator
,
ResizeOp
from
.op_helper
import
jaccard_overlap
,
gaussian2D
from
scipy
import
ndimage
from
ppdet.utils.logger
import
setup_logger
logger
=
setup_logger
(
__name__
)
__all__
=
[
'PadBatchOp'
,
'BatchRandomResizeOp'
,
'Gt2YoloTargetOp'
,
'Gt2FCOSTargetOp'
,
'Gt2TTFTargetOp'
,
'Gt2Solov2TargetOp'
,
]
@register_op
class PadBatchOp(BaseOperator):
    """
    Pad a batch of samples so they can be divisible by a stride.
    The layout of each image should be 'CHW'.

    Args:
        pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
            height and width is divisible by `pad_to_stride`.
        pad_gt (bool): If True, additionally pad the per-sample ground-truth
            arrays (boxes, classes, crowd flags, polygons) to a common size
            across the batch so they can be stacked into dense tensors.
    """

    def __init__(self, pad_to_stride=0, pad_gt=False):
        super(PadBatchOp, self).__init__()
        self.pad_to_stride = pad_to_stride
        self.pad_gt = pad_gt

    def __call__(self, samples, context=None):
        """
        Args:
            samples (list): a batch of sample, each is dict.
        """
        coarsest_stride = self.pad_to_stride
        # Target canvas is the per-channel max (C, H, W) over the batch,
        # optionally rounded up so H and W divide by the stride.
        max_shape = np.array(
            [data['image'].shape for data in samples]).max(axis=0)
        if coarsest_stride > 0:
            max_shape[1] = int(
                np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
            max_shape[2] = int(
                np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
        padding_batch = []  # NOTE(review): appears unused below
        for data in samples:
            im = data['image']
            im_c, im_h, im_w = im.shape[:]
            # Zero-pad the image into the top-left corner of the canvas.
            padding_im = np.zeros(
                (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
            padding_im[:, :im_h, :im_w] = im
            data['image'] = padding_im
            if 'semantic' in data and data['semantic'] is not None:
                # Keep the semantic segmentation map aligned with the image.
                semantic = data['semantic']
                padding_sem = np.zeros(
                    (1, max_shape[1], max_shape[2]), dtype=np.float32)
                padding_sem[:, :im_h, :im_w] = semantic
                data['semantic'] = padding_sem
            if 'gt_segm' in data and data['gt_segm'] is not None:
                # Keep per-instance binary masks aligned with the image.
                gt_segm = data['gt_segm']
                padding_segm = np.zeros(
                    (gt_segm.shape[0], max_shape[1], max_shape[2]),
                    dtype=np.uint8)
                padding_segm[:, :im_h, :im_w] = gt_segm
                data['gt_segm'] = padding_segm
        if self.pad_gt:
            gt_num = []
            # NOTE(review): `data` here is whatever the previous loop left
            # bound (the last sample), so pad_mask is decided from the last
            # sample only — confirm this is intentional.
            if 'gt_poly' in data and data[
                    'gt_poly'] is not None and len(data['gt_poly']) > 0:
                pad_mask = True
            else:
                pad_mask = False

            if pad_mask:
                poly_num = []
                poly_part_num = []
                point_num = []
            # First pass: collect max sizes for boxes and (optionally) polys.
            for data in samples:
                gt_num.append(data['gt_bbox'].shape[0])
                if pad_mask:
                    poly_num.append(len(data['gt_poly']))
                    for poly in data['gt_poly']:
                        poly_part_num.append(int(len(poly)))
                        for p_p in poly:
                            # Polygon points are stored flat (x, y, x, y, ...).
                            point_num.append(int(len(p_p) / 2))
            gt_num_max = max(gt_num)

            # Second pass: pad every sample's GT arrays to the batch maxima.
            # Padding slots are filled with -1 for boxes/classes (invalid
            # marker) and 1 for is_crowd.
            for i, data in enumerate(samples):
                gt_box_data = -np.ones([gt_num_max, 4], dtype=np.float32)
                gt_class_data = -np.ones([gt_num_max], dtype=np.int32)
                is_crowd_data = np.ones([gt_num_max], dtype=np.int32)

                if pad_mask:
                    poly_num_max = max(poly_num)
                    poly_part_num_max = max(poly_part_num)
                    point_num_max = max(point_num)
                    gt_masks_data = -np.ones(
                        [poly_num_max, poly_part_num_max, point_num_max, 2],
                        dtype=np.float32)

                gt_num = data['gt_bbox'].shape[0]
                gt_box_data[0:gt_num, :] = data['gt_bbox']
                gt_class_data[0:gt_num] = np.squeeze(data['gt_class'])
                is_crowd_data[0:gt_num] = np.squeeze(data['is_crowd'])
                if pad_mask:
                    for j, poly in enumerate(data['gt_poly']):
                        for k, p_p in enumerate(poly):
                            pp_np = np.array(p_p).reshape(-1, 2)
                            gt_masks_data[j, k, :pp_np.shape[0], :] = pp_np
                    data['gt_poly'] = gt_masks_data
                data['gt_bbox'] = gt_box_data
                data['gt_class'] = gt_class_data
                data['is_crowd'] = is_crowd_data

        return samples
@register_op
class BatchRandomResizeOp(BaseOperator):
    """
    Resize image to target size randomly. random target_size and interpolation method

    Args:
        target_size (int, list, tuple): image target size, if random size is True, must be list or tuple
        keep_ratio (bool): whether keep_ratio or not, default true
        interp (int): the interpolation method
        random_size (bool): whether random select target size of image
        random_interp (bool): whether random select interpolation method
    """

    def __init__(self,
                 target_size,
                 keep_ratio,
                 interp=cv2.INTER_NEAREST,
                 random_size=True,
                 random_interp=False):
        super(BatchRandomResizeOp, self).__init__()
        self.keep_ratio = keep_ratio
        # Candidate interpolation methods used when random_interp is True.
        self.interps = [
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_AREA,
            cv2.INTER_CUBIC,
            cv2.INTER_LANCZOS4,
        ]
        self.interp = interp
        assert isinstance(target_size, (
            int, Sequence)), "target_size must be int, list or tuple"
        # Random size needs a list of candidate sizes to choose from.
        if random_size and not isinstance(target_size, list):
            raise TypeError(
                "Type of target_size is invalid when random_size is True. Must be List, now is {}".
                format(type(target_size)))
        self.target_size = target_size
        self.random_size = random_size
        self.random_interp = random_interp

    def __call__(self, samples, context=None):
        # One size/interp is drawn per batch, so every sample in the batch
        # is resized consistently; the actual work is delegated to ResizeOp.
        if self.random_size:
            target_size = np.random.choice(self.target_size)
        else:
            target_size = self.target_size

        if self.random_interp:
            interp = np.random.choice(self.interps)
        else:
            interp = self.interp

        resizer = ResizeOp(target_size, keep_ratio=self.keep_ratio, interp=interp)
        return resizer(samples, context=context)
@register_op
class Gt2YoloTargetOp(BaseOperator):
    """
    Generate YOLOv3 targets by ground truth data, this operator is only used in
    fine grained YOLOv3 loss mode
    """

    def __init__(self,
                 anchors,
                 anchor_masks,
                 downsample_ratios,
                 num_classes=80,
                 iou_thresh=1.):
        super(Gt2YoloTargetOp, self).__init__()
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.downsample_ratios = downsample_ratios
        self.num_classes = num_classes
        # iou_thresh < 1 additionally assigns non-best anchors whose IoU with
        # the gt exceeds the threshold (see second assignment branch below).
        self.iou_thresh = iou_thresh

    def __call__(self, samples, context=None):
        assert len(self.anchor_masks) == len(self.downsample_ratios), \
            "anchor_masks', and 'downsample_ratios' should have same length."

        h, w = samples[0]['image'].shape[1:3]
        # Anchor sizes normalized by image size, to compare against
        # normalized gt widths/heights via IoU of co-centered boxes.
        an_hw = np.array(self.anchors) / np.array([[w, h]])
        for sample in samples:
            # im, gt_bbox, gt_class, gt_score = sample
            im = sample['image']
            gt_bbox = sample['gt_bbox']
            gt_class = sample['gt_class']
            if 'gt_score' not in sample:
                sample['gt_score'] = np.ones(
                    (gt_bbox.shape[0], 1), dtype=np.float32)
            gt_score = sample['gt_score']
            for i, (
                    mask, downsample_ratio
            ) in enumerate(zip(self.anchor_masks, self.downsample_ratios)):
                grid_h = int(h / downsample_ratio)
                grid_w = int(w / downsample_ratio)
                # Per-anchor channels: x, y, w, h, scale, objectness, classes.
                target = np.zeros(
                    (len(mask), 6 + self.num_classes, grid_h, grid_w),
                    dtype=np.float32)
                for b in range(gt_bbox.shape[0]):
                    # gt_bbox assumed normalized xywh — TODO confirm upstream.
                    gx, gy, gw, gh = gt_bbox[b, :]
                    cls = gt_class[b]
                    score = gt_score[b]
                    if gw <= 0. or gh <= 0. or score <= 0.:
                        continue

                    # find best match anchor index
                    best_iou = 0.
                    best_idx = -1
                    for an_idx in range(an_hw.shape[0]):
                        iou = jaccard_overlap(
                            [0., 0., gw, gh],
                            [0., 0., an_hw[an_idx, 0], an_hw[an_idx, 1]])
                        if iou > best_iou:
                            best_iou = iou
                            best_idx = an_idx

                    gi = int(gx * grid_w)
                    gj = int(gy * grid_h)

                    # gtbox should be regressed in this layer if best match
                    # anchor index in anchor mask of this layer
                    if best_idx in mask:
                        best_n = mask.index(best_idx)

                        # x, y, w, h, scale
                        target[best_n, 0, gj, gi] = gx * grid_w - gi
                        target[best_n, 1, gj, gi] = gy * grid_h - gj
                        target[best_n, 2, gj, gi] = np.log(
                            gw * w / self.anchors[best_idx][0])
                        target[best_n, 3, gj, gi] = np.log(
                            gh * h / self.anchors[best_idx][1])
                        target[best_n, 4, gj, gi] = 2.0 - gw * gh
                        # objectness record gt_score
                        target[best_n, 5, gj, gi] = score
                        # classification
                        target[best_n, 6 + cls, gj, gi] = 1.

                    # For non-matched anchors, calculate the target if the iou
                    # between anchor and gt is larger than iou_thresh
                    if self.iou_thresh < 1:
                        for idx, mask_i in enumerate(mask):
                            if mask_i == best_idx:
                                continue
                            iou = jaccard_overlap(
                                [0., 0., gw, gh],
                                [0., 0., an_hw[mask_i, 0], an_hw[mask_i, 1]])
                            if iou > self.iou_thresh and target[
                                    idx, 5, gj, gi] == 0.:
                                # x, y, w, h, scale
                                target[idx, 0, gj, gi] = gx * grid_w - gi
                                target[idx, 1, gj, gi] = gy * grid_h - gj
                                target[idx, 2, gj, gi] = np.log(
                                    gw * w / self.anchors[mask_i][0])
                                target[idx, 3, gj, gi] = np.log(
                                    gh * h / self.anchors[mask_i][1])
                                target[idx, 4, gj, gi] = 2.0 - gw * gh
                                # objectness record gt_score
                                target[idx, 5, gj, gi] = score
                                # classification
                                target[idx, 6 + cls, gj, gi] = 1.
                sample['target{}'.format(i)] = target
            # remove useless gt_class and gt_score after target calculated
            sample.pop('gt_class')
            sample.pop('gt_score')
        return samples
@register_op
class Gt2FCOSTargetOp(BaseOperator):
    """
    Generate FCOS targets by ground truth data
    """

    def __init__(self,
                 object_sizes_boundary,
                 center_sampling_radius,
                 downsample_ratios,
                 norm_reg_targets=False):
        super(Gt2FCOSTargetOp, self).__init__()
        self.center_sampling_radius = center_sampling_radius
        self.downsample_ratios = downsample_ratios
        self.INF = np.inf
        # Turn the boundary list into per-level [lo, hi] size-of-interest
        # ranges: [-1, b0], [b0, b1], ..., [bn, inf].
        self.object_sizes_boundary = [-1] + object_sizes_boundary + [self.INF]
        object_sizes_of_interest = []
        for i in range(len(self.object_sizes_boundary) - 1):
            object_sizes_of_interest.append([
                self.object_sizes_boundary[i], self.object_sizes_boundary[i + 1]
            ])
        self.object_sizes_of_interest = object_sizes_of_interest
        self.norm_reg_targets = norm_reg_targets

    def _compute_points(self, w, h):
        """
        compute the corresponding points in each feature map
        :param h: image height
        :param w: image width
        :return: points from all feature map
        """
        locations = []
        for stride in self.downsample_ratios:
            shift_x = np.arange(0, w, stride).astype(np.float32)
            shift_y = np.arange(0, h, stride).astype(np.float32)
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shift_x = shift_x.flatten()
            shift_y = shift_y.flatten()
            # Shift each point to the center of its stride cell.
            location = np.stack([shift_x, shift_y], axis=1) + stride // 2
            locations.append(location)
        num_points_each_level = [len(location) for location in locations]
        locations = np.concatenate(locations, axis=0)
        return locations, num_points_each_level

    def _convert_xywh2xyxy(self, gt_bbox, w, h):
        """
        convert the bounding box from style xywh to xyxy
        :param gt_bbox: bounding boxes normalized into [0, 1]
        :param w: image width
        :param h: image height
        :return: bounding boxes in xyxy style
        """
        bboxes = gt_bbox.copy()
        bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * w
        bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * h
        bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
        bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]
        return bboxes

    def _check_inside_boxes_limited(self, gt_bbox, xs, ys,
                                    num_points_each_level):
        """
        check if points is within the clipped boxes
        :param gt_bbox: bounding boxes
        :param xs: horizontal coordinate of points
        :param ys: vertical coordinate of points
        :return: the mask of points is within gt_box or not
        """
        bboxes = np.reshape(
            gt_bbox, newshape=[1, gt_bbox.shape[0], gt_bbox.shape[1]])
        bboxes = np.tile(bboxes, reps=[xs.shape[0], 1, 1])
        ct_x = (bboxes[:, :, 0] + bboxes[:, :, 2]) / 2
        ct_y = (bboxes[:, :, 1] + bboxes[:, :, 3]) / 2
        beg = 0
        # Shrink each gt box to a window of radius
        # center_sampling_radius * stride around its center, per level.
        clipped_box = bboxes.copy()
        for lvl, stride in enumerate(self.downsample_ratios):
            end = beg + num_points_each_level[lvl]
            stride_exp = self.center_sampling_radius * stride
            clipped_box[beg:end, :, 0] = np.maximum(
                bboxes[beg:end, :, 0], ct_x[beg:end, :] - stride_exp)
            clipped_box[beg:end, :, 1] = np.maximum(
                bboxes[beg:end, :, 1], ct_y[beg:end, :] - stride_exp)
            clipped_box[beg:end, :, 2] = np.minimum(
                bboxes[beg:end, :, 2], ct_x[beg:end, :] + stride_exp)
            clipped_box[beg:end, :, 3] = np.minimum(
                bboxes[beg:end, :, 3], ct_y[beg:end, :] + stride_exp)
            beg = end
        # A point is inside iff all four signed distances are positive.
        l_res = xs - clipped_box[:, :, 0]
        r_res = clipped_box[:, :, 2] - xs
        t_res = ys - clipped_box[:, :, 1]
        b_res = clipped_box[:, :, 3] - ys
        clipped_box_reg_targets = np.stack([l_res, t_res, r_res, b_res], axis=2)
        inside_gt_box = np.min(clipped_box_reg_targets, axis=2) > 0
        return inside_gt_box

    def __call__(self, samples, context=None):
        assert len(self.object_sizes_of_interest) == len(self.downsample_ratios), \
            "object_sizes_of_interest', and 'downsample_ratios' should have same length."

        for sample in samples:
            # im, gt_bbox, gt_class, gt_score = sample
            im = sample['image']
            bboxes = sample['gt_bbox']
            gt_class = sample['gt_class']
            # calculate the locations
            h, w = im.shape[1:3]
            points, num_points_each_level = self._compute_points(w, h)
            # Broadcast each level's size-of-interest range to its points.
            object_scale_exp = []
            for i, num_pts in enumerate(num_points_each_level):
                object_scale_exp.append(
                    np.tile(
                        np.array([self.object_sizes_of_interest[i]]),
                        reps=[num_pts, 1]))
            object_scale_exp = np.concatenate(object_scale_exp, axis=0)

            gt_area = (bboxes[:, 2] - bboxes[:, 0]) * (
                bboxes[:, 3] - bboxes[:, 1])
            # Build (num_points, num_boxes) coordinate grids.
            xs, ys = points[:, 0], points[:, 1]
            xs = np.reshape(xs, newshape=[xs.shape[0], 1])
            xs = np.tile(xs, reps=[1, bboxes.shape[0]])
            ys = np.reshape(ys, newshape=[ys.shape[0], 1])
            ys = np.tile(ys, reps=[1, bboxes.shape[0]])

            # Left/right/top/bottom regression targets per (point, box).
            l_res = xs - bboxes[:, 0]
            r_res = bboxes[:, 2] - xs
            t_res = ys - bboxes[:, 1]
            b_res = bboxes[:, 3] - ys
            reg_targets = np.stack([l_res, t_res, r_res, b_res], axis=2)
            if self.center_sampling_radius > 0:
                is_inside_box = self._check_inside_boxes_limited(
                    bboxes, xs, ys, num_points_each_level)
            else:
                is_inside_box = np.min(reg_targets, axis=2) > 0
            # check if the targets is inside the corresponding level
            max_reg_targets = np.max(reg_targets, axis=2)
            lower_bound = np.tile(
                np.expand_dims(
                    object_scale_exp[:, 0], axis=1),
                reps=[1, max_reg_targets.shape[1]])
            high_bound = np.tile(
                np.expand_dims(
                    object_scale_exp[:, 1], axis=1),
                reps=[1, max_reg_targets.shape[1]])
            is_match_current_level = \
                (max_reg_targets > lower_bound) & \
                (max_reg_targets < high_bound)
            # Each point takes the smallest-area matching gt box; unmatched
            # candidates are pushed to INF so argmin ignores them.
            points2gtarea = np.tile(
                np.expand_dims(
                    gt_area, axis=0), reps=[xs.shape[0], 1])
            points2gtarea[is_inside_box == 0] = self.INF
            points2gtarea[is_match_current_level == 0] = self.INF
            points2min_area = points2gtarea.min(axis=1)
            points2min_area_ind = points2gtarea.argmin(axis=1)
            # Labels are 1-based; 0 means background (no matching box).
            labels = gt_class[points2min_area_ind] + 1
            labels[points2min_area == self.INF] = 0
            reg_targets = reg_targets[range(xs.shape[0]), points2min_area_ind]
            # Centerness = sqrt((min_lr/max_lr) * (min_tb/max_tb)).
            ctn_targets = np.sqrt((reg_targets[:, [0, 2]].min(axis=1) / \
                                  reg_targets[:, [0, 2]].max(axis=1)) * \
                                  (reg_targets[:, [1, 3]].min(axis=1) / \
                                  reg_targets[:, [1, 3]].max(axis=1))).astype(np.float32)
            ctn_targets = np.reshape(
                ctn_targets, newshape=[ctn_targets.shape[0], 1])
            ctn_targets[labels <= 0] = 0
            pos_ind = np.nonzero(labels != 0)
            reg_targets_pos = reg_targets[pos_ind[0], :]
            # Split the flat point axis back into per-level chunks.
            split_sections = []
            beg = 0
            for lvl in range(len(num_points_each_level)):
                end = beg + num_points_each_level[lvl]
                split_sections.append(end)
                beg = end
            labels_by_level = np.split(labels, split_sections, axis=0)
            reg_targets_by_level = np.split(reg_targets, split_sections, axis=0)
            ctn_targets_by_level = np.split(
                ctn_targets, split_sections, axis=0)
            for lvl in range(len(self.downsample_ratios)):
                grid_w = int(np.ceil(w / self.downsample_ratios[lvl]))
                grid_h = int(np.ceil(h / self.downsample_ratios[lvl]))
                if self.norm_reg_targets:
                    # Normalize regression targets by the level's stride.
                    sample['reg_target{}'.format(lvl)] = \
                        np.reshape(
                            reg_targets_by_level[lvl] / \
                            self.downsample_ratios[lvl],
                            newshape=[grid_h, grid_w, 4])
                else:
                    sample['reg_target{}'.format(lvl)] = np.reshape(
                        reg_targets_by_level[lvl],
                        newshape=[grid_h, grid_w, 4])
                sample['labels{}'.format(lvl)] = np.reshape(
                    labels_by_level[lvl], newshape=[grid_h, grid_w, 1])
                sample['centerness{}'.format(lvl)] = np.reshape(
                    ctn_targets_by_level[lvl], newshape=[grid_h, grid_w, 1])
            # Raw annotations are no longer needed once targets are built.
            sample.pop('is_crowd')
            sample.pop('gt_class')
            sample.pop('gt_bbox')
        return samples
@register_op
class Gt2TTFTargetOp(BaseOperator):
    __shared__ = ['num_classes']
    """
    Gt2TTFTarget
    Generate TTFNet targets by ground truth data

    Args:
        num_classes(int): the number of classes.
        down_ratio(int): the down ratio from images to heatmap, 4 by default.
        alpha(float): the alpha parameter to generate gaussian target.
            0.54 by default.
    """

    def __init__(self, num_classes=80, down_ratio=4, alpha=0.54):
        super(Gt2TTFTargetOp, self).__init__()
        self.down_ratio = down_ratio
        self.num_classes = num_classes
        self.alpha = alpha

    def __call__(self, samples, context=None):
        # Heatmap resolution derived from the (assumed square) image —
        # only shape[1] is read; TODO confirm H == W upstream.
        output_size = samples[0]['image'].shape[1]
        feat_size = output_size // self.down_ratio
        for sample in samples:
            heatmap = np.zeros(
                (self.num_classes, feat_size, feat_size), dtype='float32')
            # -1 marks cells with no box assignment.
            box_target = np.ones(
                (4, feat_size, feat_size), dtype='float32') * -1
            reg_weight = np.zeros((1, feat_size, feat_size), dtype='float32')

            gt_bbox = sample['gt_bbox']
            gt_class = sample['gt_class']

            bbox_w = gt_bbox[:, 2] - gt_bbox[:, 0] + 1
            bbox_h = gt_bbox[:, 3] - gt_bbox[:, 1] + 1
            area = bbox_w * bbox_h
            boxes_areas_log = np.log(area)
            # Process boxes large-to-small so small boxes are drawn last and
            # win overlapping cells.
            boxes_ind = np.argsort(boxes_areas_log, axis=0)[::-1]
            boxes_area_topk_log = boxes_areas_log[boxes_ind]
            gt_bbox = gt_bbox[boxes_ind]
            gt_class = gt_class[boxes_ind]

            feat_gt_bbox = gt_bbox / self.down_ratio
            feat_gt_bbox = np.clip(feat_gt_bbox, 0, feat_size - 1)
            feat_hs, feat_ws = (feat_gt_bbox[:, 3] - feat_gt_bbox[:, 1],
                                feat_gt_bbox[:, 2] - feat_gt_bbox[:, 0])

            # Box centers in heatmap coordinates.
            ct_inds = np.stack(
                [(gt_bbox[:, 0] + gt_bbox[:, 2]) / 2,
                 (gt_bbox[:, 1] + gt_bbox[:, 3]) / 2],
                axis=1) / self.down_ratio

            h_radiuses_alpha = (feat_hs / 2. * self.alpha).astype('int32')
            w_radiuses_alpha = (feat_ws / 2. * self.alpha).astype('int32')

            for k in range(len(gt_bbox)):
                cls_id = gt_class[k]
                fake_heatmap = np.zeros(
                    (feat_size, feat_size), dtype='float32')
                self.draw_truncate_gaussian(fake_heatmap, ct_inds[k],
                                            h_radiuses_alpha[k],
                                            w_radiuses_alpha[k])

                heatmap[cls_id] = np.maximum(heatmap[cls_id], fake_heatmap)
                box_target_inds = fake_heatmap > 0
                # Every activated cell regresses this box's coordinates.
                box_target[:, box_target_inds] = gt_bbox[k][:, None]

                # Regression weights: gaussian value scaled by log-area and
                # normalized to sum to log-area over the box's cells.
                local_heatmap = fake_heatmap[box_target_inds]
                ct_div = np.sum(local_heatmap)
                local_heatmap *= boxes_area_topk_log[k]
                reg_weight[0, box_target_inds] = local_heatmap / ct_div
            sample['ttf_heatmap'] = heatmap
            sample['ttf_box_target'] = box_target
            sample['ttf_reg_weight'] = reg_weight
        return samples

    def draw_truncate_gaussian(self, heatmap, center, h_radius, w_radius):
        # Render a 2D gaussian of the given radii onto `heatmap` at `center`,
        # truncated at the heatmap borders; existing values are kept via max.
        h, w = 2 * h_radius + 1, 2 * w_radius + 1
        sigma_x = w / 6
        sigma_y = h / 6
        gaussian = gaussian2D((h, w), sigma_x, sigma_y)

        x, y = int(center[0]), int(center[1])

        height, width = heatmap.shape[0:2]

        # Clamp the stamp window to the heatmap bounds.
        left, right = min(x, w_radius), min(width - x, w_radius + 1)
        top, bottom = min(y, h_radius), min(height - y, h_radius + 1)

        masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
        masked_gaussian = gaussian[h_radius - top:h_radius + bottom, w_radius -
                                   left:w_radius + right]
        if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
            heatmap[y - top:y + bottom, x - left:x + right] = np.maximum(
                masked_heatmap, masked_gaussian)
        return heatmap
@register_op
class Gt2Solov2TargetOp(BaseOperator):
    """Assign mask target and labels in SOLOv2 network.
    Args:
        num_grids (list): The list of feature map grids size.
        scale_ranges (list): The list of mask boundary range.
        coord_sigma (float): The coefficient of coordinate area length.
        sampling_ratio (float): The ratio of down sampling.
    """

    def __init__(self,
                 num_grids=[40, 36, 24, 16, 12],
                 scale_ranges=[[1, 96], [48, 192], [96, 384], [192, 768],
                               [384, 2048]],
                 coord_sigma=0.2,
                 sampling_ratio=4.0):
        super(Gt2Solov2TargetOp, self).__init__()
        self.num_grids = num_grids
        self.scale_ranges = scale_ranges
        self.coord_sigma = coord_sigma
        self.sampling_ratio = sampling_ratio

    def _scale_size(self, im, scale):
        """Resize `im` by `scale` in both dimensions with bilinear interp."""
        # Fix: dropped the unused `new_size` computation present in the
        # original; cv2.resize is driven by fx/fy directly.
        resized_img = cv2.resize(
            im, None, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
        return resized_img

    def __call__(self, samples, context=None):
        """Build per-level category/instance/grid-order targets for a batch.

        Consumes 'gt_bbox', 'gt_class', 'gt_segm' (and pops them plus
        'is_crowd'/'gt_poly') from each sample; writes 'cate_label{i}',
        'ins_label{i}', 'grid_order{i}' per FPN level and 'fg_num'.
        """
        sample_id = 0
        max_ins_num = [0] * len(self.num_grids)
        for sample in samples:
            gt_bboxes_raw = sample['gt_bbox']
            # SOLOv2 reserves 0 for background, so shift class ids by one.
            gt_labels_raw = sample['gt_class'] + 1
            im_c, im_h, im_w = sample['image'].shape[:]
            gt_masks_raw = sample['gt_segm'].astype(np.uint8)
            mask_feat_size = [
                int(im_h / self.sampling_ratio), int(im_w / self.sampling_ratio)
            ]
            # Scale of each gt = sqrt(area); used to route boxes to levels.
            gt_areas = np.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) *
                               (gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))
            ins_ind_label_list = []
            idx = 0
            for (lower_bound, upper_bound), num_grid \
                    in zip(self.scale_ranges, self.num_grids):

                hit_indices = ((gt_areas >= lower_bound) &
                               (gt_areas <= upper_bound)).nonzero()[0]
                num_ins = len(hit_indices)

                ins_label = []
                grid_order = []
                cate_label = np.zeros([num_grid, num_grid], dtype=np.int64)
                # Fix: np.bool was a deprecated alias removed in NumPy >= 1.24;
                # use the builtin bool instead.
                ins_ind_label = np.zeros([num_grid**2], dtype=bool)

                if num_ins == 0:
                    # No gt hits this level: emit a single empty mask target.
                    ins_label = np.zeros(
                        [1, mask_feat_size[0], mask_feat_size[1]],
                        dtype=np.uint8)
                    ins_ind_label_list.append(ins_ind_label)
                    sample['cate_label{}'.format(idx)] = cate_label.flatten()
                    sample['ins_label{}'.format(idx)] = ins_label
                    sample['grid_order{}'.format(idx)] = np.asarray(
                        [sample_id * num_grid * num_grid + 0], dtype=np.int32)
                    idx += 1
                    continue
                gt_bboxes = gt_bboxes_raw[hit_indices]
                gt_labels = gt_labels_raw[hit_indices]
                gt_masks = gt_masks_raw[hit_indices, ...]

                # Half extents shrunk by coord_sigma define the "center
                # region" of each box.
                half_ws = 0.5 * (
                    gt_bboxes[:, 2] - gt_bboxes[:, 0]) * self.coord_sigma
                half_hs = 0.5 * (
                    gt_bboxes[:, 3] - gt_bboxes[:, 1]) * self.coord_sigma

                for seg_mask, gt_label, half_h, half_w in zip(
                        gt_masks, gt_labels, half_hs, half_ws):
                    if seg_mask.sum() == 0:
                        continue
                    # mass center
                    upsampled_size = (mask_feat_size[0] * 4,
                                      mask_feat_size[1] * 4)
                    # Fix: scipy.ndimage.measurements is a deprecated
                    # namespace; call ndimage.center_of_mass directly.
                    center_h, center_w = ndimage.center_of_mass(seg_mask)
                    coord_w = int(
                        (center_w / upsampled_size[1]) // (1. / num_grid))
                    coord_h = int(
                        (center_h / upsampled_size[0]) // (1. / num_grid))

                    # left, top, right, down
                    top_box = max(0,
                                  int(((center_h - half_h) / upsampled_size[0])
                                      // (1. / num_grid)))
                    down_box = min(num_grid - 1,
                                   int(((center_h + half_h) / upsampled_size[0])
                                       // (1. / num_grid)))
                    left_box = max(0,
                                   int(((center_w - half_w) / upsampled_size[1])
                                       // (1. / num_grid)))
                    right_box = min(num_grid - 1,
                                    int(((center_w + half_w) /
                                         upsampled_size[1]) // (1. / num_grid)))

                    # Constrain the assigned grid window to at most 3x3 cells
                    # around the mass-center cell.
                    top = max(top_box, coord_h - 1)
                    down = min(down_box, coord_h + 1)
                    left = max(coord_w - 1, left_box)
                    right = min(right_box, coord_w + 1)

                    cate_label[top:(down + 1), left:(right + 1)] = gt_label
                    # Mask target lives at 1/sampling_ratio resolution.
                    seg_mask = self._scale_size(
                        seg_mask, scale=1. / self.sampling_ratio)
                    for i in range(top, down + 1):
                        for j in range(left, right + 1):
                            label = int(i * num_grid + j)
                            cur_ins_label = np.zeros(
                                [mask_feat_size[0], mask_feat_size[1]],
                                dtype=np.uint8)
                            cur_ins_label[:seg_mask.shape[0], :seg_mask.shape[
                                1]] = seg_mask
                            ins_label.append(cur_ins_label)
                            ins_ind_label[label] = True
                            grid_order.append(sample_id * num_grid * num_grid +
                                              label)
                if not ins_label:
                    # Every hit mask was empty: fall back to a single empty
                    # target, same as the num_ins == 0 branch.
                    ins_label = np.zeros(
                        [1, mask_feat_size[0], mask_feat_size[1]],
                        dtype=np.uint8)
                    ins_ind_label_list.append(ins_ind_label)
                    sample['cate_label{}'.format(idx)] = cate_label.flatten()
                    sample['ins_label{}'.format(idx)] = ins_label
                    sample['grid_order{}'.format(idx)] = np.asarray(
                        [sample_id * num_grid * num_grid + 0], dtype=np.int32)
                else:
                    ins_label = np.stack(ins_label, axis=0)
                    ins_ind_label_list.append(ins_ind_label)
                    sample['cate_label{}'.format(idx)] = cate_label.flatten()
                    sample['ins_label{}'.format(idx)] = ins_label
                    sample['grid_order{}'.format(idx)] = np.asarray(
                        grid_order, dtype=np.int32)
                    assert len(grid_order) > 0
                # Track the widest instance dimension per level for batch pad.
                max_ins_num[idx] = max(
                    max_ins_num[idx],
                    sample['ins_label{}'.format(idx)].shape[0])
                idx += 1
            ins_ind_labels = np.concatenate([
                ins_ind_labels_level_img
                for ins_ind_labels_level_img in ins_ind_label_list
            ])
            fg_num = np.sum(ins_ind_labels)
            sample['fg_num'] = fg_num
            sample_id += 1

            # Raw gt fields are fully consumed; drop them from the batch.
            sample.pop('is_crowd')
            sample.pop('gt_class')
            sample.pop('gt_bbox')
            sample.pop('gt_poly')
            sample.pop('gt_segm')

        # padding batch
        for data in samples:
            for idx in range(len(self.num_grids)):
                gt_ins_data = np.zeros(
                    [
                        max_ins_num[idx],
                        data['ins_label{}'.format(idx)].shape[1],
                        data['ins_label{}'.format(idx)].shape[2]
                    ],
                    dtype=np.uint8)
                gt_ins_data[0:data['ins_label{}'.format(idx)].shape[
                    0], :, :] = data['ins_label{}'.format(idx)]
                gt_grid_order = np.zeros([max_ins_num[idx]], dtype=np.int32)
                gt_grid_order[0:data['grid_order{}'.format(idx)].shape[
                    0]] = data['grid_order{}'.format(idx)]
                data['ins_label{}'.format(idx)] = gt_ins_data
                data['grid_order{}'.format(idx)] = gt_grid_order
        return samples
dygraph/ppdet/data/transform/batch_operators.py
浏览文件 @
e527466d
...
@@ -23,20 +23,20 @@ except Exception:
...
@@ -23,20 +23,20 @@ except Exception:
import
cv2
import
cv2
import
numpy
as
np
import
numpy
as
np
from
.operator
import
register_op
,
BaseOperator
from
.operator
s
import
register_op
,
BaseOperator
,
Resize
from
.op_helper
import
jaccard_overlap
,
gaussian2D
from
.op_helper
import
jaccard_overlap
,
gaussian2D
from
.operators
import
NormalizeImage
,
Permut
e
from
scipy
import
ndimag
e
from
ppdet.utils.logger
import
setup_logger
from
ppdet.utils.logger
import
setup_logger
logger
=
setup_logger
(
__name__
)
logger
=
setup_logger
(
__name__
)
__all__
=
[
__all__
=
[
'PadBatch'
,
'PadBatch'
,
'RandomShape'
,
'BatchRandomResize'
,
'PadMultiScaleTest'
,
'Gt2YoloTarget'
,
'Gt2YoloTarget'
,
'Gt2FCOSTarget'
,
'Gt2FCOSTarget'
,
'Gt2TTFTarget'
,
'Gt2TTFTarget'
,
'Gt2Solov2Target'
,
]
]
...
@@ -50,20 +50,17 @@ class PadBatch(BaseOperator):
...
@@ -50,20 +50,17 @@ class PadBatch(BaseOperator):
height and width is divisible by `pad_to_stride`.
height and width is divisible by `pad_to_stride`.
"""
"""
def
__init__
(
self
,
pad_to_stride
=
0
,
use_padded_im_info
=
True
,
pad_gt
=
False
):
def
__init__
(
self
,
pad_to_stride
=
0
,
pad_gt
=
False
):
super
(
PadBatch
,
self
).
__init__
()
super
(
PadBatch
,
self
).
__init__
()
self
.
pad_to_stride
=
pad_to_stride
self
.
pad_to_stride
=
pad_to_stride
self
.
use_padded_im_info
=
use_padded_im_info
self
.
pad_gt
=
pad_gt
self
.
pad_gt
=
pad_gt
def
__call__
(
self
,
samples
):
def
__call__
(
self
,
samples
,
context
=
None
):
"""
"""
Args:
Args:
samples (list): a batch of sample, each is dict.
samples (list): a batch of sample, each is dict.
"""
"""
coarsest_stride
=
self
.
pad_to_stride
coarsest_stride
=
self
.
pad_to_stride
#if coarsest_stride == 0:
# return samples
max_shape
=
np
.
array
([
data
[
'image'
].
shape
for
data
in
samples
]).
max
(
max_shape
=
np
.
array
([
data
[
'image'
].
shape
for
data
in
samples
]).
max
(
axis
=
0
)
axis
=
0
)
...
@@ -81,8 +78,20 @@ class PadBatch(BaseOperator):
...
@@ -81,8 +78,20 @@ class PadBatch(BaseOperator):
(
im_c
,
max_shape
[
1
],
max_shape
[
2
]),
dtype
=
np
.
float32
)
(
im_c
,
max_shape
[
1
],
max_shape
[
2
]),
dtype
=
np
.
float32
)
padding_im
[:,
:
im_h
,
:
im_w
]
=
im
padding_im
[:,
:
im_h
,
:
im_w
]
=
im
data
[
'image'
]
=
padding_im
data
[
'image'
]
=
padding_im
if
self
.
use_padded_im_info
:
if
'semantic'
in
data
and
data
[
'semantic'
]
is
not
None
:
data
[
'im_info'
][:
2
]
=
max_shape
[
1
:
3
]
semantic
=
data
[
'semantic'
]
padding_sem
=
np
.
zeros
(
(
1
,
max_shape
[
1
],
max_shape
[
2
]),
dtype
=
np
.
float32
)
padding_sem
[:,
:
im_h
,
:
im_w
]
=
semantic
data
[
'semantic'
]
=
padding_sem
if
'gt_segm'
in
data
and
data
[
'gt_segm'
]
is
not
None
:
gt_segm
=
data
[
'gt_segm'
]
padding_segm
=
np
.
zeros
(
(
gt_segm
.
shape
[
0
],
max_shape
[
1
],
max_shape
[
2
]),
dtype
=
np
.
uint8
)
padding_segm
[:,
:
im_h
,
:
im_w
]
=
gt_segm
data
[
'gt_segm'
]
=
padding_segm
if
self
.
pad_gt
:
if
self
.
pad_gt
:
gt_num
=
[]
gt_num
=
[]
if
'gt_poly'
in
data
and
data
[
'gt_poly'
]
is
not
None
and
len
(
data
[
if
'gt_poly'
in
data
and
data
[
'gt_poly'
]
is
not
None
and
len
(
data
[
...
@@ -106,9 +115,10 @@ class PadBatch(BaseOperator):
...
@@ -106,9 +115,10 @@ class PadBatch(BaseOperator):
gt_num_max
=
max
(
gt_num
)
gt_num_max
=
max
(
gt_num
)
for
i
,
data
in
enumerate
(
samples
):
for
i
,
data
in
enumerate
(
samples
):
gt_box_data
=
np
.
zero
s
([
gt_num_max
,
4
],
dtype
=
np
.
float32
)
gt_box_data
=
-
np
.
one
s
([
gt_num_max
,
4
],
dtype
=
np
.
float32
)
gt_class_data
=
np
.
zero
s
([
gt_num_max
],
dtype
=
np
.
int32
)
gt_class_data
=
-
np
.
one
s
([
gt_num_max
],
dtype
=
np
.
int32
)
is_crowd_data
=
np
.
ones
([
gt_num_max
],
dtype
=
np
.
int32
)
is_crowd_data
=
np
.
ones
([
gt_num_max
],
dtype
=
np
.
int32
)
if
pad_mask
:
if
pad_mask
:
poly_num_max
=
max
(
poly_num
)
poly_num_max
=
max
(
poly_num
)
poly_part_num_max
=
max
(
poly_part_num
)
poly_part_num_max
=
max
(
poly_part_num
)
...
@@ -135,99 +145,56 @@ class PadBatch(BaseOperator):
...
@@ -135,99 +145,56 @@ class PadBatch(BaseOperator):
@
register_op
@
register_op
class
RandomShap
e
(
BaseOperator
):
class
BatchRandomResiz
e
(
BaseOperator
):
"""
"""
Randomly reshape a batch. If random_inter is True, also randomly
Resize image to target size randomly. random target_size and interpolation method
select one an interpolation algorithm [cv2.INTER_NEAREST, cv2.INTER_LINEAR,
cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4]. If random_inter is
False, use cv2.INTER_NEAREST.
Args:
Args:
sizes (list): list of int, random choose a size from these
target_size (int, list, tuple): image target size, if random size is True, must be list or tuple
random_inter (bool): whether to randomly interpolation, defalut true.
keep_ratio (bool): whether keep_raio or not, default true
interp (int): the interpolation method
random_size (bool): whether random select target size of image
random_interp (bool): whether random select interpolation method
"""
"""
def
__init__
(
self
,
sizes
=
[],
random_inter
=
False
,
resize_box
=
False
):
def
__init__
(
self
,
super
(
RandomShape
,
self
).
__init__
()
target_size
,
self
.
sizes
=
sizes
keep_ratio
,
self
.
random_inter
=
random_inter
interp
=
cv2
.
INTER_NEAREST
,
random_size
=
True
,
random_interp
=
False
):
super
(
BatchRandomResize
,
self
).
__init__
()
self
.
keep_ratio
=
keep_ratio
self
.
interps
=
[
self
.
interps
=
[
cv2
.
INTER_NEAREST
,
cv2
.
INTER_NEAREST
,
cv2
.
INTER_LINEAR
,
cv2
.
INTER_LINEAR
,
cv2
.
INTER_AREA
,
cv2
.
INTER_AREA
,
cv2
.
INTER_CUBIC
,
cv2
.
INTER_CUBIC
,
cv2
.
INTER_LANCZOS4
,
cv2
.
INTER_LANCZOS4
,
]
if
random_inter
else
[]
]
self
.
resize_box
=
resize_box
self
.
interp
=
interp
assert
isinstance
(
target_size
,
(
def
__call__
(
self
,
samples
):
int
,
Sequence
)),
"target_size must be int, list or tuple"
shape
=
np
.
random
.
choice
(
self
.
sizes
)
if
random_size
and
not
isinstance
(
target_size
,
list
):
method
=
np
.
random
.
choice
(
self
.
interps
)
if
self
.
random_inter
\
raise
TypeError
(
else
cv2
.
INTER_NEAREST
"Type of target_size is invalid when random_size is True. Must be List, now is {}"
.
for
i
in
range
(
len
(
samples
)):
format
(
type
(
target_size
)))
im
=
samples
[
i
][
'image'
]
self
.
target_size
=
target_size
h
,
w
=
im
.
shape
[:
2
]
self
.
random_size
=
random_size
scale_x
=
float
(
shape
)
/
w
self
.
random_interp
=
random_interp
scale_y
=
float
(
shape
)
/
h
im
=
cv2
.
resize
(
def
__call__
(
self
,
samples
,
context
=
None
):
im
,
None
,
None
,
fx
=
scale_x
,
fy
=
scale_y
,
interpolation
=
method
)
if
self
.
random_size
:
samples
[
i
][
'image'
]
=
im
target_size
=
np
.
random
.
choice
(
self
.
target_size
)
if
self
.
resize_box
and
'gt_bbox'
in
samples
[
i
]
and
len
(
samples
[
0
][
else
:
'gt_bbox'
])
>
0
:
target_size
=
self
.
target_size
scale_array
=
np
.
array
([
scale_x
,
scale_y
]
*
2
,
dtype
=
np
.
float32
)
samples
[
i
][
'gt_bbox'
]
=
np
.
clip
(
samples
[
i
][
'gt_bbox'
]
*
scale_array
,
0
,
float
(
shape
)
-
1
)
return
samples
@
register_op
class
PadMultiScaleTest
(
BaseOperator
):
"""
Pad the image so they can be divisible by a stride for multi-scale testing.
Args:
pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
height and width is divisible by `pad_to_stride`.
"""
def
__init__
(
self
,
pad_to_stride
=
0
):
super
(
PadMultiScaleTest
,
self
).
__init__
()
self
.
pad_to_stride
=
pad_to_stride
def
__call__
(
self
,
samples
):
coarsest_stride
=
self
.
pad_to_stride
if
coarsest_stride
==
0
:
return
samples
batch_input
=
True
if
self
.
random_interp
:
if
not
isinstance
(
samples
,
Sequence
):
interp
=
np
.
random
.
choice
(
self
.
interps
)
batch_input
=
False
else
:
samples
=
[
samples
]
interp
=
self
.
interp
if
len
(
samples
)
!=
1
:
raise
ValueError
(
"Batch size must be 1 when using multiscale test, "
"but now batch size is {}"
.
format
(
len
(
samples
)))
for
i
in
range
(
len
(
samples
)):
sample
=
samples
[
i
]
for
k
in
sample
.
keys
():
# hard code
if
k
.
startswith
(
'image'
):
im
=
sample
[
k
]
im_c
,
im_h
,
im_w
=
im
.
shape
max_h
=
int
(
np
.
ceil
(
im_h
/
coarsest_stride
)
*
coarsest_stride
)
max_w
=
int
(
np
.
ceil
(
im_w
/
coarsest_stride
)
*
coarsest_stride
)
padding_im
=
np
.
zeros
(
(
im_c
,
max_h
,
max_w
),
dtype
=
np
.
float32
)
padding_im
[:,
:
im_h
,
:
im_w
]
=
im
resizer
=
Resize
(
target_size
,
keep_ratio
=
self
.
keep_ratio
,
interp
=
interp
)
sample
[
k
]
=
padding_im
return
resizer
(
samples
,
context
=
context
)
info_name
=
'im_info'
if
k
==
'image'
else
'im_info_'
+
k
# update im_info
sample
[
info_name
][:
2
]
=
[
max_h
,
max_w
]
if
not
batch_input
:
samples
=
samples
[
0
]
return
samples
@
register_op
@
register_op
...
@@ -250,7 +217,7 @@ class Gt2YoloTarget(BaseOperator):
...
@@ -250,7 +217,7 @@ class Gt2YoloTarget(BaseOperator):
self
.
num_classes
=
num_classes
self
.
num_classes
=
num_classes
self
.
iou_thresh
=
iou_thresh
self
.
iou_thresh
=
iou_thresh
def
__call__
(
self
,
samples
):
def
__call__
(
self
,
samples
,
context
=
None
):
assert
len
(
self
.
anchor_masks
)
==
len
(
self
.
downsample_ratios
),
\
assert
len
(
self
.
anchor_masks
)
==
len
(
self
.
downsample_ratios
),
\
"anchor_masks', and 'downsample_ratios' should have same length."
"anchor_masks', and 'downsample_ratios' should have same length."
...
@@ -261,6 +228,9 @@ class Gt2YoloTarget(BaseOperator):
...
@@ -261,6 +228,9 @@ class Gt2YoloTarget(BaseOperator):
im
=
sample
[
'image'
]
im
=
sample
[
'image'
]
gt_bbox
=
sample
[
'gt_bbox'
]
gt_bbox
=
sample
[
'gt_bbox'
]
gt_class
=
sample
[
'gt_class'
]
gt_class
=
sample
[
'gt_class'
]
if
'gt_score'
not
in
sample
:
sample
[
'gt_score'
]
=
np
.
ones
(
(
gt_bbox
.
shape
[
0
],
1
),
dtype
=
np
.
float32
)
gt_score
=
sample
[
'gt_score'
]
gt_score
=
sample
[
'gt_score'
]
for
i
,
(
for
i
,
(
mask
,
downsample_ratio
mask
,
downsample_ratio
...
@@ -336,6 +306,11 @@ class Gt2YoloTarget(BaseOperator):
...
@@ -336,6 +306,11 @@ class Gt2YoloTarget(BaseOperator):
# classification
# classification
target
[
idx
,
6
+
cls
,
gj
,
gi
]
=
1.
target
[
idx
,
6
+
cls
,
gj
,
gi
]
=
1.
sample
[
'target{}'
.
format
(
i
)]
=
target
sample
[
'target{}'
.
format
(
i
)]
=
target
# remove useless gt_class and gt_score after target calculated
sample
.
pop
(
'gt_class'
)
sample
.
pop
(
'gt_score'
)
return
samples
return
samples
...
@@ -434,22 +409,17 @@ class Gt2FCOSTarget(BaseOperator):
...
@@ -434,22 +409,17 @@ class Gt2FCOSTarget(BaseOperator):
inside_gt_box
=
np
.
min
(
clipped_box_reg_targets
,
axis
=
2
)
>
0
inside_gt_box
=
np
.
min
(
clipped_box_reg_targets
,
axis
=
2
)
>
0
return
inside_gt_box
return
inside_gt_box
def
__call__
(
self
,
samples
):
def
__call__
(
self
,
samples
,
context
=
None
):
assert
len
(
self
.
object_sizes_of_interest
)
==
len
(
self
.
downsample_ratios
),
\
assert
len
(
self
.
object_sizes_of_interest
)
==
len
(
self
.
downsample_ratios
),
\
"object_sizes_of_interest', and 'downsample_ratios' should have same length."
"object_sizes_of_interest', and 'downsample_ratios' should have same length."
for
sample
in
samples
:
for
sample
in
samples
:
# im, gt_bbox, gt_class = sample
# im, gt_bbox, gt_class
, gt_score
= sample
im
=
sample
[
'image'
]
im
=
sample
[
'image'
]
im_info
=
sample
[
'im_info'
]
bboxes
=
sample
[
'gt_bbox'
]
bboxes
=
sample
[
'gt_bbox'
]
gt_class
=
sample
[
'gt_class'
]
gt_class
=
sample
[
'gt_class'
]
bboxes
[:,
[
0
,
2
]]
=
bboxes
[:,
[
0
,
2
]]
*
np
.
floor
(
im_info
[
1
])
/
\
np
.
floor
(
im_info
[
1
]
/
im_info
[
2
])
bboxes
[:,
[
1
,
3
]]
=
bboxes
[:,
[
1
,
3
]]
*
np
.
floor
(
im_info
[
0
])
/
\
np
.
floor
(
im_info
[
0
]
/
im_info
[
2
])
# calculate the locations
# calculate the locations
h
,
w
=
sample
[
'image'
]
.
shape
[
1
:
3
]
h
,
w
=
im
.
shape
[
1
:
3
]
points
,
num_points_each_level
=
self
.
_compute_points
(
w
,
h
)
points
,
num_points_each_level
=
self
.
_compute_points
(
w
,
h
)
object_scale_exp
=
[]
object_scale_exp
=
[]
for
i
,
num_pts
in
enumerate
(
num_points_each_level
):
for
i
,
num_pts
in
enumerate
(
num_points_each_level
):
...
@@ -544,6 +514,7 @@ class Gt2FCOSTarget(BaseOperator):
...
@@ -544,6 +514,7 @@ class Gt2FCOSTarget(BaseOperator):
@
register_op
@
register_op
class
Gt2TTFTarget
(
BaseOperator
):
class
Gt2TTFTarget
(
BaseOperator
):
__shared__
=
[
'num_classes'
]
"""
"""
Gt2TTFTarget
Gt2TTFTarget
Generate TTFNet targets by ground truth data
Generate TTFNet targets by ground truth data
...
@@ -555,13 +526,13 @@ class Gt2TTFTarget(BaseOperator):
...
@@ -555,13 +526,13 @@ class Gt2TTFTarget(BaseOperator):
0.54 by default.
0.54 by default.
"""
"""
def
__init__
(
self
,
num_classes
,
down_ratio
=
4
,
alpha
=
0.54
):
def
__init__
(
self
,
num_classes
=
80
,
down_ratio
=
4
,
alpha
=
0.54
):
super
(
Gt2TTFTarget
,
self
).
__init__
()
super
(
Gt2TTFTarget
,
self
).
__init__
()
self
.
down_ratio
=
down_ratio
self
.
down_ratio
=
down_ratio
self
.
num_classes
=
num_classes
self
.
num_classes
=
num_classes
self
.
alpha
=
alpha
self
.
alpha
=
alpha
def
__call__
(
self
,
samples
):
def
__call__
(
self
,
samples
,
context
=
None
):
output_size
=
samples
[
0
][
'image'
].
shape
[
1
]
output_size
=
samples
[
0
][
'image'
].
shape
[
1
]
feat_size
=
output_size
//
self
.
down_ratio
feat_size
=
output_size
//
self
.
down_ratio
for
sample
in
samples
:
for
sample
in
samples
:
...
@@ -636,3 +607,183 @@ class Gt2TTFTarget(BaseOperator):
...
@@ -636,3 +607,183 @@ class Gt2TTFTarget(BaseOperator):
heatmap
[
y
-
top
:
y
+
bottom
,
x
-
left
:
x
+
right
]
=
np
.
maximum
(
heatmap
[
y
-
top
:
y
+
bottom
,
x
-
left
:
x
+
right
]
=
np
.
maximum
(
masked_heatmap
,
masked_gaussian
)
masked_heatmap
,
masked_gaussian
)
return
heatmap
return
heatmap
@
register_op
class
Gt2Solov2Target
(
BaseOperator
):
"""Assign mask target and labels in SOLOv2 network.
Args:
num_grids (list): The list of feature map grids size.
scale_ranges (list): The list of mask boundary range.
coord_sigma (float): The coefficient of coordinate area length.
sampling_ratio (float): The ratio of down sampling.
"""
def
__init__
(
self
,
num_grids
=
[
40
,
36
,
24
,
16
,
12
],
scale_ranges
=
[[
1
,
96
],
[
48
,
192
],
[
96
,
384
],
[
192
,
768
],
[
384
,
2048
]],
coord_sigma
=
0.2
,
sampling_ratio
=
4.0
):
super
(
Gt2Solov2Target
,
self
).
__init__
()
self
.
num_grids
=
num_grids
self
.
scale_ranges
=
scale_ranges
self
.
coord_sigma
=
coord_sigma
self
.
sampling_ratio
=
sampling_ratio
def
_scale_size
(
self
,
im
,
scale
):
h
,
w
=
im
.
shape
[:
2
]
new_size
=
(
int
(
w
*
float
(
scale
)
+
0.5
),
int
(
h
*
float
(
scale
)
+
0.5
))
resized_img
=
cv2
.
resize
(
im
,
None
,
None
,
fx
=
scale
,
fy
=
scale
,
interpolation
=
cv2
.
INTER_LINEAR
)
return
resized_img
def
__call__
(
self
,
samples
,
context
=
None
):
sample_id
=
0
max_ins_num
=
[
0
]
*
len
(
self
.
num_grids
)
for
sample
in
samples
:
gt_bboxes_raw
=
sample
[
'gt_bbox'
]
gt_labels_raw
=
sample
[
'gt_class'
]
+
1
im_c
,
im_h
,
im_w
=
sample
[
'image'
].
shape
[:]
gt_masks_raw
=
sample
[
'gt_segm'
].
astype
(
np
.
uint8
)
mask_feat_size
=
[
int
(
im_h
/
self
.
sampling_ratio
),
int
(
im_w
/
self
.
sampling_ratio
)
]
gt_areas
=
np
.
sqrt
((
gt_bboxes_raw
[:,
2
]
-
gt_bboxes_raw
[:,
0
])
*
(
gt_bboxes_raw
[:,
3
]
-
gt_bboxes_raw
[:,
1
]))
ins_ind_label_list
=
[]
idx
=
0
for
(
lower_bound
,
upper_bound
),
num_grid
\
in
zip
(
self
.
scale_ranges
,
self
.
num_grids
):
hit_indices
=
((
gt_areas
>=
lower_bound
)
&
(
gt_areas
<=
upper_bound
)).
nonzero
()[
0
]
num_ins
=
len
(
hit_indices
)
ins_label
=
[]
grid_order
=
[]
cate_label
=
np
.
zeros
([
num_grid
,
num_grid
],
dtype
=
np
.
int64
)
ins_ind_label
=
np
.
zeros
([
num_grid
**
2
],
dtype
=
np
.
bool
)
if
num_ins
==
0
:
ins_label
=
np
.
zeros
(
[
1
,
mask_feat_size
[
0
],
mask_feat_size
[
1
]],
dtype
=
np
.
uint8
)
ins_ind_label_list
.
append
(
ins_ind_label
)
sample
[
'cate_label{}'
.
format
(
idx
)]
=
cate_label
.
flatten
()
sample
[
'ins_label{}'
.
format
(
idx
)]
=
ins_label
sample
[
'grid_order{}'
.
format
(
idx
)]
=
np
.
asarray
(
[
sample_id
*
num_grid
*
num_grid
+
0
],
dtype
=
np
.
int32
)
idx
+=
1
continue
gt_bboxes
=
gt_bboxes_raw
[
hit_indices
]
gt_labels
=
gt_labels_raw
[
hit_indices
]
gt_masks
=
gt_masks_raw
[
hit_indices
,
...]
half_ws
=
0.5
*
(
gt_bboxes
[:,
2
]
-
gt_bboxes
[:,
0
])
*
self
.
coord_sigma
half_hs
=
0.5
*
(
gt_bboxes
[:,
3
]
-
gt_bboxes
[:,
1
])
*
self
.
coord_sigma
for
seg_mask
,
gt_label
,
half_h
,
half_w
in
zip
(
gt_masks
,
gt_labels
,
half_hs
,
half_ws
):
if
seg_mask
.
sum
()
==
0
:
continue
# mass center
upsampled_size
=
(
mask_feat_size
[
0
]
*
4
,
mask_feat_size
[
1
]
*
4
)
center_h
,
center_w
=
ndimage
.
measurements
.
center_of_mass
(
seg_mask
)
coord_w
=
int
(
(
center_w
/
upsampled_size
[
1
])
//
(
1.
/
num_grid
))
coord_h
=
int
(
(
center_h
/
upsampled_size
[
0
])
//
(
1.
/
num_grid
))
# left, top, right, down
top_box
=
max
(
0
,
int
(((
center_h
-
half_h
)
/
upsampled_size
[
0
])
//
(
1.
/
num_grid
)))
down_box
=
min
(
num_grid
-
1
,
int
(((
center_h
+
half_h
)
/
upsampled_size
[
0
])
//
(
1.
/
num_grid
)))
left_box
=
max
(
0
,
int
(((
center_w
-
half_w
)
/
upsampled_size
[
1
])
//
(
1.
/
num_grid
)))
right_box
=
min
(
num_grid
-
1
,
int
(((
center_w
+
half_w
)
/
upsampled_size
[
1
])
//
(
1.
/
num_grid
)))
top
=
max
(
top_box
,
coord_h
-
1
)
down
=
min
(
down_box
,
coord_h
+
1
)
left
=
max
(
coord_w
-
1
,
left_box
)
right
=
min
(
right_box
,
coord_w
+
1
)
cate_label
[
top
:(
down
+
1
),
left
:(
right
+
1
)]
=
gt_label
seg_mask
=
self
.
_scale_size
(
seg_mask
,
scale
=
1.
/
self
.
sampling_ratio
)
for
i
in
range
(
top
,
down
+
1
):
for
j
in
range
(
left
,
right
+
1
):
label
=
int
(
i
*
num_grid
+
j
)
cur_ins_label
=
np
.
zeros
(
[
mask_feat_size
[
0
],
mask_feat_size
[
1
]],
dtype
=
np
.
uint8
)
cur_ins_label
[:
seg_mask
.
shape
[
0
],
:
seg_mask
.
shape
[
1
]]
=
seg_mask
ins_label
.
append
(
cur_ins_label
)
ins_ind_label
[
label
]
=
True
grid_order
.
append
(
sample_id
*
num_grid
*
num_grid
+
label
)
if
ins_label
==
[]:
ins_label
=
np
.
zeros
(
[
1
,
mask_feat_size
[
0
],
mask_feat_size
[
1
]],
dtype
=
np
.
uint8
)
ins_ind_label_list
.
append
(
ins_ind_label
)
sample
[
'cate_label{}'
.
format
(
idx
)]
=
cate_label
.
flatten
()
sample
[
'ins_label{}'
.
format
(
idx
)]
=
ins_label
sample
[
'grid_order{}'
.
format
(
idx
)]
=
np
.
asarray
(
[
sample_id
*
num_grid
*
num_grid
+
0
],
dtype
=
np
.
int32
)
else
:
ins_label
=
np
.
stack
(
ins_label
,
axis
=
0
)
ins_ind_label_list
.
append
(
ins_ind_label
)
sample
[
'cate_label{}'
.
format
(
idx
)]
=
cate_label
.
flatten
()
sample
[
'ins_label{}'
.
format
(
idx
)]
=
ins_label
sample
[
'grid_order{}'
.
format
(
idx
)]
=
np
.
asarray
(
grid_order
,
dtype
=
np
.
int32
)
assert
len
(
grid_order
)
>
0
max_ins_num
[
idx
]
=
max
(
max_ins_num
[
idx
],
sample
[
'ins_label{}'
.
format
(
idx
)].
shape
[
0
])
idx
+=
1
ins_ind_labels
=
np
.
concatenate
([
ins_ind_labels_level_img
for
ins_ind_labels_level_img
in
ins_ind_label_list
])
fg_num
=
np
.
sum
(
ins_ind_labels
)
sample
[
'fg_num'
]
=
fg_num
sample_id
+=
1
sample
.
pop
(
'is_crowd'
)
sample
.
pop
(
'gt_class'
)
sample
.
pop
(
'gt_bbox'
)
sample
.
pop
(
'gt_poly'
)
sample
.
pop
(
'gt_segm'
)
# padding batch
for
data
in
samples
:
for
idx
in
range
(
len
(
self
.
num_grids
)):
gt_ins_data
=
np
.
zeros
(
[
max_ins_num
[
idx
],
data
[
'ins_label{}'
.
format
(
idx
)].
shape
[
1
],
data
[
'ins_label{}'
.
format
(
idx
)].
shape
[
2
]
],
dtype
=
np
.
uint8
)
gt_ins_data
[
0
:
data
[
'ins_label{}'
.
format
(
idx
)].
shape
[
0
],
:,
:]
=
data
[
'ins_label{}'
.
format
(
idx
)]
gt_grid_order
=
np
.
zeros
([
max_ins_num
[
idx
]],
dtype
=
np
.
int32
)
gt_grid_order
[
0
:
data
[
'grid_order{}'
.
format
(
idx
)].
shape
[
0
]]
=
data
[
'grid_order{}'
.
format
(
idx
)]
data
[
'ins_label{}'
.
format
(
idx
)]
=
gt_ins_data
data
[
'grid_order{}'
.
format
(
idx
)]
=
gt_grid_order
return
samples
dygraph/ppdet/data/transform/operator.py
已删除
100644 → 0
浏览文件 @
5b6bebf2
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# function:
# operators to process sample,
# eg: decode/resize/crop image
from
__future__
import
absolute_import
from
__future__
import
print_function
from
__future__
import
division
try
:
from
collections.abc
import
Sequence
except
Exception
:
from
collections
import
Sequence
from
numbers
import
Number
,
Integral
import
uuid
import
random
import
math
import
numpy
as
np
import
os
import
copy
import
cv2
from
PIL
import
Image
,
ImageEnhance
,
ImageDraw
from
ppdet.core.workspace
import
serializable
from
ppdet.modeling.layers
import
AnchorGrid
from
.op_helper
import
(
satisfy_sample_constraint
,
filter_and_process
,
generate_sample_bbox
,
clip_bbox
,
data_anchor_sampling
,
satisfy_sample_constraint_coverage
,
crop_image_sampling
,
generate_sample_bbox_square
,
bbox_area_sampling
,
is_poly
,
gaussian_radius
,
draw_gaussian
)
from
ppdet.utils.logger
import
setup_logger
logger
=
setup_logger
(
__name__
)
# Global registry of operator class names, in registration order.
registered_ops = []


def register_op(cls):
    """Register an operator class, expose it on BaseOperator, and return
    the serializable-wrapped class.

    Raises:
        KeyError: if an operator with the same class name was already
            registered.
    """
    op_name = cls.__name__
    registered_ops.append(op_name)
    if hasattr(BaseOperator, op_name):
        raise KeyError("The {} class has been registered.".format(op_name))
    setattr(BaseOperator, op_name, cls)
    return serializable(cls)
class BboxError(ValueError):
    """Raised when a sample's bounding-box data is invalid for an operator."""
    pass
class ImageError(ValueError):
    """Raised when a sample's image data is invalid for an operator."""
    pass
class BaseOperator(object):
    """Common base class for all data-transform operators.

    Subclasses override ``apply`` to transform one sample dict; ``__call__``
    dispatches either a single sample or a sequence of samples through it.
    """

    def __init__(self, name=None):
        # Default the id prefix to the concrete subclass name.
        op_name = self.__class__.__name__ if name is None else name
        # A short random suffix keeps ids unique across operator instances.
        self._id = op_name + '_' + str(uuid.uuid4())[-6:]

    def apply(self, sample, context=None):
        """Process a single sample.

        Args:
            sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
            context (dict): info about this sample processing

        Returns:
            dict: the processed sample (identity by default).
        """
        return sample

    def __call__(self, sample, context=None):
        """Apply this operator to one sample or, element-wise, to a batch.

        Args:
            sample (dict | Sequence[dict]): sample(s) to process.
            context (dict): info about this sample processing

        Returns:
            The processed sample or sequence of samples.
        """
        if isinstance(sample, Sequence):
            for pos, one in enumerate(sample):
                sample[pos] = self.apply(one, context)
            return sample
        return self.apply(sample, context)

    def __str__(self):
        return str(self._id)
@register_op
class DecodeOp(BaseOperator):
    def __init__(self):
        """ Transform the image data to numpy format following the rgb format
        """
        super(DecodeOp, self).__init__()

    def apply(self, sample, context=None):
        """ load image if 'im_file' field is not empty but 'image' is"""
        if 'image' not in sample:
            # Lazily read the raw bytes from disk, then drop the path key.
            with open(sample['im_file'], 'rb') as f:
                sample['image'] = f.read()
            sample.pop('im_file')

        im = sample['image']
        # Decode the raw byte buffer into an HWC array.
        data = np.frombuffer(im, dtype='uint8')
        im = cv2.imdecode(data, 1)  # BGR mode, but need RGB mode
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

        sample['image'] = im
        # Reconcile annotation h/w with the decoded image; the decoded
        # image always wins, with a warning when they disagree.
        if 'h' not in sample:
            sample['h'] = im.shape[0]
        elif sample['h'] != im.shape[0]:
            logger.warn(
                "The actual image height: {} is not equal to the "
                "height: {} in annotation, and update sample['h'] by actual "
                "image height.".format(im.shape[0], sample['h']))
            sample['h'] = im.shape[0]
        if 'w' not in sample:
            sample['w'] = im.shape[1]
        elif sample['w'] != im.shape[1]:
            logger.warn(
                "The actual image width: {} is not equal to the "
                "width: {} in annotation, and update sample['w'] by actual "
                "image width.".format(im.shape[1], sample['w']))
            sample['w'] = im.shape[1]

        # Record the decoded (h, w) and an identity scale factor for
        # downstream resize ops to update.
        sample['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
        sample['scale_factor'] = np.array([1., 1.], dtype=np.float32)
        return sample
@register_op
class PermuteOp(BaseOperator):
    def __init__(self):
        """
        Change the channel to be (C, H, W)
        """
        super(PermuteOp, self).__init__()

    def apply(self, sample, context=None):
        # Reorder the image axes from HWC to CHW layout.
        sample['image'] = sample['image'].transpose((2, 0, 1))
        return sample
@register_op
class LightingOp(BaseOperator):
    """
    Lighting the image by eigenvalues and eigenvectors
    Args:
        eigval (list): eigenvalues
        eigvec (list): eigenvectors
        alphastd (float): random weight of lighting, 0.1 by default
    """

    def __init__(self, eigval, eigvec, alphastd=0.1):
        super(LightingOp, self).__init__()
        self.alphastd = alphastd
        # Keep both as float32 arrays so the dot product stays float32.
        self.eigval = np.array(eigval).astype('float32')
        self.eigvec = np.array(eigvec).astype('float32')

    def apply(self, sample, context=None):
        # Draw one random weight per component and shift the image by the
        # weighted combination of eigenvectors (PCA-style color jitter).
        weights = np.random.normal(scale=self.alphastd, size=(3, ))
        shift = np.dot(self.eigvec, self.eigval * weights)
        sample['image'] += shift
        return sample
@register_op
class RandomErasingImageOp(BaseOperator):
    def __init__(self, prob=0.5, lower=0.02, higher=0.4, aspect_ratio=0.3):
        """
        Random Erasing Data Augmentation, see https://arxiv.org/abs/1708.04896
        Args:
            prob (float): probability to carry out random erasing
            lower (float): lower limit of the erasing area ratio
            higher (float): upper limit of the erasing area ratio
            aspect_ratio (float): aspect ratio of the erasing region
        """
        super(RandomErasingImageOp, self).__init__()
        self.prob = prob
        self.lower = lower
        # BUGFIX: was `self.heigher = heigher`, referencing an undefined name
        # (the parameter is `higher`), which raised NameError on construction.
        # `apply` reads `self.higher`, so store it under that attribute.
        self.higher = higher
        self.aspect_ratio = aspect_ratio

    def apply(self, sample, context=None):
        """Erase a random rectangle inside each ground-truth box.

        For every gt bbox, with probability `prob`, a rectangle whose area is
        uniform in [lower, higher] of the bbox area (and whose aspect ratio is
        uniform in [aspect_ratio, 1/aspect_ratio]) is zeroed out in place.
        `context` is accepted for interface consistency with the other ops.
        """
        gt_bbox = sample['gt_bbox']
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image is not a numpy array.".format(self))
        if len(im.shape) != 3:
            raise ImageError("{}: image is not 3-dimensional.".format(self))

        for idx in range(gt_bbox.shape[0]):
            # Independently decide per box whether to erase.
            if self.prob <= np.random.rand():
                continue

            x1, y1, x2, y2 = gt_bbox[idx, :]
            w_bbox = x2 - x1 + 1
            h_bbox = y2 - y1 + 1
            area = w_bbox * h_bbox

            target_area = random.uniform(self.lower, self.higher) * area
            aspect_ratio = random.uniform(self.aspect_ratio,
                                          1 / self.aspect_ratio)

            h = int(round(math.sqrt(target_area * aspect_ratio)))
            w = int(round(math.sqrt(target_area / aspect_ratio)))

            # Only erase when the rectangle fits strictly inside the bbox.
            if w < w_bbox and h < h_bbox:
                off_y1 = random.randint(0, int(h_bbox - h))
                off_x1 = random.randint(0, int(w_bbox - w))
                im[int(y1 + off_y1):int(y1 + off_y1 + h), int(x1 + off_x1):int(
                    x1 + off_x1 + w), :] = 0
        sample['image'] = im
        return sample
@register_op
class NormalizeImageOp(BaseOperator):
    def __init__(self, mean=[0.485, 0.456, 0.406], std=[1, 1, 1],
                 is_scale=True):
        """
        Args:
            mean (list): the pixel mean
            std (list): the pixel variance
        """
        super(NormalizeImageOp, self).__init__()
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        valid = (isinstance(self.mean, list) and isinstance(self.std, list) and
                 isinstance(self.is_scale, bool))
        if not valid:
            raise TypeError("{}: input type is invalid.".format(self))
        from functools import reduce
        # A zero anywhere in std would divide by zero during normalization.
        if reduce(lambda x, y: x * y, self.std) == 0:
            raise ValueError('{}: std is invalid!'.format(self))

    def apply(self, sample, context=None):
        """Normalize the image.
        Operators:
            1.(optional) Scale the image to [0,1]
            2. Each pixel minus mean and is divided by std
        """
        img = sample['image'].astype(np.float32, copy=False)
        if self.is_scale:
            img = img / 255.0
        # Broadcast per-channel stats over the H and W axes.
        img -= np.array(self.mean)[np.newaxis, np.newaxis, :]
        img /= np.array(self.std)[np.newaxis, np.newaxis, :]
        sample['image'] = img
        return sample
@register_op
class GridMask(BaseOperator):
    def __init__(self,
                 use_h=True,
                 use_w=True,
                 rotate=1,
                 offset=False,
                 ratio=0.5,
                 mode=1,
                 prob=0.7,
                 upper_iter=360000):
        """
        GridMask Data Augmentation, see https://arxiv.org/abs/2001.04086
        Args:
            use_h (bool): whether to mask vertically
            use_w (bool): whether to mask horizontally
            rotate (float): angle for the mask to rotate
            offset (float): mask offset
            ratio (float): mask ratio
            mode (int): gridmask mode
            prob (float): max probability to carry out gridmask
            upper_iter (int): suggested to be equal to global max_iter
        """
        super(GridMask, self).__init__()
        self.use_h = use_h
        self.use_w = use_w
        self.rotate = rotate
        self.offset = offset
        self.ratio = ratio
        self.mode = mode
        self.prob = prob
        self.upper_iter = upper_iter
        # The actual masking logic lives in a sibling utility class of the
        # same name; all configuration is forwarded to it once here.
        # (The local import shadows this operator's class name on purpose.)
        from .gridmask_utils import GridMask
        self.gridmask_op = GridMask(
            use_h,
            use_w,
            rotate=rotate,
            offset=offset,
            ratio=ratio,
            mode=mode,
            prob=prob,
            upper_iter=upper_iter)

    def apply(self, sample, context=None):
        # 'curr_iter' is forwarded to the helper together with the image;
        # presumably it schedules mask probability over training iterations
        # (up to upper_iter) — confirm against gridmask_utils.
        sample['image'] = self.gridmask_op(sample['image'],
                                           sample['curr_iter'])
        return sample
@register_op
class RandomDistortOp(BaseOperator):
    """Random color distortion.
    Args:
        hue (list): hue settings. in [lower, upper, probability] format.
        saturation (list): saturation settings. in [lower, upper, probability] format.
        contrast (list): contrast settings. in [lower, upper, probability] format.
        brightness (list): brightness settings. in [lower, upper, probability] format.
        random_apply (bool): whether to apply in random (yolo) or fixed (SSD)
            order.
        count (int): the number of distortions applied when random_apply is True
        random_channel (bool): whether to swap channels randomly
    """

    def __init__(self,
                 hue=[-18, 18, 0.5],
                 saturation=[0.5, 1.5, 0.5],
                 contrast=[0.5, 1.5, 0.5],
                 brightness=[0.5, 1.5, 0.5],
                 random_apply=True,
                 count=4,
                 random_channel=False):
        super(RandomDistortOp, self).__init__()
        self.hue = hue
        self.saturation = saturation
        self.contrast = contrast
        self.brightness = brightness
        self.random_apply = random_apply
        self.count = count
        self.random_channel = random_channel

    def apply_hue(self, img):
        # NOTE(review): the distortion is skipped when the draw is BELOW
        # prob (same convention in the other apply_* methods below) —
        # with the default prob=0.5 this is symmetric, but for other
        # values the skip/apply probabilities read inverted; confirm.
        low, high, prob = self.hue
        if np.random.uniform(0., 1.) < prob:
            return img

        img = img.astype(np.float32)
        # it works, but result differ from HSV version
        # Hue rotation implemented as an RGB->YIQ rotation->RGB matrix product.
        delta = np.random.uniform(low, high)
        u = np.cos(delta * np.pi)
        w = np.sin(delta * np.pi)
        bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]])
        tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321],
                         [0.211, -0.523, 0.311]])
        ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647],
                          [1.0, -1.107, 1.705]])
        t = np.dot(np.dot(ityiq, bt), tyiq).T
        img = np.dot(img, t)
        return img

    def apply_saturation(self, img):
        low, high, prob = self.saturation
        if np.random.uniform(0., 1.) < prob:
            return img
        delta = np.random.uniform(low, high)
        img = img.astype(np.float32)
        # it works, but result differ from HSV version
        # Blend the image with its luminance (grayscale) by factor delta.
        gray = img * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
        gray = gray.sum(axis=2, keepdims=True)
        gray *= (1.0 - delta)
        img *= delta
        img += gray
        return img

    def apply_contrast(self, img):
        low, high, prob = self.contrast
        if np.random.uniform(0., 1.) < prob:
            return img
        delta = np.random.uniform(low, high)
        img = img.astype(np.float32)
        # Multiplicative scaling of all pixel values.
        img *= delta
        return img

    def apply_brightness(self, img):
        low, high, prob = self.brightness
        if np.random.uniform(0., 1.) < prob:
            return img
        delta = np.random.uniform(low, high)
        img = img.astype(np.float32)
        # Additive shift of all pixel values.
        img += delta
        return img

    def apply(self, sample, context=None):
        img = sample['image']
        if self.random_apply:
            # YOLO style: apply `count` distortions in a random order.
            functions = [
                self.apply_brightness, self.apply_contrast,
                self.apply_saturation, self.apply_hue
            ]
            distortions = np.random.permutation(functions)[:self.count]
            for func in distortions:
                img = func(img)
            sample['image'] = img
            return sample

        # SSD style: fixed order; contrast goes either before or after
        # saturation/hue depending on a coin flip.
        img = self.apply_brightness(img)
        mode = np.random.randint(0, 2)

        if mode:
            img = self.apply_contrast(img)

        img = self.apply_saturation(img)
        img = self.apply_hue(img)

        if not mode:
            img = self.apply_contrast(img)

        if self.random_channel:
            if np.random.randint(0, 2):
                img = img[..., np.random.permutation(3)]
        sample['image'] = img
        return sample
@register_op
class AutoAugmentOp(BaseOperator):
    def __init__(self, autoaug_type="v1"):
        """
        Args:
            autoaug_type (str): autoaug type, support v0, v1, v2, v3, test
        """
        super(AutoAugmentOp, self).__init__()
        self.autoaug_type = autoaug_type

    def apply(self, sample, context=None):
        """
        Learning Data Augmentation Strategies for Object Detection, see https://arxiv.org/abs/1906.11172
        """
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image is not a numpy array.".format(self))
        if len(im.shape) != 3:
            raise ImageError("{}: image is not 3-dimensional.".format(self))
        if len(gt_bbox) == 0:
            return sample

        height, width, _ = im.shape
        # The autoaugment helper expects normalized [ymin, xmin, ymax, xmax]
        # boxes, while gt_bbox is absolute [xmin, ymin, xmax, ymax]:
        # swap the x/y columns and divide by the image dimensions.
        norm_gt_bbox = np.ones_like(gt_bbox, dtype=np.float32)
        norm_gt_bbox[:, 0] = gt_bbox[:, 1] / float(height)
        norm_gt_bbox[:, 1] = gt_bbox[:, 0] / float(width)
        norm_gt_bbox[:, 2] = gt_bbox[:, 3] / float(height)
        norm_gt_bbox[:, 3] = gt_bbox[:, 2] / float(width)

        from .autoaugment_utils import distort_image_with_autoaugment
        im, norm_gt_bbox = distort_image_with_autoaugment(im, norm_gt_bbox,
                                                          self.autoaug_type)

        # Convert the augmented boxes back to absolute [xmin, ymin, xmax, ymax].
        gt_bbox[:, 0] = norm_gt_bbox[:, 1] * float(width)
        gt_bbox[:, 1] = norm_gt_bbox[:, 0] * float(height)
        gt_bbox[:, 2] = norm_gt_bbox[:, 3] * float(width)
        gt_bbox[:, 3] = norm_gt_bbox[:, 2] * float(height)

        sample['image'] = im
        sample['gt_bbox'] = gt_bbox
        return sample
@register_op
class RandomFlipOp(BaseOperator):
    def __init__(self, prob=0.5):
        """
        Args:
            prob (float): the probability of flipping image
        """
        super(RandomFlipOp, self).__init__()
        self.prob = prob
        if not (isinstance(self.prob, float)):
            raise TypeError("{}: input type is invalid.".format(self))

    def apply_segm(self, segms, height, width):
        # Horizontally flip segmentations. Polygons get their x coordinates
        # mirrored; RLE masks are decoded, flipped, then re-encoded.
        def _flip_poly(poly, width):
            flipped_poly = np.array(poly)
            # Even indices are x coordinates in the flat [x0, y0, x1, y1, ...] list.
            flipped_poly[0::2] = width - np.array(poly[0::2])
            return flipped_poly.tolist()

        def _flip_rle(rle, height, width):
            # Uncompressed RLE (counts as a list) must be converted to a
            # compressed RLE object before decoding.
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, height, width)
            mask = mask_util.decode(rle)
            mask = mask[:, ::-1]
            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
            return rle

        flipped_segms = []
        for segm in segms:
            if is_poly(segm):
                # Polygon format
                flipped_segms.append(
                    [_flip_poly(poly, width) for poly in segm])
            else:
                # RLE format
                import pycocotools.mask as mask_util
                flipped_segms.append(_flip_rle(segm, height, width))
        return flipped_segms

    def apply_keypoint(self, gt_keypoint, width):
        # Even columns are x coordinates; mirror them about the image width.
        for i in range(gt_keypoint.shape[1]):
            if i % 2 == 0:
                old_x = gt_keypoint[:, i].copy()
                gt_keypoint[:, i] = width - old_x
        return gt_keypoint

    def apply_image(self, image):
        # Flip along the width axis of an HWC image.
        return image[:, ::-1, :]

    def apply_bbox(self, bbox, width):
        # Mirror [xmin, xmax]; the new xmin comes from the old xmax and
        # vice versa so the box stays well-ordered.
        oldx1 = bbox[:, 0].copy()
        oldx2 = bbox[:, 2].copy()
        bbox[:, 0] = width - oldx2
        bbox[:, 2] = width - oldx1
        return bbox

    def apply(self, sample, context=None):
        """Filp the image and bounding box.
        Operators:
            1. Flip the image numpy.
            2. Transform the bboxes' x coordinates.
              (Must judge whether the coordinates are normalized!)
            3. Transform the segmentations' x coordinates.
              (Must judge whether the coordinates are normalized!)
        Output:
            sample: the image, bounding box and segmentation part
                    in sample are flipped.
        """
        if np.random.uniform(0, 1) < self.prob:
            im = sample['image']
            height, width = im.shape[:2]
            im = self.apply_image(im)
            if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
                sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], width)
            if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
                sample['gt_poly'] = self.apply_segm(sample['gt_poly'], height,
                                                    width)
            if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
                sample['gt_keypoint'] = self.apply_keypoint(
                    sample['gt_keypoint'], width)

            if 'semantic' in sample and sample['semantic']:
                sample['semantic'] = sample['semantic'][:, ::-1]

            if 'gt_segm' in sample and sample['gt_segm'].any():
                sample['gt_segm'] = sample['gt_segm'][:, :, ::-1]

            # Flag so downstream ops know the sample was mirrored.
            sample['flipped'] = True
            sample['image'] = im
        return sample
@register_op
class ResizeOp(BaseOperator):
    def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
        """
        Resize image to target size. if keep_ratio is True,
        resize the image's long side to the maximum of target_size
        if keep_ratio is False, resize the image to target size(h, w)
        Args:
            target_size (int|list): image target size
            keep_ratio (bool): whether keep_ratio or not, default true
            interp (int): the interpolation method
        """
        super(ResizeOp, self).__init__()
        self.keep_ratio = keep_ratio
        self.interp = interp
        if not isinstance(target_size, (Integral, Sequence)):
            raise TypeError(
                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
                format(type(target_size)))
        if isinstance(target_size, Integral):
            target_size = [target_size, target_size]
        self.target_size = target_size

    def apply_image(self, image, scale):
        im_scale_x, im_scale_y = scale

        return cv2.resize(
            image,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)

    def apply_bbox(self, bbox, scale, size):
        # Scale the box coordinates, then clip them to the resized image.
        im_scale_x, im_scale_y = scale
        resize_w, resize_h = size
        bbox[:, 0::2] *= im_scale_x
        bbox[:, 1::2] *= im_scale_y
        bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, resize_w)
        bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h)
        return bbox

    def apply_segm(self, segms, im_size, scale):
        def _resize_poly(poly, im_scale_x, im_scale_y):
            resized_poly = np.array(poly)
            resized_poly[0::2] *= im_scale_x
            resized_poly[1::2] *= im_scale_y
            return resized_poly.tolist()

        def _resize_rle(rle, im_h, im_w, im_scale_x, im_scale_y):
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, im_h, im_w)

            mask = mask_util.decode(rle)
            # BUGFIX: was cv2.resize(image, ...) — `image` is undefined in
            # this scope and raised NameError; the decoded mask is resized.
            mask = cv2.resize(
                mask,
                None,
                None,
                fx=im_scale_x,
                fy=im_scale_y,
                interpolation=self.interp)
            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
            return rle

        im_h, im_w = im_size
        im_scale_x, im_scale_y = scale
        resized_segms = []
        for segm in segms:
            if is_poly(segm):
                # Polygon format
                resized_segms.append([
                    _resize_poly(poly, im_scale_x, im_scale_y) for poly in segm
                ])
            else:
                # RLE format
                import pycocotools.mask as mask_util
                resized_segms.append(
                    _resize_rle(segm, im_h, im_w, im_scale_x, im_scale_y))

        return resized_segms

    def apply(self, sample, context=None):
        """ Resize the image numpy.
        """
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ImageError('{}: image is not 3-dimensional.'.format(self))

        # apply image
        im_shape = im.shape
        if self.keep_ratio:
            # Single isotropic scale chosen so the short side does not
            # exceed min(target_size) and the long side does not exceed
            # max(target_size).
            im_size_min = np.min(im_shape[0:2])
            im_size_max = np.max(im_shape[0:2])

            target_size_min = np.min(self.target_size)
            target_size_max = np.max(self.target_size)

            im_scale = min(target_size_min / im_size_min,
                           target_size_max / im_size_max)

            resize_h = im_scale * float(im_shape[0])
            resize_w = im_scale * float(im_shape[1])

            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            # Anisotropic resize straight to (h, w) = target_size.
            resize_h, resize_w = self.target_size
            im_scale_y = resize_h / im_shape[0]
            im_scale_x = resize_w / im_shape[1]

        im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
        sample['image'] = im
        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
        # Accumulate scale factors so chained resizes compose correctly.
        if 'scale_factor' in sample:
            scale_factor = sample['scale_factor']
            sample['scale_factor'] = np.asarray(
                [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
                dtype=np.float32)
        else:
            sample['scale_factor'] = np.asarray(
                [im_scale_y, im_scale_x], dtype=np.float32)

        # apply bbox
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'],
                                                [im_scale_x, im_scale_y],
                                                [resize_w, resize_h])

        # apply polygon
        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_segm(sample['gt_poly'],
                                                im_shape[:2],
                                                [im_scale_x, im_scale_y])

        # apply semantic
        if 'semantic' in sample and sample['semantic']:
            semantic = sample['semantic']
            semantic = cv2.resize(
                semantic.astype('float32'),
                None,
                None,
                fx=im_scale_x,
                fy=im_scale_y,
                interpolation=self.interp)
            semantic = np.asarray(semantic).astype('int32')
            semantic = np.expand_dims(semantic, 0)
            sample['semantic'] = semantic

        # apply gt_segm
        if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
            # Nearest-neighbor keeps binary masks binary.
            masks = [
                cv2.resize(
                    gt_segm,
                    None,
                    None,
                    fx=im_scale_x,
                    fy=im_scale_y,
                    interpolation=cv2.INTER_NEAREST)
                for gt_segm in sample['gt_segm']
            ]
            sample['gt_segm'] = np.asarray(masks).astype(np.uint8)

        return sample
@register_op
class MultiscaleTestResizeOp(BaseOperator):
    def __init__(self,
                 origin_target_size=[800, 1333],
                 target_size=[],
                 interp=cv2.INTER_LINEAR,
                 use_flip=True):
        """
        Rescale image to the each size in target size, and capped at max_size.
        Args:
            origin_target_size (list): origin target size of image
            target_size (list): A list of target sizes of image.
            interp (int): the interpolation method.
            use_flip (bool): whether use flip augmentation.
        """
        super(MultiscaleTestResizeOp, self).__init__()
        self.interp = interp
        self.use_flip = use_flip

        if not isinstance(target_size, Sequence):
            raise TypeError(
                "Type of target_size is invalid. Must be List or Tuple, now is {}".
                format(type(target_size)))
        self.target_size = target_size

        if not isinstance(origin_target_size, Sequence):
            raise TypeError(
                "Type of origin_target_size is invalid. Must be List or Tuple, now is {}".
                format(type(origin_target_size)))

        self.origin_target_size = origin_target_size

    def apply(self, sample, context=None):
        """ Resize the image numpy for multi-scale test.

        Returns a LIST of samples (not a single sample): the original-scale
        resize, optionally a flipped copy, then one resized copy per entry
        in target_size. Each copy starts from a shallow sample.copy().
        """
        samples = []
        resizer = ResizeOp(
            self.origin_target_size, keep_ratio=True, interp=self.interp)
        samples.append(resizer(sample.copy(), context))
        if self.use_flip:
            # prob=1.1 > any uniform draw, so the flip always fires.
            flipper = RandomFlipOp(1.1)
            samples.append(flipper(sample.copy(), context=context))

        for size in self.target_size:
            resizer = ResizeOp(size, keep_ratio=True, interp=self.interp)
            samples.append(resizer(sample.copy(), context))

        return samples
@register_op
class RandomResizeOp(BaseOperator):
    def __init__(self,
                 target_size,
                 keep_ratio=True,
                 interp=cv2.INTER_LINEAR,
                 random_size=True,
                 random_interp=False):
        """
        Resize image to target size randomly. random target_size and interpolation method
        Args:
            target_size (int, list, tuple): image target size, if random size is True, must be list or tuple
            keep_ratio (bool): whether keep_raio or not, default true
            interp (int): the interpolation method
            random_size (bool): whether random select target size of image
            random_interp (bool): whether random select interpolation method
        """
        super(RandomResizeOp, self).__init__()
        self.keep_ratio = keep_ratio
        self.interp = interp
        # Candidate interpolation modes used when random_interp is enabled.
        self.interps = [
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_AREA,
            cv2.INTER_CUBIC,
            cv2.INTER_LANCZOS4,
        ]
        assert isinstance(target_size, (
            Integral, Sequence)), "target_size must be Integer, List or Tuple"
        if random_size and not isinstance(target_size, Sequence):
            raise TypeError(
                "Type of target_size is invalid when random_size is True. Must be List or Tuple, now is {}".
                format(type(target_size)))
        self.target_size = target_size
        self.random_size = random_size
        self.random_interp = random_interp

    def apply(self, sample, context=None):
        """ Resize the image numpy.
        """
        # Pick size and interpolation (randomly when configured), then
        # delegate the whole resize to a freshly constructed ResizeOp.
        chosen_size = (random.choice(self.target_size)
                       if self.random_size else self.target_size)
        chosen_interp = (random.choice(self.interps)
                         if self.random_interp else self.interp)
        resizer = ResizeOp(chosen_size, self.keep_ratio, chosen_interp)
        return resizer(sample, context=context)
@register_op
class RandomExpandOp(BaseOperator):
    """Random expand the canvas.
    Args:
        ratio (float): maximum expansion ratio.
        prob (float): probability to expand.
        fill_value (list): color value used to fill the canvas. in RGB order.
    """

    def __init__(self, ratio=4., prob=0.5, fill_value=(127.5, 127.5, 127.5)):
        super(RandomExpandOp, self).__init__()
        assert ratio > 1.01, "expand ratio must be larger than 1.01"
        self.ratio = ratio
        self.prob = prob
        assert isinstance(fill_value, (Number, Sequence)), \
            "fill value must be either float or sequence"
        # Normalize fill_value to a 3-tuple.
        if isinstance(fill_value, Number):
            fill_value = (fill_value, ) * 3
        if not isinstance(fill_value, tuple):
            fill_value = tuple(fill_value)
        self.fill_value = fill_value

    def apply(self, sample, context=None):
        # NOTE(review): expansion runs only when the draw is >= prob; with
        # the default 0.5 this matches "probability to expand", but for
        # other values the docstring reads inverted — confirm intent.
        if np.random.uniform(0., 1.) < self.prob:
            return sample

        img = sample['image']
        orig_h, orig_w = img.shape[:2]
        expand_ratio = np.random.uniform(1., self.ratio)
        canvas_h = int(orig_h * expand_ratio)
        canvas_w = int(orig_w * expand_ratio)
        if not canvas_h > orig_h or not canvas_w > orig_w:
            return sample
        # Random placement of the original image inside the larger canvas
        # (y drawn first, then x, matching the original call order).
        off_y = np.random.randint(0, canvas_h - orig_h)
        off_x = np.random.randint(0, canvas_w - orig_w)
        # Delegate the actual padding (image + annotations) to the Pad op.
        expander = Pad([canvas_h, canvas_w],
                       pad_mode=-1,
                       offsets=[off_x, off_y],
                       fill_value=self.fill_value)
        return expander(sample, context=context)
@register_op
class CropWithSampling(BaseOperator):
    def __init__(self, batch_sampler, satisfy_all=False, avoid_no_bbox=True):
        """
        Args:
            batch_sampler (list): Multiple sets of different
                                  parameters for cropping.
            satisfy_all (bool): whether all boxes must satisfy.
            e.g.[[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
           [max sample, max trial, min scale, max scale,
            min aspect ratio, max aspect ratio,
            min overlap, max overlap]
            avoid_no_bbox (bool): whether to avoid the
                                  situation where the box does not appear.
        """
        super(CropWithSampling, self).__init__()
        self.batch_sampler = batch_sampler
        self.satisfy_all = satisfy_all
        self.avoid_no_bbox = avoid_no_bbox

    def apply(self, sample, context):
        """
        Crop the image and modify bounding box.
        Operators:
            1. Scale the image width and height.
            2. Crop the image according to a radom sample.
            3. Rescale the bounding box.
            4. Determine if the new bbox is satisfied in the new image.
        Returns:
            sample: the image, bounding box are replaced.
        """
        assert 'image' in sample, "image data not found"
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        gt_class = sample['gt_class']
        im_height, im_width = im.shape[:2]
        gt_score = None
        if 'gt_score' in sample:
            gt_score = sample['gt_score']
        sampled_bbox = []
        gt_bbox = gt_bbox.tolist()
        # Collect candidate crop regions: each sampler draws up to
        # sampler[1] trials and keeps at most sampler[0] valid boxes.
        for sampler in self.batch_sampler:
            found = 0
            for i in range(sampler[1]):
                if found >= sampler[0]:
                    break
                sample_bbox = generate_sample_bbox(sampler)
                if satisfy_sample_constraint(sampler, sample_bbox, gt_bbox,
                                             self.satisfy_all):
                    sampled_bbox.append(sample_bbox)
                    found = found + 1
        im = np.array(im)
        # Try the candidates in random order; the first one that yields at
        # least one surviving gt box (when avoid_no_bbox) wins.
        while sampled_bbox:
            idx = int(np.random.uniform(0, len(sampled_bbox)))
            sample_bbox = sampled_bbox.pop(idx)
            sample_bbox = clip_bbox(sample_bbox)
            crop_bbox, crop_class, crop_score = \
                filter_and_process(sample_bbox, gt_bbox, gt_class,
                                   scores=gt_score)
            if self.avoid_no_bbox:
                if len(crop_bbox) < 1:
                    continue
            # sample_bbox is in normalized coordinates; convert to pixels.
            xmin = int(sample_bbox[0] * im_width)
            xmax = int(sample_bbox[2] * im_width)
            ymin = int(sample_bbox[1] * im_height)
            ymax = int(sample_bbox[3] * im_height)
            im = im[ymin:ymax, xmin:xmax]
            sample['image'] = im
            sample['gt_bbox'] = crop_bbox
            sample['gt_class'] = crop_class
            sample['gt_score'] = crop_score
            return sample
        # No acceptable crop found: return the sample unchanged.
        return sample
@register_op
class CropWithDataAchorSampling(BaseOperator):
    def __init__(self,
                 batch_sampler,
                 anchor_sampler=None,
                 target_size=None,
                 das_anchor_scales=[16, 32, 64, 128],
                 sampling_prob=0.5,
                 min_size=8.,
                 avoid_no_bbox=True):
        """
        Args:
            anchor_sampler (list): anchor_sampling sets of different
                                  parameters for cropping.
            batch_sampler (list): Multiple sets of different
                                  parameters for cropping.
              e.g.[[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]]
                  [[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                   [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                   [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                   [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                   [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]]
              [max sample, max trial, min scale, max scale,
               min aspect ratio, max aspect ratio,
               min overlap, max overlap, min coverage, max coverage]
            target_size (bool): target image size.
            das_anchor_scales (list[float]): a list of anchor scales in data
                anchor smapling.
            min_size (float): minimum size of sampled bbox.
            avoid_no_bbox (bool): whether to avoid the
                                  situation where the box does not appear.
        """
        super(CropWithDataAchorSampling, self).__init__()
        self.anchor_sampler = anchor_sampler
        self.batch_sampler = batch_sampler
        self.target_size = target_size
        self.sampling_prob = sampling_prob
        self.min_size = min_size
        self.avoid_no_bbox = avoid_no_bbox
        self.das_anchor_scales = np.array(das_anchor_scales)

    def apply(self, sample, context):
        """
        Crop the image and modify bounding box.
        Operators:
            1. Scale the image width and height.
            2. Crop the image according to a radom sample.
            3. Rescale the bounding box.
            4. Determine if the new bbox is satisfied in the new image.
        Returns:
            sample: the image, bounding box are replaced.
        """
        assert 'image' in sample, "image data not found"
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        gt_class = sample['gt_class']
        image_height, image_width = im.shape[:2]
        gt_score = None
        if 'gt_score' in sample:
            gt_score = sample['gt_score']
        sampled_bbox = []
        gt_bbox = gt_bbox.tolist()

        # With probability (1 - sampling_prob), use data-anchor sampling;
        # otherwise fall through to the square batch sampler branch below.
        prob = np.random.uniform(0., 1.)
        if prob > self.sampling_prob:  # anchor sampling
            assert self.anchor_sampler
            for sampler in self.anchor_sampler:
                found = 0
                for i in range(sampler[1]):
                    if found >= sampler[0]:
                        break
                    sample_bbox = data_anchor_sampling(
                        gt_bbox, image_width, image_height,
                        self.das_anchor_scales, self.target_size)
                    # data_anchor_sampling returns 0 on failure.
                    if sample_bbox == 0:
                        break
                    if satisfy_sample_constraint_coverage(sampler, sample_bbox,
                                                          gt_bbox):
                        sampled_bbox.append(sample_bbox)
                        found = found + 1
            im = np.array(im)
            # Try the candidate crops in random order; first acceptable wins.
            while sampled_bbox:
                idx = int(np.random.uniform(0, len(sampled_bbox)))
                sample_bbox = sampled_bbox.pop(idx)

                if 'gt_keypoint' in sample.keys():
                    keypoints = (sample['gt_keypoint'],
                                 sample['keypoint_ignore'])
                    crop_bbox, crop_class, crop_score, gt_keypoints = \
                        filter_and_process(sample_bbox, gt_bbox, gt_class,
                                           scores=gt_score,
                                           keypoints=keypoints)
                else:
                    crop_bbox, crop_class, crop_score = filter_and_process(
                        sample_bbox, gt_bbox, gt_class, scores=gt_score)
                # Drop boxes that become too small at the target scale.
                crop_bbox, crop_class, crop_score = bbox_area_sampling(
                    crop_bbox, crop_class, crop_score, self.target_size,
                    self.min_size)

                if self.avoid_no_bbox:
                    if len(crop_bbox) < 1:
                        continue
                im = crop_image_sampling(im, sample_bbox, image_width,
                                         image_height, self.target_size)
                sample['image'] = im
                sample['gt_bbox'] = crop_bbox
                sample['gt_class'] = crop_class
                sample['gt_score'] = crop_score
                if 'gt_keypoint' in sample.keys():
                    sample['gt_keypoint'] = gt_keypoints[0]
                    sample['keypoint_ignore'] = gt_keypoints[1]
                return sample
            return sample

        else:
            for sampler in self.batch_sampler:
                found = 0
                for i in range(sampler[1]):
                    if found >= sampler[0]:
                        break
                    sample_bbox = generate_sample_bbox_square(
                        sampler, image_width, image_height)
                    if satisfy_sample_constraint_coverage(sampler, sample_bbox,
                                                          gt_bbox):
                        sampled_bbox.append(sample_bbox)
                        found = found + 1
            im = np.array(im)
            while sampled_bbox:
                idx = int(np.random.uniform(0, len(sampled_bbox)))
                sample_bbox = sampled_bbox.pop(idx)
                sample_bbox = clip_bbox(sample_bbox)

                if 'gt_keypoint' in sample.keys():
                    keypoints = (sample['gt_keypoint'],
                                 sample['keypoint_ignore'])
                    crop_bbox, crop_class, crop_score, gt_keypoints = \
                        filter_and_process(sample_bbox, gt_bbox, gt_class,
                                           scores=gt_score,
                                           keypoints=keypoints)
                else:
                    crop_bbox, crop_class, crop_score = filter_and_process(
                        sample_bbox, gt_bbox, gt_class, scores=gt_score)
                # sampling bbox according the bbox area
                crop_bbox, crop_class, crop_score = bbox_area_sampling(
                    crop_bbox, crop_class, crop_score, self.target_size,
                    self.min_size)

                if self.avoid_no_bbox:
                    if len(crop_bbox) < 1:
                        continue
                # sample_bbox is normalized; convert to pixel coordinates.
                xmin = int(sample_bbox[0] * image_width)
                xmax = int(sample_bbox[2] * image_width)
                ymin = int(sample_bbox[1] * image_height)
                ymax = int(sample_bbox[3] * image_height)
                im = im[ymin:ymax, xmin:xmax]
                sample['image'] = im
                sample['gt_bbox'] = crop_bbox
                sample['gt_class'] = crop_class
                sample['gt_score'] = crop_score
                if 'gt_keypoint' in sample.keys():
                    sample['gt_keypoint'] = gt_keypoints[0]
                    sample['keypoint_ignore'] = gt_keypoints[1]
                return sample
            return sample
@register_op
class RandomCropOp(BaseOperator):
    """Random crop image and bboxes.
    Args:
        aspect_ratio (list): aspect ratio of cropped region.
            in [min, max] format.
        thresholds (list): iou thresholds for decide a valid bbox crop.
        scaling (list): ratio between a cropped region and the original image.
            in [min, max] format.
        num_attempts (int): number of tries before giving up.
        allow_no_crop (bool): allow return without actually cropping them.
        cover_all_box (bool): ensure all bboxes are covered in the final crop.
        is_mask_crop(bool): whether crop the segmentation.
    """

    def __init__(self,
                 aspect_ratio=[.5, 2.],
                 thresholds=[.0, .1, .3, .5, .7, .9],
                 scaling=[.3, 1.],
                 num_attempts=50,
                 allow_no_crop=True,
                 cover_all_box=False,
                 is_mask_crop=False):
        super(RandomCropOp, self).__init__()
        self.aspect_ratio = aspect_ratio
        self.thresholds = thresholds
        self.scaling = scaling
        self.num_attempts = num_attempts
        self.allow_no_crop = allow_no_crop
        self.cover_all_box = cover_all_box
        self.is_mask_crop = is_mask_crop

    def crop_segms(self, segms, valid_ids, crop, height, width):
        """Clip the segmentations selected by `valid_ids` to the crop window.

        Args:
            segms (list): per-instance segmentations; each entry is either a
                polygon list or an RLE dict (distinguished via `is_poly`).
            valid_ids (ndarray): indices of instances kept by the crop.
            crop (list/ndarray): [xmin, ymin, xmax, ymax] crop window in pixels.
            height (int): original image height (needed to decode RLE).
            width (int): original image width (needed to decode RLE).

        Returns:
            list: cropped segmentations, one entry per id in `valid_ids`.
        """

        def _crop_poly(segm, crop):
            # Intersect each polygon with the crop rectangle using shapely,
            # then shift the result into crop-local coordinates.
            xmin, ymin, xmax, ymax = crop
            crop_coord = [xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin]
            crop_p = np.array(crop_coord).reshape(4, 2)
            crop_p = Polygon(crop_p)

            crop_segm = list()
            for poly in segm:
                poly = np.array(poly).reshape(len(poly) // 2, 2)
                polygon = Polygon(poly)
                if not polygon.is_valid:
                    # Self-intersecting polygon: re-polygonize its exterior
                    # ring into a (possibly multi-part) valid geometry.
                    exterior = polygon.exterior
                    multi_lines = exterior.intersection(exterior)
                    polygons = shapely.ops.polygonize(multi_lines)
                    polygon = MultiPolygon(polygons)
                multi_polygon = list()
                if isinstance(polygon, MultiPolygon):
                    multi_polygon = copy.deepcopy(polygon)
                else:
                    multi_polygon.append(copy.deepcopy(polygon))
                for per_polygon in multi_polygon:
                    inter = per_polygon.intersection(crop_p)
                    if not inter:
                        continue
                    if isinstance(inter, (MultiPolygon, GeometryCollection)):
                        for part in inter:
                            # Intersections may contain lines/points; keep
                            # only polygonal pieces.
                            if not isinstance(part, Polygon):
                                continue
                            # coords[:-1] drops the closing (repeated) vertex.
                            part = np.squeeze(
                                np.array(part.exterior.coords[:-1]).reshape(1,
                                                                            -1))
                            part[0::2] -= xmin
                            part[1::2] -= ymin
                            crop_segm.append(part.tolist())
                    elif isinstance(inter, Polygon):
                        crop_poly = np.squeeze(
                            np.array(inter.exterior.coords[:-1]).reshape(1, -1))
                        crop_poly[0::2] -= xmin
                        crop_poly[1::2] -= ymin
                        crop_segm.append(crop_poly.tolist())
                    else:
                        continue
            return crop_segm

        def _crop_rle(rle, crop, height, width):
            # Decode to a dense mask, slice the crop window, re-encode.
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, height, width)
            mask = mask_util.decode(rle)
            mask = mask[crop[1]:crop[3], crop[0]:crop[2]]
            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
            return rle

        crop_segms = []
        for id in valid_ids:
            segm = segms[id]
            if is_poly(segm):
                # Imports are done lazily so shapely is only required when
                # polygon annotations are actually present.
                import copy
                import shapely.ops
                from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
                # NOTE(review): `logging` is presumably imported at module
                # level (not visible in this chunk) — confirm.
                logging.getLogger("shapely").setLevel(logging.WARNING)
                # Polygon format
                crop_segms.append(_crop_poly(segm, crop))
            else:
                # RLE format
                import pycocotools.mask as mask_util
                crop_segms.append(_crop_rle(segm, crop, height, width))
        return crop_segms

    def apply(self, sample, context=None):
        """Randomly crop the image and all box-aligned annotations.

        Returns the sample unchanged when no valid crop is found (or when
        'no_crop' is drawn while `allow_no_crop` is enabled).
        """
        if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
            return sample

        h, w = sample['image'].shape[:2]
        gt_bbox = sample['gt_bbox']

        # NOTE Original method attempts to generate one candidate for each
        # threshold then randomly sample one from the resulting list.
        # Here a short circuit approach is taken, i.e., randomly choose a
        # threshold and attempt to find a valid crop, and simply return the
        # first one found.
        # The probability is not exactly the same, kinda resembling the
        # "Monty Hall" problem. Actually carrying out the attempts will affect
        # observability (just like opening doors in the "Monty Hall" game).
        thresholds = list(self.thresholds)
        if self.allow_no_crop:
            thresholds.append('no_crop')
        np.random.shuffle(thresholds)

        for thresh in thresholds:
            if thresh == 'no_crop':
                return sample

            found = False
            for i in range(self.num_attempts):
                scale = np.random.uniform(*self.scaling)
                if self.aspect_ratio is not None:
                    min_ar, max_ar = self.aspect_ratio
                    # Clamp the sampled aspect ratio so the crop area stays
                    # consistent with the sampled scale.
                    aspect_ratio = np.random.uniform(
                        max(min_ar, scale**2), min(max_ar, scale**-2))
                    h_scale = scale / np.sqrt(aspect_ratio)
                    w_scale = scale * np.sqrt(aspect_ratio)
                else:
                    h_scale = np.random.uniform(*self.scaling)
                    w_scale = np.random.uniform(*self.scaling)
                crop_h = h * h_scale
                crop_w = w * w_scale
                if self.aspect_ratio is None:
                    # Without an explicit range, reject crops whose aspect
                    # ratio falls outside [0.5, 2.0].
                    if crop_h / crop_w < 0.5 or crop_h / crop_w > 2.0:
                        continue

                crop_h = int(crop_h)
                crop_w = int(crop_w)
                crop_y = np.random.randint(0, h - crop_h)
                crop_x = np.random.randint(0, w - crop_w)
                crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
                iou = self._iou_matrix(
                    gt_bbox, np.array(
                        [crop_box], dtype=np.float32))
                # Candidate must overlap at least one gt box above thresh.
                if iou.max() < thresh:
                    continue

                # Optionally require that *every* gt box overlaps the crop.
                if self.cover_all_box and iou.min() < thresh:
                    continue

                cropped_box, valid_ids = self._crop_box_with_center_constraint(
                    gt_bbox, np.array(
                        crop_box, dtype=np.float32))
                if valid_ids.size > 0:
                    found = True
                    break

            if found:
                if self.is_mask_crop and 'gt_poly' in sample and len(sample[
                        'gt_poly']) > 0:
                    crop_polys = self.crop_segms(
                        sample['gt_poly'],
                        valid_ids,
                        np.array(
                            crop_box, dtype=np.int64),
                        h,
                        w)
                    if [] in crop_polys:
                        # Some instances vanished entirely inside the crop:
                        # drop them from valid_ids and keep only non-empty
                        # polygon lists.
                        delete_id = list()
                        valid_polys = list()
                        for id, crop_poly in enumerate(crop_polys):
                            if crop_poly == []:
                                delete_id.append(id)
                            else:
                                valid_polys.append(crop_poly)
                        valid_ids = np.delete(valid_ids, delete_id)
                        if len(valid_polys) == 0:
                            return sample
                        sample['gt_poly'] = valid_polys
                    else:
                        sample['gt_poly'] = crop_polys

                if 'gt_segm' in sample:
                    sample['gt_segm'] = self._crop_segm(sample['gt_segm'],
                                                        crop_box)
                    sample['gt_segm'] = np.take(
                        sample['gt_segm'], valid_ids, axis=0)

                sample['image'] = self._crop_image(sample['image'], crop_box)
                sample['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
                sample['gt_class'] = np.take(
                    sample['gt_class'], valid_ids, axis=0)
                if 'gt_score' in sample:
                    sample['gt_score'] = np.take(
                        sample['gt_score'], valid_ids, axis=0)

                if 'is_crowd' in sample:
                    sample['is_crowd'] = np.take(
                        sample['is_crowd'], valid_ids, axis=0)
                return sample

        return sample

    def _iou_matrix(self, a, b):
        # Pairwise IoU between box sets a (N, 4) and b (M, 4), both in
        # [x1, y1, x2, y2] form; returns an (N, M) matrix.
        tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2])
        br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])

        # Zero out degenerate intersections via the (tl < br) mask.
        area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2)
        area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
        area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
        area_o = (area_a[:, np.newaxis] + area_b - area_i)
        # Epsilon guards against division by zero for empty unions.
        return area_i / (area_o + 1e-10)

    def _crop_box_with_center_constraint(self, box, crop):
        # Clip boxes to the crop window (crop-local coordinates) and keep
        # only boxes whose center lies inside the crop and whose clipped
        # area is non-degenerate.
        cropped_box = box.copy()

        cropped_box[:, :2] = np.maximum(box[:, :2], crop[:2])
        cropped_box[:, 2:] = np.minimum(box[:, 2:], crop[2:])
        cropped_box[:, :2] -= crop[:2]
        cropped_box[:, 2:] -= crop[:2]

        centers = (box[:, :2] + box[:, 2:]) / 2
        valid = np.logical_and(crop[:2] <= centers,
                               centers < crop[2:]).all(axis=1)
        valid = np.logical_and(
            valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1))

        return cropped_box, np.where(valid)[0]

    def _crop_image(self, img, crop):
        # img is HWC; returns the crop window view.
        x1, y1, x2, y2 = crop
        return img[y1:y2, x1:x2, :]

    def _crop_segm(self, segm, crop):
        # segm is a stacked (N, H, W) mask array; crop every mask at once.
        x1, y1, x2, y2 = crop
        return segm[:, y1:y2, x1:x2]
@register_op
class RandomScaledCropOp(BaseOperator):
    """Resize image and bbox based on long side (with optional random scaling),
    then crop or pad image to target size.
    Args:
        target_dim (int): target size.
        scale_range (list): random scale range.
        interp (int): interpolation method, default to `cv2.INTER_LINEAR`.
    """

    def __init__(self,
                 target_dim=512,
                 scale_range=[.1, 2.],
                 interp=cv2.INTER_LINEAR):
        super(RandomScaledCropOp, self).__init__()
        self.target_dim = target_dim
        self.scale_range = scale_range
        self.interp = interp

    def apply(self, sample, context=None):
        """Resize by a random scale of the long side, then paste the
        (possibly offset) result onto a `target_dim` x `target_dim` canvas.

        Requires 'image' and 'scale_factor' in `sample`; updates 'image',
        'im_shape', 'scale_factor' and, when present, 'gt_bbox'/'gt_class'.
        """
        img = sample['image']
        h, w = img.shape[:2]
        random_scale = np.random.uniform(*self.scale_range)
        dim = self.target_dim
        random_dim = int(dim * random_scale)
        dim_max = max(h, w)
        scale = random_dim / dim_max
        # BUGFIX: cast to int — cv2.resize and the canvas slicing below
        # require integral sizes; the original float values raised at runtime.
        resize_w = int(w * scale)
        resize_h = int(h * scale)
        # Random crop offset when the resized image exceeds the target dim;
        # uniform(0., negative) is clamped to 0 by max().
        offset_x = int(max(0, np.random.uniform(0., resize_w - dim)))
        offset_y = int(max(0, np.random.uniform(0., resize_h - dim)))

        img = cv2.resize(img, (resize_w, resize_h), interpolation=self.interp)
        img = np.array(img)
        canvas = np.zeros((dim, dim, 3), dtype=img.dtype)
        canvas[:min(dim, resize_h), :min(dim, resize_w), :] = img[
            offset_y:offset_y + dim, offset_x:offset_x + dim, :]
        sample['image'] = canvas
        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
        # BUGFIX: the original read sample['sacle_factor'] (typo), which
        # raised KeyError — the field is 'scale_factor'.
        scale_factor = sample['scale_factor']
        sample['scale_factor'] = np.asarray(
            [scale_factor[0] * scale, scale_factor[1] * scale],
            dtype=np.float32)

        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            scale_array = np.array([scale, scale] * 2, dtype=np.float32)
            shift_array = np.array([offset_x, offset_y] * 2, dtype=np.float32)
            boxes = sample['gt_bbox'] * scale_array - shift_array
            boxes = np.clip(boxes, 0, dim - 1)
            # filter boxes with no area
            area = np.prod(boxes[..., 2:] - boxes[..., :2], axis=1)
            valid = (area > 1.).nonzero()[0]
            sample['gt_bbox'] = boxes[valid]
            sample['gt_class'] = sample['gt_class'][valid]
        return sample
@register_op
class CutmixOp(BaseOperator):
    def __init__(self, alpha=1.5, beta=1.5):
        """
        CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, see https://arxiv.org/abs/1905.04899
        Cutmix image and gt_bbbox/gt_score
        Args:
             alpha (float): alpha parameter of beta distribute
             beta (float): beta parameter of beta distribute
        """
        super(CutmixOp, self).__init__()
        self.alpha = alpha
        self.beta = beta
        if self.alpha <= 0.0:
            raise ValueError("alpha shold be positive in {}".format(self))
        if self.beta <= 0.0:
            raise ValueError("beta shold be positive in {}".format(self))

    def apply_image(self, img1, img2, factor):
        """ _rand_bbox """
        # Paste a random rectangle of img2 (area proportional to 1 - factor)
        # onto img1; both are first placed on a shared max-extent canvas.
        h = max(img1.shape[0], img2.shape[0])
        w = max(img1.shape[1], img2.shape[1])
        cut_rat = np.sqrt(1. - factor)

        # BUGFIX: np.int was removed from NumPy (deprecated 1.20, removed
        # 1.24); the builtin int performs the same truncation.
        cut_w = int(w * cut_rat)
        cut_h = int(h * cut_rat)

        # uniform
        cx = np.random.randint(w)
        cy = np.random.randint(h)

        bbx1 = np.clip(cx - cut_w // 2, 0, w - 1)
        bby1 = np.clip(cy - cut_h // 2, 0, h - 1)
        bbx2 = np.clip(cx + cut_w // 2, 0, w - 1)
        bby2 = np.clip(cy + cut_h // 2, 0, h - 1)

        img_1 = np.zeros((h, w, img1.shape[2]), 'float32')
        img_1[:img1.shape[0], :img1.shape[1], :] = \
            img1.astype('float32')
        img_2 = np.zeros((h, w, img2.shape[2]), 'float32')
        img_2[:img2.shape[0], :img2.shape[1], :] = \
            img2.astype('float32')
        img_1[bby1:bby2, bbx1:bbx2, :] = img2[bby1:bby2, bbx1:bbx2, :]
        return img_1

    def __call__(self, sample, context=None):
        """Combine a pair of samples; pass through anything that is not a
        two-element sequence. gt_score is weighted by the blend factor."""
        if not isinstance(sample, Sequence):
            return sample

        assert len(sample) == 2, 'cutmix need two samples'

        factor = np.random.beta(self.alpha, self.beta)
        factor = max(0.0, min(1.0, factor))
        # Degenerate factors reduce to one of the inputs unchanged.
        if factor >= 1.0:
            return sample[0]
        if factor <= 0.0:
            return sample[1]
        img1 = sample[0]['image']
        img2 = sample[1]['image']
        img = self.apply_image(img1, img2, factor)
        gt_bbox1 = sample[0]['gt_bbox']
        gt_bbox2 = sample[1]['gt_bbox']
        gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
        gt_class1 = sample[0]['gt_class']
        gt_class2 = sample[1]['gt_class']
        gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
        gt_score1 = sample[0]['gt_score']
        gt_score2 = sample[1]['gt_score']
        gt_score = np.concatenate(
            (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
        # The merged annotations are written back into the first sample.
        sample = sample[0]
        sample['image'] = img
        sample['gt_bbox'] = gt_bbox
        sample['gt_score'] = gt_score
        sample['gt_class'] = gt_class
        return sample
@register_op
class MixupOp(BaseOperator):
    """Blend a pair of samples following the Mixup strategy.

    The images are alpha-blended with a factor drawn from Beta(alpha, beta);
    boxes, classes and related per-box fields are concatenated, and gt_score
    records each source's blend weight.
    """

    def __init__(self, alpha=1.5, beta=1.5):
        """
        Args:
            alpha (float): alpha parameter of the Beta distribution.
            beta (float): beta parameter of the Beta distribution.
        """
        super(MixupOp, self).__init__()
        self.alpha = alpha
        self.beta = beta
        if self.alpha <= 0.0:
            raise ValueError("alpha shold be positive in {}".format(self))
        if self.beta <= 0.0:
            raise ValueError("beta shold be positive in {}".format(self))

    def apply_image(self, img1, img2, factor):
        """Alpha-blend two images on a zero canvas sized to their max extent."""
        out_h = max(img1.shape[0], img2.shape[0])
        out_w = max(img1.shape[1], img2.shape[1])
        canvas = np.zeros((out_h, out_w, img1.shape[2]), 'float32')
        canvas[:img1.shape[0], :img1.shape[1], :] = \
            img1.astype('float32') * factor
        canvas[:img2.shape[0], :img2.shape[1], :] += \
            img2.astype('float32') * (1.0 - factor)
        return canvas.astype('uint8')

    def __call__(self, sample, context=None):
        """Blend a two-sample sequence; anything else passes through."""
        if not isinstance(sample, Sequence):
            return sample

        assert len(sample) == 2, 'mixup need two samples'

        factor = np.random.beta(self.alpha, self.beta)
        factor = max(0.0, min(1.0, factor))
        # A degenerate factor reduces to one of the inputs unchanged.
        if factor >= 1.0:
            return sample[0]
        if factor <= 0.0:
            return sample[1]

        first, second = sample[0], sample[1]
        blended = self.apply_image(first['image'], second['image'], factor)
        result = copy.deepcopy(first)
        result['image'] = blended

        # apply bbox and score
        if 'gt_bbox' in first:
            result['gt_bbox'] = np.concatenate(
                (first['gt_bbox'], second['gt_bbox']), axis=0)
        if 'gt_class' in first:
            result['gt_class'] = np.concatenate(
                (first['gt_class'], second['gt_class']), axis=0)
            # Scores weight each source by its blend factor.
            result['gt_score'] = np.concatenate(
                (np.ones_like(first['gt_class']) * factor,
                 np.ones_like(second['gt_class']) * (1. - factor)),
                axis=0)
        if 'is_crowd' in first:
            result['is_crowd'] = np.concatenate(
                (first['is_crowd'], second['is_crowd']), axis=0)
        if 'difficult' in first:
            result['difficult'] = np.concatenate(
                (first['difficult'], second['difficult']), axis=0)
        return result
@register_op
class NormalizeBoxOp(BaseOperator):
    """Transform the bounding box's coornidates to [0,1]."""

    def __init__(self):
        super(NormalizeBoxOp, self).__init__()

    def apply(self, sample, context):
        """Scale 'gt_bbox' (and 'gt_keypoint' if present) from pixel
        coordinates to the [0, 1] range using the image shape.

        Args:
            sample (dict): must hold 'image' (HWC array) and 'gt_bbox'.
            context (dict): unused processing context.

        Returns:
            dict: the same sample with normalized coordinates.
        """
        height, width, _ = sample['image'].shape
        gt_bbox = sample['gt_bbox']
        # Vectorized normalization replaces the original per-box loop:
        # even columns (x1, x2) divide by width, odd columns (y1, y2)
        # by height.
        gt_bbox[:, 0::2] = gt_bbox[:, 0::2] / width
        gt_bbox[:, 1::2] = gt_bbox[:, 1::2] / height
        sample['gt_bbox'] = gt_bbox
        if 'gt_keypoint' in sample.keys():
            gt_keypoint = sample['gt_keypoint']
            # Keypoints are stored as flat (x, y, x, y, ...) columns.
            gt_keypoint[:, 0::2] = gt_keypoint[:, 0::2] / width
            gt_keypoint[:, 1::2] = gt_keypoint[:, 1::2] / height
            sample['gt_keypoint'] = gt_keypoint
        return sample
@register_op
class BboxXYXY2XYWHOp(BaseOperator):
    """
    Convert bbox XYXY format to XYWH format.
    """

    def __init__(self):
        super(BboxXYXY2XYWHOp, self).__init__()

    def apply(self, sample, context=None):
        """Rewrite 'gt_bbox' in place from corner form (x1, y1, x2, y2) to
        center-size form (cx, cy, w, h)."""
        assert 'gt_bbox' in sample
        boxes = sample['gt_bbox']
        # Width/height first, then shift the top-left corner to the center.
        boxes[:, 2:4] = boxes[:, 2:4] - boxes[:, :2]
        boxes[:, :2] = boxes[:, :2] + boxes[:, 2:4] / 2.
        sample['gt_bbox'] = boxes
        return sample
@register_op
class PadBoxOp(BaseOperator):
    def __init__(self, num_max_boxes=50):
        """
        Pad zeros to bboxes if number of bboxes is less than num_max_boxes.
        Args:
            num_max_boxes (int): the max number of bboxes
        """
        self.num_max_boxes = num_max_boxes
        super(PadBoxOp, self).__init__()

    def apply(self, sample, context=None):
        """Pad (or truncate) 'gt_bbox' and companion per-box fields to a
        fixed length of `num_max_boxes` entries."""
        assert 'gt_bbox' in sample
        boxes = sample['gt_bbox']
        num_max = self.num_max_boxes
        # Boxes beyond num_max are silently dropped.
        gt_num = min(num_max, len(boxes))
        # fields = context['fields'] if context else []

        padded_boxes = np.zeros((num_max, 4), dtype=np.float32)
        if gt_num > 0:
            padded_boxes[:gt_num, :] = boxes[:gt_num, :]
        sample['gt_bbox'] = padded_boxes

        def _pad_column(key, dtype):
            # Pad a per-box (N, 1) column field into a flat length-num_max
            # vector of the requested dtype.
            padded = np.zeros((num_max, ), dtype=dtype)
            if gt_num > 0:
                padded[:gt_num] = sample[key][:gt_num, 0]
            sample[key] = padded

        if 'gt_class' in sample:
            _pad_column('gt_class', np.int32)
        if 'gt_score' in sample:
            _pad_column('gt_score', np.float32)
        # in training, for example in op ExpandImage,
        # the bbox and gt_class is expandded, but the difficult is not,
        # so, judging by it's length
        if 'difficult' in sample:
            _pad_column('difficult', np.int32)
        if 'is_crowd' in sample:
            _pad_column('is_crowd', np.int32)
        return sample
@register_op
class DebugVisibleImageOp(BaseOperator):
    """
    In debug mode, visualize images according to `gt_box`.
    (Currently only supported when not cropping and flipping image.)
    """

    def __init__(self, output_dir='output/debug', is_normalized=False):
        # NOTE: creating the output directory here is a constructor-time
        # side effect.
        super(DebugVisibleImageOp, self).__init__()
        self.is_normalized = is_normalized
        self.output_dir = output_dir
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        if not isinstance(self.is_normalized, bool):
            raise TypeError("{}: input type is invalid.".format(self))

    def apply(self, sample, context=None):
        """Draw gt boxes, class labels and (optionally) keypoints on the
        original image file and save the result under `output_dir`.

        Reads 'im_file', 'w', 'h', 'gt_bbox', 'gt_class' (and optionally
        'gt_keypoint') from `sample`; the sample itself is returned
        unmodified except that normalized boxes/keypoints are scaled back
        to pixels in place.
        """
        image = Image.open(sample['im_file']).convert('RGB')
        out_file_name = sample['im_file'].split('/')[-1]
        width = sample['w']
        height = sample['h']
        gt_bbox = sample['gt_bbox']
        gt_class = sample['gt_class']
        draw = ImageDraw.Draw(image)
        for i in range(gt_bbox.shape[0]):
            if self.is_normalized:
                # Boxes are assumed to be in [0, 1]; rescale to pixels
                # in place.
                gt_bbox[i][0] = gt_bbox[i][0] * width
                gt_bbox[i][1] = gt_bbox[i][1] * height
                gt_bbox[i][2] = gt_bbox[i][2] * width
                gt_bbox[i][3] = gt_bbox[i][3] * height

            xmin, ymin, xmax, ymax = gt_bbox[i]
            draw.line(
                [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
                 (xmin, ymin)],
                width=2,
                fill='green')
            # draw label
            text = str(gt_class[i][0])
            # NOTE(review): ImageDraw.textsize was removed in Pillow 10;
            # this relies on an older Pillow — confirm the pinned version.
            tw, th = draw.textsize(text)
            draw.rectangle(
                [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
            draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))

        if 'gt_keypoint' in sample.keys():
            gt_keypoint = sample['gt_keypoint']
            if self.is_normalized:
                # Keypoints are flat (x, y, x, y, ...) columns; odd indices
                # are y-coordinates.
                for i in range(gt_keypoint.shape[1]):
                    if i % 2:
                        gt_keypoint[:, i] = gt_keypoint[:, i] * height
                    else:
                        gt_keypoint[:, i] = gt_keypoint[:, i] * width
            for i in range(gt_keypoint.shape[0]):
                keypoint = gt_keypoint[i]
                for j in range(int(keypoint.shape[0] / 2)):
                    x1 = round(keypoint[2 * j]).astype(np.int32)
                    y1 = round(keypoint[2 * j + 1]).astype(np.int32)
                    # 5-pixel dot per keypoint.
                    draw.ellipse(
                        (x1, y1, x1 + 5, y1 + 5), fill='green',
                        outline='green')
        save_path = os.path.join(self.output_dir, out_file_name)
        image.save(save_path, quality=95)
        return sample
@register_op
class Pad(BaseOperator):
    def __init__(self,
                 size=None,
                 size_divisor=32,
                 pad_mode=0,
                 offsets=None,
                 fill_value=(127.5, 127.5, 127.5)):
        """
        Pad image to a specified size or multiple of size_divisor.
        Args:
            size (int, Sequence): image target size, if None, pad to multiple of size_divisor, default None
            size_divisor (int): size divisor, default 32
            pad_mode (int): pad mode, currently only supports four modes [-1, 0, 1, 2]. if -1, use specified offsets
                if 0, only pad to right and bottom. if 1, pad according to center. if 2, only pad left and top
            offsets (list): upper-left padding offsets [offset_x, offset_y], required when pad_mode is -1
            fill_value (bool): rgb value of pad area, default (127.5, 127.5, 127.5)
        """
        super(Pad, self).__init__()

        # BUGFIX: the original type-checked `size` unconditionally, so the
        # documented default size=None always raised a TypeError.
        if size is not None:
            if not isinstance(size, (int, Sequence)):
                raise TypeError(
                    "Type of size is invalid. Must be Integer or Sequence, "
                    "now is {}".format(type(size)))
            if isinstance(size, int):
                size = [size, size]

        assert pad_mode in [
            -1, 0, 1, 2
        ], 'currently only supports four modes [-1, 0, 1, 2]'
        # BUGFIX: the original asserted `pad_mode == -1 and offsets`, which
        # rejected every mode except -1; offsets are only needed for -1.
        if pad_mode == -1:
            assert offsets, 'if pad_mode is -1, offsets should not be None'

        self.size = size
        self.size_divisor = size_divisor
        self.pad_mode = pad_mode
        self.fill_value = fill_value
        self.offsets = offsets

    def apply_segm(self, segms, offsets, im_size, size):
        """Shift segmentations by the padding offsets (polygons translated,
        RLE masks re-embedded on the padded canvas)."""

        def _expand_poly(poly, x, y):
            # Translate a flat (x, y, x, y, ...) polygon by (x, y).
            expanded_poly = np.array(poly)
            expanded_poly[0::2] += x
            expanded_poly[1::2] += y
            return expanded_poly.tolist()

        def _expand_rle(rle, x, y, height, width, h, w):
            # Decode, paste the mask at (x, y) on the padded canvas, re-encode.
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, height, width)
            mask = mask_util.decode(rle)
            expanded_mask = np.full((h, w), 0).astype(mask.dtype)
            expanded_mask[y:y + height, x:x + width] = mask
            rle = mask_util.encode(
                np.array(
                    expanded_mask, order='F', dtype=np.uint8))
            return rle

        x, y = offsets
        height, width = im_size
        h, w = size
        expanded_segms = []
        for segm in segms:
            if is_poly(segm):
                # Polygon format
                expanded_segms.append(
                    [_expand_poly(poly, x, y) for poly in segm])
            else:
                # RLE format
                import pycocotools.mask as mask_util
                expanded_segms.append(
                    _expand_rle(segm, x, y, height, width, h, w))
        return expanded_segms

    def apply_bbox(self, bbox, offsets):
        """Translate (N, 4) xyxy boxes by the padding offsets."""
        return bbox + np.array(offsets * 2, dtype=np.float32)

    def apply_keypoint(self, keypoints, offsets):
        """Translate flat (x, y, x, y, ...) keypoint rows by the offsets."""
        n = len(keypoints[0]) // 2
        return keypoints + np.array(offsets * n, dtype=np.float32)

    def apply_image(self, image, offsets, im_size, size):
        """Paste `image` at `offsets` on an (h, w, 3) canvas filled with
        `fill_value`; returns the float32 canvas."""
        x, y = offsets
        im_h, im_w = im_size
        h, w = size
        canvas = np.ones((h, w, 3), dtype=np.float32)
        canvas *= np.array(self.fill_value, dtype=np.float32)
        canvas[y:y + im_h, x:x + im_w, :] = image.astype(np.float32)
        return canvas

    def apply(self, sample, context=None):
        im = sample['image']
        im_h, im_w = im.shape[:2]
        if self.size:
            h, w = self.size
            # BUGFIX: allow an exactly-fitting target (original used strict <,
            # which asserted even though equal sizes are a no-op below).
            assert (
                im_h <= h and im_w <= w
            ), '(h, w) of target size should be no less than (im_h, im_w)'
        else:
            # BUGFIX: round *up* to the next multiple of size_divisor; the
            # original used floor division (`//`) inside np.ceil for h, which
            # floored and could shrink below im_h. Cast to int so the result
            # is usable as an array dimension.
            h = int(np.ceil(im_h / self.size_divisor) * self.size_divisor)
            w = int(np.ceil(im_w / self.size_divisor) * self.size_divisor)

        if h == im_h and w == im_w:
            return sample

        if self.pad_mode == -1:
            offset_x, offset_y = self.offsets
        elif self.pad_mode == 0:
            offset_y, offset_x = 0, 0
        elif self.pad_mode == 1:
            offset_y, offset_x = (h - im_h) // 2, (w - im_w) // 2
        else:
            offset_y, offset_x = h - im_h, w - im_w

        offsets, im_size, size = [offset_x, offset_y], [im_h, im_w], [h, w]

        sample['image'] = self.apply_image(im, offsets, im_size, size)

        # With right/bottom-only padding, annotations keep their coordinates.
        if self.pad_mode == 0:
            return sample
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], offsets)

        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_segm(sample['gt_poly'], offsets,
                                                im_size, size)

        if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
            sample['gt_keypoint'] = self.apply_keypoint(sample['gt_keypoint'],
                                                        offsets)

        return sample
@register_op
class Poly2Mask(BaseOperator):
    """
    gt poly to mask annotations
    """

    def __init__(self):
        super(Poly2Mask, self).__init__()
        # Imported here so pycocotools is only required when this op is used.
        import pycocotools.mask as maskUtils
        self.maskutils = maskUtils

    def _poly2mask(self, mask_ann, img_h, img_w):
        """Decode one annotation (polygon list, uncompressed RLE dict, or
        compressed RLE dict) into a binary HxW mask array."""
        if isinstance(mask_ann, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rle = self.maskutils.merge(
                self.maskutils.frPyObjects(mask_ann, img_h, img_w))
        elif isinstance(mask_ann['counts'], list):
            # uncompressed RLE
            rle = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
        else:
            # rle
            rle = mask_ann
        return self.maskutils.decode(rle)

    def apply(self, sample, context=None):
        """Convert every entry of 'gt_poly' into a stacked uint8 mask array
        stored under 'gt_segm'."""
        assert 'gt_poly' in sample
        im_h = sample['h']
        im_w = sample['w']
        decoded = [
            self._poly2mask(gt_poly, im_h, im_w)
            for gt_poly in sample['gt_poly']
        ]
        sample['gt_segm'] = np.asarray(decoded).astype(np.uint8)
        return sample
dygraph/ppdet/data/transform/operators.py
浏览文件 @
e527466d
# Copyright (c) 20
19
PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 20
20
PaddlePaddle Authors. All Rights Reserved.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
...
@@ -25,20 +25,20 @@ try:
...
@@ -25,20 +25,20 @@ try:
except
Exception
:
except
Exception
:
from
collections
import
Sequence
from
collections
import
Sequence
from
numbers
import
Number
from
numbers
import
Number
,
Integral
import
uuid
import
uuid
import
random
import
random
import
math
import
math
import
numpy
as
np
import
numpy
as
np
import
os
import
os
import
copy
import
cv2
import
cv2
from
PIL
import
Image
,
ImageEnhance
,
ImageDraw
from
PIL
import
Image
,
ImageEnhance
,
ImageDraw
from
ppdet.core.workspace
import
serializable
from
ppdet.core.workspace
import
serializable
from
ppdet.modeling.layers
import
AnchorGrid
from
ppdet.modeling.layers
import
AnchorGrid
from
.operator
import
register_op
,
BaseOperator
,
BboxError
,
ImageError
from
.op_helper
import
(
satisfy_sample_constraint
,
filter_and_process
,
from
.op_helper
import
(
satisfy_sample_constraint
,
filter_and_process
,
generate_sample_bbox
,
clip_bbox
,
data_anchor_sampling
,
generate_sample_bbox
,
clip_bbox
,
data_anchor_sampling
,
...
@@ -49,40 +49,81 @@ from .op_helper import (satisfy_sample_constraint, filter_and_process,
...
@@ -49,40 +49,81 @@ from .op_helper import (satisfy_sample_constraint, filter_and_process,
from
ppdet.utils.logger
import
setup_logger
from
ppdet.utils.logger
import
setup_logger
logger
=
setup_logger
(
__name__
)
logger
=
setup_logger
(
__name__
)
registered_ops
=
[]
@
register_op
class
DecodeImage
(
BaseOperator
):
def
register_op
(
cls
):
def
__init__
(
self
,
to_rgb
=
True
,
with_mixup
=
False
,
with_cutmix
=
False
):
registered_ops
.
append
(
cls
.
__name__
)
""" Transform the image data to numpy format.
if
not
hasattr
(
BaseOperator
,
cls
.
__name__
):
setattr
(
BaseOperator
,
cls
.
__name__
,
cls
)
else
:
raise
KeyError
(
"The {} class has been registered."
.
format
(
cls
.
__name__
))
return
serializable
(
cls
)
class
BboxError
(
ValueError
):
pass
class
ImageError
(
ValueError
):
pass
class
BaseOperator
(
object
):
def
__init__
(
self
,
name
=
None
):
if
name
is
None
:
name
=
self
.
__class__
.
__name__
self
.
_id
=
name
+
'_'
+
str
(
uuid
.
uuid4
())[
-
6
:]
def
apply
(
self
,
sample
,
context
=
None
):
""" Process a sample.
Args:
Args:
to_rgb (bool): whether to convert BGR to RGB
sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
with_mixup (bool): whether or not to mixup image and gt_bbbox/gt_score
context (dict): info about this sample processing
with_cutmix (bool): whether or not to cutmix image and gt_bbbox/gt_score
Returns:
result (dict): a processed sample
"""
"""
return
sample
super
(
DecodeImage
,
self
).
__init__
()
def
__call__
(
self
,
sample
,
context
=
None
):
self
.
to_rgb
=
to_rgb
""" Process a sample.
self
.
with_mixup
=
with_mixup
Args:
self
.
with_cutmix
=
with_cutmix
sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
if
not
isinstance
(
self
.
to_rgb
,
bool
):
context (dict): info about this sample processing
raise
TypeError
(
"{}: input type is invalid."
.
format
(
self
))
Returns:
if
not
isinstance
(
self
.
with_mixup
,
bool
):
result (dict): a processed sample
raise
TypeError
(
"{}: input type is invalid."
.
format
(
self
))
"""
if
not
isinstance
(
self
.
with_cutmix
,
bool
):
if
isinstance
(
sample
,
Sequence
):
raise
TypeError
(
"{}: input type is invalid."
.
format
(
self
))
for
i
in
range
(
len
(
sample
)):
sample
[
i
]
=
self
.
apply
(
sample
[
i
],
context
)
else
:
sample
=
self
.
apply
(
sample
,
context
)
return
sample
def
__str__
(
self
):
return
str
(
self
.
_id
)
@
register_op
class
Decode
(
BaseOperator
):
def
__init__
(
self
):
""" Transform the image data to numpy format following the rgb format
"""
super
(
Decode
,
self
).
__init__
()
def
__call__
(
self
,
sampl
e
):
def
apply
(
self
,
sample
,
context
=
Non
e
):
""" load image if 'im_file' field is not empty but 'image' is"""
""" load image if 'im_file' field is not empty but 'image' is"""
if
'image'
not
in
sample
:
if
'image'
not
in
sample
:
with
open
(
sample
[
'im_file'
],
'rb'
)
as
f
:
with
open
(
sample
[
'im_file'
],
'rb'
)
as
f
:
sample
[
'image'
]
=
f
.
read
()
sample
[
'image'
]
=
f
.
read
()
sample
.
pop
(
'im_file'
)
im
=
sample
[
'image'
]
im
=
sample
[
'image'
]
data
=
np
.
frombuffer
(
im
,
dtype
=
'uint8'
)
data
=
np
.
frombuffer
(
im
,
dtype
=
'uint8'
)
im
=
cv2
.
imdecode
(
data
,
1
)
# BGR mode, but need RGB mode
im
=
cv2
.
imdecode
(
data
,
1
)
# BGR mode, but need RGB mode
if
self
.
to_rgb
:
im
=
cv2
.
cvtColor
(
im
,
cv2
.
COLOR_BGR2RGB
)
im
=
cv2
.
cvtColor
(
im
,
cv2
.
COLOR_BGR2RGB
)
sample
[
'image'
]
=
im
sample
[
'image'
]
=
im
if
'h'
not
in
sample
:
if
'h'
not
in
sample
:
sample
[
'h'
]
=
im
.
shape
[
0
]
sample
[
'h'
]
=
im
.
shape
[
0
]
...
@@ -101,353 +142,66 @@ class DecodeImage(BaseOperator):
...
@@ -101,353 +142,66 @@ class DecodeImage(BaseOperator):
"image width."
.
format
(
im
.
shape
[
1
],
sample
[
'w'
]))
"image width."
.
format
(
im
.
shape
[
1
],
sample
[
'w'
]))
sample
[
'w'
]
=
im
.
shape
[
1
]
sample
[
'w'
]
=
im
.
shape
[
1
]
# make default im_info with [h, w, 1]
sample
[
'im_shape'
]
=
np
.
array
(
im
.
shape
[:
2
],
dtype
=
np
.
float32
)
sample
[
'im_info'
]
=
np
.
array
(
sample
[
'scale_factor'
]
=
np
.
array
([
1.
,
1.
],
dtype
=
np
.
float32
)
[
im
.
shape
[
0
],
im
.
shape
[
1
],
1.
],
dtype
=
np
.
float32
)
# decode mixup image
if
self
.
with_mixup
and
'mixup'
in
sample
:
self
.
__call__
(
sample
[
'mixup'
])
# decode cutmix image
if
self
.
with_cutmix
and
'cutmix'
in
sample
:
self
.
__call__
(
sample
[
'cutmix'
])
return
sample
@register_op
class MultiscaleTestResize(BaseOperator):
    def __init__(self,
                 origin_target_size=800,
                 origin_max_size=1333,
                 target_size=None,
                 max_size=2000,
                 interp=cv2.INTER_LINEAR,
                 use_flip=True):
        """
        Rescale image to the each size in target size, and capped at max_size.

        Args:
            origin_target_size (int): original target size of image's short side.
            origin_max_size (int): original max size of image.
            target_size (list): a list of target sizes of image's short side.
                Defaults to an empty list (no extra scales).
            max_size (int): the max size of image.
            interp (int): the interpolation method.
            use_flip (bool): whether use flip augmentation.
        """
        super(MultiscaleTestResize, self).__init__()
        self.origin_target_size = int(origin_target_size)
        self.origin_max_size = int(origin_max_size)
        self.max_size = int(max_size)
        self.interp = int(interp)
        self.use_flip = use_flip

        # None sentinel avoids the shared-mutable-default-argument pitfall;
        # the historical default was an empty list.
        target_size = [] if target_size is None else target_size
        if not isinstance(target_size, list):
            raise TypeError(
                "Type of target_size is invalid. Must be List, now is {}".
                format(type(target_size)))
        self.target_size = target_size
        if not (isinstance(self.origin_target_size, int) and
                isinstance(self.origin_max_size, int) and
                isinstance(self.max_size, int) and
                isinstance(self.interp, int)):
            raise TypeError("{}: input type is invalid.".format(self))

    def __call__(self, sample):
        """ Resize the image numpy for multi-scale test.
        """
        origin_ims = {}
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ImageError('{}: image is not 3-dimensional.'.format(self))
        im_shape = im.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        if float(im_size_min) == 0:
            raise ZeroDivisionError('{}: min size of image is 0'.format(self))
        base_name_list = ['image']
        origin_ims['image'] = im
        if self.use_flip:
            # horizontal flip is stored as a separate entry so every scale
            # below is also produced for the flipped image
            sample['image_flip'] = im[:, ::-1, :]
            base_name_list.append('image_flip')
            origin_ims['image_flip'] = sample['image_flip']

        for base_name in base_name_list:
            # scale for the original (reference) resolution
            im_scale = float(self.origin_target_size) / float(im_size_min)
            # Prevent the biggest axis from being more than max_size
            if np.round(im_scale * im_size_max) > self.origin_max_size:
                im_scale = float(self.origin_max_size) / float(im_size_max)
            im_scale_x = im_scale
            im_scale_y = im_scale

            resize_w = np.round(im_scale_x * float(im_shape[1]))
            resize_h = np.round(im_scale_y * float(im_shape[0]))
            im_resize = cv2.resize(
                origin_ims[base_name],
                None,
                None,
                fx=im_scale_x,
                fy=im_scale_y,
                interpolation=self.interp)

            sample[base_name] = im_resize
            info_name = 'im_info' if base_name == 'image' else 'im_info_image_flip'
            sample[info_name] = np.array(
                [resize_h, resize_w, im_scale], dtype=np.float32)

            # extra test scales
            for i, size in enumerate(self.target_size):
                im_scale = float(size) / float(im_size_min)
                if np.round(im_scale * im_size_max) > self.max_size:
                    im_scale = float(self.max_size) / float(im_size_max)
                im_scale_x = im_scale
                im_scale_y = im_scale
                resize_w = np.round(im_scale_x * float(im_shape[1]))
                resize_h = np.round(im_scale_y * float(im_shape[0]))
                im_resize = cv2.resize(
                    origin_ims[base_name],
                    None,
                    None,
                    fx=im_scale_x,
                    fy=im_scale_y,
                    interpolation=self.interp)

                # hard-code here, must be consistent with
                # ppdet/modeling/architectures/input_helper.py
                name = base_name + '_scale_' + str(i)
                info_name = 'im_info_' + name
                sample[name] = im_resize
                sample[info_name] = np.array(
                    [resize_h, resize_w, im_scale], dtype=np.float32)
        return sample
@
register_op
@
register_op
class
ResizeImage
(
BaseOperator
):
class
Permute
(
BaseOperator
):
def
__init__
(
self
,
def
__init__
(
self
):
target_size
=
0
,
max_size
=
0
,
interp
=
cv2
.
INTER_LINEAR
,
use_cv2
=
True
):
"""
"""
Rescale image to the specified target size, and capped at max_size
Change the channel to be (C, H, W)
if max_size != 0.
If target_size is list, selected a scale randomly as the specified
target size.
Args:
target_size (int|list): the target size of image's short side,
multi-scale training is adopted when type is list.
max_size (int): the max size of image
interp (int): the interpolation method
use_cv2 (bool): use the cv2 interpolation method or use PIL
interpolation method
"""
"""
super
(
ResizeImage
,
self
).
__init__
()
super
(
Permute
,
self
).
__init__
()
self
.
max_size
=
int
(
max_size
)
self
.
interp
=
int
(
interp
)
self
.
use_cv2
=
use_cv2
if
not
(
isinstance
(
target_size
,
int
)
or
isinstance
(
target_size
,
list
)):
raise
TypeError
(
"Type of target_size is invalid. Must be Integer or List, now is {}"
.
format
(
type
(
target_size
)))
self
.
target_size
=
target_size
if
not
(
isinstance
(
self
.
max_size
,
int
)
and
isinstance
(
self
.
interp
,
int
)):
raise
TypeError
(
"{}: input type is invalid."
.
format
(
self
))
def
__call__
(
self
,
sample
,
context
=
None
):
def
apply
(
self
,
sample
,
context
=
None
):
""" Resize the image numpy.
"""
im
=
sample
[
'image'
]
im
=
sample
[
'image'
]
if
not
isinstance
(
im
,
np
.
ndarray
):
im
=
im
.
transpose
((
2
,
0
,
1
))
raise
TypeError
(
"{}: image type is not numpy."
.
format
(
self
))
if
len
(
im
.
shape
)
!=
3
:
raise
ImageError
(
'{}: image is not 3-dimensional.'
.
format
(
self
))
im_shape
=
im
.
shape
im_size_min
=
np
.
min
(
im_shape
[
0
:
2
])
im_size_max
=
np
.
max
(
im_shape
[
0
:
2
])
if
isinstance
(
self
.
target_size
,
list
):
# Case for multi-scale training
selected_size
=
random
.
choice
(
self
.
target_size
)
else
:
selected_size
=
self
.
target_size
if
float
(
im_size_min
)
==
0
:
raise
ZeroDivisionError
(
'{}: min size of image is 0'
.
format
(
self
))
if
self
.
max_size
!=
0
:
im_scale
=
float
(
selected_size
)
/
float
(
im_size_min
)
# Prevent the biggest axis from being more than max_size
if
np
.
round
(
im_scale
*
im_size_max
)
>
self
.
max_size
:
im_scale
=
float
(
self
.
max_size
)
/
float
(
im_size_max
)
im_scale_x
=
im_scale
im_scale_y
=
im_scale
resize_w
=
im_scale_x
*
float
(
im_shape
[
1
])
resize_h
=
im_scale_y
*
float
(
im_shape
[
0
])
im_info
=
[
resize_h
,
resize_w
,
im_scale
]
if
'im_info'
in
sample
and
sample
[
'im_info'
][
2
]
!=
1.
:
sample
[
'im_info'
]
=
np
.
append
(
list
(
sample
[
'im_info'
]),
im_info
).
astype
(
np
.
float32
)
else
:
sample
[
'im_info'
]
=
np
.
array
(
im_info
).
astype
(
np
.
float32
)
else
:
im_scale_x
=
float
(
selected_size
)
/
float
(
im_shape
[
1
])
im_scale_y
=
float
(
selected_size
)
/
float
(
im_shape
[
0
])
resize_w
=
selected_size
resize_h
=
selected_size
if
self
.
use_cv2
:
im
=
cv2
.
resize
(
im
,
None
,
None
,
fx
=
im_scale_x
,
fy
=
im_scale_y
,
interpolation
=
self
.
interp
)
else
:
if
self
.
max_size
!=
0
:
raise
TypeError
(
'If you set max_size to cap the maximum size of image,'
'please set use_cv2 to True to resize the image.'
)
im
=
im
.
astype
(
'uint8'
)
im
=
Image
.
fromarray
(
im
)
im
=
im
.
resize
((
int
(
resize_w
),
int
(
resize_h
)),
self
.
interp
)
im
=
np
.
array
(
im
)
sample
[
'image'
]
=
im
sample
[
'image'
]
=
im
return
sample
return
sample
@
register_op
@
register_op
class
RandomFlipImage
(
BaseOperator
):
class
Lighting
(
BaseOperator
):
def
__init__
(
self
,
prob
=
0.5
,
is_normalized
=
False
,
is_mask_flip
=
False
):
"""
"""
Lighting the imagen by eigenvalues and eigenvectors
Args:
Args:
prob (float): the probability of flipping image
eigval (list): eigenvalues
is_normalized (bool): whether the bbox scale to [0,1]
eigvec (list): eigenvectors
is_mask_flip (bool): whether flip the segmentation
alphastd (float): random weight of lighting, 0.1 by default
"""
"""
super
(
RandomFlipImage
,
self
).
__init__
()
self
.
prob
=
prob
self
.
is_normalized
=
is_normalized
self
.
is_mask_flip
=
is_mask_flip
if
not
(
isinstance
(
self
.
prob
,
float
)
and
isinstance
(
self
.
is_normalized
,
bool
)
and
isinstance
(
self
.
is_mask_flip
,
bool
)):
raise
TypeError
(
"{}: input type is invalid."
.
format
(
self
))
def
flip_segms
(
self
,
segms
,
height
,
width
):
def
_flip_poly
(
poly
,
width
):
flipped_poly
=
np
.
array
(
poly
)
flipped_poly
[
0
::
2
]
=
width
-
np
.
array
(
poly
[
0
::
2
])
-
1
return
flipped_poly
.
tolist
()
def
_flip_rle
(
rle
,
height
,
width
):
def
__init__
(
self
,
eigval
,
eigvec
,
alphastd
=
0.1
):
if
'counts'
in
rle
and
type
(
rle
[
'counts'
])
==
list
:
super
(
Lighting
,
self
).
__init__
()
rle
=
mask_util
.
frPyObjects
(
rle
,
height
,
width
)
self
.
alphastd
=
alphastd
mask
=
mask_util
.
decode
(
rle
)
self
.
eigval
=
np
.
array
(
eigval
).
astype
(
'float32'
)
mask
=
mask
[:,
::
-
1
]
self
.
eigvec
=
np
.
array
(
eigvec
).
astype
(
'float32'
)
rle
=
mask_util
.
encode
(
np
.
array
(
mask
,
order
=
'F'
,
dtype
=
np
.
uint8
))
return
rle
flipped_segms
=
[]
for
segm
in
segms
:
if
is_poly
(
segm
):
# Polygon format
flipped_segms
.
append
([
_flip_poly
(
poly
,
width
)
for
poly
in
segm
])
else
:
# RLE format
import
pycocotools.mask
as
mask_util
flipped_segms
.
append
(
_flip_rle
(
segm
,
height
,
width
))
return
flipped_segms
def
flip_keypoint
(
self
,
gt_keypoint
,
width
):
for
i
in
range
(
gt_keypoint
.
shape
[
1
]):
if
i
%
2
==
0
:
old_x
=
gt_keypoint
[:,
i
].
copy
()
if
self
.
is_normalized
:
gt_keypoint
[:,
i
]
=
1
-
old_x
else
:
gt_keypoint
[:,
i
]
=
width
-
old_x
-
1
return
gt_keypoint
def
__call__
(
self
,
sample
):
"""Filp the image and bounding box.
Operators:
1. Flip the image numpy.
2. Transform the bboxes' x coordinates.
(Must judge whether the coordinates are normalized!)
3. Transform the segmentations' x coordinates.
(Must judge whether the coordinates are normalized!)
Output:
sample: the image, bounding box and segmentation part
in sample are flipped.
"""
samples
=
sample
def
apply
(
self
,
sample
,
context
=
None
):
batch_input
=
True
alpha
=
np
.
random
.
normal
(
scale
=
self
.
alphastd
,
size
=
(
3
,
))
if
not
isinstance
(
samples
,
Sequence
):
sample
[
'image'
]
+=
np
.
dot
(
self
.
eigvec
,
self
.
eigval
*
alpha
)
batch_input
=
False
samples
=
[
samples
]
for
sample
in
samples
:
gt_bbox
=
sample
[
'gt_bbox'
]
im
=
sample
[
'image'
]
if
not
isinstance
(
im
,
np
.
ndarray
):
raise
TypeError
(
"{}: image is not a numpy array."
.
format
(
self
))
if
len
(
im
.
shape
)
!=
3
:
raise
ImageError
(
"{}: image is not 3-dimensional."
.
format
(
self
))
height
,
width
,
_
=
im
.
shape
if
np
.
random
.
uniform
(
0
,
1
)
<
self
.
prob
:
im
=
im
[:,
::
-
1
,
:]
if
gt_bbox
.
shape
[
0
]
==
0
:
return
sample
oldx1
=
gt_bbox
[:,
0
].
copy
()
oldx2
=
gt_bbox
[:,
2
].
copy
()
if
self
.
is_normalized
:
gt_bbox
[:,
0
]
=
1
-
oldx2
gt_bbox
[:,
2
]
=
1
-
oldx1
else
:
gt_bbox
[:,
0
]
=
width
-
oldx2
-
1
gt_bbox
[:,
2
]
=
width
-
oldx1
-
1
if
gt_bbox
.
shape
[
0
]
!=
0
and
(
gt_bbox
[:,
2
]
<
gt_bbox
[:,
0
]).
all
():
m
=
"{}: invalid box, x2 should be greater than x1"
.
format
(
self
)
raise
BboxError
(
m
)
sample
[
'gt_bbox'
]
=
gt_bbox
if
self
.
is_mask_flip
and
len
(
sample
[
'gt_poly'
])
!=
0
:
sample
[
'gt_poly'
]
=
self
.
flip_segms
(
sample
[
'gt_poly'
],
height
,
width
)
if
'gt_keypoint'
in
sample
.
keys
():
sample
[
'gt_keypoint'
]
=
self
.
flip_keypoint
(
sample
[
'gt_keypoint'
],
width
)
sample
[
'flipped'
]
=
True
sample
[
'image'
]
=
im
sample
=
samples
if
batch_input
else
samples
[
0
]
return
sample
return
sample
@
register_op
@
register_op
class
RandomErasingImage
(
BaseOperator
):
class
RandomErasingImage
(
BaseOperator
):
def
__init__
(
self
,
prob
=
0.5
,
sl
=
0.02
,
sh
=
0.4
,
r1
=
0.3
):
def
__init__
(
self
,
prob
=
0.5
,
lower
=
0.02
,
higher
=
0.4
,
aspect_ratio
=
0.3
):
"""
"""
Random Erasing Data Augmentation, see https://arxiv.org/abs/1708.04896
Random Erasing Data Augmentation, see https://arxiv.org/abs/1708.04896
Args:
Args:
prob (float): probability to carry out random erasing
prob (float): probability to carry out random erasing
sl
(float): lower limit of the erasing area ratio
lower
(float): lower limit of the erasing area ratio
sh
(float): upper limit of the erasing area ratio
heigher
(float): upper limit of the erasing area ratio
r1
(float): aspect ratio of the erasing region
aspect_ratio
(float): aspect ratio of the erasing region
"""
"""
super
(
RandomErasingImage
,
self
).
__init__
()
super
(
RandomErasingImage
,
self
).
__init__
()
self
.
prob
=
prob
self
.
prob
=
prob
self
.
sl
=
sl
self
.
lower
=
lower
self
.
sh
=
sh
self
.
heigher
=
heigher
self
.
r1
=
r1
self
.
aspect_ratio
=
aspect_ratio
def
__call__
(
self
,
sample
):
def
apply
(
self
,
sample
):
samples
=
sample
batch_input
=
True
if
not
isinstance
(
samples
,
Sequence
):
batch_input
=
False
samples
=
[
samples
]
for
sample
in
samples
:
gt_bbox
=
sample
[
'gt_bbox'
]
gt_bbox
=
sample
[
'gt_bbox'
]
im
=
sample
[
'image'
]
im
=
sample
[
'image'
]
if
not
isinstance
(
im
,
np
.
ndarray
):
if
not
isinstance
(
im
,
np
.
ndarray
):
...
@@ -464,8 +218,9 @@ class RandomErasingImage(BaseOperator):
...
@@ -464,8 +218,9 @@ class RandomErasingImage(BaseOperator):
h_bbox
=
y2
-
y1
+
1
h_bbox
=
y2
-
y1
+
1
area
=
w_bbox
*
h_bbox
area
=
w_bbox
*
h_bbox
target_area
=
random
.
uniform
(
self
.
sl
,
self
.
sh
)
*
area
target_area
=
random
.
uniform
(
self
.
lower
,
self
.
higher
)
*
area
aspect_ratio
=
random
.
uniform
(
self
.
r1
,
1
/
self
.
r1
)
aspect_ratio
=
random
.
uniform
(
self
.
aspect_ratio
,
1
/
self
.
aspect_ratio
)
h
=
int
(
round
(
math
.
sqrt
(
target_area
*
aspect_ratio
)))
h
=
int
(
round
(
math
.
sqrt
(
target_area
*
aspect_ratio
)))
w
=
int
(
round
(
math
.
sqrt
(
target_area
/
aspect_ratio
)))
w
=
int
(
round
(
math
.
sqrt
(
target_area
/
aspect_ratio
)))
...
@@ -473,16 +228,55 @@ class RandomErasingImage(BaseOperator):
...
@@ -473,16 +228,55 @@ class RandomErasingImage(BaseOperator):
if
w
<
w_bbox
and
h
<
h_bbox
:
if
w
<
w_bbox
and
h
<
h_bbox
:
off_y1
=
random
.
randint
(
0
,
int
(
h_bbox
-
h
))
off_y1
=
random
.
randint
(
0
,
int
(
h_bbox
-
h
))
off_x1
=
random
.
randint
(
0
,
int
(
w_bbox
-
w
))
off_x1
=
random
.
randint
(
0
,
int
(
w_bbox
-
w
))
im
[
int
(
y1
+
off_y1
):
int
(
y1
+
off_y1
+
h
),
int
(
x1
+
off_x1
):
im
[
int
(
y1
+
off_y1
):
int
(
y1
+
off_y1
+
h
),
int
(
x1
+
off_x1
):
int
(
int
(
x1
+
off_x1
+
w
),
:]
=
0
x1
+
off_x1
+
w
),
:]
=
0
sample
[
'image'
]
=
im
sample
[
'image'
]
=
im
return
sample
sample
=
samples
if
batch_input
else
samples
[
0
]
@register_op
class NormalizeImage(BaseOperator):
    def __init__(self, mean=None, std=None, is_scale=True):
        """
        Args:
            mean (list): the per-channel pixel mean, default [0.485, 0.456, 0.406]
            std (list): the per-channel pixel variance, default [1, 1, 1]
            is_scale (bool): whether to scale pixel values to [0, 1] before
                normalization
        """
        super(NormalizeImage, self).__init__()
        # None sentinels avoid the shared-mutable-default-argument pitfall
        # while keeping the historical default values.
        self.mean = [0.485, 0.456, 0.406] if mean is None else mean
        self.std = [1, 1, 1] if std is None else std
        self.is_scale = is_scale
        if not (isinstance(self.mean, list) and isinstance(self.std, list) and
                isinstance(self.is_scale, bool)):
            raise TypeError("{}: input type is invalid.".format(self))
        from functools import reduce
        # a zero std would produce a division by zero in apply()
        if reduce(lambda x, y: x * y, self.std) == 0:
            raise ValueError('{}: std is invalid!'.format(self))

    def apply(self, sample, context=None):
        """Normalize the image.
        Operators:
            1.(optional) Scale the image to [0,1]
            2. Each pixel minus mean and is divided by std
        """
        im = sample['image']
        im = im.astype(np.float32, copy=False)
        # reshape stats to (1, 1, C) so they broadcast over H x W
        mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
        std = np.array(self.std)[np.newaxis, np.newaxis, :]

        if self.is_scale:
            im = im / 255.0

        im -= mean
        im /= std

        sample['image'] = im
        return sample
@
register_op
@
register_op
class
GridMask
Op
(
BaseOperator
):
class
GridMask
(
BaseOperator
):
def
__init__
(
self
,
def
__init__
(
self
,
use_h
=
True
,
use_h
=
True
,
use_w
=
True
,
use_w
=
True
,
...
@@ -504,7 +298,7 @@ class GridMaskOp(BaseOperator):
...
@@ -504,7 +298,7 @@ class GridMaskOp(BaseOperator):
prob (float): max probability to carry out gridmask
prob (float): max probability to carry out gridmask
upper_iter (int): suggested to be equal to global max_iter
upper_iter (int): suggested to be equal to global max_iter
"""
"""
super
(
GridMask
Op
,
self
).
__init__
()
super
(
GridMask
,
self
).
__init__
()
self
.
use_h
=
use_h
self
.
use_h
=
use_h
self
.
use_w
=
use_w
self
.
use_w
=
use_w
self
.
rotate
=
rotate
self
.
rotate
=
rotate
...
@@ -525,880 +319,23 @@ class GridMaskOp(BaseOperator):
...
@@ -525,880 +319,23 @@ class GridMaskOp(BaseOperator):
prob
=
prob
,
prob
=
prob
,
upper_iter
=
upper_iter
)
upper_iter
=
upper_iter
)
def
__call__
(
self
,
sample
):
def
apply
(
self
,
sample
,
context
=
None
):
samples
=
sample
sample
[
'image'
]
=
self
.
gridmask_op
(
sample
[
'image'
],
sample
[
'curr_iter'
])
batch_input
=
True
if
not
isinstance
(
samples
,
Sequence
):
batch_input
=
False
samples
=
[
samples
]
for
sample
in
samples
:
sample
[
'image'
]
=
self
.
gridmask_op
(
sample
[
'image'
],
sample
[
'curr_iter'
])
if
not
batch_input
:
samples
=
samples
[
0
]
return
sample
return
sample
@
register_op
@
register_op
class
AutoAugmentImage
(
BaseOperator
):
class
RandomDistort
(
BaseOperator
):
def
__init__
(
self
,
is_normalized
=
False
,
autoaug_type
=
"v1"
):
"""Random color distortion.
"""
Args:
Args:
is_normalized (bool): whether the bbox scale to [0,1]
hue (list): hue settings. in [lower, upper, probability] format.
autoaug_type (str): autoaug type, support v0, v1, v2, v3, test
saturation (list): saturation settings. in [lower, upper, probability] format.
"""
contrast (list): contrast settings. in [lower, upper, probability] format.
super
(
AutoAugmentImage
,
self
).
__init__
()
brightness (list): brightness settings. in [lower, upper, probability] format.
self
.
is_normalized
=
is_normalized
random_apply (bool): whether to apply in random (yolo) or fixed (SSD)
self
.
autoaug_type
=
autoaug_type
order.
if
not
isinstance
(
self
.
is_normalized
,
bool
):
count (int): the number of doing distrot
raise
TypeError
(
"{}: input type is invalid."
.
format
(
self
))
random_channel (bool): whether to swap channels randomly
def
__call__
(
self
,
sample
):
"""
Learning Data Augmentation Strategies for Object Detection, see https://arxiv.org/abs/1906.11172
"""
samples
=
sample
batch_input
=
True
if
not
isinstance
(
samples
,
Sequence
):
batch_input
=
False
samples
=
[
samples
]
for
sample
in
samples
:
gt_bbox
=
sample
[
'gt_bbox'
]
im
=
sample
[
'image'
]
if
not
isinstance
(
im
,
np
.
ndarray
):
raise
TypeError
(
"{}: image is not a numpy array."
.
format
(
self
))
if
len
(
im
.
shape
)
!=
3
:
raise
ImageError
(
"{}: image is not 3-dimensional."
.
format
(
self
))
if
len
(
gt_bbox
)
==
0
:
continue
# gt_boxes : [x1, y1, x2, y2]
# norm_gt_boxes: [y1, x1, y2, x2]
height
,
width
,
_
=
im
.
shape
norm_gt_bbox
=
np
.
ones_like
(
gt_bbox
,
dtype
=
np
.
float32
)
if
not
self
.
is_normalized
:
norm_gt_bbox
[:,
0
]
=
gt_bbox
[:,
1
]
/
float
(
height
)
norm_gt_bbox
[:,
1
]
=
gt_bbox
[:,
0
]
/
float
(
width
)
norm_gt_bbox
[:,
2
]
=
gt_bbox
[:,
3
]
/
float
(
height
)
norm_gt_bbox
[:,
3
]
=
gt_bbox
[:,
2
]
/
float
(
width
)
else
:
norm_gt_bbox
[:,
0
]
=
gt_bbox
[:,
1
]
norm_gt_bbox
[:,
1
]
=
gt_bbox
[:,
0
]
norm_gt_bbox
[:,
2
]
=
gt_bbox
[:,
3
]
norm_gt_bbox
[:,
3
]
=
gt_bbox
[:,
2
]
from
.autoaugment_utils
import
distort_image_with_autoaugment
im
,
norm_gt_bbox
=
distort_image_with_autoaugment
(
im
,
norm_gt_bbox
,
self
.
autoaug_type
)
if
not
self
.
is_normalized
:
gt_bbox
[:,
0
]
=
norm_gt_bbox
[:,
1
]
*
float
(
width
)
gt_bbox
[:,
1
]
=
norm_gt_bbox
[:,
0
]
*
float
(
height
)
gt_bbox
[:,
2
]
=
norm_gt_bbox
[:,
3
]
*
float
(
width
)
gt_bbox
[:,
3
]
=
norm_gt_bbox
[:,
2
]
*
float
(
height
)
else
:
gt_bbox
[:,
0
]
=
norm_gt_bbox
[:,
1
]
gt_bbox
[:,
1
]
=
norm_gt_bbox
[:,
0
]
gt_bbox
[:,
2
]
=
norm_gt_bbox
[:,
3
]
gt_bbox
[:,
3
]
=
norm_gt_bbox
[:,
2
]
sample
[
'gt_bbox'
]
=
gt_bbox
sample
[
'image'
]
=
im
sample
=
samples
if
batch_input
else
samples
[
0
]
return
sample
@register_op
class NormalizeImage(BaseOperator):
    def __init__(self,
                 mean=[0.485, 0.456, 0.406],
                 std=[1, 1, 1],
                 is_scale=True,
                 is_channel_first=True):
        """
        Args:
            mean (list): the pixel mean
            std (list): the pixel variance
        """
        super(NormalizeImage, self).__init__()
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        self.is_channel_first = is_channel_first
        if not (isinstance(self.mean, list) and isinstance(self.std, list) and
                isinstance(self.is_scale, bool)):
            raise TypeError("{}: input type is invalid.".format(self))
        from functools import reduce
        if reduce(lambda x, y: x * y, self.std) == 0:
            raise ValueError('{}: std is invalid!'.format(self))

    def __call__(self, sample):
        """Normalize the image.
        Operators:
            1.(optional) Scale the image to [0,1]
            2. Each pixel minus mean and is divided by std
        """
        samples = sample
        batch_input = True
        if not isinstance(samples, Sequence):
            batch_input = False
            samples = [samples]
        for sample in samples:
            for k in sample.keys():
                # hard code: normalize every entry whose key starts with
                # 'image' (covers multi-scale / flipped variants)
                if k.startswith('image'):
                    im = sample[k].astype(np.float32, copy=False)
                    if self.is_channel_first:
                        # stats broadcast as (C, 1, 1) for CHW layout
                        mean = np.array(self.mean)[:, np.newaxis, np.newaxis]
                        std = np.array(self.std)[:, np.newaxis, np.newaxis]
                    else:
                        # stats broadcast as (1, 1, C) for HWC layout
                        mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
                        std = np.array(self.std)[np.newaxis, np.newaxis, :]
                    if self.is_scale:
                        im = im / 255.0
                    im -= mean
                    im /= std
                    sample[k] = im
        if not batch_input:
            samples = samples[0]
        return samples
@register_op
class RandomDistort(BaseOperator):
    def __init__(self,
                 brightness_lower=0.5,
                 brightness_upper=1.5,
                 contrast_lower=0.5,
                 contrast_upper=1.5,
                 saturation_lower=0.5,
                 saturation_upper=1.5,
                 hue_lower=-18,
                 hue_upper=18,
                 brightness_prob=0.5,
                 contrast_prob=0.5,
                 saturation_prob=0.5,
                 hue_prob=0.5,
                 count=4,
                 is_order=False):
        """
        Args:
            brightness_lower/ brightness_upper (float): the brightness
                between brightness_lower and brightness_upper
            contrast_lower/ contrast_upper (float): the contrast between
                contrast_lower and contrast_lower
            saturation_lower/ saturation_upper (float): the saturation
                between saturation_lower and saturation_upper
            hue_lower/ hue_upper (float): the hue between
                hue_lower and hue_upper
            brightness_prob (float): the probability of changing brightness
            contrast_prob (float): the probability of changing contrast
            saturation_prob (float): the probability of changing saturation
            hue_prob (float): the probability of changing hue
            count (int): the kinds of doing distrot
            is_order (bool): whether determine the order of distortion
        """
        super(RandomDistort, self).__init__()
        self.brightness_lower = brightness_lower
        self.brightness_upper = brightness_upper
        self.contrast_lower = contrast_lower
        self.contrast_upper = contrast_upper
        self.saturation_lower = saturation_lower
        self.saturation_upper = saturation_upper
        self.hue_lower = hue_lower
        self.hue_upper = hue_upper
        self.brightness_prob = brightness_prob
        self.contrast_prob = contrast_prob
        self.saturation_prob = saturation_prob
        self.hue_prob = hue_prob
        self.count = count
        self.is_order = is_order

    def random_brightness(self, img):
        # apply a random brightness change with probability brightness_prob
        brightness_delta = np.random.uniform(self.brightness_lower,
                                             self.brightness_upper)
        prob = np.random.uniform(0, 1)
        if prob < self.brightness_prob:
            img = ImageEnhance.Brightness(img).enhance(brightness_delta)
        return img

    def random_contrast(self, img):
        # apply a random contrast change with probability contrast_prob
        contrast_delta = np.random.uniform(self.contrast_lower,
                                           self.contrast_upper)
        prob = np.random.uniform(0, 1)
        if prob < self.contrast_prob:
            img = ImageEnhance.Contrast(img).enhance(contrast_delta)
        return img

    def random_saturation(self, img):
        # apply a random saturation change with probability saturation_prob
        saturation_delta = np.random.uniform(self.saturation_lower,
                                             self.saturation_upper)
        prob = np.random.uniform(0, 1)
        if prob < self.saturation_prob:
            img = ImageEnhance.Color(img).enhance(saturation_delta)
        return img

    def random_hue(self, img):
        # shift the hue channel in HSV space with probability hue_prob
        hue_delta = np.random.uniform(self.hue_lower, self.hue_upper)
        prob = np.random.uniform(0, 1)
        if prob < self.hue_prob:
            img = np.array(img.convert('HSV'))
            # NOTE(review): uint8 addition wraps around on overflow —
            # presumably intentional for hue (a circular quantity); confirm.
            img[:, :, 0] = img[:, :, 0] + hue_delta
            img = Image.fromarray(img, mode='HSV').convert('RGB')
        return img

    def __call__(self, sample):
        """random distort the image"""
        ops = [
            self.random_brightness, self.random_contrast,
            self.random_saturation, self.random_hue
        ]
        if self.is_order:
            prob = np.random.uniform(0, 1)
            if prob < 0.5:
                ops = [
                    self.random_brightness,
                    self.random_saturation,
                    self.random_hue,
                    self.random_contrast,
                ]
        else:
            ops = random.sample(ops, self.count)
        assert 'image' in sample, "image data not found"
        im = sample['image']
        im = Image.fromarray(im)
        # Iterate the selected ops directly instead of indexing by a counter:
        # the original loop variable shadowed the builtin `id`, and indexing
        # `ops[i]` raised IndexError when is_order is True and count > 4.
        for op in ops[:self.count]:
            im = op(im)
        im = np.asarray(im)
        sample['image'] = im
        return sample
@register_op
class ExpandImage(BaseOperator):
    def __init__(self, max_ratio, prob, mean=[127.5, 127.5, 127.5]):
        """
        Args:
            max_ratio (float): the ratio of expanding
            prob (float): the probability of expanding image
            mean (list): the pixel mean
        """
        super(ExpandImage, self).__init__()
        self.max_ratio = max_ratio
        self.mean = mean
        self.prob = prob

    def __call__(self, sample):
        """
        Expand the image and modify bounding box.
        Operators:
            1. Scale the image width and height.
            2. Construct new images with new height and width.
            3. Fill the new image with the mean.
            4. Put original imge into new image.
            5. Rescale the bounding box.
            6. Determine if the new bbox is satisfied in the new image.
        Returns:
            sample: the image, bounding box are replaced.
        """
        prob = np.random.uniform(0, 1)
        assert 'image' in sample, 'not found image data'
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        gt_class = sample['gt_class']
        im_width = sample['w']
        im_height = sample['h']
        if prob < self.prob:
            # only expand when the ratio range is meaningfully above 1
            if self.max_ratio - 1 >= 0.01:
                expand_ratio = np.random.uniform(1, self.max_ratio)
                height = int(im_height * expand_ratio)
                width = int(im_width * expand_ratio)
                # random placement of the original image inside the canvas
                h_off = math.floor(np.random.uniform(0, height - im_height))
                w_off = math.floor(np.random.uniform(0, width - im_width))
                # crop window expressed in the original image's normalized
                # coordinates, used to re-filter the ground-truth boxes
                expand_bbox = [
                    -w_off / im_width,
                    -h_off / im_height,
                    (width - w_off) / im_width,
                    (height - h_off) / im_height,
                ]
                canvas = np.ones((height, width, 3))
                canvas = np.uint8(canvas * np.squeeze(self.mean))
                canvas = Image.fromarray(canvas)
                pil_im = Image.fromarray(im)
                canvas.paste(pil_im, (int(w_off), int(h_off)))
                expand_im = np.asarray(canvas)
                has_keypoints = ('gt_keypoint' in sample.keys() and
                                 'keypoint_ignore' in sample.keys())
                if has_keypoints:
                    keypoints = (sample['gt_keypoint'],
                                 sample['keypoint_ignore'])
                    gt_bbox, gt_class, _, gt_keypoints = filter_and_process(
                        expand_bbox, gt_bbox, gt_class, keypoints=keypoints)
                    sample['gt_keypoint'] = gt_keypoints[0]
                    sample['keypoint_ignore'] = gt_keypoints[1]
                else:
                    gt_bbox, gt_class, _ = filter_and_process(
                        expand_bbox, gt_bbox, gt_class)
                sample['image'] = expand_im
                sample['gt_bbox'] = gt_bbox
                sample['gt_class'] = gt_class
                sample['w'] = width
                sample['h'] = height
        return sample
@register_op
class CropImage(BaseOperator):
    def __init__(self, batch_sampler, satisfy_all=False, avoid_no_bbox=True):
        """
        Args:
            batch_sampler (list): Multiple sets of different
                                  parameters for cropping.
            satisfy_all (bool): whether all boxes must satisfy.
            e.g.[[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
            [max sample, max trial, min scale, max scale,
             min aspect ratio, max aspect ratio,
             min overlap, max overlap]
            avoid_no_bbox (bool): whether to to avoid the
                                  situation where the box does not appear.
        """
        super(CropImage, self).__init__()
        self.batch_sampler = batch_sampler
        self.satisfy_all = satisfy_all
        self.avoid_no_bbox = avoid_no_bbox

    def __call__(self, sample):
        """
        Crop the image and modify bounding box.
        Operators:
            1. Scale the image width and height.
            2. Crop the image according to a radom sample.
            3. Rescale the bounding box.
            4. Determine if the new bbox is satisfied in the new image.
        Returns:
            sample: the image, bounding box are replaced.
        """
        assert 'image' in sample, "image data not found"
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        gt_class = sample['gt_class']
        im_width = sample['w']
        im_height = sample['h']
        gt_score = None
        if 'gt_score' in sample:
            gt_score = sample['gt_score']

        # draw candidate crop windows from each sampler configuration
        sampled_bbox = []
        gt_bbox = gt_bbox.tolist()
        for sampler in self.batch_sampler:
            found = 0
            for _ in range(sampler[1]):  # sampler[1] == max trials
                if found >= sampler[0]:  # sampler[0] == max samples
                    break
                candidate = generate_sample_bbox(sampler)
                if satisfy_sample_constraint(sampler, candidate, gt_bbox,
                                             self.satisfy_all):
                    sampled_bbox.append(candidate)
                    found = found + 1

        im = np.array(im)
        # try the candidates in random order until one keeps at least one box
        while sampled_bbox:
            idx = int(np.random.uniform(0, len(sampled_bbox)))
            sample_bbox = sampled_bbox.pop(idx)
            sample_bbox = clip_bbox(sample_bbox)
            crop_bbox, crop_class, crop_score = \
                filter_and_process(sample_bbox, gt_bbox, gt_class,
                                   scores=gt_score)
            if self.avoid_no_bbox:
                if len(crop_bbox) < 1:
                    continue
            # candidate window is normalized; convert to pixel coordinates
            xmin = int(sample_bbox[0] * im_width)
            xmax = int(sample_bbox[2] * im_width)
            ymin = int(sample_bbox[1] * im_height)
            ymax = int(sample_bbox[3] * im_height)
            im = im[ymin:ymax, xmin:xmax]
            sample['image'] = im
            sample['gt_bbox'] = crop_bbox
            sample['gt_class'] = crop_class
            sample['gt_score'] = crop_score
            return sample
        # no acceptable crop found: return the sample unchanged
        return sample
@register_op
class CropImageWithDataAchorSampling(BaseOperator):
    def __init__(self,
                 batch_sampler,
                 anchor_sampler=None,
                 target_size=None,
                 das_anchor_scales=[16, 32, 64, 128],
                 sampling_prob=0.5,
                 min_size=8.,
                 avoid_no_bbox=True):
        """
        Args:
            anchor_sampler (list): anchor_sampling sets of different
                parameters for cropping.
            batch_sampler (list): Multiple sets of different
                parameters for cropping.
              e.g.[[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]]
                  [[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                   [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                   [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                   [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                   [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]]
              [max sample, max trial, min scale, max scale,
               min aspect ratio, max aspect ratio,
               min overlap, max overlap, min coverage, max coverage]
            target_size (bool): target image size.
            das_anchor_scales (list[float]): a list of anchor scales in data
                anchor sampling.
            sampling_prob (float): probability of switching between plain
                batch sampling and data-anchor sampling (see __call__).
            min_size (float): minimum size of sampled bbox.
            avoid_no_bbox (bool): whether to avoid the
                situation where the box does not appear.
        """
        super(CropImageWithDataAchorSampling, self).__init__()
        self.anchor_sampler = anchor_sampler
        self.batch_sampler = batch_sampler
        self.target_size = target_size
        self.sampling_prob = sampling_prob
        self.min_size = min_size
        self.avoid_no_bbox = avoid_no_bbox
        self.das_anchor_scales = np.array(das_anchor_scales)

    def __call__(self, sample):
        """
        Crop the image and modify bounding box.
        Operators:
            1. Scale the image width and height.
            2. Crop the image according to a random sample.
            3. Rescale the bounding box.
            4. Determine if the new bbox is satisfied in the new image.
        Returns:
            sample: the image, bounding box are replaced.
        """
        assert 'image' in sample, "image data not found"
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        gt_class = sample['gt_class']
        image_width = sample['w']
        image_height = sample['h']
        gt_score = None
        if 'gt_score' in sample:
            gt_score = sample['gt_score']
        sampled_bbox = []
        gt_bbox = gt_bbox.tolist()

        # With probability (1 - sampling_prob) use data-anchor sampling,
        # otherwise fall through to the plain square batch sampler below.
        prob = np.random.uniform(0., 1.)
        if prob > self.sampling_prob:  # anchor sampling
            assert self.anchor_sampler
            for sampler in self.anchor_sampler:
                found = 0
                for i in range(sampler[1]):
                    if found >= sampler[0]:
                        break
                    sample_bbox = data_anchor_sampling(
                        gt_bbox, image_width, image_height,
                        self.das_anchor_scales, self.target_size)
                    if sample_bbox == 0:
                        # data_anchor_sampling failed; abandon this sampler.
                        break
                    if satisfy_sample_constraint_coverage(sampler, sample_bbox,
                                                          gt_bbox):
                        sampled_bbox.append(sample_bbox)
                        found = found + 1
            im = np.array(im)
            # Try candidate crops in random order; first accepted wins.
            while sampled_bbox:
                idx = int(np.random.uniform(0, len(sampled_bbox)))
                sample_bbox = sampled_bbox.pop(idx)

                if 'gt_keypoint' in sample.keys():
                    keypoints = (sample['gt_keypoint'],
                                 sample['keypoint_ignore'])
                    crop_bbox, crop_class, crop_score, gt_keypoints = \
                        filter_and_process(sample_bbox, gt_bbox, gt_class,
                                           scores=gt_score,
                                           keypoints=keypoints)
                else:
                    crop_bbox, crop_class, crop_score = filter_and_process(
                        sample_bbox, gt_bbox, gt_class, scores=gt_score)
                # Drop boxes that would become smaller than min_size after
                # resizing the crop to target_size.
                crop_bbox, crop_class, crop_score = bbox_area_sampling(
                    crop_bbox, crop_class, crop_score, self.target_size,
                    self.min_size)

                if self.avoid_no_bbox:
                    if len(crop_bbox) < 1:
                        continue
                im = crop_image_sampling(im, sample_bbox, image_width,
                                         image_height, self.target_size)
                sample['image'] = im
                sample['gt_bbox'] = crop_bbox
                sample['gt_class'] = crop_class
                sample['gt_score'] = crop_score
                if 'gt_keypoint' in sample.keys():
                    sample['gt_keypoint'] = gt_keypoints[0]
                    sample['keypoint_ignore'] = gt_keypoints[1]
                return sample
            return sample

        else:
            for sampler in self.batch_sampler:
                found = 0
                for i in range(sampler[1]):
                    if found >= sampler[0]:
                        break
                    sample_bbox = generate_sample_bbox_square(
                        sampler, image_width, image_height)
                    if satisfy_sample_constraint_coverage(sampler, sample_bbox,
                                                          gt_bbox):
                        sampled_bbox.append(sample_bbox)
                        found = found + 1
            im = np.array(im)
            # Try candidate crops in random order; first accepted wins.
            while sampled_bbox:
                idx = int(np.random.uniform(0, len(sampled_bbox)))
                sample_bbox = sampled_bbox.pop(idx)
                sample_bbox = clip_bbox(sample_bbox)

                if 'gt_keypoint' in sample.keys():
                    keypoints = (sample['gt_keypoint'],
                                 sample['keypoint_ignore'])
                    crop_bbox, crop_class, crop_score, gt_keypoints = \
                        filter_and_process(sample_bbox, gt_bbox, gt_class,
                                           scores=gt_score,
                                           keypoints=keypoints)
                else:
                    crop_bbox, crop_class, crop_score = filter_and_process(
                        sample_bbox, gt_bbox, gt_class, scores=gt_score)
                # sampling bbox according the bbox area
                crop_bbox, crop_class, crop_score = bbox_area_sampling(
                    crop_bbox, crop_class, crop_score, self.target_size,
                    self.min_size)

                if self.avoid_no_bbox:
                    if len(crop_bbox) < 1:
                        continue
                # sample_bbox is normalized; convert to pixel coordinates.
                xmin = int(sample_bbox[0] * image_width)
                xmax = int(sample_bbox[2] * image_width)
                ymin = int(sample_bbox[1] * image_height)
                ymax = int(sample_bbox[3] * image_height)
                im = im[ymin:ymax, xmin:xmax]
                sample['image'] = im
                sample['gt_bbox'] = crop_bbox
                sample['gt_class'] = crop_class
                sample['gt_score'] = crop_score
                if 'gt_keypoint' in sample.keys():
                    sample['gt_keypoint'] = gt_keypoints[0]
                    sample['keypoint_ignore'] = gt_keypoints[1]
                return sample
            return sample
@register_op
class NormalizeBox(BaseOperator):
    """Scale ground-truth box (and keypoint) coordinates into [0, 1]."""

    def __init__(self):
        super(NormalizeBox, self).__init__()

    def __call__(self, sample):
        """Divide x-coordinates by image width and y-coordinates by height.

        Updates sample['gt_bbox'] (and sample['gt_keypoint'] when present)
        in place and returns the sample.
        """
        boxes = sample['gt_bbox']
        im_w = sample['w']
        im_h = sample['h']
        # Box layout is [xmin, ymin, xmax, ymax]: even columns are x,
        # odd columns are y.
        boxes[:, 0::2] = boxes[:, 0::2] / im_w
        boxes[:, 1::2] = boxes[:, 1::2] / im_h
        sample['gt_bbox'] = boxes
        if 'gt_keypoint' in sample.keys():
            keypoints = sample['gt_keypoint']
            # Keypoints are stored as interleaved (x, y) columns.
            keypoints[:, 0::2] = keypoints[:, 0::2] / im_w
            keypoints[:, 1::2] = keypoints[:, 1::2] / im_h
            sample['gt_keypoint'] = keypoints
        return sample
@register_op
class Permute(BaseOperator):
    def __init__(self, to_bgr=True, channel_first=True):
        """
        Change the channel.
        Args:
            to_bgr (bool): confirm whether to convert RGB to BGR
            channel_first (bool): confirm whether to change channel
        """
        super(Permute, self).__init__()
        self.to_bgr = to_bgr
        self.channel_first = channel_first
        if not (isinstance(self.to_bgr, bool) and
                isinstance(self.channel_first, bool)):
            raise TypeError("{}: input type is invalid.".format(self))

    def __call__(self, sample, context=None):
        """Permute every 'image*' entry of each sample.

        Accepts a single sample dict or a sequence of them and returns
        the same shape of input it was given.
        """
        is_batch = isinstance(sample, Sequence)
        batch = sample if is_batch else [sample]
        for one in batch:
            assert 'image' in one, "image data not found"
            # hard code: every key starting with 'image' holds image data
            for key in one.keys():
                if not key.startswith('image'):
                    continue
                data = one[key]
                if self.channel_first:
                    # HWC -> CHW (same result as the two swapaxes calls).
                    data = np.transpose(data, (2, 0, 1))
                if self.to_bgr:
                    data = data[[2, 1, 0], :, :]
                one[key] = data
        return batch if is_batch else batch[0]
@register_op
class MixupImage(BaseOperator):
    def __init__(self, alpha=1.5, beta=1.5):
        """ Mixup image and gt_bbbox/gt_score
        Args:
            alpha (float): alpha parameter of beta distribute
            beta (float): beta parameter of beta distribute
        """
        super(MixupImage, self).__init__()
        self.alpha = alpha
        self.beta = beta
        if self.alpha <= 0.0:
            raise ValueError("alpha shold be positive in {}".format(self))
        if self.beta <= 0.0:
            raise ValueError("beta shold be positive in {}".format(self))

    def _mixup_img(self, img1, img2, factor):
        """Alpha-blend two images on a canvas large enough to hold both."""
        canvas_h = max(img1.shape[0], img2.shape[0])
        canvas_w = max(img1.shape[1], img2.shape[1])
        canvas = np.zeros((canvas_h, canvas_w, img1.shape[2]), 'float32')
        canvas[:img1.shape[0], :img1.shape[1], :] = \
            img1.astype('float32') * factor
        canvas[:img2.shape[0], :img2.shape[1], :] += \
            img2.astype('float32') * (1.0 - factor)
        return canvas.astype('uint8')

    def __call__(self, sample, context=None):
        """Blend the sample with its 'mixup' partner sample.

        Concatenates the ground-truth annotations of both samples,
        weighting gt_score by the blend factor, then drops the 'mixup'
        entry. Degenerate factors return one of the two samples as-is.
        """
        if 'mixup' not in sample:
            return sample
        weight = np.random.beta(self.alpha, self.beta)
        weight = min(1.0, max(0.0, weight))
        if weight >= 1.0:
            # Blend is entirely the first image; discard the partner.
            sample.pop('mixup')
            return sample
        if weight <= 0.0:
            # Blend is entirely the partner; return it instead.
            return sample['mixup']
        partner = sample['mixup']
        blended = self._mixup_img(sample['image'], partner['image'], weight)
        sample['gt_bbox'] = np.concatenate(
            (sample['gt_bbox'], partner['gt_bbox']), axis=0)
        sample['gt_class'] = np.concatenate(
            (sample['gt_class'], partner['gt_class']), axis=0)
        sample['gt_score'] = np.concatenate(
            (sample['gt_score'] * weight,
             partner['gt_score'] * (1. - weight)),
            axis=0)
        sample['is_crowd'] = np.concatenate(
            (sample['is_crowd'], partner['is_crowd']), axis=0)
        sample['image'] = blended
        sample['h'] = blended.shape[0]
        sample['w'] = blended.shape[1]
        sample.pop('mixup')
        return sample
@register_op
class CutmixImage(BaseOperator):
    def __init__(self, alpha=1.5, beta=1.5):
        """
        CutMix: Regularization Strategy to Train Strong Classifiers with
        Localizable Features, see https://arxiv.org/abs/1905.04899
        Cutmix image and gt_bbbox/gt_score
        Args:
            alpha (float): alpha parameter of beta distribute
            beta (float): beta parameter of beta distribute
        """
        super(CutmixImage, self).__init__()
        self.alpha = alpha
        self.beta = beta
        if self.alpha <= 0.0:
            raise ValueError("alpha shold be positive in {}".format(self))
        if self.beta <= 0.0:
            raise ValueError("beta shold be positive in {}".format(self))

    def _rand_bbox(self, img1, img2, factor):
        """Paste a random patch of img2 onto a zero-padded copy of img1.

        The patch area is proportional to (1 - factor) of the canvas,
        which is sized to hold the larger of the two images. Returns the
        blended float32 canvas.
        """
        h = max(img1.shape[0], img2.shape[0])
        w = max(img1.shape[1], img2.shape[1])
        cut_rat = np.sqrt(1. - factor)
        # np.int was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin int is the documented replacement.
        cut_w = int(w * cut_rat)
        cut_h = int(h * cut_rat)

        # uniform patch center
        cx = np.random.randint(w)
        cy = np.random.randint(h)

        bbx1 = np.clip(cx - cut_w // 2, 0, w)
        bby1 = np.clip(cy - cut_h // 2, 0, h)
        bbx2 = np.clip(cx + cut_w // 2, 0, w)
        bby2 = np.clip(cy + cut_h // 2, 0, h)

        img_1 = np.zeros((h, w, img1.shape[2]), 'float32')
        img_1[:img1.shape[0], :img1.shape[1], :] = \
            img1.astype('float32')
        img_2 = np.zeros((h, w, img2.shape[2]), 'float32')
        img_2[:img2.shape[0], :img2.shape[1], :] = \
            img2.astype('float32')
        # Copy the patch from the zero-padded second image (img_2) so the
        # slice is always in range: slicing the raw img2 raised a
        # shape-mismatch ValueError whenever img2 was smaller than the
        # canvas (and the padded img_2 was computed but never used).
        img_1[bby1:bby2, bbx1:bbx2, :] = img_2[bby1:bby2, bbx1:bbx2, :]
        return img_1

    def __call__(self, sample, context=None):
        """Cut-mix the sample with its 'cutmix' partner sample.

        Concatenates both samples' annotations, weighting gt_score by the
        blend factor, then removes the 'cutmix' entry. Degenerate factors
        return one of the two samples unchanged.
        """
        if 'cutmix' not in sample:
            return sample
        factor = np.random.beta(self.alpha, self.beta)
        factor = max(0.0, min(1.0, factor))
        if factor >= 1.0:
            sample.pop('cutmix')
            return sample
        if factor <= 0.0:
            return sample['cutmix']
        img1 = sample['image']
        img2 = sample['cutmix']['image']
        img = self._rand_bbox(img1, img2, factor)
        gt_bbox1 = sample['gt_bbox']
        gt_bbox2 = sample['cutmix']['gt_bbox']
        gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
        gt_class1 = sample['gt_class']
        gt_class2 = sample['cutmix']['gt_class']
        gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
        gt_score1 = sample['gt_score']
        gt_score2 = sample['cutmix']['gt_score']
        gt_score = np.concatenate(
            (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
        sample['image'] = img
        sample['gt_bbox'] = gt_bbox
        sample['gt_score'] = gt_score
        sample['gt_class'] = gt_class
        sample['h'] = img.shape[0]
        sample['w'] = img.shape[1]
        sample.pop('cutmix')
        return sample
@register_op
class RandomInterpImage(BaseOperator):
    def __init__(self, target_size=0, max_size=0):
        """
        Resize the image using a randomly chosen interpolation method.
        Args:
            target_size (int): the taregt size of image's short side
            max_size (int): the max size of image
        """
        super(RandomInterpImage, self).__init__()
        self.target_size = target_size
        self.max_size = max_size
        if not (isinstance(self.target_size, int) and
                isinstance(self.max_size, int)):
            raise TypeError('{}: input type is invalid.'.format(self))
        # One ResizeImage operator per supported OpenCV interpolation mode.
        self.resizers = [
            ResizeImage(target_size, max_size, mode)
            for mode in (cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA,
                         cv2.INTER_CUBIC, cv2.INTER_LANCZOS4)
        ]

    def __call__(self, sample, context=None):
        """Resise the image numpy by random resizer."""
        return random.choice(self.resizers)(sample, context)
@register_op
class Resize(BaseOperator):
    """Resize image and bbox.
    Args:
        target_dim (int or list): target size, can be a single number or a list
            (for random shape).
        interp (int or str): interpolation method, can be an integer or
            'random' (for randomized interpolation).
            default to `cv2.INTER_LINEAR`.
    """

    # An immutable empty tuple replaces the mutable default `[]`:
    # mutable default arguments are shared across calls and are a
    # classic Python pitfall. Both are empty Sequences, so behavior
    # for callers is unchanged.
    def __init__(self, target_dim=(), interp=cv2.INTER_LINEAR):
        super(Resize, self).__init__()
        self.target_dim = target_dim
        self.interp = interp  # 'random' for yolov3

    def __call__(self, sample, context=None):
        """Resize sample['image'] to a square target dimension.

        Rescales and clips 'gt_bbox' when present, records
        'scale_factor' as [scale_x, scale_y, scale_x, scale_y], and
        updates 'h'/'w'.
        """
        w = sample['w']
        h = sample['h']
        interp = self.interp
        if interp == 'random':
            # Pick one of the five OpenCV interpolation flags (0..4).
            interp = np.random.choice(range(5))

        if isinstance(self.target_dim, Sequence):
            dim = np.random.choice(self.target_dim)
        else:
            dim = self.target_dim
        resize_w = resize_h = dim
        scale_x = dim / w
        scale_y = dim / h
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            scale_array = np.array([scale_x, scale_y] * 2, dtype=np.float32)
            sample['gt_bbox'] = np.clip(sample['gt_bbox'] * scale_array, 0,
                                        dim - 1)
        sample['scale_factor'] = [scale_x, scale_y] * 2
        sample['h'] = resize_h
        sample['w'] = resize_w

        sample['image'] = cv2.resize(
            sample['image'], (resize_w, resize_h), interpolation=interp)
        return sample
@
register_op
class
ColorDistort
(
BaseOperator
):
"""Random color distortion.
Args:
hue (list): hue settings.
in [lower, upper, probability] format.
saturation (list): saturation settings.
in [lower, upper, probability] format.
contrast (list): contrast settings.
in [lower, upper, probability] format.
brightness (list): brightness settings.
in [lower, upper, probability] format.
random_apply (bool): whether to apply in random (yolo) or fixed (SSD)
order.
hsv_format (bool): whether to convert color from BGR to HSV
random_channel (bool): whether to swap channels randomly
"""
"""
def
__init__
(
self
,
def
__init__
(
self
,
...
@@ -1407,15 +344,15 @@ class ColorDistort(BaseOperator):
...
@@ -1407,15 +344,15 @@ class ColorDistort(BaseOperator):
contrast
=
[
0.5
,
1.5
,
0.5
],
contrast
=
[
0.5
,
1.5
,
0.5
],
brightness
=
[
0.5
,
1.5
,
0.5
],
brightness
=
[
0.5
,
1.5
,
0.5
],
random_apply
=
True
,
random_apply
=
True
,
hsv_format
=
False
,
count
=
4
,
random_channel
=
False
):
random_channel
=
False
):
super
(
Color
Distort
,
self
).
__init__
()
super
(
Random
Distort
,
self
).
__init__
()
self
.
hue
=
hue
self
.
hue
=
hue
self
.
saturation
=
saturation
self
.
saturation
=
saturation
self
.
contrast
=
contrast
self
.
contrast
=
contrast
self
.
brightness
=
brightness
self
.
brightness
=
brightness
self
.
random_apply
=
random_apply
self
.
random_apply
=
random_apply
self
.
hsv_format
=
hsv_forma
t
self
.
count
=
coun
t
self
.
random_channel
=
random_channel
self
.
random_channel
=
random_channel
def
apply_hue
(
self
,
img
):
def
apply_hue
(
self
,
img
):
...
@@ -1424,13 +361,7 @@ class ColorDistort(BaseOperator):
...
@@ -1424,13 +361,7 @@ class ColorDistort(BaseOperator):
return
img
return
img
img
=
img
.
astype
(
np
.
float32
)
img
=
img
.
astype
(
np
.
float32
)
if
self
.
hsv_format
:
# it works, but result differ from HSV version
img
[...,
0
]
+=
random
.
uniform
(
low
,
high
)
img
[...,
0
][
img
[...,
0
]
>
360
]
-=
360
img
[...,
0
][
img
[...,
0
]
<
0
]
+=
360
return
img
# XXX works, but result differ from HSV version
delta
=
np
.
random
.
uniform
(
low
,
high
)
delta
=
np
.
random
.
uniform
(
low
,
high
)
u
=
np
.
cos
(
delta
*
np
.
pi
)
u
=
np
.
cos
(
delta
*
np
.
pi
)
w
=
np
.
sin
(
delta
*
np
.
pi
)
w
=
np
.
sin
(
delta
*
np
.
pi
)
...
@@ -1449,9 +380,7 @@ class ColorDistort(BaseOperator):
...
@@ -1449,9 +380,7 @@ class ColorDistort(BaseOperator):
return
img
return
img
delta
=
np
.
random
.
uniform
(
low
,
high
)
delta
=
np
.
random
.
uniform
(
low
,
high
)
img
=
img
.
astype
(
np
.
float32
)
img
=
img
.
astype
(
np
.
float32
)
if
self
.
hsv_format
:
# it works, but result differ from HSV version
img
[...,
1
]
*=
delta
return
img
gray
=
img
*
np
.
array
([[[
0.299
,
0.587
,
0.114
]]],
dtype
=
np
.
float32
)
gray
=
img
*
np
.
array
([[[
0.299
,
0.587
,
0.114
]]],
dtype
=
np
.
float32
)
gray
=
gray
.
sum
(
axis
=
2
,
keepdims
=
True
)
gray
=
gray
.
sum
(
axis
=
2
,
keepdims
=
True
)
gray
*=
(
1.0
-
delta
)
gray
*=
(
1.0
-
delta
)
...
@@ -1464,7 +393,6 @@ class ColorDistort(BaseOperator):
...
@@ -1464,7 +393,6 @@ class ColorDistort(BaseOperator):
if
np
.
random
.
uniform
(
0.
,
1.
)
<
prob
:
if
np
.
random
.
uniform
(
0.
,
1.
)
<
prob
:
return
img
return
img
delta
=
np
.
random
.
uniform
(
low
,
high
)
delta
=
np
.
random
.
uniform
(
low
,
high
)
img
=
img
.
astype
(
np
.
float32
)
img
=
img
.
astype
(
np
.
float32
)
img
*=
delta
img
*=
delta
return
img
return
img
...
@@ -1474,139 +402,446 @@ class ColorDistort(BaseOperator):
...
@@ -1474,139 +402,446 @@ class ColorDistort(BaseOperator):
if
np
.
random
.
uniform
(
0.
,
1.
)
<
prob
:
if
np
.
random
.
uniform
(
0.
,
1.
)
<
prob
:
return
img
return
img
delta
=
np
.
random
.
uniform
(
low
,
high
)
delta
=
np
.
random
.
uniform
(
low
,
high
)
img
=
img
.
astype
(
np
.
float32
)
img
=
img
.
astype
(
np
.
float32
)
img
+=
delta
img
+=
delta
return
img
return
img
def
__call__
(
self
,
sample
,
context
=
None
):
def
apply
(
self
,
sample
,
context
=
None
):
img
=
sample
[
'image'
]
img
=
sample
[
'image'
]
if
self
.
random_apply
:
if
self
.
random_apply
:
functions
=
[
functions
=
[
self
.
apply_brightness
,
self
.
apply_brightness
,
self
.
apply_contrast
,
self
.
apply_contrast
,
self
.
apply_saturation
,
self
.
apply_hue
self
.
apply_saturation
,
self
.
apply_hue
,
]
]
distortions
=
np
.
random
.
permutation
(
functions
)
distortions
=
np
.
random
.
permutation
(
functions
)
[:
self
.
count
]
for
func
in
distortions
:
for
func
in
distortions
:
img
=
func
(
img
)
img
=
func
(
img
)
sample
[
'image'
]
=
img
sample
[
'image'
]
=
img
return
sample
return
sample
img
=
self
.
apply_brightness
(
img
)
img
=
self
.
apply_brightness
(
img
)
mode
=
np
.
random
.
randint
(
0
,
2
)
if
mode
:
img
=
self
.
apply_contrast
(
img
)
img
=
self
.
apply_saturation
(
img
)
img
=
self
.
apply_hue
(
img
)
if
not
mode
:
img
=
self
.
apply_contrast
(
img
)
if
self
.
random_channel
:
if
np
.
random
.
randint
(
0
,
2
):
img
=
img
[...,
np
.
random
.
permutation
(
3
)]
sample
[
'image'
]
=
img
return
sample
@register_op
class AutoAugment(BaseOperator):
    def __init__(self, autoaug_type="v1"):
        """
        Args:
            autoaug_type (str): autoaug type, support v0, v1, v2, v3, test
        """
        super(AutoAugment, self).__init__()
        self.autoaug_type = autoaug_type

    def apply(self, sample, context=None):
        """
        Learning Data Augmentation Strategies for Object Detection,
        see https://arxiv.org/abs/1906.11172
        """
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image is not a numpy array.".format(self))
        if len(im.shape) != 3:
            raise ImageError("{}: image is not 3-dimensional.".format(self))
        if len(gt_bbox) == 0:
            # Nothing to augment against; leave the sample untouched.
            return sample

        height, width, _ = im.shape
        # Convert [xmin, ymin, xmax, ymax] pixel boxes to the normalized
        # [ymin, xmin, ymax, xmax] layout the autoaugment utils expect.
        norm_gt_bbox = np.ones_like(gt_bbox, dtype=np.float32)
        norm_gt_bbox[:, 0] = gt_bbox[:, 1] / float(height)
        norm_gt_bbox[:, 1] = gt_bbox[:, 0] / float(width)
        norm_gt_bbox[:, 2] = gt_bbox[:, 3] / float(height)
        norm_gt_bbox[:, 3] = gt_bbox[:, 2] / float(width)

        from .autoaugment_utils import distort_image_with_autoaugment
        im, norm_gt_bbox = distort_image_with_autoaugment(im, norm_gt_bbox,
                                                          self.autoaug_type)

        # Convert the distorted boxes back to pixel [xmin, ymin, xmax, ymax].
        gt_bbox[:, 0] = norm_gt_bbox[:, 1] * float(width)
        gt_bbox[:, 1] = norm_gt_bbox[:, 0] * float(height)
        gt_bbox[:, 2] = norm_gt_bbox[:, 3] * float(width)
        gt_bbox[:, 3] = norm_gt_bbox[:, 2] * float(height)

        sample['image'] = im
        sample['gt_bbox'] = gt_bbox
        return sample
@register_op
class RandomFlip(BaseOperator):
    def __init__(self, prob=0.5):
        """
        Args:
            prob (float): the probability of flipping image
        """
        super(RandomFlip, self).__init__()
        self.prob = prob
        if not (isinstance(self.prob, float)):
            raise TypeError("{}: input type is invalid.".format(self))

    def apply_segm(self, segms, height, width):
        # Horizontally mirror each segmentation; polygons are flipped
        # analytically, RLE masks are decoded, flipped and re-encoded.
        def _flip_poly(poly, width):
            flipped_poly = np.array(poly)
            # Even indices are x coordinates.
            flipped_poly[0::2] = width - np.array(poly[0::2])
            return flipped_poly.tolist()

        def _flip_rle(rle, height, width):
            # Uncompressed RLE ('counts' is a list) must be converted to
            # the compressed form before decoding.
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, height, width)
            mask = mask_util.decode(rle)
            mask = mask[:, ::-1]
            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
            return rle

        flipped_segms = []
        for segm in segms:
            if is_poly(segm):
                # Polygon format
                flipped_segms.append(
                    [_flip_poly(poly, width) for poly in segm])
            else:
                # RLE format
                # Local import binds mask_util in this scope before
                # _flip_rle (a closure over it) is called below.
                import pycocotools.mask as mask_util
                flipped_segms.append(_flip_rle(segm, height, width))
        return flipped_segms

    def apply_keypoint(self, gt_keypoint, width):
        # Keypoints are interleaved (x, y) columns; mirror the x ones.
        for i in range(gt_keypoint.shape[1]):
            if i % 2 == 0:
                old_x = gt_keypoint[:, i].copy()
                gt_keypoint[:, i] = width - old_x
        return gt_keypoint

    def apply_image(self, image):
        # Mirror along the width axis (HWC layout).
        return image[:, ::-1, :]

    def apply_bbox(self, bbox, width):
        # xmin/xmax swap roles after mirroring.
        oldx1 = bbox[:, 0].copy()
        oldx2 = bbox[:, 2].copy()
        bbox[:, 0] = width - oldx2
        bbox[:, 2] = width - oldx1
        return bbox

    def apply(self, sample, context=None):
        """Flip the image and bounding box.
        Operators:
            1. Flip the image numpy.
            2. Transform the bboxes' x coordinates.
              (Must judge whether the coordinates are normalized!)
            3. Transform the segmentations' x coordinates.
              (Must judge whether the coordinates are normalized!)
        Output:
            sample: the image, bounding box and segmentation part
                    in sample are flipped.
        """
        if np.random.uniform(0, 1) < self.prob:
            im = sample['image']
            height, width = im.shape[:2]
            im = self.apply_image(im)
            if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
                sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], width)
            if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
                sample['gt_poly'] = self.apply_segm(sample['gt_poly'], height,
                                                    width)
            if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
                sample['gt_keypoint'] = self.apply_keypoint(
                    sample['gt_keypoint'], width)
            if 'semantic' in sample and sample['semantic']:
                sample['semantic'] = sample['semantic'][:, ::-1]
            if 'gt_segm' in sample and sample['gt_segm'].any():
                sample['gt_segm'] = sample['gt_segm'][:, :, ::-1]
            # Record that this sample was flipped so downstream ops
            # (e.g. multi-scale test) can undo it.
            sample['flipped'] = True
            sample['image'] = im
        return sample
@
register_op
class
Resize
(
BaseOperator
):
def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
    """
    Resize image to target size. if keep_ratio is True,
    resize the image's long side to the maximum of target_size
    if keep_ratio is False, resize the image to target size(h, w)
    Args:
        target_size (int|list): image target size
        keep_ratio (bool): whether keep_ratio or not, default true
        interp (int): the interpolation method
    """
    super(Resize, self).__init__()
    # Validate first, then normalize a scalar into a square [h, w] pair.
    if not isinstance(target_size, (Integral, Sequence)):
        raise TypeError(
            "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
            format(type(target_size)))
    if isinstance(target_size, Integral):
        target_size = [target_size, target_size]
    self.keep_ratio = keep_ratio
    self.interp = interp
    self.target_size = target_size
def apply_image(self, image, scale):
    """Rescale ``image`` by the (fx, fy) factors given in ``scale``."""
    fx, fy = scale
    return cv2.resize(
        image, None, None, fx=fx, fy=fy, interpolation=self.interp)
def apply_bbox(self, bbox, scale, size):
    """Scale box coordinates and clip them to the resized image extent.

    Args:
        bbox: (N, 4) array in [xmin, ymin, xmax, ymax] order, updated
            in place.
        scale: (im_scale_x, im_scale_y) resize factors.
        size: (resize_w, resize_h) bounds for clipping.
    """
    sx, sy = scale
    max_w, max_h = size
    # Even columns are x, odd columns are y.
    bbox[:, 0::2] = np.clip(bbox[:, 0::2] * sx, 0, max_w)
    bbox[:, 1::2] = np.clip(bbox[:, 1::2] * sy, 0, max_h)
    return bbox
def apply_segm(self, segms, im_size, scale):
    """Resize segmentations (polygon lists or COCO RLE dicts).

    Args:
        segms: list of segmentations; each item is either a list of
            polygons or an RLE dict.
        im_size: (height, width) of the original image.
        scale: (im_scale_x, im_scale_y) resize factors.
    """

    def _resize_poly(poly, im_scale_x, im_scale_y):
        resized_poly = np.array(poly)
        # Even indices are x, odd indices are y.
        resized_poly[0::2] *= im_scale_x
        resized_poly[1::2] *= im_scale_y
        return resized_poly.tolist()

    def _resize_rle(rle, im_h, im_w, im_scale_x, im_scale_y):
        # Uncompressed RLE ('counts' is a list) must be converted to the
        # compressed form before decoding.
        if 'counts' in rle and type(rle['counts']) == list:
            rle = mask_util.frPyObjects(rle, im_h, im_w)

        mask = mask_util.decode(rle)
        # Bug fix: resize the decoded mask itself. The previous code
        # passed an undefined name `image` to cv2.resize, which raised
        # a NameError whenever an RLE segmentation was resized.
        mask = cv2.resize(
            mask,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)
        rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
        return rle

    im_h, im_w = im_size
    im_scale_x, im_scale_y = scale
    resized_segms = []
    for segm in segms:
        if is_poly(segm):
            # Polygon format
            resized_segms.append(
                [_resize_poly(poly, im_scale_x, im_scale_y) for poly in segm])
        else:
            # RLE format
            # Local import binds mask_util before _resize_rle (a closure
            # over it) runs below.
            import pycocotools.mask as mask_util
            resized_segms.append(
                _resize_rle(segm, im_h, im_w, im_scale_x, im_scale_y))

    return resized_segms
def
apply
(
self
,
sample
,
context
=
None
):
""" Resize the image numpy.
"""
im
=
sample
[
'image'
]
if
not
isinstance
(
im
,
np
.
ndarray
):
raise
TypeError
(
"{}: image type is not numpy."
.
format
(
self
))
if
len
(
im
.
shape
)
!=
3
:
raise
ImageError
(
'{}: image is not 3-dimensional.'
.
format
(
self
))
# apply image
im_shape
=
im
.
shape
if
self
.
keep_ratio
:
if
np
.
random
.
randint
(
0
,
2
):
im_size_min
=
np
.
min
(
im_shape
[
0
:
2
])
img
=
self
.
apply_contrast
(
img
)
im_size_max
=
np
.
max
(
im_shape
[
0
:
2
])
if
self
.
hsv_format
:
img
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_RGB2HSV
)
target_size_min
=
np
.
min
(
self
.
target_size
)
img
=
self
.
apply_saturation
(
img
)
target_size_max
=
np
.
max
(
self
.
target_size
)
img
=
self
.
apply_hue
(
img
)
if
self
.
hsv_format
:
im_scale
=
min
(
target_size_min
/
im_size_min
,
img
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_HSV2RGB
)
target_size_max
/
im_size_max
)
resize_h
=
im_scale
*
float
(
im_shape
[
0
])
resize_w
=
im_scale
*
float
(
im_shape
[
1
])
im_scale_x
=
im_scale
im_scale_y
=
im_scale
else
:
else
:
if
self
.
hsv_format
:
resize_h
,
resize_w
=
self
.
target_size
img
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_RGB2HSV
)
im_scale_y
=
resize_h
/
im_shape
[
0
]
img
=
self
.
apply_saturation
(
img
)
im_scale_x
=
resize_w
/
im_shape
[
1
]
img
=
self
.
apply_hue
(
img
)
if
self
.
hsv_format
:
im
=
self
.
apply_image
(
sample
[
'image'
],
[
im_scale_x
,
im_scale_y
])
img
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_HSV2RGB
)
sample
[
'image'
]
=
im
img
=
self
.
apply_contrast
(
img
)
sample
[
'im_shape'
]
=
np
.
asarray
([
resize_h
,
resize_w
],
dtype
=
np
.
float32
)
if
'scale_factor'
in
sample
:
scale_factor
=
sample
[
'scale_factor'
]
sample
[
'scale_factor'
]
=
np
.
asarray
(
[
scale_factor
[
0
]
*
im_scale_y
,
scale_factor
[
1
]
*
im_scale_x
],
dtype
=
np
.
float32
)
else
:
sample
[
'scale_factor'
]
=
np
.
asarray
(
[
im_scale_y
,
im_scale_x
],
dtype
=
np
.
float32
)
# apply bbox
if
'gt_bbox'
in
sample
and
len
(
sample
[
'gt_bbox'
])
>
0
:
sample
[
'gt_bbox'
]
=
self
.
apply_bbox
(
sample
[
'gt_bbox'
],
[
im_scale_x
,
im_scale_y
],
[
resize_w
,
resize_h
])
# apply polygon
if
'gt_poly'
in
sample
and
len
(
sample
[
'gt_poly'
])
>
0
:
sample
[
'gt_poly'
]
=
self
.
apply_segm
(
sample
[
'gt_poly'
],
im_shape
[:
2
],
[
im_scale_x
,
im_scale_y
])
# apply semantic
if
'semantic'
in
sample
and
sample
[
'semantic'
]:
semantic
=
sample
[
'semantic'
]
semantic
=
cv2
.
resize
(
semantic
.
astype
(
'float32'
),
None
,
None
,
fx
=
im_scale_x
,
fy
=
im_scale_y
,
interpolation
=
self
.
interp
)
semantic
=
np
.
asarray
(
semantic
).
astype
(
'int32'
)
semantic
=
np
.
expand_dims
(
semantic
,
0
)
sample
[
'semantic'
]
=
semantic
# apply gt_segm
if
'gt_segm'
in
sample
and
len
(
sample
[
'gt_segm'
])
>
0
:
masks
=
[
cv2
.
resize
(
gt_segm
,
None
,
None
,
fx
=
im_scale_x
,
fy
=
im_scale_y
,
interpolation
=
cv2
.
INTER_NEAREST
)
for
gt_segm
in
sample
[
'gt_segm'
]
]
sample
[
'gt_segm'
]
=
np
.
asarray
(
masks
).
astype
(
np
.
uint8
)
if
self
.
random_channel
:
if
np
.
random
.
randint
(
0
,
2
):
img
=
img
[...,
np
.
random
.
permutation
(
3
)]
sample
[
'image'
]
=
img
return
sample
return
sample
@
register_op
@
register_op
class
CornerRandColor
(
ColorDistort
):
class
MultiscaleTestResize
(
BaseOperator
):
"""Random color for CornerNet series models.
def
__init__
(
self
,
origin_target_size
=
[
800
,
1333
],
target_size
=
[],
interp
=
cv2
.
INTER_LINEAR
,
use_flip
=
True
):
"""
Rescale image to the each size in target size, and capped at max_size.
Args:
Args:
saturation (float): saturation settings.
origin_target_size (list): origin target size of image
contrast (float): contrast settings
.
target_size (list): A list of target sizes of image
.
brightness (float): brightness settings
.
interp (int): the interpolation method
.
is_scale (bool): whether to scale the input image
.
use_flip (bool): whether use flip augmentation
.
"""
"""
super
(
MultiscaleTestResize
,
self
).
__init__
()
self
.
interp
=
interp
self
.
use_flip
=
use_flip
def
__init__
(
self
,
if
not
isinstance
(
target_size
,
Sequence
):
saturation
=
0.4
,
raise
TypeError
(
contrast
=
0.4
,
"Type of target_size is invalid. Must be List or Tuple, now is {}"
.
brightness
=
0.4
,
format
(
type
(
target_size
)))
is_scale
=
True
):
self
.
target_size
=
target_size
super
(
CornerRandColor
,
self
).
__init__
(
saturation
=
saturation
,
contrast
=
contrast
,
brightness
=
brightness
)
self
.
is_scale
=
is_scale
def
apply_saturation
(
self
,
img
,
img_gray
):
if
not
isinstance
(
origin_target_size
,
Sequence
):
alpha
=
1.
+
np
.
random
.
uniform
(
raise
TypeError
(
low
=-
self
.
saturation
,
high
=
self
.
saturation
)
"Type of origin_target_size is invalid. Must be List or Tuple, now is {}"
.
self
.
_blend
(
alpha
,
img
,
img_gray
[:,
:,
None
])
format
(
type
(
origin_target_size
)))
return
img
def
apply_contrast
(
self
,
img
,
img_gray
):
self
.
origin_target_size
=
origin_target_size
alpha
=
1.
+
np
.
random
.
uniform
(
low
=-
self
.
contrast
,
high
=
self
.
contrast
)
img_mean
=
img_gray
.
mean
()
self
.
_blend
(
alpha
,
img
,
img_mean
)
return
img
def
apply_brightness
(
self
,
img
,
img_gray
):
def
apply
(
self
,
sample
,
context
=
None
):
alpha
=
1
+
np
.
random
.
uniform
(
""" Resize the image numpy for multi-scale test.
low
=-
self
.
brightness
,
high
=
self
.
brightness
)
"""
img
*=
alpha
samples
=
[]
return
img
resizer
=
Resize
(
self
.
origin_target_size
,
keep_ratio
=
True
,
interp
=
self
.
interp
)
samples
.
append
(
resizer
(
sample
.
copy
(),
context
))
if
self
.
use_flip
:
flipper
=
RandomFlip
(
1.1
)
samples
.
append
(
flipper
(
sample
.
copy
(),
context
=
context
))
def
_blend
(
self
,
alpha
,
img
,
img_mean
):
for
size
in
self
.
target_size
:
img
*=
alpha
resizer
=
Resize
(
size
,
keep_ratio
=
True
,
interp
=
self
.
interp
)
img_mean
*=
(
1
-
alpha
)
samples
.
append
(
resizer
(
sample
.
copy
(),
context
))
img
+=
img_mean
def
__call__
(
self
,
sample
,
context
=
None
):
return
samples
img
=
sample
[
'image'
]
if
self
.
is_scale
:
img
=
img
.
astype
(
np
.
float32
,
copy
=
False
)
img
/=
255.
img_gray
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_BGR2GRAY
)
functions
=
[
self
.
apply_brightness
,
self
.
apply_contrast
,
self
.
apply_saturation
,
]
distortions
=
np
.
random
.
permutation
(
functions
)
for
func
in
distortions
:
img
=
func
(
img
,
img_gray
)
sample
[
'image'
]
=
img
return
sample
@register_op
class NormalizePermute(BaseOperator):
    """Normalize an image channel-wise and permute it from HWC to CHW.

    Args:
        mean (list): per-channel mean values, in RGB order.
        std (list): per-channel std values, in RGB order.
    """

    def __init__(self,
                 mean=(123.675, 116.28, 103.53),
                 std=(58.395, 57.120, 57.375)):
        super(NormalizePermute, self).__init__()
        # Defaults are tuples to avoid the mutable-default-argument pitfall;
        # stored as lists so the public attribute type is unchanged.
        self.mean = list(mean)
        self.std = list(std)

    def __call__(self, sample, context=None):
        """Normalize ``sample['image']`` in place.

        Converts the image to float32, transposes HWC -> CHW, subtracts the
        per-channel mean and multiplies by 1/std.
        """
        img = sample['image']
        img = img.astype(np.float32)
        img = img.transpose((2, 0, 1))
        mean = np.array(self.mean, dtype=np.float32)
        invstd = 1. / np.array(self.std, dtype=np.float32)
        # Vectorized in-place normalization via broadcasting; replaces the
        # previous per-channel v.__isub__(m).__imul__(s) loop (same result,
        # idiomatic and faster).
        img -= mean[:, np.newaxis, np.newaxis]
        img *= invstd[:, np.newaxis, np.newaxis]
        sample['image'] = img
        return sample
@
register_op
@
register_op
...
@@ -1616,14 +851,9 @@ class RandomExpand(BaseOperator):
...
@@ -1616,14 +851,9 @@ class RandomExpand(BaseOperator):
ratio (float): maximum expansion ratio.
ratio (float): maximum expansion ratio.
prob (float): probability to expand.
prob (float): probability to expand.
fill_value (list): color value used to fill the canvas. in RGB order.
fill_value (list): color value used to fill the canvas. in RGB order.
is_mask_expand(bool): whether expand the segmentation.
"""
"""
def
__init__
(
self
,
def
__init__
(
self
,
ratio
=
4.
,
prob
=
0.5
,
fill_value
=
(
127.5
,
127.5
,
127.5
)):
ratio
=
4.
,
prob
=
0.5
,
fill_value
=
(
127.5
,
)
*
3
,
is_mask_expand
=
False
):
super
(
RandomExpand
,
self
).
__init__
()
super
(
RandomExpand
,
self
).
__init__
()
assert
ratio
>
1.01
,
"expand ratio must be larger than 1.01"
assert
ratio
>
1.01
,
"expand ratio must be larger than 1.01"
self
.
ratio
=
ratio
self
.
ratio
=
ratio
...
@@ -1635,68 +865,273 @@ class RandomExpand(BaseOperator):
...
@@ -1635,68 +865,273 @@ class RandomExpand(BaseOperator):
if
not
isinstance
(
fill_value
,
tuple
):
if
not
isinstance
(
fill_value
,
tuple
):
fill_value
=
tuple
(
fill_value
)
fill_value
=
tuple
(
fill_value
)
self
.
fill_value
=
fill_value
self
.
fill_value
=
fill_value
self
.
is_mask_expand
=
is_mask_expand
def
expand_segms
(
self
,
segms
,
x
,
y
,
height
,
width
,
ratio
):
def apply(self, sample, context=None):
    """Randomly expand the image canvas.

    With probability ``self.prob`` the sample is returned untouched;
    otherwise an expansion ratio is drawn from [1, self.ratio), and the
    actual padding is delegated to the Pad operator with a random offset
    and ``self.fill_value`` as the canvas color.
    """
    if np.random.uniform(0., 1.) < self.prob:
        return sample

    img = sample['image']
    orig_h, orig_w = img.shape[:2]
    expand_ratio = np.random.uniform(1., self.ratio)
    new_h, new_w = int(orig_h * expand_ratio), int(orig_w * expand_ratio)
    # A ratio that rounds down to the original size would make randint's
    # range empty — bail out in that case.
    if not new_h > orig_h or not new_w > orig_w:
        return sample
    off_y = np.random.randint(0, new_h - orig_h)
    off_x = np.random.randint(0, new_w - orig_w)

    padder = Pad([new_h, new_w],
                 pad_mode=-1,
                 offsets=[off_x, off_y],
                 fill_value=self.fill_value)
    return padder(sample, context=context)
@register_op
class CropWithSampling(BaseOperator):
    def __init__(self, batch_sampler, satisfy_all=False, avoid_no_bbox=True):
        """
        Args:
            batch_sampler (list): Multiple sets of different
                                  parameters for cropping.
            satisfy_all (bool): whether all boxes must satisfy.
            e.g.[[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
           [max sample, max trial, min scale, max scale,
            min aspect ratio, max aspect ratio,
            min overlap, max overlap]
            avoid_no_bbox (bool): whether to avoid the
                                  situation where the box does not appear.
        """
        super(CropWithSampling, self).__init__()
        self.batch_sampler = batch_sampler
        self.satisfy_all = satisfy_all
        self.avoid_no_bbox = avoid_no_bbox

    def apply(self, sample, context):
        """
        Crop the image and modify bounding box.
        Operators:
            1. Scale the image width and height.
            2. Crop the image according to a random sample.
            3. Rescale the bounding box.
            4. Determine if the new bbox is satisfied in the new image.
        Returns:
            sample: the image, bounding box are replaced.
        """
        assert 'image' in sample, "image data not found"
        im = sample['image']
        gt_class = sample['gt_class']
        im_height, im_width = im.shape[:2]
        gt_score = None
        if 'gt_score' in sample:
            gt_score = sample['gt_score']
        gt_bbox = sample['gt_bbox'].tolist()

        # Collect candidate crop windows; each sampler config may
        # contribute up to sampler[0] windows out of sampler[1] trials.
        candidates = []
        for sampler in self.batch_sampler:
            found = 0
            for _ in range(sampler[1]):
                if found >= sampler[0]:
                    break
                cand = generate_sample_bbox(sampler)
                if satisfy_sample_constraint(sampler, cand, gt_bbox,
                                             self.satisfy_all):
                    candidates.append(cand)
                    found += 1

        im = np.array(im)
        # Try candidates in random order; the first acceptable one wins.
        while candidates:
            idx = int(np.random.uniform(0, len(candidates)))
            crop = clip_bbox(candidates.pop(idx))
            crop_bbox, crop_class, crop_score = \
                filter_and_process(crop, gt_bbox, gt_class, scores=gt_score)
            if self.avoid_no_bbox and len(crop_bbox) < 1:
                continue
            xmin = int(crop[0] * im_width)
            xmax = int(crop[2] * im_width)
            ymin = int(crop[1] * im_height)
            ymax = int(crop[3] * im_height)
            sample['image'] = im[ymin:ymax, xmin:xmax]
            sample['gt_bbox'] = crop_bbox
            sample['gt_class'] = crop_class
            sample['gt_score'] = crop_score
            return sample
        return sample
@register_op
class CropWithDataAchorSampling(BaseOperator):
    def __init__(self,
                 batch_sampler,
                 anchor_sampler=None,
                 target_size=None,
                 das_anchor_scales=[16, 32, 64, 128],
                 sampling_prob=0.5,
                 min_size=8.,
                 avoid_no_bbox=True):
        """
        Args:
            anchor_sampler (list): anchor_sampling sets of different
                                   parameters for cropping.
            batch_sampler (list): Multiple sets of different
                                  parameters for cropping.
              e.g.[[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]]
                  [[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                   [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                   [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                   [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                   [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]]
              [max sample, max trial, min scale, max scale,
               min aspect ratio, max aspect ratio,
               min overlap, max overlap, min coverage, max coverage]
            target_size (bool): target image size.
            das_anchor_scales (list[float]): a list of anchor scales in data
                anchor sampling.
            min_size (float): minimum size of sampled bbox.
            avoid_no_bbox (bool): whether to avoid the
                                  situation where the box does not appear.
        """
        super(CropWithDataAchorSampling, self).__init__()
        self.anchor_sampler = anchor_sampler
        self.batch_sampler = batch_sampler
        self.target_size = target_size
        self.sampling_prob = sampling_prob
        self.min_size = min_size
        self.avoid_no_bbox = avoid_no_bbox
        self.das_anchor_scales = np.array(das_anchor_scales)

    def apply(self, sample, context):
        """
        Crop the image and modify bounding box.
        Operators:
            1. Scale the image width and height.
            2. Crop the image according to a random sample.
            3. Rescale the bounding box.
            4. Determine if the new bbox is satisfied in the new image.
        Returns:
            sample: the image, bounding box are replaced.
        """
        assert 'image' in sample, "image data not found"
        im = sample['image']
        gt_class = sample['gt_class']
        image_height, image_width = im.shape[:2]
        gt_score = None
        if 'gt_score' in sample:
            gt_score = sample['gt_score']
        gt_bbox = sample['gt_bbox'].tolist()
        sampled_bbox = []

        prob = np.random.uniform(0., 1.)
        if prob > self.sampling_prob:
            # anchor sampling branch
            assert self.anchor_sampler
            for sampler in self.anchor_sampler:
                found = 0
                for _ in range(sampler[1]):
                    if found >= sampler[0]:
                        break
                    sample_bbox = data_anchor_sampling(
                        gt_bbox, image_width, image_height,
                        self.das_anchor_scales, self.target_size)
                    if sample_bbox == 0:
                        break
                    if satisfy_sample_constraint_coverage(
                            sampler, sample_bbox, gt_bbox):
                        sampled_bbox.append(sample_bbox)
                        found += 1
            im = np.array(im)
            while sampled_bbox:
                idx = int(np.random.uniform(0, len(sampled_bbox)))
                sample_bbox = sampled_bbox.pop(idx)

                if 'gt_keypoint' in sample.keys():
                    keypoints = (sample['gt_keypoint'],
                                 sample['keypoint_ignore'])
                    crop_bbox, crop_class, crop_score, gt_keypoints = \
                        filter_and_process(sample_bbox, gt_bbox, gt_class,
                                           scores=gt_score,
                                           keypoints=keypoints)
                else:
                    crop_bbox, crop_class, crop_score = filter_and_process(
                        sample_bbox, gt_bbox, gt_class, scores=gt_score)
                crop_bbox, crop_class, crop_score = bbox_area_sampling(
                    crop_bbox, crop_class, crop_score, self.target_size,
                    self.min_size)

                if self.avoid_no_bbox:
                    if len(crop_bbox) < 1:
                        continue
                im = crop_image_sampling(im, sample_bbox, image_width,
                                         image_height, self.target_size)
                sample['image'] = im
                sample['gt_bbox'] = crop_bbox
                sample['gt_class'] = crop_class
                sample['gt_score'] = crop_score
                if 'gt_keypoint' in sample.keys():
                    sample['gt_keypoint'] = gt_keypoints[0]
                    sample['keypoint_ignore'] = gt_keypoints[1]
                return sample
            return sample
        else:
            # square-window sampling branch
            for sampler in self.batch_sampler:
                found = 0
                for _ in range(sampler[1]):
                    if found >= sampler[0]:
                        break
                    sample_bbox = generate_sample_bbox_square(
                        sampler, image_width, image_height)
                    if satisfy_sample_constraint_coverage(
                            sampler, sample_bbox, gt_bbox):
                        sampled_bbox.append(sample_bbox)
                        found += 1
            im = np.array(im)
            while sampled_bbox:
                idx = int(np.random.uniform(0, len(sampled_bbox)))
                sample_bbox = clip_bbox(sampled_bbox.pop(idx))

                if 'gt_keypoint' in sample.keys():
                    keypoints = (sample['gt_keypoint'],
                                 sample['keypoint_ignore'])
                    crop_bbox, crop_class, crop_score, gt_keypoints = \
                        filter_and_process(sample_bbox, gt_bbox, gt_class,
                                           scores=gt_score,
                                           keypoints=keypoints)
                else:
                    crop_bbox, crop_class, crop_score = filter_and_process(
                        sample_bbox, gt_bbox, gt_class, scores=gt_score)
                # sampling bbox according the bbox area
                crop_bbox, crop_class, crop_score = bbox_area_sampling(
                    crop_bbox, crop_class, crop_score, self.target_size,
                    self.min_size)

                if self.avoid_no_bbox:
                    if len(crop_bbox) < 1:
                        continue
                xmin = int(sample_bbox[0] * image_width)
                xmax = int(sample_bbox[2] * image_width)
                ymin = int(sample_bbox[1] * image_height)
                ymax = int(sample_bbox[3] * image_height)
                sample['image'] = im[ymin:ymax, xmin:xmax]
                sample['gt_bbox'] = crop_bbox
                sample['gt_class'] = crop_class
                sample['gt_score'] = crop_score
                if 'gt_keypoint' in sample.keys():
                    sample['gt_keypoint'] = gt_keypoints[0]
                    sample['keypoint_ignore'] = gt_keypoints[1]
                return sample
            return sample
...
@@ -1801,12 +1236,11 @@ class RandomCrop(BaseOperator):
...
@@ -1801,12 +1236,11 @@ class RandomCrop(BaseOperator):
crop_segms
.
append
(
_crop_rle
(
segm
,
crop
,
height
,
width
))
crop_segms
.
append
(
_crop_rle
(
segm
,
crop
,
height
,
width
))
return
crop_segms
return
crop_segms
def
__call__
(
self
,
sample
,
context
=
None
):
def
apply
(
self
,
sample
,
context
=
None
):
if
'gt_bbox'
in
sample
and
len
(
sample
[
'gt_bbox'
])
==
0
:
if
'gt_bbox'
in
sample
and
len
(
sample
[
'gt_bbox'
])
==
0
:
return
sample
return
sample
h
=
sample
[
'h'
]
h
,
w
=
sample
[
'image'
].
shape
[:
2
]
w
=
sample
[
'w'
]
gt_bbox
=
sample
[
'gt_bbox'
]
gt_bbox
=
sample
[
'gt_bbox'
]
# NOTE Original method attempts to generate one candidate for each
# NOTE Original method attempts to generate one candidate for each
...
@@ -1889,12 +1323,17 @@ class RandomCrop(BaseOperator):
...
@@ -1889,12 +1323,17 @@ class RandomCrop(BaseOperator):
sample
[
'gt_poly'
]
=
valid_polys
sample
[
'gt_poly'
]
=
valid_polys
else
:
else
:
sample
[
'gt_poly'
]
=
crop_polys
sample
[
'gt_poly'
]
=
crop_polys
if
'gt_segm'
in
sample
:
sample
[
'gt_segm'
]
=
self
.
_crop_segm
(
sample
[
'gt_segm'
],
crop_box
)
sample
[
'gt_segm'
]
=
np
.
take
(
sample
[
'gt_segm'
],
valid_ids
,
axis
=
0
)
sample
[
'image'
]
=
self
.
_crop_image
(
sample
[
'image'
],
crop_box
)
sample
[
'image'
]
=
self
.
_crop_image
(
sample
[
'image'
],
crop_box
)
sample
[
'gt_bbox'
]
=
np
.
take
(
cropped_box
,
valid_ids
,
axis
=
0
)
sample
[
'gt_bbox'
]
=
np
.
take
(
cropped_box
,
valid_ids
,
axis
=
0
)
sample
[
'gt_class'
]
=
np
.
take
(
sample
[
'gt_class'
]
=
np
.
take
(
sample
[
'gt_class'
],
valid_ids
,
axis
=
0
)
sample
[
'gt_class'
],
valid_ids
,
axis
=
0
)
sample
[
'w'
]
=
crop_box
[
2
]
-
crop_box
[
0
]
sample
[
'h'
]
=
crop_box
[
3
]
-
crop_box
[
1
]
if
'gt_score'
in
sample
:
if
'gt_score'
in
sample
:
sample
[
'gt_score'
]
=
np
.
take
(
sample
[
'gt_score'
]
=
np
.
take
(
sample
[
'gt_score'
],
valid_ids
,
axis
=
0
)
sample
[
'gt_score'
],
valid_ids
,
axis
=
0
)
...
@@ -1936,494 +1375,313 @@ class RandomCrop(BaseOperator):
...
@@ -1936,494 +1375,313 @@ class RandomCrop(BaseOperator):
x1
,
y1
,
x2
,
y2
=
crop
x1
,
y1
,
x2
,
y2
=
crop
return
img
[
y1
:
y2
,
x1
:
x2
,
:]
return
img
[
y1
:
y2
,
x1
:
x2
,
:]
def
_crop_segm
(
self
,
segm
,
crop
):
@register_op
class PadBox(BaseOperator):
    def __init__(self, num_max_boxes=50):
        """
        Pad zeros to bboxes if number of bboxes is less than num_max_boxes.
        Args:
            num_max_boxes (int): the max number of bboxes
        """
        self.num_max_boxes = num_max_boxes
        super(PadBox, self).__init__()

    def __call__(self, sample):
        """Pad gt_bbox / gt_class / gt_score / difficult up to a fixed
        ``num_max_boxes`` length (truncating any extra boxes)."""
        assert 'gt_bbox' in sample
        bbox = sample['gt_bbox']
        num_max = self.num_max_boxes
        gt_num = min(num_max, len(bbox))
        pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
        if gt_num > 0:
            pad_bbox[:gt_num, :] = bbox[:gt_num, :]
        sample['gt_bbox'] = pad_bbox
        if 'gt_class' in sample:
            pad_class = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_class[:gt_num] = sample['gt_class'][:gt_num, 0]
            sample['gt_class'] = pad_class
        if 'gt_score' in sample:
            pad_score = np.zeros((num_max, ), dtype=np.float32)
            if gt_num > 0:
                pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
            sample['gt_score'] = pad_score
        # in training, for example in op ExpandImage,
        # the bbox and gt_class is expanded, but the difficult is not,
        # so, judging by it's length
        # BUGFIX: the original guarded this branch with 'is_difficult' while
        # reading and writing sample['difficult'] — a KeyError when only
        # 'is_difficult' exists, and a silent no-op when only 'difficult'
        # does. Guard on the key that is actually used.
        if 'difficult' in sample:
            pad_diff = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
            sample['difficult'] = pad_diff
        return sample
@register_op
class BboxXYXY2XYWH(BaseOperator):
    """
    Convert bbox XYXY format to XYWH format.
    """

    def __init__(self):
        super(BboxXYXY2XYWH, self).__init__()

    def __call__(self, sample):
        """Rewrite sample['gt_bbox'] from corner form (x1, y1, x2, y2) to
        center-size form (cx, cy, w, h), in place."""
        assert 'gt_bbox' in sample
        bbox = sample['gt_bbox']
        # width/height first, then shift the top-left corner to the center
        bbox[:, 2:4] -= bbox[:, :2]
        bbox[:, :2] += bbox[:, 2:4] / 2.
        sample['gt_bbox'] = bbox
        return sample
def apply(self, sample, context=None):
    """Resize by a random long-side scale, then crop/pad the result onto a
    square ``self.target_dim`` canvas; updates 'im_shape', 'scale_factor'
    and rescales/filters 'gt_bbox' / 'gt_class' accordingly.
    """
    img = sample['image']
    h, w = img.shape[:2]
    random_scale = np.random.uniform(*self.scale_range)
    dim = self.target_dim
    random_dim = int(dim * random_scale)
    dim_max = max(h, w)
    scale = random_dim / dim_max
    # BUGFIX: cv2.resize requires integer sizes and the slice bounds below
    # must be ints; the original passed raw floats (w * scale, h * scale).
    resize_w = int(round(w * scale))
    resize_h = int(round(h * scale))
    offset_x = int(max(0, np.random.uniform(0., resize_w - dim)))
    offset_y = int(max(0, np.random.uniform(0., resize_h - dim)))

    img = cv2.resize(img, (resize_w, resize_h), interpolation=self.interp)
    img = np.array(img)
    canvas = np.zeros((dim, dim, 3), dtype=img.dtype)
    canvas[:min(dim, resize_h), :min(dim, resize_w), :] = img[
        offset_y:offset_y + dim, offset_x:offset_x + dim, :]
    sample['image'] = canvas
    sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
    # BUGFIX: the original read sample['sacle_factor'] (typo) — a guaranteed
    # KeyError, since the key produced by the decode/resize ops is
    # 'scale_factor'.
    scale_factor = sample['scale_factor']
    sample['scale_factor'] = np.asarray(
        [scale_factor[0] * scale, scale_factor[1] * scale],
        dtype=np.float32)

    if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
        scale_array = np.array([scale, scale] * 2, dtype=np.float32)
        shift_array = np.array([offset_x, offset_y] * 2, dtype=np.float32)
        boxes = sample['gt_bbox'] * scale_array - shift_array
        boxes = np.clip(boxes, 0, dim - 1)
        # filter boxes with no area
        area = np.prod(boxes[..., 2:] - boxes[..., :2], axis=1)
        valid = (area > 1.).nonzero()[0]
        sample['gt_bbox'] = boxes[valid]
        sample['gt_class'] = sample['gt_class'][valid]
    return sample
@register_op
class Cutmix(BaseOperator):
    def __init__(self, alpha=1.5, beta=1.5):
        """
        CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, see https://arxiv.org/abs/1905.04899
        Cutmix image and gt_bbbox/gt_score
        Args:
             alpha (float): alpha parameter of beta distribute
             beta (float): beta parameter of beta distribute
        """
        super(Cutmix, self).__init__()
        self.alpha = alpha
        self.beta = beta
        if self.alpha <= 0.0:
            # BUGFIX: "shold" -> "should" in the error messages.
            raise ValueError("alpha should be positive in {}".format(self))
        if self.beta <= 0.0:
            raise ValueError("beta should be positive in {}".format(self))

    def apply_image(self, img1, img2, factor):
        """ _rand_bbox: paste a random rectangle of img2 over img1 on a
        shared canvas large enough to hold both images."""
        h = max(img1.shape[0], img2.shape[0])
        w = max(img1.shape[1], img2.shape[1])
        cut_rat = np.sqrt(1. - factor)

        # BUGFIX: np.int is a deprecated alias of the builtin int (removed
        # in NumPy 1.24); use int() directly.
        cut_w = int(w * cut_rat)
        cut_h = int(h * cut_rat)

        # uniform
        cx = np.random.randint(w)
        cy = np.random.randint(h)

        bbx1 = np.clip(cx - cut_w // 2, 0, w - 1)
        bby1 = np.clip(cy - cut_h // 2, 0, h - 1)
        bbx2 = np.clip(cx + cut_w // 2, 0, w - 1)
        bby2 = np.clip(cy + cut_h // 2, 0, h - 1)

        img_1 = np.zeros((h, w, img1.shape[2]), 'float32')
        img_1[:img1.shape[0], :img1.shape[1], :] = \
            img1.astype('float32')
        img_2 = np.zeros((h, w, img2.shape[2]), 'float32')
        img_2[:img2.shape[0], :img2.shape[1], :] = \
            img2.astype('float32')
        # BUGFIX: paste from the zero-padded copy img_2, not the raw img2 —
        # slicing the raw (possibly smaller) image can yield a patch that
        # does not match the target region and fail to broadcast.
        img_1[bby1:bby2, bbx1:bbx2, :] = img_2[bby1:bby2, bbx1:bbx2, :]
        return img_1

    def __call__(self, sample, context=None):
        if not isinstance(sample, Sequence):
            return sample

        assert len(sample) == 2, 'cutmix need two samples'

        factor = np.random.beta(self.alpha, self.beta)
        factor = max(0.0, min(1.0, factor))
        # Degenerate mix weights reduce to returning one of the inputs.
        if factor >= 1.0:
            return sample[0]
        if factor <= 0.0:
            return sample[1]
        img1 = sample[0]['image']
        img2 = sample[1]['image']
        img = self.apply_image(img1, img2, factor)
        gt_bbox1 = sample[0]['gt_bbox']
        gt_bbox2 = sample[1]['gt_bbox']
        gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
        gt_class1 = sample[0]['gt_class']
        gt_class2 = sample[1]['gt_class']
        gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
        gt_score1 = sample[0]['gt_score']
        gt_score2 = sample[1]['gt_score']
        # Scores are down-weighted by each image's mixing factor.
        gt_score = np.concatenate(
            (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
        sample = sample[0]
        sample['image'] = img
        sample['gt_bbox'] = gt_bbox
        sample['gt_score'] = gt_score
        sample['gt_class'] = gt_class
        return sample
@register_op
class Mixup(BaseOperator):
    def __init__(self, alpha=1.5, beta=1.5):
        """ Mixup image and gt_bbbox/gt_score
        Args:
            alpha (float): alpha parameter of beta distribute
            beta (float): beta parameter of beta distribute
        """
        super(Mixup, self).__init__()
        self.alpha = alpha
        self.beta = beta
        if self.alpha <= 0.0:
            raise ValueError("alpha shold be positive in {}".format(self))
        if self.beta <= 0.0:
            raise ValueError("beta shold be positive in {}".format(self))

    def apply_image(self, img1, img2, factor):
        # Blend the two images, weighted by factor, on a canvas large
        # enough to hold both.
        h = max(img1.shape[0], img2.shape[0])
        w = max(img1.shape[1], img2.shape[1])
        img = np.zeros((h, w, img1.shape[2]), 'float32')
        img[:img1.shape[0], :img1.shape[1], :] = \
            img1.astype('float32') * factor
        img[:img2.shape[0], :img2.shape[1], :] += \
            img2.astype('float32') * (1.0 - factor)
        return img.astype('uint8')

    def __call__(self, sample, context=None):
        if not isinstance(sample, Sequence):
            return sample

        assert len(sample) == 2, 'mixup need two samples'

        factor = np.random.beta(self.alpha, self.beta)
        factor = max(0.0, min(1.0, factor))
        # Degenerate mix weights reduce to returning one of the inputs.
        if factor >= 1.0:
            return sample[0]
        if factor <= 0.0:
            return sample[1]
        im = self.apply_image(sample[0]['image'], sample[1]['image'], factor)
        result = copy.deepcopy(sample[0])
        result['image'] = im
        # apply bbox and score
        if 'gt_bbox' in sample[0]:
            result['gt_bbox'] = np.concatenate(
                (sample[0]['gt_bbox'], sample[1]['gt_bbox']), axis=0)
        if 'gt_class' in sample[0]:
            result['gt_class'] = np.concatenate(
                (sample[0]['gt_class'], sample[1]['gt_class']), axis=0)
            gt_score1 = np.ones_like(sample[0]['gt_class'])
            gt_score2 = np.ones_like(sample[1]['gt_class'])
            result['gt_score'] = np.concatenate(
                (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
        if 'is_crowd' in sample[0]:
            result['is_crowd'] = np.concatenate(
                (sample[0]['is_crowd'], sample[1]['is_crowd']), axis=0)
        if 'difficult' in sample[0]:
            result['difficult'] = np.concatenate(
                (sample[0]['difficult'], sample[1]['difficult']), axis=0)
        return result
@register_op
class CornerRatio(BaseOperator):
    """
    Ratio of output size to image size
    Args:
        input_size (int): the size of input size
        output_size (int): the size of heatmap
    """

    def __init__(self, input_size=511, output_size=64):
        super(CornerRatio, self).__init__()
        self.input_size = input_size
        self.output_size = output_size

    def __call__(self, sample):
        # integer stride between input resolution and heatmap resolution
        scale = (self.input_size + 1) // self.output_size
        out_height, out_width = (sample['h'] + 1) // scale, (
            sample['w'] + 1) // scale
        height_ratio = out_height / float(sample['h'])
        width_ratio = out_width / float(sample['w'])
        sample['ratios'] = np.array([height_ratio, width_ratio])

        return sample
@register_op
class NormalizeBox(BaseOperator):
    """Transform the bounding box's coordinates to [0,1]."""

    def __init__(self):
        super(NormalizeBox, self).__init__()
@
register_op
def apply(self, sample, context):
    """Normalize gt_bbox (and gt_keypoint, if present) by image size.

    x-coordinates are divided by the image width, y-coordinates by the
    image height, in place on the sample's arrays.
    """
    im = sample['image']
    gt_bbox = sample['gt_bbox']
    height, width, _ = im.shape
    for i in range(gt_bbox.shape[0]):
        gt_bbox[i][0] = gt_bbox[i][0] / width
        gt_bbox[i][1] = gt_bbox[i][1] / height
        gt_bbox[i][2] = gt_bbox[i][2] / width
        gt_bbox[i][3] = gt_bbox[i][3] / height
    sample['gt_bbox'] = gt_bbox

    if 'gt_keypoint' in sample.keys():
        gt_keypoint = sample['gt_keypoint']
        # keypoints are stored flat as (x0, y0, x1, y1, ...): odd columns
        # are y (divide by height), even columns are x (divide by width)
        for i in range(gt_keypoint.shape[1]):
            if i % 2:
                gt_keypoint[:, i] = gt_keypoint[:, i] / height
            else:
                gt_keypoint[:, i] = gt_keypoint[:, i] / width
        sample['gt_keypoint'] = gt_keypoint

    return sample
@register_op
class RandomScaledCrop(BaseOperator):
    """Resize image and bbox based on long side (with optional random scaling),
    then crop or pad image to target size.
    Args:
        target_dim (int): target size.
        scale_range (list): random scale range.
        interp (int): interpolation method, default to `cv2.INTER_LINEAR`.
    """

    def __init__(self,
                 target_dim=512,
                 scale_range=[.1, 2.],
                 interp=cv2.INTER_LINEAR):
        super(RandomScaledCrop, self).__init__()
        self.target_dim = target_dim
        self.scale_range = scale_range
        self.interp = interp

    def __call__(self, sample):
        w = sample['w']
        h = sample['h']
        random_scale = np.random.uniform(*self.scale_range)
        dim = self.target_dim
        random_dim = int(dim * random_scale)
        dim_max = max(h, w)
        scale = random_dim / dim_max
        resize_w = int(round(w * scale))
        resize_h = int(round(h * scale))
        # random crop offsets within the resized image (0 when the
        # resized image is smaller than the target canvas)
        offset_x = int(max(0, np.random.uniform(0., resize_w - dim)))
        offset_y = int(max(0, np.random.uniform(0., resize_h - dim)))
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            scale_array = np.array([scale, scale] * 2, dtype=np.float32)
            shift_array = np.array([offset_x, offset_y] * 2, dtype=np.float32)
            boxes = sample['gt_bbox'] * scale_array - shift_array
            boxes = np.clip(boxes, 0, dim - 1)
            # filter boxes with no area
            area = np.prod(boxes[..., 2:] - boxes[..., :2], axis=1)
            valid = (area > 1.).nonzero()[0]
            sample['gt_bbox'] = boxes[valid]
            sample['gt_class'] = sample['gt_class'][valid]
        img = sample['image']
        img = cv2.resize(img, (resize_w, resize_h), interpolation=self.interp)
        img = np.array(img)
        canvas = np.zeros((dim, dim, 3), dtype=img.dtype)
        canvas[:min(dim, resize_h), :min(dim, resize_w), :] = img[
            offset_y:offset_y + dim, offset_x:offset_x + dim, :]
        sample['h'] = dim
        sample['w'] = dim
        sample['image'] = canvas
        sample['im_info'] = [resize_h, resize_w, scale]
        return sample
@register_op
class BboxXYXY2XYWH(BaseOperator):
    """
    Convert bbox XYXY format to XYWH format.
    """

    def __init__(self):
        super(BboxXYXY2XYWH, self).__init__()
def apply(self, sample, context=None):
    """Convert gt_bbox from (x1, y1, x2, y2) to (cx, cy, w, h) in place."""
    assert 'gt_bbox' in sample
    bbox = sample['gt_bbox']
    # width/height first, then shift the top-left corner to the center
    bbox[:, 2:4] = bbox[:, 2:4] - bbox[:, :2]
    bbox[:, :2] = bbox[:, :2] + bbox[:, 2:4] / 2.
    sample['gt_bbox'] = bbox
    return sample
@register_op
class ResizeAndPad(BaseOperator):
    """Resize image and bbox, then pad image to target size.
    Args:
        target_dim (int): target size
        interp (int): interpolation method, default to `cv2.INTER_LINEAR`.
    """

    def __init__(self, target_dim=512, interp=cv2.INTER_LINEAR):
        super(ResizeAndPad, self).__init__()
        self.target_dim = target_dim
        self.interp = interp

    def __call__(self, sample):
        w = sample['w']
        h = sample['h']
        interp = self.interp
        dim = self.target_dim
        dim_max = max(h, w)
        # scale so the LONG side matches target_dim; the short side is padded
        scale = self.target_dim / dim_max
        resize_w = int(round(w * scale))
        resize_h = int(round(h * scale))
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            scale_array = np.array([scale, scale] * 2, dtype=np.float32)
            sample['gt_bbox'] = np.clip(sample['gt_bbox'] * scale_array, 0,
                                        dim - 1)
        img = sample['image']
        img = cv2.resize(img, (resize_w, resize_h), interpolation=interp)
        img = np.array(img)
        canvas = np.zeros((dim, dim, 3), dtype=img.dtype)
        canvas[:resize_h, :resize_w, :] = img
        sample['h'] = dim
        sample['w'] = dim
        sample['image'] = canvas
        sample['im_info'] = [resize_h, resize_w, scale]
        return sample
@register_op
class PadBox(BaseOperator):
    def __init__(self, num_max_boxes=50):
        """
        Pad zeros to bboxes if number of bboxes is less than num_max_boxes.
        Args:
            num_max_boxes (int): the max number of bboxes
        """
        self.num_max_boxes = num_max_boxes
        super(PadBox, self).__init__()
def apply(self, sample, context=None):
    """Pad (or truncate) per-box annotations to exactly num_max_boxes rows.

    gt_bbox becomes (num_max_boxes, 4); gt_class / gt_score / difficult /
    is_crowd become flat (num_max_boxes,) arrays taken from column 0 of
    their original (N, 1) layout, zero-padded past the real boxes.
    """
    assert 'gt_bbox' in sample
    bbox = sample['gt_bbox']
    gt_num = min(self.num_max_boxes, len(bbox))
    num_max = self.num_max_boxes
    # fields = context['fields'] if context else []
    pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
    if gt_num > 0:
        pad_bbox[:gt_num, :] = bbox[:gt_num, :]
    sample['gt_bbox'] = pad_bbox
    if 'gt_class' in sample:
        pad_class = np.zeros((num_max, ), dtype=np.int32)
        if gt_num > 0:
            pad_class[:gt_num] = sample['gt_class'][:gt_num, 0]
        sample['gt_class'] = pad_class
    if 'gt_score' in sample:
        pad_score = np.zeros((num_max, ), dtype=np.float32)
        if gt_num > 0:
            pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
        sample['gt_score'] = pad_score
    # in training, for example in op ExpandImage,
    # the bbox and gt_class is expandded, but the difficult is not,
    # so, judging by it's length
    if 'difficult' in sample:
        pad_diff = np.zeros((num_max, ), dtype=np.int32)
        if gt_num > 0:
            pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
        sample['difficult'] = pad_diff
    if 'is_crowd' in sample:
        pad_crowd = np.zeros((num_max, ), dtype=np.int32)
        if gt_num > 0:
            pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
        sample['is_crowd'] = pad_crowd
    return sample
def iou_matrix(self, a, b):
    """Pairwise IoU between box sets `a` (N, 4) and `b` (M, 4) -> (N, M).

    Boxes are (x1, y1, x2, y2); non-overlapping pairs get exactly 0
    (guarded by np.where so 0/0 never happens).
    """
    tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])

    # zero out the "intersection" wherever the corners cross
    area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2)
    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
    area_o = (area_a[:, np.newaxis] + area_b - area_i)
    # return area_i / (area_o + 1e-10)
    return np.where(area_i == 0., np.zeros_like(area_i), area_i / area_o)
def match(self, anchors, gt_boxes):
    """Assign each anchor a gt index (-1 = background).

    An anchor is matched to its best-IoU gt if that IoU reaches
    self.match_threshold; additionally every gt is force-assigned its
    single best anchor regardless of threshold.
    """
    # XXX put smaller matrix first would be a little bit faster
    mat = self.iou_matrix(gt_boxes, anchors)
    max_anchor_for_each_gt = mat.argmax(axis=1)
    max_for_each_anchor = mat.max(axis=0)
    anchor_to_gt = mat.argmax(axis=0)
    anchor_to_gt[max_for_each_anchor < self.match_threshold] = -1
    # XXX ensure each gt has at least one anchor assigned,
    # see `force_match_for_each_row` in TF implementation
    one_hot = np.zeros_like(mat)
    one_hot[np.arange(mat.shape[0]), max_anchor_for_each_gt] = 1.
    max_anchor_indices = one_hot.sum(axis=0).nonzero()[0]
    max_gt_indices = one_hot.argmax(axis=0)[max_anchor_indices]
    anchor_to_gt[max_anchor_indices] = max_gt_indices
    return anchor_to_gt
def encode(self, anchors, boxes):
    """Encode target `boxes` as center/size offsets relative to `anchors`.

    Offsets are ((cb - ca) / wha, log(whb / wha)) with widths/heights
    computed inclusively (+1), matching the decoding used at inference.
    """
    wha = anchors[..., 2:] - anchors[..., :2] + 1
    ca = anchors[..., :2] + wha * .5
    whb = boxes[..., 2:] - boxes[..., :2] + 1
    cb = boxes[..., :2] + whb * .5
    offsets = np.empty_like(anchors)
    offsets[..., :2] = (cb - ca) / wha
    offsets[..., 2:] = np.log(whb / wha)
    return offsets
def __call__(self, sample):
    """Produce per-anchor classification labels and regression targets.

    Writes gt_label (num_anchors, 1) int32, gt_target (num_anchors, 4)
    float32 and fg_num (scalar int32) into the sample; unmatched anchors
    keep label 0 / zero targets.
    """
    gt_boxes = sample['gt_bbox']
    gt_labels = sample['gt_class']
    labels = np.full((self.anchors.shape[0], 1), 0, dtype=np.int32)
    targets = np.full((self.anchors.shape[0], 4), 0., dtype=np.float32)
    sample['gt_label'] = labels
    sample['gt_target'] = targets

    if len(gt_boxes) < 1:
        # no gt at all: everything stays background
        sample['fg_num'] = np.array(0, dtype=np.int32)
        return sample

    anchor_to_gt = self.match(self.anchors, gt_boxes)
    matched_indices = (anchor_to_gt >= 0).nonzero()[0]
    labels[matched_indices] = gt_labels[anchor_to_gt[matched_indices]]

    matched_boxes = gt_boxes[anchor_to_gt[matched_indices]]
    matched_anchors = self.anchors[matched_indices]
    matched_targets = self.encode(matched_anchors, matched_boxes)
    targets[matched_indices] = matched_targets
    sample['fg_num'] = np.array(len(matched_targets), dtype=np.int32)
    return sample
...
@@ -2443,7 +1701,7 @@ class DebugVisibleImage(BaseOperator):
...
@@ -2443,7 +1701,7 @@ class DebugVisibleImage(BaseOperator):
if
not
isinstance
(
self
.
is_normalized
,
bool
):
if
not
isinstance
(
self
.
is_normalized
,
bool
):
raise
TypeError
(
"{}: input type is invalid."
.
format
(
self
))
raise
TypeError
(
"{}: input type is invalid."
.
format
(
self
))
def
__call__
(
self
,
sampl
e
):
def
apply
(
self
,
sample
,
context
=
Non
e
):
image
=
Image
.
open
(
sample
[
'im_file'
]).
convert
(
'RGB'
)
image
=
Image
.
open
(
sample
[
'im_file'
]).
convert
(
'RGB'
)
out_file_name
=
sample
[
'im_file'
].
split
(
'/'
)[
-
1
]
out_file_name
=
sample
[
'im_file'
].
split
(
'/'
)[
-
1
]
width
=
sample
[
'w'
]
width
=
sample
[
'w'
]
...
@@ -2485,9 +1743,177 @@ class DebugVisibleImage(BaseOperator):
...
@@ -2485,9 +1743,177 @@ class DebugVisibleImage(BaseOperator):
x1
=
round
(
keypoint
[
2
*
j
]).
astype
(
np
.
int32
)
x1
=
round
(
keypoint
[
2
*
j
]).
astype
(
np
.
int32
)
y1
=
round
(
keypoint
[
2
*
j
+
1
]).
astype
(
np
.
int32
)
y1
=
round
(
keypoint
[
2
*
j
+
1
]).
astype
(
np
.
int32
)
draw
.
ellipse
(
draw
.
ellipse
(
(
x1
,
y1
,
x1
+
5
,
y1i
+
5
),
(
x1
,
y1
,
x1
+
5
,
y1
+
5
),
fill
=
'green'
,
outline
=
'green'
)
fill
=
'green'
,
outline
=
'green'
)
save_path
=
os
.
path
.
join
(
self
.
output_dir
,
out_file_name
)
save_path
=
os
.
path
.
join
(
self
.
output_dir
,
out_file_name
)
image
.
save
(
save_path
,
quality
=
95
)
image
.
save
(
save_path
,
quality
=
95
)
return
sample
return
sample
@register_op
class Pad(BaseOperator):
    def __init__(self,
                 size=None,
                 size_divisor=32,
                 pad_mode=0,
                 offsets=None,
                 fill_value=(127.5, 127.5, 127.5)):
        """
        Pad image to a specified size or to a multiple of size_divisor.
        Args:
            size (int, Sequence): image target size, if None, pad to multiple of size_divisor, default None
            size_divisor (int): size divisor, default 32
            pad_mode (int): pad mode, currently only supports four modes [-1, 0, 1, 2]. if -1, use specified offsets
                if 0, only pad to right and bottom. if 1, pad according to center. if 2, only pad left and top
            offsets (list): (x, y) pad offsets, required when pad_mode is -1, default None
            fill_value (tuple): rgb value of pad area, default (127.5, 127.5, 127.5)
        """
        super(Pad, self).__init__()

        # BUGFIX: the original check rejected the documented default
        # (size=None), making Pad() raise unconditionally; None means
        # "pad to a multiple of size_divisor" and must be accepted.
        if size is not None and not isinstance(size, (int, Sequence)):
            raise TypeError(
                "Type of size is invalid. Must be Integer or Sequence, "
                "now is {}".format(type(size)))

        if isinstance(size, int):
            size = [size, size]
        assert pad_mode in [
            -1, 0, 1, 2
        ], 'currently only supports four modes [-1, 0, 1, 2]'
        # BUGFIX: original `assert pad_mode == -1 and offsets` failed for
        # every mode except -1; offsets are only required for pad_mode -1.
        if pad_mode == -1:
            assert offsets, 'if pad_mode is -1, offsets should not be None'

        self.size = size
        self.size_divisor = size_divisor
        self.pad_mode = pad_mode
        self.fill_value = fill_value
        self.offsets = offsets

    def apply_segm(self, segms, offsets, im_size, size):
        """Shift polygon/RLE segmentations by the pad offsets."""

        def _expand_poly(poly, x, y):
            expanded_poly = np.array(poly)
            expanded_poly[0::2] += x
            expanded_poly[1::2] += y
            return expanded_poly.tolist()

        def _expand_rle(rle, x, y, height, width, h, w):
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, height, width)
            mask = mask_util.decode(rle)
            expanded_mask = np.full((h, w), 0).astype(mask.dtype)
            expanded_mask[y:y + height, x:x + width] = mask
            rle = mask_util.encode(
                np.array(expanded_mask, order='F', dtype=np.uint8))
            return rle

        x, y = offsets
        height, width = im_size
        h, w = size
        expanded_segms = []
        for segm in segms:
            if is_poly(segm):
                # Polygon format
                expanded_segms.append(
                    [_expand_poly(poly, x, y) for poly in segm])
            else:
                # RLE format
                import pycocotools.mask as mask_util
                expanded_segms.append(
                    _expand_rle(segm, x, y, height, width, h, w))
        return expanded_segms

    def apply_bbox(self, bbox, offsets):
        # offsets * 2 -> (dx, dy, dx, dy) applied to (x1, y1, x2, y2)
        return bbox + np.array(offsets * 2, dtype=np.float32)

    def apply_keypoint(self, keypoints, offsets):
        # keypoints are stored flat as (x0, y0, x1, y1, ...)
        n = len(keypoints[0]) // 2
        return keypoints + np.array(offsets * n, dtype=np.float32)

    def apply_image(self, image, offsets, im_size, size):
        """Paste the image onto a fill_value canvas at the given offsets."""
        x, y = offsets
        im_h, im_w = im_size
        h, w = size
        canvas = np.ones((h, w, 3), dtype=np.float32)
        canvas *= np.array(self.fill_value, dtype=np.float32)
        canvas[y:y + im_h, x:x + im_w, :] = image.astype(np.float32)
        return canvas

    def apply(self, sample, context=None):
        im = sample['image']
        im_h, im_w = im.shape[:2]
        if self.size:
            h, w = self.size
            # BUGFIX: original strict `<` forbade the no-pad case that the
            # `h == im_h and w == im_w` early-return below expects.
            assert (
                im_h <= h and im_w <= w
            ), '(h, w) of target size should be greater than (im_h, im_w)'
        else:
            # BUGFIX: original used `im_h // self.size_divisor` (floor
            # before ceil, so images were never padded up) and left h/w
            # as floats; use true division and cast to int.
            h = int(np.ceil(im_h / self.size_divisor) * self.size_divisor)
            w = int(np.ceil(im_w / self.size_divisor) * self.size_divisor)

        if h == im_h and w == im_w:
            return sample

        if self.pad_mode == -1:
            offset_x, offset_y = self.offsets
        elif self.pad_mode == 0:
            offset_y, offset_x = 0, 0
        elif self.pad_mode == 1:
            offset_y, offset_x = (h - im_h) // 2, (w - im_w) // 2
        else:
            offset_y, offset_x = h - im_h, w - im_w

        offsets, im_size, size = [offset_x, offset_y], [im_h, im_w], [h, w]

        sample['image'] = self.apply_image(im, offsets, im_size, size)

        if self.pad_mode == 0:
            # padding only to the right/bottom leaves annotations unchanged
            return sample
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], offsets)

        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_segm(sample['gt_poly'], offsets,
                                                im_size, size)

        if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
            sample['gt_keypoint'] = self.apply_keypoint(sample['gt_keypoint'],
                                                        offsets)

        return sample
@register_op
class Poly2Mask(BaseOperator):
    """
    gt poly to mask annotations
    """

    def __init__(self):
        super(Poly2Mask, self).__init__()
        import pycocotools.mask as maskUtils
        self.maskutils = maskUtils

    def _poly2mask(self, mask_ann, img_h, img_w):
        """Decode one polygon / RLE annotation into a binary HxW mask."""
        if isinstance(mask_ann, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
            rle = self.maskutils.merge(rles)
        elif isinstance(mask_ann['counts'], list):
            # uncompressed RLE
            rle = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
        else:
            # rle
            rle = mask_ann
        mask = self.maskutils.decode(rle)
        return mask

    def apply(self, sample, context=None):
        assert 'gt_poly' in sample
        im_h = sample['h']
        im_w = sample['w']
        masks = [
            self._poly2mask(gt_poly, im_h, im_w)
            for gt_poly in sample['gt_poly']
        ]
        sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
        return sample
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录