Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenDocCN
d2l-zh
提交
023d77db
D
d2l-zh
项目概览
OpenDocCN
/
d2l-zh
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
d2l-zh
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
023d77db
编写于
10月 29, 2018
作者:
A
Aston Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
revise ssd, fcn code
上级
edc21426
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
63 addition
and
59 deletion
+63
-59
chapter_computer-vision/fcn.md
chapter_computer-vision/fcn.md
+5
-6
chapter_computer-vision/semantic-segmentation-and-dataset.md
chapter_computer-vision/semantic-segmentation-and-dataset.md
+35
-32
chapter_computer-vision/ssd.md
chapter_computer-vision/ssd.md
+3
-3
gluonbook/utils.py
gluonbook/utils.py
+20
-18
未找到文件。
chapter_computer-vision/fcn.md
浏览文件 @
023d77db
...
...
@@ -165,11 +165,10 @@ gb.train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs=5)
预测一张新图像时,我们只需要将其归一化并转成卷积网络需要的4D格式。
```
{.python .input n=13}
def predict(im):
data = test_iter._dataset.normalize_image(im)
data = data.transpose((2, 0, 1)).expand_dims(axis=0)
yhat = net(data.as_in_context(ctx[0]))
pred = nd.argmax(yhat, axis=1)
def predict(img):
x = test_iter._dataset.normalize_image(img)
x = x.transpose((2, 0, 1)).expand_dims(axis=0)
pred = nd.argmax(net(x.as_in_context(ctx[0])), axis=1)
return pred.reshape((pred.shape[1], pred.shape[2]))
```
...
...
@@ -185,7 +184,7 @@ def label2image(pred):
现在我们读取前几张测试图像并对其进行预测。
```
{.python .input n=15}
test_images, test_labels = gb.read_voc_images(train=False)
test_images, test_labels = gb.read_voc_images(
is_
train=False)
n = 5
imgs = []
...
...
chapter_computer-vision/semantic-segmentation-and-dataset.md
浏览文件 @
023d77db
...
...
@@ -50,25 +50,26 @@ voc_dir = download_voc_pascal()
```
{.python .input n=3}
# 本函数已保存在 gluonbook 包中方便以后使用。
def read_voc_images(root=voc_dir, train=True):
def read_voc_images(root=voc_dir,
is_
train=True):
txt_fname = '%s/ImageSets/Segmentation/%s' % (
root, 'train.txt' if train else 'val.txt')
root, 'train.txt' if
is_
train else 'val.txt')
with open(txt_fname, 'r') as f:
images = f.read().split()
data, label
= [None] * len(images), [None] * len(images)
features, labels
= [None] * len(images), [None] * len(images)
for i, fname in enumerate(images):
data[i] = image.imread('%s/JPEGImages/%s.jpg' % (root, fname))
label[i] = image.imread('%s/SegmentationClass/%s.png' % (root, fname))
return data, label
features[i] = image.imread('%s/JPEGImages/%s.jpg' % (root, fname))
labels[i] = image.imread(
'%s/SegmentationClass/%s.png' % (root, fname))
return features, labels
train_
imag
es, train_labels = read_voc_images()
train_
featur
es, train_labels = read_voc_images()
```
我们画出前面五张图像和它们对应的标注。在标注中,白色代表边框,黑色代表背景,其他不同的颜色对应不同的目标类别。
```
{.python .input n=4}
n = 5
imgs = train_
imag
es[0:n] + train_labels[0:n]
imgs = train_
featur
es[0:n] + train_labels[0:n]
gb.show_images(imgs, 2, n);
```
...
...
@@ -93,13 +94,14 @@ VOC_CLASSES = ['background', 'aeroplane', 'bicycle', 'bird', 'boat',
```
{.python .input n=6}
colormap2label = nd.zeros(256 ** 3)
for i, c
m
in enumerate(VOC_COLORMAP):
colormap2label[(c
m[0] * 256 + cm[1]) * 256 + cm
[2]] = i
for i, c
olormap
in enumerate(VOC_COLORMAP):
colormap2label[(c
olormap[0] * 256 + colormap[1]) * 256 + colormap
[2]] = i
# 本函数已保存在 gluonbook 包中方便以后使用。
def voc_label_indices(img, colormap2label):
data = img.astype('int32')
idx = (data[:, :, 0] * 256 + data[:, :, 1]) * 256 + data[:, :, 2]
def voc_label_indices(colormap, colormap2label):
colormap = colormap.astype('int32')
idx = ((colormap[:, :, 0] * 256 + colormap[:, :, 1]) * 256
+ colormap[:, :, 2])
return colormap2label[idx]
```
...
...
@@ -118,14 +120,14 @@ y[105:115, 130:140], VOC_CLASSES[1]
```
{.python .input n=8}
# 本函数已保存在 gluonbook 包中方便以后使用。
def voc_rand_crop(
data
, label, height, width):
data, rect = image.random_crop(data
, (width, height))
def voc_rand_crop(
feature
, label, height, width):
feature, rect = image.random_crop(feature
, (width, height))
label = image.fixed_crop(label, *rect)
return
data
, label
return
feature
, label
imgs = []
for _ in range(n):
imgs += voc_rand_crop(train_
imag
es[0], train_labels[0], 200, 300)
imgs += voc_rand_crop(train_
featur
es[0], train_labels[0], 200, 300)
gb.show_images(imgs[::2] + imgs[1::2], 2, n);
```
...
...
@@ -136,32 +138,33 @@ gb.show_images(imgs[::2] + imgs[1::2], 2, n);
```
{.python .input n=9}
# 本类已保存在 gluonbook 包中方便以后使用。
class VOCSegDataset(gdata.Dataset):
def __init__(self, train, crop_size, voc_dir, colormap2label):
def __init__(self,
is_
train, crop_size, voc_dir, colormap2label):
self.rgb_mean = nd.array([0.485, 0.456, 0.406])
self.rgb_std = nd.array([0.229, 0.224, 0.225])
self.crop_size = crop_size
data, label = read_voc_images(root=voc_dir, train=train)
self.data = [self.normalize_image(im) for im in self.filter(data)]
self.label = self.filter(label)
features, labels = read_voc_images(root=voc_dir, is_train=is_train)
self.features = [self.normalize_image(feature)
for feature in self.filter(features)]
self.labels = self.filter(labels)
self.colormap2label = colormap2label
print('read ' + str(len(self.
data
)) + ' examples')
print('read ' + str(len(self.
features
)) + ' examples')
def normalize_image(self,
data
):
return (
data
.astype('float32') / 255 - self.rgb_mean) / self.rgb_std
def normalize_image(self,
img
):
return (
img
.astype('float32') / 255 - self.rgb_mean) / self.rgb_std
def filter(self, im
age
s):
return [im
for im in image
s if (
im.shape[0] >= self.crop_size[0] and
im.shape[1] >= self.crop_size[1])]
def filter(self, im
g
s):
return [im
g for img in img
s if (
im
g
.shape[0] >= self.crop_size[0] and
im
g
.shape[1] >= self.crop_size[1])]
def __getitem__(self, idx):
data, label = voc_rand_crop(self.data[idx], self.label
[idx],
*self.crop_size)
return (
data
.transpose((2, 0, 1)),
feature, label = voc_rand_crop(self.features[idx], self.labels
[idx],
*self.crop_size)
return (
feature
.transpose((2, 0, 1)),
voc_label_indices(label, self.colormap2label))
def __len__(self):
return len(self.
data
)
return len(self.
features
)
```
假设我们裁剪$320\times 480$图像来进行训练,我们可以查看训练和测试各保留了多少图像。
...
...
chapter_computer-vision/ssd.md
浏览文件 @
023d77db
...
...
@@ -277,8 +277,8 @@ for epoch in range(20):
```
{.python .input n=20}
def process_image(file_name):
img = image.imread(file_name)
data
= image.imresize(img, 256, 256).astype('float32')
return
data
.transpose((2, 0, 1)).expand_dims(axis=0), img
feature
= image.imresize(img, 256, 256).astype('float32')
return
feature
.transpose((2, 0, 1)).expand_dims(axis=0), img
x, img = process_image('../img/pikachu.jpg')
```
...
...
@@ -310,7 +310,7 @@ def display(img, out, threshold=0.5):
bbox = [row[2:6] * nd.array(img.shape[0:2] * 2, ctx=row.context)]
gb.show_bboxes(fig.axes, bbox, '%.2f' % score, 'w')
display(img, out, threshold=0.
01
)
display(img, out, threshold=0.
3
)
```
## 小结
...
...
gluonbook/utils.py
浏览文件 @
023d77db
...
...
@@ -352,17 +352,18 @@ def read_imdb(folder='train'):
return
data
def
read_voc_images
(
root
=
'../data/VOCdevkit/VOC2012'
,
train
=
True
):
def
read_voc_images
(
root
=
'../data/VOCdevkit/VOC2012'
,
is_
train
=
True
):
"""Read VOC images."""
txt_fname
=
'%s/ImageSets/Segmentation/%s'
%
(
root
,
'train.txt'
if
train
else
'val.txt'
)
root
,
'train.txt'
if
is_
train
else
'val.txt'
)
with
open
(
txt_fname
,
'r'
)
as
f
:
images
=
f
.
read
().
split
()
data
,
label
=
[
None
]
*
len
(
images
),
[
None
]
*
len
(
images
)
features
,
labels
=
[
None
]
*
len
(
images
),
[
None
]
*
len
(
images
)
for
i
,
fname
in
enumerate
(
images
):
data
[
i
]
=
image
.
imread
(
'%s/JPEGImages/%s.jpg'
%
(
root
,
fname
))
label
[
i
]
=
image
.
imread
(
'%s/SegmentationClass/%s.png'
%
(
root
,
fname
))
return
data
,
label
features
[
i
]
=
image
.
imread
(
'%s/JPEGImages/%s.jpg'
%
(
root
,
fname
))
labels
[
i
]
=
image
.
imread
(
'%s/SegmentationClass/%s.png'
%
(
root
,
fname
))
return
features
,
labels
class
Residual
(
nn
.
Block
):
...
...
@@ -775,29 +776,30 @@ def use_svg_display():
display
.
set_matplotlib_formats
(
'svg'
)
def
voc_label_indices
(
img
,
colormap2label
):
def
voc_label_indices
(
colormap
,
colormap2label
):
"""Assig label indices for Pascal VOC2012 Dataset."""
data
=
img
.
astype
(
'int32'
)
idx
=
(
data
[:,:,
0
]
*
256
+
data
[:,:,
1
])
*
256
+
data
[:,:,
2
]
colormap
=
colormap
.
astype
(
'int32'
)
idx
=
((
colormap
[:,
:,
0
]
*
256
+
colormap
[:,
:,
1
])
*
256
+
colormap
[:,
:,
2
])
return
colormap2label
[
idx
]
def
voc_rand_crop
(
data
,
label
,
height
,
width
):
def
voc_rand_crop
(
feature
,
label
,
height
,
width
):
"""Random cropping for images of the Pascal VOC2012 Dataset."""
data
,
rect
=
image
.
random_crop
(
data
,
(
width
,
height
))
feature
,
rect
=
image
.
random_crop
(
feature
,
(
width
,
height
))
label
=
image
.
fixed_crop
(
label
,
*
rect
)
return
data
,
label
return
feature
,
label
class
VOCSegDataset
(
gdata
.
Dataset
):
"""The Pascal VOC2012 Dataset."""
def
__init__
(
self
,
train
,
crop_size
,
voc_dir
,
colormap2label
):
def
__init__
(
self
,
is_
train
,
crop_size
,
voc_dir
,
colormap2label
):
self
.
rgb_mean
=
nd
.
array
([
0.485
,
0.456
,
0.406
])
self
.
rgb_std
=
nd
.
array
([
0.229
,
0.224
,
0.225
])
self
.
crop_size
=
crop_size
data
,
label
=
read_voc_images
(
root
=
voc_dir
,
train
=
train
)
data
,
label
s
=
read_voc_images
(
root
=
voc_dir
,
is_train
=
is_
train
)
self
.
data
=
[
self
.
normalize_image
(
im
)
for
im
in
self
.
filter
(
data
)]
self
.
label
=
self
.
filter
(
label
)
self
.
label
s
=
self
.
filter
(
labels
)
self
.
colormap2label
=
colormap2label
print
(
'read '
+
str
(
len
(
self
.
data
))
+
' examples'
)
...
...
@@ -810,10 +812,10 @@ class VOCSegDataset(gdata.Dataset):
im
.
shape
[
1
]
>=
self
.
crop_size
[
1
])]
def
__getitem__
(
self
,
idx
):
data
,
label
=
voc_rand_crop
(
self
.
data
[
idx
],
self
.
label
[
idx
],
*
self
.
crop_size
)
data
,
label
s
=
voc_rand_crop
(
self
.
data
[
idx
],
self
.
labels
[
idx
],
*
self
.
crop_size
)
return
(
data
.
transpose
((
2
,
0
,
1
)),
voc_label_indices
(
label
,
self
.
colormap2label
))
voc_label_indices
(
label
s
,
self
.
colormap2label
))
def
__len__
(
self
):
return
len
(
self
.
data
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录