{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 9.5 多尺度目标检测"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.2.0\n"
]
},
{
"data": {
"text/plain": [
"(728, 561)"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%matplotlib inline\n",
"from PIL import Image\n",
"import numpy as np\n",
"import torch\n",
"\n",
"import sys\n",
"sys.path.append(\"..\") \n",
"import d2lzh_pytorch as d2l\n",
"print(torch.__version__) # 1.2.0\n",
"\n",
"img = Image.open('../../docs/img/catdog.jpg')\n",
"w, h = img.size\n",
"w, h"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"d2l.set_figsize()\n",
"\n",
"def display_anchors(fmap_w, fmap_h, s):\n",
" # 前两维的取值不影响输出结果(原书这里是(1, 10, fmap_w, fmap_h), 我认为错了)\n",
" fmap = torch.zeros((1, 10, fmap_h, fmap_w), dtype=torch.float32)\n",
" \n",
" # 平移所有锚框使均匀分布在图片上\n",
" offset_x, offset_y = 1.0/fmap_w, 1.0/fmap_h\n",
" anchors = d2l.MultiBoxPrior(fmap, sizes=s, ratios=[1, 2, 0.5]) + \\\n",
" torch.tensor([offset_x/2, offset_y/2, offset_x/2, offset_y/2])\n",
" \n",
" bbox_scale = torch.tensor([[w, h, w, h]], dtype=torch.float32)\n",
" d2l.show_bboxes(d2l.plt.imshow(img).axes,\n",
" anchors[0] * bbox_scale)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display_anchors(fmap_w=4, fmap_h=2, s=[0.15])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display_anchors(fmap_w=2, fmap_h=1, s=[0.4])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display_anchors(fmap_w=1, fmap_h=1, s=[0.8])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}