Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
83ccdbd0
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
83ccdbd0
编写于
7月 02, 2012
作者:
A
Alexander Mordvintsev
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactored digits_adjust.py (dataset preprocess in cloud)
上级
6977a895
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
94 addition
and
69 deletion
+94
-69
samples/python2/digits.py
samples/python2/digits.py
+2
-1
samples/python2/digits_adjust.py
samples/python2/digits_adjust.py
+92
-68
未找到文件。
samples/python2/digits.py
浏览文件 @
83ccdbd0
...
...
@@ -17,6 +17,7 @@ from common import clock, mosaic
SZ
=
20
# size of each digit is SZ x SZ
CLASS_N
=
10
DIGITS_FN
=
'digits.png'
def
load_digits
(
fn
):
print
'loading "%s" ...'
%
fn
...
...
@@ -95,7 +96,7 @@ def evaluate_model(model, digits, samples, labels):
if
__name__
==
'__main__'
:
print
__doc__
digits
,
labels
=
load_digits
(
'digits.png'
)
digits
,
labels
=
load_digits
(
DIGITS_FN
)
print
'preprocessing...'
# shuffle digits
...
...
samples/python2/digits_adjust.py
浏览文件 @
83ccdbd0
...
...
@@ -11,11 +11,10 @@ Usage:
digits_adjust.py [--model {svm|knearest}] [--cloud] [--env <PiCloud environment>]
--model {svm|knearest} - select the classifier (SVM is the default)
--cloud - use PiCloud computing platform
(for SVM only)
--cloud - use PiCloud computing platform
--env - cloud environment name
'''
# TODO dataset preprocessing in cloud
# TODO cloud env setup tutorial
import
numpy
as
np
...
...
@@ -24,6 +23,14 @@ from multiprocessing.pool import ThreadPool
from
digits
import
*
try
:
import
cloud
have_cloud
=
True
except
ImportError
:
have_cloud
=
False
def
cross_validate
(
model_class
,
params
,
samples
,
labels
,
kfold
=
3
,
pool
=
None
):
n
=
len
(
samples
)
folds
=
np
.
array_split
(
np
.
arange
(
n
),
kfold
)
...
...
@@ -46,66 +53,88 @@ def cross_validate(model_class, params, samples, labels, kfold = 3, pool = None)
scores
=
pool
.
map
(
f
,
xrange
(
kfold
))
return
np
.
mean
(
scores
)
def
adjust_KNearest
(
samples
,
labels
):
print
'adjusting KNearest ...'
best_err
,
best_k
=
np
.
inf
,
-
1
for
k
in
xrange
(
1
,
9
):
err
=
cross_validate
(
KNearest
,
dict
(
k
=
k
),
samples
,
labels
)
if
err
<
best_err
:
best_err
,
best_k
=
err
,
k
print
'k = %d, error: %.2f %%'
%
(
k
,
err
*
100
)
best_params
=
dict
(
k
=
best_k
)
print
'best params:'
,
best_params
return
best_params
def
adjust_SVM
(
samples
,
labels
,
usecloud
=
False
,
cloud_env
=
''
):
Cs
=
np
.
logspace
(
0
,
5
,
10
,
base
=
2
)
gammas
=
np
.
logspace
(
-
7
,
-
2
,
10
,
base
=
2
)
scores
=
np
.
zeros
((
len
(
Cs
),
len
(
gammas
)))
scores
[:]
=
np
.
nan
if
usecloud
:
try
:
import
cloud
except
ImportError
:
print
'cloud module is not installed'
class
App
(
object
):
def
__init__
(
self
,
usecloud
=
False
,
cloud_env
=
''
):
if
usecloud
and
not
have_cloud
:
print
'warning: cloud module is not installed, running locally'
usecloud
=
False
if
usecloud
:
print
'uploading dataset to cloud...'
np
.
savez
(
'train.npz'
,
samples
=
samples
,
labels
=
labels
)
cloud
.
files
.
put
(
'train.npz'
)
print
'adjusting SVM (may take a long time) ...'
def
f
(
job
):
i
,
j
=
job
params
=
dict
(
C
=
Cs
[
i
],
gamma
=
gammas
[
j
])
score
=
cross_validate
(
SVM
,
params
,
samples
,
labels
)
return
i
,
j
,
score
def
fcloud
(
job
):
i
,
j
=
job
cloud
.
files
.
get
(
'train.npz'
)
npz
=
np
.
load
(
'train.npz'
)
params
=
dict
(
C
=
Cs
[
i
],
gamma
=
gammas
[
j
])
score
=
cross_validate
(
SVM
,
params
,
npz
[
'samples'
],
npz
[
'labels'
])
return
i
,
j
,
score
if
usecloud
:
jids
=
cloud
.
map
(
fcloud
,
np
.
ndindex
(
*
scores
.
shape
),
_env
=
cloud_env
,
_profile
=
True
)
ires
=
cloud
.
iresult
(
jids
)
else
:
pool
=
ThreadPool
(
processes
=
cv2
.
getNumberOfCPUs
())
ires
=
pool
.
imap_unordered
(
f
,
np
.
ndindex
(
*
scores
.
shape
))
self
.
usecloud
=
usecloud
self
.
cloud_env
=
cloud_env
if
self
.
usecloud
:
print
'uploading dataset to cloud...'
cloud
.
files
.
put
(
DIGITS_FN
)
self
.
preprocess_job
=
cloud
.
call
(
self
.
preprocess
,
_env
=
self
.
cloud_env
)
else
:
self
.
_samples
,
self
.
_labels
=
self
.
preprocess
()
for
count
,
(
i
,
j
,
score
)
in
enumerate
(
ires
):
scores
[
i
,
j
]
=
score
print
'%d / %d (best error: %.2f %%, last: %.2f %%)'
%
(
count
+
1
,
scores
.
size
,
np
.
nanmin
(
scores
)
*
100
,
score
*
100
)
print
scores
def
preprocess
(
self
):
if
self
.
usecloud
:
cloud
.
files
.
get
(
DIGITS_FN
)
digits
,
labels
=
load_digits
(
DIGITS_FN
)
shuffle
=
np
.
random
.
permutation
(
len
(
digits
))
digits
,
labels
=
digits
[
shuffle
],
labels
[
shuffle
]
digits2
=
map
(
deskew
,
digits
)
samples
=
np
.
float32
(
digits2
).
reshape
(
-
1
,
SZ
*
SZ
)
/
255.0
return
samples
,
labels
def
get_dataset
(
self
):
if
self
.
usecloud
:
return
cloud
.
result
(
self
.
preprocess_job
)
else
:
return
self
.
_samples
,
self
.
_labels
def
run_jobs
(
self
,
f
,
jobs
):
if
self
.
usecloud
:
jids
=
cloud
.
map
(
f
,
jobs
,
_env
=
self
.
cloud_env
,
_profile
=
True
,
_depends_on
=
self
.
preprocess_job
)
ires
=
cloud
.
iresult
(
jids
)
else
:
pool
=
ThreadPool
(
processes
=
cv2
.
getNumberOfCPUs
())
ires
=
pool
.
imap_unordered
(
f
,
jobs
)
return
ires
def
adjust_SVM
(
self
):
Cs
=
np
.
logspace
(
0
,
5
,
10
,
base
=
2
)
gammas
=
np
.
logspace
(
-
7
,
-
2
,
10
,
base
=
2
)
scores
=
np
.
zeros
((
len
(
Cs
),
len
(
gammas
)))
scores
[:]
=
np
.
nan
print
'adjusting SVM (may take a long time) ...'
def
f
(
job
):
i
,
j
=
job
samples
,
labels
=
self
.
get_dataset
()
params
=
dict
(
C
=
Cs
[
i
],
gamma
=
gammas
[
j
])
score
=
cross_validate
(
SVM
,
params
,
samples
,
labels
)
return
i
,
j
,
score
ires
=
self
.
run_jobs
(
f
,
np
.
ndindex
(
*
scores
.
shape
))
for
count
,
(
i
,
j
,
score
)
in
enumerate
(
ires
):
scores
[
i
,
j
]
=
score
print
'%d / %d (best error: %.2f %%, last: %.2f %%)'
%
(
count
+
1
,
scores
.
size
,
np
.
nanmin
(
scores
)
*
100
,
score
*
100
)
print
scores
i
,
j
=
np
.
unravel_index
(
scores
.
argmin
(),
scores
.
shape
)
best_params
=
dict
(
C
=
Cs
[
i
],
gamma
=
gammas
[
j
])
print
'best params:'
,
best_params
print
'best error: %.2f %%'
%
(
scores
.
min
()
*
100
)
return
best_params
def
adjust_KNearest
(
self
):
print
'adjusting KNearest ...'
def
f
(
k
):
samples
,
labels
=
self
.
get_dataset
()
err
=
cross_validate
(
KNearest
,
dict
(
k
=
k
),
samples
,
labels
)
return
k
,
err
best_err
,
best_k
=
np
.
inf
,
-
1
for
k
,
err
in
self
.
run_jobs
(
f
,
xrange
(
1
,
9
)):
if
err
<
best_err
:
best_err
,
best_k
=
err
,
k
print
'k = %d, error: %.2f %%'
%
(
k
,
err
*
100
)
best_params
=
dict
(
k
=
best_k
)
print
'best params:'
,
best_params
,
'err: %.2f'
%
(
best_err
*
100
)
return
best_params
i
,
j
=
np
.
unravel_index
(
scores
.
argmin
(),
scores
.
shape
)
best_params
=
dict
(
C
=
Cs
[
i
],
gamma
=
gammas
[
j
])
print
'best params:'
,
best_params
print
'best error: %.2f %%'
%
(
scores
.
min
()
*
100
)
return
best_params
if
__name__
==
'__main__'
:
import
getopt
...
...
@@ -113,6 +142,7 @@ if __name__ == '__main__':
print
__doc__
args
,
_
=
getopt
.
getopt
(
sys
.
argv
[
1
:],
''
,
[
'model='
,
'cloud'
,
'env='
])
args
=
dict
(
args
)
args
.
setdefault
(
'--model'
,
'svm'
)
...
...
@@ -121,16 +151,10 @@ if __name__ == '__main__':
print
'unknown model "%s"'
%
args
[
'--model'
]
sys
.
exit
(
1
)
digits
,
labels
=
load_digits
(
'digits.png'
)
shuffle
=
np
.
random
.
permutation
(
len
(
digits
))
digits
,
labels
=
digits
[
shuffle
],
labels
[
shuffle
]
digits2
=
map
(
deskew
,
digits
)
samples
=
np
.
float32
(
digits2
).
reshape
(
-
1
,
SZ
*
SZ
)
/
255.0
t
=
clock
()
app
=
App
(
usecloud
=
'--cloud'
in
args
,
cloud_env
=
args
[
'--env'
])
if
args
[
'--model'
]
==
'knearest'
:
a
djust_KNearest
(
samples
,
labels
)
a
pp
.
adjust_KNearest
(
)
else
:
a
djust_SVM
(
samples
,
labels
,
usecloud
=
'--cloud'
in
args
,
cloud_env
=
args
[
'--env'
]
)
a
pp
.
adjust_SVM
(
)
print
'work time: %f s'
%
(
clock
()
-
t
)
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录