Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
l2902392385
machine-learning-in-action-python3
提交
36bcca2c
M
machine-learning-in-action-python3
项目概览
l2902392385
/
machine-learning-in-action-python3
与 Fork 源项目一致
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
machine-learning-in-action-python3
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
36bcca2c
编写于
7月 13, 2018
作者:
H
hathackerwang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
ch04
上级
f31712e5
变更
9
展开全部
隐藏空白更改
内联
并排
Showing
9 changed file
with
672 addition
and
0 deletion
+672
-0
ch04-Logistic_regression/Grad_descent.py
ch04-Logistic_regression/Grad_descent.py
+45
-0
ch04-Logistic_regression/Logistic_classify.py
ch04-Logistic_regression/Logistic_classify.py
+74
-0
ch04-Logistic_regression/Plot_boundary.py
ch04-Logistic_regression/Plot_boundary.py
+33
-0
ch04-Logistic_regression/Random_GDS.py
ch04-Logistic_regression/Random_GDS.py
+31
-0
ch04-Logistic_regression/__init__.py
ch04-Logistic_regression/__init__.py
+1
-0
ch04-Logistic_regression/__main__.py
ch04-Logistic_regression/__main__.py
+22
-0
ch04-Logistic_regression/horseColicTest.txt
ch04-Logistic_regression/horseColicTest.txt
+67
-0
ch04-Logistic_regression/horseColicTraining.txt
ch04-Logistic_regression/horseColicTraining.txt
+299
-0
ch04-Logistic_regression/testSet.txt
ch04-Logistic_regression/testSet.txt
+100
-0
未找到文件。
ch04-Logistic_regression/Grad_descent.py
0 → 100644
浏览文件 @
36bcca2c
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 8 13:06:53 2018
@author: Administrator
"""
from
numpy
import
*
def loadData(filename):
    """Read a whitespace-delimited data file into features and labels.

    Each line holds two feature values followed by an integer class
    label.  A constant 1.0 is prepended to every feature row so the
    first weight can act as the intercept term (x0).

    Returns:
        (datamat, labelmat): list of [1.0, x1, x2] feature rows and the
        matching list of integer labels.
    """
    datamat = []
    labelmat = []
    with open(filename) as fr:
        for raw in fr.readlines():
            fields = raw.strip().split()
            # x0 is fixed at 1.0 so the model learns a bias weight.
            datamat.append([1.0, float(fields[0]), float(fields[1])])
            labelmat.append(int(fields[2]))
    return datamat, labelmat
def sigmoid(inp):
    """Logistic sigmoid 1 / (1 + e^-x); works on scalars and numpy arrays."""
    denom = 1 + exp(-inp)
    return 1.0 / denom
def Grad_descent(datamat, labels):
    """Batch gradient ascent for logistic-regression weights.

    Args:
        datamat: m x n list of feature rows (first column expected 1.0).
        labels:  length-m sequence of 0/1 class labels.

    Returns:
        numpy matrix of shape (n, 1) holding the fitted weights.
    """
    data = mat(datamat)
    # Column vector of labels so it aligns with the (m, 1) predictions.
    label = mat(labels).transpose()
    m, n = shape(data)
    alpha = 0.001      # learning rate
    max_iter = 500     # fixed number of full-batch passes
    weights = ones((n, 1))
    for _ in range(max_iter):
        # Use the matrix built above consistently (the original mixed the
        # raw list `datamat` and the matrix `data` for the same product).
        y_pred = sigmoid(data * weights)       # (m, 1) predicted P(label=1)
        error = label - y_pred
        # Gradient of the log-likelihood: X' * (y - sigmoid(X * w)).
        weights = weights + alpha * data.transpose() * error
    return weights
\ No newline at end of file
ch04-Logistic_regression/Logistic_classify.py
0 → 100644
浏览文件 @
36bcca2c
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 8 15:11:21 2018
@author: Administrator
"""
from
numpy
import
*
from
Grad_descent
import
*
from
Random_GDS
import
Stoch_gdescent
def classifyVec(inp, weights):
    """Classify one feature vector with trained logistic weights.

    Returns 1.0 when the predicted probability exceeds 0.5, else 0.0.
    """
    # Weighted sum of the inputs through the sigmoid gives P(label=1).
    # (The original wrapped the numpy product in list() before summing,
    # which materialized a throwaway list for no benefit.)
    prob = sigmoid(sum(array(inp) * array(weights)))
    return 1.0 if prob > 0.5 else 0.0
# Logistic-regression prediction on the horse-colic data set.
def colicTest():
    """Train on horseColicTraining.txt, evaluate on horseColicTest.txt.

    Each data line holds 21 feature values followed by the class label.

    Returns:
        float: fraction of test samples that were misclassified.
    """
    trainingSet = []
    trainingLabels = []
    # with-blocks close the files (the original leaked both handles).
    with open('horseColicTraining.txt') as frTrain:
        for line in frTrain.readlines():
            currLine = line.strip().split()
            trainingSet.append([float(currLine[i]) for i in range(21)])
            # BUGFIX: convert the label to float here.  The original kept
            # the raw string, so the float-vs-string comparison below was
            # ALWAYS unequal and every test sample counted as an error.
            trainingLabels.append(float(currLine[21]))
    # Fit weights with stochastic gradient ascent, 500 passes.
    trainWeights = Stoch_gdescent(trainingSet, trainingLabels, 500)
    errorCount = 0
    numTestVec = 0.0
    with open('horseColicTest.txt') as frTest:
        for line in frTest.readlines():
            numTestVec += 1.0
            currLine = line.strip().split()
            lineArr = [float(currLine[i]) for i in range(21)]
            # Compare float prediction against the float label.
            if classifyVec(lineArr, trainWeights) != float(currLine[21]):
                errorCount += 1
    errorRate = float(errorCount) / numTestVec
    print('the error rate of this test is: %f' % (errorRate))
    return errorRate
# Run the test repeatedly and report the mean prediction error.
def multTest():
    """Run colicTest() ten times and print the average error rate."""
    numTests = 10
    errorRateSum = 0.0
    for _ in range(numTests):
        errorRateSum += colicTest()
    print('after %d iterations the average error rate is: %f'
          % (numTests, errorRateSum / float(numTests)))
\ No newline at end of file
ch04-Logistic_regression/Plot_boundary.py
0 → 100644
浏览文件 @
36bcca2c
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 8 13:51:17 2018
@author: Administrator
"""
import
matplotlib
import
matplotlib.pyplot
as
plt
from
numpy
import
*
def plot_fit(data, labelMat, weights):
    """Scatter the two classes and draw the fitted decision boundary.

    Args:
        data:     m x 3 samples, column 0 being the constant 1.0 bias term.
        labelMat: m class labels (0 or 1).
        weights:  fitted weight vector [w0, w1, w2].
    """
    dataArr = array(data)
    pos_x, pos_y = [], []
    neg_x, neg_y = [], []
    # Split the sample coordinates by class label.
    for row, lab in zip(dataArr, labelMat):
        if int(lab) == 1:
            pos_x.append(row[1])
            pos_y.append(row[2])
        else:
            neg_x.append(row[1])
            neg_y.append(row[2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(pos_x, pos_y, s=30, c='red', marker='s')
    ax.scatter(neg_x, neg_y, s=30, c='green')
    # Boundary where w0 + w1*x + w2*y = 0  =>  y = -(w0 + w1*x) / w2.
    x = arange(-3.0, 3.0, 0.1)
    y = ((-weights[0] - weights[1] * x) / weights[2]).transpose()
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
\ No newline at end of file
ch04-Logistic_regression/Random_GDS.py
0 → 100644
浏览文件 @
36bcca2c
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 8 14:30:25 2018
@author: Administrator
"""
from
numpy
import
*
from
Grad_descent
import
sigmoid
def Stoch_gdescent(datamat, labels, num_iter=150):
    """Stochastic gradient ascent with a decaying learning rate.

    One randomly drawn sample updates the weights per inner step, and
    alpha decays as 4 / (1 + epoch + step) + 0.01 so later updates are
    smaller while the rate never reaches zero.

    Args:
        datamat:  m x n feature rows.
        labels:   m class labels (anything float() accepts).
        num_iter: number of passes over the data set (default 150).

    Returns:
        1-D numpy array of n fitted weights.
    """
    m, n = shape(datamat)
    weights = ones(n)
    for epoch in range(num_iter):
        for step in range(m):
            # The 0.01 floor keeps the learning rate from vanishing.
            alpha = 4 / (1.0 + epoch + step) + 0.01
            pick = int(random.uniform(0, m))
            estimate = sigmoid(sum(datamat[pick] * weights))
            residual = float(labels[pick]) - estimate
            # Gradient step: (y - f(x)) * x.
            weights = weights + (alpha * residual) * array(datamat[pick])
    return weights
\ No newline at end of file
ch04-Logistic_regression/__init__.py
0 → 100644
浏览文件 @
36bcca2c
#
\ No newline at end of file
ch04-Logistic_regression/__main__.py
0 → 100644
浏览文件 @
36bcca2c
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 8 13:05:29 2018
@author: Administrator
"""
from
numpy
import
*
from
Grad_descent
import
*
from
Plot_boundary
import
*
from
matplotlib
import
*
from
Random_GDS
import
*
from
Logistic_classify
import
*
if __name__ == '__main__':
    # Load the 2-D demo data set (two features + binary label per line).
    data, label = loadData('testSet.txt')
    # Train ONCE and reuse the result: the original called Stoch_gdescent
    # twice, printing one weight vector but plotting a different,
    # independently re-trained one (the fit is stochastic).
    weights = Stoch_gdescent(data, label)
    print(weights)
    plot_fit(data, label, weights)
    # Evaluate on the horse-colic data, averaged over 10 runs.
    multTest()
\ No newline at end of file
ch04-Logistic_regression/horseColicTest.txt
0 → 100644
浏览文件 @
36bcca2c
2 1 38.50 54 20 0 1 2 2 3 4 1 2 2 5.90 0 2 42.00 6.30 0 0 1
2 1 37.60 48 36 0 0 1 1 0 3 0 0 0 0 0 0 44.00 6.30 1 5.00 1
1 1 37.7 44 28 0 4 3 2 5 4 4 1 1 0 3 5 45 70 3 2 1
1 1 37 56 24 3 1 4 2 4 4 3 1 1 0 0 0 35 61 3 2 0
2 1 38.00 42 12 3 0 3 1 1 0 1 0 0 0 0 2 37.00 5.80 0 0 1
1 1 0 60 40 3 0 1 1 0 4 0 3 2 0 0 5 42 72 0 0 1
2 1 38.40 80 60 3 2 2 1 3 2 1 2 2 0 1 1 54.00 6.90 0 0 1
2 1 37.80 48 12 2 1 2 1 3 0 1 2 0 0 2 0 48.00 7.30 1 0 1
2 1 37.90 45 36 3 3 3 2 2 3 1 2 1 0 3 0 33.00 5.70 3 0 1
2 1 39.00 84 12 3 1 5 1 2 4 2 1 2 7.00 0 4 62.00 5.90 2 2.20 0
2 1 38.20 60 24 3 1 3 2 3 3 2 3 3 0 4 4 53.00 7.50 2 1.40 1
1 1 0 140 0 0 0 4 2 5 4 4 1 1 0 0 5 30 69 0 0 0
1 1 37.90 120 60 3 3 3 1 5 4 4 2 2 7.50 4 5 52.00 6.60 3 1.80 0
2 1 38.00 72 36 1 1 3 1 3 0 2 2 1 0 3 5 38.00 6.80 2 2.00 1
2 9 38.00 92 28 1 1 2 1 1 3 2 3 0 7.20 0 0 37.00 6.10 1 1.10 1
1 1 38.30 66 30 2 3 1 1 2 4 3 3 2 8.50 4 5 37.00 6.00 0 0 1
2 1 37.50 48 24 3 1 1 1 2 1 0 1 1 0 3 2 43.00 6.00 1 2.80 1
1 1 37.50 88 20 2 3 3 1 4 3 3 0 0 0 0 0 35.00 6.40 1 0 0
2 9 0 150 60 4 4 4 2 5 4 4 0 0 0 0 0 0 0 0 0 0
1 1 39.7 100 30 0 0 6 2 4 4 3 1 0 0 4 5 65 75 0 0 0
1 1 38.30 80 0 3 3 4 2 5 4 3 2 1 0 4 4 45.00 7.50 2 4.60 1
2 1 37.50 40 32 3 1 3 1 3 2 3 2 1 0 0 5 32.00 6.40 1 1.10 1
1 1 38.40 84 30 3 1 5 2 4 3 3 2 3 6.50 4 4 47.00 7.50 3 0 0
1 1 38.10 84 44 4 0 4 2 5 3 1 1 3 5.00 0 4 60.00 6.80 0 5.70 0
2 1 38.70 52 0 1 1 1 1 1 3 1 0 0 0 1 3 4.00 74.00 0 0 1
2 1 38.10 44 40 2 1 3 1 3 3 1 0 0 0 1 3 35.00 6.80 0 0 1
2 1 38.4 52 20 2 1 3 1 1 3 2 2 1 0 3 5 41 63 1 1 1
1 1 38.20 60 0 1 0 3 1 2 1 1 1 1 0 4 4 43.00 6.20 2 3.90 1
2 1 37.70 40 18 1 1 1 0 3 2 1 1 1 0 3 3 36.00 3.50 0 0 1
1 1 39.1 60 10 0 1 1 0 2 3 0 0 0 0 4 4 0 0 0 0 1
2 1 37.80 48 16 1 1 1 1 0 1 1 2 1 0 4 3 43.00 7.50 0 0 1
1 1 39.00 120 0 4 3 5 2 2 4 3 2 3 8.00 0 0 65.00 8.20 3 4.60 1
1 1 38.20 76 0 2 3 2 1 5 3 3 1 2 6.00 1 5 35.00 6.50 2 0.90 1
2 1 38.30 88 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 1 38.00 80 30 3 3 3 1 0 0 0 0 0 6.00 0 0 48.00 8.30 0 4.30 1
1 1 0 0 0 3 1 1 1 2 3 3 1 3 6.00 4 4 0 0 2 0 0
1 1 37.60 40 0 1 1 1 1 1 1 1 0 0 0 1 1 0 0 2 2.10 1
2 1 37.50 44 0 1 1 1 1 3 3 2 0 0 0 0 0 45.00 5.80 2 1.40 1
2 1 38.2 42 16 1 1 3 1 1 3 1 0 0 0 1 0 35 60 1 1 1
2 1 38 56 44 3 3 3 0 0 1 1 2 1 0 4 0 47 70 2 1 1
2 1 38.30 45 20 3 3 2 2 2 4 1 2 0 0 4 0 0 0 0 0 1
1 1 0 48 96 1 1 3 1 0 4 1 2 1 0 1 4 42.00 8.00 1 0 1
1 1 37.70 55 28 2 1 2 1 2 3 3 0 3 5.00 4 5 0 0 0 0 1
2 1 36.00 100 20 4 3 6 2 2 4 3 1 1 0 4 5 74.00 5.70 2 2.50 0
1 1 37.10 60 20 2 0 4 1 3 0 3 0 2 5.00 3 4 64.00 8.50 2 0 1
2 1 37.10 114 40 3 0 3 2 2 2 1 0 0 0 0 3 32.00 0 3 6.50 1
1 1 38.1 72 30 3 3 3 1 4 4 3 2 1 0 3 5 37 56 3 1 1
1 1 37.00 44 12 3 1 1 2 1 1 1 0 0 0 4 2 40.00 6.70 3 8.00 1
1 1 38.6 48 20 3 1 1 1 4 3 1 0 0 0 3 0 37 75 0 0 1
1 1 0 82 72 3 1 4 1 2 3 3 0 3 0 4 4 53 65 3 2 0
1 9 38.20 78 60 4 4 6 0 3 3 3 0 0 0 1 0 59.00 5.80 3 3.10 0
2 1 37.8 60 16 1 1 3 1 2 3 2 1 2 0 3 0 41 73 0 0 0
1 1 38.7 34 30 2 0 3 1 2 3 0 0 0 0 0 0 33 69 0 2 0
1 1 0 36 12 1 1 1 1 1 2 1 1 1 0 1 5 44.00 0 0 0 1
2 1 38.30 44 60 0 0 1 1 0 0 0 0 0 0 0 0 6.40 36.00 0 0 1
2 1 37.40 54 18 3 0 1 1 3 4 3 2 2 0 4 5 30.00 7.10 2 0 1
1 1 0 0 0 4 3 0 2 2 4 1 0 0 0 0 0 54 76 3 2 1
1 1 36.6 48 16 3 1 3 1 4 1 1 1 1 0 0 0 27 56 0 0 0
1 1 38.5 90 0 1 1 3 1 3 3 3 2 3 2 4 5 47 79 0 0 1
1 1 0 75 12 1 1 4 1 5 3 3 0 3 5.80 0 0 58.00 8.50 1 0 1
2 1 38.20 42 0 3 1 1 1 1 1 2 2 1 0 3 2 35.00 5.90 2 0 1
1 9 38.20 78 60 4 4 6 0 3 3 3 0 0 0 1 0 59.00 5.80 3 3.10 0
2 1 38.60 60 30 1 1 3 1 4 2 2 1 1 0 0 0 40.00 6.00 1 0 1
2 1 37.80 42 40 1 1 1 1 1 3 1 0 0 0 3 3 36.00 6.20 0 0 1
1 1 38 60 12 1 1 2 1 2 1 1 1 1 0 1 4 44 65 3 2 0
2 1 38.00 42 12 3 0 3 1 1 1 1 0 0 0 0 1 37.00 5.80 0 0 1
2 1 37.60 88 36 3 1 1 1 3 3 2 1 3 1.50 0 0 44.00 6.00 0 0 0
\ No newline at end of file
ch04-Logistic_regression/horseColicTraining.txt
0 → 100644
浏览文件 @
36bcca2c
此差异已折叠。
点击以展开。
ch04-Logistic_regression/testSet.txt
0 → 100644
浏览文件 @
36bcca2c
-0.017612 14.053064 0
-1.395634 4.662541 1
-0.752157 6.538620 0
-1.322371 7.152853 0
0.423363 11.054677 0
0.406704 7.067335 1
0.667394 12.741452 0
-2.460150 6.866805 1
0.569411 9.548755 0
-0.026632 10.427743 0
0.850433 6.920334 1
1.347183 13.175500 0
1.176813 3.167020 1
-1.781871 9.097953 0
-0.566606 5.749003 1
0.931635 1.589505 1
-0.024205 6.151823 1
-0.036453 2.690988 1
-0.196949 0.444165 1
1.014459 5.754399 1
1.985298 3.230619 1
-1.693453 -0.557540 1
-0.576525 11.778922 0
-0.346811 -1.678730 1
-2.124484 2.672471 1
1.217916 9.597015 0
-0.733928 9.098687 0
-3.642001 -1.618087 1
0.315985 3.523953 1
1.416614 9.619232 0
-0.386323 3.989286 1
0.556921 8.294984 1
1.224863 11.587360 0
-1.347803 -2.406051 1
1.196604 4.951851 1
0.275221 9.543647 0
0.470575 9.332488 0
-1.889567 9.542662 0
-1.527893 12.150579 0
-1.185247 11.309318 0
-0.445678 3.297303 1
1.042222 6.105155 1
-0.618787 10.320986 0
1.152083 0.548467 1
0.828534 2.676045 1
-1.237728 10.549033 0
-0.683565 -2.166125 1
0.229456 5.921938 1
-0.959885 11.555336 0
0.492911 10.993324 0
0.184992 8.721488 0
-0.355715 10.325976 0
-0.397822 8.058397 0
0.824839 13.730343 0
1.507278 5.027866 1
0.099671 6.835839 1
-0.344008 10.717485 0
1.785928 7.718645 1
-0.918801 11.560217 0
-0.364009 4.747300 1
-0.841722 4.119083 1
0.490426 1.960539 1
-0.007194 9.075792 0
0.356107 12.447863 0
0.342578 12.281162 0
-0.810823 -1.466018 1
2.530777 6.476801 1
1.296683 11.607559 0
0.475487 12.040035 0
-0.783277 11.009725 0
0.074798 11.023650 0
-1.337472 0.468339 1
-0.102781 13.763651 0
-0.147324 2.874846 1
0.518389 9.887035 0
1.015399 7.571882 0
-1.658086 -0.027255 1
1.319944 2.171228 1
2.056216 5.019981 1
-0.851633 4.375691 1
-1.510047 6.061992 0
-1.076637 -3.181888 1
1.821096 10.283990 0
3.010150 8.401766 1
-1.099458 1.688274 1
-0.834872 -1.733869 1
-0.846637 3.849075 1
1.400102 12.628781 0
1.752842 5.468166 1
0.078557 0.059736 1
0.089392 -0.715300 1
1.825662 12.693808 0
0.197445 9.744638 0
0.126117 0.922311 1
-0.679797 1.220530 1
0.677983 2.556666 1
0.761349 10.693862 0
-2.168791 0.143632 1
1.388610 9.341997 0
0.317029 14.739025 0
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录