Commit 36bcca2c authored by H hathackerwang

ch04

Parent f31712e5
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 8 13:06:53 2018
@author: Administrator
"""
from numpy import *
def loadData(filename):
    '''
    Read a whitespace-separated data file in which each row holds two
    features and a class label. A constant 1.0 is prepended as x0 (the bias term).
    '''
    datamat = []; labelmat = []
    with open(filename) as fr:
        for line in fr.readlines():
            line_arr = line.strip().split()
            # feature vector [x0, x1, x2] with x0 fixed at 1.0
            datamat.append([1.0, float(line_arr[0]), float(line_arr[1])])
            labelmat.append(int(line_arr[2]))
    return datamat, labelmat

def sigmoid(inp):
    return 1.0 / (1 + exp(-inp))

def Grad_descent(datamat, labels):
    '''
    Batch gradient ascent on the log-likelihood of logistic regression.
    Returns the weight vector as an (n, 1) matrix.
    '''
    data = mat(datamat)
    label = mat(labels).transpose()
    m, n = shape(data)
    alpha = 0.001; max_iter = 500
    weights = ones((n, 1))          # initialize all weights to 1
    for k in range(max_iter):
        z = data * weights          # (m, 1) vector of linear scores
        y_pred = sigmoid(z)
        error = label - y_pred
        # gradient of the log-likelihood: X' * (y - sigmoid(Xw))
        weights = weights + alpha * data.transpose() * error
    return weights
\ No newline at end of file
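For quick reference, a minimal sketch of how these two functions are exercised elsewhere in this commit (testSet.txt is the two-feature data set included below; running from the repository directory is an assumption):

# Sketch: fit batch gradient ascent on the bundled testSet.txt.
from Grad_descent import loadData, Grad_descent

data, labels = loadData('testSet.txt')
weights = Grad_descent(data, labels)   # (3, 1) weight matrix [w0, w1, w2]
print(weights)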
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 8 15:11:21 2018
@author: Administrator
"""
from numpy import *
from Grad_descent import *
from Random_GDS import Stoch_gdescent
def classifyVec(inp, weights):
    # predict class 1 if sigmoid(w . x) > 0.5, otherwise class 0
    prob = sigmoid(sum(array(inp) * array(weights)))
    if prob > 0.5: return 1.0
    else: return 0.0

# Logistic regression prediction on the horse-colic data set
def colicTest():
    # open the training and test data sets
    frTrain = open('horseColicTraining.txt')
    frTest = open('horseColicTest.txt')
    trainingSet = []; trainingLabels = []
    # read every line of the training file
    for line in frTrain.readlines():
        # split the current line into its fields
        currLine = line.strip().split()
        # build the feature vector of this sample
        lineArr = []
        for i in range(21):
            lineArr.append(float(currLine[i]))
        # the 22nd field is the class label
        trainingLabels.append(float(currLine[21]))
        trainingSet.append(lineArr)
    # fit the weights with stochastic gradient ascent
    trainWeights = Stoch_gdescent(trainingSet, trainingLabels, 500)
    # count misclassified test samples and the total number of test samples
    errorCount = 0; numTestVec = 0.0
    # walk through every sample of the test set
    for line in frTest.readlines():
        numTestVec += 1.0
        currLine = line.strip().split()
        lineArr = []
        for i in range(21):
            lineArr.append(float(currLine[i]))
        # compare the prediction with the true label
        if int(classifyVec(lineArr, trainWeights)) != int(float(currLine[21])):
            errorCount += 1
    # overall error rate on the test set
    errorRate = float(errorCount) / numTestVec
    print('the error rate of this test is: %f' % errorRate)
    return errorRate

# repeat the test several times and report the average error rate
def multTest():
    numTests = 10; errorRateSum = 0.0
    for k in range(numTests):
        errorRateSum += colicTest()
    print('after %d iterations the average error rate is: %f'
          % (numTests, errorRateSum / float(numTests)))
\ No newline at end of file
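Running this module on its own amounts to a single call (a usage sketch; it assumes horseColicTraining.txt and horseColicTest.txt sit in the working directory, as colicTest() expects):

# Sketch: evaluate the horse-colic classifier ten times and average the error.
from Logistic_classify import multTest

multTest()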
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 8 13:51:17 2018
@author: Administrator
"""
import matplotlib
import matplotlib.pyplot as plt
from numpy import *
def plot_fit(data, labelMat, weights):
    '''
    Scatter the two classes and draw the fitted decision boundary.
    '''
    dataArr = array(data)
    n = shape(dataArr)[0]
    x_cord1 = []; y_cord1 = []
    x_cord2 = []; y_cord2 = []
    for i in range(n):
        if int(labelMat[i]) == 1:
            x_cord1.append(dataArr[i, 1]); y_cord1.append(dataArr[i, 2])
        else:
            x_cord2.append(dataArr[i, 1]); y_cord2.append(dataArr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(x_cord1, y_cord1, s=30, c='red', marker='s')
    ax.scatter(x_cord2, y_cord2, s=30, c='green')
    x = arange(-3.0, 3.0, 0.1)
    # decision boundary: w0 + w1*x1 + w2*x2 = 0  =>  x2 = (-w0 - w1*x1) / w2
    y = ((-weights[0] - weights[1] * x) / weights[2]).transpose()
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
\ No newline at end of file
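Why the plotted line is the decision boundary: the classifier predicts class 1 exactly when the sigmoid argument is positive, so the boundary is where that argument vanishes. In the notation used by plot_fit,

sigmoid(w0 + w1*x1 + w2*x2) = 0.5  <=>  w0 + w1*x1 + w2*x2 = 0  <=>  x2 = (-w0 - w1*x1) / w2,

which is exactly the y that plot_fit evaluates for each x in [-3.0, 3.0).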
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 8 14:30:25 2018
@author: Administrator
"""
from numpy import *
from Grad_descent import sigmoid
def Stoch_gdescent(datamat, labels, num_iter = 150):
    '''
    Stochastic gradient ascent: update the weights one randomly drawn
    sample at a time, decaying the step size alpha as iterations proceed.
    '''
    m, n = shape(datamat)
    weights = ones(n)                   # initialize all weights to 1
    for j in range(num_iter):
        for i in range(m):
            # alpha shrinks as j and i grow; the constant 0.01 keeps it
            # from ever reaching zero, so later samples still matter
            alpha = 4 / (1.0 + j + i) + 0.01
            randidx = int(random.uniform(0, m))   # pick a random sample
            z = sum(array(datamat[randidx]) * weights)
            y_pred = sigmoid(z)
            error = float(labels[randidx]) - y_pred
            # single-sample gradient: w := w + alpha * (y - sigmoid(w . x)) * x
            weights = weights + (alpha * error) * array(datamat[randidx])
    return weights
\ No newline at end of file
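Because randidx is drawn with replacement, a single pass can revisit some samples and skip others. A common variant samples without replacement within each pass; the sketch below is not part of this commit, only adds the dataIndex bookkeeping, and assumes the same imports as Random_GDS.py (from numpy import * plus sigmoid from Grad_descent):

# Variant sketch: visit each sample at most once per pass (without replacement).
def Stoch_gdescent_no_replace(datamat, labels, num_iter = 150):
    m, n = shape(datamat)
    weights = ones(n)
    for j in range(num_iter):
        dataIndex = list(range(m))            # indices not yet used in this pass
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.01
            randidx = int(random.uniform(0, len(dataIndex)))
            idx = dataIndex[randidx]
            error = float(labels[idx]) - sigmoid(sum(array(datamat[idx]) * weights))
            weights = weights + (alpha * error) * array(datamat[idx])
            del(dataIndex[randidx])           # do not draw this sample again this pass
    return weights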
#
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 8 13:05:29 2018
@author: Administrator
"""
from numpy import *
from Grad_descent import *
from Plot_boundary import *
from matplotlib import *
from Random_GDS import *
from Logistic_classify import *
if __name__ == '__main__':
    data, label = loadData('testSet.txt')
    # train once with stochastic gradient ascent and reuse the weights
    weights = Stoch_gdescent(data, label)
    print(weights)
    plot_fit(data, label, weights)
    multTest()
\ No newline at end of file
2 1 38.50 54 20 0 1 2 2 3 4 1 2 2 5.90 0 2 42.00 6.30 0 0 1
2 1 37.60 48 36 0 0 1 1 0 3 0 0 0 0 0 0 44.00 6.30 1 5.00 1
1 1 37.7 44 28 0 4 3 2 5 4 4 1 1 0 3 5 45 70 3 2 1
1 1 37 56 24 3 1 4 2 4 4 3 1 1 0 0 0 35 61 3 2 0
2 1 38.00 42 12 3 0 3 1 1 0 1 0 0 0 0 2 37.00 5.80 0 0 1
1 1 0 60 40 3 0 1 1 0 4 0 3 2 0 0 5 42 72 0 0 1
2 1 38.40 80 60 3 2 2 1 3 2 1 2 2 0 1 1 54.00 6.90 0 0 1
2 1 37.80 48 12 2 1 2 1 3 0 1 2 0 0 2 0 48.00 7.30 1 0 1
2 1 37.90 45 36 3 3 3 2 2 3 1 2 1 0 3 0 33.00 5.70 3 0 1
2 1 39.00 84 12 3 1 5 1 2 4 2 1 2 7.00 0 4 62.00 5.90 2 2.20 0
2 1 38.20 60 24 3 1 3 2 3 3 2 3 3 0 4 4 53.00 7.50 2 1.40 1
1 1 0 140 0 0 0 4 2 5 4 4 1 1 0 0 5 30 69 0 0 0
1 1 37.90 120 60 3 3 3 1 5 4 4 2 2 7.50 4 5 52.00 6.60 3 1.80 0
2 1 38.00 72 36 1 1 3 1 3 0 2 2 1 0 3 5 38.00 6.80 2 2.00 1
2 9 38.00 92 28 1 1 2 1 1 3 2 3 0 7.20 0 0 37.00 6.10 1 1.10 1
1 1 38.30 66 30 2 3 1 1 2 4 3 3 2 8.50 4 5 37.00 6.00 0 0 1
2 1 37.50 48 24 3 1 1 1 2 1 0 1 1 0 3 2 43.00 6.00 1 2.80 1
1 1 37.50 88 20 2 3 3 1 4 3 3 0 0 0 0 0 35.00 6.40 1 0 0
2 9 0 150 60 4 4 4 2 5 4 4 0 0 0 0 0 0 0 0 0 0
1 1 39.7 100 30 0 0 6 2 4 4 3 1 0 0 4 5 65 75 0 0 0
1 1 38.30 80 0 3 3 4 2 5 4 3 2 1 0 4 4 45.00 7.50 2 4.60 1
2 1 37.50 40 32 3 1 3 1 3 2 3 2 1 0 0 5 32.00 6.40 1 1.10 1
1 1 38.40 84 30 3 1 5 2 4 3 3 2 3 6.50 4 4 47.00 7.50 3 0 0
1 1 38.10 84 44 4 0 4 2 5 3 1 1 3 5.00 0 4 60.00 6.80 0 5.70 0
2 1 38.70 52 0 1 1 1 1 1 3 1 0 0 0 1 3 4.00 74.00 0 0 1
2 1 38.10 44 40 2 1 3 1 3 3 1 0 0 0 1 3 35.00 6.80 0 0 1
2 1 38.4 52 20 2 1 3 1 1 3 2 2 1 0 3 5 41 63 1 1 1
1 1 38.20 60 0 1 0 3 1 2 1 1 1 1 0 4 4 43.00 6.20 2 3.90 1
2 1 37.70 40 18 1 1 1 0 3 2 1 1 1 0 3 3 36.00 3.50 0 0 1
1 1 39.1 60 10 0 1 1 0 2 3 0 0 0 0 4 4 0 0 0 0 1
2 1 37.80 48 16 1 1 1 1 0 1 1 2 1 0 4 3 43.00 7.50 0 0 1
1 1 39.00 120 0 4 3 5 2 2 4 3 2 3 8.00 0 0 65.00 8.20 3 4.60 1
1 1 38.20 76 0 2 3 2 1 5 3 3 1 2 6.00 1 5 35.00 6.50 2 0.90 1
2 1 38.30 88 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 1 38.00 80 30 3 3 3 1 0 0 0 0 0 6.00 0 0 48.00 8.30 0 4.30 1
1 1 0 0 0 3 1 1 1 2 3 3 1 3 6.00 4 4 0 0 2 0 0
1 1 37.60 40 0 1 1 1 1 1 1 1 0 0 0 1 1 0 0 2 2.10 1
2 1 37.50 44 0 1 1 1 1 3 3 2 0 0 0 0 0 45.00 5.80 2 1.40 1
2 1 38.2 42 16 1 1 3 1 1 3 1 0 0 0 1 0 35 60 1 1 1
2 1 38 56 44 3 3 3 0 0 1 1 2 1 0 4 0 47 70 2 1 1
2 1 38.30 45 20 3 3 2 2 2 4 1 2 0 0 4 0 0 0 0 0 1
1 1 0 48 96 1 1 3 1 0 4 1 2 1 0 1 4 42.00 8.00 1 0 1
1 1 37.70 55 28 2 1 2 1 2 3 3 0 3 5.00 4 5 0 0 0 0 1
2 1 36.00 100 20 4 3 6 2 2 4 3 1 1 0 4 5 74.00 5.70 2 2.50 0
1 1 37.10 60 20 2 0 4 1 3 0 3 0 2 5.00 3 4 64.00 8.50 2 0 1
2 1 37.10 114 40 3 0 3 2 2 2 1 0 0 0 0 3 32.00 0 3 6.50 1
1 1 38.1 72 30 3 3 3 1 4 4 3 2 1 0 3 5 37 56 3 1 1
1 1 37.00 44 12 3 1 1 2 1 1 1 0 0 0 4 2 40.00 6.70 3 8.00 1
1 1 38.6 48 20 3 1 1 1 4 3 1 0 0 0 3 0 37 75 0 0 1
1 1 0 82 72 3 1 4 1 2 3 3 0 3 0 4 4 53 65 3 2 0
1 9 38.20 78 60 4 4 6 0 3 3 3 0 0 0 1 0 59.00 5.80 3 3.10 0
2 1 37.8 60 16 1 1 3 1 2 3 2 1 2 0 3 0 41 73 0 0 0
1 1 38.7 34 30 2 0 3 1 2 3 0 0 0 0 0 0 33 69 0 2 0
1 1 0 36 12 1 1 1 1 1 2 1 1 1 0 1 5 44.00 0 0 0 1
2 1 38.30 44 60 0 0 1 1 0 0 0 0 0 0 0 0 6.40 36.00 0 0 1
2 1 37.40 54 18 3 0 1 1 3 4 3 2 2 0 4 5 30.00 7.10 2 0 1
1 1 0 0 0 4 3 0 2 2 4 1 0 0 0 0 0 54 76 3 2 1
1 1 36.6 48 16 3 1 3 1 4 1 1 1 1 0 0 0 27 56 0 0 0
1 1 38.5 90 0 1 1 3 1 3 3 3 2 3 2 4 5 47 79 0 0 1
1 1 0 75 12 1 1 4 1 5 3 3 0 3 5.80 0 0 58.00 8.50 1 0 1
2 1 38.20 42 0 3 1 1 1 1 1 2 2 1 0 3 2 35.00 5.90 2 0 1
1 9 38.20 78 60 4 4 6 0 3 3 3 0 0 0 1 0 59.00 5.80 3 3.10 0
2 1 38.60 60 30 1 1 3 1 4 2 2 1 1 0 0 0 40.00 6.00 1 0 1
2 1 37.80 42 40 1 1 1 1 1 3 1 0 0 0 3 3 36.00 6.20 0 0 1
1 1 38 60 12 1 1 2 1 2 1 1 1 1 0 1 4 44 65 3 2 0
2 1 38.00 42 12 3 0 3 1 1 1 1 0 0 0 0 1 37.00 5.80 0 0 1
2 1 37.60 88 36 3 1 1 1 3 3 2 1 3 1.50 0 0 44.00 6.00 0 0 0
\ No newline at end of file
This diff has been collapsed.
-0.017612 14.053064 0
-1.395634 4.662541 1
-0.752157 6.538620 0
-1.322371 7.152853 0
0.423363 11.054677 0
0.406704 7.067335 1
0.667394 12.741452 0
-2.460150 6.866805 1
0.569411 9.548755 0
-0.026632 10.427743 0
0.850433 6.920334 1
1.347183 13.175500 0
1.176813 3.167020 1
-1.781871 9.097953 0
-0.566606 5.749003 1
0.931635 1.589505 1
-0.024205 6.151823 1
-0.036453 2.690988 1
-0.196949 0.444165 1
1.014459 5.754399 1
1.985298 3.230619 1
-1.693453 -0.557540 1
-0.576525 11.778922 0
-0.346811 -1.678730 1
-2.124484 2.672471 1
1.217916 9.597015 0
-0.733928 9.098687 0
-3.642001 -1.618087 1
0.315985 3.523953 1
1.416614 9.619232 0
-0.386323 3.989286 1
0.556921 8.294984 1
1.224863 11.587360 0
-1.347803 -2.406051 1
1.196604 4.951851 1
0.275221 9.543647 0
0.470575 9.332488 0
-1.889567 9.542662 0
-1.527893 12.150579 0
-1.185247 11.309318 0
-0.445678 3.297303 1
1.042222 6.105155 1
-0.618787 10.320986 0
1.152083 0.548467 1
0.828534 2.676045 1
-1.237728 10.549033 0
-0.683565 -2.166125 1
0.229456 5.921938 1
-0.959885 11.555336 0
0.492911 10.993324 0
0.184992 8.721488 0
-0.355715 10.325976 0
-0.397822 8.058397 0
0.824839 13.730343 0
1.507278 5.027866 1
0.099671 6.835839 1
-0.344008 10.717485 0
1.785928 7.718645 1
-0.918801 11.560217 0
-0.364009 4.747300 1
-0.841722 4.119083 1
0.490426 1.960539 1
-0.007194 9.075792 0
0.356107 12.447863 0
0.342578 12.281162 0
-0.810823 -1.466018 1
2.530777 6.476801 1
1.296683 11.607559 0
0.475487 12.040035 0
-0.783277 11.009725 0
0.074798 11.023650 0
-1.337472 0.468339 1
-0.102781 13.763651 0
-0.147324 2.874846 1
0.518389 9.887035 0
1.015399 7.571882 0
-1.658086 -0.027255 1
1.319944 2.171228 1
2.056216 5.019981 1
-0.851633 4.375691 1
-1.510047 6.061992 0
-1.076637 -3.181888 1
1.821096 10.283990 0
3.010150 8.401766 1
-1.099458 1.688274 1
-0.834872 -1.733869 1
-0.846637 3.849075 1
1.400102 12.628781 0
1.752842 5.468166 1
0.078557 0.059736 1
0.089392 -0.715300 1
1.825662 12.693808 0
0.197445 9.744638 0
0.126117 0.922311 1
-0.679797 1.220530 1
0.677983 2.556666 1
0.761349 10.693862 0
-2.168791 0.143632 1
1.388610 9.341997 0
0.317029 14.739025 0