ch07

e521f8ab · hathackerwang · af2a6337 · e521f8ab · e521f8ab
隐藏空白更改
内联并排

Showing 2 changed files with 88 additions and 0 deletions

ch07-Linear_regrs/Ridge_regre.py ch07-Linear_regrs/Ridge_regre.py +47 -0

ch07-Linear_regrs/Stand_Linear.py ch07-Linear_regrs/Stand_Linear.py +41 -0

未找到文件。
--- a/ch07-Linear_regrs/Ridge_regre.py
+++ b/ch07-Linear_regrs/Ridge_regre.py
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Jul 14 15:04:10 2018
+
+@author: Administrator
+"""
+
+
+def ridgeRegres(xMat,yMat,lam=0.2):
+    '''
+    Ridge regression: compute ws = (X.T*X + lam*I).I * (X.T*y).
+
+    @xMat: sample data; must be a numpy matrix, because the code relies on
+           `*` being the matrix product and on the `.I` inverse attribute
+    @yMat: target values of the samples (presumably a column matrix with
+           one row per sample -- confirm against callers)
+    @lam: penalty coefficient lambda, default 0.2
+    Returns the regression coefficients ws, or None (after printing a
+    message) when the determinant of the penalised matrix is exactly 0.
+    '''
+    # This file has no module-level numpy import, so the numpy names used
+    # below are imported here; without this the function raises NameError.
+    from numpy import eye, linalg, shape
+    # Inner product X.T * X
+    xTx = xMat.T * xMat
+    # Add the penalty term lam * I, intended to make the matrix invertible
+    denom = xTx + eye(shape(xMat)[1]) * lam
+    # A determinant of exactly zero is treated as "not invertible"
+    if linalg.det(denom) == 0.0:
+        print('This matrix is singular,cannot do inverse')
+        return None
+    # Regression coefficients
+    ws = denom.I * (xMat.T * yMat)
+    return ws
+
+# Features are standardised first so that all of them carry the same weight
+def ridgeTest(xArr,yArr):
+    '''
+    Fit ridge regression for 30 different values of lambda.
+
+    @xArr: sample data, one row per sample (converted with asmatrix)
+    @yArr: target values; transposed after conversion, so presumably a
+           flat sequence with one value per sample -- confirm with callers
+    Returns a (30, number_of_features) array whose row i holds the
+    coefficients obtained with lambda = exp(i - 10).
+    '''
+    # This file has no module-level numpy import, so the names are imported
+    # here. asmatrix is used instead of the `mat` alias, which was removed
+    # in NumPy 2.0 (mat was simply another name for asmatrix).
+    from numpy import asmatrix, exp, mean, shape, var, zeros
+    xMat = asmatrix(xArr)
+    yMat = asmatrix(yArr).T
+    # Centre the target values on their mean
+    yMean = mean(yMat, 0)
+    yMat = yMat - yMean
+    # Per-feature mean and variance
+    xMeans = mean(xMat, 0)
+    xVar = var(xMat, 0)
+    # (feature - mean) / variance; note the divisor is the variance, not
+    # the standard deviation
+    xMat = (xMat - xMeans) / xVar
+    # Number of lambda values to try
+    numTestpts = 30
+    # One row of coefficients per lambda value
+    wMat = zeros((numTestpts, shape(xMat)[1]))
+    for i in range(numTestpts):
+        # lambda grows exponentially: exp(-10), exp(-9), ..., exp(19)
+        ws = ridgeRegres(xMat, yMat, exp(i - 10))
+        wMat[i, :] = ws.T
+    return wMat
\ No newline at end of file
--- a/ch07-Linear_regrs/Stand_Linear.py
+++ b/ch07-Linear_regrs/Stand_Linear.py
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Jul 14 15:02:40 2018
+
+@author: Administrator
+"""
+
+from numpy import *
+
+# Parse the data in a file into a form suitable for machine processing
+def loadDataSet(filename):
+    '''
+    Load a tab-separated numeric data file.
+
+    Every line holds the feature values followed by the target value in
+    the last column. The number of features is taken from the first line.
+    Whitespace-only lines are skipped.
+
+    @filename: path of the file to read
+    Returns (dataMat, labelMat): a list of feature lists and a list of
+    target values, both as floats.
+    '''
+    # Read everything inside a with-block so the handle is always closed
+    with open(filename) as fr:
+        lines = fr.readlines()
+    if not lines:
+        return [], []
+    # All columns except the last one are features
+    numFeat = len(lines[0].split('\t')) - 1
+    dataMat = []; labelMat = []
+    for line in lines:
+        stripped = line.strip()
+        if not stripped:
+            continue
+        curLine = stripped.split('\t')
+        # Index explicitly so that a line with too few columns raises
+        # IndexError instead of being silently truncated
+        lineArr = [float(curLine[i]) for i in range(numFeat)]
+        dataMat.append(lineArr)
+        labelMat.append(float(curLine[-1]))
+    return dataMat,labelMat
+
+# Standard (ordinary least squares) linear regression
+# ws=(X.T*X).I*(X.T*Y)
+def standRegres(xArr,yArr):
+    '''
+    Compute the least-squares regression coefficients.
+
+    @xArr: sample data, one row per sample (converted with asmatrix)
+    @yArr: target values; transposed after conversion, so presumably a
+           flat sequence with one value per sample -- confirm with callers
+    Returns the coefficient matrix ws, or None (after printing a message)
+    when the determinant of X.T*X is exactly 0. Predictions can then be
+    obtained as yHat = xMat * ws.
+    '''
+    # Convert the list data to numpy matrices. asmatrix is used instead of
+    # the `mat` alias, which was removed in NumPy 2.0 (mat was simply
+    # another name for asmatrix).
+    xMat = asmatrix(xArr)
+    yMat = asmatrix(yArr).T
+    # Inner product X.T * X
+    xTx = xMat.T * xMat
+    # linalg.det() gives the determinant; exactly zero is treated as
+    # "not invertible"
+    if linalg.det(xTx) == 0.0:
+        print('This matrix is singular,cannot do inverse')
+        return None
+    # Invertible: apply the formula above
+    ws = xTx.I * (xMat.T * yMat)
+    return ws
\ No newline at end of file