Commit a2ac7233 authored by wnma3mz

commit chapter13

Parent da8153f2
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 8 23:05:54 2017
@author: lu
"""
import pandas as pd
import numpy as np
from sklearn.linear_model import Lasso
from keras.models import Sequential
from keras.layers.core import Dense, Activation
import matplotlib.pyplot as plt
"""
GM11-->自定义的灰度预测函
programmer_1-->读取文件提取基本信息
programmer_2-->用自定义的灰度预测函数,进行预测
programmer_3-->建立神经网络模型,进行预测并画图预测图
"""
def GM11(x0):
    # 1-AGO sequence (cumulative sum)
    x1 = np.cumsum(x0)
    # adjacent-mean (MEAN) generated sequence
    z1 = (x1[:-1] + x1[1:]) / 2.0
    z1 = z1.reshape(len(z1), 1)
    B = np.append(-z1, np.ones_like(z1), axis=1)
    Yn = x0[1:].reshape((len(x0) - 1, 1))
    # least-squares estimate of the parameters: [a, b]^T = (B^T B)^-1 B^T Yn
    [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn)
    # restored (inverse-AGO) values of the time-response function
    f = lambda k: (x0[0] - b / a) * np.exp(-a * (k - 1)) - (x0[0] - b / a) * np.exp(-a * (k - 2))
    delta = np.abs(x0 - np.array([f(i) for i in range(1, len(x0) + 1)]))
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) <
               0.6745 * x0.std()).sum() / len(x0)
    # grey prediction function, a, b, first value, variance ratio, small-error probability
    return f, a, b, x0[0], C, P
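# --- Added sketch (not part of the original script): a thin convenience wrapper
# around GM11 above. The name gm11_forecast and the n_ahead parameter are
# assumptions made for illustration only.
def gm11_forecast(x0, n_ahead=2):
    """Fit GM(1,1) on x0; return the in-sample fit, n_ahead forecasts, C and P.
    Rule of thumb often quoted for the posterior test: C < 0.35 and P > 0.95
    indicates a good fit."""
    f, a, b, first, C, P = GM11(np.asarray(x0, dtype=float))
    fitted = np.array([f(k) for k in range(1, len(x0) + 1)])
    future = np.array([f(k) for k in range(len(x0) + 1, len(x0) + 1 + n_ahead)])
    return fitted, future, C, P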
def programmer_1(inputfile, data_range):
    # inputfile = "data/data1.csv"
    data = pd.read_csv(inputfile)
    """
    Original approach; describe() yields the same statistics and is used instead:
    r = [data.min(), data.max(), data.mean(), data.std()]
    r = pd.DataFrame(r, index=["Min", "Max", "Mean", "STD"]).T
    """
    r = pd.DataFrame(data.describe()).T
    np.round(r, 2)
    # Pearson correlation matrix
    np.round(data.corr(method="pearson"), 2)
    """
    The original code used AdaptiveLasso, which is no longer available in
    scikit-learn, so Lasso is used instead; the gamma parameter is replaced
    by tol (still to be verified).
    """
    model = Lasso(tol=1)
    model.fit(data.iloc[:, 0:data_range], data["y"])
    # coefficient of each feature
    model.coef_
    print(model.coef_)
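# --- Added sketch (not part of the original script): turn the printed Lasso
# coefficients into an explicit feature subset. The helper name select_features
# and the 1e-6 threshold are assumptions; the original code only prints coef_.
def select_features(inputfile, data_range, threshold=1e-6):
    data = pd.read_csv(inputfile)
    model = Lasso(tol=1)
    model.fit(data.iloc[:, 0:data_range], data["y"])
    # keep the columns whose coefficient is not (numerically) zero
    return [col for col, w in zip(data.columns[:data_range], model.coef_)
            if abs(w) > threshold]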
def programmer_2(inputfile, outputfile, startyear, feature_lst, roundnum=0):
    """
    startyear: first year of the observed series
    feature_lst: feature columns
    roundnum: number of decimal places to round to
    """
    data = pd.read_csv(inputfile)
    data.index = range(startyear, 2014)
    data.loc[2014] = None
    data.loc[2015] = None
    for i in feature_lst:
        # .values replaces the deprecated .as_matrix()
        f = GM11(data[i][list(range(startyear, 2014))].values)[0]
        # 2014 forecast
        data[i][2014] = f(len(data) - 1)
        # 2015 forecast
        data[i][2015] = f(len(data))
        data[i] = data[i].round(roundnum)
    print(data[feature_lst + ["y"]])
    data[feature_lst + ["y"]].to_excel(outputfile)
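# --- Added note (not in the original): index arithmetic used in programmer_2. ---
# GM11's time-response function counts the observed series as k = 1, 2, ...
# With startyear=1994 the observations cover 1994..2013 (k = 1..20); after the
# two empty rows are appended, len(data) == 22, so f(len(data) - 1) == f(21)
# gives the 2014 forecast and f(len(data)) == f(22) gives 2015.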
def programmer_3(inputfile, outputfile, modelfile, feature_lst, startyear,
                 input_dim_1, units1, input_dim_2, units2,
                 epochs_num=10000, roundnum=0):
    """
    feature_lst: feature columns
    input_dim_*, units*: input dimension and number of units of each Dense layer
    roundnum: number of decimal places to round to
    """
    data = pd.read_excel(inputfile)
    # use the data before 2014 (from startyear on) as the training set
    data_train = data.loc[range(startyear, 2014)].copy()
    data_mean = data_train.mean()
    data_std = data_train.std()
    # standardize the data
    data_train = (data_train - data_mean) / data_std
    # feature data (.values replaces the deprecated .as_matrix())
    x_train = data_train[feature_lst].values
    # label data
    y_train = data_train["y"].values
    model = Sequential()
    model.add(Dense(input_dim=input_dim_1, units=units1))
    model.add(Activation("relu"))
    model.add(Dense(input_dim=input_dim_2, units=units2))
    model.compile(loss="mean_squared_error", optimizer="adam")
    model.fit(x_train, y_train, epochs=epochs_num, batch_size=16)
    model.save_weights(modelfile)
    # predict and transform the result back to the original scale
    x = ((data[feature_lst] - data_mean[feature_lst]) /
         data_std[feature_lst]).values
    data["y_pred"] = model.predict(x).flatten() * data_std["y"] + data_mean["y"]
    data["y_pred"] = data["y_pred"].round(roundnum)
    data.to_excel(outputfile)
    # plot observed values against predictions
    p = data[["y", "y_pred"]].plot(subplots=True, style=["b-o", "r-*"])
    plt.show()
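# --- Added sketch (not part of the original script): rebuild the network and
# reload the weights saved by programmer_3. save_weights() stores weights only,
# so the same architecture has to be recreated before load_weights(); the
# helper name load_net is an assumption.
def load_net(modelfile, input_dim_1, units1, input_dim_2, units2):
    net = Sequential()
    net.add(Dense(input_dim=input_dim_1, units=units1))
    net.add(Activation("relu"))
    net.add(Dense(input_dim=input_dim_2, units=units2))
    net.compile(loss="mean_squared_error", optimizer="adam")
    net.load_weights(modelfile)
    return net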
if __name__ == "__main__":
# programmer_1(inputfile="data/data1.csv",
# data_range=13)
# programmer_2(inputfile="data/data1.csv",
# outputfile="tmp/data1_GM11.xls",
# startyear=1994,
# feature_lst=["x1", "x2", "x3", "x4", "x5", "x7"],
# roundnum=2)
# programmer_3(inputfile="tmp/data1_GM11.xls",
# outputfile="data/revenue.xls",
# modelfile="tmp/1-net.model",
# feature_lst=["x1", "x2", "x3", "x4", "x5", "x7"],
# startyear=1994,
# input_dim_1=6,
# units1=12,
# input_dim_2=12,
# units2=1)
# programmer_1(inputfile="data/data2.csv",
# data_range=6)
# programmer_2(inputfile="data/data2.csv",
# outputfile="tmp/data2_GM11.xls",
# startyear=1999,
# feature_lst=["x1", "x3", "x5"],
# roundnum=6)
# programmer_3(inputfile="tmp/data2_GM11.xls",
# outputfile="data/VAT.xls",
# modelfile="tmp/2-net.model",
# feature_lst=["x1", "x3", "x5"],
# startyear=1999,
# input_dim_1=3,
# units1=6,
# input_dim_2=6,
# units2=1,
# roundnum=2)
# programmer_1(inputfile="data/data3.csv",
# data_range=10)
# programmer_2(inputfile="data/data3.csv",
# outputfile="tmp/data3_GM11.xls",
# startyear=1999,
# feature_lst=["x3", "x4", "x6", "x8"])
# programmer_3(inputfile="tmp/data3_GM11.xls",
# outputfile="data/sales_tax.xls",
# modelfile="tmp/3-net.model",
# feature_lst=["x3", "x4", "x6", "x8"],
# startyear=1999,
# input_dim_1=4,
# units1=8,
# input_dim_2=8,
# units2=1,
# roundnum=2)
# programmer_1(inputfile="data/data4.csv",
# data_range=10)
# programmer_2(inputfile="data/data4.csv",
# outputfile="tmp/data4_GM11.xls",
# startyear=2002,
# feature_lst=["x1", "x2", "x3", "x4", "x6", "x7", "x9", "x10"],
# roundnum=2)
# programmer_3(inputfile="tmp/data4_GM11.xls",
# outputfile="data/enterprise_incomt.xls",
# modelfile="tmp/4-net.model",
# feature_lst=["x1", "x2", "x3", "x4", "x6", "x7", "x9", "x10"],
# startyear=2002,
# input_dim_1=8,
# units1=6,
# input_dim_2=6,
# units2=1,
# roundnum=2)
# programmer_1(inputfile="data/data5.csv",
# data_range=7)
# programmer_2(inputfile="data/data5.csv",
# outputfile="tmp/data5_GM11.xls",
# startyear=2000,
# feature_lst=["x1", "x4", "x5", "x7"])
# programmer_3(inputfile="tmp/data5_GM11.xls",
# outputfile="data/personal_Income.xls",
# modelfile="tmp/5-net.model",
# feature_lst=["x1", "x4", "x5", "x7"],
# startyear=2000,
# input_dim_1=4,
# units1=8,
# input_dim_2=8,
# units2=1,
# epochs_num=15000)
    x0 = np.array([3152063, 2213050, 4050122,
                   5265142, 5556619, 4772843, 9463330])
    f, a, b, x00, C, P = GM11(x0)
    print('Forecasts for 2014 and 2015:\n%0.2f and %0.2f (unit: 10,000 yuan)' % (f(8), f(9)))
    print('Posterior variance ratio: %0.4f' % C)
    p = pd.DataFrame(x0, columns=["y"], index=range(2007, 2014))
    p.loc[2014] = None
    p.loc[2015] = None
    p["y_pred"] = [f(i) for i in range(1, 10)]
    p["y_pred"] = p["y_pred"].round(2)
    p.index = pd.to_datetime(p.index, format="%Y")
    p.plot(style=["b-o", "r-*"], xticks=p.index)
    plt.show()
x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,y
3831732,181.54,448.19,7571,6212.7,6370241,525.71,985.31,60.62,65.66,120,1.029,5321,64.87
3913824,214.63,549.97,9038.16,7601.73,6467115,618.25,1259.2,73.46,95.46,113.5,1.051,6529,99.75
3928907,239.56,686.44,9905.31,8092.82,6560508,638.94,1468.06,81.16,81.16,108.2,1.064,7008,88.11
4282130,261.58,802.59,10444.6,8767.98,6664862,656.58,1678.12,85.72,91.7,102.2,1.092,7694,106.07
4453911,283.14,904.57,11255.7,9422.33,6741400,758.83,1893.52,88.88,114.61,97.7,1.2,8027,137.32
4548852,308.58,1000.69,12018.52,9751.44,6850024,878.26,2139.18,92.85,152.78,98.5,1.198,8549,188.14
4962579,348.09,1121.13,13966.53,11349.47,7006896,923.67,2492.74,94.37,170.62,102.8,1.348,9566,219.91
5029338,387.81,1248.29,14694,11467.35,7125979,978.21,2841.65,97.28,214.53,98.9,1.467,10473,271.91
5070216,453.49,1370.68,13380.47,10671.78,7206229,1009.24,3203.96,103.07,202.18,97.6,1.56,11469,269.1
5210706,533.55,1494.27,15002.59,11570.58,7251888,1175.17,3758.62,109.91,222.51,100.1,1.456,12360,300.55
5407087,598.33,1677.77,16884.16,13120.83,7376720,1348.93,4450.55,117.15,249.01,101.7,1.424,14174,338.45
5744550,665.32,1905.84,18287.24,14468.24,7505322,1519.16,5154.23,130.22,303.41,101.5,1.456,16394,408.86
5994973,738.97,2199.14,19850.66,15444.93,7607220,1696.38,6081.86,128.51,356.99,102.3,1.438,17881,476.72
6236312,877.07,2624.24,22469.22,18951.32,7734787,1863.34,7140.32,149.87,429.36,103.4,1.474,20058,838.99
6529045,1005.37,3187.39,25316.72,20835.95,7841695,2105.54,8287.38,169.19,508.84,105.9,1.515,22114,843.14
6791495,1118.03,3615.77,27609.59,22820.89,7946154,2659.85,9138.21,172.28,557.74,97.5,1.633,24190,1107.67
7110695,1304.48,4476.38,30658.49,25011.61,8061370,3263.57,10748.28,188.57,664.06,103.2,1.638,29549,1399.16
7431755,1700.87,5243.03,34438.08,28209.74,8145797,3412.21,12423.44,204.54,710.66,105.5,1.67,34214,1535.14
7512997,1969.51,5977.27,38053.52,30490.44,8222969,3758.39,13551.21,213.76,760.49,103,1.825,37934,1579.68
7599295,2110.78,6882.85,42049.14,33156.83,8323096,4454.55,15420.14,228.46,852.56,102.6,1.906,41972,2088.14
x1,x2,x3,x4,x5,x6,y
93.18,21391758,7980207,6661555,0.373050546,0,288972
115.6,24927434,8779835,7839516,0.352215756,0,350495
114.13,28416511,9554676,8803979,0.336236774,0,443213
141.49,32039616,10509450,9733195,0.328014231,0,526377
180.52,37586166,13141254,11833760,0.349630074,0,581898
233.14,44505503,15941538,14030973,0.358192514,4636933,528365
268.07,51542283,18439550,16171817,0.357755787,5574275,816119
313.85,60818614,22270093,18921556,0.36617232,5907373,967265
355.91,71403223,26029310,22841850,0.364539707,6875421,1115007
389.47,82873816,29724781,27953721,0.358675182,9328615,1287226
392.82,91382135,31173422,31565720,0.341132564,11237237,1375085
553.89,107482828,36449611,38835933,0.33912032,13541607,1594182
596.94,124234390,41405926,45444614,0.333288762,15947698,1573830
582.52,135512072,42641557,51685711,0.314669803,3837376,1758311
560.89,154201434,47548175,59858717,0.308351056,4168317,2216017
x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,y
12004,18678,1330484,11152545,2959027,2878473,1881388,2470523,1820957,349730,433360
12549,19964,1436406,13767475,3555816,3250326,2199077,2561326,2081458,409725,479698
13286,21057,1568267,16320762,3870207,3316894,2719058,3403870,2399208,522331,540075
14122,23157,1603966,18895479,4263898,3457617,2690984,3733922,2659750,601220,613161
14693,23839,1718007,21627825,4194806,3522168,3005475,4785787,2818533,680647,650119
17585,27845,1939100,25453413,4770315,3712961,3384477,5459314,2636742,859141,793520
20601,30782,2012633,29787941,5080846,3777003,4088545,6331382,2835312,853315,892678
24801,33021,2145067,35118425,5567893,3783416,4767231,6870406,3069823,1020793,1027971
29200,38949,2228495,41646681,7038031,5041090,8389925,7507109,3400549,1166412,1235374
33114,42359,2553936,48903250,7634024,5398216,8431405,8754491,3920141,1516500,1279793
36983,43921,2878166,55607710,8173449,5246903,11076649,10134050,4591935,1476434,1516049
39696,47296,3573047,65574525,9836582,5727122,13991612,12805288,5927847,1694029,1777343
45448,51107,4363837,76419207,13053605,8116313,15351387,15613171,6985632,1988868,1625593
52697,58875,4564947,86167948,13704511,8626775,15796804,17417072,8086955,2332653,1747616
59142,70815,4725256,99643373,15724289,9969708,20881374,21828895,8969756,2412402,1623520
x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,y
12113416,18895479,10092421,559.6,2075416,31.99,3733922,80922,1053156,2690984,236416
14859261,21627825,11751668,554.5,3184744,29.87,4785787,167217,1154425,3005475,268360
17880638,25453413,13489283,566.1,3981959,30.69,5459314,154958,1434440,3384477,326556
20452183,29787941,15191582,575.2,4048305,31.63,6331382,186678,3621757,4088545,373397
24415160,35118425,16963824,582.1,5388451,28.95,6870406,219390,4196301,4767231,455820
28257805,41646681,18633437,599,7531147,24.88,7507109,376839,7068265,8389925,596693
32278717,48903250,21055373,633.1,6930269,30.85,8754491,458096,17829885,8431405,756412
34051588,55607710,26598516,612.8,7791165,23.16,10134050,485760,17019222,11076649,732282
40022658,65574525,32635731,632.4,10312744,20.42,12805288,653736,26192835,13991612,935248
45769763,76419207,34122005,664.7,9585263,22.55,15613171,668043,21639131,15351387,1061594
47206504,86167948,37583868,677.3,8256048,20.9,17417072,703733,21396742,15796804,1075045
52273431,99643373,44545508,680.7,11053751,19.7,21828895,877889,22659148,20881374,1155923
x1,x2,x3,x4,x5,x6,x7,y
13967,19091,2239.86,24927434,10216241,1755512,2199077,185625
14694,22141,2600.43,28416511,11122943,1997579,2719058,254892
13380,25583,3132.8,32039616,12113416,2071574,2690984,159684
15003,28237,3727.33,37586166,14859261,2236902,3005475,153080
16884,31025,4256.82,44505503,17880638,2255380,3384477,167379
18287,33853,5024.69,51542283,20452183,2351538,4088545,198017
19851,36321,5562.36,60818614,24415160,2039164,4767231,231794
22469,40187,5589.51,71403223,28257805,2190420,8389925,295316
25317,45365,6867.29,82873816,32278717,2205292,8431405,353372
27610,49215,7954.22,91382135,34051588,2284029,11076649,389824
30658,54807,9069.26,107482828,40022658,2387940,13991612,472154
34438,57473,10032.62,124234390,45769763,3102356,15351387,462098
38054,63752,11310.69,135512072,47206504,3268488,15796804,439592
42049,69692,12253.98,154201434,52273431,3245858,20881374,489777