提交 016d59cd 编写于 作者: wnma3mz's avatar wnma3mz

review code.py

上级 61a84eae
......@@ -11,8 +11,6 @@ import numpy as np
import pandas as pd
from keras.layers.core import Activation, Dense
from keras.models import Sequential
"""
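programmer_1--> basic data filtering, then splitting the records into numbered events
programmer_2--> threshold search: count the events produced by each candidate threshold and compute the slope between adjacent points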
......@@ -31,19 +29,14 @@ def programmer_1():
data = pd.read_excel(inputfile)
# dataframe处理
data[u"发生时间"] = pd.to_datetime(data[u"发生时间"], format="%Y%m%d%H%M%S")
data = data[data[u"水流量"] > 0] # 流量大于0
data = data[data[u"水流量"] > 0] # 流量大于0
d = data[u"发生时间"].diff() > threshold # 相邻时间作差分,大于threshold
data[u"事件编号"] = d.cumsum() + 1 # 通过累积求和的方式为事件编号
data.to_excel(outputfile)
# 相邻时间作差分,比较是否大于阈值
def event_num(ts):
    """Return how many events the records form when split at gap ``ts``.

    Reads ``data`` from the surrounding scope and looks only at its
    u"发生时间" column. ``ts`` is compared against the differences between
    consecutive values of that column (presumably a ``pd.Timedelta`` --
    confirm against the caller).
    """
    gaps = data[u"发生时间"].diff()
    # Each gap longer than ts is counted once; the trailing +1 accounts for
    # the first event, which has no preceding gap.
    return (gaps > ts).sum() + 1
# 相邻时间作差分,比较是否大于阈值
def programmer_2():
......@@ -60,6 +53,12 @@ def programmer_2():
# 定义阈值列
dt = [pd.Timedelta(minutes=i) for i in np.arange(1, 9, 0.25)]
h = pd.DataFrame(dt, columns=[u"阈值"])
def event_num(ts):
    """Return the number of events obtained for the gap threshold ``ts``.

    Uses ``data`` from the enclosing scope: the differences between
    consecutive u"发生时间" values are compared against ``ts``.
    """
    exceeds = data[u"发生时间"].diff() > ts
    # Number of gaps above the threshold, plus one for the first event.
    return exceeds.sum() + 1
# 计算每个阈值对应的事件数
h[u"事件数"] = h[u"阈值"].apply(event_num)
# 计算每两个相邻点对应的斜率
......@@ -90,15 +89,17 @@ def programmer_3():
# 建立神经网络模型
model = Sequential()
model.add(Dense(17, input_shape=(11,)))
model.add(Dense(17, input_shape=(11, )))
model.add(Activation("relu"))
model.add(Dense(10, input_shape=(17,)))
model.add(Dense(10, input_shape=(17, )))
model.add(Activation("relu"))
model.add(Dense(1, input_shape=(10,)))
model.add(Dense(1, input_shape=(10, )))
model.add(Activation("sigmoid"))
# 编译模型
model.compile(loss="binary_crossentropy",
optimizer="adam", sample_weight_mode="binary")
model.compile(
loss="binary_crossentropy",
optimizer="adam",
sample_weight_mode="binary")
# 训练模型
model.fit(x_train, y_train, nb_epoch=100, batch_size=1)
# 保存模型
......@@ -108,6 +109,8 @@ def programmer_3():
r = pd.DataFrame(model.predict_classes(x_test), columns=[u"预测结果"])
pd.concat([data_test.iloc[:, :5], r], axis=1).to_excel(testoutputfile)
model.predict(x_test)
return y_test
def programmer_4():
threshold = pd.Timedelta("4 min")
......@@ -123,29 +126,32 @@ def programmer_4():
data_g = data.groupby(u"事件编号")
result = pd.DataFrame()
dt = pd.Timedelta(seconds=2)
for n, g in data_g:
for _, g in data_g:
temp = pd.DataFrame(index=[0])
# 根据用水时长、开关机切换次数、总用水量推出是否是洗澡
tstart = g[u"发生时间"].min()
tend = g[u"发生时间"].max()
temp[u"用水事件时长(M)"] = (dt + tend - tstart).total_seconds() / 60
temp[u"开关机切换次数"] = (pd.Series.rolling(g[u"开关机状态"] == u"关", 2).sum() == 1).sum()
temp[u"开关机切换次数"] = (pd.Series.rolling(g[u"开关机状态"] == u"关",
2).sum() == 1).sum()
temp[u"总用水量(L)"] = g[u"水流量"].sum()
tdiff = g[u"发生时间"].diff()
if len(g[u"发生时间"]) == 1:
temp[u"总用水时长(Min)"] = dt.total_seconds() / 60
else:
temp[u"总用水时长(Min)"] = (tdiff.sum() - tdiff.iloc[1] / 2 -
tdiff.iloc[len(tdiff) - 1] / 2).total_seconds() / 60
temp[u"总用水时长(Min)"] = (
tdiff.sum() - tdiff.iloc[1] / 2 -
tdiff.iloc[len(tdiff) - 1] / 2).total_seconds() / 60
temp[u"平均水流量(L/min)"] = temp[u"总用水量(L)"] / temp[u"总用水时长(Min)"]
result = result.append(temp, ignore_index=True)
result.to_excel(outputfile)
if __name__ == "__main__":
    # Running the file as a script currently does nothing: every step is
    # commented out. Un-comment the call for the step you want to run.
    # programmer_1()
    # programmer_2()
    # programmer_3()
    # programmer_4()
    pass
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册