review code.py

016d59cd · wnma3mz · 61a84eae · 016d59cd
隐藏空白更改
内联并排

Showing changes with 26 additions and 20 deletions

chapter10/code.py chapter10/code.py +26 -20

未找到文件。
--- a/chapter10/code.py
+++ b/chapter10/code.py
@@ -11,8 +11,6 @@ import numpy as np
 import pandas as pd
 from keras.layers.core import Activation, Dense
 from keras.models import Sequential
-
-
 """
 programmer_1-->简单的数据筛选，划分数据
 programmer_2-->阈值寻优？？？不懂。。
@@ -31,19 +29,14 @@ def programmer_1():
    data = pd.read_excel(inputfile)
    # dataframe处理
    data[u"发生时间"] = pd.to_datetime(data[u"发生时间"], format="%Y%m%d%H%M%S")
-    data = data[data[u"水流量"] > 0]   # 流量大于0
+    data = data[data[u"水流量"] > 0]  # 流量大于0
    d = data[u"发生时间"].diff() > threshold  # 相邻时间作差分，大于threshold
    data[u"事件编号"] = d.cumsum() + 1  # 通过累积求和的方式为事件编号

    data.to_excel(outputfile)

-# 相邻时间作差分，比较是否大于阈值

-
-def event_num(ts):
-    d = data[u"发生时间"].diff() > ts
-    # 返回事件数
-    return d.sum() + 1
+# 相邻时间作差分，比较是否大于阈值


 def programmer_2():
@@ -60,6 +53,12 @@ def programmer_2():
    # 定义阈值列
    dt = [pd.Timedelta(minutes=i) for i in np.arange(1, 9, 0.25)]
    h = pd.DataFrame(dt, columns=[u"阈值"])
+
+    def event_num(ts):
+        d = data[u"发生时间"].diff() > ts
+        # 返回事件数
+        return d.sum() + 1
+
    # 计算每个阈值对应的事件数
    h[u"事件数"] = h[u"阈值"].apply(event_num)
    # 计算每两个相邻点对应的斜率
@@ -90,15 +89,17 @@ def programmer_3():

    # 建立神经网络模型
    model = Sequential()
-    model.add(Dense(17, input_shape=(11,)))
+    model.add(Dense(17, input_shape=(11, )))
    model.add(Activation("relu"))
-    model.add(Dense(10, input_shape=(17,)))
+    model.add(Dense(10, input_shape=(17, )))
    model.add(Activation("relu"))
-    model.add(Dense(1, input_shape=(10,)))
+    model.add(Dense(1, input_shape=(10, )))
    model.add(Activation("sigmoid"))
    # 编译模型
-    model.compile(loss="binary_crossentropy",
-                  optimizer="adam", sample_weight_mode="binary")
+    model.compile(
+        loss="binary_crossentropy",
+        optimizer="adam",
+        sample_weight_mode="binary")
    # 训练模型
    model.fit(x_train, y_train, nb_epoch=100, batch_size=1)
    # 保存模型
@@ -108,6 +109,8 @@ def programmer_3():
    r = pd.DataFrame(model.predict_classes(x_test), columns=[u"预测结果"])
    pd.concat([data_test.iloc[:, :5], r], axis=1).to_excel(testoutputfile)
    model.predict(x_test)
+    return y_test
+

 def programmer_4():
    threshold = pd.Timedelta("4 min")
@@ -123,29 +126,32 @@ def programmer_4():
    data_g = data.groupby(u"事件编号")
    result = pd.DataFrame()
    dt = pd.Timedelta(seconds=2)
-    
-    for n, g in data_g:
-        
+
+    for _, g in data_g:
        temp = pd.DataFrame(index=[0])
        # 根据用水时长、开关机切换次数、总用水量推出是否是洗澡
        tstart = g[u"发生时间"].min()
        tend = g[u"发生时间"].max()
        temp[u"用水事件时长（M）"] = (dt + tend - tstart).total_seconds() / 60
-        temp[u"开关机切换次数"] = (pd.Series.rolling(g[u"开关机状态"] == u"关", 2).sum() == 1).sum()
+        temp[u"开关机切换次数"] = (pd.Series.rolling(g[u"开关机状态"] == u"关",
+                                              2).sum() == 1).sum()
        temp[u"总用水量（L）"] = g[u"水流量"].sum()
        tdiff = g[u"发生时间"].diff()
        if len(g[u"发生时间"]) == 1:
            temp[u"总用水时长（Min）"] = dt.total_seconds() / 60
        else:
-            temp[u"总用水时长（Min）"] = (tdiff.sum() - tdiff.iloc[1] / 2 -
-                         tdiff.iloc[len(tdiff) - 1] / 2).total_seconds() / 60
+            temp[u"总用水时长（Min）"] = (
+                tdiff.sum() - tdiff.iloc[1] / 2 -
+                tdiff.iloc[len(tdiff) - 1] / 2).total_seconds() / 60
        temp[u"平均水流量（L/min）"] = temp[u"总用水量（L）"] / temp[u"总用水时长（Min）"]
        result = result.append(temp, ignore_index=True)

    result.to_excel(outputfile)

+
 if __name__ == "__main__":
    # programmer_1()
    # programmer_2()
    # programmer_3()
    # programmer_4()
+    pass
\ No newline at end of file