Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenDocCN
python_data_analysis_and_mining_action
提交
016d59cd
P
python_data_analysis_and_mining_action
项目概览
OpenDocCN
/
python_data_analysis_and_mining_action
大约 1 年 前同步成功
通知
12
Star
1527
Fork
690
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
python_data_analysis_and_mining_action
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
016d59cd
编写于
3月 04, 2018
作者:
wnma3mz
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
review code.py
上级
61a84eae
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
26 addition
and
20 deletion
+26
-20
chapter10/code.py
chapter10/code.py
+26
-20
未找到文件。
chapter10/code.py
浏览文件 @
016d59cd
...
...
@@ -11,8 +11,6 @@ import numpy as np
import
pandas
as
pd
from
keras.layers.core
import
Activation
,
Dense
from
keras.models
import
Sequential
"""
programmer_1-->简单的数据筛选,划分数据
programmer_2-->阈值寻优???不懂。。
...
...
@@ -31,19 +29,14 @@ def programmer_1():
data
=
pd
.
read_excel
(
inputfile
)
# dataframe处理
data
[
u
"发生时间"
]
=
pd
.
to_datetime
(
data
[
u
"发生时间"
],
format
=
"%Y%m%d%H%M%S"
)
data
=
data
[
data
[
u
"水流量"
]
>
0
]
# 流量大于0
data
=
data
[
data
[
u
"水流量"
]
>
0
]
# 流量大于0
d
=
data
[
u
"发生时间"
].
diff
()
>
threshold
# 相邻时间作差分,大于threshold
data
[
u
"事件编号"
]
=
d
.
cumsum
()
+
1
# 通过累积求和的方式为事件编号
data
.
to_excel
(
outputfile
)
# 相邻时间作差分,比较是否大于阈值
def
event_num
(
ts
):
d
=
data
[
u
"发生时间"
].
diff
()
>
ts
# 返回事件数
return
d
.
sum
()
+
1
# 相邻时间作差分,比较是否大于阈值
def
programmer_2
():
...
...
@@ -60,6 +53,12 @@ def programmer_2():
# 定义阈值列
dt
=
[
pd
.
Timedelta
(
minutes
=
i
)
for
i
in
np
.
arange
(
1
,
9
,
0.25
)]
h
=
pd
.
DataFrame
(
dt
,
columns
=
[
u
"阈值"
])
def
event_num
(
ts
):
d
=
data
[
u
"发生时间"
].
diff
()
>
ts
# 返回事件数
return
d
.
sum
()
+
1
# 计算每个阈值对应的事件数
h
[
u
"事件数"
]
=
h
[
u
"阈值"
].
apply
(
event_num
)
# 计算每两个相邻点对应的斜率
...
...
@@ -90,15 +89,17 @@ def programmer_3():
# 建立神经网络模型
model
=
Sequential
()
model
.
add
(
Dense
(
17
,
input_shape
=
(
11
,)))
model
.
add
(
Dense
(
17
,
input_shape
=
(
11
,
)))
model
.
add
(
Activation
(
"relu"
))
model
.
add
(
Dense
(
10
,
input_shape
=
(
17
,)))
model
.
add
(
Dense
(
10
,
input_shape
=
(
17
,
)))
model
.
add
(
Activation
(
"relu"
))
model
.
add
(
Dense
(
1
,
input_shape
=
(
10
,)))
model
.
add
(
Dense
(
1
,
input_shape
=
(
10
,
)))
model
.
add
(
Activation
(
"sigmoid"
))
# 编译模型
model
.
compile
(
loss
=
"binary_crossentropy"
,
optimizer
=
"adam"
,
sample_weight_mode
=
"binary"
)
model
.
compile
(
loss
=
"binary_crossentropy"
,
optimizer
=
"adam"
,
sample_weight_mode
=
"binary"
)
# 训练模型
model
.
fit
(
x_train
,
y_train
,
nb_epoch
=
100
,
batch_size
=
1
)
# 保存模型
...
...
@@ -108,6 +109,8 @@ def programmer_3():
r
=
pd
.
DataFrame
(
model
.
predict_classes
(
x_test
),
columns
=
[
u
"预测结果"
])
pd
.
concat
([
data_test
.
iloc
[:,
:
5
],
r
],
axis
=
1
).
to_excel
(
testoutputfile
)
model
.
predict
(
x_test
)
return
y_test
def
programmer_4
():
threshold
=
pd
.
Timedelta
(
"4 min"
)
...
...
@@ -123,29 +126,32 @@ def programmer_4():
data_g
=
data
.
groupby
(
u
"事件编号"
)
result
=
pd
.
DataFrame
()
dt
=
pd
.
Timedelta
(
seconds
=
2
)
for
n
,
g
in
data_g
:
for
_
,
g
in
data_g
:
temp
=
pd
.
DataFrame
(
index
=
[
0
])
# 根据用水时长、开关机切换次数、总用水量推出是否是洗澡
tstart
=
g
[
u
"发生时间"
].
min
()
tend
=
g
[
u
"发生时间"
].
max
()
temp
[
u
"用水事件时长(M)"
]
=
(
dt
+
tend
-
tstart
).
total_seconds
()
/
60
temp
[
u
"开关机切换次数"
]
=
(
pd
.
Series
.
rolling
(
g
[
u
"开关机状态"
]
==
u
"关"
,
2
).
sum
()
==
1
).
sum
()
temp
[
u
"开关机切换次数"
]
=
(
pd
.
Series
.
rolling
(
g
[
u
"开关机状态"
]
==
u
"关"
,
2
).
sum
()
==
1
).
sum
()
temp
[
u
"总用水量(L)"
]
=
g
[
u
"水流量"
].
sum
()
tdiff
=
g
[
u
"发生时间"
].
diff
()
if
len
(
g
[
u
"发生时间"
])
==
1
:
temp
[
u
"总用水时长(Min)"
]
=
dt
.
total_seconds
()
/
60
else
:
temp
[
u
"总用水时长(Min)"
]
=
(
tdiff
.
sum
()
-
tdiff
.
iloc
[
1
]
/
2
-
tdiff
.
iloc
[
len
(
tdiff
)
-
1
]
/
2
).
total_seconds
()
/
60
temp
[
u
"总用水时长(Min)"
]
=
(
tdiff
.
sum
()
-
tdiff
.
iloc
[
1
]
/
2
-
tdiff
.
iloc
[
len
(
tdiff
)
-
1
]
/
2
).
total_seconds
()
/
60
temp
[
u
"平均水流量(L/min)"
]
=
temp
[
u
"总用水量(L)"
]
/
temp
[
u
"总用水时长(Min)"
]
result
=
result
.
append
(
temp
,
ignore_index
=
True
)
result
.
to_excel
(
outputfile
)
if
__name__
==
"__main__"
:
# programmer_1()
# programmer_2()
# programmer_3()
# programmer_4()
pass
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录