Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
zengbin93
czsc
提交
7d204483
C
czsc
项目概览
zengbin93
/
czsc
通知
23
Star
2
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
4
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
C
czsc
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
4
Issue
4
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
7d204483
编写于
7月 16, 2020
作者:
Z
zengbin93
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
0.5.1: 删除文件
上级
b204d7bd
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
0 addition
and
255 deletion
+0
-255
examples/xd_end/classifier.py
examples/xd_end/classifier.py
+0
-116
examples/xd_end/make_data.py
examples/xd_end/make_data.py
+0
-129
examples/xd_end/readme.md
examples/xd_end/readme.md
+0
-10
未找到文件。
examples/xd_end/classifier.py
已删除
100644 → 0
浏览文件 @
b204d7bd
# coding: utf-8
import
os
import
pandas
as
pd
from
sklearn.linear_model
import
LogisticRegression
from
sklearn.svm
import
LinearSVC
from
sklearn.ensemble
import
AdaBoostClassifier
from
sklearn.ensemble
import
RandomForestClassifier
from
sklearn.model_selection
import
train_test_split
,
KFold
from
sklearn.metrics
import
classification_report
import
xgboost
as
xgb
# Load every 30-minute sample workbook found under ./data.
data = []
for file in os.listdir("./data"):
    if not (file.endswith("xlsx") and "30min" in file):
        continue
    file_data = f"./data/{file}"
    df_ = pd.read_excel(file_data)
    data.append(df_)
    print(f"load {file_data} success.")

df = pd.concat(data)
print("data shape(before drop duplicates): ", df.shape)

# Feature columns: five signals for each of the four K-line levels.
x_cols = [
    '1分钟分型标记', '1分钟笔标记', '1分钟线段标记', '1分钟MACD金叉', '1分钟MACD死叉',
    '5分钟分型标记', '5分钟笔标记', '5分钟线段标记', '5分钟MACD金叉', '5分钟MACD死叉',
    '30分钟分型标记', '30分钟笔标记', '30分钟线段标记', '30分钟MACD金叉', '30分钟MACD死叉',
    '日线分型标记', '日线笔标记', '日线线段标记', '日线MACD金叉', '日线MACD死叉',
]
# Label column: state of the 30-minute segment.
y_col = '30min线段状态'

# Rows with identical features and label carry no extra information.
df = df.drop_duplicates(subset=x_cols + [y_col])
print("data shape(after drop duplicates): ", df.shape)

df0 = df.loc[df[y_col] == '向上段']
df1 = df.loc[df[y_col] == '向下段']

# Down-sample the larger class so that both classes have the same size.
n_sample = min(df0.shape[0], df1.shape[0])
df = pd.concat([
    df0.sample(n=n_sample, random_state=42),
    df1.sample(n=n_sample, random_state=42),
])

X = df[x_cols].values
# Binary target: 1 for '向上段', 0 for everything else that survived the filter.
y = df[y_col].apply(lambda label: 1 if label == '向上段' else 0).values

# Fixed hold-out split plus a 5-fold splitter shared by all run_* experiments.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
def run_logistic_regression():
    """Evaluate an L1-regularised logistic regression on the prepared data.

    The model is first fitted on the module-level hold-out split
    (``X_train`` / ``y_train``) and scored on ``X_test`` / ``y_test``; it is
    then refitted and scored once per fold of the module-level ``k_fold``
    splitter over the full ``X`` / ``y``. Every classification report is
    printed to stdout.

    :return: None
    """
    # The solver is named explicitly: the default solver of recent
    # scikit-learn releases (lbfgs) does not support the L1 penalty and
    # raises ValueError, while liblinear does support it.
    model = LogisticRegression(penalty='l1', solver='liblinear', random_state=42)

    # Hold-out evaluation.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(classification_report(y_test, y_pred))

    # K-fold evaluation; the same estimator object is refitted on each fold.
    for k, (train, test) in enumerate(k_fold.split(X, y)):
        model.fit(X[train], y[train])
        y_pred = model.predict(X[test])
        print(k, "=" * 100)
        print(classification_report(y[test], y_pred), '\n\n')
def run_svc():
    """Evaluate a linear support vector classifier on the prepared data.

    Prints one classification report for the module-level hold-out split and
    one report per fold of the module-level ``k_fold`` splitter.

    :return: None
    """
    svc_params = dict(penalty='l2', tol=1e-8, max_iter=10000, random_state=42, verbose=True)
    classifier = LinearSVC(**svc_params)

    # Hold-out evaluation.
    classifier.fit(X_train, y_train)
    holdout_pred = classifier.predict(X_test)
    print(classification_report(y_test, holdout_pred))

    # K-fold evaluation over the whole data set.
    for fold, (train_idx, test_idx) in enumerate(k_fold.split(X, y)):
        classifier.fit(X[train_idx], y[train_idx])
        fold_pred = classifier.predict(X[test_idx])
        print(fold, "=" * 100)
        print(classification_report(y[test_idx], fold_pred), '\n\n')
def run_ada_boost():
    """Evaluate an AdaBoost classifier (100 estimators) on the prepared data.

    Prints one classification report for the module-level hold-out split and
    one report per fold of the module-level ``k_fold`` splitter.

    :return: None
    """
    booster = AdaBoostClassifier(n_estimators=100, random_state=0)

    # Hold-out evaluation.
    booster.fit(X_train, y_train)
    predicted = booster.predict(X_test)
    print(classification_report(y_test, predicted))

    # K-fold evaluation over the whole data set.
    for fold_no, (fit_rows, eval_rows) in enumerate(k_fold.split(X, y)):
        booster.fit(X[fit_rows], y[fit_rows])
        predicted = booster.predict(X[eval_rows])
        print(fold_no, "=" * 100)
        print(classification_report(y[eval_rows], predicted), '\n\n')
def run_random_forest():
    """Evaluate a shallow random forest on the prepared data.

    The forest uses 100 trees with a maximum depth of 2. Prints one
    classification report for the module-level hold-out split and one report
    per fold of the module-level ``k_fold`` splitter.

    :return: None
    """
    forest = RandomForestClassifier(
        n_estimators=100,
        max_depth=2,
        random_state=0,
    )

    # Hold-out evaluation.
    forest.fit(X_train, y_train)
    test_pred = forest.predict(X_test)
    print(classification_report(y_test, test_pred))

    # K-fold evaluation over the whole data set.
    for idx, (train_part, test_part) in enumerate(k_fold.split(X, y)):
        forest.fit(X[train_part], y[train_part])
        part_pred = forest.predict(X[test_part])
        print(idx, "=" * 100)
        print(classification_report(y[test_part], part_pred), '\n\n')
def run_xgboost():
    """Evaluate an XGBoost classifier with default parameters.

    Prints one classification report for the module-level hold-out split and
    one report per fold of the module-level ``k_fold`` splitter.

    :return: None
    """
    xgb_model = xgb.XGBClassifier()

    # Hold-out evaluation.
    xgb_model.fit(X_train, y_train)
    holdout_pred = xgb_model.predict(X_test)
    print(classification_report(y_test, holdout_pred))

    # K-fold evaluation; the name is rebound to whatever fit() returns.
    for fold, (train_rows, test_rows) in enumerate(k_fold.split(X, y)):
        xgb_model = xgb_model.fit(X[train_rows], y[train_rows])
        fold_pred = xgb_model.predict(X[test_rows])
        print(fold, "=" * 100)
        print(classification_report(y[test_rows], fold_pred), '\n\n')
if __name__ == '__main__':
    # Only the XGBoost experiment runs by default; uncomment one of the calls
    # below to run that experiment too.
    # run_ada_boost()
    # run_random_forest()
    run_xgboost()
examples/xd_end/make_data.py
已删除
100644 → 0
浏览文件 @
b204d7bd
# coding: utf-8
import
traceback
import
os
import
pandas
as
pd
from
copy
import
deepcopy
from
datetime
import
timedelta
,
datetime
from
cobra.data.kline
import
kline_simulator
,
get_kline
from
cobra.data.basic
import
is_trade_day
from
czsc
import
SolidAnalyze
,
KlineAnalyze
from
czsc.analyze
import
is_macd_cross
# Directory that receives the simulated signal files and the Excel exports.
data_path = "./data"
# Create it in one step; exist_ok avoids the race between checking for the
# directory and creating it.
os.makedirs(data_path, exist_ok=True)
def get_data(sa):
    """Flatten the current state of a multi-level analysis into one record.

    For every ``(freq, ka)`` pair in ``sa.kas`` the record receives five
    entries keyed by ``freq`` plus a fixed suffix: the ``fx_mark`` of the last
    element of ``ka.fx``, ``ka.bi`` and ``ka.xd`` (encoded "d" -> 0,
    "g" -> 1) and the two MACD cross flags returned by ``is_macd_cross``
    (cast to int). The record is printed before it is returned.

    :param sa: analysis object exposing ``symbol`` and the ``kas`` mapping;
        ``kas`` must contain the key '1分钟'
        (presumably a ``SolidAnalyze`` instance - confirm against callers)
    :return: dict
    """
    mark_to_int = {"d": 0, "g": 1}
    record = {"交易标的": sa.symbol, "交易时间": sa.kas['1分钟'].end_dt}

    for level, analyzer in sa.kas.items():
        # Keys are inserted in this exact order; callers persist str(record).
        record.update({
            level + "分型标记": mark_to_int[analyzer.fx[-1]['fx_mark']],
            level + "笔标记": mark_to_int[analyzer.bi[-1]['fx_mark']],
            level + "线段标记": mark_to_int[analyzer.xd[-1]['fx_mark']],
            level + "MACD金叉": int(is_macd_cross(analyzer, direction='up')),
            level + "MACD死叉": int(is_macd_cross(analyzer, direction='down')),
        })

    print(record)
    return record
def trade_simulator(ts_code, end_date, start_date, asset="E", watch_interval=5):
    """Replay a single instrument in a live-like manner to study how
    buy/sell points evolve.

    Each collected signal record is appended, one ``str(dict)`` per line, to
    ``<data_path>/<ts_code>_<start_date>_<end_date>_signals.txt`` (the dates
    appear in the file name exactly as they were passed in).

    :param ts_code: str
        instrument code, e.g. 300033.SZ
    :param end_date: str
        last date, e.g. 2020-03-12; dashes are optional
    :param start_date: str
        first date; dashes are optional
    :param asset: str
        asset type code as used by tushare
    :param watch_interval: int
        watch interval in minutes; by default the market is checked every
        5 minutes
    :return: None
    """
    file_signals = os.path.join(data_path, "%s_%s_%s_signals.txt" % (ts_code, start_date, end_date))

    # Accept both "YYYY-MM-DD" and "YYYYMMDD".
    end_date = datetime.strptime(end_date.replace("-", ""), "%Y%m%d")
    start_date = datetime.strptime(start_date.replace("-", ""), "%Y%m%d")

    while start_date <= end_date:
        # Non-trading days are skipped for asset types "E" and "I" only.
        if (asset in ["E", "I"]) and (not is_trade_day(start_date.strftime('%Y%m%d'))):
            start_date += timedelta(days=1)
            continue

        ks = kline_simulator(ts_code, trade_dt=start_date.strftime('%Y-%m-%d'), asset=asset, count=1000)
        for i, klines in enumerate(ks, 1):
            # Only every watch_interval-th simulated step is analysed.
            if i % watch_interval != 0:
                continue

            sa = SolidAnalyze(klines)
            print(sa.kas['1分钟'].end_dt)
            try:
                signals = get_data(sa)
                with open(file_signals, 'a', encoding='utf-8') as f:
                    f.write(str(signals) + "\n")
            except Exception:
                # Best effort: report the failing step and keep simulating.
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit stop a long run.
                traceback.print_exc()

        start_date += timedelta(days=1)
def make_one_day(ts_code, trade_date, asset="E"):
    """Build labelled samples around one date and export them to Excel.

    The simulator is run from the day before ``trade_date`` up to
    ``trade_date``. For each of the '1min', '5min' and '30min' frequencies
    the recorded signals are then labelled with the segment state and written
    to ``./data/<ts_code>_<start>_<end>_<freq>.xlsx``. Nothing is done when
    the day before ``trade_date`` is not a trading day.

    :param ts_code: str
        instrument code
    :param trade_date: str
        date as "YYYY-MM-DD" or "YYYYMMDD"
    :param asset: str
        asset type code forwarded to the simulator and the K-line loader
    :return: None
    :raises ValueError: if a segment endpoint carries an ``fx_mark`` other
        than 'd' or 'g'
    """
    # Bug fix: the compact form used to be parsed with the dashed format as
    # well, which made every "YYYYMMDD" input fail.
    date_format = '%Y-%m-%d' if "-" in trade_date else '%Y%m%d'
    end_date = datetime.strptime(trade_date, date_format)

    start_date = end_date - timedelta(days=1)
    # K-lines are loaded up to 30 days past the sample day so that segment
    # endpoints following each sample are available for labelling.
    end_dt = end_date + timedelta(days=30)
    start_date = start_date.strftime("%Y-%m-%d")
    end_date = end_date.strftime("%Y-%m-%d")
    if not is_trade_day(start_date):
        return

    print(f"start trade simulator on {start_date}")
    trade_simulator(ts_code=ts_code, start_date=start_date, end_date=end_date,
                    asset=asset, watch_interval=1)

    # The signal file does not depend on the frequency, so it is read once.
    file_signals = os.path.join(data_path, f"{ts_code}_{start_date}_{end_date}_signals.txt")
    # NOTE(review): eval() executes whatever the file contains. The file is
    # produced by trade_simulator, but it must never come from an untrusted
    # source; consider a safer serialisation format for the records.
    with open(file_signals, encoding='utf-8') as f:
        signals = [eval(x) for x in f]

    for freq in ['1min', '5min', '30min']:
        # A fresh frame per frequency: each export carries one label column.
        df = pd.DataFrame(signals)

        kline = get_kline(ts_code, end_dt=end_dt.strftime("%Y-%m-%d %H:%M:%S"), freq=freq, asset=asset)
        ka = KlineAnalyze(kline)
        print(kline.head(), "\n\n")

        xd = deepcopy(ka.xd)
        xd = sorted(xd, key=lambda row: row['dt'], reverse=False)
        print(xd, "\n\n")

        def ___xd_status(dt):
            # The label comes from the first segment endpoint at or after dt.
            for x in xd:
                if x['dt'] >= dt:
                    if x['fx_mark'] == 'd':
                        s = "向下段"
                    elif x['fx_mark'] == 'g':
                        s = "向上段"
                    else:
                        raise ValueError
                    return s
            # No endpoint at or after dt is known.
            return "o"

        col = f'{freq}线段状态'
        df[col] = df['交易时间'].apply(___xd_status)
        file_excel = "./data/%s_%s_%s_%s.xlsx" % (ts_code, start_date, end_date, freq)
        df.to_excel(file_excel, index=False)
if __name__ == '__main__':
    # Generate samples for the instrument below over the configured window.
    ts_code, asset = "000001.SH", 'I'
    start_date = datetime.strptime("2019-08-01", "%Y-%m-%d")
    end_date = datetime.strptime("2019-10-01", "%Y-%m-%d")

    # The cursor advances before each call, so the first processed day is the
    # one after the start date and the end date itself is included.
    while start_date < end_date:
        start_date = start_date + timedelta(days=1)
        make_one_day(ts_code, start_date.strftime("%Y-%m-%d"), asset)
examples/xd_end/readme.md
已删除
100644 → 0
浏览文件 @
b204d7bd
# 线段当下结束的判断
基本思路:
* 1)仿真交易,获取判断线段结束需要的特征,并构建线段方向的分类数据集;
* 2)训练分类器,得到模型,实盘中,输入特征,得到线段方向。
**Note:** 仿真依赖 `cobra`,执行 `pip install git+git://github.com/zengbin93/cobra.git -U` 进行安装。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录