Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
1024程序员开源挑战赛
提交
33529055
1
1024程序员开源挑战赛
项目概览
机器未来
/
1024程序员开源挑战赛
与 Fork 源项目一致
Fork自
GitCode / 1024程序员开源挑战赛(10.23-11.14)
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
1
1024程序员开源挑战赛
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
33529055
编写于
11月 02, 2022
作者:
Z
zhoushimin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
YaoLowCodeDemo
上级
3433b8a0
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
8 addition
and
730 deletion
+8
-730
demo-widget
demo-widget
+1
-0
fund_tools.py
fund_tools.py
+0
-730
start.sh
start.sh
+7
-0
未找到文件。
demo-widget
@
2489060e
Subproject commit 2489060e09630dff334e88b8e51e1efa1bfa1989
fund_tools.py
已删除
100644 → 0
浏览文件 @
3433b8a0
from
asyncio.windows_events
import
NULL
from
cProfile
import
label
import
akshare
as
ak
import
pandas
as
pd
import
numpy
as
np
from
matplotlib
import
pyplot
as
plt
from
matplotlib.pyplot
import
MultipleLocator
import
matplotlib.dates
as
mdate
import
requests
from
bs4
import
BeautifulSoup
import
datetime
import
time
import
math
import
os
import
re
import
random
from
decimal
import
Decimal
# Directory names for cached fund data files.
dir_cumulative_net_value_trend = 'cumulative_net_value_trend'
dir_cumulative_return_trend = 'cumulative_return_trend'

# Constants commonly used in fund analysis.
# Risk-free annual rate of return.
risk_free_annual_return_ratio = 0.0275
# Number of trading days per year.
trading_days_per_year = 250

# Pool of User-Agent strings.
user_agent_list = [
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36',
]

# Pool of Referer URLs (fund pages 110022 through 110025).
referer_list = [
    'http://fund.eastmoney.com/%d.html' % fund_code
    for fund_code in range(110022, 110026)
]

# Request headers built once, when this statement runs, from one random
# User-Agent and one random Referer.
header = {
    'User-Agent': random.choice(user_agent_list),
    'Referer': random.choice(referer_list),
}
# Extract the numeric text contained in a string.
def get_decimal(x):
    """Return every number found in *x*, concatenated into one string.

    *x* is a string or an iterable of strings; its pieces are joined before
    searching. A number is a run of digits, optionally followed by a decimal
    point and further digits. When several numbers are present they are
    glued together without a separator; when none is present the result is
    the empty string.
    """
    text = ''.join(x)
    numbers = re.findall(r"\d+\.?\d*", text)
    return ''.join(numbers)
# Extract the numeric text that is directly followed by a given suffix.
def get_decimal_suffix(x, suffix):
    """Return the numbers in *x* that are immediately followed by *suffix*.

    *x* is a string or an iterable of strings; its pieces are joined before
    searching. *suffix* is appended to the number pattern as-is, so it is
    interpreted as regular-expression text. The digits of every
    ``<number><suffix>`` occurrence are concatenated into one string; the
    result is empty when nothing matches.
    """
    number_pattern = r"\d+\.?\d*"
    tagged = re.findall(number_pattern + suffix, ''.join(x))
    # Second pass: strip the suffix again and keep only the numeric parts.
    return ''.join(re.findall(number_pattern, ''.join(tagged)))
# Debug logging helper.
def log(x):
    """Append the text *x* to ``log.txt`` in the current working directory.

    The text is written unchanged: no newline or timestamp is added.
    """
    # The context manager closes the file even when the write raises.
    with open('log.txt', mode='a', encoding='utf-8') as file_handle:
        file_handle.write(x)
# Load the fund outline table.
def get_all_fund_outline():
    """Return the fund outline table, downloading it on first use.

    When ``fund_em_fund_name_df.csv`` exists it is read back with every
    column loaded as ``object``. Otherwise the fund name table (reduced to
    the columns ``基金代码``, ``拼音缩写`` and ``基金类型``) and the open-fund
    daily table are fetched through akshare, merged on ``基金代码``, written
    to that CSV, printed and returned.

    Returns:
        pandas.DataFrame: the merged outline table.
    """
    cache_file = 'fund_em_fund_name_df.csv'
    if os.path.exists(cache_file):
        return pd.read_csv(cache_file, dtype=object)

    names = ak.fund_name_em()[['基金代码', '拼音缩写', '基金类型']]
    daily = ak.fund_open_fund_daily_em()
    df = pd.merge(names, daily, on='基金代码')
    df.to_csv(cache_file, encoding='utf_8_sig', index=None)
    print(df)
    return df
# Load the daily net-value table of open-end (public) funds.
def get_all_open_fund_daily():
    """Return the open-fund daily table, downloading it on first use.

    The table is cached in ``fund_em_open_fund_daily.csv``. The existence
    check uses that same file name, so a cache written by one run is picked
    up by the next one. A cached table is read with every column loaded as
    ``object``.

    Returns:
        pandas.DataFrame: the daily table.
    """
    cache_file = 'fund_em_open_fund_daily.csv'
    if os.path.exists(cache_file):
        return pd.read_csv(cache_file, dtype=object)

    fund_em_fund_name_df = ak.fund_open_fund_daily_em()
    fund_em_fund_name_df.to_csv(cache_file, encoding='utf_8_sig', index=None)
    return fund_em_fund_name_df
# Filter the fund list by a keyword in the fund's short name.
def query_fund_by_fundname_keyword(df, fundname_keyword):
    """Return the rows of *df* whose ``基金简称`` contains *fundname_keyword*.

    The keyword is handed to ``Series.str.contains`` unchanged. Rows whose
    name is missing never match. The selection is also written to
    ``<fundname_keyword>.csv`` in the current working directory.

    Args:
        df: fund table with a ``基金简称`` column.
        fundname_keyword: text to look for; also used as the CSV file stem.

    Returns:
        pandas.DataFrame: the matching rows.
    """
    # na=False keeps the mask purely boolean when some names are missing;
    # a mask containing NaN cannot be used for row selection.
    matches = df['基金简称'].str.contains(fundname_keyword, na=False)
    df_query = df[matches]
    df_query.to_csv(fundname_keyword + '.csv', encoding='utf_8_sig', index=None)
    return df_query
# Load the dividend history of one fund.
def get_fund_dividend_by_id(dir, fund_id):
    """Return the dividend table of *fund_id*, cached as a CSV under *dir*.

    The cache file is ``<dir>/<fund_id>-dividend.csv``. When it exists it is
    read with every column loaded as ``object``. Otherwise the table is
    fetched through akshare, printed, its ``每份分红`` column is reduced to
    the numeric text of each entry, and the result is saved to the cache.

    Args:
        dir: cache directory; created when it does not exist yet.
        fund_id: fund code.

    Returns:
        pandas.DataFrame on success. On any failure a message is printed and
        the ``NULL`` sentinel imported at the top of this file is returned.
    """
    filename = f'{dir}/{fund_id}-dividend.csv'
    try:
        if os.path.exists(filename):
            return pd.read_csv(filename, dtype=object)

        df = ak.fund_open_fund_info_em(fund=fund_id, indicator="分红送配详情")
        print(df)
        # Replace the whole column at once so the cleaned values land in df
        # itself; writing through df.iloc[i][column] may only update a
        # temporary row object.
        df['每份分红'] = [get_decimal(payout) for payout in df['每份分红']]
        # Saving into a directory that does not exist raises, which would
        # make every call fail and the dividends be ignored.
        if dir:
            os.makedirs(dir, exist_ok=True)
        df.to_csv(filename, encoding='utf_8_sig', index=None)
        return df
    except Exception as e:
        print(fund_id, '分红数据读取失败:', e)
        return NULL
# Look up the dividend that applies to a given date.
def query_last_dividend_before_date(df, cur_date):
    """Return the ``每份分红`` of a dividend paid strictly before *cur_date*.

    ``str(cur_date)`` is compared as text with each row's ``分红发放日``.
    Rows are visited from the bottom of the table to the top and the value
    is overwritten on every match, so the matching row closest to the top
    of the table wins.
    NOTE(review): that is the most recent payout only if the table lists the
    newest dividend first - confirm against the data source.

    Args:
        df: dividend table; anything that is not a usable table (for
            example a failure sentinel) yields ``'0.0'``.
        cur_date: date to look up.

    Returns:
        The stored ``每份分红`` value of the selected row, or ``'0.0'`` when
        no row matches or the lookup fails.
    """
    dividend = '0.0'
    try:
        cutoff = str(cur_date)
        for pos in range(df.shape[0] - 1, -1, -1):
            row = df.iloc[pos]
            if cutoff > row['分红发放日']:
                dividend = row['每份分红']
        return dividend
    except Exception:
        # Best effort: a missing or malformed table means "no dividend".
        return '0.0'
# Load the net-value history of one fund.
def get_fund_his_by_id(fund_id, dir):
    """Return the net-value history of *fund_id*, cached under *dir*.

    The cache file is ``<dir>/<fund_id>.csv``. When it exists it is read
    with every column loaded as ``object``. Otherwise the cumulative and the
    unit net-value series are fetched through akshare and merged on
    ``净值日期``, ``日增长率`` is converted to float, and a ``复权净值``
    column is derived row by row:

    * if a dividend applies to the row's date, the previous ``复权净值`` is
      multiplied by ``1 + 日增长率 / 100``;
    * otherwise the row's ``累计净值`` is used.

    The finished table is then saved to the cache.

    Args:
        fund_id: fund code.
        dir: cache directory; created when it does not exist yet.

    Returns:
        pandas.DataFrame with the history. ``''`` is returned when a row
        holds a value that cannot be converted, or when nothing could be
        loaded at all. If a later step fails (the error is printed) the
        table is returned in whatever state it had reached.
    """
    filename = f'{dir}/{fund_id}.csv'
    # Returned as-is when the very first download/read fails, instead of
    # hitting an unbound local at the final return.
    fund_em_info_df = ''
    try:
        if os.path.exists(filename):
            fund_em_info_df = pd.read_csv(filename, dtype=object)
        else:
            df_total_net_value_trend = ak.fund_open_fund_info_em(fund=fund_id, indicator='累计净值走势')
            df_unit_net_value_trend = ak.fund_open_fund_info_em(fund=fund_id, indicator="单位净值走势")
            fund_em_info_df = pd.merge(df_total_net_value_trend, df_unit_net_value_trend, on='净值日期')
            fund_em_info_df['日增长率'] = fund_em_info_df['日增长率'].astype(float)

            df_dividend = get_fund_dividend_by_id(dir, fund_id)

            # Dividend-adjusted net values, one per row.
            red_net_value_list = []
            last_red_net_value = 0.0
            rows = zip(fund_em_info_df['净值日期'],
                       fund_em_info_df['日增长率'],
                       fund_em_info_df['累计净值'])
            for nav_date, growth, cumulative in rows:
                dividend = float(query_last_dividend_before_date(df_dividend, nav_date))
                if dividend > 0.0:
                    try:
                        last_red_net_value = float(last_red_net_value * (1 + growth / 100))
                    except (TypeError, ValueError):
                        print(fund_id, '日增长率数据异常:', growth)
                        return ''
                else:
                    try:
                        last_red_net_value = float(cumulative)
                    except (TypeError, ValueError):
                        print(fund_id, '累计净值数据异常:', cumulative)
                        return ''
                red_net_value_list.append(last_red_net_value)

            fund_em_info_df['复权净值'] = red_net_value_list
            # Saving into a directory that does not exist raises, which
            # would leave the cache permanently empty.
            if dir:
                os.makedirs(dir, exist_ok=True)
            fund_em_info_df.to_csv(filename, encoding='utf_8_sig', index=None)
    except Exception as e:
        print(f"错误信息:{e}")
    return fund_em_info_df
# Plot the net-value trend of the portfolio against a reference series.
def draw_cumulative_net_value_trend(x, y, y_hushen300):
    """Show a line chart of *y* and *y_hushen300* over *x*.

    *y* is drawn in red and labelled "自选组合"; *y_hushen300* is drawn in
    blue and labelled "沪深300-110020". Two matplotlib rcParams are changed
    globally, and ``plt.show()`` is called at the end.
    """
    settings = (
        ("font.sans-serif", ["SimHei"]),   # font used for the labels
        ("axes.unicode_minus", False),     # avoids a garbled "-" minus sign
    )
    for key, value in settings:
        plt.rcParams[key] = value

    _, axes = plt.subplots()
    axes.set_title("基金走势图")
    axes.set_xlabel("时间")
    axes.set_ylabel("基金累计净值")

    curves = (
        (y, 'r', "自选组合"),
        (y_hushen300, 'b', "沪深300-110020"),
    )
    for values, colour, caption in curves:
        axes.plot(x, values, color=colour, label=caption)

    axes.legend()
    plt.show()
# Compute how long the fund history spans.
def calc_fund_established_time(df):
    """Return the span covered by *df* in years (days / 365).

    The span is measured between the first-column value of the first row
    and the first-column value of the last row; their difference must
    expose a ``days`` attribute.
    """
    first_day = df.iloc[0, 0]
    last_day = df.iloc[-1, 0]
    span = last_day - first_day
    return span.days / 365
# Compute the maximum drawdown of a fund.
def calc_fund_drawdown(fund_df, start='1970-01-01', end='2200-01-01'):
    """Return the maximum drawdown of ``复权净值`` between *start* and *end*.

    ``净值日期`` is converted with ``pd.to_datetime`` and only rows whose
    date lies inside the inclusive window are used; rows without a valid
    date are left out. For every row the value is divided by the running
    maximum so far; the row with the smallest quotient marks the end of the
    drawdown, and the row with the highest value up to that date marks its
    start. The three results are also printed.

    Args:
        fund_df: table with ``净值日期`` and numeric ``复权净值`` columns.
        start: first date of the window (inclusive).
        end: last date of the window (inclusive).

    Returns:
        tuple: ``(drawdown, start_date, end_date)`` where ``drawdown`` is a
        percentage rounded to two decimals.

    Raises:
        IndexError: when no row falls inside the window.
    """
    df = fund_df[['净值日期', '复权净值']].copy()
    df['净值日期'] = pd.to_datetime(df['净值日期'])

    # Both bounds must hold. Comparing the two masks for equality would also
    # keep rows for which both comparisons are False, such as missing dates.
    in_window = (df['净值日期'] >= start) & (df['净值日期'] <= end)
    df = df[in_window].copy()

    df['max2here'] = df['复权净值'].expanding().max()
    df['dd2here'] = df['复权净值'] / df['max2here']

    trough = df.sort_values(by=['dd2here']).iloc[0]
    end_date, remains = trough['净值日期'], trough['dd2here']

    # The drawdown starts at the highest value reached up to the trough.
    up_to_trough = df[df['净值日期'] <= end_date]
    start_date = up_to_trough.sort_values(by='复权净值', ascending=False).iloc[0]['净值日期']

    drawdown = round((1 - remains) * 100, 2)
    print('最大回撤 (%):', drawdown)
    print('最大回撤开始时间:', start_date)
    print('最大回撤结束时间:', end_date)
    return drawdown, start_date, end_date
# Compute the annualized and the total return of a fund.
def cal_fund_annual_return(fund_df, start='1970-01-01', end='2200-01-01'):
    """Return the annualized and total return of ``复权净值`` in a window.

    Only rows whose ``净值日期`` lies between *start* and *end* (inclusive)
    are used. The growth factor is the last value divided by the first one.
    When the window spans at least one year (rounded to two decimals) the
    factor is annualized with ``factor ** (1 / years) - 1``; for shorter
    windows the plain return ``factor - 1`` is reported. The annualized
    figure is also printed.

    Args:
        fund_df: table whose ``净值日期`` values can be subtracted from each
            other and whose ``复权净值`` column is numeric.
        start: first date of the window (inclusive).
        end: last date of the window (inclusive).

    Returns:
        tuple: ``(annualized_returns, total_return)``, both in percent;
        ``total_return`` is rounded to two decimals.

    Raises:
        IndexError: when no row falls inside the window.
    """
    df = fund_df[['净值日期', '复权净值']].copy()

    # Both bounds must hold. Comparing the two masks for equality would also
    # keep rows for which both comparisons are False, such as missing dates.
    in_window = (df['净值日期'] >= start) & (df['净值日期'] <= end)
    df = df[in_window]

    growth_factor = df.iloc[-1, 1] / df.iloc[0, 1]
    total_return = round((growth_factor - 1) * 100, 2)

    years = round(calc_fund_established_time(df), 2)
    if years >= 1:
        annualized_returns = growth_factor ** (1 / years) - 1
    else:
        annualized_returns = growth_factor - 1
    annualized_returns = annualized_returns * 100

    print('annualized_returns:%f' % annualized_returns + '%')
    return annualized_returns, total_return
# Compute the Sharpe ratio and the Calmar ratio of a fund.
def cal_fund_sharpe_ratio(fund_df, withdrawal, start='1970-01-01', end='2200-01-01'):
    """Return ``(sharpe_ratio, calmar_ratio)`` for a date window.

    Only rows whose ``净值日期`` lies between *start* and *end* (inclusive)
    are used. Both ratios are built from the mean of ``日增长率`` times 100
    minus the daily risk-free rate in percent; the Sharpe figure divides
    that by the standard deviation of ``日增长率`` and scales it with
    ``sqrt(trading_days_per_year) / 100``, the Calmar figure divides it by
    *withdrawal* and multiplies by 100. Both results are also printed.

    NOTE(review): elsewhere in this file ``日增长率`` is divided by 100 when
    net values are compounded, which suggests it is already a percentage.
    If so, the extra ``* 100`` on the mean makes it 100 times larger than
    the risk-free term it is compared with - confirm the intended units
    before relying on these figures.

    Args:
        fund_df: table with ``净值日期`` and ``日增长率`` columns.
        withdrawal: maximum drawdown used as the Calmar denominator.
        start: first date of the window (inclusive).
        end: last date of the window (inclusive).

    Returns:
        tuple: ``(fund_sharpe_ratio, fund_calmar_ratio)``; a ratio is 0.0
        when its computation raises.
    """
    df = fund_df[['净值日期', '日增长率']].copy()

    # Both bounds must hold. Comparing the two masks for equality would also
    # keep rows for which both comparisons are False, such as missing dates.
    in_window = (df['净值日期'] >= start) & (df['净值日期'] <= end)
    daily_growth = df.loc[in_window, '日增长率'].astype(float)

    # Mean of the daily returns.
    daily_return_ratio_average = np.mean(daily_growth) * 100
    # Standard deviation of the daily returns.
    return_ratio_std = daily_growth.std()
    # Daily risk-free rate derived from the annual rate.
    daily_risk_free_return_ratio = (((1 + risk_free_annual_return_ratio) ** (1 / 365)) - 1) * 100

    excess_return = daily_return_ratio_average - daily_risk_free_return_ratio

    # Sharpe ratio.
    try:
        fund_sharpe_ratio = excess_return / return_ratio_std * math.sqrt(trading_days_per_year) / 100
    except (ArithmeticError, TypeError, ValueError):
        fund_sharpe_ratio = 0.0

    # Calmar ratio.
    try:
        fund_calmar_ratio = excess_return / withdrawal * 100
    except (ArithmeticError, TypeError, ValueError):
        fund_calmar_ratio = 0.0

    print('sharpe_ratio:%f' % fund_sharpe_ratio + ', calmar_ratio:%f' % fund_calmar_ratio)
    return fund_sharpe_ratio, fund_calmar_ratio
# Compute the volatility of a fund.
def calc_fund_volatility(df_fund, start='1970-01-01', end='2200-01-01'):
    """Return the annualized volatility of ``复权净值`` in percent.

    Only rows whose ``净值日期`` lies between *start* and *end* (inclusive)
    are used. For each row the difference between the natural logarithm of
    ``复权净值`` and that of ``上一交易日复权净值`` is taken; the standard
    deviation of those differences is multiplied by 100 and by
    ``sqrt(trading_days_per_year)``. The result is also printed.

    Args:
        df_fund: table with ``净值日期``, ``复权净值`` and
            ``上一交易日复权净值`` columns (the latter two numeric).
        start: first date of the window (inclusive).
        end: last date of the window (inclusive).

    Returns:
        The volatility figure.
    """
    df = df_fund[['净值日期', '复权净值', '上一交易日复权净值']].copy()

    # Both bounds must hold. Comparing the two masks for equality would also
    # keep rows for which both comparisons are False, such as missing dates.
    in_window = (df['净值日期'] >= start) & (df['净值日期'] <= end)
    # Copy so the new column is added to an independent table.
    df = df[in_window].copy()

    df['价格自然对数差'] = np.log(df['复权净值']) - np.log(df['上一交易日复权净值'])
    volitality = np.std(df['价格自然对数差']) * 100 * math.sqrt(trading_days_per_year)

    print('volitality:%f' % volitality)
    return volitality
# Compute the KPI set of one fund.
def calc_fund_kpi(fund_id, df_cumulative_net_value_trend, start='1970-01-01', end='2200-01-01'):
    """Return a dict with the key figures of one fund.

    A working copy of the ``净值日期``, ``复权净值`` and ``日增长率`` columns
    is extended with the previous row's ``复权净值``; the date column is
    converted with ``pd.to_datetime`` and both value columns to float. The
    figures are then computed in this order: time span of the whole history,
    maximum drawdown, annualized/total return, Sharpe/Calmar ratio and
    volatility (the last four restricted to *start*..*end*). The time span
    and the finished dict are printed.

    Returns:
        dict with the keys ``code``, ``years``, ``withdrawal``,
        ``annual_return``, ``total_return``, ``sharp``, ``calmar`` and
        ``volatility``; every figure is rounded to two decimals except
        ``calmar``, which keeps four.
    """
    df = df_cumulative_net_value_trend[['净值日期', '复权净值', '日增长率']].copy()

    # Previous trading day's adjusted net value.
    df['上一交易日复权净值'] = df['复权净值'].shift(1)

    # Type conversions.
    df['净值日期'] = pd.to_datetime(df['净值日期'])
    for column in ('复权净值', '上一交易日复权净值'):
        df[column] = df[column].astype(float)

    kpi = {'code': fund_id}

    # Time span of the history.
    years = calc_fund_established_time(df)
    kpi['years'] = round(years, 2)
    whole_years = int(years)
    print('基金[%s]成立时间:%d年%d天' % (fund_id, whole_years, (years - whole_years) * 365))

    # Maximum drawdown.
    withdrawal, start_date, end_date = calc_fund_drawdown(df, start, end)
    kpi['withdrawal'] = round(withdrawal, 2)

    # Annualized and total return.
    annual_return, total_return = cal_fund_annual_return(df, start, end)
    kpi['annual_return'] = round(annual_return, 2)
    kpi['total_return'] = round(total_return, 2)

    # Sharpe and Calmar ratio.
    sharpe_ratio, calmar_ratio = cal_fund_sharpe_ratio(df, withdrawal, start, end)
    kpi['sharp'] = round(sharpe_ratio, 2)
    kpi['calmar'] = round(calmar_ratio, 4)

    # Volatility.
    kpi['volatility'] = round(calc_fund_volatility(df, start, end), 2)

    print(kpi)
    return kpi
# Pool of User-Agent strings. This rebinds the name that is already defined
# near the top of the file.
user_agent_list = [
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36',
]

# Pool of Referer URLs. This also rebinds an earlier name; the third entry is
# the site root here.
referer_list = [
    'http://fund.eastmoney.com/110022.html',
    'http://fund.eastmoney.com/110023.html',
    'http://fund.eastmoney.com/',
    'http://fund.eastmoney.com/110025.html',
]
# Download a web page.
def get_url(baseUrl, timeout=10):
    """Return the body of *baseUrl* as text, or ``None`` on failure.

    The request carries a ``User-Agent`` and a ``Referer`` header, each
    picked at random from the module-level pools. A message is printed when
    the request fails or the status code is not 200.

    Args:
        baseUrl: address to fetch.
        timeout: seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        str with the response text, or ``None``.
    """
    headers = {
        'User-Agent': random.choice(user_agent_list),
        'Referer': random.choice(referer_list),
    }
    try:
        resp = requests.get(baseUrl, headers=headers, timeout=timeout)
    except requests.RequestException:
        print("没有爬取到相应的内容")
        return None

    if resp.status_code == 200:
        return resp.text
    print("没有爬取到相应的内容")
    return None
# Read the fee figures of one fund from its fee page.
def get_fund_fee(code):
    """Return the scale and fee rates scraped for fund *code*.

    The page ``http://fundf10.eastmoney.com/jjfl_<code>.html`` is fetched and
    its ``span``, ``td`` and ``label`` elements are walked in document
    order. The text of the element that follows a label is stored as that
    label's value: a text containing ``资产规模`` announces ``scale``; the
    exact texts ``管理费率``, ``托管费率`` and ``销售服务费率`` announce
    ``m_fee``, ``c_fee`` and ``sale_fee``. The walk stops once ``sale_fee``
    has been stored. Every element text is also passed to ``log``.

    Afterwards ``scale`` is reduced to the number in front of ``亿元`` and
    the three fees to their numeric text, each converted to float; a value
    that cannot be converted becomes 0.0. When the page cannot be
    downloaded all four figures are 0.0.

    Returns:
        dict with the keys ``基金代码``, ``scale``, ``m_fee``, ``c_fee``,
        ``sale_fee`` and ``sub_fee`` (the last one is left as ``''``).
    """
    dict_select = {
        '基金代码': code,   # fund code
        'scale': '',        # fund scale
        'm_fee': '',        # management fee
        'c_fee': '',        # custody fee
        'sale_fee': '',     # sales service fee
        'sub_fee': '',      # subscription fee
    }
    exact_labels = {
        '管理费率': 'm_fee',
        '托管费率': 'c_fee',
        '销售服务费率': 'sale_fee',
    }

    url = 'http://fundf10.eastmoney.com/jjfl_%s' % code + '.html'
    html = get_url(url)
    # get_url returns None on failure; parse an empty document in that case
    # so the figures fall back to 0.0 instead of the parser raising.
    soup = BeautifulSoup(html or '', 'html.parser')

    target = ''
    for co in soup.find_all(['span', 'td', 'label']):
        text = co.text
        log('co.text:%s' % text)
        if target != '':
            # The previous element was a label: this text is its value.
            dict_select[target] = text
            if target == 'sale_fee':
                break
            target = ''

        if text.find('资产规模') >= 0:
            target = 'scale'
        else:
            target = exact_labels.get(text, '')

    for key in ('scale', 'm_fee', 'c_fee', 'sale_fee'):
        raw = dict_select[key]
        try:
            if key == 'scale':
                number = get_decimal_suffix(raw, '亿元')
            else:
                number = get_decimal(raw)
            dict_select[key] = float(number)
        except (TypeError, ValueError):
            dict_select[key] = 0.0

    print(dict_select)
    return dict_select
# Build the fee table for a list of funds.
def get_fund_fee_list(df):
    """Return the fee table for the funds in *df*, crawling it on first use.

    When ``fund_fee_list.csv`` exists it is read with every column loaded as
    ``object``. Otherwise ``get_fund_fee`` is called for every ``基金代码``
    in *df*, with a one-second pause after each fund; the row's ``手续费``
    is stored as ``sub_fee``. A fund whose fees cannot be read is reported,
    counted and skipped, so one bad page does not discard the figures
    collected so far. The table is saved to the CSV and the number of
    failures is printed.

    Args:
        df: fund table with ``基金代码`` and ``手续费`` columns.

    Returns:
        pandas.DataFrame: one row per fund that could be read.
    """
    filename = 'fund_fee_list.csv'
    if os.path.exists(filename):
        return pd.read_csv(filename, dtype=object)

    err_count = 0
    fee_list = []
    for fund_code, sub_fee in zip(df['基金代码'], df['手续费']):
        print('基金编码[%s' % fund_code + ']正在读取费用...')
        try:
            dict_item = get_fund_fee(fund_code)
            dict_item['sub_fee'] = sub_fee
            fee_list.append(dict_item)
        except Exception as e:
            err_count = err_count + 1
            print(f"异常信息:{e}")
            print('基金编码[%s' % fund_code + ']读取费用失败...')
        # Pause between requests.
        time.sleep(1)

    df_fee = pd.DataFrame(fee_list)
    df_fee.to_csv(filename, encoding='utf_8_sig', index=None)
    print('读取费率失败次数:%d' % err_count)
    return df_fee
# Rank the funds that match a keyword.
def get_fund_rank(df_in, keywords, max_withdrawal, establish_year=3.0, start='1970-01-01', end='2200-01-01'):
    """Return the KPI ranking of the funds whose name contains *keywords*.

    Funds are selected with ``query_fund_by_fundname_keyword``; those whose
    ``基金类型`` contains ``场内`` (or is missing) are dropped. For each
    remaining fund the history is loaded and the KPIs are computed; when
    *keywords* is exactly ``'债'`` a fund is skipped unless its largest
    ``日增长率`` is at most 2.0. A fund that raises is reported and skipped.

    The KPI table is then restricted to funds with ``years`` of at least
    *establish_year* and ``withdrawal`` of at most *max_withdrawal*, a
    ``total_fee`` column (sum of ``sub_fee``, ``m_fee``, ``c_fee`` and
    ``sale_fee``; 0 when the sum cannot be computed) is added, and the rows
    are sorted by ``sharp``, ``withdrawal`` and ``calmar`` descending, then
    ``total_fee`` ascending. The result is written to
    ``fund_rank-<keywords>.xlsx`` and printed.

    NOTE(review): ``withdrawal`` is sorted descending, i.e. among equal
    ``sharp`` values the larger drawdown ranks first - confirm this is
    intended.

    Args:
        df_in: fund table carrying the name, type and fee columns.
        keywords: text looked for in the fund name.
        max_withdrawal: largest accepted maximum drawdown, in percent.
        establish_year: smallest accepted history length, in years.
        start: first date of the KPI window (inclusive).
        end: last date of the KPI window (inclusive).

    Returns:
        pandas.DataFrame: the ranking. It is empty, and no Excel file is
        written, when no fund produced KPIs.
    """
    fund_kpi_list = []

    # Select by keyword, then drop exchange-traded entries.
    df_dist = query_fund_by_fundname_keyword(df_in, keywords)
    on_exchange = df_dist['基金类型'].str.contains('场内', na=True).astype(bool)
    df_dist = df_dist[~on_exchange]

    for _, fund in df_dist.iterrows():
        fund_code = fund['基金代码']
        try:
            print('基金编码[%s' % fund_code + ']正在读取...')
            # Load the net-value history.
            df_cumulative_net_value_trend = get_fund_his_by_id(fund_code, dir_cumulative_net_value_trend)
            print('历史数据已加载...')

            if keywords == '债':
                largest_daily_growth = df_cumulative_net_value_trend['日增长率'].astype(float).max(skipna=True)
                if not (largest_daily_growth <= 2.0):
                    continue

            # Compute the KPIs.
            dict_kpi = calc_fund_kpi(fund_code, df_cumulative_net_value_trend, start, end)
            dict_kpi['code'] = fund_code
            dict_kpi['name'] = fund['基金简称']
            dict_kpi['type'] = fund['基金类型']
            print('KPI计算已完成...')

            # Copy the fee figures.
            for column in ('scale', 'm_fee', 'c_fee', 'sale_fee', 'sub_fee'):
                dict_kpi[column] = fund[column]

            fund_kpi_list.append(dict_kpi)
        except Exception as e:
            # Anything unexpected: report and move on to the next fund.
            print(f"异常信息:{e}")
            print('基金编码[%s' % fund_code + ']读取失败')

    df_kpi = pd.DataFrame(fund_kpi_list)
    print(df_kpi)
    if df_kpi.empty:
        # Without any row the KPI columns do not exist, so there is nothing
        # to filter or rank.
        print('[%s' % keywords + ']没有可排序的基金')
        return df_kpi

    # Filter; copy so the new column is added to an independent table.
    old_enough = df_kpi['years'].astype(float) >= establish_year
    shallow_enough = df_kpi['withdrawal'].astype(float) <= max_withdrawal
    df_kpi = df_kpi[old_enough & shallow_enough].copy()

    try:
        df_kpi['total_fee'] = (
            df_kpi['sub_fee'].astype(float)
            + df_kpi['m_fee'].astype(float)
            + df_kpi['c_fee'].astype(float)
            + df_kpi['sale_fee'].astype(float)
        )
    except (KeyError, TypeError, ValueError) as e:
        print(f"异常信息:{e}")
        df_kpi['total_fee'] = 0
        print('[%s' % keywords + ']计算累计费用失败,跳过!')

    # Sort.
    df_kpi = df_kpi.sort_values(
        by=['sharp', 'withdrawal', 'calmar', 'total_fee'],
        ascending=[False, False, False, True],
    )

    df_kpi.to_excel('fund_rank-%s' % keywords + '.xlsx')
    print(df_kpi)
    return df_kpi
# Build the combined net-value series of a fund portfolio.
def calc_fund_comb_kpi(fund_comb, start='1970-01-01', end='2200-01-01'):
    """Return the weighted sum of the funds' ``复权净值`` per date.

    *fund_comb* provides one row per fund with the columns ``code`` and
    ``share``. Each fund's history is loaded, restricted to the date window,
    multiplied by its ``share`` and added to the running total, which is
    indexed by the dates of the first fund processed. A ``日增长率`` column
    is derived from the total at the end.

    NOTE(review): the indentation of this function was reconstructed; in
    particular, confirm that the forward fill belongs inside the loop.

    Returns:
        pandas.DataFrame indexed by ``净值日期`` with the columns
        ``复权净值``, ``复权净值临时`` and ``日增长率``.
    """
    df_comb = pd.DataFrame(columns=['净值日期', '复权净值', '复权净值临时', '日增长率'])
    for i in np.arange(0, fund_comb.shape[0]):
        # Load the net-value history.
        df = get_fund_his_by_id(fund_comb.iloc[i]['code'], dir_cumulative_net_value_trend)
        # df.dropna(axis=0, inplace=True)
        # df.fillna(method='pad',axis=0, inplace=True)
        # Restrict to the date window.
        # NOTE(review): `st == et` is also True where both comparisons are
        # False; `st & et` looks like the intended test - confirm.
        st = df['净值日期'] >= start
        et = df['净值日期'] <= end
        res = st == et
        df = df[res]
        # The first fund processed defines the dates of the combined table.
        if df_comb.empty:
            df_comb['净值日期'] = df['净值日期']
            df_comb.set_index('净值日期', inplace=True)
        df.set_index('净值日期', inplace=True)
        # print(df)
        print(fund_comb.iloc[i]['share'])
        # if pd.isnull(df_comb.iloc[0]['复权净值']):
        # Keep the total accumulated so far in the temporary column
        # (0.0 while nothing has been accumulated yet).
        if df_comb['复权净值'].isnull().all():
            df_comb['复权净值临时'] = 0.0
        else:
            df_comb.loc[:, '复权净值临时'] = df_comb.loc[:, '复权净值']
        # This fund's weighted value ...
        df_comb.loc[:, '复权净值'] = df.loc[:, '复权净值'].astype(float) * fund_comb.iloc[i]['share']
        # print(type(df_comb.loc[:, '复权净值']), type(df_comb.loc[:, '复权净值临时']))
        # ... plus the total accumulated so far.
        df_comb.loc[:, '复权净值'] = df_comb.loc[:, '复权净值'].astype(float).add(df_comb.loc[:, '复权净值临时'])
        # Fill gaps with the previous row's value.
        df_comb.fillna(method='pad', axis=0, inplace=True)
    # Daily growth rate of the combined value (as a fraction, not percent).
    df_comb['日增长率'] = (df_comb['复权净值'] - df_comb['复权净值'].shift(1)) / df_comb['复权净值'].shift(1)
    # print(df_comb)
    return df_comb
def main():
    """Rank funds per category, build a portfolio and plot it.

    The fund outline is loaded and reduced to funds whose ``申购状态`` is
    ``开放申购``, then joined with the fee table. For every category in the
    plan below the funds are ranked over 2018-01-01..2022-10-30 (minimum
    history: 5 years) and the top-ranked fund enters the portfolio with the
    category's weight. The portfolio's combined net value from 2017-06-02 to
    2022-10-30 is plotted against fund ``000512``, whose series is also
    saved to ``参考沪深300.csv``.
    """
    pd.set_option('display.max_rows', 1000)
    pd.set_option('display.max_columns', 10)

    # Public fund list, limited to funds open for subscription.
    df = get_all_fund_outline()
    df = df[df['申购状态'] == '开放申购']

    # Attach the fee figures.
    df_fee = get_fund_fee_list(df)
    df_fee = df_fee[['基金代码', 'scale', 'm_fee', 'c_fee', 'sale_fee', 'sub_fee']].copy()
    df = pd.merge(df, df_fee, on='基金代码')

    # One entry per category: (name keyword, max drawdown in %, weight).
    # Keeping the weights in one place means the printed summary and the
    # portfolio table cannot drift apart.
    plan = [
        ('沪深300', 60.0, 0.25),
        ('中证500', 50.0, 0.15),
        ('创业板', 50.0, 0.10),
        ('黄金', 50.0, 0.10),
        ('债', 30.0, 0.20),
        ('标普500', 50.0, 0.10),
        ('纳斯达克', 50.0, 0.10),
    ]
    rankings = [
        get_fund_rank(df, keyword, max_withdrawal, 5, '2018-01-01', '2022-10-30')
        for keyword, max_withdrawal, _ in plan
    ]

    # The top-ranked fund of every category, with the category's weight.
    holdings = [
        [ranking.iloc[0]['code'], weight]
        for ranking, (_, _, weight) in zip(rankings, plan)
    ]
    print("基金组合:" + ",\n".join(f"{code}: {weight:.2f}" for code, weight in holdings))

    # Combined portfolio series.
    fund_comb = pd.DataFrame(holdings, columns=['code', 'share'])
    df_comb = calc_fund_comb_kpi(fund_comb, '2017-06-02', '2022-10-30')
    df_comb.reset_index(inplace=True)
    x = df_comb['净值日期']
    y = df_comb['复权净值']

    # Reference series.
    fund_hs300 = pd.DataFrame([['000512', 1.00]], columns=['code', 'share'])
    df_hs300 = calc_fund_comb_kpi(fund_hs300, '2017-06-02', '2022-10-30')
    df_hs300.reset_index(inplace=True)
    df_hs300.to_csv("参考沪深300.csv", encoding='utf_8_sig', index=None)
    y_hs300 = df_hs300['复权净值']

    # Plot the cumulative net-value trend.
    draw_cumulative_net_value_trend(x, y, y_hs300)


if __name__ == "__main__":
    main()
start.sh
0 → 100755
浏览文件 @
33529055
# Enter the demo project; stop here if it is missing so the yao commands
# below never run in another directory.
cd demo-widget || exit 1

# Create tables & set menu
yao migrate --reset
yao run flows.setmenu

yao start
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录