From 3433b8a007e3ffc811afb5fcee353f58cc99198f Mon Sep 17 00:00:00 2001 From: RobotsFutures Date: Mon, 31 Oct 2022 01:08:42 +0800 Subject: [PATCH] fund analysis tool v0.1 --- fund_tools.py | 730 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 730 insertions(+) create mode 100644 fund_tools.py diff --git a/fund_tools.py b/fund_tools.py new file mode 100644 index 0000000..28123f9 --- /dev/null +++ b/fund_tools.py @@ -0,0 +1,730 @@ +from asyncio.windows_events import NULL +from cProfile import label +import akshare as ak +import pandas as pd +import numpy as np +from matplotlib import pyplot as plt +from matplotlib.pyplot import MultipleLocator +import matplotlib.dates as mdate +import requests +from bs4 import BeautifulSoup +import datetime +import time +import math +import os +import re +import random +from decimal import Decimal + +dir_cumulative_net_value_trend = 'cumulative_net_value_trend' +dir_cumulative_return_trend = 'cumulative_return_trend' + +# 基金行业常用常量定义 +# 无风险年化收益率% +risk_free_annual_return_ratio = 0.0275 +# 每年交易日天数 +trading_days_per_year = 250 + +# user_agent列表 +user_agent_list = [ + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER', + 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)', + 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36' +] + +# referer列表 +referer_list = [ + 'http://fund.eastmoney.com/110022.html', + 'http://fund.eastmoney.com/110023.html', + 'http://fund.eastmoney.com/110024.html', + 'http://fund.eastmoney.com/110025.html' +] + +# 获取一个随机user_agent和Referer +header = {'User-Agent': random.choice(user_agent_list), + 'Referer': random.choice(referer_list) +} + +# 获得字符串中的整数 +def get_decimal(x): + return ''.join(re.findall(r"\d+\.?\d*", ''.join(x))) + +# 获得指定后缀字符串中的整数 +def get_decimal_suffix(x, suffix): + return get_decimal(re.findall(r"\d+\.?\d*"+suffix, ''.join(x))) + +# 日志调试 +def log(x): + file_handle = open('log.txt', mode='a', encoding='utf-8') + file_handle.write(x) + file_handle.close() + +# 获得基金概要数据 +def get_all_fund_outline(): + if not os.path.exists('fund_em_fund_name_df.csv'): + df1 = ak.fund_name_em() + df1 = df1[['基金代码', '拼音缩写', '基金类型']] + df2 = ak.fund_open_fund_daily_em() + df = pd.merge(df1, df2, on='基金代码') + df.set_index(df['基金代码']) + df.to_csv('fund_em_fund_name_df.csv', encoding='utf_8_sig',index=None) + + print(df) + else: + df = pd.read_csv('fund_em_fund_name_df.csv', dtype=object) + + return df + +#获得公墓基金每日净值 +def get_all_open_fund_daily(): + if not os.path.exists('open_fund_daily.csv'): + fund_em_fund_name_df = ak.fund_open_fund_daily_em() + fund_em_fund_name_df.set_index(fund_em_fund_name_df['基金代码']) + fund_em_fund_name_df.to_csv('fund_em_open_fund_daily.csv', encoding='utf_8_sig',index=None) + else: + fund_em_fund_name_df = pd.read_csv('fund_em_open_fund_daily.csv', dtype=object) + + return fund_em_fund_name_df + +# 根据关键字过滤基金列表 +def query_fund_by_fundname_keyword(df, fundname_keyword): + # if not os.path.exists(fundname_keyword + '.csv'): + df_query = df[df['基金简称'].str.contains(fundname_keyword)] + df_query.set_index(df_query['基金代码']) + df_query.to_csv(fundname_keyword + '.csv', encoding='utf_8_sig',index=None); + # else: + # df_query = pd.read_csv(fundname_keyword + '.csv', dtype=object) + + return df_query + +# 获得分红数据 +def get_fund_dividend_by_id(dir, fund_id): + filename = dir + '/%s' % (fund_id) + '-dividend.csv' + try: + if not os.path.exists(filename): + df = ak.fund_open_fund_info_em(fund=fund_id, indicator="分红送配详情") + print(df) + for i in np.arange(0, df.shape[0]): + df.iloc[i]['每份分红'] = get_decimal(df.iloc[i]['每份分红']) + + df.to_csv(filename, encoding='utf_8_sig', index=None); + else: + df = pd.read_csv(filename, dtype=object) + + return df + except: + return NULL + + +# 获得最近一次分红 +def query_last_dividend_before_date(df, cur_date): + try: + dividend = '0.0' + # 查询最近一次的分红数据,满足条件反馈分红 + for i in np.arange(0, df.shape[0]): + if str(cur_date) > df.iloc[df.shape[0] - 1 - i]['分红发放日']: + # print('Index:', df.shape[0] - 1 - i) + dividend = df.iloc[df.shape[0] - 1 - i]['每份分红'] + + # print("日期:%s"%cur_date, "分红%s"%dividend) + + return dividend + except: + return '0.0' + +# 获得指定基金历史数据 +def get_fund_his_by_id(fund_id, dir): + filename = dir + '/%s'%(fund_id) + '.csv' + # print(filename) + + try: + if not os.path.exists(filename): + df_total_net_value_trend = ak.fund_open_fund_info_em(fund=fund_id, indicator='累计净值走势') + df_unit_net_value_trend = ak.fund_open_fund_info_em(fund=fund_id, indicator="单位净值走势") + fund_em_info_df = pd.merge(df_total_net_value_trend, df_unit_net_value_trend, on='净值日期') + fund_em_info_df.set_index(fund_em_info_df['净值日期']) + + if not isinstance(fund_em_info_df['日增长率'], float): + fund_em_info_df['日增长率'] = fund_em_info_df['日增长率'].astype(float) + + df_dividend = get_fund_dividend_by_id(dir, fund_id) + + # 复权列表 + red_net_value_list = [] + last_red_net_value = 0.0 + + # print(fund_em_info_df) + # print(f"fund_em_info_df.shape:{fund_em_info_df.shape}") + + # 复权净值计算 + for i in np.arange(0, fund_em_info_df.shape[0]): + dividend = float(query_last_dividend_before_date(df_dividend, fund_em_info_df.iloc[i]['净值日期'])) + # print(dividend) + + if dividend > 0.0: + try: + last_red_net_value = float(last_red_net_value * (1 + fund_em_info_df.iloc[i]['日增长率'] / 100)) + red_net_value_list.append(last_red_net_value) + + # print('date:',fund_em_info_df.iloc[i]['净值日期'], '单位净值:', fund_em_info_df.iloc[i]['单位净值'], \ + # '日增长率:', fund_em_info_df.iloc[i]['日增长率'], '累计净值', fund_em_info_df.iloc[i]['累计净值']) + except: + print(fund_id, '日增长率数据异常:', fund_em_info_df.iloc[i]['日增长率']) + return '' + else: + try: + last_red_net_value = float(fund_em_info_df.iloc[i]['累计净值']) + red_net_value_list.append(last_red_net_value) + except: + print(fund_id, '累计净值数据异常:', fund_em_info_df.iloc[i]['累计净值']) + return '' + fund_em_info_df['复权净值'] = red_net_value_list + + # print(fund_em_info_df) + fund_em_info_df.to_csv(filename, encoding='utf_8_sig',index=None) + else: + fund_em_info_df = pd.read_csv(filename, dtype=object) + fund_em_info_df.set_index(fund_em_info_df['净值日期']) + except Exception as e: + print(f"错误信息:{e}") + + return fund_em_info_df + +# 绘制基金净值走势 +def draw_cumulative_net_value_trend(x, y, y_hushen300): + # plt.style.use('dark_background') + plt.rcParams["font.sans-serif"]=["SimHei"] #设置字体 + plt.rcParams["axes.unicode_minus"]=False #该语句解决图像中的“-”负号的乱码问题 + + fig, ax = plt.subplots() + # fig.subplots_adjust(bottom=0.2) + + # y_major_locator = MultipleLocator(0.1) + + # # 设置X轴刻度为日期时间 + # ax.xaxis.set_major_formatter(mdate.DateFormatter('%Y-%m-%d')) # 设置时间标签显示格式 + + # ax.yaxis.set_major_locator(y_major_locator) + + # ax.xaxis_date() + # X轴刻度文字倾斜45度 + # plt.xticks(rotation=45) + ax.set_title("基金走势图") + ax.set_xlabel("时间") + ax.set_ylabel("基金累计净值") + ax.plot(x, y, color='r', label="自选组合") + ax.plot(x, y_hushen300, color='b', label="沪深300-110020") + ax.legend() + plt.show() + +# 计算基金成立时间 +def calc_fund_established_time(df): + # print(df.dtypes) + # print(df.iloc[-1, 0], df.iloc[0, 0], df.iloc[-1, 0]-df.iloc[0, 0]) + established_time = (df.iloc[-1, 0] - df.iloc[0, 0]).days/365 + return established_time + +# 计算基金最大回撤 +def calc_fund_drawdown(fund_df, start='1970-01-01', end='2200-01-01'): + df = fund_df[['净值日期', '复权净值']].copy() + df['净值日期'] = pd.to_datetime(df['净值日期']) + + st = df['净值日期'] >= start + et = df['净值日期'] <= end + res = st == et + df = df[res] + + df['max2here'] = df['复权净值'].expanding().max() + df['dd2here'] = df['复权净值']/df['max2here'] + + end_date, remains = tuple(df.sort_values(by=['dd2here']).iloc[0][['净值日期', 'dd2here']]) + + # 计算最大回撤开始时间 + start_date = df[df['净值日期']<=end_date].sort_values(by='复权净值', ascending=False).iloc[0]['净值日期'] + + drawdown = round((1-remains)*100, 2) + print('最大回撤 (%):', drawdown) + print('最大回撤开始时间:', start_date) + print('最大回撤结束时间:', end_date) + + return drawdown, start_date, end_date + +# 计算基金年华回报率 +def cal_fund_annual_return(fund_df, start='1970-01-01', end='2200-01-01'): + df = fund_df[['净值日期', '复权净值']].copy() + # df['净值日期'] = pd.to_datetime(df['净值日期']) + # + # df['日收益'] = df['累计净值'] - df['累计净值'].shift(1) + # df['日收益率'] = (df['累计净值'] - df['累计净值'].shift(1))/df['累计净值'].shift(1) + + st = df['净值日期'] >= start + et = df['净值日期'] <= end + res = st == et + df = df[res] + # df = df[df['净值日期'] >= '2020-01-09'] + + # 计算累计收益率 + # df['累计收益率'] = df['日收益率'].expanding().sum() + total_return = round(((df.iloc[-1, 1] / df.iloc[0, 1]) - 1)*100, 2) + # df = df.fillna(0.0) + # print(df) + + years = round(calc_fund_established_time(df), 2) + # print('分析周期:%f years'%years) + + # 计算 + # print("最新累计收益率:%f, "%df.iloc[-1, 4] + '初始累计收益率:%f'%df.iloc[0, 4]) + if years >= 1: + annualized_returns = (df.iloc[-1, 1] / df.iloc[0, 1])**(1/years) - 1 + else: + annualized_returns = (df.iloc[-1, 1] / df.iloc[0, 1]) - 1 + + annualized_returns = annualized_returns*100 + + print('annualized_returns:%f'%annualized_returns + '%') + + return annualized_returns, total_return; + +# 计算基金夏普比率 +def cal_fund_sharpe_ratio(fund_df, withdrawal, start='1970-01-01', end='2200-01-01'): + df = fund_df[['净值日期', '日增长率']].copy() + + st = df['净值日期'] >= start + et = df['净值日期'] <= end + res = st == et + df = df[res] + # print(df) + + # 计算日收益率的均值 + daily_return_ratio_average = np.mean(df['日增长率'].astype(float))*100 + # print('daily_return_ratio_average:%f'%daily_return_ratio_average) + + # 计算收益率的标准方差 + return_ratio_std = df['日增长率'].astype(float).std() + + # 计算无风险日收益率 + daily_risk_free_return_ratio = (((1 + risk_free_annual_return_ratio)**(1/365)) - 1) * 100 + # print('daily_risk_free_return_ratio:%f' % daily_risk_free_return_ratio) + + # 计算夏普比率 + try: + fund_sharpe_ratio = (daily_return_ratio_average - daily_risk_free_return_ratio) / return_ratio_std * math.sqrt(trading_days_per_year) / 100 + except: + fund_sharpe_ratio = 0.0 + + # 计算卡玛比率 + try: + fund_calmar_ratio = (daily_return_ratio_average - daily_risk_free_return_ratio) / withdrawal * 100 + except: + fund_calmar_ratio = 0.0 + + print('sharpe_ratio:%f' % fund_sharpe_ratio + ', calmar_ratio:%f' % fund_calmar_ratio) + return fund_sharpe_ratio, fund_calmar_ratio + +# 计算基金波动 +def calc_fund_volatility(df_fund, start='1970-01-01', end='2200-01-01'): + # difflntotal = [] + + df = df_fund[['净值日期', '复权净值', '上一交易日复权净值']].copy() + # df['上一次累计净值'] = df['累计净值'].shift(1) + + st = df['净值日期'] >= start + et = df['净值日期'] <= end + res = st == et + df = df[res] + + # days = df.shape[0] + # print(days) + + df['价格自然对数差'] = np.log(df['复权净值']) - np.log(df['上一交易日复权净值']) + volitality = np.std(df['价格自然对数差']) * 100 * math.sqrt(trading_days_per_year) + + # 与前一种计算结果一致 + # df['价格比率对数'] = np.log(df['累计净值'] / df['上一次累计净值']) + # volitality = np.std(df['价格比率对数'])* 100 * math.sqrt(trading_days_per_year) + + print('volitality:%f'%volitality) + + return volitality + +# 计算基金kpi +def calc_fund_kpi(fund_id, df_cumulative_net_value_trend, start='1970-01-01', end='2200-01-01'): + dict_select = { + 'code': fund_id, # 基金代码 + 'years': '', # 成立时间 + 'withdrawal': '', # 最大回撤 + 'annual_return': '', # 年化收益率 + 'total_return': '', # 累计收益率 + 'sharp': '', # 夏普比率 + 'calmar': '', # 卡玛比率 + 'volatility': '', # 波动率 + } + + df = df_cumulative_net_value_trend[['净值日期', '复权净值', '日增长率']].copy() + + # 生成上一个交易日累计净值 + df['上一交易日复权净值'] = df['复权净值'].shift(1) + + # 数据格式转换 + df['净值日期'] = pd.to_datetime(df['净值日期']) + df['复权净值'] = df['复权净值'].astype(float) + df['上一交易日复权净值'] = df['上一交易日复权净值'].astype(float) + + # # 计算日收益和日收益率 + # df['日收益'] = df['累计净值'] - df['上一交易日累计净值'] + # df['日收益率'] = (df['累计净值'] - df['上一交易日累计净值'])/df['上一交易日累计净值'] + + # x = df['净值日期'] + # y = df['累计净值'] + # + # # 绘制基金累计净值走势图 + # draw_cumulative_net_value_trend(x, y) + + # 计算成立时间 + years = calc_fund_established_time(df) + dict_select['years'] = round(years, 2) + print('基金[%s'%fund_id + ']成立时间:%d'%(int(years)) + '年%d'%((years-int(years))*365) + '天') + + # 计算最大回撤 + withdrawal, start_date, end_date = calc_fund_drawdown(df, start, end) + dict_select['withdrawal'] = round(withdrawal, 2) + + # 计算年化收益 + annual_return, total_return = cal_fund_annual_return(df, start, end) + dict_select['annual_return'] = round(annual_return, 2) + dict_select['total_return'] = round(total_return, 2) + + # 计算夏普比率 + sharpe_ratio, calmar_ratio = cal_fund_sharpe_ratio(df, withdrawal, start, end) + dict_select['sharp'] = round(sharpe_ratio, 2) + dict_select['calmar'] = round(calmar_ratio, 4) + + # 计算波动率 + volatility = calc_fund_volatility(df, start, end) + dict_select['volatility'] = round(volatility, 2) + + print(dict_select) + + return dict_select + +# user_agent列表 +user_agent_list = [ + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER', + 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)', + 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36' +] + +# referer列表 +referer_list = [ + 'http://fund.eastmoney.com/110022.html', + 'http://fund.eastmoney.com/110023.html', + 'http://fund.eastmoney.com/', + 'http://fund.eastmoney.com/110025.html' +] + +# 爬取网页基金数据 +def get_url(baseUrl): + # 获取一个随机user_agent和Referer + headers = {'User-Agent': random.choice(user_agent_list), 'Referer': random.choice(referer_list)} + try: + resp = requests.get(baseUrl, headers=headers) + # print(resp.status_code) + if resp.status_code == 200: + # print(resp.content) + return resp.text + print("没有爬取到相应的内容") + return None + except: + print("没有爬取到相应的内容") + return None + +# 获得基金费用 +def get_fund_fee(code): + dict_select = { + '基金代码': code, # 基金代码 + 'scale': '', # 基金规模 + 'm_fee': '', # 管理费 + 'c_fee': '', # 托管费 + 'sale_fee': '', # 销售费用 + 'sub_fee': '', # 申购费<50W + # 'buy_fee_less_than_100': '', # 申购费<100W + # 'buy_fee_more_than_100': '', # 申购费>=100W + } + + url = 'http://fundf10.eastmoney.com/jjfl_%s' % code + '.html' + html = get_url(url) + soup = BeautifulSoup(html, 'html.parser') + # log(soup.prettify()) + + target = '' + for co in soup.find_all(['span', 'td', 'span', 'label']): + # log('target%s' % target) + log('co.text:%s' % co.text) + if target != '': + # '''dict_select[target] = co.text.encode('utf-8')''' + dict_select[target] = co.text + if target == 'sale_fee': + break + target = '' + + if co.text.find(u'资产规模') >= 0: + target = 'scale' + elif co.text == u'管理费率': + target = 'm_fee' + elif co.text == u'托管费率': + target = 'c_fee' + elif co.text == u'销售服务费率': + target = 'sale_fee' + + # print(dict_select) + + try: + dict_select['scale'] = float(get_decimal_suffix(dict_select['scale'], '亿元')) + if dict_select['scale'] == '': + dict_select['scale'] = 0.0 + except: + dict_select['scale'] = 0.0 + + try: + dict_select['m_fee'] = float(get_decimal(dict_select['m_fee'])) + if dict_select['m_fee'] == '': + dict_select['m_fee'] = 0.0 + except: + dict_select['m_fee'] = 0.0 + + try: + dict_select['c_fee'] = float(get_decimal(dict_select['c_fee'])) + if dict_select['c_fee'] == '': + dict_select['c_fee'] = 0.0 + except: + dict_select['c_fee'] = 0.0 + + try: + dict_select['sale_fee'] = float(get_decimal(dict_select['sale_fee'])) + if dict_select['sale_fee'] == '': + dict_select['sale_fee'] = 0.0 + except: + dict_select['sale_fee'] = 0.0 + + print(dict_select) + return dict_select + +# 获得基金费用列表 +def get_fund_fee_list(df): + err_count = 0 + fee_list = [] + filename = 'fund_fee_list.csv' + filename_bak = 'fund_fee_list-bak.csv' + # print(filename) + + if not os.path.exists(filename): + for i in np.arange(0, df.shape[0]): + # try: + print('基金编码[%s'%(df.iloc[i]['基金代码']) + ']正在读取费用...') + dict_item = get_fund_fee(df.iloc[i]['基金代码']) + dict_item['sub_fee'] = df.iloc[i]['手续费'] + fee_list.append(dict_item) + # except: + # err_count = err_count + 1 + # print('基金编码[%s' % (df.iloc[i]['基金代码']) + ']读取费用失败...') + time.sleep(1) + + df_fee = pd.DataFrame(fee_list) + df_fee.to_csv(filename, encoding='utf_8_sig', index=None) + print('读取费率失败次数:%d' % err_count) + else: + df_fee = pd.read_csv(filename, dtype=object) + # for i in np.arange(0, df.shape[0]): + # try: + # print(df_fee.iloc[i]['基金代码'], df_fee.iloc[i]['sub_fee']) + # df_fee.iloc[i]['sub_fee'] = get_decimal(df_fee.iloc[i]['sub_fee']) + # except: + # df_fee.iloc[i]['sub_fee'] = 0.0 + # + # df_fee.to_csv(filename_bak, encoding='utf_8_sig', index=None) + + return df_fee + +# 计算基金rank +def get_fund_rank(df_in, keywords, max_withdrawal, establish_year=3.0, start='1970-01-01', end='2200-01-01'): + fund_kpi_list = [] + + # 根据关键字检索 + df_dist = query_fund_by_fundname_keyword(df_in, keywords) + + df_dist = df_dist[df_dist['基金类型'].str.contains('场内') == False] + for i in np.arange(0, df_dist.shape[0]): + try: + print('基金编码[%s'%(df_dist.iloc[i]['基金代码']) + ']正在读取...') + + # 爬取历史净值 + df_cumulative_net_value_trend = get_fund_his_by_id(df_dist.iloc[i]['基金代码'], dir_cumulative_net_value_trend) + print('历史数据已加载...') + if (keywords != '债') or (keywords == '债' and df_cumulative_net_value_trend['日增长率'].astype(float).max(skipna=True) <= 2.0): + # 计算基金KPI + dict_kpi = calc_fund_kpi(df_dist.iloc[i]['基金代码'], df_cumulative_net_value_trend, start, end) + dict_kpi['code'] = df_dist.iloc[i]['基金代码'] + dict_kpi['name'] = df_dist.iloc[i]['基金简称'] + dict_kpi['type'] = df_dist.iloc[i]['基金类型'] + print('KPI计算已完成...') + + # 爬取费用 + dict_kpi['scale'] = df_dist.iloc[i]['scale'] + dict_kpi['m_fee'] = df_dist.iloc[i]['m_fee'] + dict_kpi['c_fee'] = df_dist.iloc[i]['c_fee'] + dict_kpi['sale_fee'] = df_dist.iloc[i]['sale_fee'] + dict_kpi['sub_fee'] = df_dist.iloc[i]['sub_fee'] + + # print(dict_kpi) + fund_kpi_list.append(dict_kpi) + # time.sleep(1) + except Exception as e: # 未知异常的捕获 + print(f"异常信息:{e}") + print('基金编码[%s'%(df_dist.iloc[i]['基金代码']) + ']读取失败') + # # time.sleep(1) + + # print(fund_kpi_list) + df_kpi = pd.DataFrame(fund_kpi_list) + print(df_kpi) + + # 删除异常数据行 + # df_kpi.dropna(subset=['withdrawal'], inplace=True) + + # 对数据筛选 + df_kpi = df_kpi[df_kpi['years'].astype(float) >= establish_year] + df_kpi = df_kpi[df_kpi['withdrawal'].astype(float) <= max_withdrawal] + + try: + df_kpi['total_fee'] = df_kpi['sub_fee'].astype(float) + df_kpi['m_fee'].astype(float) + df_kpi['c_fee'].astype(float) + df_kpi['sale_fee'].astype(float) + except Exception as e: # 未知异常的捕获 + print(f"异常信息:{e}") + df_kpi['total_fee'] = 0 + print('[%s'%keywords + ']计算累计费用失败,跳过!') + + # 对数据排序, 'sharp', 'total_fee', 'annual_return' + df_kpi.sort_values(by=['sharp', 'withdrawal', 'calmar', 'total_fee'], ascending=[False, False, False, True], inplace=True) + + # print(df_kpi.dtypes) + # df_kpi.to_csv('fund_kpi.csv') + df_kpi.to_excel('fund_rank-%s' % keywords + '.xlsx') + print(df_kpi) + return df_kpi + +# 计算基金组合预测数据 +def calc_fund_comb_kpi(fund_comb, start='1970-01-01', end='2200-01-01'): + df_comb = pd.DataFrame(columns=['净值日期', '复权净值', '复权净值临时', '日增长率']) + + for i in np.arange(0, fund_comb.shape[0]): + # 爬取历史净值 + df = get_fund_his_by_id(fund_comb.iloc[i]['code'], dir_cumulative_net_value_trend) + + # df.dropna(axis=0, inplace=True) + # df.fillna(method='pad',axis=0, inplace=True) + + # 过滤时间 + st = df['净值日期'] >= start + et = df['净值日期'] <= end + res = st == et + df = df[res] + + if df_comb.empty: + df_comb['净值日期'] = df['净值日期'] + df_comb.set_index('净值日期', inplace=True) + + df.set_index('净值日期', inplace=True) + # print(df) + + print(fund_comb.iloc[i]['share']) + # if pd.isnull(df_comb.iloc[0]['复权净值']): + if df_comb['复权净值'].isnull().all(): + df_comb['复权净值临时'] = 0.0 + else: + df_comb.loc[:, '复权净值临时'] = df_comb.loc[:, '复权净值'] + + df_comb.loc[:, '复权净值'] = df.loc[:, '复权净值'].astype(float) * fund_comb.iloc[i]['share'] + # print(type(df_comb.loc[:, '复权净值']), type(df_comb.loc[:, '复权净值临时'])) + df_comb.loc[:, '复权净值'] = df_comb.loc[:, '复权净值'].astype(float).add(df_comb.loc[:, '复权净值临时']) + df_comb.fillna(method='pad', axis=0, inplace=True) + + # 计算日增长率 + df_comb['日增长率'] = (df_comb['复权净值'] - df_comb['复权净值'].shift(1))/df_comb['复权净值'].shift(1) + + # print(df_comb) + return df_comb + +if __name__ == "__main__": + pd.set_option('display.max_rows', 1000) + pd.set_option('display.max_columns', 10) + + # get_fund_his_by_id('000051', dir_cumulative_net_value_trend) + + # 获得公募基金列表 + df = get_all_fund_outline() + # df = get_all_open_fund_daily() + df = df[df['申购状态'] == '开放申购'] + + # 加载费用 + df_fee = get_fund_fee_list(df) + df_fee = df_fee[['基金代码', 'scale', 'm_fee', 'c_fee', 'sale_fee', 'sub_fee']].copy() + # get_fund_fee('000051') + + df = pd.merge(df, df_fee, on='基金代码') + df.set_index(df['基金代码']) + + # 分红测试代码 + # df_dividend = get_fund_dividend_by_id('002898') + # df_dividend = query_last_dividend_before_date(dir_cumulative_net_value_trend, '002898', '2021-01-15') + # print(type(df_dividend)) + # print(df_dividend) + + df_kpi_csi300 = get_fund_rank(df, '沪深300', 60.0, 5, '2018-01-01', '2022-10-30') + df_kpi_csi500 = get_fund_rank(df, '中证500', 50.0, 5, '2018-01-01', '2022-10-30') + df_kpi_gem = get_fund_rank(df, '创业板', 50.0, 5, '2018-01-01', '2022-10-30') + df_kpi_gold = get_fund_rank(df, '黄金', 50.0, 5, '2018-01-01', '2022-10-30') + df_kpi_bond = get_fund_rank(df, '债', 30.0, 5, '2018-01-01', '2022-10-30') + df_kpi_sp500 = get_fund_rank(df, '标普500', 50.0, 5, '2018-01-01', '2022-10-30') + df_kpi_nasda = get_fund_rank(df, '纳斯达克', 50.0, 5, '2018-01-01', '2022-10-30') + # + # # 行业基金 + # get_fund_rank(df, '新能源', 50.0, 3, '2018-01-01', '2021-02-18') + # get_fund_rank(df, '消费', 50.0, 3, '2018-01-01', '2021-02-18') + # get_fund_rank(df, '商品', 50.0, 3, '2018-01-01', '2021-02-18') + # get_fund_rank(df, '混合', 50.0, 3, '2018-01-01', '2021-03-28') + + print(f"基金组合:{df_kpi_csi300.iloc[0]['code']}:0.25,\n \ + {df_kpi_csi500.iloc[0]['code']}:0.15,\n \ + {df_kpi_gem.iloc[0]['code']}: 0.10,\n \ + {df_kpi_gold.iloc[0]['code']}: 0.10,\n \ + {df_kpi_bond.iloc[0]['code']}: 0.20],\n \ + {df_kpi_sp500.iloc[0]['code']}: 0.10],\n\ + {df_kpi_nasda.iloc[0]['code']}: 0.10]") + # # 组合数据 + # fund_comb = pd.DataFrame([['673100', 0.50], ['001879', 0.15], ['000187', 0.20], ['000218', 0.15]], columns=['code', 'share']) + fund_comb = pd.DataFrame([ + [df_kpi_csi300.iloc[0]['code'], 0.25], + [df_kpi_csi500.iloc[0]['code'], 0.15], + [df_kpi_gem.iloc[0]['code'], 0.10], + [df_kpi_gold.iloc[0]['code'], 0.10], + [df_kpi_bond.iloc[0]['code'], 0.20], + [df_kpi_sp500.iloc[0]['code'], 0.10], + [df_kpi_nasda.iloc[0]['code'], 0.10] + ], + columns=['code', 'share']) + df_comb = calc_fund_comb_kpi(fund_comb, '2017-06-02', '2022-10-30') + df_comb.reset_index(inplace=True) + x = df_comb['净值日期'] + y = df_comb['复权净值'] + + fund_hs300 = pd.DataFrame([['000512', 1.00]], columns=['code', 'share']) + df_hs300 = calc_fund_comb_kpi(fund_hs300, '2017-06-02', '2022-10-30') + df_hs300.reset_index(inplace=True) + df_hs300.to_csv("参考沪深300.csv", encoding='utf_8_sig', index=None) + + y_hs300 = df_hs300['复权净值'] + + # 绘制基金累计净值走势图 + draw_cumulative_net_value_trend(x, y, y_hs300) -- GitLab