From 3433b8a007e3ffc811afb5fcee353f58cc99198f Mon Sep 17 00:00:00 2001
From: RobotsFutures <joeychou180@163.com>
Date: Mon, 31 Oct 2022 01:08:42 +0800
Subject: [PATCH] fund analysis tool v0.1

---
 fund_tools.py | 730 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 730 insertions(+)
 create mode 100644 fund_tools.py

diff --git a/fund_tools.py b/fund_tools.py
new file mode 100644
index 0000000..28123f9
--- /dev/null
+++ b/fund_tools.py
@@ -0,0 +1,730 @@
+from asyncio.windows_events import NULL
+from cProfile import label
+import akshare as ak
+import pandas as pd
+import numpy as np
+from matplotlib import pyplot as plt
+from matplotlib.pyplot import MultipleLocator
+import matplotlib.dates as mdate
+import requests
+from bs4 import BeautifulSoup
+import datetime
+import time
+import math
+import os
+import re
+import random
+from decimal import Decimal
+
+dir_cumulative_net_value_trend = 'cumulative_net_value_trend'
+dir_cumulative_return_trend = 'cumulative_return_trend'
+
+# Common constants used in fund analysis
+# Risk-free annual return, stored as a fraction (0.0275 = 2.75%)
+risk_free_annual_return_ratio = 0.0275
+# Number of trading days per year
+trading_days_per_year = 250
+
+# Pool of User-Agent strings
+user_agent_list = [
+  'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
+  'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
+  'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0',
+  'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36',
+  'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36'
+]
+
+# Pool of Referer URLs
+referer_list = [
+  'http://fund.eastmoney.com/110022.html',
+  'http://fund.eastmoney.com/110023.html',
+  'http://fund.eastmoney.com/110024.html',
+  'http://fund.eastmoney.com/110025.html'
+]
+
+# Pick one random User-Agent and Referer (evaluated once, when the module is loaded)
+header = {'User-Agent': random.choice(user_agent_list),
+     'Referer': random.choice(referer_list)
+}
+
+# Concatenate every decimal number (digits with an optional fractional part) found in x, as a str
+def get_decimal(x):
+    return ''.join(hit.group(0) for hit in re.finditer(r"\d+\.?\d*", ''.join(x)))
+
+# Extract the number(s) directly followed by the literal text `suffix` in x ('12.34亿元' -> '12.34'); suffix is escaped so regex metacharacters in it match literally
+def get_decimal_suffix(x, suffix):
+    return get_decimal(re.findall(r"\d+\.?\d*" + re.escape(suffix), ''.join(x)))
+
+# Debug logging: append the text x verbatim to log.txt (UTF-8) in the current working directory.
+def log(x):
+    # The context manager closes the handle even when write() raises.
+    with open('log.txt', mode='a', encoding='utf-8') as file_handle:
+        file_handle.write(x)
+
+# Load the fund overview table (fund names merged with the daily open-fund data), cached in fund_em_fund_name_df.csv.
+def get_all_fund_outline():
+    cache_file = 'fund_em_fund_name_df.csv'
+    if os.path.exists(cache_file):
+        # Cache hit: every column is read back as str (dtype=object)
+        return pd.read_csv(cache_file, dtype=object)
+
+    names = ak.fund_name_em()[['基金代码', '拼音缩写', '基金类型']]
+    daily = ak.fund_open_fund_daily_em()
+    df = pd.merge(names, daily, on='基金代码')
+    df.set_index(df['基金代码'])  # NOTE: the result is discarded, so df keeps its index
+    df.to_csv(cache_file, encoding='utf_8_sig',index=None)
+
+    print(df)
+    return df
+
+# Load the daily NAV table of open-end public funds, cached in fund_em_open_fund_daily.csv.
+def get_all_open_fund_daily():
+    cache_file = 'fund_em_open_fund_daily.csv'
+    # Must be the same path that is written and read below, otherwise the cache is never hit.
+    if not os.path.exists(cache_file):
+        fund_em_fund_name_df = ak.fund_open_fund_daily_em()
+        fund_em_fund_name_df.to_csv(cache_file, encoding='utf_8_sig',index=None)
+    else:
+        fund_em_fund_name_df = pd.read_csv(cache_file, dtype=object)
+    return fund_em_fund_name_df
+
+# Keep the funds whose short name ('基金简称') contains fundname_keyword and export them to '<keyword>.csv'.
+def query_fund_by_fundname_keyword(df, fundname_keyword):
+    # str.contains treats the keyword as a regular expression (pandas default).
+    # na=False: a missing fund name counts as "no match"; a NaN inside the boolean mask
+    # would make the indexing below raise.
+    name_matches = df['基金简称'].str.contains(fundname_keyword, na=False)
+    df_query = df[name_matches]
+    # Side effect: the filtered list is written to the current directory on every call.
+    df_query.to_csv(fundname_keyword + '.csv', encoding='utf_8_sig',index=None)
+    return df_query
+
+# Load the dividend records of fund_id, cached in '<dir>/<fund_id>-dividend.csv'; returns None on any failure.
+def get_fund_dividend_by_id(dir, fund_id):
+    filename = dir + '/%s' % (fund_id) + '-dividend.csv'
+    try:
+        if os.path.exists(filename):
+            return pd.read_csv(filename, dtype=object)
+
+        df = ak.fund_open_fund_info_em(fund=fund_id, indicator="分红送配详情")
+        print(df)
+        # Column-wise assignment: writing through df.iloc[i][col] only changes a temporary row copy.
+        df['每份分红'] = df['每份分红'].map(get_decimal)
+        os.makedirs(dir, exist_ok=True)  # to_csv does not create a missing cache directory
+        df.to_csv(filename, encoding='utf_8_sig', index=None)
+        return df
+    except Exception as e:
+        print(f"错误信息：{e}")
+        return None
+
+
+# Return '每份分红' of the topmost row of df whose '分红发放日' is earlier than cur_date (plain string
+# comparison); '0.0' when no row qualifies or anything goes wrong (e.g. df is not a DataFrame).
+def query_last_dividend_before_date(df, cur_date):
+    latest = '0.0'
+    try:
+        cutoff = str(cur_date)
+        # Walk from the last row up to the first: each hit overwrites the previous one, so the
+        # hit nearest the top wins (presumably the newest payout -- confirm the table order).
+        for pos in range(df.shape[0] - 1, -1, -1):
+            record = df.iloc[pos]
+            if cutoff > record['分红发放日']:
+                latest = record['每份分红']
+    except:
+        return '0.0'
+    return latest
+
+# Load the NAV history of fund_id (cumulative NAV, unit NAV, daily growth) plus a derived
+# '复权净值' (dividend-adjusted NAV) column, cached in '<dir>/<fund_id>.csv'.
+#
+# Returns the DataFrame; a cache hit yields str-typed columns (dtype=object). Returns '' when a
+# row holds a value that cannot be converted to float. If the download/cache read itself fails,
+# the underlying exception is re-raised; a failure after the frame exists (e.g. while saving) is
+# only printed and the frame built so far is returned.
+def get_fund_his_by_id(fund_id, dir):
+    filename = dir + '/%s'%(fund_id) + '.csv'
+    fund_em_info_df = None
+
+    try:
+        if not os.path.exists(filename):
+            df_total_net_value_trend = ak.fund_open_fund_info_em(fund=fund_id, indicator='累计净值走势')
+            df_unit_net_value_trend = ak.fund_open_fund_info_em(fund=fund_id, indicator="单位净值走势")
+            fund_em_info_df = pd.merge(df_total_net_value_trend, df_unit_net_value_trend, on='净值日期')
+            fund_em_info_df['日增长率'] = fund_em_info_df['日增长率'].astype(float)
+
+            df_dividend = get_fund_dividend_by_id(dir, fund_id)
+
+            # Dividend-adjusted NAV, one entry per row
+            red_net_value_list = []
+            last_red_net_value = 0.0
+
+            for i in np.arange(0, fund_em_info_df.shape[0]):
+                row = fund_em_info_df.iloc[i]
+                dividend = float(query_last_dividend_before_date(df_dividend, row['净值日期']))
+
+                if dividend > 0.0:
+                    # A payout precedes this date: compound the previous adjusted value by the
+                    # day's growth rate (percent) instead of using the cumulative NAV.
+                    try:
+                        last_red_net_value = float(last_red_net_value * (1 + row['日增长率'] / 100))
+                    except Exception:
+                        print(fund_id, '日增长率数据异常:', row['日增长率'])
+                        return ''
+                else:
+                    # No payout yet: the adjusted NAV equals the cumulative NAV.
+                    try:
+                        last_red_net_value = float(row['累计净值'])
+                    except Exception:
+                        print(fund_id, '累计净值数据异常:', row['累计净值'])
+                        return ''
+                red_net_value_list.append(last_red_net_value)
+            fund_em_info_df['复权净值'] = red_net_value_list
+
+            os.makedirs(dir, exist_ok=True)  # to_csv does not create a missing cache directory
+            fund_em_info_df.to_csv(filename, encoding='utf_8_sig',index=None)
+        else:
+            fund_em_info_df = pd.read_csv(filename, dtype=object)
+    except Exception as e:
+        print(f"错误信息：{e}")
+        if fund_em_info_df is None:
+            # Nothing was loaded; surface the real error rather than an UnboundLocalError below.
+            raise
+
+    return fund_em_info_df
+
+# Plot the portfolio curve (red) against a benchmark curve (blue) over a shared x axis and
+# display the figure with plt.show().
+#   x           -- x-axis values (the caller passes the NAV dates)
+#   y           -- series drawn in red, labelled "自选组合"
+#   y_hushen300 -- series drawn in blue, labelled "沪深300-110020"
+def draw_cumulative_net_value_trend(x, y, y_hushen300):
+    plt.rcParams.update({
+        "font.sans-serif": ["SimHei"],   # font that can render the Chinese labels
+        "axes.unicode_minus": False,     # avoids a garbled "-" sign in the figure
+    })
+
+    fig, ax = plt.subplots()
+    ax.set_title("基金走势图")
+    ax.set_xlabel("时间")
+    ax.set_ylabel("基金累计净值")
+
+    # Both curves are drawn the same way; only data, colour and legend label differ.
+    curves = (
+        (y, 'r', "自选组合"),
+        (y_hushen300, 'b', "沪深300-110020"),
+    )
+    for series, colour, caption in curves:
+        ax.plot(x, series, color=colour, label=caption)
+
+    ax.legend()
+    plt.show()
+
+# Span in years (days / 365) between the first and last entries of the frame's first column,
+# which must hold datetime-like values (their difference needs a .days attribute).
+def calc_fund_established_time(df):
+    first_day, last_day = df.iloc[0, 0], df.iloc[-1, 0]
+    elapsed = last_day - first_day
+    return elapsed.days/365
+
+# Maximum drawdown of the dividend-adjusted NAV ('复权净值') within [start, end].
+# Returns (drawdown in percent rounded to 2 places, peak date, trough date) and prints all three.
+# Raises IndexError when no row falls inside the window.
+def calc_fund_drawdown(fund_df, start='1970-01-01', end='2200-01-01'):
+    df = fund_df[['净值日期', '复权净值']].copy()
+    df['净值日期'] = pd.to_datetime(df['净值日期'])
+
+    # Keep rows with start <= date <= end. The masks are combined with "&": comparing them with
+    # "==" would also keep rows for which both tests are False (possible when start > end).
+    df = df[(df['净值日期'] >= start) & (df['净值日期'] <= end)].copy()
+
+    # Ratio of each value to the running maximum; the smallest ratio marks the trough.
+    df['max2here'] = df['复权净值'].expanding().max()
+    df['dd2here']  = df['复权净值']/df['max2here']
+    end_date, remains = tuple(df.sort_values(by=['dd2here']).iloc[0][['净值日期', 'dd2here']])
+
+    # The drawdown starts at the highest value seen up to the trough.
+    start_date = df[df['净值日期']<=end_date].sort_values(by='复权净值', ascending=False).iloc[0]['净值日期']
+
+    drawdown = round((1-remains)*100, 2)
+    print('最大回撤 (%):', drawdown)
+    print('最大回撤开始时间:', start_date)
+    print('最大回撤结束时间:', end_date)
+    return drawdown, start_date, end_date
+
+# Annualised and total return of the dividend-adjusted NAV ('复权净值') within [start, end].
+#
+# fund_df['净值日期'] must already be datetime-like (calc_fund_established_time subtracts the dates).
+# Returns (annualized_returns, total_return), both in percent:
+#   * total_return is last/first - 1, rounded to 2 places;
+#   * annualized_returns compounds that ratio over the window length in years when the window
+#     covers at least one year, otherwise it is the plain (un-annualised) return.
+#
+# Prints the annualised figure. Raises IndexError when no row falls inside the window.
+def cal_fund_annual_return(fund_df, start='1970-01-01', end='2200-01-01'):
+    df = fund_df[['净值日期', '复权净值']].copy()
+
+    # Keep rows with start <= date <= end. The masks are combined with "&": comparing them with
+    # "==" would also keep rows for which both tests are False (possible when start > end).
+    df = df[(df['净值日期'] >= start) & (df['净值日期'] <= end)]
+
+    # Growth factor between the first and the last value of the window
+    growth = df.iloc[-1, 1] / df.iloc[0, 1]
+    total_return = round((growth - 1)*100, 2)
+
+    # Window length in years, rounded to 2 places before it is used as the exponent
+    years = round(calc_fund_established_time(df), 2)
+
+    if years >= 1:
+        annualized_returns = growth**(1/years) - 1
+    else:
+        # Window shorter than one year: report the plain return, no annualisation
+        annualized_returns = growth - 1
+
+    # Fraction -> percent
+    annualized_returns = annualized_returns*100
+
+    print('annualized_returns:%f'%annualized_returns + '%')
+
+    return annualized_returns, total_return
+
+# Sharpe and Calmar-style ratios computed from the daily growth rates within [start, end].
+#
+# fund_df['日增长率'] is a daily growth rate in percent (get_fund_his_by_id divides it by 100 when
+# compounding), so every quantity below is kept in percent and no extra *100 scaling is applied.
+#   withdrawal -- maximum drawdown in percent, as returned by calc_fund_drawdown
+# Returns (sharpe_ratio, calmar_ratio); a ratio is 0.0 when its denominator is zero or NaN.
+def cal_fund_sharpe_ratio(fund_df, withdrawal, start='1970-01-01', end='2200-01-01'):
+    df = fund_df[['净值日期', '日增长率']].copy()
+
+    # Keep rows with start <= date <= end ("&", not "==", so rows failing both tests are dropped).
+    df = df[(df['净值日期'] >= start) & (df['净值日期'] <= end)]
+    daily_growth = df['日增长率'].astype(float)
+
+    # Mean and sample standard deviation of the daily growth rate, both in percent
+    daily_return_ratio_average = daily_growth.mean()
+    return_ratio_std = daily_growth.std()
+
+    # Daily risk-free rate in percent, de-compounded from the annual rate over 365 days
+    daily_risk_free_return_ratio = (((1 + risk_free_annual_return_ratio)**(1/365)) - 1) * 100
+    daily_excess = daily_return_ratio_average - daily_risk_free_return_ratio
+
+    # Sharpe ratio, annualised with sqrt(trading days per year). Division by a zero numpy float
+    # yields inf/nan rather than raising, so the degenerate cases are tested explicitly.
+    if return_ratio_std > 0:
+        fund_sharpe_ratio = daily_excess / return_ratio_std * math.sqrt(trading_days_per_year)
+    else:
+        fund_sharpe_ratio = 0.0
+
+    # Calmar-style ratio: daily excess return relative to the maximum drawdown, scaled by 100
+    if withdrawal > 0:
+        fund_calmar_ratio = daily_excess / withdrawal * 100
+    else:
+        fund_calmar_ratio = 0.0
+    print('sharpe_ratio:%f' % fund_sharpe_ratio + ', calmar_ratio:%f' % fund_calmar_ratio)
+    return fund_sharpe_ratio, fund_calmar_ratio
+
+# Annualised volatility (in percent) of the dividend-adjusted NAV within [start, end].
+#
+# df_fund must provide '净值日期', '复权净值' and '上一交易日复权净值' (the previous trading day's
+# value; calc_fund_kpi builds it with shift(1)).
+# The volatility is the standard deviation of the daily log returns (np.std, i.e. ddof=0),
+# times 100, times sqrt(trading_days_per_year).
+#
+# Prints the value and returns it.
+def calc_fund_volatility(df_fund, start='1970-01-01', end='2200-01-01'):
+    df = df_fund[['净值日期', '复权净值', '上一交易日复权净值']].copy()
+
+    # Keep rows with start <= date <= end. The masks are combined with "&": comparing them with
+    # "==" would also keep rows for which both tests are False (possible when start > end).
+    # .copy() makes the slice independent so that adding a column below is unambiguous.
+    df = df[(df['净值日期'] >= start) & (df['净值日期'] <= end)].copy()
+
+    # Daily log return: ln(value) - ln(previous value)
+    df['价格自然对数差'] = np.log(df['复权净值']) - np.log(df['上一交易日复权净值'])
+
+    # Scale the daily figure to a year and express it in percent
+    volitality = np.std(df['价格自然对数差']) * 100 * math.sqrt(trading_days_per_year)
+
+    print('volitality:%f'%volitality)
+
+    return volitality
+
+# Compute the key performance indicators of one fund from its NAV history.
+#
+#   fund_id                       -- fund code, stored under 'code' in the result
+#   df_cumulative_net_value_trend -- frame with the columns '净值日期', '复权净值' and '日增长率'
+#                                    (the layout produced by get_fund_his_by_id)
+#   start, end                    -- window used for drawdown, returns, Sharpe/Calmar and
+#                                    volatility; the fund age always covers the full history
+#
+# Returns a dict with the keys code, years, withdrawal, annual_return, total_return, sharp,
+# calmar and volatility, in that order. calmar is rounded to 4 places, the other figures to 2.
+# The fund age and the finished dict are printed.
+#
+# Raises whatever the helpers raise (e.g. IndexError when the frame or the window is empty).
+def calc_fund_kpi(fund_id, df_cumulative_net_value_trend, start='1970-01-01', end='2200-01-01'):
+    df = df_cumulative_net_value_trend[['净值日期', '复权净值', '日增长率']].copy()
+
+    # Adjusted NAV of the previous trading day (consumed by calc_fund_volatility);
+    # shift(1) leaves NaN in the first row.
+    df['上一交易日复权净值'] = df['复权净值'].shift(1)
+
+    # Normalise dtypes
+    df['净值日期'] = pd.to_datetime(df['净值日期'])
+    for column in ('复权净值', '上一交易日复权净值'):
+        df[column] = df[column].astype(float)
+
+    # Fund age: span between the first and the last NAV date
+    years = calc_fund_established_time(df)
+    whole_years = int(years)
+    # e.g. 3.5 years is printed as 3 years and 182 days
+    print('基金[%s'%fund_id + ']成立时间:%d'%(whole_years) + '年%d'%((years-whole_years)*365) + '天')
+
+    # Maximum drawdown; the peak and trough dates are not needed here
+    withdrawal, _peak_date, _trough_date = calc_fund_drawdown(df, start, end)
+
+    # Annualised and total return
+    annual_return, total_return = cal_fund_annual_return(df, start, end)
+
+    # Sharpe and Calmar ratios
+    sharpe_ratio, calmar_ratio = cal_fund_sharpe_ratio(df, withdrawal, start, end)
+
+    # Volatility
+    volatility = calc_fund_volatility(df, start, end)
+
+    # Assemble the result in one go; the key order fixes the column order of any
+    # DataFrame built from these dicts.
+    dict_select = {
+        'code': fund_id,
+        'years': round(years, 2),
+        'withdrawal': round(withdrawal, 2),
+        'annual_return': round(annual_return, 2),
+        'total_return': round(total_return, 2),
+        'sharp': round(sharpe_ratio, 2),
+        'calmar': round(calmar_ratio, 4),
+        'volatility': round(volatility, 2),
+    }
+
+    print(dict_select)
+
+    return dict_select
+
+# Pool of User-Agent strings (rebinds the list of the same name defined near the top of the file)
+user_agent_list = [
+    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
+    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
+    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0',
+    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36',
+    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36'
+]
+
+# Pool of Referer URLs (rebinds the earlier list; the third entry differs from the first definition)
+referer_list = [
+    'http://fund.eastmoney.com/110022.html',
+    'http://fund.eastmoney.com/110023.html',
+    'http://fund.eastmoney.com/',
+    'http://fund.eastmoney.com/110025.html'
+]
+
+# Fetch baseUrl with a randomly chosen User-Agent/Referer pair.
+# Returns the response body as text on HTTP 200, otherwise None (also on any request error).
+def get_url(baseUrl):
+    headers = {'User-Agent': random.choice(user_agent_list), 'Referer': random.choice(referer_list)}
+    try:
+        # Without a timeout a stalled connection would block the whole crawl indefinitely.
+        resp = requests.get(baseUrl, headers=headers, timeout=10)
+    except requests.RequestException:
+        print("没有爬取到相应的内容")
+        return None
+
+    if resp.status_code == 200:
+        return resp.text
+    print("没有爬取到相应的内容")
+    return None
+
+# Scrape the fee page of fund `code` (http://fundf10.eastmoney.com/jjfl_<code>.html).
+#
+#   code -- fund code; interpolated into the page URL and stored under '基金代码'
+#
+# Returns a dict with the keys '基金代码', 'scale', 'm_fee', 'c_fee', 'sale_fee' and 'sub_fee'.
+# scale, m_fee, c_fee and sale_fee are floats parsed from the page text; each is 0.0 when the
+# value is missing or malformed, or when the page could not be downloaded. 'sub_fee' stays ''.
+# Side effects: the text of every inspected element is appended to log.txt via log(), and the
+# finished dict is printed.
+#
+# NOTE(review): a failed download and a page without fee data both yield 0.0 values.
+def get_fund_fee(code):
+    dict_select = {
+        '基金代码': code,  # fund code
+        'scale': '',  # fund size
+        'm_fee': '',  # management fee
+        'c_fee': '',  # custodian fee
+        'sale_fee': '',  # sales service fee
+        'sub_fee': '',  # subscription fee (< 500k)
+    }
+
+    # Labels that must equal an element's text exactly -> key that receives the value
+    exact_labels = {
+        u'管理费率': 'm_fee',
+        u'托管费率': 'c_fee',
+        u'销售服务费率': 'sale_fee',
+    }
+
+    url = 'http://fundf10.eastmoney.com/jjfl_%s' % code + '.html'
+    html = get_url(url)
+    if html is None:
+        # Download failed (get_url printed a message). BeautifulSoup(None) would raise a
+        # TypeError and abort the caller's whole crawl, so continue with no elements.
+        elements = []
+    else:
+        soup = BeautifulSoup(html, 'html.parser')
+        elements = soup.find_all(['span', 'td', 'label'])
+
+    # A label's value is the text of the element that follows it in document order;
+    # `target` remembers which key the next element belongs to.
+    target = ''
+    for co in elements:
+        log('co.text:%s' % co.text)
+        if target != '':
+            dict_select[target] = co.text
+            if target == 'sale_fee':
+                # sale_fee is the last field collected; stop scanning the page
+                break
+            target = ''
+
+        if co.text.find(u'资产规模') >= 0:
+            target = 'scale'
+        else:
+            target = exact_labels.get(co.text, '')
+
+    # Convert the collected texts to floats
+    for key in ('scale', 'm_fee', 'c_fee', 'sale_fee'):
+        try:
+            if key == 'scale':
+                # only the number directly in front of '亿元' is the fund size
+                digits = get_decimal_suffix(dict_select[key], '亿元')
+            else:
+                digits = get_decimal(dict_select[key])
+            dict_select[key] = float(digits)
+        except (TypeError, ValueError):
+            # '' (nothing found) or a malformed number
+            dict_select[key] = 0.0
+
+    print(dict_select)
+    return dict_select
+
+# Build the fee table for every fund in df, cached in fund_fee_list.csv.
+#
+#   df -- fund list; the columns '基金代码' and '手续费' are read
+#
+# On a cache hit the CSV is returned with every column as str (dtype=object). Otherwise each
+# fund's fee page is scraped with get_fund_fee (pausing one second after each fund), '手续费' is
+# copied into 'sub_fee', and the table is written to the cache file and returned.
+def get_fund_fee_list(df):
+    filename = 'fund_fee_list.csv'
+
+    if os.path.exists(filename):
+        return pd.read_csv(filename, dtype=object)
+
+    # Never incremented: failures are not caught here, so the final message always reports 0
+    err_count = 0
+    fee_list = []
+
+    for pos in range(df.shape[0]):
+        record = df.iloc[pos]
+        fund_code = record['基金代码']
+        print('基金编码[%s'%(fund_code) + ']正在读取费用...')
+
+        dict_item = get_fund_fee(fund_code)
+        dict_item['sub_fee'] = record['手续费']
+        fee_list.append(dict_item)
+
+        # Throttle the crawl
+        time.sleep(1)
+
+    # Persist the result so that later runs skip the crawl
+    df_fee = pd.DataFrame(fee_list)
+    df_fee.to_csv(filename, encoding='utf_8_sig', index=None)
+    print('读取费率失败次数:%d' % err_count)
+
+    return df_fee
+
+# Rank the funds whose short name contains `keywords` and export the ranking to Excel.
+#
+#   df_in          -- fund list with '基金代码', '基金简称', '基金类型' and the fee columns
+#                     scale / m_fee / c_fee / sale_fee / sub_fee
+#   keywords       -- name filter; for '债', funds whose largest daily growth exceeds 2.0 are skipped
+#   max_withdrawal -- keep funds whose maximum drawdown (percent) is at most this value
+#   establish_year -- keep funds whose NAV history spans at least this many years
+#   start, end     -- analysis window handed to calc_fund_kpi
+#
+# Returns the filtered KPI table sorted by sharp, withdrawal, calmar (descending) and total_fee
+# (ascending); it is also written to 'fund_rank-<keywords>.xlsx'. When no fund yields KPIs, the
+# empty frame is returned and no file is written.
+def get_fund_rank(df_in, keywords, max_withdrawal, establish_year=3.0, start='1970-01-01', end='2200-01-01'):
+    fund_kpi_list = []
+
+    # Filter by keyword, then drop fund types marked '场内'
+    df_dist = query_fund_by_fundname_keyword(df_in, keywords)
+    df_dist = df_dist[df_dist['基金类型'].str.contains('场内') == False]
+
+    for i in np.arange(0, df_dist.shape[0]):
+        fund = df_dist.iloc[i]
+        try:
+            print('基金编码[%s'%(fund['基金代码']) + ']正在读取...')
+
+            # Load (download or read from cache) the NAV history
+            df_cumulative_net_value_trend = get_fund_his_by_id(fund['基金代码'], dir_cumulative_net_value_trend)
+            print('历史数据已加载...')
+            if keywords != '债' or df_cumulative_net_value_trend['日增长率'].astype(float).max(skipna=True) <= 2.0:
+                dict_kpi = calc_fund_kpi(fund['基金代码'], df_cumulative_net_value_trend, start, end)
+                dict_kpi['code'] = fund['基金代码']
+                dict_kpi['name'] = fund['基金简称']
+                dict_kpi['type'] = fund['基金类型']
+                print('KPI计算已完成...')
+
+                # Fee data comes from the columns already present in df_in
+                for fee_key in ('scale', 'm_fee', 'c_fee', 'sale_fee', 'sub_fee'):
+                    dict_kpi[fee_key] = fund[fee_key]
+                fund_kpi_list.append(dict_kpi)
+        except Exception as e:  # one failing fund must not abort the whole ranking
+            print(f"异常信息:{e}")
+            print('基金编码[%s'%(fund['基金代码']) + ']读取失败')
+
+    df_kpi = pd.DataFrame(fund_kpi_list)
+    print(df_kpi)
+
+    # Without any KPI row the frame has no columns and the filters below would raise KeyError
+    if df_kpi.empty:
+        return df_kpi
+
+    # Apply the age and drawdown limits (.copy() so that adding total_fee is unambiguous)
+    df_kpi = df_kpi[df_kpi['years'].astype(float) >= establish_year]
+    df_kpi = df_kpi[df_kpi['withdrawal'].astype(float) <= max_withdrawal].copy()
+
+    try:
+        df_kpi['total_fee'] = df_kpi['sub_fee'].astype(float) + df_kpi['m_fee'].astype(float) + df_kpi['c_fee'].astype(float) + df_kpi['sale_fee'].astype(float)
+    except Exception as e:  # e.g. a fee column that is not purely numeric
+        print(f"异常信息:{e}")
+        df_kpi['total_fee'] = 0
+        print('[%s'%keywords + ']计算累计费用失败，跳过!')
+
+    df_kpi.sort_values(by=['sharp', 'withdrawal', 'calmar', 'total_fee'], ascending=[False, False, False, True], inplace=True)
+    df_kpi.to_excel('fund_rank-%s' % keywords + '.xlsx')
+    print(df_kpi)
+    return df_kpi
+
+# Build the NAV curve of a weighted fund portfolio.
+#
+#   fund_comb  -- frame with the columns 'code' (fund code) and 'share' (weight)
+#   start, end -- window, compared against '净值日期' as loaded by get_fund_his_by_id
+#
+# Returns a frame indexed by the first fund's '净值日期'. '复权净值' is the running sum of
+# share * adjusted NAV over the funds (missing values are forward-filled after each fund is
+# added); '日增长率' is the day-over-day relative change of that sum (a fraction, not percent).
+def calc_fund_comb_kpi(fund_comb, start='1970-01-01', end='2200-01-01'):
+    df_comb = pd.DataFrame(columns=['净值日期', '复权净值', '复权净值临时', '日增长率'])
+
+    for i in np.arange(0, fund_comb.shape[0]):
+        # Load (download or read from cache) the NAV history
+        df = get_fund_his_by_id(fund_comb.iloc[i]['code'], dir_cumulative_net_value_trend)
+
+        # Keep rows with start <= date <= end; "&" (not "==") so rows failing both tests are dropped.
+        # .copy() gives the in-place set_index below an independent frame.
+        df = df[(df['净值日期'] >= start) & (df['净值日期'] <= end)].copy()
+
+        if df_comb.empty:
+            # The first fund defines the date index of the portfolio
+            df_comb['净值日期'] = df['净值日期']
+            df_comb.set_index('净值日期', inplace=True)
+
+        df.set_index('净值日期', inplace=True)
+
+        print(fund_comb.iloc[i]['share'])
+        # '复权净值临时' holds the sum accumulated so far (0.0 before the first fund is added)
+        if df_comb['复权净值'].isnull().all():
+            df_comb['复权净值临时'] = 0.0
+        else:
+            df_comb.loc[:, '复权净值临时'] = df_comb.loc[:, '复权净值']
+
+        df_comb.loc[:, '复权净值'] = df.loc[:, '复权净值'].astype(float) * fund_comb.iloc[i]['share']
+        df_comb.loc[:, '复权净值'] = df_comb.loc[:, '复权净值'].astype(float).add(df_comb.loc[:, '复权净值临时'])
+        # ffill() is the supported spelling of fillna(method='pad'), whose `method` is deprecated
+        df_comb.ffill(axis=0, inplace=True)
+
+    # Day-over-day relative change of the portfolio value
+    df_comb['日增长率'] = (df_comb['复权净值'] - df_comb['复权净值'].shift(1))/df_comb['复权净值'].shift(1)
+    return df_comb
+
+if __name__ == "__main__":
+    pd.set_option('display.max_rows', 1000)
+    pd.set_option('display.max_columns', 10)
+
+    # get_fund_his_by_id('000051', dir_cumulative_net_value_trend)
+
+    # Load the list of public funds and keep those open for subscription
+    df = get_all_fund_outline()
+    # df = get_all_open_fund_daily()
+    df = df[df['申购状态'] == '开放申购']
+
+    # Load the fee data (scraped or read from cache) and merge it into the fund list
+    df_fee = get_fund_fee_list(df)
+    df_fee = df_fee[['基金代码', 'scale', 'm_fee', 'c_fee', 'sale_fee', 'sub_fee']].copy()
+    # get_fund_fee('000051')
+
+    df = pd.merge(df, df_fee, on='基金代码')
+    df.set_index(df['基金代码'])  # NOTE: the result is discarded, so df keeps its index
+
+    # Dividend test code
+    # df_dividend = get_fund_dividend_by_id('002898')
+    # df_dividend = query_last_dividend_before_date(dir_cumulative_net_value_trend, '002898', '2021-01-15')
+    # print(type(df_dividend))
+    # print(df_dividend)
+
+    df_kpi_csi300 = get_fund_rank(df, '沪深300', 60.0, 5, '2018-01-01', '2022-10-30')
+    df_kpi_csi500 = get_fund_rank(df, '中证500', 50.0, 5, '2018-01-01', '2022-10-30')
+    df_kpi_gem = get_fund_rank(df, '创业板', 50.0, 5, '2018-01-01', '2022-10-30')
+    df_kpi_gold = get_fund_rank(df, '黄金', 50.0, 5, '2018-01-01', '2022-10-30')
+    df_kpi_bond = get_fund_rank(df, '债', 30.0, 5, '2018-01-01', '2022-10-30')
+    df_kpi_sp500 = get_fund_rank(df, '标普500', 50.0, 5, '2018-01-01', '2022-10-30')
+    df_kpi_nasda = get_fund_rank(df, '纳斯达克', 50.0, 5, '2018-01-01', '2022-10-30')
+    # NOTE(review): the first row of each ranking is used below; an empty ranking makes .iloc[0] raise
+    # # Sector funds
+    # get_fund_rank(df, '新能源', 50.0, 3, '2018-01-01', '2021-02-18')
+    # get_fund_rank(df, '消费', 50.0, 3, '2018-01-01', '2021-02-18')
+    # get_fund_rank(df, '商品', 50.0, 3, '2018-01-01', '2021-02-18')
+    # get_fund_rank(df, '混合', 50.0, 3, '2018-01-01', '2021-03-28')
+
+    print(f"基金组合:{df_kpi_csi300.iloc[0]['code']}:0.25,\n \
+                    {df_kpi_csi500.iloc[0]['code']}:0.15,\n \
+                    {df_kpi_gem.iloc[0]['code']}: 0.10,\n \
+                    {df_kpi_gold.iloc[0]['code']}: 0.10,\n \
+                    {df_kpi_bond.iloc[0]['code']}: 0.20],\n \
+                    {df_kpi_sp500.iloc[0]['code']}: 0.10],\n\
+                    {df_kpi_nasda.iloc[0]['code']}: 0.10]")
+    # # Portfolio data: the top-ranked fund of each category with a fixed weight
+    # fund_comb = pd.DataFrame([['673100', 0.50], ['001879', 0.15], ['000187', 0.20], ['000218', 0.15]], columns=['code', 'share'])
+    fund_comb = pd.DataFrame([
+                                [df_kpi_csi300.iloc[0]['code'], 0.25], 
+                                [df_kpi_csi500.iloc[0]['code'], 0.15], 
+                                [df_kpi_gem.iloc[0]['code'], 0.10], 
+                                [df_kpi_gold.iloc[0]['code'], 0.10], 
+                                [df_kpi_bond.iloc[0]['code'], 0.20], 
+                                [df_kpi_sp500.iloc[0]['code'], 0.10], 
+                                [df_kpi_nasda.iloc[0]['code'], 0.10]
+                                ], 
+                                columns=['code', 'share'])
+    df_comb = calc_fund_comb_kpi(fund_comb,  '2017-06-02', '2022-10-30')
+    df_comb.reset_index(inplace=True)
+    x = df_comb['净值日期']
+    y = df_comb['复权净值']
+
+    fund_hs300 = pd.DataFrame([['000512', 1.00]], columns=['code', 'share'])
+    df_hs300 = calc_fund_comb_kpi(fund_hs300,  '2017-06-02', '2022-10-30')
+    df_hs300.reset_index(inplace=True)
+    df_hs300.to_csv("参考沪深300.csv", encoding='utf_8_sig', index=None)
+
+    y_hs300 = df_hs300['复权净值']
+
+    # Plot the portfolio curve against the single-fund reference curve
+    draw_cumulative_net_value_trend(x, y, y_hs300)
-- 
GitLab