UPDATE

上级 e6376ef1
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 异常值检测"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install the spreadsheet dependencies into the environment of the running kernel.\n",
"# %pip (rather than a bare or shell pip) guarantees the kernel's interpreter is targeted;\n",
"# -q keeps the install log out of the saved notebook.\n",
"%pip install -q pandas openpyxl xlrd==1.2.0"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'outlier_values': array([ 51. , 6607.4, 865. ]),\n",
" 'outlier_index': array([ 0, 8, 15]),\n",
" 'outlier_ratio': 0.15789473684210525}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"\n",
"\n",
"def detect_outliers(data, k=1.5):\n",
"    \"\"\"Flag outliers in a 1-D sample using the interquartile-range (IQR) rule.\n",
"\n",
"    A value is an outlier when it lies above Q3 + k * IQR or below Q1 - k * IQR.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    data : array-like\n",
"        One-dimensional sequence of numeric values.\n",
"    k : float, default 1.5\n",
"        Multiplier applied to the IQR to build the lower and upper bounds.\n",
"\n",
"    Returns\n",
"    -------\n",
"    dict\n",
"        'outlier_values': array with the flagged values,\n",
"        'outlier_index': array with their positions in `data`,\n",
"        'outlier_ratio': fraction of flagged values (0.0 for empty input).\n",
"    \"\"\"\n",
"    data = np.asarray(data)\n",
"    if data.size == 0:\n",
"        # Percentiles are undefined for an empty sample and the ratio below\n",
"        # would divide by zero, so report that nothing was flagged.\n",
"        return {\n",
"            'outlier_values': data.copy(),\n",
"            'outlier_index': np.array([], dtype=int),\n",
"            'outlier_ratio': 0.0\n",
"        }\n",
"\n",
"    # Quartiles and the interquartile range.\n",
"    q1, q3 = np.percentile(data, [25, 75])\n",
"    iqr = q3 - q1\n",
"    upper_bound = q3 + k * iqr\n",
"    lower_bound = q1 - k * iqr\n",
"\n",
"    # Build the mask once and reuse it for both the values and their positions.\n",
"    is_outlier = (data > upper_bound) | (data < lower_bound)\n",
"    outliers = data[is_outlier]\n",
"    outlier_index = np.where(is_outlier)[0]\n",
"\n",
"    return {\n",
"        'outlier_values': outliers,\n",
"        'outlier_index': outlier_index,\n",
"        'outlier_ratio': len(outliers) / len(data)\n",
"    }\n",
"\n",
"\n",
"detect_outliers([51, 2618.2, 2608.4, 2651.9, 3442.1, 3393.1, 3136.1, 3744.1,\n",
"                 6607.4, 4060.3, 3614.7, 3295.5, 2332.1, 2699.3, 3036.8,\n",
"                 865, 3014.3, 2742.8, 2173.5])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
此差异已折叠。
此差异已折叠。
进行分析的季节,分析时的年龄,是否患有幼稚病,事故或严重创伤,手术干预,去年高烧,饮酒频率,吸烟习惯,每天花费在坐的小时数,诊断结果
-0.33,0.69,0,1,1,0,0.8,0,0.88,0
-0.33,0.94,1,0,1,0,0.8,1,0.31,1
-0.33,0.5,1,0,0,0,1,-1,0.5,0
-0.33,0.75,0,1,1,0,1,-1,0.38,0
-0.33,0.67,1,1,0,0,0.8,-1,0.5,1
-0.33,0.67,1,0,1,0,0.8,0,0.5,0
-0.33,0.67,0,0,0,-1,0.8,-1,0.44,0
-0.33,1,1,1,1,0,0.6,-1,0.38,0
1,0.64,0,0,1,0,0.8,-1,0.25,0
1,0.61,1,0,0,0,1,-1,0.25,0
1,0.67,1,1,0,-1,0.8,0,0.31,0
1,0.78,1,1,1,0,0.6,0,0.13,0
1,0.75,1,1,1,0,0.8,1,0.25,0
1,0.81,1,0,0,0,1,-1,0.38,0
1,0.94,1,1,1,0,0.2,-1,0.25,0
1,0.81,1,1,0,0,1,1,0.5,0
1,0.64,1,0,1,0,1,-1,0.38,0
1,0.69,1,0,1,0,0.8,-1,0.25,1
1,0.75,1,1,1,0,1,1,0.25,0
1,0.67,1,0,0,0,0.8,1,0.38,1
1,0.67,0,0,1,0,0.8,-1,0.25,0
1,0.75,1,0,0,0,0.6,0,0.25,0
1,0.67,1,1,0,0,0.8,-1,0.25,0
1,0.69,1,0,1,-1,1,-1,0.44,1
1,0.56,1,0,1,0,1,-1,0.63,0
1,0.67,1,0,0,0,1,-1,0.25,0
1,0.67,1,0,1,0,0.6,-1,0.38,1
1,0.78,1,1,0,1,0.6,-1,0.38,1
1,0.58,0,0,1,0,1,-1,0.19,0
1,0.67,0,0,1,0,0.6,0,0.5,1
1,0.61,1,0,1,0,1,-1,0.63,0
1,0.56,1,0,0,0,1,-1,0.44,0
1,0.64,0,0,0,0,1,-1,0.63,0
1,0.58,1,1,1,0,0.8,0,0.44,0
1,0.56,1,1,1,0,1,-1,0.63,0
-1,0.78,1,1,0,1,0.6,-1,0.38,0
-1,0.78,1,0,1,0,1,-1,0.25,0
-1,0.56,1,0,1,0,1,-1,0.63,0
-1,0.67,0,0,1,0,0.6,0,0.5,1
-1,0.69,1,0,0,0,1,-1,0.31,0
-1,0.53,1,1,1,0,0.8,1,0.5,0
-1,0.56,1,1,0,0,0.8,1,0.5,0
-1,0.58,1,0,1,-1,0.8,1,0.5,0
-1,0.56,1,0,0,0,1,-1,0.44,0
-1,0.53,1,1,0,1,1,0,0.31,0
-1,0.53,1,0,0,1,1,0,0.44,0
-0.33,0.56,1,0,0,0,1,-1,0.63,0
-0.33,0.72,1,1,0,0,0.6,1,0.19,0
-0.33,0.64,1,1,1,0,0.8,-1,0.31,0
-0.33,0.75,1,1,1,0,0.6,-1,0.19,0
-0.33,0.67,1,0,1,0,0.8,-1,0.19,0
-0.33,0.53,1,1,0,1,1,-1,0.75,0
-0.33,0.53,1,1,0,0,0.8,0,0.5,0
-0.33,0.58,1,1,1,-1,0.8,0,0.19,0
-0.33,0.61,1,0,1,0,1,-1,0.63,0
-0.33,0.58,1,0,1,0,0.8,1,0.19,0
-0.33,0.53,1,1,0,0,0.8,0,0.75,0
-0.33,0.69,1,1,1,-1,1,-1,0.75,0
-0.33,0.56,1,1,0,0,0.4,1,0.63,0
1,0.58,0,0,0,1,0.8,1,0.44,0
1,0.56,0,0,0,1,0.8,0,1,0
-1,0.64,1,0,0,1,1,1,0.25,0
-1,0.61,1,1,1,0,0.6,-1,0.38,0
-1,0.56,1,0,0,1,1,-1,0.5,0
-1,0.53,1,0,0,1,0.8,-1,0.31,0
-0.33,0.56,0,0,1,0,1,-1,0.56,0
-0.33,0.5,1,1,0,-1,0.8,0,0.88,0
-0.33,0.5,1,0,0,1,1,-1,0.47,0
-0.33,0.5,1,0,0,1,0.8,0,0.31,0
-0.33,0.5,1,0,1,-1,0.8,-1,0.5,0
-0.33,0.5,1,1,0,-1,0.8,0,0.88,1
0.33,0.69,1,0,0,1,1,-1,0.31,0
1,0.56,1,0,0,1,0.6,0,0.5,0
-1,0.5,1,0,0,1,0.8,-1,0.44,0
-1,0.53,1,0,0,1,0.8,-1,0.63,0
-1,0.78,1,0,1,1,1,1,0.25,0
-1,0.75,1,0,1,1,0.6,0,0.56,0
-1,0.72,1,1,1,1,0.8,-1,0.19,0
-1,0.53,1,1,0,1,0.8,-1,0.38,0
-1,1,1,0,1,1,0.6,0,0.25,0
-0.33,0.92,1,1,0,1,1,-1,0.63,0
-1,0.81,1,1,1,1,0.8,0,0.19,0
-0.33,0.92,1,0,0,1,0.6,-1,0.19,0
-0.33,0.86,1,1,1,1,1,-1,0.25,0
-0.33,0.78,1,0,0,1,1,1,0.06,1
-0.33,0.89,1,1,0,0,0.6,1,0.31,0
-0.33,0.75,1,1,1,0,0.6,1,0.25,0
-0.33,0.75,1,1,1,1,0.8,1,0.25,0
-0.33,0.83,1,1,1,0,1,-1,0.31,0
-0.33,0.81,1,1,1,0,1,1,0.38,0
-0.33,0.81,1,1,1,1,0.8,-1,0.38,0
0.33,0.78,1,0,0,0,1,1,0.06,0
0.33,0.75,1,1,0,0,0.8,-1,0.38,0
0.33,0.75,1,0,1,0,0.8,-1,0.44,1
1,0.58,1,0,0,0,0.6,1,0.5,0
-1,0.67,1,0,0,0,1,-1,0.5,0
-1,0.61,1,0,0,0,0.8,0,0.5,0
-1,0.67,1,1,1,0,1,-1,0.31,0
-1,0.64,1,0,1,0,1,0,0.19,0
-1,0.69,0,1,1,0,0.6,-1,0.19,0
此差异已折叠。
"","height","weight"
"1",58,115
"2",59,117
"3",60,120
"4",61,123
"5",62,126
"6",63,129
"7",64,132
"8",65,135
"9",66,139
"10",67,142
"11",68,146
"12",69,150
"13",70,154
"14",71,159
"15",72,164
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册