Auto commit

上级 b9eea59a
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
...@@ -17,30 +17,15 @@ ...@@ -17,30 +17,15 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Looking in indexes: http://mirrors.csdn.net.cn/repository/csdn-pypi-mirrors/simple\n", "Looking in indexes: http://mirrors.csdn.net.cn/repository/csdn-pypi-mirrors/simple\n",
"Collecting pandas\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.8/dist-packages (2.0.1)\n",
" Downloading http://mirrors.csdn.net.cn/repository/csdn-pypi-mirrors/packages/pandas/2.0.1/pandas-2.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)\n", "Requirement already satisfied: openpyxl in /usr/local/lib/python3.8/dist-packages (3.1.2)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m284.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "Requirement already satisfied: xlrd==1.2.0 in /usr/local/lib/python3.8/dist-packages (1.2.0)\n",
"\u001b[?25hCollecting openpyxl\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /root/.local/lib/python3.8/site-packages (from pandas) (2.8.2)\n",
" Downloading http://mirrors.csdn.net.cn/repository/csdn-pypi-mirrors/packages/openpyxl/3.1.2/openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)\n", "Requirement already satisfied: numpy>=1.20.3 in /usr/local/lib/python3.8/dist-packages (from pandas) (1.24.3)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m250.0/250.0 kB\u001b[0m \u001b[31m189.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.8/dist-packages (from pandas) (2023.3)\n",
"\u001b[?25hCollecting xlrd==1.2.0\n", "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.8/dist-packages (from pandas) (2023.3)\n",
" Downloading http://mirrors.csdn.net.cn/repository/csdn-pypi-mirrors/packages/xlrd/1.2.0/xlrd-1.2.0-py2.py3-none-any.whl (103 kB)\n", "Requirement already satisfied: et-xmlfile in /usr/local/lib/python3.8/dist-packages (from openpyxl) (1.1.0)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.3/103.3 kB\u001b[0m \u001b[31m137.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "Requirement already satisfied: six>=1.5 in /root/.local/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
"\u001b[?25hCollecting tzdata>=2022.1\n",
" Downloading http://mirrors.csdn.net.cn/repository/csdn-pypi-mirrors/packages/tzdata/2023.3/tzdata-2023.3-py2.py3-none-any.whl (341 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.8/341.8 kB\u001b[0m \u001b[31m250.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting numpy>=1.20.3\n",
" Downloading http://mirrors.csdn.net.cn/repository/csdn-pypi-mirrors/packages/numpy/1.24.3/numpy-1.24.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m236.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hCollecting pytz>=2020.1\n",
" Downloading http://mirrors.csdn.net.cn/repository/csdn-pypi-mirrors/packages/pytz/2023.3/pytz-2023.3-py2.py3-none-any.whl (502 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m502.3/502.3 kB\u001b[0m \u001b[31m231.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.8/dist-packages (from pandas) (2.8.2)\n",
"Collecting et-xmlfile\n",
" Downloading http://mirrors.csdn.net.cn/repository/csdn-pypi-mirrors/packages/et-xmlfile/1.1.0/et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.8/dist-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
"Installing collected packages: pytz, xlrd, tzdata, numpy, et-xmlfile, pandas, openpyxl\n",
"Successfully installed et-xmlfile-1.1.0 numpy-1.24.3 openpyxl-3.1.2 pandas-2.0.1 pytz-2023.3 tzdata-2023.3 xlrd-1.2.0\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n", "\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
...@@ -55,20 +40,20 @@ ...@@ -55,20 +40,20 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"ename": "TypeError", "data": {
"evalue": "only integer scalar arrays can be converted to a scalar index", "text/plain": [
"output_type": "error", "{'outlier_values': array([ 51. , 6607.4, 865. ]),\n",
"traceback": [ " 'outlier_index': array([ 0, 8, 15]),\n",
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", " 'outlier_ratio': 0.15789473684210525}"
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", ]
"Cell \u001b[0;32mIn[4], line 24\u001b[0m\n\u001b[1;32m 18\u001b[0m result \u001b[39m=\u001b[39m {\n\u001b[1;32m 19\u001b[0m \u001b[39m'\u001b[39m\u001b[39moutlier_values\u001b[39m\u001b[39m'\u001b[39m: outliers,\n\u001b[1;32m 20\u001b[0m \u001b[39m'\u001b[39m\u001b[39moutlier_index\u001b[39m\u001b[39m'\u001b[39m: outlier_index,\n\u001b[1;32m 21\u001b[0m \u001b[39m'\u001b[39m\u001b[39moutlier_ratio\u001b[39m\u001b[39m'\u001b[39m: outlier_ratio\n\u001b[1;32m 22\u001b[0m }\n\u001b[1;32m 23\u001b[0m \u001b[39mreturn\u001b[39;00m result\n\u001b[0;32m---> 24\u001b[0m detect_outliers([\u001b[39m51\u001b[39;49m, \u001b[39m2618.2\u001b[39;49m, \u001b[39m2608.4\u001b[39;49m, \u001b[39m2651.9\u001b[39;49m, \u001b[39m3442.1\u001b[39;49m, \u001b[39m3393.1\u001b[39;49m, \u001b[39m3136.1\u001b[39;49m, \u001b[39m3744.1\u001b[39;49m,\n\u001b[1;32m 25\u001b[0m \u001b[39m6607.4\u001b[39;49m, \u001b[39m4060.3\u001b[39;49m, \u001b[39m3614.7\u001b[39;49m, \u001b[39m3295.5\u001b[39;49m, \u001b[39m2332.1\u001b[39;49m, \u001b[39m2699.3\u001b[39;49m, \u001b[39m3036.8\u001b[39;49m,\n\u001b[1;32m 26\u001b[0m \u001b[39m865\u001b[39;49m, \u001b[39m3014.3\u001b[39;49m, \u001b[39m2742.8\u001b[39;49m, \u001b[39m2173.5\u001b[39;49m])\n", },
"Cell \u001b[0;32mIn[4], line 11\u001b[0m, in \u001b[0;36mdetect_outliers\u001b[0;34m(data)\u001b[0m\n\u001b[1;32m 8\u001b[0m lower_bound \u001b[39m=\u001b[39m q1 \u001b[39m-\u001b[39m \u001b[39m1.5\u001b[39m \u001b[39m*\u001b[39m iqr \u001b[39m# 计算下界\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[39m# 判断异常值,大于上界或小于下界的值即为异常值\u001b[39;00m\n\u001b[0;32m---> 11\u001b[0m outliers \u001b[39m=\u001b[39m data[(data \u001b[39m>\u001b[39;49m upper_bound) \u001b[39m|\u001b[39;49m (data \u001b[39m<\u001b[39;49m lower_bound)]\n\u001b[1;32m 12\u001b[0m outlier_index \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mwhere((data \u001b[39m>\u001b[39m upper_bound) \u001b[39m|\u001b[39m (data \u001b[39m<\u001b[39m lower_bound))[\u001b[39m0\u001b[39m]\n\u001b[1;32m 14\u001b[0m \u001b[39m# 计算异常值比例\u001b[39;00m\n", "execution_count": 6,
"\u001b[0;31mTypeError\u001b[0m: only integer scalar arrays can be converted to a scalar index" "metadata": {},
] "output_type": "execute_result"
} }
], ],
"source": [ "source": [
...@@ -76,6 +61,7 @@ ...@@ -76,6 +61,7 @@
"\n", "\n",
"def detect_outliers(data):\n", "def detect_outliers(data):\n",
" # 使用numpy中的percentile函数计算百分位数\n", " # 使用numpy中的percentile函数计算百分位数\n",
" data=np.array(data)\n",
" q1, q3 = np.percentile(data, [25, 75])\n", " q1, q3 = np.percentile(data, [25, 75])\n",
" iqr = q3 - q1 # 计算四分位距\n", " iqr = q3 - q1 # 计算四分位距\n",
" upper_bound = q3 + 1.5 * iqr # 计算上界\n", " upper_bound = q3 + 1.5 * iqr # 计算上界\n",
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册