未验证 提交 a6518e9e 编写于 作者: N NLP-LOVE 提交者: GitHub

Add files via upload

上级 12bd2097
{
"cells": [
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.ensemble import GradientBoostingRegressor"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 获取训练数据"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.005988</td>\n",
" <td>0.569231</td>\n",
" <td>0.647059</td>\n",
" <td>0.951220</td>\n",
" <td>-0.225434</td>\n",
" <td>0.837989</td>\n",
" <td>0.357258</td>\n",
" <td>-0.003058</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.161677</td>\n",
" <td>0.743195</td>\n",
" <td>0.682353</td>\n",
" <td>0.960976</td>\n",
" <td>-0.086705</td>\n",
" <td>0.780527</td>\n",
" <td>0.282945</td>\n",
" <td>0.149847</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.113772</td>\n",
" <td>0.744379</td>\n",
" <td>0.541176</td>\n",
" <td>0.990244</td>\n",
" <td>-0.005780</td>\n",
" <td>0.721468</td>\n",
" <td>0.434110</td>\n",
" <td>-0.318043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.053892</td>\n",
" <td>0.608284</td>\n",
" <td>0.764706</td>\n",
" <td>0.951220</td>\n",
" <td>-0.248555</td>\n",
" <td>0.821229</td>\n",
" <td>0.848604</td>\n",
" <td>-0.003058</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.173653</td>\n",
" <td>0.866272</td>\n",
" <td>0.682353</td>\n",
" <td>0.951220</td>\n",
" <td>0.017341</td>\n",
" <td>0.704709</td>\n",
" <td>-0.021002</td>\n",
" <td>-0.195719</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5 6 \\\n",
"0 0.005988 0.569231 0.647059 0.951220 -0.225434 0.837989 0.357258 \n",
"1 0.161677 0.743195 0.682353 0.960976 -0.086705 0.780527 0.282945 \n",
"2 0.113772 0.744379 0.541176 0.990244 -0.005780 0.721468 0.434110 \n",
"3 0.053892 0.608284 0.764706 0.951220 -0.248555 0.821229 0.848604 \n",
"4 0.173653 0.866272 0.682353 0.951220 0.017341 0.704709 -0.021002 \n",
"\n",
" 7 \n",
"0 -0.003058 \n",
"1 0.149847 \n",
"2 -0.318043 \n",
"3 -0.003058 \n",
"4 -0.195719 "
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_feature = np.genfromtxt(\"train_feat.txt\",dtype=np.float32)\n",
"num_feature = len(train_feature[0])\n",
"train_feature = pd.DataFrame(train_feature)\n",
"\n",
"train_label = train_feature.iloc[:, num_feature - 1]\n",
"train_feature = train_feature.iloc[:, 0:num_feature - 2]\n",
"train_feature"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 320.0\n",
"1 361.0\n",
"2 364.0\n",
"3 336.0\n",
"4 358.0\n",
"Name: 9, dtype: float32"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_label"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 获取测试数据"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.005988</td>\n",
" <td>0.569231</td>\n",
" <td>0.647059</td>\n",
" <td>0.951220</td>\n",
" <td>-0.225434</td>\n",
" <td>0.837989</td>\n",
" <td>0.357258</td>\n",
" <td>-0.003058</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.161677</td>\n",
" <td>0.743195</td>\n",
" <td>0.682353</td>\n",
" <td>0.960976</td>\n",
" <td>-0.086705</td>\n",
" <td>0.780527</td>\n",
" <td>0.282945</td>\n",
" <td>0.149847</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.113772</td>\n",
" <td>0.744379</td>\n",
" <td>0.541176</td>\n",
" <td>0.990244</td>\n",
" <td>-0.005780</td>\n",
" <td>0.721468</td>\n",
" <td>0.434110</td>\n",
" <td>-0.318043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.053892</td>\n",
" <td>0.608284</td>\n",
" <td>0.764706</td>\n",
" <td>0.951220</td>\n",
" <td>-0.248555</td>\n",
" <td>0.821229</td>\n",
" <td>0.848604</td>\n",
" <td>-0.003058</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.173653</td>\n",
" <td>0.866272</td>\n",
" <td>0.682353</td>\n",
" <td>0.951220</td>\n",
" <td>0.017341</td>\n",
" <td>0.704709</td>\n",
" <td>-0.021002</td>\n",
" <td>-0.195719</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5 6 \\\n",
"0 0.005988 0.569231 0.647059 0.951220 -0.225434 0.837989 0.357258 \n",
"1 0.161677 0.743195 0.682353 0.960976 -0.086705 0.780527 0.282945 \n",
"2 0.113772 0.744379 0.541176 0.990244 -0.005780 0.721468 0.434110 \n",
"3 0.053892 0.608284 0.764706 0.951220 -0.248555 0.821229 0.848604 \n",
"4 0.173653 0.866272 0.682353 0.951220 0.017341 0.704709 -0.021002 \n",
"\n",
" 7 \n",
"0 -0.003058 \n",
"1 0.149847 \n",
"2 -0.318043 \n",
"3 -0.003058 \n",
"4 -0.195719 "
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_feature = np.genfromtxt(\"test_feat.txt\",dtype=np.float32)\n",
"num_feature = len(test_feature[0])\n",
"test_feature = pd.DataFrame(test_feature)\n",
"\n",
"test_label = test_feature.iloc[:, num_feature - 1]\n",
"test_feature = test_feature.iloc[:, 0:num_feature - 2]\n",
"test_feature"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 320.0\n",
"1 361.0\n",
"2 364.0\n",
"3 336.0\n",
"4 358.0\n",
"Name: 9, dtype: float32"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_label"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### GBDT模型建立"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pred: 320.0008173984891 label: 320.0\n",
"pred: 360.99965033119537 label: 361.0\n",
"pred: 363.99928183902097 label: 364.0\n",
"pred: 336.0002344322584 label: 336.0\n",
"pred: 358.0000159974151 label: 358.0\n",
"均方误差: 0.0005218003748239915\n"
]
}
],
"source": [
"gbdt = GradientBoostingRegressor(\n",
" loss = 'ls'\n",
", learning_rate = 0.1\n",
", n_estimators = 100\n",
", subsample = 1\n",
", min_samples_split = 2\n",
", min_samples_leaf = 1\n",
", max_depth = 3\n",
", init = None\n",
", random_state = None\n",
", max_features = None\n",
", alpha = 0.9\n",
", verbose = 0\n",
", max_leaf_nodes = None\n",
", warm_start = False\n",
")\n",
"\n",
"gbdt.fit(train_feature, train_label)\n",
"pred = gbdt.predict(test_feature)\n",
"total_err = 0\n",
"\n",
"for i in range(pred.shape[0]):\n",
" print('pred:', pred[i], ' label:', test_label[i])\n",
"print('均方误差:', np.sqrt(((pred - test_label) ** 2).mean()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
0.00598802 0.569231 0.647059 0.95122 -0.225434 0.837989 0.357258 -0.0030581 -0.383475 320
0.161677 0.743195 0.682353 0.960976 -0.0867052 0.780527 0.282945 0.149847 -0.0529661 361
0.113772 0.744379 0.541176 0.990244 -0.00578035 0.721468 0.43411 -0.318043 0.288136 364
0.0538922 0.608284 0.764706 0.95122 -0.248555 0.821229 0.848604 -0.0030581 0.239407 336
0.173653 0.866272 0.682353 0.95122 0.017341 0.704709 -0.0210016 -0.195719 0.150424 358
\ No newline at end of file
0.00598802 0.569231 0.647059 0.95122 -0.225434 0.837989 0.357258 -0.0030581 -0.383475 320
0.161677 0.743195 0.682353 0.960976 -0.0867052 0.780527 0.282945 0.149847 -0.0529661 361
0.113772 0.744379 0.541176 0.990244 -0.00578035 0.721468 0.43411 -0.318043 0.288136 364
0.0538922 0.608284 0.764706 0.95122 -0.248555 0.821229 0.848604 -0.0030581 0.239407 336
0.173653 0.866272 0.682353 0.95122 0.017341 0.704709 -0.0210016 -0.195719 0.150424 358
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册