From a6518e9e03544bc9d4a387b5190127cc5e9cde87 Mon Sep 17 00:00:00 2001 From: NLP-LOVE <52538954+NLP-LOVE@users.noreply.github.com> Date: Tue, 9 Jul 2019 20:32:09 +0800 Subject: [PATCH] Add files via upload --- Machine Learning/3.2 GBDT/GBDT_demo.ipynb | 413 ++++++++++++++++++++++ Machine Learning/3.2 GBDT/test_feat.txt | 5 + Machine Learning/3.2 GBDT/train_feat.txt | 5 + 3 files changed, 423 insertions(+) create mode 100644 Machine Learning/3.2 GBDT/GBDT_demo.ipynb create mode 100644 Machine Learning/3.2 GBDT/test_feat.txt create mode 100644 Machine Learning/3.2 GBDT/train_feat.txt diff --git a/Machine Learning/3.2 GBDT/GBDT_demo.ipynb b/Machine Learning/3.2 GBDT/GBDT_demo.ipynb new file mode 100644 index 0000000..c7c9c8f --- /dev/null +++ b/Machine Learning/3.2 GBDT/GBDT_demo.ipynb @@ -0,0 +1,413 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.ensemble import GradientBoostingRegressor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 获取训练数据" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01234567
00.0059880.5692310.6470590.951220-0.2254340.8379890.357258-0.003058
10.1616770.7431950.6823530.960976-0.0867050.7805270.2829450.149847
20.1137720.7443790.5411760.990244-0.0057800.7214680.434110-0.318043
30.0538920.6082840.7647060.951220-0.2485550.8212290.848604-0.003058
40.1736530.8662720.6823530.9512200.0173410.704709-0.021002-0.195719
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 0.005988 0.569231 0.647059 0.951220 -0.225434 0.837989 0.357258 \n", + "1 0.161677 0.743195 0.682353 0.960976 -0.086705 0.780527 0.282945 \n", + "2 0.113772 0.744379 0.541176 0.990244 -0.005780 0.721468 0.434110 \n", + "3 0.053892 0.608284 0.764706 0.951220 -0.248555 0.821229 0.848604 \n", + "4 0.173653 0.866272 0.682353 0.951220 0.017341 0.704709 -0.021002 \n", + "\n", + " 7 \n", + "0 -0.003058 \n", + "1 0.149847 \n", + "2 -0.318043 \n", + "3 -0.003058 \n", + "4 -0.195719 " + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_feature = np.genfromtxt(\"train_feat.txt\",dtype=np.float32)\n", + "num_feature = len(train_feature[0])\n", + "train_feature = pd.DataFrame(train_feature)\n", + "\n", + "train_label = train_feature.iloc[:, num_feature - 1]\n", + "train_feature = train_feature.iloc[:, 0:num_feature - 2]\n", + "train_feature" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 320.0\n", + "1 361.0\n", + "2 364.0\n", + "3 336.0\n", + "4 358.0\n", + "Name: 9, dtype: float32" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_label" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 获取测试数据" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01234567
00.0059880.5692310.6470590.951220-0.2254340.8379890.357258-0.003058
10.1616770.7431950.6823530.960976-0.0867050.7805270.2829450.149847
20.1137720.7443790.5411760.990244-0.0057800.7214680.434110-0.318043
30.0538920.6082840.7647060.951220-0.2485550.8212290.848604-0.003058
40.1736530.8662720.6823530.9512200.0173410.704709-0.021002-0.195719
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 0.005988 0.569231 0.647059 0.951220 -0.225434 0.837989 0.357258 \n", + "1 0.161677 0.743195 0.682353 0.960976 -0.086705 0.780527 0.282945 \n", + "2 0.113772 0.744379 0.541176 0.990244 -0.005780 0.721468 0.434110 \n", + "3 0.053892 0.608284 0.764706 0.951220 -0.248555 0.821229 0.848604 \n", + "4 0.173653 0.866272 0.682353 0.951220 0.017341 0.704709 -0.021002 \n", + "\n", + " 7 \n", + "0 -0.003058 \n", + "1 0.149847 \n", + "2 -0.318043 \n", + "3 -0.003058 \n", + "4 -0.195719 " + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_feature = np.genfromtxt(\"test_feat.txt\",dtype=np.float32)\n", + "num_feature = len(test_feature[0])\n", + "test_feature = pd.DataFrame(test_feature)\n", + "\n", + "test_label = test_feature.iloc[:, num_feature - 1]\n", + "test_feature = test_feature.iloc[:, 0:num_feature - 2]\n", + "test_feature" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 320.0\n", + "1 361.0\n", + "2 364.0\n", + "3 336.0\n", + "4 358.0\n", + "Name: 9, dtype: float32" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_label" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### GBDT模型建立" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "pred: 320.0008173984891 label: 320.0\n", + "pred: 360.99965033119537 label: 361.0\n", + "pred: 363.99928183902097 label: 364.0\n", + "pred: 336.0002344322584 label: 336.0\n", + "pred: 358.0000159974151 label: 358.0\n", + "均方误差: 0.0005218003748239915\n" + ] + } + ], + "source": [ + "gbdt = GradientBoostingRegressor(\n", + " loss = 'ls'\n", + ", learning_rate = 0.1\n", + ", n_estimators = 100\n", + ", subsample = 1\n", + ", min_samples_split 
 = 2\n", + ", min_samples_leaf = 1\n", + ", max_depth = 3\n", + ", init = None\n", + ", random_state = None\n", + ", max_features = None\n", + ", alpha = 0.9\n", + ", verbose = 0\n", + ", max_leaf_nodes = None\n", + ", warm_start = False\n", + ")\n", + "\n", + "gbdt.fit(train_feature, train_label)\n", + "pred = gbdt.predict(test_feature)\n", + "total_err = 0\n", + "\n", + "for i in range(pred.shape[0]):\n", + " print('pred:', pred[i], ' label:', test_label[i])\n", + "print('均方根误差:', np.sqrt(((pred - test_label) ** 2).mean()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Machine Learning/3.2 GBDT/test_feat.txt b/Machine Learning/3.2 GBDT/test_feat.txt new file mode 100644 index 0000000..7d5cd20 --- /dev/null +++ b/Machine Learning/3.2 GBDT/test_feat.txt @@ -0,0 +1,5 @@ +0.00598802 0.569231 0.647059 0.95122 -0.225434 0.837989 0.357258 -0.0030581 -0.383475 320 +0.161677 0.743195 0.682353 0.960976 -0.0867052 0.780527 0.282945 0.149847 -0.0529661 361 +0.113772 0.744379 0.541176 0.990244 -0.00578035 0.721468 0.43411 -0.318043 0.288136 364 +0.0538922 0.608284 0.764706 0.95122 -0.248555 0.821229 0.848604 -0.0030581 0.239407 336 +0.173653 0.866272 0.682353 0.95122 0.017341 0.704709 -0.0210016 -0.195719 0.150424 358 \ No newline at end of file diff --git a/Machine Learning/3.2 GBDT/train_feat.txt b/Machine Learning/3.2 GBDT/train_feat.txt new file mode 100644 index 0000000..7d5cd20 --- /dev/null +++ b/Machine Learning/3.2 GBDT/train_feat.txt @@ -0,0 +1,5 @@ +0.00598802 0.569231 
0.647059 0.95122 -0.225434 0.837989 0.357258 -0.0030581 -0.383475 320 +0.161677 0.743195 0.682353 0.960976 -0.0867052 0.780527 0.282945 0.149847 -0.0529661 361 +0.113772 0.744379 0.541176 0.990244 -0.00578035 0.721468 0.43411 -0.318043 0.288136 364 +0.0538922 0.608284 0.764706 0.95122 -0.248555 0.821229 0.848604 -0.0030581 0.239407 336 +0.173653 0.866272 0.682353 0.95122 0.017341 0.704709 -0.0210016 -0.195719 0.150424 358 \ No newline at end of file -- GitLab