From a6518e9e03544bc9d4a387b5190127cc5e9cde87 Mon Sep 17 00:00:00 2001
From: NLP-LOVE <52538954+NLP-LOVE@users.noreply.github.com>
Date: Tue, 9 Jul 2019 20:32:09 +0800
Subject: [PATCH] Add files via upload
---
Machine Learning/3.2 GBDT/GBDT_demo.ipynb | 413 ++++++++++++++++++++++
Machine Learning/3.2 GBDT/test_feat.txt | 5 +
Machine Learning/3.2 GBDT/train_feat.txt | 5 +
3 files changed, 423 insertions(+)
create mode 100644 Machine Learning/3.2 GBDT/GBDT_demo.ipynb
create mode 100644 Machine Learning/3.2 GBDT/test_feat.txt
create mode 100644 Machine Learning/3.2 GBDT/train_feat.txt
diff --git a/Machine Learning/3.2 GBDT/GBDT_demo.ipynb b/Machine Learning/3.2 GBDT/GBDT_demo.ipynb
new file mode 100644
index 0000000..c7c9c8f
--- /dev/null
+++ b/Machine Learning/3.2 GBDT/GBDT_demo.ipynb
@@ -0,0 +1,413 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from sklearn.ensemble import GradientBoostingRegressor"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 获取训练数据"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.005988 | \n",
+ " 0.569231 | \n",
+ " 0.647059 | \n",
+ " 0.951220 | \n",
+ " -0.225434 | \n",
+ " 0.837989 | \n",
+ " 0.357258 | \n",
+ " -0.003058 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0.161677 | \n",
+ " 0.743195 | \n",
+ " 0.682353 | \n",
+ " 0.960976 | \n",
+ " -0.086705 | \n",
+ " 0.780527 | \n",
+ " 0.282945 | \n",
+ " 0.149847 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0.113772 | \n",
+ " 0.744379 | \n",
+ " 0.541176 | \n",
+ " 0.990244 | \n",
+ " -0.005780 | \n",
+ " 0.721468 | \n",
+ " 0.434110 | \n",
+ " -0.318043 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0.053892 | \n",
+ " 0.608284 | \n",
+ " 0.764706 | \n",
+ " 0.951220 | \n",
+ " -0.248555 | \n",
+ " 0.821229 | \n",
+ " 0.848604 | \n",
+ " -0.003058 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0.173653 | \n",
+ " 0.866272 | \n",
+ " 0.682353 | \n",
+ " 0.951220 | \n",
+ " 0.017341 | \n",
+ " 0.704709 | \n",
+ " -0.021002 | \n",
+ " -0.195719 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2 3 4 5 6 \\\n",
+ "0 0.005988 0.569231 0.647059 0.951220 -0.225434 0.837989 0.357258 \n",
+ "1 0.161677 0.743195 0.682353 0.960976 -0.086705 0.780527 0.282945 \n",
+ "2 0.113772 0.744379 0.541176 0.990244 -0.005780 0.721468 0.434110 \n",
+ "3 0.053892 0.608284 0.764706 0.951220 -0.248555 0.821229 0.848604 \n",
+ "4 0.173653 0.866272 0.682353 0.951220 0.017341 0.704709 -0.021002 \n",
+ "\n",
+ " 7 \n",
+ "0 -0.003058 \n",
+ "1 0.149847 \n",
+ "2 -0.318043 \n",
+ "3 -0.003058 \n",
+ "4 -0.195719 "
+ ]
+ },
+ "execution_count": 54,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_feature = np.genfromtxt(\"train_feat.txt\", dtype=np.float32)\n",
+ "num_feature = train_feature.shape[1]\n",
+ "train_feature = pd.DataFrame(train_feature)\n",
+ "train_label = train_feature.iloc[:, -1]  # the last column is taken as the label\n",
+ "# NOTE(review): the slice end is exclusive, so column num_feature - 2 is excluded too - confirm intended.\n",
+ "train_feature = train_feature.iloc[:, :num_feature - 2]\n",
+ "train_feature"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 320.0\n",
+ "1 361.0\n",
+ "2 364.0\n",
+ "3 336.0\n",
+ "4 358.0\n",
+ "Name: 9, dtype: float32"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_label"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 获取测试数据"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.005988 | \n",
+ " 0.569231 | \n",
+ " 0.647059 | \n",
+ " 0.951220 | \n",
+ " -0.225434 | \n",
+ " 0.837989 | \n",
+ " 0.357258 | \n",
+ " -0.003058 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0.161677 | \n",
+ " 0.743195 | \n",
+ " 0.682353 | \n",
+ " 0.960976 | \n",
+ " -0.086705 | \n",
+ " 0.780527 | \n",
+ " 0.282945 | \n",
+ " 0.149847 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0.113772 | \n",
+ " 0.744379 | \n",
+ " 0.541176 | \n",
+ " 0.990244 | \n",
+ " -0.005780 | \n",
+ " 0.721468 | \n",
+ " 0.434110 | \n",
+ " -0.318043 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0.053892 | \n",
+ " 0.608284 | \n",
+ " 0.764706 | \n",
+ " 0.951220 | \n",
+ " -0.248555 | \n",
+ " 0.821229 | \n",
+ " 0.848604 | \n",
+ " -0.003058 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0.173653 | \n",
+ " 0.866272 | \n",
+ " 0.682353 | \n",
+ " 0.951220 | \n",
+ " 0.017341 | \n",
+ " 0.704709 | \n",
+ " -0.021002 | \n",
+ " -0.195719 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2 3 4 5 6 \\\n",
+ "0 0.005988 0.569231 0.647059 0.951220 -0.225434 0.837989 0.357258 \n",
+ "1 0.161677 0.743195 0.682353 0.960976 -0.086705 0.780527 0.282945 \n",
+ "2 0.113772 0.744379 0.541176 0.990244 -0.005780 0.721468 0.434110 \n",
+ "3 0.053892 0.608284 0.764706 0.951220 -0.248555 0.821229 0.848604 \n",
+ "4 0.173653 0.866272 0.682353 0.951220 0.017341 0.704709 -0.021002 \n",
+ "\n",
+ " 7 \n",
+ "0 -0.003058 \n",
+ "1 0.149847 \n",
+ "2 -0.318043 \n",
+ "3 -0.003058 \n",
+ "4 -0.195719 "
+ ]
+ },
+ "execution_count": 56,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_feature = np.genfromtxt(\"test_feat.txt\", dtype=np.float32)\n",
+ "num_feature = test_feature.shape[1]\n",
+ "test_feature = pd.DataFrame(test_feature)\n",
+ "test_label = test_feature.iloc[:, -1]  # the last column is taken as the label\n",
+ "# NOTE(review): the slice end is exclusive, so column num_feature - 2 is excluded too - confirm intended.\n",
+ "test_feature = test_feature.iloc[:, :num_feature - 2]\n",
+ "test_feature"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 320.0\n",
+ "1 361.0\n",
+ "2 364.0\n",
+ "3 336.0\n",
+ "4 358.0\n",
+ "Name: 9, dtype: float32"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_label"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### GBDT模型建立"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "pred: 320.0008173984891 label: 320.0\n",
+ "pred: 360.99965033119537 label: 361.0\n",
+ "pred: 363.99928183902097 label: 364.0\n",
+ "pred: 336.0002344322584 label: 336.0\n",
+ "pred: 358.0000159974151 label: 358.0\n",
+ "均方根误差: 0.0005218003748239915\n"
+ ]
+ }
+ ],
+ "source": [
+ "# The loss is left at its default (squared error): the old name 'ls' is\n",
+ "# rejected by scikit-learn >= 1.2 and 'squared_error' by releases before 1.0.\n",
+ "gbdt = GradientBoostingRegressor(\n",
+ "    learning_rate=0.1,\n",
+ "    n_estimators=100,\n",
+ "    subsample=1.0,\n",
+ "    min_samples_split=2,\n",
+ "    min_samples_leaf=1,\n",
+ "    max_depth=3,\n",
+ "    init=None,\n",
+ "    random_state=None,\n",
+ "    max_features=None,\n",
+ "    alpha=0.9,\n",
+ "    verbose=0,\n",
+ "    max_leaf_nodes=None,\n",
+ "    warm_start=False,\n",
+ ")\n",
+ "\n",
+ "gbdt.fit(train_feature, train_label)\n",
+ "pred = gbdt.predict(test_feature)\n",
+ "\n",
+ "for p, y in zip(pred, test_label):\n",
+ "    print('pred:', p, ' label:', y)\n",
+ "print('均方根误差:', np.sqrt(((pred - test_label) ** 2).mean()))  # sqrt of the MSE, i.e. RMSE"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Machine Learning/3.2 GBDT/test_feat.txt b/Machine Learning/3.2 GBDT/test_feat.txt
new file mode 100644
index 0000000..7d5cd20
--- /dev/null
+++ b/Machine Learning/3.2 GBDT/test_feat.txt
@@ -0,0 +1,5 @@
+0.00598802 0.569231 0.647059 0.95122 -0.225434 0.837989 0.357258 -0.0030581 -0.383475 320
+0.161677 0.743195 0.682353 0.960976 -0.0867052 0.780527 0.282945 0.149847 -0.0529661 361
+0.113772 0.744379 0.541176 0.990244 -0.00578035 0.721468 0.43411 -0.318043 0.288136 364
+0.0538922 0.608284 0.764706 0.95122 -0.248555 0.821229 0.848604 -0.0030581 0.239407 336
+0.173653 0.866272 0.682353 0.95122 0.017341 0.704709 -0.0210016 -0.195719 0.150424 358
\ No newline at end of file
diff --git a/Machine Learning/3.2 GBDT/train_feat.txt b/Machine Learning/3.2 GBDT/train_feat.txt
new file mode 100644
index 0000000..7d5cd20
--- /dev/null
+++ b/Machine Learning/3.2 GBDT/train_feat.txt
@@ -0,0 +1,5 @@
+0.00598802 0.569231 0.647059 0.95122 -0.225434 0.837989 0.357258 -0.0030581 -0.383475 320
+0.161677 0.743195 0.682353 0.960976 -0.0867052 0.780527 0.282945 0.149847 -0.0529661 361
+0.113772 0.744379 0.541176 0.990244 -0.00578035 0.721468 0.43411 -0.318043 0.288136 364
+0.0538922 0.608284 0.764706 0.95122 -0.248555 0.821229 0.848604 -0.0030581 0.239407 336
+0.173653 0.866272 0.682353 0.95122 0.017341 0.704709 -0.0210016 -0.195719 0.150424 358
\ No newline at end of file
--
GitLab