提交 2c53996a 编写于 作者: ToTensor

extract code success

上级 4f3b0959
from keras.applications import ResNet50
from keras.applications import InceptionV3
from keras.applications import MobileNetV2
from keras.applications import Xception
from keras.applications import VGG16
from keras.applications import VGG19
df_housing = pd.read_csv("https://raw.githubusercontent.com/huangjia2019/house/master/house.csv")
df_housing.head() #显示加州房价数据
X = df_housing.drop("median_house_value", axis = 1) #构建特征集X
y = df_housing.median_house_value #构建标签集y
from sklearn.model_selection import train_test_split #导入sklearn工具库
X_train, X_test, y_train, y_test = train_test_split(X, y,
from sklearn.linear_model import LinearRegression #导入线性回归算法模型
model = LinearRegression() #确定线性回归算法
model.fit(X_train, y_train) #根据训练集数据, 训练机器, 拟合函数
y_pred = model.predict(X_test) #预测验证集的y值
print ('房价的真值(测试集)', y_test)
print ('预测的房价(测试集)', y_pred)
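前面的 train_test_split 调用在提取时被截断, 下面给出一个补全示意(test_size 与 random_state 为假设值):
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                   test_size=0.2, random_state=0) # 假设按8:2拆分数据集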
plt.plot(X_test.median_income, y_pred, color='green', linewidth=1)
plt.xlabel('Median Income') #x轴:家庭收入中位数
plt.ylabel('Median House Value') #y轴:房价中位数
plt.show() #显示房价分布和机器学习到的函数模型
from sklearn.linear_model import LinearRegression #导入线性回归算法模型
model = LinearRegression() #使用线性回归算法
import numpy as np # 导入NumPy库
import pandas as pd # 导入Pandas库
from keras.datasets import mnist #从Keras中导入MNIST数据集
print ("数据集张量形状:", X_train_image.shape) #用shape方法显示张量的形状
print ("第一个数据样本:\n", X_train_image[0]) #注意Python的索引是从0开始的
from keras.utils import to_categorical # 导入keras.utils工具库的类别转换工具
X_train = X_train_image.reshape(60000, 28, 28, 1) # 给特征数据增加一个(通道)维度
X_test = X_test_image.reshape(10000, 28, 28, 1) # 给特征数据增加一个(通道)维度
y_train = to_categorical(y_train_lable, 10) # 标签转换为one-hot编码
y_test = to_categorical(y_test_lable, 10) # 标签转换为one-hot编码
print ("训练集张量形状:", X_train.shape) # 训练集张量的形状
print ("第一个数据标签:", y_train[0]) # 显示标签集的第一个数据
from keras import models # 导入Keras模型, 以及各种神经网络的层
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
model = models.Sequential() # 用序贯方式建立模型
model.add(Conv2D(32, (3, 3), activation='relu', # 添加Conv2D层
model.add(MaxPooling2D(pool_size=(2, 2))) # 添加MaxPooling2D层
model.add(Conv2D(64, (3, 3), activation='relu')) # 添加Conv2D层
model.add(MaxPooling2D(pool_size=(2, 2))) # 添加MaxPooling2D层
model.add(Dropout(0.25)) # 添加Dropout层
model.add(Flatten()) # 展平
model.add(Dense(128, activation='relu')) # 添加全连接层
model.add(Dropout(0.5)) # 添加Dropout层
model.add(Dense(10, activation='softmax')) # Softmax分类激活, 输出10维分类码
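模型的编译与训练语句在此处未被完整提取, 下面是一个假设性的补全草图(轮次、批量大小与验证集比例均为假设值):
model.compile(optimizer='rmsprop', # 指定优化器
              loss='categorical_crossentropy', # 多分类交叉熵损失函数
              metrics=['accuracy']) # 评估指标为准确率
model.fit(X_train, y_train, # 指定训练集特征和标签
          validation_split=0.3, # 假设取30%的训练数据作为验证集
          epochs=5, batch_size=128) # 轮次和批量大小为假设值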
score = model.evaluate(X_test, y_test) # 在验证集上进行模型评估
print('测试集预测准确率:', score[1]) # 输出测试集上的预测准确率
pred = model.predict(X_test[0].reshape(1, 28, 28, 1)) # 预测测试集第一个数据
print(pred[0], "转换一下格式得到:", pred.argmax()) # 把one-hot编码转换为数字
import matplotlib.pyplot as plt # 导入绘图工具包
plt.imshow(X_test[0].reshape(28, 28), cmap='Greys') # 输出这个图片
import math # 导入数学工具包
y = math.log(100000000, 10)# 以10为底, 在x值等于一亿的情况下
print("以10为底, 求一亿的对数:", y)# 求出y的值为8
import numpy as np #导入NumPy库
X = np.array(5) #创建0D张量, 也就是标量
print("X的值", X)
print("X的阶", X.ndim) #ndim属性显示标量的阶
print("X的数据类型", X.dtype) #dtype属性显示标量的数据类型
print("X的形状", X.shape) #shape属性显示标量的形状
X = np.array([5, 6, 7, 8, 9]) #创建1D张量, 也就是向量
print("X的值", X)
print("X的阶", X.ndim) #ndim属性显示向量的阶
print("X的形状", X.shape) #shape属性显示向量的形状
print("X_train的形状:", X_train.shape)
print("X_train中第一个样本的形状:", X_train[0].shape)
print("y_train的形状:", y_train.shape)
weight = np.array([1, -1.8, 1, 1, 2]) #权重向量(也就是多项式的参数)
X = np.array([1, 6, 7, 8, 9]) #特征向量(也就是一个特定样本中的特征值)
y_hat = np.dot(X, weight) #通过点积运算构建预测函数
print('函数返回结果:', y_hat) #输出预测结果
import numpy as np # 导入NumPy库
list=[1, 2, 3, 4, 5] # 创建列表
array_01=np.array([1, 2, 3, 4, 5]) # 列表转换为数组
array_02=np.array((6, 7, 8, 9, 10)) # 元组转换为数组
array_03=np.array([[1, 2, 3], [4, 5, 6]]) # 列表转换为2D数组
print ('列表:', list)
print ('列表转换为数组:', array_01)
print ('元组转换为数组:', array_02)
print ('2D数组:', array_03)
print ('数组的形状:', array_01.shape)
print ('列表的形状:', list.shape) # 列表没有形状, 程序会报错
array_06 = np.arange(10)
print (array_06, '形状是', array_06.shape, '阶为', array_06.ndim)
array_06 = array_06.reshape(10, 1)
print (array_06, '形状是', array_06.shape, '阶为', array_06.ndim)
array_08 = np.array([[0, 0, 0], [10, 10, 10], [20, 20, 20], [30, 30, 30]])
array_09 = np.array([[0, 1, 2]])
array_10 = np.array([[0], [1], [2], [3]])
list_11 = [[0, 1, 2]]
print ('array_09的形状:', array_09.shape )
print ('array_10的形状:', array_10.shape )
array_12 = array_09.reshape(3)
print ('array_12的形状:', array_12.shape )
array_13 = np.array([1])
print ('array_13的形状:', array_13.shape )
array_14 = array_13.reshape(1, 1)
print ('array_14的形状:', array_14.shape )
print ('08 + 09结果:', array_08 + array_09)
print ('08 + 10结果:', array_08 + array_10)
print ('08 + 11结果:', array_08 + list_11)
print ('08 + 12结果:', array_08 + array_12)
print ('08 + 13结果:', array_08 + array_13)
print ('08 + 14结果:', array_08 + array_14)
否则(上述条件都不满足), 两个数组在当前阶不兼容, 不能够进行广播操作
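下面用一个小例子说明不兼容时的报错情形(数组取值仅为示意):
a = np.ones((4, 3)) # 形状为(4, 3)
b = np.ones((4, 2)) # 形状为(4, 2), 最后一阶既不相等也不为1
try:
    print(a + b) # 两个数组在当前阶不兼容, 无法广播
except ValueError as e:
    print('广播失败:', e)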
vector_01 = np.array([1, 2, 3])
vector_02 = np.array([[1], [2], [3]])
vector_03 = np.array([2])
vector_04 = vector_02.reshape(1, 3)
print ('vector_01的形状:', vector_01.shape)
print ('vector_02的形状:', vector_02.shape)
print ('vector_03的形状:', vector_03.shape)
print ('vector_04的形状:', vector_04.shape)
print ('01和01的点积:', np.dot(vector_01, vector_01))
print ('01和02的点积:', np.dot(vector_01, vector_02))
print ('04和02的点积:', np.dot(vector_04, vector_02))
print ('01和数字的点积:', np.dot(vector_01, 2))
print ('02和03的点积:', np.dot(vector_02, vector_03))
print ('02和04的点积:', np.dot(vector_02, vector_04))
print ('01和03的点积:', np.dot(vector_01, vector_03))
print ('02和02的点积:', np.dot(vector_02, vector_02))
matrix_01 = np.arange(0, 6).reshape(2, 3)
matrix_02 = np.arange(0, 6).reshape(3, 2)
print(matrix_01)
print(matrix_02)
print ('01和02的点积:', np.dot(matrix_01, matrix_02))
print ('02和01的点积:', np.dot(matrix_02, matrix_01))
print ('01和01的点积:', np.dot(matrix_01, matrix_01))
array_04=np.arange(1, 5, 1) # 通过arange函数生成数组
array_05=np.linspace(1, 5, 5) # 通过linspace函数生成数组
print (array_04)
print (array_05)
array_06 = np.arange(10)
print (array_06)
index_01 = array_06[3] # 索引—第4个元素
print ('第4个元素', index_01)
index_02 = array_06[-1] # 索引—最后一个元素
print ('第-1个元素', index_02)
slice_01 = array_06[:4] # 从0到4切片
print ('从0到4切片', slice_01)
slice_02 = array_06[0:12:4] # 从0到12切片, 步长为4
print ('从0到12切片, 步长为4', slice_02)
array_07 = np.array([[1, 2, 3], [4, 5, 6]])
print (array_07[1:2], '它的形状是', array_07[1:2].shape)
print (array_07[1:2][0], '它的形状又不同了', array_07[1:2][0].shape)
print (array_07, '形状是', array_07.shape)
print (array_07.reshape(3, 2), '形状是', array_07.reshape(3, 2).shape)
X_train, X_test = scaler(X_train, X_test) #对特征归一化
y_train, y_test = scaler(y_train, y_test) #对标签也归一化
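这里调用的 scaler 归一化函数在本页没有给出定义, 下面是一个最小示意实现(假设采用基于训练集最大、最小值的min-max归一化):
def scaler(train, test): # 假设性的min-max归一化函数
    min_val = train.min(axis=0) # 训练集最小值
    gap = train.max(axis=0) - min_val # 训练集最大值与最小值之差
    train = (train - min_val) / gap # 归一化训练集
    test = (test - min_val) / gap # 用训练集的min和gap归一化测试集
    return train, test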
plt.plot(X_train, y_train, 'r.', label='Training data') # 显示训练数据
plt.xlabel('wechat') # x轴标签
plt.ylabel('sales') # y轴标签
plt.legend() # 显示图例
plt.show() # 显示绘图结果
df_ads = pd.read_csv('../input/advertising-simple-dataset/advertising.csv')
df_ads.head()
import matplotlib.pyplot as plt #Matplotlib为Python画图工具库
import seaborn as sns #Seaborn为统计学数据可视化工具库
sns.heatmap(df_ads.corr(), cmap="YlGnBu", annot = True)
plt.show() #plt代表英文plot, 就是画图的意思
X = np.array(df_ads.wechat) #构建特征集, 只含有微信公众号广告投放金额一个特征
y = np.array(df_ads.sales) #构建标签集, 销售额
print ("张量X的阶:", X.ndim)
print ("张量X的形状:", X.shape)
print ("张量X的内容:", X)
X = X.reshape((len(X), 1)) #通过reshape方法把向量转换为矩阵, len函数返回样本个数
y = y.reshape((len(y), 1)) #通过reshape方法把向量转换为矩阵, len函数返回样本个数
print ("张量X的阶:", X.ndim)
print ("张量X的形状:", X.shape)
print ("张量X的内容:", X)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y,
print ("当权重为5, 偏置为3时, 损失为:",
loss_function(X_train, y_train, weight=5, bias=3))
print ("当权重为100, 偏置为1时, 损失为:",
loss_function(X_train, y_train, weight=100, bias=1))
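上面调用的 loss_function 在本页未给出定义, 下面是一个均方误差损失的最小示意(是否除以2等细节为假设):
def loss_function(X, y, weight, bias): # 假设性的均方误差损失函数
    y_hat = weight*X + bias # 假设函数的预测值
    loss = y_hat - y # 预测值与真值之差
    cost = np.sum(loss**2)/(2*len(X)) # 均方误差(除以2是为了求导方便)
    return cost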
y_hat = weight*X + bias # 这是向量化运算实现的假设函数
loss = y_hat-y # 这是中间过程, 求得的是假设函数预测的y'和真正的y值之间的差值
derivative_weight = X.T.dot(loss)/len(X) # 对权重求导, len(X)就是样本总数
derivative_bias = sum(loss)*1/len(X)  # 对偏置求导, len(X)就是样本总数
weight = weight - alpha*derivative_weight # 结合学习速率alpha更新权重
bias = bias - alpha*derivative_bias # 结合学习速率alpha更新偏置
iterations = 100# 迭代100次
alpha = 1# 初始学习速率设为1
weight = -5 # 权重
bias = 3 # 偏置
plt.plot(X_train, y_train, 'r.', label='Training data') # 显示训练数据
line_X = np.linspace(X_train.min(), X_train.max(), 500) # X值域
line_y = [weight*xx + bias for xx in line_X] # 假设函数y_hat
plt.plot(line_X, line_y, 'b--', label='Current hypothesis' ) #显示当前假设函数
plt.xlabel('wechat') # x轴标签
plt.ylabel('sales') # y轴标签
plt.legend() # 显示图例
plt.show() # 显示函数图像
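从这里到下面的损失曲线之间, 梯度下降的训练循环未被提取, 下面给出一个假设性的训练循环草图, 用于生成后文用到的 loss_history、weight_history 和 bias_history:
loss_history = [] # 记录每次迭代的损失
weight_history = [] # 记录每次迭代的权重
bias_history = [] # 记录每次迭代的偏置
for i in range(iterations): # 按设定的次数迭代
    y_hat = weight*X_train + bias # 当前假设函数的预测值
    loss = y_hat - y_train # 预测值与真值之差
    derivative_weight = X_train.T.dot(loss)/len(X_train) # 对权重求导
    derivative_bias = sum(loss)/len(X_train) # 对偏置求导
    weight = weight - alpha*derivative_weight # 更新权重
    bias = bias - alpha*derivative_bias # 更新偏置
    loss_history.append(loss_function(X_train, y_train, weight, bias)) # 记录损失
    weight_history.append(weight) # 记录权重
    bias_history.append(bias) # 记录偏置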
plt.plot(loss_history, 'g--', label='Loss Curve') # 显示损失曲线
plt.xlabel('Iterations') # x轴标签
plt.ylabel('Loss') # y轴标签
plt.legend() # 显示图例
plt.show() # 显示损失曲线
plt.plot(X_train, y_train, 'r.', label='Training data') # 显示训练数据
line_X = np.linspace(X_train.min(), X_train.max(), 500) # X值域
line_y = [weight_history[-1]*xx + bias_history[-1] for xx in line_X] # 假设函数
plt.plot(line_X, line_y, 'b--', label='Current hypothesis' ) # 显示当前假设函数
plt.xlabel('wechat') # x轴标签
plt.ylabel('sales') # y轴标签
plt.legend() # 显示图例
plt.show() # 显示函数图像
X = np.array(df_ads) # 构建特征集, 包含全部特征
X = np.delete(X, [3], axis = 1) # 删除标签
y = np.array(df_ads.sales) #构建标签集, 销售额
print ("张量X的阶:", X.ndim)
print ("张量X的维度:", X.shape)
print (X)
print("权重历史记录:", weight_history)
print("损失历史记录:", loss_history)
X_plan = [250,50,50] # 要预测的X特征数据
X_train,X_plan = scaler(X_train_original,X_plan) # 对预测数据也要归一化缩放
X_plan = np.append([1], X_plan ) # 加一个哑特征X0 = 1
y_plan = np.dot(weight_history[-1],X_plan) # [-1] 即模型收敛时的权重
y_value = y_plan*23.8 + 3.2 #23.8是当前y_train中最大值和最小值的差,3.2是最小值
print ("预计商品销售额:",y_value, "千元")
x0_train = np.ones((len(X_train), 1)) # 构造X长度的全1数组配合对偏置的点积
X_train = np.append(x0_train, X_train, axis=1) #把X增加一系列的1
print ("张量X的形状:", X_train.shape)
print (X_train)
def loss_function(X, y, W): # 手工定义一个均方误差函数, W此时是一个向量
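    # 该函数的函数体未被完整提取, 以下为假设性补全: 向量化计算均方误差(X假设已包含全1的哑特征列)
    y_hat = X.dot(W.T).reshape(-1, 1) # 假设函数的预测值, 调整形状与y一致
    loss = y_hat - y # 预测值与真值之差
    cost = np.sum(loss**2)/(2*len(X)) # 均方误差
    return cost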
iterations = 300# 迭代300次
alpha = 0.15#学习速率设为0.15
weight = np.array([0.5, 1, 1, 1]) # 权重向量, w[0] = bias
import numpy as np # 导入NumPy库
import pandas as pd # 导入Pandas库
df_heart = pd.read_csv("../input/heart-dataset/heart.csv") # 读取文件
df_heart.head() # 显示前5行数据
dimension = X.shape[1] # len(X)是矩阵的行数(样本数), 这里取shape[1]即列数作为特征维度
weight = np.full((dimension, 1), 0.1) # 权重向量, 向量一般是1D, 但这里实际上创建了2D张量
bias = 0 # 偏置值
y_pred = predict(X_test, weight_history[-1], bias_history[-1]) # 预测测试集
testing_acc = 100 - np.mean(np.abs(y_pred - y_test))*100 # 计算准确率
print("逻辑回归测试准确率: {:.2f}%".format(testing_acc))
loss_history_test = np.zeros(iterations) # 初始化历史损失
for i in range(iterations): #求训练过程中不同参数带来的测试集损失
index = np.arange(0, iterations, 1)
plt.plot(index, loss_history, c='blue', linestyle='solid')
plt.plot(index, loss_history_test, c='red', linestyle='dashed')
plt.legend(["Training Loss", "Test Loss"])
plt.xlabel("Number of Iteration")
plt.ylabel("Cost")
plt.show() # 同时显示训练集和测试集损失曲线
from sklearn.linear_model import LogisticRegression #导入逻辑回归模型
lr = LogisticRegression() # lr, 就代表是逻辑回归模型
lr.fit(X_train, y_train) # fit, 就相当于是梯度下降
print("SK learn逻辑回归测试准确率{:.2f}%".format(lr.score(X_test, y_test)*100))
a = pd.get_dummies(df_heart['cp'], prefix = "cp")
b = pd.get_dummies(df_heart['thal'], prefix = "thal")
c = pd.get_dummies(df_heart['slope'], prefix = "slope")
frames = [df_heart, a, b, c]
df_heart = pd.concat(frames, axis = 1)
df_heart = df_heart.drop(columns = ['cp', 'thal', 'slope'])
df_heart.head() # 显示新的dataframe
X = df_heart.drop(['target'], axis = 1) # 构建特征集
y = df_heart.target.values # 构建标签集
y = y.reshape(-1, 1) # -1是相对索引, 等价于len(y)
print("张量X的形状:", X.shape)
print("张量X的形状:", y.shape)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
from sklearn.preprocessing import MinMaxScaler # 导入数据缩放器
scaler = MinMaxScaler() # 选择归一化数据缩放器MinMaxScaler
X_train = scaler.fit_transform(X_train) # 特征归一化训练集fit_transform
X_test = scaler.transform(X_test) # 特征归一化测试集transform
import numpy as np # 导入NumPy
import pandas as pd # 导入Pandas
from sklearn import datasets # 导入Sklearn的数据集
iris=datasets.load_iris() # 导入iris
X_sepal = iris.data[:, [0, 1]]
lr = LogisticRegression(penalty='l2', C = 10) # 设定L2正则化和C参数
lr.fit(X_train_sepal, y_train_sepal) # 训练机器
score = lr.score(X_test_sepal, y_test_sepal) # 测试集分数评估
print("Sklearn逻辑回归测试准确率 {:.2f}%".format(score*100))
from sklearn.model_selection import train_test_split # 导入拆分数据集工具
from sklearn.preprocessing import StandardScaler # 导入标准化工具
X_train_sepal, X_test_sepal, y_train_sepal, y_test_sepal = \
print("花瓣训练集样本数: ", len(X_train_sepal))
print("花瓣测试集样本数: ", len(X_test_sepal))
scaler = StandardScaler() # 标准化工具
X_train_sepal = scaler.fit_transform(X_train_sepal) # 训练集数据标准化
X_test_sepal = scaler.transform(X_test_sepal) # 测试集数据标准化
X_combined_sepal = np.vstack((X_train_sepal, X_test_sepal)) # 合并特征集
Y_combined_sepal = np.hstack((y_train_sepal, y_test_sepal)) # 合并标签集
from sklearn.linear_model import LogisticRegression # 导入逻辑回归模型
lr = LogisticRegression(penalty='l2', C = 0.1) # 设定L2正则化和C参数
lr.fit(X_train_sepal, y_train_sepal) # 训练机器
score = lr.score(X_test_sepal, y_test_sepal) # 验证集分数评估
print("SKlearn逻辑回归测试准确率 {:.2f}%".format(score*100))
import matplotlib.pyplot as plt # 导入Matplotlib库
from matplotlib.colors import ListedColormap # 导入ListedColormap
def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
from sklearn.metrics import accuracy_score # 导入准确率指标
C_param_range = [0.01, 0.1, 1, 10, 100, 1000]
sepal_acc_table = pd.DataFrame(columns = ['C_parameter', 'Accuracy'])
sepal_acc_table['C_parameter'] = C_param_range
plt.figure(figsize=(10, 10))
j = 0
for i in C_param_range:
import numpy as np #导入NumPy库
import pandas as pd #导入Pandas库
df_bank = pd.read_csv("../input/bank-customer/Bank Customer.csv") # 读取文件
df_bank.head() # 显示文件前5行数据
from sklearn.linear_model import LogisticRegression # 导入Sklearn模型
lr = LogisticRegression() # 逻辑回归模型
history = lr.fit(X_train, y_train) # 训练机器
print("逻辑回归预测准确率 {:.2f}%".format(lr.score(X_test, y_test)*100))
import keras # 导入Keras库
from keras.models import Sequential # 导入Keras序贯模型
from keras.layers import Dense # 导入Keras全连接层
ann = Sequential() # 创建一个序贯ANN模型
ann.add(Dense(units=12, input_dim=11, activation = 'relu')) # 添加输入层
ann.add(Dense(units=24, activation = 'relu')) # 添加隐层
ann.add(Dense(units=1, activation = 'sigmoid')) # 添加输出层
ann.summary() # 显示网络模型(这个语句不是必需的)
from IPython.display import SVG # 实现神经网络结构的图形化显示
from keras.utils.vis_utils import model_to_dot
SVG(model_to_dot(ann, show_shapes=True).create(prog='dot', format='svg'))
fig=plt.subplots(figsize=(15, 15))
for i, j in enumerate(features):
plt.subplot(4, 2, i+1)
plt.subplots_adjust(hspace = 1.0)
sns.countplot(x=j, data = df_bank)
plt.title("No.of costumers")
df_bank['Gender'].replace("Female", 0, inplace = True)
df_bank['Gender'].replace("Male", 1, inplace=True)
d_city = pd.get_dummies(df_bank['City'], prefix = "City")
df_bank = [df_bank, d_city]
df_bank = pd.concat(df_bank, axis = 1)
y = df_bank ['Exited']
X = df_bank.drop(['Name', 'Exited', 'City'], axis=1)
X.head() #显示新的特征集
from sklearn.model_selection import train_test_split #拆分数据集
X_train, X_test, y_train, y_test = train_test_split(X, y,
from sklearn.metrics import classification_report # 导入分类报告
y_pred = ann.predict(X_test, batch_size=10) # 预测测试集的标签
y_pred = np.round(y_pred) # 四舍五入, 将分类概率值转换成0/1整数值
y_test = y_test.values # 把Pandas series转换成NumPy array
y_test = y_test.reshape((len(y_test), 1)) # 转换成与y_pred相同的形状
print(classification_report(y_test, y_pred, labels=[0, 1])) #调用分类报告
from sklearn.metrics import confusion_matrix # 导入混淆矩阵
cm = confusion_matrix(y_test, y_pred) # 调用混淆矩阵
plt.title("ANN Confusion Matrix") # 标题:人工神经网络混淆矩阵
sns.heatmap(cm, annot=True, cmap="Blues", fmt="d", cbar=False) # 热力图设定
plt.show() # 显示混淆矩阵
mean = X_train.mean(axis=0) # 计算训练集均值
X_train -= mean # 训练集减去训练集均值
std = X_train.std(axis=0) # 计算训练集标准差
X_train /= std # 训练集除以训练集标准差
X_test -= mean # 测试集减去训练集均值
X_test /= std # 测试集除以训练集标准差
from sklearn.preprocessing import StandardScaler # 导入特征缩放器
sc = StandardScaler() # 特征缩放器
X_train = sc.fit_transform(X_train) # 拟合并应用于训练集
ann.add(Dense(units=12, input_dim=11, activation = 'relu')) # 添加输入层
ann.add(Dense(units=24, activation = 'relu')) # 添加隐层
ann.add(Dense(units=12, input_dim=12, activation = 'relu')) # 添加输入层
ann.add(Dense(units=24, activation = 'relu')) # 添加隐层
ann.add(Dense(units=48, activation = 'relu')) # 添加隐层
ann.add(Dense(units=96, activation = 'relu')) # 添加隐层
ann.add(Dense(units=192, activation = 'relu')) # 添加隐层
ann.add(Dense(units=1, activation = 'sigmoid')) # 添加输出层
from keras.layers import Dropout # 导入Dropout
ann = Sequential() # 创建一个序贯ANN模型
ann.add(Dense(units=12, input_dim=12, activation = 'relu')) # 添加输入层
ann.add(Dense(units=24, activation = 'relu')) # 添加隐层
ann.add(Dropout(0.5)) # 添加Dropout层
ann.add(Dense(units=48, activation = 'relu')) # 添加隐层
ann.add(Dropout(0.5)) # 添加Dropout层
ann.add(Dense(units=96, activation = 'relu')) # 添加隐层
ann.add(Dropout(0.5)) # 添加Dropout层
ann.add(Dense(units=192, activation = 'relu')) # 添加隐层
ann.add(Dropout(0.5)) # 添加Dropout层
ann.add(Dense(units=1, activation = 'sigmoid')) # 添加输出层
ann.compile(optimizer = 'adam', # 优化器
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
from keras.callbacks import ReduceLROnPlateau
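三个回调对象(earlystop、reducelr、modelckpt)的构造语句未被完整提取, 下面是一个假设性的设定草图(monitor、patience等参数均为假设值):
earlystop = EarlyStopping(monitor='val_acc', patience=20, verbose=1) # 假设以验证准确率早停
reducelr = ReduceLROnPlateau(monitor='val_acc', factor=0.5,
                             patience=3, verbose=1) # 假设性的学习速率自动衰减设定
modelckpt = ModelCheckpoint(filepath='ann.h5', monitor='val_acc',
                            save_best_only=True, verbose=1) # 假设只保存验证集上表现最好的模型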
callbacks = [earlystop, reducelr, modelckpt] # 设定回调
history = ann.fit(X_train, y_train, # 指定训练集
import tensorflow as tf # 导入TensorFlow
tensorboard_callback = tf.keras.callbacks.TensorBoard("logs")
from keras.layers import Dense # 导入Dense层
from keras.regularizers import l2 # 导入L2正则化工具
ann.add(Dense(32, # 输出维度, 就是神经元的个数
from keras.layers.normalization import BatchNormalization # 导入批标准化组件
ann.add(Dense(64, input_dim=14, init='uniform')) # 添加输入层
ann.add(BatchNormalization()) # 添加批标准化层
ann.add(Dense(64, init='uniform')) # 添加中间层
from keras import models # 导入Keras模型和各种神经网络的层
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
model = models.Sequential() # 序贯模型
model.add(Conv2D(filters=32, # 添加Conv2D层, 指定过滤器的个数, 即通道数
model.add(MaxPooling2D(pool_size=(2, 2))) # 添加MaxPooling2D层
model.add(Conv2D(64, (3, 3), activation='relu')) # 添加Conv2D层
model.add(MaxPooling2D(pool_size=(2, 2))) # 添加MaxPooling2D层
model.add(Dropout(0.25)) # 添加Dropout层
model.add(Flatten()) # 添加展平层
model.add(Dense(128, activation='relu')) # 添加全连接层
model.add(Dropout(0.5)) # 添加Dropout层
model.add(Dense(10, activation='softmax')) # Softmax分类激活, 输出10维分类码
model.compile(optimizer='rmsprop', # 指定优化器
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
SVG(model_to_dot(ann, show_shapes = True ).create(prog='dot', format='svg'))
model.add(Conv2D(filters=32, # 添加Conv2D层, 指定过滤器的个数, 即通道数
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu')) # 添加卷积层
model.add(MaxPooling2D(pool_size=(2, 2))) # 添加最大池化层
import numpy as np # 导入Numpy
import pandas as pd # 导入Pandas
import os # 导入os工具
print(os.listdir("../input/stanford-dogs-dataset/images/Images"))
from sklearn.model_selection import train_test_split # 导入拆分工具
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
from keras import layers # 导入所有层
from keras import models # 导入所有模型
cnn = models.Sequential() # 序贯模型
cnn.add(layers.Conv2D(32, (3, 3), activation='relu', # 卷积层
cnn.add(layers.MaxPooling2D((2, 2))) # 最大池化层
cnn.add(layers.Conv2D(64, (3, 3), activation='relu')) # 卷积层
cnn.add(layers.MaxPooling2D((2, 2))) # 最大池化层
cnn.add(layers.Conv2D(128, (3, 3), activation='relu')) # 卷积层
cnn.add(layers.MaxPooling2D((2, 2))) # 最大池化层
cnn.add(layers.Conv2D(128, (3, 3), activation='relu')) # 卷积层
cnn.add(layers.MaxPooling2D((2, 2))) # 最大池化层
cnn.add(layers.Flatten()) # 展平层
cnn.add(layers.Dense(512, activation='relu')) # 全连接层
cnn.add(layers.Dense(10, activation='softmax')) # 分类输出
cnn.compile(loss='categorical_crossentropy', # 损失函数
dir = '../input/stanford-dogs-dataset/images/Images/'
chihuahua_dir = dir+'n02085620-Chihuahua' #吉娃娃
japanese_spaniel_dir = dir+'n02085782-Japanese_spaniel' #日本狆
maltese_dir = dir+'n02085936-Maltese_dog' #马尔济斯犬
pekinese_dir = dir+'n02086079-Pekinese' #狮子狗
shitzu_dir = dir+'n02086240-Shih-Tzu' #西施犬
blenheim_spaniel_dir = dir+'n02086646-Blenheim_spaniel' #布莱尼姆小猎犬
papillon_dir = dir+'n02086910-papillon' #蝴蝶犬
toy_terrier_dir = dir+'n02087046-toy_terrier' #玩具猎狐梗
afghan_hound_dir = dir+'n02088094-Afghan_hound' #阿富汗猎犬
basset_dir = dir+'n02088238-basset' #巴吉度猎犬
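下面反复调用的 training_data 函数在本页没有给出完整定义, 这里给出一个假设性的读图草图(使用OpenCV读取并缩放图像, 尺寸150与后文的输入形状一致, 其余细节为假设):
import cv2 # 假设使用OpenCV读取图像
X = [] # 特征(图像)列表
y_label = [] # 标签列表
imgsize = 150 # 图像统一缩放到150×150
def training_data(label, data_dir): # 假设性的图像读取函数
    for img in os.listdir(data_dir): # 遍历目录下的图像文件
        path = os.path.join(data_dir, img)
        img = cv2.imread(path, cv2.IMREAD_COLOR) # 读取彩色图像
        img = cv2.resize(img, (imgsize, imgsize)) # 缩放到统一尺寸
        X.append(np.array(img)) # 加入特征列表
        y_label.append(str(label)) # 加入标签列表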
training_data('chihuahua', chihuahua_dir)
training_data('japanese_spaniel', japanese_spaniel_dir)
training_data('maltese', maltese_dir)
training_data('pekinese', pekinese_dir)
training_data('shitzu', shitzu_dir)
training_data('blenheim_spaniel', blenheim_spaniel_dir)
training_data('papillon', papillon_dir)
training_data('toy_terrier', toy_terrier_dir)
training_data('afghan_hound', afghan_hound_dir)
training_data('basset', basset_dir)
from sklearn.preprocessing import LabelEncoder # 导入标签编码工具
from keras.utils.np_utils import to_categorical # 导入One-hot编码工具
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y_label) # 标签编码
y = to_categorical(y, 10) # 将标签转换为One-hot编码
X = np.array(X) # 将X从列表转换为张量数组
X = X/255 # 将X张量归一化
import matplotlib.pyplot as plt # 导入Matplotlib库
import random as rdm # 导入随机数工具
from keras import optimizers # 导入优化器
cnn = models.Sequential() # 贯序模型
cnn.add(layers.Conv2D(32, (3, 3), activation='relu', # 卷积层
from keras.models import load_model # 导入模型保存工具
cnn.save('../my_dog_cnn.h5') # 创建一个HDF5格式的文件'my_dog_cnn.h5'
del cnn # 删除当前模型
cnn = load_model('../my_dog_cnn.h5') # 重新载入已经保存的模型
cnn.add(layers.MaxPooling2D((2, 2))) # 最大池化层
cnn.add(layers.Conv2D(64, (3, 3), activation='relu')) # 卷积层
cnn.add(layers.MaxPooling2D((2, 2))) # 最大池化层
cnn.add(layers.Conv2D(128, (3, 3), activation='relu')) # 卷积层
cnn.add(layers.MaxPooling2D((2, 2))) # 最大池化层
cnn.add(layers.Conv2D(256, (3, 3), activation='relu')) # 卷积层
cnn.add(layers.MaxPooling2D((2, 2))) # 最大池化层
cnn.add(layers.Flatten()) # 展平层
cnn.add(layers.Dense(512, activation='relu')) # 全连接层
cnn.add(layers.Dense(10, activation='sigmoid')) # 分类输出
cnn.compile(loss='categorical_crossentropy', # 损失函数
cnn = models.Sequential() # 序贯模型
cnn.add(layers.Conv2D(32, (3, 3), activation='relu', # 卷积层
cnn.add(layers.MaxPooling2D((2, 2))) # 最大池化层
cnn.add(layers.Conv2D(64, (3, 3), activation='relu')) # 卷积层
cnn.add(layers.Dropout(0.5)) # Dropout层
cnn.add(layers.MaxPooling2D((2, 2))) # 最大池化层
cnn.add(layers.Conv2D(128, (3, 3), activation='relu')) # 卷积层
cnn.add(layers.Dropout(0.5)) # Dropout层
cnn.add(layers.MaxPooling2D((2, 2))) # 最大池化层
cnn.add(layers.Conv2D(256, (3, 3), activation='relu')) # 卷积层
cnn.add(layers.MaxPooling2D((2, 2))) # 最大池化层
cnn.add(layers.Flatten()) # 展平层
cnn.add(layers.Dropout(0.5)) # Dropout
cnn.add(layers.Dense(512, activation='relu')) # 全连接层
cnn.add(layers.Dense(10, activation='sigmoid')) # 分类输出
cnn.compile(loss='categorical_crossentropy', # 损失函数
history = cnn.fit_generator( # 使用fit_generator
augs_gen.flow(X_train, y_train, batch_size=16), # 增强后的训练集
validation_data = (X_test, y_test), # 指定验证集
validation_steps = 100, # 指定验证步长
steps_per_epoch = 100, # 指定每轮步长
epochs = 50, # 指定轮次
verbose = 1) # 指定是否显示训练过程中的信息
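上面 fit_generator 用到的数据增强生成器 augs_gen 未在本页给出, 下面是一个假设性的 ImageDataGenerator 设定草图(各增强参数均为假设值):
from keras.preprocessing.image import ImageDataGenerator # 导入图像数据增强工具
augs_gen = ImageDataGenerator(
    rotation_range=10, # 随机旋转角度范围
    zoom_range=0.1, # 随机缩放范围
    width_shift_range=0.2, # 随机水平平移
    height_shift_range=0.2, # 随机垂直平移
    horizontal_flip=True, # 随机水平翻转
    vertical_flip=False) # 不做垂直翻转
augs_gen.fit(X_train) # 在训练集上拟合增强器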
from keras.models import load_model # 导入模型保存工具
import matplotlib.pyplot as plt # 导入Matplotlib库
model = load_model('../my_dog_cnn.h5')# 载入刚才保存的模型
layer_outputs = [layer.output for layer in model.layers[:16]]
image = X_train[0]
image = image.reshape(1, 150, 150, 3)
activation_model = models.Model(inputs=model.input, outputs=layer_outputs)
activations = activation_model.predict(image)
first_layer_activation = activations[0]
plt.matshow(first_layer_activation[0, :, :, 2], cmap='viridis')
plt.matshow(first_layer_activation[0, :, :, 3], cmap='viridis')
from keras.preprocessing.text import Tokenizer #导入Tokenizer工具
words = ['Lao Wang has a Wechat account.', 'He is not a nice person.', 'Be careful.']
tokenizer = Tokenizer(num_words=30) # 词典大小只设定30个词(因为句子数量少)
tokenizer.fit_on_texts(words) # 根据3个句子编辑词典
sequences = tokenizer.texts_to_sequences(words) # 为3个句子根据词典里面的索引进行序号编码
one_hot_matrix = tokenizer.texts_to_matrix(words, mode='binary') #进行One-hot编码
word_index = tokenizer.word_index # 词典中的单词索引总数
print('找到了 %s个词' % len(word_index))
print('这3句话(单词)的序号编码:' , sequences)
print('这3句话(单词)的One-hot编码:' , one_hot_matrix)
import pandas as pd # 导入Pandas
import numpy as np # 导入NumPy
dir = '../input/product-comments/'
dir_train = dir+'Clothing Reviews.csv'
df_train = pd.read_csv(dir_train) # 读入训练集
df_train.head() # 输出部分数据
from keras.preprocessing.text import Tokenizer # 导入分词工具
X_train_lst = df_train["Review Text"].values # 将评论读入张量(训练集)
y_train = df_train["Rating"].values # 构建标签集
dictionary_size = 20000 # 设定词典的大小
tokenizer = Tokenizer(num_words=dictionary_size) # 初始化词典
tokenizer.fit_on_texts( X_train_lst ) # 使用训练集创建词典索引
import matplotlib.pyplot as plt # 导入matplotlib
word_per_comment = [len(comment) for comment in X_train_tokenized_lst]
plt.hist(word_per_comment, bins = np.arange(0,500,10)) # 显示评论长度分布
plt.show()
from keras.preprocessing.sequence import pad_sequences
max_comment_length = 120 # 设定评论输入长度为120,并填充默认值(如字数少于120)
X_train = pad_sequences(X_train_tokenized_lst, maxlen=max_comment_length)
from keras.models import Sequential # 导入序贯模型
from keras.layers.embeddings import Embedding #导入词嵌入层
from keras.layers import Dense #导入全连接层
from keras.layers import SimpleRNN #导入SimpleRNN层
embedding_vecor_length = 60 # 设定词嵌入向量长度为60
rnn = Sequential() #序贯模型
rnn.add(Embedding(dictionary_size, embedding_vecor_length,
rnn.add(SimpleRNN(100)) # 加入SimpleRNN层
rnn.add(Dense(10, activation='relu')) # 加入全连接层
rnn.add(Dense(6, activation='softmax')) # 加入分类输出层
rnn.compile(loss='sparse_categorical_crossentropy', #损失函数
from keras.models import Sequential # 导入序贯模型
from keras.layers.embeddings import Embedding #导入词嵌入层
from keras.layers import Dense #导入全连接层
from keras.layers import LSTM #导入LSTM层
embedding_vecor_length = 60 # 设定词嵌入向量长度为60
lstm = Sequential() #序贯模型
lstm.add(Embedding(dictionary_size, embedding_vecor_length,
lstm.add(LSTM(100)) # 加入LSTM层
lstm.add(Dense(10, activation='relu')) # 加入全连接层
lstm.add(Dense(6, activation='softmax')) # 加入分类输出层
lstm.compile(loss='sparse_categorical_crossentropy', #损失函数
import numpy as np # 导入NumPy库
import pandas as pd # 导入Pandas库
df_train = pd.read_csv('../input/new-earth/exoTrain.csv') # 导入训练集
df_test = pd.read_csv('../input/new-earth/exoTest.csv') # 导入测试集
print(df_train.head()) # 输出前几行数据
print(df_train.info()) # 输出训练集信息
y_pred = np.where(y_prob > 0.2, 1, 0) # 进行阈值调整
cm = confusion_matrix(y_pred, y_test)
print('Confusion matrix:\n', cm, '\n')
print(classification_report(y_pred, y_test))
from keras import layers # 导入各种层
from keras.models import Model # 导入模型
from keras.optimizers import Adam # 导入Adam优化器
input = layers.Input(shape=(3197, 1)) # 输入
x = layers.Conv1D(32, kernel_size=10, strides=4)(input)
x = layers.MaxPooling1D(pool_size=4, strides=2)(x)
x = layers.GRU(256, return_sequences=True)(x)
x = layers.Flatten()(x)
x = layers.Dropout(0.5)(x)
x = layers.BatchNormalization()(x)
output = layers.Dense(1, activation='sigmoid')(x) # 输出
model = Model(input, output)
model.summary() # 显示模型的输出
opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, decay=0.01) # 设置优化器
model.compile(optimizer=opt, # 优化器
X_train_rev = [X[::-1] for X in X_train]
X_test_rev = [X[::-1] for X in X_test]
X_train = np.expand_dims(X_train, axis=2)
X_train_rev = np.expand_dims(X_train_rev, axis=2)
X_test = np.expand_dims(X_test, axis=2)
X_test_rev = np.expand_dims(X_test_rev, axis=2)
input_1 = layers.Input(shape=(3197, 1))
x = layers.GRU(32, return_sequences=True)(input_1)
x = layers.Flatten()(x)
x = layers.Dropout(0.5)(x)
input_2 = layers.Input(shape=(3197, 1))
y = layers.GRU(32, return_sequences=True)(input_2)
y = layers.Flatten()(y)
y = layers.Dropout(0.5)(y)
z = layers.concatenate([x, y])
output = layers.Dense(1, activation='sigmoid')(z)
model = Model([input_1, input_2], output)
model.summary()
from sklearn.utils import shuffle # 导入乱序工具
df_train = shuffle(df_train) # 乱序训练集
df_test = shuffle(df_test) # 乱序测试集
X_train = df_train.iloc[:, 1:].values # 构建特征集(训练集)
y_train = df_train.iloc[:, 0].values # 构建标签集(训练集)
X_test = df_test.iloc[:, 1:].values # 构建特征集(验证集)
y_test = df_test.iloc[:, 0].values # 构建标签集(验证集)
y_train = y_train - 1 # 标签转换成惯用的(0, 1)分类值
y_test = y_test - 1 # 标签转换成惯用的(0, 1)分类值
print (X_train) # 输出训练集中的特征集
print (y_train) # 输出训练集中的标签集
X_train = np.expand_dims(X_train, axis=2) # 张量升阶, 以满足序列数据集的要求
X_test = np.expand_dims(X_test, axis=2) # 张量升阶, 以满足序列数据集的要求
from keras.models import Sequential # 导入序贯模型
from keras import layers # 导入所有类型的层
from keras.optimizers import Adam # 导入优化器
model = Sequential() # 序贯模型
model.add(layers.Conv1D(32, kernel_size=10, strides=4,
model.add(layers.MaxPooling1D(pool_size=4, strides=2)) # 池化层
model.add(layers.GRU(256, return_sequences=True)) # GRU层要足够大
model.add(layers.Flatten()) # 展平层
model.add(layers.Dropout(0.5)) # Dropout层
model.add(layers.BatchNormalization()) # 批标准化
model.add(layers.Dense(1, activation='sigmoid')) # 分类输出层
opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, decay=0.01) # 设置优化器
model.compile(optimizer=opt, # 优化器
from sklearn.metrics import classification_report # 分类报告
from sklearn.metrics import confusion_matrix # 混淆矩阵
y_prob = model.predict(X_test) # 对测试集进行预测
y_pred = np.where(y_prob > 0.5, 1, 0) #将概率值转换成真值
cm = confusion_matrix(y_pred, y_test)
print('Confusion matrix:\n', cm, '\n')
print(classification_report(y_pred, y_test))
import numpy as np # 导入NumPy库
import pandas as pd # 导入Pandas库
df_heart = pd.read_csv("../input/heart-dataset/heart.csv") # 读取文件
df_heart.head() # 显示前5行数据
from sklearn.neighbors import KNeighborsClassifier # 导入KNN模型
K = 5 # 设定初始K值为5
KNN = KNeighborsClassifier(n_neighbors = K) # KNN模型
KNN.fit(X_train, y_train) # 拟合KNN模型
y_pred = KNN.predict(X_test) # 预测心脏病结果
from sklearn.metrics import (f1_score, confusion_matrix) # 导入评估指标
print("{}NN预测准确率: {:.2f}%".format(K, KNN.score(X_test, y_test)*100))
print("{}NN预测F1分数: {:.2f}%".format(K, f1_score(y_test, y_pred)*100))
print('KNN混淆矩阵:\n', confusion_matrix(y_pred, y_test))
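下面绘图用到的 acc_score_list 与 f1_score_list 的生成循环未被提取, 这里给出一个假设性的草图(K从1到14, 与后面的横轴范围一致):
acc_score_list = [] # 记录不同K值的准确率
f1_score_list = [] # 记录不同K值的F1分数
for i in range(1, 15): # 依次尝试K值1~14
    KNN = KNeighborsClassifier(n_neighbors=i) # 构建KNN模型
    KNN.fit(X_train, y_train) # 拟合模型
    acc_score_list.append(KNN.score(X_test, y_test)*100) # 记录准确率
    y_pred = KNN.predict(X_test) # 预测
    f1_score_list.append(f1_score(y_test, y_pred)*100) # 记录F1分数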
index = np.arange(1, 15, 1)
plt.plot(index, acc_score_list, c='blue', linestyle='solid')
plt.plot(index, f1_score_list, c='red', linestyle='dashed')
plt.legend(["Accuracy", "F1 Score"])
plt.xlabel("k value")
plt.ylabel("Score")
plt.grid(False)
plt.show()
KNN_acc = max(f1_score_list)*100
print("Maximum KNN Score is {:.2f}%".format(KNN_acc))
from sklearn.svm import SVC # 导入SVM模型
svm = SVC(random_state = 1)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test) # 预测心脏病结果
svm_acc = svm.score(X_test, y_test)*100
print("SVM预测准确率:: {:.2f}%".format(svm.score(X_test, y_test)*100))
print("SVM预测F1分数: {:.2f}%".format(f1_score(y_test, y_pred)*100))
print('SVM混淆矩阵:\n', confusion_matrix(y_pred, y_test))
from sklearn.naive_bayes import GaussianNB # 导入朴素贝叶斯模型
nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test) # 预测心脏病结果
nb_acc = nb.score(X_test, y_test)*100
print("NB预测准确率:: {:.2f}%".format(svm.score(X_test, y_test)*100))
print("NB预测F1分数: {:.2f}%".format(f1_score(y_test, y_pred)*100))
print('NB混淆矩阵:\n', confusion_matrix(y_pred, y_test))
from sklearn.tree import DecisionTreeClassifier # 导入决策树模型
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)
dtc_acc = dtc.score(X_test, y_test)*100
y_pred = dtc.predict(X_test) # 预测心脏病结果
print("Decision Tree Test Accuracy {:.2f}%".format(dtc_acc))
print("决策树 预测准确率: {:.2f}%".format(dtc.score(X_test, y_test)*100))
print("决策树 预测F1分数: {:.2f}%".format(f1_score(y_test, y_pred)*100))
print('决策树 混淆矩阵:\n', confusion_matrix(y_pred, y_test))
from sklearn.ensemble import RandomForestClassifier # 导入随机森林模型
rf = RandomForestClassifier(n_estimators = 1000, random_state = 1)
rf.fit(X_train, y_train)
rf_acc = rf.score(X_test, y_test)*100
y_pred = rf.predict(X_test) # 预测心脏病结果
print("随机森林 预测准确率:: {:.2f}%".format(rf.score(X_test, y_test)*100))
print("随机森林 预测F1分数: {:.2f}%".format(f1_score(y_test, y_pred)*100))
print('随机森林 混淆矩阵:\n', confusion_matrix(y_pred, y_test))
IF机器学习问题 = 感知类问题(也就是图像语言文本等非结构化问题)
from sklearn.linear_model import LogisticRegression # 导入逻辑回归模型
lr = LogisticRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test) # 预测心脏病结果
lr_acc = lr.score(X_test, y_test)*100
lr_f1 = f1_score(y_test, y_pred)*100
print("逻辑回归预测准确率:{:.2f}%".format(lr_acc))
print("逻辑回归预测F1分数: {:.2f}%".format(lr_f1))
print('逻辑回归混淆矩阵:\n', confusion_matrix(y_test, y_pred))
accuracy = [lr_acc, KNN_acc, svm_acc, nb_acc, dtc_acc, rf_acc]
colors = ["orange", "red", "purple", "magenta", "green", "blue"]
sns.set_style("whitegrid")
plt.figure(figsize=(16, 5))
plt.yticks(np.arange(0, 100, 10))
plt.ylabel("Accuracy %")
plt.xlabel("Algorithms")
sns.barplot(x=methods, y=accuracy, palette=colors)
plt.grid(b=None)
plt.show()
from sklearn.metrics import confusion_matrix
y_pred_lr = lr.predict(X_test)
KNN3 = KNeighborsClassifier(n_neighbors = 3)
KNN3.fit(X_train, y_train)
y_pred_KNN = KNN3.predict(X_test)
y_pred_svm = svm.predict(X_test)
y_pred_nb = nb.predict(X_test)
y_pred_dtc = dtc.predict(X_test)
y_pred_rf = rf.predict(X_test)
cm_lr = confusion_matrix(y_test, y_pred_lr)
cm_KNN = confusion_matrix(y_test, y_pred_KNN)
cm_svm = confusion_matrix(y_test, y_pred_svm)
cm_nb = confusion_matrix(y_test, y_pred_nb)
cm_dtc = confusion_matrix(y_test, y_pred_dtc)
cm_rf = confusion_matrix(y_test, y_pred_rf)
plt.figure(figsize=(24, 12))
plt.suptitle("Confusion Matrixes", fontsize=24) #混淆矩阵
plt.subplots_adjust(wspace = 0.4, hspace= 0.4)
plt.subplot(2, 3, 1)
plt.title("Logistic Regression Confusion Matrix") #逻辑回归混淆矩阵
sns.heatmap(cm_lr, annot=True, cmap="Blues", fmt="d", cbar=False)
plt.subplot(2, 3, 2)
plt.title("K Nearest Neighbors Confusion Matrix") #KNN混淆矩阵
sns.heatmap(cm_KNN, annot=True, cmap="Blues", fmt="d", cbar=False)
plt.subplot(2, 3, 3)
plt.title("Support Vector Machine Confusion Matrix") #SVM混淆矩阵
sns.heatmap(cm_svm, annot=True, cmap="Blues", fmt="d", cbar=False)
plt.subplot(2, 3, 4)
plt.title("Naive Bayes Confusion Matrix") #朴素贝叶斯混淆矩阵
sns.heatmap(cm_nb, annot=True, cmap="Blues", fmt="d", cbar=False)
plt.subplot(2, 3, 5)
plt.title("Decision Tree Classifier Confusion Matrix") #决策树混淆矩阵
sns.heatmap(cm_dtc, annot=True, cmap="Blues", fmt="d", cbar=False)
plt.subplot(2, 3, 6)
plt.title("Random Forest Confusion Matrix") #随机森林混淆矩阵
sns.heatmap(cm_rf, annot=True, cmap="Blues", fmt="d", cbar=False)
plt.show()
from sklearn.model_selection import StratifiedKFold # 导入K折验证工具
from sklearn.model_selection import GridSearchCV # 导入网格搜索工具
kfold = StratifiedKFold(n_splits=10) # 10折验证
rf = RandomForestClassifier() # 随机森林模型
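rf_gs 的网格搜索构造语句未被提取, 下面是一个假设性的 GridSearchCV 草图(参数网格均为假设值):
rf_param_grid = {'max_depth': [None, 3, 6], # 假设性的参数网格
                 'min_samples_split': [2, 5, 10],
                 'n_estimators': [100, 300]}
rf_gs = GridSearchCV(rf, param_grid=rf_param_grid, # 对随机森林做网格搜索
                     cv=kfold, scoring='accuracy', n_jobs=-1, verbose=1)
rf_gs.fit(X_train, y_train) # 在训练集上搜索最优参数组合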
from sklearn.metrics import (accuracy_score, confusion_matrix)
y_hat_rfgs = rf_gs.predict(X_test) # 用随机森林算法的最佳参数进行预测
print("参数优化后随机森林预测准确率:", accuracy_score(y_test.T, y_hat_rfgs))
cm_rfgs = confusion_matrix(y_test, y_hat_rfgs) # 显示混淆矩阵
plt.figure(figsize=(4, 4))
plt.title("Random Forest (Best Score) Confusion Matrix")#随机森林(最优参数)混淆矩阵
sns.heatmap(cm_rfgs, annot=True, cmap="Blues", fmt="d", cbar=False)
from sklearn.ensemble import BaggingClassifier #导入Bagging分类器
from sklearn.tree import DecisionTreeClassifier #导入决策树分类器
from sklearn.metrics import (f1_score, confusion_matrix) # 导入评估指标
dt = BaggingClassifier(DecisionTreeClassifier()) # 只使用一棵决策树
dt.fit(X_train, y_train) # 拟合模型
y_pred = dt.predict(X_test) # 进行预测
print("决策树测试准确率: {:.2f}%".format(dt.score(X_test, y_test)*100))
print("决策树测试F1分数: {:.2f}%".format(f1_score(y_test, y_pred)*100))
bdt = BaggingClassifier(DecisionTreeClassifier()) #树的Bagging
bdt.fit(X_train, y_train) # 拟合模型
y_pred = bdt.predict(X_test) # 进行预测
print("决策树Bagging测试准确率: {:.2f}%".format(bdt.score(X_test, y_test)*100))
print("决策树Bagging测试F1分数: {:.2f}%".format(f1_score(y_test, y_pred)*100))
from sklearn.ensemble import ExtraTreesClassifier # 导入极端随机森林模型
ext = ExtraTreesClassifier() # 极端随机森林模型
ext_gs.fit(X_train, y_train) # 拟合模型
ext_gs = ext_gs.best_estimator_ # 最佳模型
y_pred = ext_gs.predict(X_test) # 进行预测
print("极端随机森林测试准确率: {:.2f}%".format(ext_gs.score(X_test, y_test)*100))
print("极端随机森林测试F1分数: {:.2f}%".format(f1_score(y_test, y_pred)*100))
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.ensemble import (RandomForestRegressor,
dtree = DecisionTreeRegressor().fit(X_train, y_train)
d_predict = dtree.predict(X_test)
plt.figure(figsize=(20, 12))
plt.grid(b=None)
plt.subplot(2, 2, 1)
plt.plot(X_test, f(X_test), "b")
plt.scatter(X_train, y_train, c="b", s=20)
plt.plot(X_test, d_predict, "g", lw=2)
plt.title("Decision Tree, MSE = %.2f" % np.sum((y_test - d_predict) ** 2))
bdt = BaggingRegressor(DecisionTreeRegressor()).fit(X_train, y_train)
bdt_predict = bdt.predict(X_test)
plt.subplot(2, 2, 2)
plt.plot(X_test, f(X_test), "b")
plt.scatter(X_train, y_train, c="b", s=20)
plt.plot(X_test, bdt_predict, "y", lw=2)
plt.title("Bagging for Trees, MSE = %.2f" % np.sum((y_test - bdt_predict) ** 2))
rf = RandomForestRegressor(n_estimators=10).fit(X_train, y_train)
rf_predict = rf.predict(X_test)
plt.subplot(2, 2, 3)
plt.plot(X_test, f(X_test), "b")
plt.scatter(X_train, y_train, c="b", s=20)
plt.plot(X_test, rf_predict, "r", lw=2)
plt.title("Random Forest, MSE = %.2f" % np.sum((y_test - rf_predict) ** 2))
et = ExtraTreesRegressor(n_estimators=10).fit(X_train, y_train)
et_predict = et.predict(X_test)
plt.subplot(2, 2, 4)
plt.plot(X_test, f(X_test), "b")
plt.scatter(X_train, y_train, c="b", s=20)
plt.plot(X_test, et_predict, "purple", lw=2)
plt.title("Extra Trees, MSE = %.2f" % np.sum((y_test - et_predict) ** 2))
bdt_gs.fit(X_train, y_train) # 拟合模型
bdt_gs = bdt_gs.best_estimator_ # 最佳模型
y_pred = bdt.predict(X_test) # 进行预测
print("决策树Bagging测试准确率: {:.2f}%".format(bdt_gs.score(X_test, y_test)*100))
print("决策树Bagging测试F1分数: {:.2f}%".format(f1_score(y_test, y_pred)*100))
from sklearn.ensemble import RandomForestClassifier # 导入随机森林模型
rf = RandomForestClassifier() # 随机森林模型
rf_gs.fit(X_train, y_train) # 拟合模型
rf_gs = rf_gs.best_estimator_ # 最佳模型
y_pred = rf_gs.predict(X_test) # 进行预测
print("随机森林测试准确率: {:.2f}%".format(rf_gs.score(X_test, y_test)*100))
print("随机森林测试F1分数: {:.2f}%".format(f1_score(y_test, y_pred)*100))
xgb_gs.fit(X_train, y_train) # 拟合模型
xgb_gs = xgb_gs.best_estimator_ # 最佳模型
y_pred = xgb_gs.predict(X_test) # 进行预测
print("XGB测试准确率: {:.2f}%".format(xgb_gs.score(X_test, y_test)*100))
print("XGB测试F1分数: {:.2f}%".format(f1_score(y_test, y_pred)*100))
from sklearn.ensemble import AdaBoostClassifier # 导入AdaBoost模型
dt = DecisionTreeClassifier() # 选择决策树分类器作为AdaBoost的基准算法
ada = AdaBoostClassifier(dt) # AdaBoost模型
ada_gs.fit(X_train, y_train) # 拟合模型
ada_gs = ada_gs.best_estimator_ # 最佳模型
y_pred = ada_gs.predict(X_test) # 进行预测
print("AdaBoost测试准确率: {:.2f}%".format(ada_gs.score(X_test, y_test)*100))
print("AdaBoost测试F1分数: {:.2f}%".format(f1_score(y_test, y_pred)*100))
from sklearn.ensemble import GradientBoostingClassifier # 导入梯度提升模型
gb = GradientBoostingClassifier() # 梯度提升模型
gb_gs.fit(X_train, y_train) # 拟合模型
gb_gs = gb_gs.best_estimator_ # 最佳模型
y_pred = gb_gs.predict(X_test) # 进行预测
print("梯度提升测试准确率: {:.2f}%".format(gb_gs.score(X_test, y_test)*100))
print("梯度提升测试F1分数: {:.2f}%".format(f1_score(y_test, y_pred)*100))
from sklearn.model_selection import StratifiedKFold # 导入K折验证工具
def Stacking(model, train, y, test, n_fold): # 定义Stacking函数
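    # 该函数的函数体未被完整提取, 以下为假设性的实现草图: 用K折交叉生成折外预测
    # (假设train、test为DataFrame, y为Series, 其余细节为假设)
    folds = StratifiedKFold(n_splits=n_fold) # K折划分
    train_pred = np.empty((0, 1), float) # 存放训练集的折外预测
    for train_idx, val_idx in folds.split(train, y): # 逐折训练基模型
        X_tr, X_val = train.iloc[train_idx], train.iloc[val_idx]
        y_tr = y.iloc[train_idx]
        model.fit(X_tr, y_tr) # 用当前折的训练数据拟合
        train_pred = np.append(train_pred, model.predict(X_val)) # 记录对验证折的预测
    model.fit(train, y) # 用全部训练数据重新拟合
    test_pred = model.predict(test) # 对测试集进行预测
    return test_pred.reshape(-1, 1), train_pred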
from sklearn.tree import DecisionTreeClassifier # 导入决策树模型
model1 = DecisionTreeClassifier(random_state=1) # model1-决策树
test_pred1 , train_pred1 = Stacking(model=model1, n_fold=10,
train_pred1 = pd.DataFrame(train_pred1)
test_pred1 = pd.DataFrame(test_pred1)
from sklearn.neighbors import KNeighborsClassifier # 导入KNN模型
model2 = KNeighborsClassifier() # model2-KNN
test_pred2 , train_pred2 = Stacking(model=model2, n_fold=10,
X_train_new = pd.concat([train_pred1, train_pred2], axis=1)
X_test_new = pd.concat([test_pred1, test_pred2], axis=1)
from sklearn.linear_model import LogisticRegression # 导入逻辑回归模型
model = LogisticRegression(random_state=1)
model.fit(X_train_new, y_train) # 拟合模型
model.score(df_test, y_test) # 分数评估
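下面用到的 voting 集成模型未在本页给出构造语句, 这里是一个假设性的 VotingClassifier 草图(基模型组合与投票方式均为假设):
from sklearn.ensemble import VotingClassifier # 导入Voting集成模型
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
voting = VotingClassifier(
    estimators=[('lr', LogisticRegression()), # 基模型组合为假设
                ('knn', KNeighborsClassifier()),
                ('rf', RandomForestClassifier())],
    voting='soft') # 假设采用软投票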
voting = voting.fit(X_train, y_train) # 拟合模型
y_pred = voting.predict(X_test) # 进行预测
print("Voting测试准确率: {:.2f}%", voting.score(X_test, y_test)*100)
print"Voting测试F1分数:{:.2f}%", f1_scorey_test, y_pred*100
model1.fit(X_train, y_train)
model2.fit(X_train, y_train)
model3.fit(X_train, y_train)
pred_m1=model1.predict_proba(X_test)
pred_m2=model2.predict_proba(X_test)
pred_m3=model3.predict_proba(X_test)
pred_final=(pred_m1+pred_m2+pred_m3)/3
p_res[label_cols] = (p_nbsvm[label_cols] + p_lstm[label_cols]) / 2
p_res.to_csv('submission.csv', index=False)
import numpy as np # 导入NumPy库
import pandas as pd # 导入pandas库
dataset = pd.read_csv('../input/customer-cluster/Customers Cluster.csv')
dataset.head() # 显示一些数据
plt.title('Clusters of customers')#客户形成的聚类
plt.xlabel('Income')#年收入
plt.ylabel('Spending Score')#消费分数
plt.legend()
plt.show()
from sklearn.cluster import KMeans # 导入聚类模型
cost=[] # 初始化损失(距离)值
for i in range(1, 11): # 尝试不同的K值
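    # 循环体未被提取, 以下为假设性补全: 对每个K值拟合KMeans并以inertia_作为损失
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=0) # 随机种子为假设值
    kmeans.fit(X) # 在特征集X上拟合聚类模型
    cost.append(kmeans.inertia_) # inertia_是样本到最近簇中心的距离平方和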
import matplotlib.pyplot as plt # 导入Matplotlib库
import seaborn as sns # 导入Seaborn库
plt.plot(range(1, 11), cost)
plt.title('The Elbow Method')#手肘法
plt.xlabel('No of clusters')#聚类的个数
plt.ylabel('Cost')#成本
plt.show()
kmeansmodel = KMeans(n_clusters= 4, init='k-means++') # 选择4作为聚类个数
y_kmeans= kmeansmodel.fit_predict(X) # 进行聚类的拟合和分类
import numpy as np # 导入NumPy库
import pandas as pd # 导入pandas库
import matplotlib.pyplot as plt # 导入Matplotlib库
x_load = np.load('../input/sign-language-digits-dataset/X.npy') # 导入特征
y_load = np.load('../input/sign-language-digits-dataset/Y.npy') # 导入标签
img_size = 64 # 设定显示图像的大小
image_index_list = [299, 999, 1699, 699, 1299, 1999, 699, 499, 1111, 199]
for each in range(10): # 每个手语数字选取一张展示
from sklearn.decomposition import PCA # 导入Sklearn中decomposition模块的PCA工具
X = x_load.reshape((len(x_load), -1)) # Reshape张量X
n_components = 5 # 设定因子个数, 因子越多, 模型越复杂
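pca 对象的构造语句未被提取, 下面按前面设定的因子个数给出一个假设性补全:
pca = PCA(n_components=n_components) # 构建PCA模型(其余参数假设取默认值)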
X_pca = pca.fit_transform(X) # PCA降维拟合
components_ = pca.components_ # 保留的主要成分因子(也就是被简化的模型)
images = components_[:n_components] # 显示降维之后的特征图
plt.figure(figsize=(6, 5))
for i, comp in enumerate(images):
herself lying on the bank, with her
head in the lap of her sister, who was gently brushing away s
herself lying on the bank, with her
head in the lap of her sister, who was gently brushing away
so siee, and she sabbit said to herself and the sabbit said to herself and the sood
way of the was a little that she was a little lad good to the garden,
and the sood of the mock turtle said to herself, 'it was a little that
the mock turtle said to see it said to sea it said to sea it say it
the marge hard sat hn a little that she was so sereated to herself, and
she sabbit said to herself, 'it was a little little shated of the sooe
of the coomouse it was a little lad good to the little gooder head.and
SFFF    (S: start起点, 安全)
FHFH    (F: frozen surface冰面, 安全)
FFFH    (H: hole冰窟窿, 落水)
HFFG    (G: goal目标, 飞盘所在地)
env = gym.make('FrozenLake-v0', is_slippery=False) # 生成冰湖挑战的环境
env.reset() # 初始化冰湖挑战的环境
print("状态数:", env.observation_space.n)
print("动作数:", env.action_space.n)
alpha = 0.6 # 学习速率
gamma = 0.75 # 奖励折扣
episodes = 500 # 游戏盘数
r_history = [] # 奖励值的历史信息
j_history = [] # 步数的历史信息
for i in range(episodes):
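    # 该循环的Q-learning主体未被提取, 以下为假设性补全草图(假设循环前已用
    # Q = np.zeros((env.observation_space.n, env.action_space.n)) 初始化Q表, 探索率ε取0.1)
    s = env.reset() # 每盘游戏开始时重置环境
    total_reward, j, done = 0, 0, False # 本盘的累计奖励、步数和结束标志
    while not done: # 直到落水或拿到飞盘
        if np.random.rand() < 0.1: # 以小概率随机探索
            a = env.action_space.sample()
        else:
            a = np.argmax(Q[s]) # 否则选择当前Q值最大的动作
        s_next, reward, done, _ = env.step(a) # 执行动作
        Q[s, a] = Q[s, a] + alpha*(reward + gamma*np.max(Q[s_next]) - Q[s, a]) # Q-learning更新
        s = s_next # 进入下一个状态
        total_reward += reward # 累计奖励
        j += 1 # 累计步数
    r_history.append(total_reward) # 记录本盘奖励
    j_history.append(j) # 记录本盘步数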
import matplotlib.pyplot as plt # 导入Matplotlib库
plt.figure(figsize=(16, 5))
plt.subplot(1, 2, 1)
plt.plot(r_history)
plt.subplot(1, 2, 2)
plt.plot(j_history)
alpha = 0.6 # 学习速率
gamma = 0.75 # 奖励折扣
episodes = 500 # 游戏盘数
r_history = [] # 奖励值的历史信息
j_history = [] # 步数的历史信息
for i in range(episodes):
from keras.layers import GRU #导入GRU层
model.add(GRU(100)) # 加入GRU层
@@ -85,26 +85,29 @@ def extract_code(book_mapping):
with open(html_save_path, 'w', encoding='utf-8') as f:
f.write(section_content)
code_list = re.findall(r'<code>(.*?)</code>', section_content,
re.S)
code_list = re.findall(
r'(?:(?: *<p class="content_105">[a-zA-Z]+.*? \n).*?)*',
section_content,
flags=re.DOTALL)
# print(code_list)
res_codelist = []
for code in code_list:
code = code.strip()
if code != '':
res_codelist.append(code)
# print(res_codelist)
# break
count = 0
for code in res_codelist:
if len(code.split('\n')) < 2:
continue
# code = html.unescape(code)
# soup = BeautifulSoup(code)
# clean_code = soup.get_text()
# print(clean_code)
# if len(code.split('\n')) < 2:
# continue
code = html.unescape(code)
soup = BeautifulSoup(code)
clean_code = soup.get_text()
print(clean_code)
# print('-------' * 10)
print('-------' * 10)
# pianduan_name = re.findall(r'(代码片段.*),', clean_code)
# if pianduan_name == []:
# pianduan_name_str = ''
@@ -121,13 +124,12 @@ def extract_code(book_mapping):
# print(save_file_name)
if idx == 0:
code_save_path = os.path.join(chapter_dir,
'code_0.css')
code_save_path = os.path.join(chapter_dir, 'code_0.py')
else:
count += 1
code_save_path = os.path.join(
section_dir_list[idx - 1],
'code_{}.css'.format(count))
'code_{}.py'.format(count))
# res_code_list = []
# for line in clean_code.split('\n'):
@@ -140,7 +142,7 @@ def extract_code(book_mapping):
# res_code = '\n'.join(res_code_list)
with open(code_save_path, 'w', encoding='utf-8') as f:
f.write(code)
f.write(clean_code)
# clean_text_list = []
# for line in res_str.split('\n'):