From 3fba23545bd8b8bba39dbb93ddff5f1876462b7c Mon Sep 17 00:00:00 2001
From: wizardforcel <562826179@qq.com>
Date: Sun, 17 Jan 2021 22:20:07 +0800
Subject: [PATCH] 2021-01-17 22:20:07

---
 new/dl-pt-workshop/7.md | 871 ++++++++++++++++------------------
 1 file changed, 352 insertions(+), 519 deletions(-)

diff --git a/new/dl-pt-workshop/7.md b/new/dl-pt-workshop/7.md
index 2ce4fe3d..72335aad 100644
--- a/new/dl-pt-workshop/7.md
+++ b/new/dl-pt-workshop/7.md
@@ -221,75 +221,67 @@

1. Import the required libraries.

-    进口火炬
-    将 torch.nn 导入为 nn
+    ```py
+    import torch
+    import torch.nn as nn
+    ```

2. Split the features from the target for all three sets of data created in the previous activity. Convert the resulting DataFrames into tensors.

-    x_train = torch.tensor(x_train.values).float()
-    y_train =火炬张量(y_train.values).float()
-    x_dev = torch.tensor(x_dev.values).float()
-    y_dev = torch.tensor(y_dev.values).float()
-    x_test = torch.tensor(x_test.values).float()
-    y_test = torch.tensor(y_test.values).float()
+    ```py
+    x_train = torch.tensor(x_train.values).float()
+    y_train = torch.tensor(y_train.values).float()
+    x_dev = torch.tensor(x_dev.values).float()
+    y_dev = torch.tensor(y_dev.values).float()
+    x_test = torch.tensor(x_test.values).float()
+    y_test = torch.tensor(y_test.values).float()
+    ```

3. Define the architecture of the network. Feel free to try different combinations of the number of layers and the number of units per layer.

-    模型= nn.Sequential(nn.Linear(x_train.shape [1],10),\
-    nn.ReLU(),\
-    nn.Linear(10,7),\
-    nn.ReLU(),\
-    nn.Linear(7,5),\
-    nn.ReLU(),\
-    nn.Linear(5,1))
+    ```py
+    model = nn.Sequential(nn.Linear(x_train.shape[1], 10), \
+                          nn.ReLU(), \
+                          nn.Linear(10, 7), \
+                          nn.ReLU(), \
+                          nn.Linear(7, 5), \
+                          nn.ReLU(), \
+                          nn.Linear(5, 1))
+    ```

4. Define the loss function and the optimizer algorithm.

-    loss_function = torch.nn.MSELoss()
-    优化程序= torch.optim.Adam(model.parameters(),lr = 0.01)
+    ```py
+    loss_function = torch.nn.MSELoss()
+    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
+    ```

5. Use a `for` loop to train the network for 3,000 iteration steps.

-    对于我的范围(3000):
-    y_pred = model(x_train).squeeze()
-    损失= loss_function(y_pred,y_train)
-    Optimizer.zero_grad()
-    loss.backward()
-    Optimizer.step()
-    如果 i% 250 == 0:
-    打印(i,loss.item())
+    ```py
+    for i in range(3000):
+        y_pred = model(x_train).squeeze()
+        loss = loss_function(y_pred, y_train)
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+        if i%250 == 0:
+            print(i, loss.item())
+    ```

6. Test your model by performing a prediction on the first instance of the test set and comparing it with the ground truth.

-    之前=模型(x_test [0])
-    print(“地面真相:”,y_test [0] .item(),\
-    “预测:”,pred.item())
+    ```py
+    pred = model(x_test[0])
+    print("Ground truth:", y_test[0].item(), \
+          "Prediction:", pred.item())
+    ```

    Your output should look similar to the following:

-    基本事实:1995.0 预测:1998.0279541015625
+    ```py
+    Ground truth: 1995.0 Prediction: 1998.0279541015625
+    ```

    Note

@@ -305,31 +297,25 @@

1. Import the following libraries:

-    将熊猫作为 pd 导入
-    将 numpy 导入为 np
-    从 sklearn.model_selection 导入 train_test_split
-    从 sklearn.utils 导入 shuffle
-    从 sklearn.metrics 导入 precision_score
-    进口火炬
-    从火炬进口 nn,乐观
-    导入功能为 F 的 torch.nn。
-    导入 matplotlib.pyplot 作为 plt
-    torch.manual_seed(0)
+    ```py
+    import pandas as pd
+    import numpy as np
+    from sklearn.model_selection import train_test_split
+    from sklearn.utils import shuffle
+    from sklearn.metrics import accuracy_score
+    import torch
+    from torch import nn, optim
+    import torch.nn.functional as F
+    import matplotlib.pyplot as plt
+    torch.manual_seed(0)
+    ```
2. Read the previously prepared dataset, which should be named `dccc_prepared.csv`.

-    数据= pd.read_csv(“ dccc_prepared.csv”)
-    data.head()
+    ```py
+    data = pd.read_csv("dccc_prepared.csv")
+    data.head()
+    ```

    The output should be as follows:

@@ -339,187 +325,131 @@

3. Separate the features from the target.

-    X = data.loc [:,:-1]
-    y =数据[“下个月的默认付款”]
+    ```py
+    X = data.iloc[:,:-1]
+    y = data["default payment next month"]
+    ```

4. Using scikit-learn's `train_test_split` function, split the dataset into training, validation, and testing sets. Use a 60:20:20 split ratio. Set `random_state` to 0.

-    X_new,X_test,\
-    y_new,y_test = train_test_split(X,y,test_size = 0.2,\
-    random_state = 0)
-    dev_per = X_test.shape [0] /X_new.shape [0]
-    X_train,X_dev,\
-    y_train,y_dev = train_test_split(X_new,y_new,\
-    test_size = dev_per,\
-    random_state = 0)
+    ```py
+    X_new, X_test, \
+    y_new, y_test = train_test_split(X, y, test_size=0.2, \
+                                     random_state=0)
+    dev_per = X_test.shape[0]/X_new.shape[0]
+    X_train, X_dev, \
+    y_train, y_dev = train_test_split(X_new, y_new, \
+                                      test_size=dev_per, \
+                                      random_state=0)
+    ```

    You can print the final shape of each set using the following code:

-    print(“训练集:”,X_train.shape,y_train.shape)
-    print(“验证集:”,X_dev.shape,y_dev.shape)
-    print(“测试集:”,X_test.shape,y_test.shape)
+    ```py
+    print("Training sets:", X_train.shape, y_train.shape)
+    print("Validation sets:", X_dev.shape, y_dev.shape)
+    print("Testing sets:", X_test.shape, y_test.shape)
+    ```

    The final shape of each set is shown here:

-    训练集:(28036,22)(28036,)
-    验证集:(9346,22)(9346,)
-    测试集:(9346,22)(9346,)
+    ```py
+    Training sets: (28036, 22) (28036,)
+    Validation sets: (9346, 22) (9346,)
+    Testing sets: (9346, 22) (9346,)
+    ```

5. Convert the validation and testing sets into tensors, keeping in mind that the features matrices should be of the `float` type, while the target matrices should not. The training sets are left unconverted for the moment, as they will undergo further transformations.

-    X_dev_torch = torch.tensor(X_dev.values).float()
-    y_dev_torch = torch.tensor(y_dev.values)
-    X_test_torch = torch.tensor(X_test.values).float()
-    y_test_torch =火炬张量(y_test.values)
+    ```py
+    X_dev_torch = torch.tensor(X_dev.values).float()
+    y_dev_torch = torch.tensor(y_dev.values)
+    X_test_torch = torch.tensor(X_test.values).float()
+    y_test_torch = torch.tensor(y_test.values)
+    ```

6. Build a custom module class to define the layers of the network. Include a forward function that specifies the activation functions to be applied to the output of each layer. Use **ReLU** for all of the layers, except for the output, where you should use `log_softmax`.

-    类 Classifier(nn.Module):
-    def __init __(self,input_size):
-    super().__ init __()
-    self.hidden_1 = nn.Linear(input_size,10)
-    self.hidden_2 = nn.Linear(10,10)
-    self.hidden_3 = nn.Linear(10,10)
-    self.output = nn.Linear(10,2)
-    def forward(self,x):
-    z = F.relu(self.hidden_1(x))
-    z = F.relu(self.hidden_2(z))
-    z = F.relu(self.hidden_3(z))
-    out = F.log_softmax(self.output(z),dim = 1)
-    返回
+    ```py
+    class Classifier(nn.Module):
+        def __init__(self, input_size):
+            super().__init__()
+            self.hidden_1 = nn.Linear(input_size, 10)
+            self.hidden_2 = nn.Linear(10, 10)
+            self.hidden_3 = nn.Linear(10, 10)
+            self.output = nn.Linear(10, 2)
+        def forward(self, x):
+            z = F.relu(self.hidden_1(x))
+            z = F.relu(self.hidden_2(z))
+            z = F.relu(self.hidden_3(z))
+            out = F.log_softmax(self.output(z), dim=1)
+            return out
+    ```

7. Instantiate the model and define all of the variables required to train it. Set the number of epochs to `50` and the batch size to `128`. Use a learning rate of `0.001`.

-    模型=分类器(X_train.shape [1])
-    准则= nn.NLLLoss()
-    优化程序= optim.Adam(model.parameters(),lr = 0.001)
-    时代= 50
+    ```py
+    model = Classifier(X_train.shape[1])
+    criterion = nn.NLLLoss()
+    optimizer = optim.Adam(model.parameters(), lr=0.001)
+    epochs = 50
     batch_size = 128
+    ```
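As an aside, the training loop in the next step produces mini-batches by slicing the shuffled DataFrames by hand. An equivalent option, not used in this solution, is to wrap the tensors in PyTorch's `DataLoader`. A minimal sketch, assuming the `X_train`, `y_train`, and `batch_size` objects defined above:

```py
from torch.utils.data import TensorDataset, DataLoader

# Wrap the training data in a Dataset so that DataLoader can shuffle
# and batch it instead of slicing the DataFrames manually.
train_data = TensorDataset(torch.tensor(X_train.values).float(), \
                           torch.tensor(y_train.values))
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

for X_batch, y_batch in train_loader:
    # The same forward/backward pass shown in the next step would go here.
    pass
```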
8. Train the network using the data from the training sets. Use the validation sets to measure performance. To do this, save the loss and the accuracy for both the training and validation sets in each epoch.

-    train_losses,dev_losses,
-    train_acc,dev_acc = [],[],[],[]
-    对于范围内的 e(历元):
-    X_,y_ = shuffle(X_train,y_train)
-    running_loss = 0
-    running_acc = 0
-    迭代次数= 0
-    对于范围(0,len(X_),batch_size)中的 i:
-    迭代次数== 1
-    b =我+ batch_size
-    X_batch = torch.tensor(X_.iloc [i:b,:]。values).float()
-    y_batch = torch.tensor(y_.iloc [i:b] .values)
-    pred =模型(X_batch)
-    损失=标准(pred,y_batch)
-    Optimizer.zero_grad()
-    loss.backward()
-    Optimizer.step()
-    running_loss + = loss.item()
-    ps = torch.exp(pred)
-    top_p,top_class = ps.topk(1,暗= 1)
-    running_acc + =精度得分(y_batch,top_class)
-    dev_loss = 0
-    acc = 0
-    使用 torch.no_grad():
-    pred_dev =模型(X_dev_torch)
-    dev_loss =条件(pred_dev,y_dev_torch)
-    ps_dev = torch.exp(pred_dev)
-    top_p,top_class_dev = ps_dev.topk(1,暗= 1)
-    acc = precision_score(y_dev_torch,top_class_dev)
-    train_losses.append(running_loss / iterations)
-    dev_losses.append(dev_loss)
-    train_acc.append(running_acc / iterations)
-    dev_acc.append(acc)
-    print(“ Epoch:{} / {} ..” .format(e + 1,epochs),\
-    “训练损失:{:. 3f} ..” \
-    .format(running_loss / iterations),\
-    “验证损失:{:. 3f} ..” .format(dev_loss),\
-    “训练准确率:{:. 3f} ..” \
-    .format(running_acc / iterations),\
-    “验证准确率:{:. 3f}”。format(acc))
+    ```py
+    train_losses, dev_losses, \
+    train_acc, dev_acc = [], [], [], []
+    for e in range(epochs):
+        X_, y_ = shuffle(X_train, y_train)
+        running_loss = 0
+        running_acc = 0
+        iterations = 0
+        for i in range(0, len(X_), batch_size):
+            iterations += 1
+            b = i + batch_size
+            X_batch = torch.tensor(X_.iloc[i:b,:].values).float()
+            y_batch = torch.tensor(y_.iloc[i:b].values)
+            pred = model(X_batch)
+            loss = criterion(pred, y_batch)
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+            running_loss += loss.item()
+            ps = torch.exp(pred)
+            top_p, top_class = ps.topk(1, dim=1)
+            running_acc += accuracy_score(y_batch, top_class)
+        dev_loss = 0
+        acc = 0
+        with torch.no_grad():
+            pred_dev = model(X_dev_torch)
+            dev_loss = criterion(pred_dev, y_dev_torch)
+            ps_dev = torch.exp(pred_dev)
+            top_p, top_class_dev = ps_dev.topk(1, dim=1)
+            acc = accuracy_score(y_dev_torch, top_class_dev)
+        train_losses.append(running_loss/iterations)
+        dev_losses.append(dev_loss)
+        train_acc.append(running_acc/iterations)
+        dev_acc.append(acc)
+        print("Epoch: {}/{}.. ".format(e+1, epochs), \
+              "Training Loss: {:.3f}.. "\
+              .format(running_loss/iterations), \
+              "Validation Loss: {:.3f}.. ".format(dev_loss), \
+              "Training Accuracy: {:.3f}.. "\
+              .format(running_acc/iterations), \
+              "Validation Accuracy: {:.3f}".format(acc))
+    ```

9. Plot the loss of both sets.

-    图 = plt.figure(figsize =(15,5))
-    plt.plot(train_losses,label ='训练损失')
-    plt.plot(dev_losses,label ='Validation loss')
-    plt.legend(frameon = False,fontsize = 15)
+    ```py
+    fig = plt.figure(figsize=(15, 5))
+    plt.plot(train_losses, label='Training loss')
+    plt.plot(dev_losses, label='Validation loss')
+    plt.legend(frameon=False, fontsize=15)
     plt.show()
+    ```

    Considering that shuffling the training data can yield slightly different results, the resulting plot should look similar to, although not identical to, the one shown here.

@@ -529,15 +459,13 @@
10. Plot the accuracy of both sets.

-    无花果= plt.figure(figsize =(15,5))
-    plt.plot(train_acc,label =“训练精度”)
-    plt.plot(dev_acc,label =“验证准确率”)
-    plt.legend(frameon = False,fontsize = 15)
+    ```py
+    fig = plt.figure(figsize=(15, 5))
+    plt.plot(train_acc, label="Training accuracy")
+    plt.plot(dev_acc, label="Validation accuracy")
+    plt.legend(frameon=False, fontsize=15)
     plt.show()
+    ```

    Here is the plot derived from this code snippet:

@@ -557,195 +485,120 @@

1. Import the same libraries that you used in the previous activity.

-    将熊猫作为 pd 导入
-    将 numpy 导入为 np
-    从 sklearn.model_selection 导入 train_test_split
-    从 sklearn.utils 导入 shuffle
-    从 sklearn.metrics 导入 precision_score
-    进口火炬
-    从火炬进口 nn,乐观
-    导入功能为 F 的 torch.nn。
-    导入 matplotlib.pyplot 作为 plt
-    torch.manual_seed(0)
+    ```py
+    import pandas as pd
+    import numpy as np
+    from sklearn.model_selection import train_test_split
+    from sklearn.utils import shuffle
+    from sklearn.metrics import accuracy_score
+    import torch
+    from torch import nn, optim
+    import torch.nn.functional as F
+    import matplotlib.pyplot as plt
+    torch.manual_seed(0)
+    ```

2. Load the data and split the features from the target. Next, split the data into three subsets (training, validation, and testing) using a 60:20:20 split ratio. Finally, convert the validation and testing sets into PyTorch tensors, just as you did in the previous activity.

-    数据= pd.read_csv(“ dccc_prepared.csv”)
-    X = data.loc [:,:-1]
-    y =数据[“下个月的默认付款”]
-    X_new,X_test,\
-    y_new,y_test = train_test_split(X,y,test_size = 0.2,\
-    random_state = 0)
-    dev_per = X_test.shape [0] /X_new.shape [0]
-    X_train,X_dev,\
-    y_train,y_dev = train_test_split(X_new,y_new,\
-    test_size = dev_per,\
-    random_state = 0)
-    X_dev_torch = torch.tensor(X_dev.values).float()
-    y_dev_torch = torch.tensor(y_dev.values)
-    X_test_torch = torch.tensor(X_test.values).float()
-    y_test_torch =火炬张量(y_test.values)
+    ```py
+    data = pd.read_csv("dccc_prepared.csv")
+    X = data.iloc[:,:-1]
+    y = data["default payment next month"]
+    X_new, X_test, \
+    y_new, y_test = train_test_split(X, y, test_size=0.2, \
+                                     random_state=0)
+    dev_per = X_test.shape[0]/X_new.shape[0]
+    X_train, X_dev, \
+    y_train, y_dev = train_test_split(X_new, y_new, \
+                                      test_size=dev_per, \
+                                      random_state=0)
+    X_dev_torch = torch.tensor(X_dev.values).float()
+    y_dev_torch = torch.tensor(y_dev.values)
+    X_test_torch = torch.tensor(X_test.values).float()
+    y_test_torch = torch.tensor(y_test.values)
+    ```
3. Considering that the model is suffering from high bias, the focus should be on increasing the number of epochs or increasing the size of the network by adding additional layers or units to each layer. The aim should be to bring the accuracy over the validation set close to 80%.

    The best-performing model, which was achieved after several fine-tuning attempts, is shown next.

    First, define the model architecture and the forward pass, as shown in the following code snippet:

-    类 Classifier(nn.Module):
-    def __init __(self,input_size):
-    super().__ init __()
-    self.hidden_1 = nn.Linear(input_size,100)
-    self.hidden_2 = nn.Linear(100,100)
-    self.hidden_3 = nn.Linear(100,50)
-    self.hidden_4 = nn.Linear(50,50)
-    self.output = nn.Linear(50,2)
-    self.dropout = nn.Dropout(p = 0.1)
-    def forward(self,x):
-    z = self.dropout(F.relu(self.hidden_1(x)))
-    z = self.dropout(F.relu(self.hidden_2(z)))
-    z = self.dropout(F.relu(self.hidden_3(z)))
-    z = self.dropout(F.relu(self.hidden_4(z)))
-    out = F.log_softmax(self.output(z),dim = 1)
-    返回
+    ```py
+    class Classifier(nn.Module):
+        def __init__(self, input_size):
+            super().__init__()
+            self.hidden_1 = nn.Linear(input_size, 100)
+            self.hidden_2 = nn.Linear(100, 100)
+            self.hidden_3 = nn.Linear(100, 50)
+            self.hidden_4 = nn.Linear(50, 50)
+            self.output = nn.Linear(50, 2)
+            self.dropout = nn.Dropout(p=0.1)
+        def forward(self, x):
+            z = self.dropout(F.relu(self.hidden_1(x)))
+            z = self.dropout(F.relu(self.hidden_2(z)))
+            z = self.dropout(F.relu(self.hidden_3(z)))
+            z = self.dropout(F.relu(self.hidden_4(z)))
+            out = F.log_softmax(self.output(z), dim=1)
+            return out
+    ```

    Next, define the different parameters of the training process. These include the loss function, the optimization algorithm, the batch size, and the number of epochs, as shown in the following code:

-    模型=分类器(X_train.shape [1])
-    准则= nn.NLLLoss()
-    优化程序= optim.Adam(model.parameters(),lr = 0.001)
-    时代= 4000
+    ```py
+    model = Classifier(X_train.shape[1])
+    criterion = nn.NLLLoss()
+    optimizer = optim.Adam(model.parameters(), lr=0.001)
+    epochs = 4000
     batch_size = 128
+    ```

    Finally, handle the training process, as per the following code snippet:

-    train_losses,dev_losses,train_acc,dev_acc = [],[],[],[]
-    对于范围(1,历元+1)中的 e:
-    X_,y_ = shuffle(X_train,y_train)
-    running_loss = 0
-    running_acc = 0
-    迭代次数= 0
-    对于范围(0,len(X_),batch_size)中的 i:
-    迭代次数== 1
-    b =我+ batch_size
-    X_batch = torch.tensor(X_.iloc [i:b,:]。values).float()
-    y_batch = torch.tensor(y_.iloc [i:b] .values)
-    log_ps =模型(X_batch)
-    损失=标准(log_ps,y_batch)
-    Optimizer.zero_grad()
-    loss.backward()
-    Optimizer.step()
-    running_loss + = loss.item()
-    ps = torch.exp(log_ps)
-    top_p,top_class = ps.topk(1,暗= 1)
-    running_acc + =精度得分(y_batch,top_class)
-    dev_loss = 0
-    acc = 0
-    使用 torch.no_grad():
-    model.eval()
-    log_dev =模型(X_dev_torch)
-    dev_loss =条件(log_dev,y_dev_torch)
-    ps_dev = torch.exp(log_dev)
-    top_p,top_class_dev = ps_dev.topk(1,暗= 1)
-    acc = precision_score(y_dev_torch,top_class_dev)
-    model.train()
-    如果 e% 50 == 0 或 e == 1:
-    x_axis.append(e)
-    train_losses.append(running_loss / iterations)
-    dev_losses.append(dev_loss)
-    train_acc.append(running_acc / iterations)
-    dev_acc.append(acc)
-    print(“ Epoch:{} / {} ..”“ .format(e,epochs),\
-    “训练损失:{:. 3f} ..” \
-    .format(running_loss / iterations),\
-    “验证损失:{:. 3f} ..” .format(dev_loss),\
-    “训练准确率:{:. 3f} ..” \
-    .format(running_acc / iterations),\
-    “验证准确率:{:. 3f}”。format(acc))
+    ```py
+    train_losses, dev_losses, train_acc, dev_acc = [], [], [], []
     x_axis = []
+    for e in range(1, epochs + 1):
+        X_, y_ = shuffle(X_train, y_train)
+        running_loss = 0
+        running_acc = 0
+        iterations = 0
+        for i in range(0, len(X_), batch_size):
+            iterations += 1
+            b = i + batch_size
+            X_batch = torch.tensor(X_.iloc[i:b,:].values).float()
+            y_batch = torch.tensor(y_.iloc[i:b].values)
+            log_ps = model(X_batch)
+            loss = criterion(log_ps, y_batch)
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+            running_loss += loss.item()
+            ps = torch.exp(log_ps)
+            top_p, top_class = ps.topk(1, dim=1)
+            running_acc += accuracy_score(y_batch, top_class)
+        dev_loss = 0
+        acc = 0
+        with torch.no_grad():
+            model.eval()
+            log_dev = model(X_dev_torch)
+            dev_loss = criterion(log_dev, y_dev_torch)
+            ps_dev = torch.exp(log_dev)
+            top_p, top_class_dev = ps_dev.topk(1, dim=1)
+            acc = accuracy_score(y_dev_torch, top_class_dev)
+        model.train()
+        if e%50 == 0 or e == 1:
+            x_axis.append(e)
+            train_losses.append(running_loss/iterations)
+            dev_losses.append(dev_loss)
+            train_acc.append(running_acc/iterations)
+            dev_acc.append(acc)
+            print("Epoch: {}/{}.. ".format(e, epochs), \
+                  "Training Loss: {:.3f}.. "\
+                  .format(running_loss/iterations), \
+                  "Validation Loss: {:.3f}.. ".format(dev_loss), \
+                  "Training Accuracy: {:.3f}.. "\
+                  .format(running_acc/iterations), \
+                  "Validation Accuracy: {:.3f}".format(acc))
+    ```

    Note

@@ -759,15 +612,13 @@

    Use the following code to plot the loss:

-    无花果= plt.figure(figsize =(15,5))
-    plt.plot(x_axis,train_losses,label ='训练损失')
-    plt.plot(x_axis,dev_losses,label ='验证损失')
-    plt.legend(frameon = False,fontsize = 15)
+    ```py
+    fig = plt.figure(figsize=(15, 5))
+    plt.plot(x_axis, train_losses, label='Training loss')
+    plt.plot(x_axis, dev_losses, label='Validation loss')
+    plt.legend(frameon=False, fontsize=15)
     plt.show()
+    ```

    Running the preceding code displays the following plot:

@@ -777,15 +628,13 @@

    Use the following code to plot the accuracy:

-    无花果= plt.figure(figsize =(15,5))
-    plt.plot(x_axis,train_acc,label =“训练精度”)
-    plt.plot(x_axis,dev_acc,label =“验证准确率”)
-    plt.legend(frameon = False,fontsize = 15)
+    ```py
+    fig = plt.figure(figsize=(15, 5))
+    plt.plot(x_axis, train_acc, label="Training accuracy")
+    plt.plot(x_axis, dev_acc, label="Validation accuracy")
+    plt.legend(frameon=False, fontsize=15)
     plt.show()
+    ```

    Running the preceding code displays the following plot:

@@ -795,17 +644,14 @@

5. Using the best-performing model, make a prediction over the testing set (which should not have been used during the fine-tuning process). Compare the predictions with the ground truth by calculating the accuracy of the model over this set.

-    model.eval()
-    test_pred =模型(X_test_torch)
-    test_pred = torch.exp(test_pred)
-    top_p,top_class_test = test_pred.topk(1,暗= 1)
-    acc_test = precision_score(y_test_torch,top_class_test)
-    打印(acc_test)
+    ```py
+    model.eval()
+    test_pred = model(X_test_torch)
+    test_pred = torch.exp(test_pred)
+    top_p, top_class_test = test_pred.topk(1, dim=1)
+    acc_test = accuracy_score(y_test_torch, top_class_test)
+    print(acc_test)
+    ```

    The accuracy obtained through the model architecture and the parameters defined here should be around 80%.

@@ -830,124 +676,111 @@

3. In the Jupyter Notebook, save the best-performing model. Make sure to save the information related to the input units, along with the parameters of the model. Name it `checkpoint.pth`.

-    检查点= {“输入”:X_train.shape [1],\
-    “ state_dict”:model.state_dict()}
-    torch.save(检查点,“ checkpoint.pth”)
+    ```py
+    checkpoint = {"input": X_train.shape[1], \
+                  "state_dict": model.state_dict()}
+    torch.save(checkpoint, "checkpoint.pth")
+    ```

4. Open a new Jupyter Notebook.
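The next step imports `final_model`, the Python file holding the network's architecture that was created earlier in this activity; its contents are not part of this patch. Judging from how it is used below (`final_model.Classifier(checkpoint["input"])`) and from the architecture stored in the checkpoint, it can be assumed to contain roughly the following:

```py
# final_model.py -- assumed contents, reconstructed from how the file is used below.
import torch.nn as nn
import torch.nn.functional as F

class Classifier(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        # Same architecture as the best-performing model from the previous activity,
        # so that the saved state_dict can be loaded into it.
        self.hidden_1 = nn.Linear(input_size, 100)
        self.hidden_2 = nn.Linear(100, 100)
        self.hidden_3 = nn.Linear(100, 50)
        self.hidden_4 = nn.Linear(50, 50)
        self.output = nn.Linear(50, 2)
        self.dropout = nn.Dropout(p=0.1)
    def forward(self, x):
        z = self.dropout(F.relu(self.hidden_1(x)))
        z = self.dropout(F.relu(self.hidden_2(z)))
        z = self.dropout(F.relu(self.hidden_3(z)))
        z = self.dropout(F.relu(self.hidden_4(z)))
        return F.log_softmax(self.output(z), dim=1)
```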
5. Import PyTorch, as well as the Python file we created in Step 2.

-    进口火炬
-    导入 final_model
+    ```py
+    import torch
+    import final_model
+    ```

6. Create a function that loads the model.

-    def load_model_checkpoint(path):
-    检查点= torch.load(路径)
-    模型= final_model.Classifier(checkpoint [“ input”])
-    model.load_state_dict(checkpoint [“ state_dict”])
-    退货模式
-    模型= load_model_checkpoint(“ checkpoint.pth”)
+    ```py
+    def load_model_checkpoint(path):
+        checkpoint = torch.load(path)
+        model = final_model.Classifier(checkpoint["input"])
+        model.load_state_dict(checkpoint["state_dict"])
+        return model
+    model = load_model_checkpoint("checkpoint.pth")
+    ```

7. Perform a prediction by inputting the following tensor into your model.

-    例子= torch.tensor([[0.0606,0.5000,0.3333,0.4828,\
-    0.4000, 0.4000, 0.4000, 0.4000, \
-    0.4000, 0.4000, 0.1651, 0.0869, \
-    0.0980, 0.1825, 0.1054, 0.2807, \
-    0.0016, 0.0000, 0.0033, 0.0027, \
-    0.0031,0.0021]])。float()
-    之前=模型(示例)
-    pred = torch.exp(pred)
-    top_p,top_class_test = pred.topk(1,暗= 1)
+    ```py
+    example = torch.tensor([[0.0606, 0.5000, 0.3333, 0.4828, \
+                             0.4000, 0.4000, 0.4000, 0.4000, \
+                             0.4000, 0.4000, 0.1651, 0.0869, \
+                             0.0980, 0.1825, 0.1054, 0.2807, \
+                             0.0016, 0.0000, 0.0033, 0.0027, \
+                             0.0031, 0.0021]]).float()
+    pred = model(example)
+    pred = torch.exp(pred)
+    top_p, top_class_test = pred.topk(1, dim=1)
+    ```

    By printing `top_class_test`, we obtain the model's prediction, which in this case is equal to `1` (yes).

8. Convert the model using the JIT module.

-    traced_script = torch.jit.trace(模型,例如\
-    check_trace = False)
+    ```py
+    traced_script = torch.jit.trace(model, example, \
+                                    check_trace=False)
+    ```

9. Perform a prediction by inputting the same tensor from Step 7 into the traced script of your model.

-    预测= traced_script(示例)
-    预测= torch.exp(预测)
-    top_p_2,top_class_test_2 = projection.topk(1,dim = 1)
+    ```py
+    prediction = traced_script(example)
+    prediction = torch.exp(prediction)
+    top_p_2, top_class_test_2 = prediction.topk(1, dim=1)
+    ```

    By printing `top_class_test_2`, we obtain the prediction from the traced script representation of the model, which again is equal to `1` (yes).

10. Open a new Jupyter Notebook and import the libraries required to create an API using Flask, as well as the libraries required to load the saved model.

-    进口烧瓶
-    从烧瓶进口要求
-    进口火炬
-    导入 final_model
+    ```py
+    import flask
+    from flask import request
+    import torch
+    import final_model
+    ```

11. Initialize the Flask app.

-    app = flask.Flask(__ name__)
-    app.config [“ DEBUG”] = True
+    ```py
+    app = flask.Flask(__name__)
+    app.config["DEBUG"] = True
+    ```

12. Define a function that loads the saved model, and then instantiate the model.

-    def load_model_checkpoint(path):
-    检查点= torch.load(路径)
-    模型= final_model.Classifier(checkpoint [“ input”])
-    model.load_state_dict(checkpoint [“ state_dict”])
-    退货模式
-    模型= load_model_checkpoint(“ checkpoint.pth”)
+    ```py
+    def load_model_checkpoint(path):
+        checkpoint = torch.load(path)
+        model = final_model.Classifier(checkpoint["input"])
+        model.load_state_dict(checkpoint["state_dict"])
+        return model
+    model = load_model_checkpoint("checkpoint.pth")
+    ```
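As a side note to Steps 8 and 9, the traced module returned by `torch.jit.trace` can also be saved to disk and reloaded directly, which would remove the need to ship `final_model.py` alongside the serving code. This is not the approach used in this activity; a minimal sketch, with an arbitrary file name, would be:

```py
# Persist the traced model created in Step 8 (file name chosen for illustration only)
# and reload it without needing the Classifier class definition.
traced_script.save("traced_checkpoint.pt")

loaded_script = torch.jit.load("traced_checkpoint.pt")
prediction = torch.exp(loaded_script(example))
```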
13. Define the API's route as `/prediction` and set the method to `POST`. Then, define the function that will receive the `POST` data and feed it to the model to perform a prediction.

-    @ app.route('/ prediction',methods = ['POST'])
-    def definition():
-    正文= request.get_json()
-    示例= torch.tensor(body ['data'])。float()
-    之前=模型(示例)
-    pred = torch.exp(pred)
-    _,top_class_test = pred.topk(1,暗= 1)
-    top_class_test = top_class_test.numpy()
-    return {“ status”:“ ok”,“ result”:int(top_class_test [0] [0])}
+    ```py
+    @app.route('/prediction', methods=['POST'])
+    def prediction():
+        body = request.get_json()
+        example = torch.tensor(body['data']).float()
+        pred = model(example)
+        pred = torch.exp(pred)
+        _, top_class_test = pred.topk(1, dim=1)
+        top_class_test = top_class_test.numpy()
+        return {"status":"ok", "result":int(top_class_test[0][0])}
+    ```

14. Run the Flask app.

-    app.run(debug = True,use_reloader = False)
+    ```py
+    app.run(debug=True, use_reloader=False)
+    ```

    Using Postman, a platform created for API development, it is possible to test the API. To submit a successful request in Postman, the `Content-Type` header should be set to `application/json`. The resulting output should be as follows:
-- 
GitLab
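For readers who prefer to test the endpoint without Postman, the same request can be sent from Python. This is a minimal sketch, not part of the original activity; it assumes the Flask app from Step 14 is running locally on Flask's default port (5000) and reuses the example values from Step 7:

```py
import requests

# Hypothetical client-side check of the /prediction endpoint defined in Step 13.
body = {"data": [[0.0606, 0.5000, 0.3333, 0.4828, 0.4000, 0.4000, \
                  0.4000, 0.4000, 0.4000, 0.4000, 0.1651, 0.0869, \
                  0.0980, 0.1825, 0.1054, 0.2807, 0.0016, 0.0000, \
                  0.0033, 0.0027, 0.0031, 0.0021]]}
response = requests.post("http://127.0.0.1:5000/prediction", json=body)
print(response.json())  # expected to look like {"result": 1, "status": "ok"}
```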