import argparse
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm


class LSTMModel(nn.Module):
    def __init__(self, inputs_size, hidden_size, output_size, num_layers=1):
        super(LSTMModel, self).__init__()
        self.inputs_size = inputs_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(inputs_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # h_t has shape (num_layers, batch, hidden_size); feed the last layer's
        # final hidden state through the linear head, one prediction per sample.
        out, (h_t, c_t) = self.lstm(x)
        out = self.fc(h_t[-1])
        return out.reshape(-1, 1)
        # Alternative: run the head over the full output sequence and keep only
        # the last time step (batch_first=True, so output is (batch, step, hidden)).
        # output, _ = self.lstm(x)
        # output = self.fc(output)
        # return output[:, -1, :]


# Data normalization: min-max scale every feature column to [0, 1].
# Note: "wnd_dir" is assumed to be numerically encoded already.
def to_minmax_scale(old_data):
    columns = ["pollution", "dew", "temp", "press", "wnd_dir", "wnd_spd", "snow", "rain"]
    for col in columns:
        old_data[col] = MinMaxScaler().fit_transform(old_data[col].values.reshape(-1, 1))
    return old_data


# Sliding-window split: the first 80% of rows form the training set and the rest
# the test set; each sample is a window of `window_size` steps and its label is
# the row immediately after the window. The first column (date/index) is dropped.
def slidingWindow(old_data, window_size, batch_size):
    train_X, train_y = list(), list()
    test_X, test_y = list(), list()
    split_len = int(len(old_data) * 0.8)
    train_data = old_data.iloc[:split_len, 1:]
    test_data = old_data.iloc[split_len:, 1:]

    for item in tqdm(range(0, len(train_data) - window_size), desc="building train windows"):
        train_X.append(train_data.iloc[item: item + window_size, :].values)
        train_y.append(train_data.iloc[item + window_size, :].values)
    for item in tqdm(range(0, len(test_data) - window_size), desc="building test windows"):
        test_X.append(test_data.iloc[item: item + window_size, :].values)
        test_y.append(test_data.iloc[item + window_size, :].values)

    train_X = torch.Tensor(np.array(train_X))
    train_y = torch.Tensor(np.array(train_y))
    test_X = torch.Tensor(np.array(test_X))
    test_y = torch.Tensor(np.array(test_y))

    train_loader = DataLoader(TensorDataset(train_X, train_y), batch_size=batch_size)
    test_loader = DataLoader(TensorDataset(test_X, test_y), batch_size=batch_size)
    return train_loader, test_loader


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--path", default="./data/pollution.csv", type=str)
    # window_size = 1 as a sanity check: merging window_size and features reduces this to a linear model
    parser.add_argument("--window_size", default=24, type=int)
    parser.add_argument("--batch_size", default=32, type=int)
    parser.add_argument("--num_epochs", default=200, type=int)
    parser.add_argument("--inputs_size", default=8, type=int)
    parser.add_argument("--hidden_size", default=50, type=int)
    parser.add_argument("--output_size", default=1, type=int)
    parser.add_argument("--num_layers", default=1, type=int)
    parser.add_argument("--learning_rate", default=1e-5, type=float)
    args = parser.parse_args()
    args.device = "cuda:0" if torch.cuda.is_available() else "cpu"

    # Data preprocessing
    data = pd.read_csv(args.path)
    data = to_minmax_scale(data)
    trainLoader, testLoader = slidingWindow(data, args.window_size, args.batch_size)

    # Model, loss, and optimizer
    model = LSTMModel(args.inputs_size, args.hidden_size, args.output_size, num_layers=args.num_layers)
    model.to(args.device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    # Model training: the target is the "pollution" column (column 0 of the label row).
    model.train()
    for epoch in range(args.num_epochs):
        total_loss = 0
        for inputs, labels in tqdm(trainLoader, desc="training"):
            inputs = inputs.to(args.device)
            labels = labels[:, 0].view(-1, 1).to(args.device)
            out = model(inputs)
            loss = criterion(out, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        total_loss = total_loss / len(trainLoader)
        print("train epoch[%d/%d] loss:%f" % (epoch + 1, args.num_epochs, total_loss))

    # Model testing: per-batch metrics are accumulated and averaged over batches.
    labels_list = list()
    output_list = list()
    model.eval()
    i = 0
    mae_sum = 0
    mse_sum = 0
    r2_sum = 0
    with torch.no_grad():
        for inputs, labels in testLoader:
            inputs = inputs.to(args.device)
            labels = labels[:, 0].view(-1, 1).to(args.device)
            out = model(inputs)

            labels = labels.cpu().numpy()
            out = out.cpu().numpy()
            labels_list.extend(labels)
            output_list.extend(out)
            i += 1

            r2 = r2_score(labels, out)
            mse = mean_squared_error(labels, out)
            mae = mean_absolute_error(labels, out)
            mae_sum += mae
            mse_sum += mse
            r2_sum += r2
            print("batch %d R2:  %.3f" % (i, r2))
            print("batch %d MSE: %.3f" % (i, mse))
            print("batch %d MAE: %.3f" % (i, mae))

    print("MSE: %.4f MAE: %.4f R2: %.4f" % (mse_sum / i, mae_sum / i, r2_sum / i))

    # Plot the test labels against the predictions and save the figure.
    time_string = time.strftime("./LSTM_result_comparison_%m-%d-%H-%M.jpg", time.localtime())
    plt.figure(figsize=(10, 8), dpi=150)
    plt.plot(range(len(labels_list)), labels_list, color='red', label='Original')
    plt.plot(range(len(output_list)), output_list, color='green', label='Predict')
    string_title = "LSTM MSE:%.4f MAE:%.4f R2:%.4f" % (mse_sum / i, mae_sum / i, r2_sum / i)
    plt.title(string_title)
    plt.xlabel('the number of test data')
    plt.ylabel('Pollution (scaled)')
    plt.legend()
    plt.savefig(time_string)
    plt.show()
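# Example invocation (a sketch; the script filename "lstm_pollution.py" and a CSV
# with a leading date/index column followed by the eight feature columns are
# assumptions, not confirmed by this file). Every flag falls back to the argparse
# defaults above, so running the script with no arguments uses the same settings:
#
#   python lstm_pollution.py --path ./data/pollution.csv --window_size 24 \
#       --batch_size 32 --num_epochs 200 --learning_rate 1e-5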