新建仓库维护数据预测项目
This commit is contained in:
291
TVS_DL/LSTM_Test.py
Normal file
291
TVS_DL/LSTM_Test.py
Normal file
@@ -0,0 +1,291 @@
|
||||
# 导入相关库
|
||||
import os
|
||||
|
||||
import pandas as pd
|
||||
import torch # 导入PyTorch库
|
||||
import torch.nn as nn # 导入神经网络模块
|
||||
import tushare as ts # 导入tushare库,用于获取股票数据
|
||||
import torch.optim as optim # 导入优化器模块
|
||||
from tqdm import tqdm # 导入tqdm库,用于显示进度条
|
||||
import matplotlib.pyplot as plt # 导入matplotlib库,用于绘制图表
|
||||
from copy import deepcopy as copy # 导入deepcopy函数,用于深拷贝对象
|
||||
from torch.utils.data import DataLoader, TensorDataset # 导入DataLoader和TensorDataset类,用于加载数据
|
||||
|
||||
|
||||
# 获取数据
|
||||
class GetData:
    """Loads locally cached stock data and builds sliding-window samples."""

    def __init__(self, stock_id, save_path):
        """
        :param stock_id: stock ticker id (kept for reference; a local CSV is used)
        :param save_path: path of the cached CSV file
        """
        self.min_value = None  # min of the 'volume' column, set by getData()
        self.max_value = None  # max of the 'volume' column, set by getData()
        self.stock_id = stock_id
        self.save_path = save_path
        self.data = None  # DataFrame with columns open/close/high/low/volume

    def getData(self):
        """
        Load the cached CSV into ``self.data`` and record the volume min/max.

        The local CSV already holds normalized values (originally fetched via
        ``tushare.get_hist_data`` and min-max scaled), so the min/max recorded
        here are only approximate for de-normalization — acceptable because
        this project is about walking through the pipeline.

        :return: the loaded DataFrame
        """
        columns = ['open', 'close', 'high', 'low', 'volume']
        self.data = pd.read_csv(self.save_path, names=columns, header=0)
        self.max_value = self.data['volume'].max()
        self.min_value = self.data['volume'].min()
        return self.data

    def process_data(self, n):
        """
        Build sliding-window features and next-day close labels.

        :param n: sliding window size (days per sample)
        :return: train features, test features, train labels, test labels
        """
        if self.data is None:
            self.getData()
        # A window of n rows predicts the close of the following day.
        # range(len(data) - n) guarantees i + n is a valid index; this
        # replaces the original redundant range(len-n+2) + filter combination,
        # which produced exactly the same indices.
        num_samples = len(self.data) - n
        feature = [
            self.data.iloc[i: i + n].values.tolist()
            for i in range(num_samples)
        ]
        label = [self.data.close.values[i + n] for i in range(num_samples)]
        # Fixed split: first 500 samples train, the rest test (as before).
        train_x = feature[:500]
        test_x = feature[500:]
        train_y = label[:500]
        test_y = label[500:]
        return train_x, test_x, train_y, test_y
|
||||
|
||||
# LSTM model: single unidirectional LSTM layer + fully connected output head
class Model(nn.Module):
    """Single-layer unidirectional LSTM followed by a linear head."""

    def __init__(self, n):
        """
        :param n: number of input features per time step
        """
        super(Model, self).__init__()  # initialize the nn.Module machinery
        # LSTM: input size n, hidden size 256, batch dimension first.
        self.lstm_layer = nn.LSTM(input_size=n, hidden_size=256, batch_first=True)
        # Linear head: 256 hidden features -> 1 predicted value (with bias).
        self.linear_layer = nn.Linear(in_features=256, out_features=1, bias=True)

    def forward(self, x):
        """
        :param x: input tensor of shape (batch, seq_len, n)
        :return: tensor of shape (batch, 1) — one prediction per sample
        """
        lstm_output, (hidden_state, cell_state) = self.lstm_layer(x)
        # hidden_state has shape (num_layers * num_directions, batch, hidden).
        # The original code unpacked it as (batch, num_layers, hidden) and
        # flattened with .view(), which only worked by accident for a single
        # layer; taking the last layer's state is correct and also works for
        # multi-layer LSTMs.
        final_output = self.linear_layer(hidden_state[-1])
        return final_output
|
||||
|
||||
|
||||
# Train the model
def train_model(epoch, train_dataloader, test_dataloader, optimizer, early_stop, model):
    """
    Train the model with early stopping on the test loss.

    Saves the best model (lowest test loss so far) to ./best_model.pth.

    :param epoch: number of training epochs
    :param train_dataloader: training data loader
    :param test_dataloader: test data loader
    :param optimizer: optimizer
    :param early_stop: stop after this many epochs without improvement
    :param model: model to train
    :return: None

    NOTE: relies on the module-level ``loss_func`` created in the main script.
    """
    train_loss = 0  # shown in the tqdm description
    test_loss = 0   # shown in the tqdm description
    best_loss = 100  # best (lowest) test loss seen so far
    epoch_cnt = 0    # epochs since the last improvement

    for i in range(epoch):
        total_train_loss = 0
        total_train_num = 0
        total_test_loss = 0
        total_test_num = 0

        model.train()
        for x, y in tqdm(train_dataloader, desc=f"Epoch:{i} | Train Loss:{train_loss} | Test Loss:{test_loss}"):
            x_num = len(x)                # samples in this batch
            p = model(x)                  # forward pass
            loss = loss_func(p, y)        # compute loss
            optimizer.zero_grad()         # clear accumulated gradients
            loss.backward()               # backpropagate
            optimizer.step()              # update parameters
            total_train_loss += loss.item()
            total_train_num += x_num
        train_loss = total_train_loss / total_train_num

        # Evaluation. The original code also ran zero_grad/backward/step in
        # this loop, i.e. it TRAINED on the test set (data leakage); evaluate
        # without gradients instead.
        model.eval()
        with torch.no_grad():
            for x, y in test_dataloader:
                x_num = len(x)
                p = model(x)
                loss = loss_func(p, y)
                total_test_loss += loss.item()
                total_test_num += x_num
        test_loss = total_test_loss / total_test_num

        # Keep the best model; reset the patience counter on improvement,
        # otherwise count towards early stopping.
        if test_loss < best_loss:
            best_loss = test_loss
            best_model = copy(model)  # deep copy so later training doesn't mutate it
            torch.save(best_model.state_dict(), './best_model.pth')
            epoch_cnt = 0
        else:
            epoch_cnt += 1
            if epoch_cnt > early_stop:
                break
|
||||
|
||||
|
||||
def test_model(test_dataloader):
    """
    Evaluate the best saved model on the test set.

    :param test_dataloader: test data loader
    :return: (pred, label, test_loss)

    NOTE: relies on the module-level ``Model`` class and ``loss_func``.
    """
    pred = []    # predictions
    label = []   # ground-truth labels
    model_f = Model(5)
    # map_location lets the checkpoint load even if it was saved on another
    # device (e.g. GPU checkpoint loaded on a CPU-only machine).
    model_f.load_state_dict(torch.load('./best_model.pth', map_location='cpu'))
    model_f.eval()  # evaluation mode
    total_test_loss = 0
    total_test_num = 0

    # Inference only — no gradients needed (the original evaluated with
    # autograd enabled, wasting memory and time).
    with torch.no_grad():
        for x, y in test_dataloader:
            x_num = len(x)
            p = model_f(x)
            loss = loss_func(p, y)
            total_test_loss += loss.item()
            total_test_num += x_num
            # Collect predictions and labels for plotting.
            pred.extend(p.data.squeeze(1).tolist())
            label.extend(y.data.tolist())

    test_loss = total_test_loss / total_test_num
    return pred, label, test_loss
|
||||
|
||||
|
||||
def plot_img(data, pred):
    """
    Plot the real close prices against the model predictions.

    :param data: list of ground-truth labels (de-normalized close prices)
    :param pred: list of model predictions (same scale as ``data``)
    :return: None
    """
    # Use a font that can render the Chinese labels below.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly with this font

    plt.figure(figsize=(14, 8))

    # Ground-truth curve.
    plt.plot(range(len(data)), data, color='blue', label='真实值(收盘价)', linewidth=2)

    # Prediction curve.
    plt.plot(range(len(pred)), pred, color='green', label='预测值(模型输出)', linestyle='--', linewidth=2)

    # Every 5 points, draw a 3-day segment anchored at the real value and
    # offset by the local change in the predictions.
    for i in range(0, len(pred) - 3, 5):
        price = [data[i] + pred[j] - pred[i] for j in range(i, i + 3)]
        plt.plot(range(i, i + 3), price, color='red', alpha=0.6, linestyle=':', linewidth=1.5)

    # Title and axis labels.
    plt.title('股票价格预测结果对比', fontsize=20)
    plt.xlabel('时间步(天数)', fontsize=16)
    plt.ylabel('股票收盘价(亿)', fontsize=16)

    # Tick fonts.
    plt.xticks(fontproperties='Times New Roman', size=14)
    plt.yticks(fontproperties='Times New Roman', size=14)

    # Legend.
    plt.legend(loc='upper left', fontsize=14)

    # Grid.
    plt.grid(True, linestyle='--', alpha=0.5)

    # Render the figure.
    plt.tight_layout()
    plt.show()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Hyperparameters.
    days_num = 5      # sliding window size in days
    epoch = 100       # training epochs
    fea = 5           # number of input features
    batch_size = 20   # batch size
    early_stop = 5    # early-stopping patience

    # Model.
    model = Model(fea)
    # Data.
    gd = GetData(stock_id='601398', save_path='./data.csv')
    train_x, test_x, train_y, test_y = gd.process_data(days_num)
    # To tensors.
    train_x = torch.tensor(train_x).float()
    test_x = torch.tensor(test_x).float()
    train_y = torch.tensor(train_y).float()
    test_y = torch.tensor(test_y).float()

    # Datasets and loaders.
    train_data = TensorDataset(train_x, train_y)
    train_dataloader = DataLoader(train_data, batch_size=batch_size)
    test_data = TensorDataset(test_x, test_y)
    test_dataloader = DataLoader(test_data, batch_size=batch_size)

    # Loss function and optimizer.
    loss_func = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train.
    train_model(epoch, train_dataloader, test_dataloader, optimizer, early_stop, model)

    # Only evaluate and plot when a trained model exists. The original code
    # fell through after the else-branch and raised a NameError on `pred`
    # when ./best_model.pth was missing.
    if os.path.exists('./best_model.pth'):
        pred, label, test_loss = test_model(test_dataloader)

        # De-normalize predictions and labels back to price scale.
        # NOTE(review): min/max come from the 'volume' column but are applied
        # to close prices, and the cached data is already normalized — this is
        # only an approximation (acknowledged in GetData.getData).
        pred = [ele * (gd.max_value - gd.min_value) + gd.min_value for ele in pred]
        data = [ele * (gd.max_value - gd.min_value) + gd.min_value for ele in label]
        # Plot.
        plot_img(data, pred)

        print(f"模型损失:{test_loss}")
    else:
        print("模型文件不存在,请先完成训练并确保模型已保存。")
|
||||
33
TVS_DL/LoadData.py
Normal file
33
TVS_DL/LoadData.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import pandas as pd
|
||||
import torch
|
||||
import numpy as np
|
||||
from sklearn.preprocessing import StandardScaler, MinMaxScaler
|
||||
# Data preprocessing
def process_data(file_path):
    """
    Read the "sample1" sheet from an Excel workbook and standardize it.

    :param file_path: path of the Excel file
    :return: (inputs, outputs, input_scaler, output_scaler) — standardized
             arrays plus the fitted scalers (needed for inverse transforms)
    """
    # NOTE(review): the original comment claimed the first row is skipped,
    # but header=None without skiprows treats row 0 as data (the sibling
    # TVS_LSTM.py version passes skiprows=1) — confirm which is intended.
    df = pd.read_excel(file_path, header=None,sheet_name="sample1")

    # Slice input and output columns by position.
    inputs = df.iloc[:, 1:9].values.astype(np.float32)  # spreadsheet columns 2-9
    outputs = df.iloc[:, 10:19].values.astype(np.float32)  # spreadsheet columns 11-19

    # Standardize each column (zero mean, unit variance).
    input_scaler = StandardScaler()
    output_scaler = StandardScaler()
    inputs = input_scaler.fit_transform(inputs)
    outputs = output_scaler.fit_transform(outputs)
    # Alternative min-max scaling, kept for reference:
    # input_scaler = MinMaxScaler(feature_range=(-1, 1))
    # output_scaler = MinMaxScaler(feature_range=(-1, 1))
    # inputs = input_scaler.fit_transform(inputs)
    # outputs = output_scaler.fit_transform(outputs)

    return inputs, outputs, input_scaler, output_scaler
|
||||
|
||||
# Build a sequence dataset. look_back: input window length; pred_step: prediction window length
def create_sequences(inputs, outputs, look_back=8, pred_step=1):
    """Slice aligned (window, target) pairs out of two parallel sequences.

    Each sample pairs ``look_back`` consecutive input rows with the
    ``pred_step`` output rows that immediately follow the window.
    Returns the windows and targets as two float tensors.
    """
    n_samples = len(inputs) - look_back - pred_step
    windows = [inputs[start:start + look_back] for start in range(n_samples)]
    targets = [
        outputs[start + look_back:start + look_back + pred_step]
        for start in range(n_samples)
    ]
    return torch.FloatTensor(np.array(windows)), torch.FloatTensor(np.array(targets))
|
||||
190
TVS_DL/TVS_LSTM.py
Normal file
190
TVS_DL/TVS_LSTM.py
Normal file
@@ -0,0 +1,190 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.preprocessing import StandardScaler, MinMaxScaler
|
||||
from torch.utils.data import Dataset, DataLoader
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Hyperparameters
SEQ_LENGTH = 10 # input window length (time steps fed to the LSTM)
PRE_LENGTH = 1 # prediction window length (time steps predicted)
BATCH_SIZE = 4096
EPOCHS = 2000
HIDDEN_SIZE = 64
HIDDEN_LAYER = 3 # number of stacked LSTM layers
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
|
||||
|
||||
# Data preprocessing
def process_data(file_path):
    """
    Read the "sample1" sheet from an Excel workbook, standardize it, and show
    a before/after histogram of the input features.

    :param file_path: path of the Excel file
    :return: (inputs, outputs, input_scaler, output_scaler)
    """
    # Skip the header row; read the "sample1" sheet.
    df = pd.read_excel(file_path, header=None, skiprows=1,sheet_name="sample1")

    # Slice input and output columns by position.
    inputs = df.iloc[:, 1:9].values.astype(np.float32)  # spreadsheet columns 2-9
    outputs = df.iloc[:, 10:19].values.astype(np.float32)  # spreadsheet columns 11-19
    print("标准化之前:")
    ori = inputs  # keep a reference to the raw values for the comparison plot
    print(inputs)
    # Standardize each column (zero mean, unit variance).
    input_scaler = StandardScaler()
    output_scaler = StandardScaler()
    inputs = input_scaler.fit_transform(inputs)
    outputs = output_scaler.fit_transform(outputs)
    # Alternative min-max scaling, kept for reference:
    # input_scaler = MinMaxScaler(feature_range=(-1, 1))
    # output_scaler = MinMaxScaler(feature_range=(-1, 1))
    # inputs = input_scaler.fit_transform(inputs)
    # outputs = output_scaler.fit_transform(outputs)
    print("标准化之后:")
    print(inputs)

    # Before/after comparison plot.
    plt.figure(figsize=(12, 8))
    colors = plt.cm.tab10(np.arange(8))  # one color per input feature

    # Raw feature distribution.
    plt.subplot(2, 2, 1)
    plt.hist(ori, bins=30, alpha=0.7,color=colors, label='Original')
    plt.title('Image Channel (Original)')
    plt.xlabel('Pixel Value')

    # Standardized distribution. The original reused plt.subplot(2, 2, 1)
    # here, drawing both histograms into the same axes and overwriting the
    # first panel's title; use the second panel instead.
    plt.subplot(2, 2, 2)
    plt.hist(inputs, bins=30, alpha=0.7, color=colors, label='StandardScaler')
    plt.title('StandardScaler Comparison')
    plt.xlabel('StandardScaler Value')
    plt.legend()

    plt.tight_layout()
    plt.show()

    return inputs, outputs, input_scaler, output_scaler
|
||||
|
||||
|
||||
# Build a sequence dataset. look_back: input window length; pred_step: prediction window length
def create_sequences(inputs, outputs, look_back=8, pred_step=1):
    """Pair each look_back-row input window with the pred_step output rows
    that follow it, returning both stacks as float tensors."""
    xs, ys = [], []
    last = len(inputs) - look_back - pred_step
    for offset in range(last):
        end = offset + look_back
        xs.append(inputs[offset:end])
        ys.append(outputs[end:end + pred_step])
    return torch.FloatTensor(np.array(xs)), torch.FloatTensor(np.array(ys))
|
||||
|
||||
|
||||
# Custom Dataset wrapping pre-built feature/target tensors
class TimeSeriesDataset(Dataset):
    """Minimal map-style Dataset over paired samples and targets."""

    def __init__(self, X, y):
        self.X = X  # feature windows
        self.y = y  # matching targets

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
|
||||
# Loss history, appended to once per epoch by the training loop below
train_losses = []
test_losses = []
|
||||
|
||||
# LSTM model
class LSTMModel(nn.Module):
    """Stacked LSTM whose final time-step output feeds a linear head."""

    def __init__(self, input_size, hidden_size, output_size, num_lay):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
            num_layers=num_lay,      # number of stacked LSTM layers
            # bidirectional=True,    # bidirectional variant (disabled)
            # dropout=0.2,           # dropout regularization (disabled)
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        sequence_out, _states = self.lstm(x)
        last_step = sequence_out[:, -1, :]  # hidden output of the final time step
        return self.fc(last_step)
|
||||
|
||||
|
||||
# Main flow
if __name__ == "__main__":
    # Prepare data: load and standardize.
    # NOTE(review): hard-coded Windows path; this works only because none of
    # the backslash pairs form a recognized escape sequence — a raw string
    # (r"...") would be safer.
    inputs, outputs, input_scaler, output_scaler = process_data("D:\liyong\文档\项目文档\中汽TVS\机器学习\降阶模型数据.xlsx")

    print(inputs[:5])   # first five standardized input rows
    print(outputs[:5])  # first five standardized output rows
    X, y = create_sequences(inputs, outputs, SEQ_LENGTH,PRE_LENGTH)

    # Train/test split: first 80% train, last 20% test (time order kept).
    split = int(0.8 * len(X))
    train_dataset = TimeSeriesDataset(X[:split], y[:split])
    test_dataset = TimeSeriesDataset(X[split:], y[split:])

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # Model.
    # NOTE(review): process_data slices outputs with iloc[:, 10:19] (up to 9
    # columns) while output_size=8, and y keeps a pred_step dimension —
    # confirm the shapes MSELoss receives actually match.
    model = LSTMModel(
        input_size=8,  # number of input features
        hidden_size=HIDDEN_SIZE,
        output_size=8 ,  # number of output features
        num_lay = HIDDEN_LAYER
    ).to(DEVICE)

    # Training setup.
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

    for epoch in range(EPOCHS):
        model.train()
        epoch_train_loss = 0

        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(DEVICE), batch_y.to(DEVICE)
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_train_loss += loss.item() * batch_X.size(0)

        # Average training loss for this epoch.
        avg_train_loss = epoch_train_loss / len(train_loader.dataset)
        train_losses.append(avg_train_loss)

        # Validation loss (no gradients needed).
        model.eval()
        test_loss = 0
        with torch.no_grad():
            for batch_X, batch_y in test_loader:
                batch_X, batch_y = batch_X.to(DEVICE), batch_y.to(DEVICE)
                preds = model(batch_X)
                test_loss += criterion(preds, batch_y).item() * batch_X.size(0)

        avg_test_loss = test_loss / len(test_loader.dataset)
        test_losses.append(avg_test_loss)

        # Periodic progress report with the train/test loss ratio.
        if epoch % 10 == 0:
            print(
                f"Epoch {epoch} | Train Loss: {avg_train_loss:.4f} | Test Loss: {avg_test_loss:.4f} | 损失比: {avg_train_loss / avg_test_loss:.2f}:1")
    # Persist the trained model (whole module, not just the state dict).
    torch.save(model,"D:\liyong\lstm.pth")
    # Plot the loss curves.
    plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='Train Loss', color='blue', alpha=0.7)
    plt.plot(test_losses, label='Test Loss', color='red', alpha=0.7)
    plt.title("LSTM TrainLine (train vs test)")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.5)
    # plt.savefig('training_loss_curve.png', dpi=300)  # save a high-res image
    plt.show()

    # Example prediction on the first sample.
    sample_input = X[0:1].to(DEVICE)
    prediction = model(sample_input)

    print("Sample Prediction:", prediction)
    print("Real Value:", y[0:1])
|
||||
196
TVS_DL/TVS_LSTM_2.py
Normal file
196
TVS_DL/TVS_LSTM_2.py
Normal file
@@ -0,0 +1,196 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
"""
|
||||
导入数据
|
||||
"""
|
||||
# load the dataset: monthly airline passenger counts (column 1 of flights.csv)
flight_data = pd.read_csv('flights.csv', usecols=[1], engine='python')
dataset = flight_data.values
dataset = dataset.astype('float32')

print(flight_data.head())
print(flight_data.shape)

# Plot the monthly passenger totals.
fig_size = plt.rcParams['figure.figsize']
fig_size[0] = 15
fig_size[1] = 5
plt.rcParams['figure.figsize'] = fig_size
plt.title('Month vs Passenger')
plt.ylabel('Total Passengers')
plt.xlabel('Months')
plt.grid(True)
plt.autoscale(axis='x',tight=True)
plt.plot(flight_data['passengers'])
plt.show()
|
||||
|
||||
"""
|
||||
数据预处理
|
||||
"""
|
||||
flight_data.columns#(no-op expression) inspect the dataset's column labels
all_data = flight_data['passengers'].values.astype(float)#convert the passengers column to float
#split into train and test sets
test_data_size = 12
train_data = all_data[:-test_data_size]#everything except the last 12 points
test_data = all_data[-test_data_size:]#only the last 12 points
print(len(train_data))
print(len(test_data))

#min-max scale to [-1, 1] to reduce training error
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1, 1))
train_data_normalized = scaler.fit_transform(train_data.reshape(-1, 1))
#inspect the first and last 5 normalized values
print(train_data_normalized[:5])
print(train_data_normalized[-5:])
#convert to a tensor (PyTorch trains on tensors) before building input sequences and labels
train_data_normalized = torch.FloatTensor(train_data_normalized).view(-1)
#view is like numpy's reshape; the arguments are the sizes of each dimension
#-1 lets PyTorch infer that dimension; without -1 the sizes must multiply to the element count
|
||||
|
||||
# Build (window, next-value) tuples from the raw series.
def create_inout_sequences(input_data, tw):
    """Return a list of (sequence, label) pairs: each pair holds ``tw``
    consecutive points and the single point that immediately follows."""
    total = len(input_data)
    pairs = []
    for start in range(total - tw):
        window = input_data[start:start + tw]
        target = input_data[start + tw:start + tw + 1]  # the value right after the window
        pairs.append((window, target))
    return pairs
|
||||
train_window = 12#input sequence length (time_step = 12)
train_inout_seq = create_inout_sequences(train_data_normalized, train_window)
print(train_inout_seq[:5])#inspect the transformed dataset
|
||||
"""
|
||||
注意:
|
||||
create_inout_sequences返回的元组列表由一个个序列组成,
|
||||
每一个序列有13个数据,分别是设置的12个数据(train_window)+ 第13个数据(label)
|
||||
第一个序列由前12个数据组成,第13个数据是第一个序列的标签。
|
||||
同样,第二个序列从第二个数据开始,到第13个数据结束,而第14个数据是第二个序列的标签,依此类推。
|
||||
"""
|
||||
|
||||
"""
|
||||
创建LSTM模型
|
||||
参数说明:
|
||||
1、input_size:对应的及特征数量,此案例中为1,即passengers
|
||||
2、output_size:预测变量的个数,及数据标签的个数
|
||||
2、hidden_layer_size:隐藏层的特征数,也就是隐藏层的神经元个数
|
||||
"""
|
||||
class LSTM(nn.Module):  # note: Module is capitalized
    """Single-layer LSTM regressor for a one-feature time series.

    input_size: features per time step (1: passengers)
    hidden_layer_size: LSTM hidden units
    output_size: number of predicted values (the label count, 1)
    """

    def __init__(self, input_size=1, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size

        # The LSTM extracts temporal features; the linear layer maps the
        # hidden representation to the final prediction. An LSTM step takes
        # the previous hidden state, previous cell state and current input.
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)

        # (hidden state, cell state) pair, each shaped
        # (num_layers=1, batch=1, hidden_layer_size). batch is 1 because the
        # training loop feeds one sequence at a time and resets this pair
        # before every sequence.
        self.hidden_cell = (torch.zeros(1, 1, self.hidden_layer_size),
                            torch.zeros(1, 1, self.hidden_layer_size))

    def forward(self, input_seq):
        # Reshape to (seq_len, batch=1, input_size) as nn.LSTM expects,
        # threading the stored (hidden, cell) pair through the call; the call
        # returns the per-step outputs and the updated state pair.
        shaped = input_seq.view(len(input_seq), 1, -1)
        lstm_out, self.hidden_cell = self.lstm(shaped, self.hidden_cell)
        # Flatten the per-step outputs and project each to a prediction.
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        # Only the final step's prediction is returned.
        return predictions[-1]
|
||||
"""
|
||||
forward方法:LSTM层的输入与输出:out, (ht,Ct)=lstm(input,(h0,C0)),其中
|
||||
一、输入格式:lstm(input,(h0, C0))
|
||||
1、input为(seq_len,batch,input_size)格式的tensor,seq_len即为time_step
|
||||
2、h0为(num_layers * num_directions, batch, hidden_size)格式的tensor,隐藏状态的初始状态
|
||||
3、C0为(seq_len, batch, input_size)格式的tensor,细胞初始状态
|
||||
二、输出格式:output,(ht,Ct)
|
||||
1、output为(seq_len, batch, num_directions*hidden_size)格式的tensor,包含输出特征h_t(源于LSTM每个t的最后一层)
|
||||
2、ht为(num_layers * num_directions, batch, hidden_size)格式的tensor,
|
||||
3、Ct为(num_layers * num_directions, batch, hidden_size)格式的tensor,
|
||||
"""
|
||||
|
||||
#instantiate the LSTM model, the loss function and the optimizer
model = LSTM()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)#optimizer instance
print(model)
|
||||
|
||||
"""
|
||||
模型训练
|
||||
batch-size是指1次迭代所使用的样本量;
|
||||
epoch是指把所有训练数据完整的过一遍;
|
||||
由于默认情况下权重是在PyTorch神经网络中随机初始化的,因此可能会获得不同的值。
|
||||
"""
|
||||
epochs = 300
for i in range(epochs):
    for seq, labels in train_inout_seq:
        #clear the gradients from the previous step; PyTorch accumulates
        #gradients by default, so they must be zeroed manually each iteration
        optimizer.zero_grad()
        #reset the (hidden, cell) state so each sequence starts fresh
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                        torch.zeros(1, 1, model.hidden_layer_size))
        #forward pass through the model
        y_pred = model(seq)
        #compute the loss between prediction and label, backpropagate the
        #gradients, and update the parameters
        single_loss = loss_function(y_pred, labels)
        single_loss.backward()#autograd computes the gradients
        optimizer.step()#the optimizer applies them to the network

    # Periodic progress report (every 25 epochs, offset by 1).
    if i%25 == 1:
        print(f'epoch:{i:3} loss:{single_loss.item():10.8f}')
# Final loss after the last epoch.
print(f'epoch:{i:3} loss:{single_loss.item():10.10f}')
|
||||
|
||||
"""
|
||||
预测
|
||||
注意,test_input中包含12个数据,
|
||||
在for循环中,12个数据将用于对测试集的第一个数据进行预测,然后将预测值附加到test_inputs列表中。
|
||||
在第二次迭代中,最后12个数据将再次用作输入,并进行新的预测,然后 将第二次预测的新值再次添加到列表中。
|
||||
由于测试集中有12个元素,因此该循环将执行12次。
|
||||
循环结束后,test_inputs列表将包含24个数据,其中,最后12个数据将是测试集的预测值。
|
||||
"""
|
||||
fut_pred = 12  # number of future points to predict

test_inputs = train_data_normalized[-train_window:].tolist()  # seed with the last 12 training points
print(test_inputs)

# Switch the model to evaluation mode (training attribute set to False).
model.eval()
for i in range(fut_pred):
    seq = torch.FloatTensor(test_inputs[-train_window:])
    with torch.no_grad():
        # Reset the recurrent state before each prediction. The original
        # assigned to ``model.hidden``, an attribute the LSTM class never
        # reads (forward uses ``hidden_cell``), so the reset was a no-op and
        # hidden state silently leaked between iterations.
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                             torch.zeros(1, 1, model.hidden_layer_size))
        # Each prediction is appended and becomes part of the next window.
        test_inputs.append(model(seq).item())
# Print the 12 predicted (still normalized) values.
print(test_inputs[fut_pred:])
# The training data was normalized, so predictions are normalized too;
# map them back to real passenger counts with inverse_transform.
actual_predictions = scaler.inverse_transform(np.array(test_inputs[train_window:]).reshape(-1, 1))
print(actual_predictions)
|
||||
|
||||
"""
|
||||
根据实际值,绘制预测值
|
||||
"""
|
||||
# x positions for the 12 predicted months
# (starts at 132 — presumably len(all_data) - test_data_size; confirm against the CSV)
x = np.arange(132, 132+fut_pred, 1)
plt.title('Month vs Passenger with all data')
plt.ylabel('Total Passengers')
plt.xlabel('Months')
plt.grid(True)
plt.autoscale(axis='x', tight=True)
plt.plot(flight_data['passengers'])
plt.plot(x, actual_predictions)
plt.show()
#zoom in: the last 12 actual months against the predictions, at a larger scale
plt.title('Month vs Passenger last pred data')
plt.ylabel('Total Passengers')
plt.xlabel('Months')
plt.grid(True)
plt.autoscale(axis='x', tight=True)
plt.plot(flight_data['passengers'][-train_window:])
plt.plot(x, actual_predictions)
plt.show()
|
||||
0
TVS_DL/__init__.py
Normal file
0
TVS_DL/__init__.py
Normal file
Reference in New Issue
Block a user