# Polynomial fitting: trains a single linear layer on the powers x, x^2, ...,
# x^degree of the input column and exports the trained model.
import argparse
import sys

import torch
from torch.utils.data import TensorDataset, DataLoader

# Raw string: "\l", "\p" and "\T" are not valid escape sequences, so a plain
# literal only worked by accident (and warns on recent Python versions). The
# resulting path is unchanged.
sys.path.append(r"D:\liyong\project\TVS_ML")  # replace with the actual project path
from FC_ML_Data.FC_ML_Data_Load.Data_Load_Excel import get_data_from_excel_xy
from FC_ML_Data.FC_ML_Data_Output.Data_Output_Pytorch import export_model
from FC_ML_Loss_Function.Loss_Function_Selector import LossFunctionSelector
from FC_ML_Optim_Function.Optimizer_Selector import OptimizerSelector


def make_features(x, degree):
    """Stack the powers ``x**1 .. x**degree`` of ``x`` along a new dimension 1.

    The upper bound is inclusive so that ``degree`` columns are produced,
    matching ``PolyModel(input_size=degree)`` and the powers used by the
    training loop (``degree=3`` gives x, x^2, x^3).

    Args:
        x: Input tensor.
        degree: Highest power to generate.

    Returns:
        Tensor holding ``degree`` powers of ``x`` stacked on dimension 1.
    """
    return torch.stack([x ** i for i in range(1, degree + 1)], dim=1)


class PolyModel(torch.nn.Module):
    """Linear regression head applied to pre-computed polynomial features."""

    def __init__(self, input_size):
        """Create the layer mapping ``input_size`` features to one output."""
        super().__init__()
        # One weight per polynomial term plus a bias; a single output value.
        self.linear = torch.nn.Linear(input_size, 1)

    def forward(self, x):
        """Return the linear layer's prediction for the feature matrix ``x``."""
        return self.linear(x)


DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def _build_arg_parser():
    """Return the argument parser describing every training option."""
    parser = argparse.ArgumentParser(description='模型训练参数配置')
    parser.add_argument('--data_dir', default='D:/liyong/project/TVS_ML/Test_Data/multi_poly/output.xlsx', help='数据集路径')
    parser.add_argument('--model_dir', default='D:/liyong/project/TVS_ML/Test_Data/multi_poly/', help='模型导出路径')
    parser.add_argument('--name', default='model', help='导出模型名称')
    parser.add_argument('--model_format', default='pt', help='模型格式')  # pt / onnx / bin
    parser.add_argument('--epochs', type=int, default=1000, help='训练轮次')
    parser.add_argument('--epochs_output', type=int, default=10, help='训练轮次损失打印')
    parser.add_argument('--degree', type=int, default=3, help='多项式拟合阶数')
    parser.add_argument('--lr', type=float, default=0.001, help='学习率')  # typical range 0.1 - 0.0001
    # A larger batch uses more memory but loads data faster.
    parser.add_argument('--batch_size', type=int, default=32, help='批量加载大小')
    # Loss names listed in the original notes (entries tagged "TVS" there are
    # 'mse' and 'l1'):
    #   'mse': mean squared error            'l1': mean absolute error
    #   'cross_entropy': cross entropy       'bce': binary cross entropy
    #   'smooth_l1': smooth L1               'kl_div': KL divergence
    #   'hinge': hinge loss                  'triplet': triplet loss
    parser.add_argument('--loss', default='mse', help='损失函数')
    # Optimizer names listed in the original notes (entries tagged "TVS" there
    # are 'sgd' and 'adam'):
    #   'sgd': stochastic gradient descent   'adam': adaptive moment estimation
    #   'rmsprop': root mean square propagation
    #   'adagrad': adaptive gradient         'adamw': Adam with weight decay
    parser.add_argument('--optim', default='sgd', help='优化函数')
    # Fraction of the rows used for training, e.g. 0.8 = first 80% of the data.
    parser.add_argument('--percent', type=float, default=0.8, help='训练集比例')
    parser.add_argument('--sheet', default='Sheet1', help='数据表单名')  # not exposed to end users
    # When set, --normalization_type takes effect.
    parser.add_argument('--normalization', action='store_true', help='是否开启数据预处理')
    # Scaling methods described in the original notes:
    #   Min-Max: interval scaling.
    #   Z-score: variance scaling, for unknown distributions, outliers, or
    #            gradient-descent based models.
    #   Decimal scaling: keeps the original distribution shape, range ~[-1, 1].
    parser.add_argument('--normalization_type', default='minmax', help='数据处理方式')
    # NOTE: store_false means shuffling is ON by default and passing --shuffle
    # turns it OFF (intended for strongly time-ordered data).
    parser.add_argument('--shuffle', action='store_false', help='数据乱序')
    # Number of DataLoader workers; 0 loads in the main process. Not exposed.
    parser.add_argument('--num_workers', type=int, default=0, help='加速线程数量')
    # NOTE: store_true means the GPU is opt-in; without --gpu training runs on
    # the CPU even when CUDA is available. Not exposed.
    parser.add_argument('--gpu', action='store_true', help='启用GPU加速')
    return parser


def _run_epoch(model, loader, criterion, powers, device, optimizer=None):
    """Run one pass over ``loader`` and return the sample-weighted mean loss.

    Args:
        model: Module called on the polynomial feature matrix.
        loader: Iterable of ``(features, labels)`` batches with a ``dataset``.
        criterion: Callable ``criterion(prediction, labels)`` returning a loss.
        powers: 1-D tensor of exponents applied to every feature batch.
        device: Device the batch is moved to before the forward pass.
        optimizer: When given, a backward pass and optimizer step are done for
            every batch; when ``None`` the pass only evaluates.

    Returns:
        Mean loss per sample, or NaN when the loader's dataset is empty.
    """
    exponents = powers.view(1, -1)
    weighted_loss = 0.0
    for features, labels in loader:
        # Broadcasting the batch against the exponents builds the polynomial
        # feature matrix on the loader's device before moving it.
        x_poly = (features ** exponents).to(device)
        labels = labels.to(device)
        pred = model(x_poly)
        # Align the prediction with the label shape explicitly: a bare
        # squeeze() silently broadcasts against (batch, 1) labels and collapses
        # to a 0-dim tensor when the last batch holds a single sample.
        loss = criterion(pred.reshape(labels.shape), labels)
        if optimizer is not None:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Weight by batch size so a short final batch does not skew the mean.
        weighted_loss += loss.item() * x_poly.size(0)
    sample_count = len(loader.dataset)
    return weighted_loss / sample_count if sample_count else float("nan")


def main():
    """Parse the options, train the polynomial model and export it."""
    # Per-epoch loss history.
    train_losses = []
    test_losses = []

    # Load the external parameters.
    args = _build_arg_parser().parse_args()
    print(f"训练数据源: {args.data_dir},模型导出路径:{args.model_dir},"
          f"模型名称:{args.name} ,模型导出格式:{args.model_format},"
          f"训练轮次: {args.epochs}, 多项式阶数:{args.degree},"
          f"学习率:{args.lr},损失函数:{args.loss},优化函数:{args.optim},"
          f"数据表单名:{args.sheet},是否开启数据预处理:{args.normalization},"
          f"数据处理方式:{args.normalization_type},gpu加速:{args.gpu},"
          f"批量加载:{args.batch_size},数据乱序:{args.shuffle},"
          f"加速线程数量:{args.num_workers},训练集比例:{args.percent},训练轮次损失打印:{args.epochs_output}")

    # The accelerator is used only when --gpu is passed (and CUDA exists).
    device = DEVICE if args.gpu else torch.device("cpu")

    # Load the training data.
    x_ori, y_ori, x, y, normalization = get_data_from_excel_xy(
        args.data_dir, args.sheet, args.normalization, args.normalization_type)

    # Split into training and test sets by position.
    split = int(args.percent * len(x))
    if split <= 0:
        # Fail early with a clear message instead of an obscure error later.
        raise ValueError(
            f"--percent={args.percent} leaves no training samples "
            f"({len(x)} rows loaded)")
    train_dataset = TensorDataset(x[:split], y[:split])
    test_dataset = TensorDataset(x[split:], y[split:])
    print(train_dataset, test_dataset)
    train_loader = DataLoader(
        train_dataset,
        batch_size=args.batch_size,    # batched loading
        shuffle=args.shuffle,          # shuffle the training samples
        num_workers=args.num_workers,  # parallel loading
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=args.batch_size,    # batched loading
        shuffle=False,                 # the test set is never shuffled
        num_workers=args.num_workers,  # parallel loading
    )

    # Model, loss function and optimizer.
    model = PolyModel(input_size=args.degree).to(device)
    criterion = LossFunctionSelector().get_loss(args.loss)
    optimizer = OptimizerSelector().get_optimizer(model.parameters(), args.optim, lr=args.lr)

    # Exponents 1..degree; identical for every batch, so built once.
    powers = torch.arange(1, args.degree + 1, dtype=x.dtype)

    for epoch in range(args.epochs):
        # Training pass.
        model.train()
        avg_train_loss = _run_epoch(model, train_loader, criterion, powers, device, optimizer)
        train_losses.append(avg_train_loss)

        # Evaluation pass with gradient tracking disabled.
        model.eval()
        with torch.no_grad():
            avg_test_loss = _run_epoch(model, test_loader, criterion, powers, device)
        test_losses.append(avg_test_loss)

        # Report the losses every `epochs_output` epochs.
        if epoch % args.epochs_output == 0:
            # NaN is truthy, so an empty test set yields a NaN ratio; only an
            # exact 0.0 test loss takes the guard (avoids ZeroDivisionError).
            loss_ratio = avg_train_loss / avg_test_loss if avg_test_loss else float("nan")
            print(
                f"Epoch {epoch} | Train Loss: {avg_train_loss:.4f} | Test Loss: {avg_test_loss:.4f} | 损失比: {loss_ratio:.2f}:1")

    # Export the trained model.
    export_model(model, args.model_dir, args.name, args.model_format)

    # Visualisation (disabled):
    # import matplotlib.pyplot as plt
    # plt.scatter(x_ori, y_ori, label='ori')
    # powers = torch.arange(1, args.degree + 1, dtype=x.dtype)
    # x_input = x ** powers.view(1, -1)
    # x_input.to(DEVICE)
    # model.to(DEVICE)
    # y_output = model(x_input).detach().numpy()
    # y_output = torch.tensor(y_output)
    # y_real = y_output
    # if args.normalization:
    #     y_real = normalization.inverse_transform(y_output)
    # plt.plot(x_ori, y_real.squeeze(), 'r', label='fit')
    # plt.legend()
    # plt.show()


if __name__ == "__main__":
    main()