新建仓库维护数据预测项目

This commit is contained in:
13151580307
2025-10-17 14:59:16 +08:00
commit 516126d2a5
72 changed files with 82332 additions and 0 deletions

View File

@@ -0,0 +1,231 @@
import copy
import pandas as pd
import torch
from sympy import false
from FC_ML_Data.FC_ML_Data_Output.Data_Output_File import tensor_to_json
from FC_ML_Data.FC_ML_Data_Process.Data_Process_Normalization import Normalizer
from FC_ML_Tool.Check import is_number
def get_data_from_excel_xy(data_path, sheet_name='Sheet1', normalization=False, normalization_type='minmax'):
    """Read an Excel sheet and split it into input (X) and output (Y) tensors.

    The sheet is read without a header row. All columns except the last
    become the input features; the last column becomes the output.

    Args:
        data_path (str): Path of the Excel file.
        sheet_name (str, optional): Name of the sheet to read.
        normalization (bool, optional): When truthy, X and Y are normalized.
        normalization_type (str, optional): Method name handed to ``Normalizer``.

    Returns:
        tuple: ``(x_data_ori, y_data_ori, x_data, y_data, normalizer)`` where
        the ``*_ori`` tensors hold the raw float32 values and ``x_data`` /
        ``y_data`` hold the normalized values (or plain copies of the raw
        values when ``normalization`` is falsy).

    Examples:
        get_data_from_excel_xy("D://test.excel")
    """
    data = pd.read_excel(data_path, sheet_name=sheet_name, header=None)
    # Every column but the last is X, the last column is Y.
    x_data_ori = torch.tensor(data.iloc[:, :-1].to_numpy(), dtype=torch.float32)
    y_data_ori = torch.tensor(data.iloc[:, -1].to_numpy(), dtype=torch.float32)
    # Independent copies so the raw tensors returned to the caller never alias
    # the (possibly normalized) working tensors.
    x_data = x_data_ori.clone()
    y_data = y_data_ori.clone()
    normalizer = Normalizer(method=normalization_type)
    if normalization:
        normalizer.fit(x_data)
        x_data = normalizer.transform(x_data)
        # NOTE(review): the same normalizer instance is re-fitted on Y here.
        # If fit() overwrites its parameters, the returned normalizer only
        # describes Y and cannot invert X -- confirm against callers.
        normalizer.fit(y_data)
        y_data = normalizer.transform(y_data)
    return x_data_ori, y_data_ori, x_data, y_data, normalizer
def get_data_from_csv(data_path, begin_x, end_x, begin_y, end_y, skip_rows=0, normalization=False, normalization_type='minmax'):
    """Read a GBK-encoded CSV file and split it into input (X) and output (Y) tensors.

    Args:
        data_path (str): Path of the CSV file.
        begin_x (int): First column (inclusive) of the input features.
        end_x (int): Last column (exclusive) of the input features.
        begin_y (int): First column (inclusive) of the output features.
        end_y (int): Last column (exclusive) of the output features.
        skip_rows (int, optional): Passed to ``pandas.read_csv`` as ``skiprows``.
        normalization (bool, optional): When truthy, X and Y are normalized.
        normalization_type (str, optional): Method name handed to ``Normalizer``.

    Returns:
        tuple: ``(x_data_ori, y_data_ori, x_data, y_data, normalizer)`` where
        the ``*_ori`` tensors hold the raw float32 values and ``x_data`` /
        ``y_data`` hold the normalized values (or plain copies of the raw
        values when ``normalization`` is falsy).

    Examples:
        get_data_from_csv("D://test.excel",0,8,10,18)
    """
    data = pd.read_csv(data_path, encoding='gbk', skiprows=skip_rows)
    x_data_ori = torch.tensor(data.iloc[:, begin_x:end_x].to_numpy(), dtype=torch.float32)
    # The Y columns are a slice, exactly like the X columns. The previous
    # ``iloc[:, begin_y, end_y]`` passed three indexers and always raised.
    y_data_ori = torch.tensor(data.iloc[:, begin_y:end_y].to_numpy(), dtype=torch.float32)
    # Independent copies so the raw tensors returned to the caller never alias
    # the (possibly normalized) working tensors.
    x_data = x_data_ori.clone()
    y_data = y_data_ori.clone()
    normalizer = Normalizer(method=normalization_type)
    if normalization:
        normalizer.fit(x_data)
        x_data = normalizer.transform(x_data)
        # NOTE(review): the same normalizer instance is re-fitted on Y here.
        # If fit() overwrites its parameters, the returned normalizer only
        # describes Y and cannot invert X -- confirm against callers.
        normalizer.fit(y_data)
        y_data = normalizer.transform(y_data)
    return x_data_ori, y_data_ori, x_data, y_data, normalizer
def get_data_from_csv_filter(data_path, filter_rows, filter_file_path, filter_file_name, skip_rows, skip_file_path, skip_file_name):
    """Dump the head of a CSV file and a strided sample of it to two JSON files.

    Args:
        data_path (str): Path of the GBK-encoded CSV file.
        filter_rows (int): Number of leading data rows to read for the head dump.
        filter_file_path (str): Output directory of the head dump.
        filter_file_name (str): Output file name of the head dump.
        skip_rows (int): Sampling stride; file lines whose index is not a
            multiple of this value are skipped on the second read.
        skip_file_path (str): Output directory of the sampled dump.
        skip_file_name (str): Output file name of the sampled dump.

    Returns:
        tuple: ``(df_data, data_ori)`` -- the tensor built from the first
        ``filter_rows`` rows and the tensor built from the sampled rows.

    Examples:
        get_data_from_csv_filter("D://test.excel",0,“D://filter//”,“filter.csv”,10,“D://skip//”,“skip.csv”)
    """
    # First pass: only the leading rows.
    head_frame = pd.read_csv(data_path, encoding='gbk', nrows=filter_rows)
    df_data = torch.tensor(head_frame.values)
    tensor_to_json(df_data, filter_file_path, filter_file_name)

    # Second pass: keep only the lines whose index is a multiple of skip_rows.
    def _is_dropped(line_index):
        return line_index % skip_rows != 0

    sampled_frame = pd.read_csv(data_path, encoding='gbk', skiprows=_is_dropped)
    data_ori = torch.tensor(sampled_frame.values)
    tensor_to_json(data_ori, skip_file_path, skip_file_name)
    return df_data, data_ori
def get_data_from_csv_feature(data_path, skip_rows=100, sample_rows=100, normalization_type='minmax'):
    """Read a CSV file and return its labels, a head excerpt, column statistics and a strided sample.

    Args:
        data_path (str): Path of the GBK-encoded CSV file.
        skip_rows (int): Stride used when sampling rows from the full data.
        sample_rows (int): Exclusive end row of the head excerpt.
        normalization_type (str, optional): Method name handed to ``Normalizer``.

    Returns:
        tuple: ``(label_name, source_data, min, max, mean, sampled_indices,
        data_sample)`` in that order. ``min`` / ``max`` / ``mean`` are read
        from ``normalizer.params`` after fitting on the full data.
        NOTE(review): confirm ``Normalizer`` fills all three keys for methods
        other than ``'minmax'``.

    Examples:
        get_data_from_csv_feature("D://test.excel",0,8,10,18)
    """
    frame = pd.read_csv(data_path, encoding='gbk')
    # Drop columns, then rows, that are entirely empty.
    frame = frame.dropna(axis=1, how='all')
    frame = frame.dropna(axis=0, how='all')
    first_cell = frame.iloc[0, 0]
    print(first_cell)
    if is_number(first_cell):
        # First row is already data: synthesize labels param1..paramN.
        label_name = ["param" + str(col + 1) for col in range(frame.columns.size)]
        first_data_row = 0
    else:
        # First row holds the labels; the data starts on the next row.
        label_name = frame.iloc[0]
        first_data_row = 1
    source_data = torch.tensor(frame.iloc[first_data_row:sample_rows, ].to_numpy(), dtype=torch.float32)
    data_ori = torch.tensor(frame.iloc[first_data_row:, ].to_numpy(), dtype=torch.float32)
    # The normalizer is only fitted; it is used here as a statistics collector.
    normalizer = Normalizer(method=normalization_type)
    normalizer.fit(data_ori)
    data_sample = data_ori[::skip_rows]
    # Row numbers (relative to data_ori) of the sampled rows.
    sampled_indices = torch.arange(0, len(data_ori), skip_rows)
    stats = normalizer.params
    return label_name, source_data, stats["min"], stats["max"], stats["mean"], sampled_indices, data_sample
def get_train_data_from_csv(data_path, normalization=False, normalization_type='minmax'):
    """Read a CSV file and return it as (optionally normalized) training data.

    Args:
        data_path (str): Path of the GBK-encoded CSV file.
        normalization (bool, optional): Whether to normalize the data.
        normalization_type (str, optional): Method name handed to ``Normalizer``.

    Returns:
        torch.Tensor when ``normalization`` is falsy: the raw float32 data.
        tuple ``(data_normal, normalizer)`` when ``normalization`` is truthy.
        The differing return arity is kept for backward compatibility.

    Examples:
        get_train_data_from_csv("D://test.excel")
    """
    df = pd.read_csv(data_path, encoding='gbk')
    # Drop columns, then rows, that are entirely empty.
    df = df.dropna(axis=1, how='all')
    df = df.dropna(axis=0, how='all')
    # A non-numeric first cell means the first row holds labels, so skip it.
    first_data_row = 0 if is_number(df.iloc[0, 0]) else 1
    data_ori = torch.tensor(df.iloc[first_data_row:, ].to_numpy(), dtype=torch.float32)
    if not normalization:
        return data_ori
    normalizer = Normalizer(method=normalization_type)
    normalizer.fit(data_ori)
    data_normal = normalizer.transform(data_ori)
    return data_normal, normalizer

View File

@@ -0,0 +1,25 @@
import torch
from torchvision import transforms
from PIL import Image
# Image preprocessing pipeline: resize, center-crop, convert to tensor, then
# normalize each channel with the given mean / std values.
transform = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# Prediction entry point
def predict(model_path, data_path, result_path, image_path):
    """Classify one image with a saved model and return the predicted class index.

    Args:
        model_path (str): Path of the serialized model to load.
        data_path: Not used by this function; kept for interface compatibility.
        result_path: Not used by this function; kept for interface compatibility.
        image_path (str): Path of the image to classify.

    Returns:
        int: Index of the largest value in the model output.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Load the model the caller asked for (previously 'model.pth' was hard-coded
    # and model_path was ignored).
    # NOTE(security): torch.load unpickles the file; only load trusted models.
    model = torch.load(model_path, map_location=device)
    model.eval()
    # Close the image file as soon as the RGB copy has been made.
    with Image.open(image_path) as img:
        rgb_image = img.convert('RGB')
    # unsqueeze(0) adds the leading batch dimension before the forward pass.
    input_tensor = transform(rgb_image).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(input_tensor)
    return output.argmax().item()
if __name__ == '__main__':
    # predict() takes four positional arguments; the single-argument call
    # raised TypeError as soon as the module was imported. data_path and
    # result_path are not read by predict(), so None is passed for them.
    print(f"Predicted class: {predict('model.pth', None, None, 'test.jpg')}")

View File

View File

@@ -0,0 +1,12 @@
import torch
import pandas as pd
import json
def tensor_to_excel(tensor, output_path, file_name="output"):
    """Write a tensor to ``<output_path>/<file_name>.xlsx`` without an index column.

    Args:
        tensor (torch.Tensor): Tensor to export.
        output_path (str): Directory that receives the file.
        file_name (str, optional): File name without extension.
    """
    # Tensor.numpy() only works for CPU tensors that do not require grad, so
    # detach and move to the CPU first (both are no-ops for plain CPU tensors).
    df = pd.DataFrame(tensor.detach().cpu().numpy())
    df.to_excel(output_path + '/' + file_name + '.xlsx', index=False)
def tensor_to_json(tensor, output_path, file_name="output"):
    """Serialize a tensor as nested lists into ``<output_path>/<file_name>.json``.

    Args:
        tensor (torch.Tensor): Tensor to export.
        output_path (str): Directory that receives the file.
        file_name (str, optional): File name without extension.
    """
    payload = tensor.tolist()  # nested Python lists mirror the tensor shape
    destination = '/'.join((output_path, file_name)) + '.json'
    with open(destination, 'w') as handle:
        json.dump(payload, handle)

View File

@@ -0,0 +1,22 @@
#1 pytorch 自有格式导出
import torch
def export_model_pt(model, target, name="model"):
    """Script the model with TorchScript and save it as ``<target><name>.pt``.

    ``target`` is concatenated directly with ``name``, so it must already end
    with a path separator.
    """
    scripted = torch.jit.script(model)  # torch.jit.trace(model, input) is the alternative
    destination = target + name + ".pt"
    scripted.save(destination)
# 2. Export in the general-purpose ONNX format
def export_model_onnx(model, input_tensor, target, name="model"):
    """Export the model to ``<target><name>.onnx`` using ``input_tensor`` as the example input.

    ``target`` is concatenated directly with ``name``, so it must already end
    with a path separator.
    """
    destination = target + name + ".onnx"
    torch.onnx.export(model, input_tensor, destination)
# 3. Export the raw weights as a binary file
def export_model_bin(model, target, name="weights"):
    """Save the model's ``state_dict`` to ``<target><name>.bin``.

    ``target`` is concatenated directly with ``name``, so it must already end
    with a path separator.
    """
    weights = model.state_dict()
    destination = target + name + ".bin"
    torch.save(weights, destination)
def export_model(model, target, file_name, name, input_tensor=None):
    """Export a model in the format selected by ``name``.

    Args:
        model: Model to export.
        target (str): Output directory prefix (concatenated with ``file_name``).
        file_name (str): Output file name without extension.
        name (str): Export type: ``'bin'``, ``'onnx'`` or ``'pt'``.
        input_tensor (optional): Example input; required for ``'onnx'``.

    Raises:
        ValueError: If ``name`` is not a supported export type, or if
            ``'onnx'`` is requested without ``input_tensor``.
    """
    if name == 'bin':
        return export_model_bin(model, target, file_name)
    if name == 'onnx':
        # The ONNX exporter needs an example input; previously ``target`` was
        # passed in its place, so this branch could never succeed.
        if input_tensor is None:
            raise ValueError("ONNX export requires an example input_tensor")
        return export_model_onnx(model, input_tensor, target, file_name)
    if name == 'pt':
        # Previously dispatched to export_model_bin by mistake.
        return export_model_pt(model, target, file_name)
    raise ValueError("不支持的导出类型")

View File

@@ -0,0 +1 @@
import FC_ML_Data

View File

@@ -0,0 +1,15 @@
def trapezoidal_discrete(x, y):
    """Integrate discrete samples with the trapezoidal rule.

    Args:
        x: Sample positions (expected to be monotonically increasing).
        y: Function values at those positions.

    Returns:
        The approximate integral; ``0.0`` when fewer than two samples are given.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length.
    """
    count = len(x)
    if count != len(y):
        raise ValueError("x和y数组长度必须相同")
    area = 0.0
    for right in range(1, count):
        left = right - 1
        width = x[right] - x[left]
        # Area of one trapezoid: mean of the two heights times the width.
        area += 0.5 * (y[right] + y[left]) * width
    return area

View File

@@ -0,0 +1,6 @@
#对数据做微分操作
import torch
# Scalar leaf tensor tracked by autograd
x = torch.tensor(2.0, requires_grad=True)
# y = x^2 + 3x + 1
y = x.pow(2) + 3 * x + 1
y.backward()  # populates x.grad with dy/dx
print(x.grad)  # prints the derivative value

View File

@@ -0,0 +1,29 @@
import torch
import torch.fft
# Filtering algorithms
def fft_filter(input, threshold=0.1):
    """Zero out spectral components whose magnitude is at or below ``threshold``.

    Args:
        input (torch.Tensor): Data to filter; a 2-D FFT is applied to it.
        threshold (float, optional): Magnitude below which (inclusive) a
            spectral coefficient is discarded.

    Returns:
        torch.Tensor: Real part of the inverse transform of the masked spectrum.
    """
    centered = torch.fft.fftshift(torch.fft.fft2(input))
    # 1.0 where the coefficient is kept, 0.0 where it is dropped.
    keep = (torch.abs(centered) > threshold).float()
    masked = centered * keep
    return torch.fft.ifft2(torch.fft.ifftshift(masked)).real
def gaussian_kernel(size=3, sigma=1.0):
    """Build a normalized 2-D Gaussian kernel of shape ``(size, size)``.

    Args:
        size (int, optional): Side length of the kernel.
        sigma (float, optional): Standard deviation of the Gaussian.

    Returns:
        torch.Tensor: Kernel whose entries sum to 1.

    Raises:
        ValueError: If ``sigma`` is not positive.
    """
    if sigma <= 0:
        raise ValueError("sigma must be positive")
    # Offsets from the kernel centre, e.g. [-1, 0, 1] for size 3.
    offsets = torch.arange(size).float() - size // 2
    # exp(-d^2 / (2 sigma^2)). The previous exp(-d) was neither squared nor
    # scaled by sigma, which produced an asymmetric kernel.
    profile = torch.exp(-offsets.pow(2) / (2.0 * sigma ** 2))
    kernel = profile.outer(profile)  # separable 1-D profile -> 2-D kernel
    return kernel / kernel.sum()
def spatial_gaussian_filter(input, kernel_size=3, sigma=1.0):
    """Blur ``input`` with a Gaussian kernel, one channel at a time.

    Args:
        input (torch.Tensor): Tensor laid out as ``(..., C, H, W)``, as
            accepted by ``conv2d``.
        kernel_size (int, optional): Side length of the Gaussian kernel.
        sigma (float, optional): Standard deviation of the Gaussian.

    Returns:
        torch.Tensor: Result of the convolution with ``kernel_size // 2``
        pixels of zero padding.
    """
    channels = input.shape[-3]
    # Match the input's device and dtype so non-float32 inputs also work.
    kernel = gaussian_kernel(kernel_size, sigma).to(device=input.device, dtype=input.dtype)
    # One copy of the kernel per channel with groups=channels gives a depthwise
    # convolution; for single-channel input this equals the former behaviour.
    weight = kernel.view(1, 1, kernel_size, kernel_size).repeat(channels, 1, 1, 1)
    return torch.nn.functional.conv2d(
        input,
        weight,
        padding=kernel_size // 2,
        groups=channels,
    )
def median_filter(input, kernel_size=3):
    """Compute the median of every ``kernel_size`` x ``kernel_size`` window.

    Args:
        input (torch.Tensor): 4-D tensor; windows slide over dims 2 and 3.
        kernel_size (int, optional): Side length of the window.

    Returns:
        torch.Tensor: Window medians with the two spatial dims flattened into
        one, i.e. shape ``(input.shape[0], input.shape[1], -1)``.
        NOTE(review): the result is not reshaped back to the input's spatial
        layout -- confirm callers expect the flattened form.
    """
    border = kernel_size // 2
    # Reflect-pad so windows centred on edge pixels are fully populated.
    padded = torch.nn.functional.pad(input, (border, border, border, border), mode='reflect')
    windows = padded.unfold(2, kernel_size, 1).unfold(3, kernel_size, 1)
    batch, channels = input.shape[:2]
    # One row of kernel_size**2 values per window position.
    flat_windows = windows.contiguous().view(batch, channels, -1, kernel_size ** 2)
    return flat_windows.median(dim=-1)[0]

View File

@@ -0,0 +1,47 @@
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
# 1. 1-D linear interpolation example: 5 samples of sin(x) upsampled to 50 points
x_original = torch.linspace(0, 10, 5)
y_original = torch.sin(x_original)
x_new = torch.linspace(0, 10, 50)
# The samples are viewed as (1, 1, length) before interpolation and squeezed
# back to 1-D afterwards.
y_interp = F.interpolate(
y_original.view(1, 1, -1),
size=x_new.numel(),
mode='linear',
align_corners=True
).squeeze()
# 2. 2-D bilinear interpolation example: 5x5 grid upsampled to 20x20
grid_original = torch.rand(1, 1, 5, 5) # random 5x5 matrix
grid_interp = F.interpolate(
grid_original,
size=(20, 20),
mode='bilinear',
align_corners=True
).squeeze()
# Visualization
plt.figure(figsize=(12, 6))
# Left panel: original 1-D samples against the interpolated curve
plt.subplot(1, 2, 1)
plt.plot(x_original.numpy(), y_original.numpy(), 'ro-', label='Original')
plt.plot(x_new.numpy(), y_interp.numpy(), 'b-', alpha=0.7, label='Interpolated')
plt.title('1D Linear Interpolation')
plt.legend()
# Right panel: the original 5x5 grid
plt.subplot(1, 2, 2)
plt.imshow(grid_original.squeeze(), cmap='viridis', extent=[0, 1, 0, 1], interpolation='none')
plt.title('Original 5x5')
plt.colorbar()
plt.tight_layout()
# Separate figure for the interpolated 20x20 grid
plt.figure()
plt.imshow(grid_interp.numpy(), cmap='viridis', extent=[0, 1, 0, 1])
plt.title('Interpolated 20x20')
plt.colorbar()
plt.show()

View File

@@ -0,0 +1,66 @@
import torch
import torch.nn as nn
# Data normalization utility class
class Normalizer:
    """Fit, apply and invert a column-wise normalization of a tensor.

    Supported methods:
        * ``'minmax'``  -- rescale each column to roughly ``[0, 1]``.
        * ``'zscore'``  -- subtract the column mean and divide by the column std.
        * ``'decimal'`` -- divide each column by the smallest power of ten that
          is >= its largest absolute value.

    Any other method name leaves the data unchanged.
    """

    # Added to denominators so constant columns do not divide by zero.
    _EPS = 1e-8

    def __init__(self, method='minmax'):
        self.method = method
        self.params = {}

    def fit(self, data):
        """Compute the normalization parameters along dim 0 and return ``self``."""
        if self.method == 'minmax':
            self.params['min'] = data.min(dim=0)[0]
            self.params['max'] = data.max(dim=0)[0]
            self.params['mean'] = data.mean(dim=0)
        elif self.method == 'zscore':
            self.params['mean'] = data.mean(dim=0)
            self.params['std'] = data.std(dim=0)
        elif self.method == 'decimal':
            self.params['max_abs'] = data.abs().max(dim=0)[0]
        return self

    def load_params(self, method="minmax", min_in=0, max_in=0, mean_in=0, std=0, max_abs=0):
        """Install previously computed parameters instead of calling ``fit``."""
        self.method = method
        self.params['min'] = min_in
        self.params['max'] = max_in
        self.params['mean'] = mean_in
        self.params['std'] = std
        self.params['max_abs'] = max_abs

    def _decimal_scale(self):
        """Return the power-of-ten divisor used by the ``'decimal'`` method."""
        # as_tensor lets plain numbers installed through load_params work too.
        max_abs = torch.as_tensor(self.params['max_abs'])
        # log10(0) is -inf, which would turn the scale into 0 and the output
        # into NaN; columns without a positive max_abs are scaled by 1 instead.
        safe = torch.where(max_abs > 0, max_abs, torch.ones_like(max_abs))
        return 10 ** torch.ceil(torch.log10(safe))

    def transform(self, data):
        """Apply the normalization."""
        if self.method == 'minmax':
            span = self.params['max'] - self.params['min'] + self._EPS
            return (data - self.params['min']) / span
        elif self.method == 'zscore':
            return (data - self.params['mean']) / (self.params['std'] + self._EPS)
        elif self.method == 'decimal':
            return data / self._decimal_scale()
        return data

    def inverse_transform(self, data):
        """Undo ``transform`` using the same denominators, epsilon included."""
        if self.method == 'minmax':
            span = self.params['max'] - self.params['min'] + self._EPS
            return data * span + self.params['min']
        elif self.method == 'zscore':
            return data * (self.params['std'] + self._EPS) + self.params['mean']
        elif self.method == 'decimal':
            return data * self._decimal_scale()
        return data
# Example usage
if __name__ == '__main__':
    # Simulated data
    data = torch.randn(100, 3) * 5 + 2
    # Build the normalizer and fit it in one step (fit returns the instance)
    normalizer = Normalizer(method='zscore').fit(data)
    # Forward transform
    normalized_data = normalizer.transform(data)
    print(f"归一化后数据范围: {normalized_data.min():.2f} ~ {normalized_data.max():.2f}")
    # Inverse transform and report the largest reconstruction error
    original_data = normalizer.inverse_transform(normalized_data)
    print(f"数据还原误差: {torch.abs(data - original_data).max():.4f}")

View File

@@ -0,0 +1,20 @@
import numpy as np
import torch
from scipy.interpolate import RBFInterpolator
# Radial basis function (RBF) interpolation demo
X_train = torch.rand(100, 2)  # 100 training points in 2-D
y_train = torch.sin(X_train[:, 0] * 2 * np.pi)  # target function
X_test = torch.rand(50, 2)  # 50 query points
# SciPy's RBFInterpolator takes the training data in its constructor, works on
# NumPy arrays and has no .cuda() method, so the former CUDA branch and the
# three-argument call were removed.
# NOTE(review): SciPy's gaussian kernel is exp(-(epsilon * r)**2), so a larger
# epsilon means a narrower kernel. The original value 0.1 is assumed here to be
# a kernel width and is therefore inverted -- confirm the intended length scale.
KERNEL_WIDTH = 0.1
rbf = RBFInterpolator(
    X_train.numpy(),
    y_train.numpy(),
    kernel='gaussian',
    epsilon=1.0 / KERNEL_WIDTH,
)
# Evaluate at the query points and convert the result back to a tensor
preds = torch.from_numpy(rbf(X_test.numpy()))
print(preds.shape)  # expected: torch.Size([50])

View File

@@ -0,0 +1,28 @@
import torch
#前提
#系数选择L1/L2系数通常取0.001-0.1,需通过验证集调整‌
# L1 regularization
def l1_regularization(model, lambda_l1):
    """Return ``lambda_l1`` times the sum of the L1 norms of all model parameters.

    Args:
        model: Object exposing ``parameters()``.
        lambda_l1 (float): Regularization coefficient.
    """
    penalty = sum((torch.norm(weights, p=1) for weights in model.parameters()), 0.)
    return lambda_l1 * penalty
# 训练循环示例
# for epoch in range(epochs):
# loss = criterion(outputs, labels) + l1_regularization(model, 0.001)
#L2正则化使用样例pytorch内置L2正则化
# import torch.optim as optim
#
# # 定义模型
# model = YourModel()
# # 设置weight_decay即为L2正则化系数推荐0.01-0.001
# optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)
# Combined L1 + L2 regularization
def elastic_regularization(model, lambda_l1, lambda_l2):
    """Return a weighted sum of the L1 and L2 norms of all model parameters.

    Args:
        model: Object exposing ``parameters()``.
        lambda_l1 (float): Weight of the summed L1 norms.
        lambda_l2 (float): Weight of the summed L2 norms.

    Note:
        The L2 term adds the (un-squared) 2-norm of each parameter tensor.
    """
    l1_total = 0.
    l2_total = 0.
    for weights in model.parameters():
        l1_total = l1_total + torch.norm(weights, p=1)
        l2_total = l2_total + torch.norm(weights, p=2)
    return lambda_l1 * l1_total + lambda_l2 * l2_total

0
FC_ML_Data/__init__.py Normal file
View File