Create a new repository for the maintenance data prediction project
231
FC_ML_Data/FC_ML_Data_Load/Data_Load_Excel.py
Normal file
@@ -0,0 +1,231 @@
import copy

import pandas as pd
import torch

from FC_ML_Data.FC_ML_Data_Output.Data_Output_File import tensor_to_json
from FC_ML_Data.FC_ML_Data_Process.Data_Process_Normalization import Normalizer
from FC_ML_Tool.Check import is_number


def get_data_from_excel_xy(data_path, sheet_name='Sheet1', normalization=False, normalization_type='minmax'):
    """Read an Excel file and split it into input and output features, with optional
    normalization/standardization using one of several operators.

    Args:
        data_path (str): absolute path of the file
        sheet_name (str, optional): worksheet name
        normalization (bool, optional): whether to apply normalization/standardization
        normalization_type (str, optional): normalization/standardization operator

    Returns:
        x_data_ori: original input-feature tensor
        y_data_ori: original output-feature tensor
        x_data: input-feature tensor after normalization/standardization
        y_data: output-feature tensor after normalization/standardization
        normalizer: the fitted Normalizer

    Raises:
        data_path: invalid path

    Examples:
        get_data_from_excel_xy("D://test.xlsx")
    """
    data = pd.read_excel(data_path, sheet_name=sheet_name, header=None)

    x_data_ori = torch.tensor(data.iloc[:, :-1].to_numpy(), dtype=torch.float32)  # every column except the last as X
    y_data_ori = torch.tensor(data.iloc[:, -1].to_numpy(), dtype=torch.float32)   # the last column as Y
    x_data = copy.deepcopy(x_data_ori)
    y_data = copy.deepcopy(y_data_ori)
    normalizer = Normalizer(method=normalization_type)
    if normalization:
        # fit the normalizer
        normalizer.fit(x_data)
        # apply the transform
        x_data = normalizer.transform(x_data)
        normalizer.fit(y_data)
        y_data = normalizer.transform(y_data)
    return x_data_ori, y_data_ori, x_data, y_data, normalizer

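
# Usage sketch for get_data_from_excel_xy (illustrative only; the path below is
# hypothetical and assumes a worksheet whose last column holds the target values).
def _example_excel_xy():
    x_raw, y_raw, x_scaled, y_scaled, norm = get_data_from_excel_xy(
        "D://data//train.xlsx", sheet_name='Sheet1',
        normalization=True, normalization_type='minmax')
    # with normalization=True, x_scaled/y_scaled are scaled copies of the raw tensors
    print(x_raw.shape, y_raw.shape, x_scaled.shape, y_scaled.shape)
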
def get_data_from_csv(data_path, begin_x, end_x, begin_y, end_y, skip_rows=0, normalization=False, normalization_type='minmax'):
    """Read a CSV file and split it into input and output features, with optional
    normalization/standardization using one of several operators.

    Args:
        data_path (str): absolute path of the file
        begin_x (int): first column of the input features
        end_x (int): end column of the input features (exclusive)
        begin_y (int): first column of the output features
        end_y (int): end column of the output features (exclusive)
        skip_rows (int, optional): number of leading rows to skip
        normalization (bool, optional): whether to apply normalization/standardization
        normalization_type (str, optional): normalization/standardization operator

    Returns:
        x_data_ori: original input-feature tensor
        y_data_ori: original output-feature tensor
        x_data: input-feature tensor after normalization/standardization
        y_data: output-feature tensor after normalization/standardization
        normalizer: the fitted Normalizer

    Raises:
        data_path: invalid path

    Examples:
        get_data_from_csv("D://test.csv", 0, 8, 10, 18)
    """
    data = pd.read_csv(data_path, encoding='gbk', skiprows=skip_rows)  # skip the leading rows

    x_data_ori = torch.tensor(data.iloc[:, begin_x:end_x].to_numpy(), dtype=torch.float32)  # columns begin_x:end_x as X
    y_data_ori = torch.tensor(data.iloc[:, begin_y:end_y].to_numpy(), dtype=torch.float32)  # columns begin_y:end_y as Y
    x_data = copy.deepcopy(x_data_ori)
    y_data = copy.deepcopy(y_data_ori)
    normalizer = Normalizer(method=normalization_type)
    if normalization:
        # fit the normalizer
        normalizer.fit(x_data)
        # apply the transform
        x_data = normalizer.transform(x_data)
        normalizer.fit(y_data)
        y_data = normalizer.transform(y_data)
    return x_data_ori, y_data_ori, x_data, y_data, normalizer

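
# Usage sketch for get_data_from_csv (illustrative only; the path and column
# ranges below are hypothetical and assume a GBK-encoded CSV file).
def _example_csv_xy():
    x_raw, y_raw, x_scaled, y_scaled, norm = get_data_from_csv(
        "D://data//train.csv", begin_x=0, end_x=8, begin_y=10, end_y=18,
        skip_rows=1, normalization=True)
    print(x_raw.shape, y_raw.shape)
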
def get_data_from_csv_filter(data_path, filter_rows, filter_file_path, filter_file_name, skip_rows, skip_file_path, skip_file_name):
    """Read a CSV data file and write two files: one containing the first
    filter_rows rows and one containing every skip_rows-th (sampled) row.

    Args:
        data_path (str): absolute path of the file
        filter_rows (int): number of leading rows written to the filter file
        filter_file_path (str): output path of the filter file
        filter_file_name (str): output name of the filter file
        skip_rows (int): sampling interval (every skip_rows-th row is kept)
        skip_file_path (str): output path of the sampled file
        skip_file_name (str): output name of the sampled file

    Returns:
        df_data: tensor containing the first filter_rows rows
        data_ori: tensor containing the sampled rows

    Raises:
        data_path: invalid path

    Examples:
        get_data_from_csv_filter("D://test.csv", 0, "D://filter//", "filter.csv", 10, "D://skip//", "skip.csv")
    """
    # read the first filter_rows rows
    df = pd.read_csv(data_path, encoding='gbk', nrows=filter_rows)
    # convert to a PyTorch tensor
    df_data = torch.tensor(df.values)
    tensor_to_json(df_data, filter_file_path, filter_file_name)
    # keep only every skip_rows-th row
    data = pd.read_csv(data_path, encoding='gbk', skiprows=lambda x: x % skip_rows != 0)
    data_ori = torch.tensor(data.values)
    tensor_to_json(data_ori, skip_file_path, skip_file_name)
    return df_data, data_ori

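
# Usage sketch for get_data_from_csv_filter (illustrative only; all paths are
# hypothetical). Writes the first 100 rows and every 10th row via tensor_to_json,
# then returns both tensors.
def _example_csv_filter():
    head_rows, sampled_rows = get_data_from_csv_filter(
        "D://data//raw.csv", 100, "D://filter//", "filter.csv",
        10, "D://skip//", "skip.csv")
    print(head_rows.shape, sampled_rows.shape)
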
def get_data_from_csv_feature(data_path, skip_rows=100, sample_rows=100, normalization_type='minmax'):
    """Read a CSV data file and return its column labels, a preview of the leading
    rows, per-column statistics and a down-sampled subset of the rows.

    Args:
        data_path (str): absolute path of the file
        sample_rows (int): number of leading rows returned as the preview
        skip_rows (int): sampling interval (every skip_rows-th row is kept)
        normalization_type (str, optional): normalization/standardization operator

    Returns:
        label_name: column labels
        source_data: the first sample_rows rows as a tensor
        min: per-column minimum
        max: per-column maximum
        mean: per-column mean
        sampled_indices: row indices of the sampled rows
        data_sample: the sampled rows as a tensor

    Raises:
        data_path: invalid path

    Examples:
        get_data_from_csv_feature("D://test.csv")
    """
    df = pd.read_csv(data_path, encoding='gbk')
    df = df.dropna(axis=1, how='all')  # drop columns that are entirely empty
    df = df.dropna(axis=0, how='all')  # drop rows that are entirely empty
    # check whether the first cell is numeric
    if is_number(df.iloc[0, 0]):  # the first row is data, not labels
        # generate default column labels
        label_name = []
        cols = df.columns.size
        for i in range(cols):
            label_name.append("param" + str(i + 1))
        # read the full data set
        source_data = torch.tensor(df.iloc[0:sample_rows, ].to_numpy(), dtype=torch.float32)
        data_ori = torch.tensor(df.iloc[:, ].to_numpy(), dtype=torch.float32)
        normalizer = Normalizer(method=normalization_type)
        # fit the normalizer to obtain the per-column statistics
        normalizer.fit(data_ori)
        data_sample = data_ori[::skip_rows]
        sampled_indices = torch.arange(0, len(data_ori), skip_rows)  # record the sampled row numbers
        return label_name, source_data, normalizer.params["min"], normalizer.params["max"], normalizer.params["mean"], sampled_indices, data_sample
    else:
        # use the first row as the column labels
        label_name = df.iloc[0]
        # read the full data set, skipping the label row
        source_data = torch.tensor(df.iloc[1:sample_rows, ].to_numpy(), dtype=torch.float32)
        data_ori = torch.tensor(df.iloc[1:, ].to_numpy(), dtype=torch.float32)
        normalizer = Normalizer(method=normalization_type)
        # fit the normalizer to obtain the per-column statistics
        normalizer.fit(data_ori)
        data_sample = data_ori[::skip_rows]
        sampled_indices = torch.arange(0, len(data_ori), skip_rows)  # record the sampled row numbers
        return label_name, source_data, normalizer.params["min"], normalizer.params["max"], normalizer.params["mean"], sampled_indices, data_sample

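
# Usage sketch for get_data_from_csv_feature (illustrative only; the path is
# hypothetical). Useful for a quick profile of a data file before training.
def _example_csv_feature():
    labels, preview, col_min, col_max, col_mean, idx, sample = get_data_from_csv_feature(
        "D://data//raw.csv", skip_rows=100, sample_rows=100)
    print(labels)
    print(col_min, col_max, col_mean)
    print(idx.shape, sample.shape)
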
def get_train_data_from_csv(data_path, normalization=False, normalization_type='minmax'):
    """Read a CSV data file and produce (optionally normalized) training data.

    Args:
        data_path (str): absolute path of the file
        normalization (bool, optional): whether to apply normalization/standardization
        normalization_type (str, optional): normalization/standardization operator

    Returns:
        train_data: training input features
        normalizer: the fitted Normalizer (only returned when normalization is enabled)

    Raises:
        data_path: invalid path

    Examples:
        get_train_data_from_csv("D://test.csv")
    """
    df = pd.read_csv(data_path, encoding='gbk')
    df = df.dropna(axis=1, how='all')  # drop columns that are entirely empty
    df = df.dropna(axis=0, how='all')  # drop rows that are entirely empty
    # check whether the first cell is numeric
    if is_number(df.iloc[0, 0]):  # the first row is data, not labels
        # generate default column labels
        label_name = []
        cols = df.columns.size
        for i in range(cols):
            label_name.append("param" + str(i + 1))
        # read the full data set
        data_ori = torch.tensor(df.iloc[:, ].to_numpy(), dtype=torch.float32)
        if not normalization:
            return data_ori
        normalizer = Normalizer(method=normalization_type)
        # fit the normalizer
        normalizer.fit(data_ori)
        data_normal = normalizer.transform(data_ori)
        return data_normal, normalizer
    else:
        # read the full data set, skipping the label row
        data_ori = torch.tensor(df.iloc[1:, ].to_numpy(), dtype=torch.float32)
        if not normalization:
            return data_ori
        normalizer = Normalizer(method=normalization_type)
        # fit the normalizer
        normalizer.fit(data_ori)
        data_normal = normalizer.transform(data_ori)
        return data_normal, normalizer

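
# Usage sketch for get_train_data_from_csv (illustrative only; the path is
# hypothetical). Note that a single tensor is returned when normalization is off,
# and a (tensor, normalizer) pair when it is on.
def _example_train_data():
    train_data, norm = get_train_data_from_csv(
        "D://data//train.csv", normalization=True, normalization_type='minmax')
    raw_data = get_train_data_from_csv("D://data//train.csv")
    print(train_data.shape, raw_data.shape)
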
0
FC_ML_Data/FC_ML_Data_Load/Data_Load_Params.py
Normal file
25
FC_ML_Data/FC_ML_Data_Load/Data_Load_Predict.py
Normal file
@@ -0,0 +1,25 @@
import torch
from torchvision import transforms
from PIL import Image

# data preprocessing
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


# prediction function
def predict(model_path, data_path, result_path, image_path):
    # load the model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = torch.load(model_path, map_location=device)
    model.eval()
    img = Image.open(image_path).convert('RGB')
    input_tensor = transform(img).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(input_tensor)
    return output.argmax().item()


if __name__ == '__main__':
    # data_path and result_path are not used yet, so placeholders are passed here
    print(f"Predicted class: {predict('model.pth', None, None, 'test.jpg')}")

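
# Sketch of producing the model file consumed by predict() above (illustrative;
# resnet18 and num_classes=2 are hypothetical choices). torch.load(model_path)
# returns a full model object only when the file was written with torch.save(model),
# not torch.save(model.state_dict()).
def _example_save_full_model():
    from torchvision import models
    trained = models.resnet18(num_classes=2)  # hypothetical architecture
    # ... training would happen here ...
    torch.save(trained, 'model.pth')
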
0
FC_ML_Data/FC_ML_Data_Load/__init__.py
Normal file