新增导入数据支持单行列头的功能

This commit is contained in:
2025-11-05 10:04:15 +08:00
parent b7ffbfc34a
commit c9b182b98a
4 changed files with 20017 additions and 20015 deletions

View File

@@ -149,7 +149,7 @@ def get_data_from_csv_feature(data_path,skip_rows = 100,sample_rows = 100,normal
get_data_from_csv_feature("D://test.excel",0,8,10,18)
"""
# 读取前xx行数据
df = pd.read_csv(data_path,encoding='gbk')
df = pd.read_csv(data_path,encoding='gbk',header=None)
df = df.dropna(axis=1,how='all') # drop columns in which every value is missing
df = df.dropna(axis=0,how='all') # drop rows in which every value is missing
print(df.iloc[0,0])
@@ -172,9 +172,14 @@ def get_data_from_csv_feature(data_path,skip_rows = 100,sample_rows = 100,normal
else:
#获取列数
label_name = df.iloc[0]
# 读取全量数据
source_data = torch.tensor(df.iloc[1:sample_rows, ].to_numpy(), dtype=torch.float32)
data_ori = torch.tensor(df.iloc[1:, ].to_numpy(), dtype=torch.float32)
# Re-read the full data set, letting pandas take the first row as the column header
df = pd.read_csv(data_path, encoding='gbk')
data = df.iloc[0:sample_rows]
df = df.dropna(axis=1, how='all') # drop columns in which every value is missing
df = df.dropna(axis=0, how='all') # drop rows in which every value is missing
print(data.dtypes)
source_data = torch.tensor(df.iloc[0:sample_rows, ].to_numpy(), dtype=torch.float32)
data_ori = torch.tensor(df.iloc[0:, ].to_numpy(), dtype=torch.float32)
normalizer = Normalizer(method=normalization_type)
# 初始化归一化器
normalizer.fit(data_ori)
@@ -200,7 +205,7 @@ def get_train_data_from_csv(data_path,normalization = True,normalization_type =
:param normalization:
"""
# 读取前xx行数据
df = pd.read_csv(data_path,encoding='gbk')
df = pd.read_csv(data_path,encoding='gbk',header=None)
df = df.dropna(axis=1,how='all') # drop columns in which every value is missing
df = df.dropna(axis=0,how='all') # drop rows in which every value is missing
# 尝试将值转换为数字
@@ -220,8 +225,11 @@ def get_train_data_from_csv(data_path,normalization = True,normalization_type =
data_normal = normalizer.transform(data_ori)
return data_normal,normalizer
else:
df = pd.read_csv(data_path, encoding='gbk')
df = df.dropna(axis=1, how='all') # drop columns in which every value is missing
df = df.dropna(axis=0, how='all') # drop rows in which every value is missing
# 读取全量数据
data_ori = torch.tensor(df.iloc[1:, ].to_numpy(), dtype=torch.float32)
data_ori = torch.tensor(df.iloc[:, ].to_numpy(), dtype=torch.float32)
if not normalization:
return data_ori
normalizer = Normalizer(method=normalization_type)