Coding-基于Excel(CSV)表格中的序号,查找文件夹中对应序号的txt文件

基于Excel(CSV)表格中的序号,查找文件夹中对应序号的txt文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import pandas as pd
import numpy as np
import os
import glob
import fnmatch
# --- Configuration ----------------------------------------------------------
# CSV lookup table with a header row. Later code reads the columns 'c' and 'l'
# (the index pair matched against file names) and 't' (the ground-truth value).
# Replace with the path to your own CSV file.
csv_file_path = 'now_have.csv'

# Folder that holds the spectrum .txt files (spatial light source + good spectrum).
folder_path = 'dataset\\toushe\\toushe_kongjianguangyuan_aiwantisi_hs2048'

# File name of the .npy array that the script writes at the end.
save_name = 'gao_1121_good_light_good_spectrum_green_data.npy'

# --- Load the lookup table --------------------------------------------------
# header=0: the first line of the CSV supplies the column names.
data_pd = pd.read_csv(csv_file_path, header=0)

# Only this heading is printed; the column itself is not dumped.
print("First column (c):")

# --- Discover the input files -----------------------------------------------
# Every .txt file located directly inside folder_path.
txt_files = glob.glob(os.path.join(folder_path, '*.txt'))

# Same list minus the files whose base name matches "*gualazhi*".
filtered_txt_files = [
    txt_path
    for txt_path in txt_files
    if not fnmatch.fnmatch(os.path.basename(txt_path), '*gualazhi*')
]
def _read_spectrum_features(path, true_label):
    """Parse one spectrum text file into an array of [feature, label] rows.

    The first 8 lines (metadata and column header) are skipped and blank
    lines are ignored. Every remaining line is split on ';'. Going by the
    original variable names, the fields are: wave; sample; dark; reference;
    corrected. The feature stored for a line is
    ``corrected / (reference - dark)``.

    Args:
        path: Path of the .txt file to read.
        true_label: Ground-truth value paired with every feature of the file.

    Returns:
        A NumPy array with one ``[feature, true_label]`` row per data line.

    Raises:
        IndexError: If a data line has fewer than five ';'-separated fields.
        ValueError: If one of the fields that is used is not a valid float.
        ZeroDivisionError: If ``reference`` equals ``dark`` on a data line.
    """
    with open(path, 'r') as handle:
        lines = handle.readlines()

    rows = []
    for line in lines[8:]:  # skip the 8 lines of metadata and column header
        stripped = line.strip()
        if not stripped:  # ignore blank lines
            continue
        parts = stripped.split(';')
        dark = float(parts[2])
        reference = float(parts[3])
        corrected = float(parts[4])
        rows.append([corrected / (reference - dark), true_label])
    return np.array(rows)


data_y = []
# Iterate over the filtered list so that files whose name contains
# "gualazhi" are really excluded (the unfiltered list was used before, which
# left the filter without any effect).
for file_path_ in filtered_txt_files:
    # File names start with the index pair, e.g. "10-1-....txt". Dropping the
    # extension first also lets a bare "10-1.txt" be parsed.
    stem = os.path.splitext(os.path.basename(file_path_))[0]
    name_parts = stem.split('-')
    extracted_part = (int(name_parts[0]), int(name_parts[1]))

    # Rows of the CSV whose (c, l) pair equals the pair taken from the name.
    matching_rows = data_pd[
        (data_pd['c'] == extracted_part[0]) & (data_pd['l'] == extracted_part[1])
    ]
    if matching_rows.empty:
        # No ground truth for this file: leave it out of the data set.
        continue

    # Read the label once per file and from an explicit row: float() on a
    # whole Series is deprecated in recent pandas and fails when several rows
    # match. With duplicates, the first matching row is used.
    true_label = float(matching_rows['t'].iloc[0])

    data_y.append(_read_spectrum_features(file_path_, true_label))
    print("OK")


# All files processed: stack the per-file arrays and save them.
if not data_y:
    raise ValueError(
        "No spectrum file matched a row of the CSV table; nothing to save."
    )

sample_shapes = {sample.shape for sample in data_y}
if len(sample_shapes) > 1:
    raise ValueError(
        "Spectrum files produced arrays of different shapes "
        f"{sorted(sample_shapes)}; they cannot be stacked into one array."
    )

# (n_files, n_points, 2) -> (n_files, 2, n_points): after the transpose,
# index 0 of axis 1 holds the features and index 1 holds the labels.
data_y_np = np.array(data_y)
transposed_data = np.transpose(data_y_np, (0, 2, 1))
np.save(save_name, transposed_data)