Coding - Color Image Classification Based on a 2D U-Net

Abstract

Train an image classification neural network (2D U-Net), covering:
1. Building a custom DataLoader
2. Defining the network
3. The training procedure
4. The testing procedure
5. Model evaluation (accuracy)
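
The training script that follows assumes the facade dataset is organized into separate picture and label folders for training and testing. Below is a minimal sanity check, assuming the directory layout implied by the glob patterns used later in the script; adjust the paths to your own data:

import glob

# Assumed layout (inferred from the glob patterns in the training script):
#   facade/train_picture/*.png   training images
#   facade/train_label/*.jpg     training labels
#   facade/test_picture/*.png    test images
#   facade/test_label/*.jpg      test labels
imgs_path = sorted(glob.glob('facade/train_picture/*.png'))
label_path = sorted(glob.glob('facade/train_label/*.jpg'))
assert len(imgs_path) > 0, 'no training images found - check the dataset path'
assert len(imgs_path) == len(label_path), 'each image needs a matching label'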

import glob  # file path pattern matching
import os  # used to make sure the checkpoint directory exists
from torchvision import transforms  # image transformation utilities
from torch.utils import data  # PyTorch data utilities
from PIL import Image  # PIL image handling
import matplotlib.pyplot as plt  # plotting
import torch  # PyTorch
import torch.nn as nn  # PyTorch neural network modules
import torch.nn.functional as F  # PyTorch functional API
from unet import Unet  # the custom U-Net model defined in unet.py
import numpy as np  # NumPy

# Preprocessing: convert to tensor, resize, and normalize
transform = transforms.Compose([
    transforms.ToTensor(),                   # convert the PIL image to a tensor in [0, 1]
    transforms.Resize((256, 256)),           # resize to 256x256
    transforms.Normalize(mean=0.5, std=0.5)  # normalize to roughly [-1, 1]
])

class my_dataset(data.Dataset):
    def __init__(self, imgs_path, annos_path):
        self.imgs_path = imgs_path    # list of image file paths
        self.annos_path = annos_path  # list of label file paths

    def __getitem__(self, index):
        img_path = self.imgs_path[index]               # image path for this sample
        pil_img = Image.open(img_path).convert('RGB')  # open as a 3-channel RGB image
        pil_img = transform(pil_img)                   # preprocess the image

        anno_path = self.annos_path[index]               # label path for this sample
        anno_img = Image.open(anno_path).convert('RGB')  # open the label as a 3-channel RGB image
        pil_anno = transform(anno_img)                   # preprocess the label

        return pil_img, pil_anno

    def __len__(self):
        return len(self.imgs_path)  # number of samples in the dataset

def train(model, train_loader, criterion, optimizer, device):
    model.train()  # switch to training mode
    train_loss = 0
    for data, label in train_loader:
        data = data.to(device)
        optimizer.zero_grad()                                # clear accumulated gradients
        output = model(data)                                 # forward pass
        loss = criterion(output, label.to(device).float())   # compute the loss
        loss.backward()                                      # backward pass
        optimizer.step()                                     # update the parameters
        train_loss += loss.item() * data.size(0)

    train_loss /= len(train_loader.dataset)  # average training loss
    return train_loss

def validate(model, val_loader, criterion, device):
    model.eval()  # switch to evaluation mode
    val_loss = 0
    with torch.no_grad():
        for data, label in val_loader:
            data = data.to(device)
            output = model(data)                                 # forward pass
            loss = criterion(output, label.to(device).float())   # compute the loss
            val_loss += loss.item() * data.size(0)

    val_loss /= len(val_loader.dataset)  # average validation loss
    return val_loss

if __name__ == '__main__':
    # Training data
    imgs_path = glob.glob('facade/train_picture/*.png')   # training image paths
    label_path = glob.glob('facade/train_label/*.jpg')    # training label paths

    # Test data
    test_imgs_path = glob.glob('facade/test_picture/*.png')   # test image paths
    test_label_path = glob.glob('facade/test_label/*.jpg')    # test label paths

    # Sort both lists so images and labels stay paired
    imgs_path = sorted(imgs_path)
    label_path = sorted(label_path)

    test_imgs_path = sorted(test_imgs_path)
    test_label_path = sorted(test_label_path)

    train_dataset = my_dataset(imgs_path, label_path)
    test_dataset = my_dataset(test_imgs_path, test_label_path)                 # test dataset
    train_loader = data.DataLoader(train_dataset, batch_size=4, shuffle=True)  # training loader
    test_loader = data.DataLoader(test_dataset, batch_size=4, shuffle=False)   # test loader

    # Build the U-Net model
    in_channels = 3   # number of input channels
    out_channels = 3  # number of output channels
    model = Unet(in_channels, out_channels)

    criterion = nn.MSELoss()  # mean squared error loss
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)  # Adam optimizer

    # Move the model to the GPU if one is available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    train_losses = []       # training loss history
    val_losses = []         # validation loss history
    best_val_loss = np.inf  # best validation loss so far
    best_model = None       # best model weights so far
    epoch_times = 300       # number of epochs
    os.makedirs('ckpt', exist_ok=True)  # make sure the checkpoint directory exists

    # Training loop
    for epoch in range(epoch_times):
        train_loss = train(model, train_loader, criterion, optimizer, device)  # train for one epoch
        val_loss = validate(model, test_loader, criterion, device)             # validate
        train_losses.append(train_loss)
        val_losses.append(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.state_dict()
            torch.save(best_model, 'ckpt/model.ckpt')  # save the best weights
            print("best_val_loss: " + str(val_loss))
            with open("ckpt/model_loss.txt", "w") as f:
                f.write(str(val_loss))

        print('Epoch [{}/{}], Train Loss: {:.4f}, Val Loss: {:.4f}'.format(epoch + 1, epoch_times, train_loss, val_loss))
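
matplotlib.pyplot is imported in the training script but never used; the train_losses and val_losses lists recorded above are the natural thing to plot once training finishes. A minimal sketch, assuming those two lists from the loop above (the output path is just an illustrative choice):

import matplotlib.pyplot as plt

# Plot the per-epoch loss curves recorded by the training loop.
# Assumes train_losses and val_losses are the lists filled in during training.
plt.plot(train_losses, label='train loss')
plt.plot(val_losses, label='val loss')
plt.xlabel('epoch')
plt.ylabel('MSE loss')
plt.legend()
plt.savefig('ckpt/loss_curve.png')  # illustrative output path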

val.py (load the trained model and generate test outputs)

from unet import Unet  # the custom U-Net model defined in unet.py
import torch  # PyTorch
from torch.utils import data  # PyTorch data utilities
from PIL import Image  # PIL image handling
from torchvision import transforms  # image transformation utilities
import glob  # file path pattern matching

# Preprocessing: convert to tensor, resize, and normalize
transform = transforms.Compose([
    transforms.ToTensor(),                   # convert the PIL image to a tensor in [0, 1]
    transforms.Resize((256, 256)),           # resize to 256x256
    transforms.Normalize(mean=0.5, std=0.5)  # normalize to roughly [-1, 1]
])

class my_dataset(data.Dataset):
    def __init__(self, imgs_path, annos_path):
        self.imgs_path = imgs_path    # list of image file paths
        self.annos_path = annos_path  # list of label file paths

    def __getitem__(self, index):
        img_path = self.imgs_path[index]               # image path for this sample
        pil_img = Image.open(img_path).convert('RGB')  # open as a 3-channel RGB image
        pil_img = transform(pil_img)                   # preprocess the image

        anno_path = self.annos_path[index]               # label path for this sample
        anno_img = Image.open(anno_path).convert('RGB')  # open the label as a 3-channel RGB image
        pil_anno = transform(anno_img)                   # preprocess the label

        return pil_img, pil_anno

    def __len__(self):
        return len(self.imgs_path)  # number of samples in the dataset

# Test data
test_imgs_path = sorted(glob.glob('facade/test_picture/*.png'))   # test image paths
test_label_path = sorted(glob.glob('facade/test_label/*.jpg'))    # test label paths
test_dataset = my_dataset(test_imgs_path, test_label_path)        # test dataset
test_loader = data.DataLoader(test_dataset, batch_size=1, shuffle=False)  # test loader

model = Unet(3, 3)  # build the U-Net model
checkpoint = torch.load('ckpt/model.ckpt', map_location='cpu')  # load the saved weights onto the CPU
model.load_state_dict(checkpoint)
model.eval()  # switch to evaluation mode

with torch.no_grad():
    for i, (img, label) in enumerate(test_loader):
        output = model(img)                # forward pass on the normalized input
        output = output * 0.5 + 0.5        # undo the normalization so values are back in [0, 1]
        output = torch.squeeze(output, 0)  # drop the batch dimension
        array = output.cpu().clamp(0, 1).numpy().transpose(1, 2, 0)  # CHW tensor -> HWC NumPy array
        image = Image.fromarray((array * 255).astype('uint8'))       # build a PIL image
        image.save('image{}.jpg'.format(i + 1))  # save each prediction as image1.jpg, image2.jpg, ...
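
The abstract lists model evaluation (accuracy) as the last step, but the scripts above only track the MSE loss. Below is a minimal sketch of a per-pixel accuracy check, assuming the model and test_loader built in val.py above; the 0.1 tolerance is purely an illustrative choice:

import torch

# Per-pixel "accuracy": fraction of pixels whose predicted value lies within a
# small tolerance of the label, averaged over the whole test set.
# `model` and `test_loader` are assumed to be the objects defined in val.py.
def pixel_accuracy(model, test_loader, tolerance=0.1):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for img, label in test_loader:
            pred = model(img) * 0.5 + 0.5   # de-normalize the prediction to [0, 1]
            target = label * 0.5 + 0.5      # de-normalize the label the same way
            correct += ((pred - target).abs() <= tolerance).sum().item()
            total += target.numel()
    return correct / total

print('pixel accuracy:', pixel_accuracy(model, test_loader))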

unet.py

import torch
import torch.nn as nn
import torch.nn.functional as F

# One U-Net block: two 3x3 convolutions, each followed by BatchNorm and ReLU
class DoubleConv(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(DoubleConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.conv(x)
        return x

# The first block applied to the input
class InConv(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(InConv, self).__init__()
        self.conv = DoubleConv(in_ch, out_ch)

    def forward(self, x):
        x = self.conv(x)
        return x

# One downsampling step in the encoder: a max-pool followed by a DoubleConv block
class Down(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(Down, self).__init__()
        self.mpconv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_ch, out_ch)
        )

    def forward(self, x):
        x = self.mpconv(x)
        return x

# One upsampling step in the decoder
class Up(nn.Module):
    def __init__(self, in_ch, out_ch, bilinear=True):
        super(Up, self).__init__()
        # Choose the upsampling method
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        else:
            self.up = nn.ConvTranspose2d(in_ch // 2, in_ch // 2, 2, stride=2)  # // is floor division

        self.conv = DoubleConv(in_ch, out_ch)

    def forward(self, x1, x2):  # x2 is the skip connection from the encoder, x1 comes from the layer below
        x1 = self.up(x1)

        # Pad x1 so its spatial size matches x2 before concatenation
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = F.pad(x1, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2))
        x = torch.cat([x2, x1], dim=1)  # concatenate the two tensors along the channel dimension (dim=1)
        x = self.conv(x)
        return x

# Final 1x1 convolution that maps to the desired number of output channels
class OutConv(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_ch, out_ch, 1)

    def forward(self, x):
        x = self.conv(x)
        return x

class Unet(nn.Module):
    def __init__(self, in_channels, classes):  # in_channels: number of image channels, 1 for grayscale, 3 for color
        super(Unet, self).__init__()
        self.n_channels = in_channels
        self.n_classes = classes

        self.inc = InConv(in_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        self.down4 = Down(512, 512)
        self.up1 = Up(1024, 256)
        self.up2 = Up(512, 128)
        self.up3 = Up(256, 64)
        self.up4 = Up(128, 64)
        self.outc = OutConv(64, classes)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        x = self.outc(x)

        return x
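
A quick way to sanity-check the architecture is to push a random tensor through it and confirm the output has the same spatial size as the input. A minimal sketch, assuming the Unet class above can be imported from unet.py:

import torch
from unet import Unet

# Feed a dummy 3-channel 256x256 batch through the network and check the output shape.
model = Unet(3, 3)
x = torch.randn(1, 3, 256, 256)
with torch.no_grad():
    y = model(x)
print(y.shape)  # expected: torch.Size([1, 3, 256, 256])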