import time
import csv
import glob
import os
import random
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from PIL import Image
from torch.utils.data.dataloader import default_collate
# Pick the compute device once, then report which one will be used.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    print('use GPU')
else:
    print('use CPU')
class data_remake(Dataset):
    """Card-image dataset indexed by a CSV file.

    Each row of ``cards.csv`` (after the header) is expected to hold the
    integer class label in column 0 and the image file path in column 1.
    ``mode`` selects which slice of the CSV rows forms this split.

    Args:
        root: directory whose sub-directories name the classes.
        resize: final square crop size (pixels) fed to the model.
        mode: 'train' or 'test' — selects the row range used.
    """

    def __init__(self, root, resize, mode):
        super(data_remake, self).__init__()
        self.root = root
        self.resize = resize
        # Map each class sub-directory name to a sequential integer id.
        # NOTE(review): os.listdir order is OS-dependent, so this mapping is
        # not deterministic across machines; it is only printed, never used
        # for the labels (those come from the CSV) — confirm that is intended.
        self.name_to_label = {}
        for name in os.listdir(os.path.join(root)):
            if not os.path.isdir(os.path.join(root, name)):
                continue
            self.name_to_label[name] = len(self.name_to_label)
        self.images, self.labels = self.load_csv('cards.csv')
        # NOTE(review): the split indices assume the CSV holds 7625 training
        # rows followed by 265 test rows — verify against the actual file.
        if mode == 'train':
            self.images = self.images[:7625]
            self.labels = self.labels[:7625]
            print(self.name_to_label)
        elif mode == 'test':
            self.images = self.images[7625:7890]
            self.labels = self.labels[7625:7890]

    def load_csv(self, filename):
        """Read (image path, label) columns from *filename*, skipping the header.

        Returns:
            (images, labels): list of path strings and list of int labels.
        """
        images, labels = [], []
        with open(filename, 'r', encoding='utf-8') as f:
            reader = csv.reader(f)
            next(reader)  # skip the header row
            for row in reader:
                images.append(row[1])
                labels.append(int(row[0]))
        return images, labels

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        """Load, augment, and tensorize one sample.

        Returns (None, None) when the image file is missing; the custom
        collate function filters these sentinels out so a missing file
        skips the sample instead of crashing the epoch.
        """
        img_path, label = self.images[idx], self.labels[idx]
        try:
            img = Image.open(img_path).convert('RGB')
        except FileNotFoundError:
            print("FileNotFoundError: Could not find image at path:", img_path)
            return None, None
        # Augmentation: upscale 25%, small random rotation, center-crop back
        # down to the target size, then convert to a float tensor in [0, 1].
        tf = transforms.Compose([
            transforms.Resize((int(1.25 * self.resize), int(1.25 * self.resize))),
            transforms.RandomRotation(15),
            transforms.CenterCrop(self.resize),
            transforms.ToTensor(),
        ])
        img = tf(img)
        label = torch.tensor(label)
        return img, label
# Materialize the two dataset splits; 32 is the square crop size fed to the CNN.
# NOTE(review): the first ctor argument is the class-folder root ('train'/'test'),
# but load_csv always reads 'cards.csv' from the CWD — verify the paths line up.
train_data = data_remake('train', 32, mode='train')
test_data = data_remake('test', 32, mode='test')
def my_collate_fn(batch):
    """Collate that drops samples whose image failed to load (None sentinel)."""
    kept = [sample for sample in batch if sample[0] is not None]
    return default_collate(kept)
# valid_data = data_remake('vali', 32, mode='vali')
train_data_size = len(train_data)
test_data_size = len(test_data)
print('训练数据集的长度为:{}'.format(train_data_size))
print('测试数据集的长度为:{}'.format(test_data_size))
# FIX: shuffle the training data each epoch — without it SGD sees samples in
# CSV order (typically grouped by class), which hurts convergence.
train_dataloader = DataLoader(train_data, batch_size=64, shuffle=True, collate_fn=my_collate_fn)
test_dataloader = DataLoader(test_data, batch_size=64, collate_fn=my_collate_fn)
# valid_dataloader = DataLoader(valid_data, batch_size=64)
class Genshin(nn.Module):
    """Small CNN classifier for 32x32 RGB card images (53 output classes)."""

    def __init__(self):
        super(Genshin, self).__init__()
        layers = []
        # Three conv -> batchnorm -> relu -> maxpool stages; the spatial size
        # shrinks 32 -> 16 -> 8 -> 4 while channels grow 3 -> 32 -> 32 -> 64.
        # (padding=2 with kernel 5 keeps spatial size before each pool.)
        for in_ch, out_ch in ((3, 32), (32, 32), (32, 64)):
            layers.append(nn.Conv2d(in_channels=in_ch, out_channels=out_ch,
                                    kernel_size=5, stride=1, padding=2))
            layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.ReLU(inplace=True))
            layers.append(nn.MaxPool2d(kernel_size=2))
        layers.append(nn.Flatten())
        # Classifier head: 64*4*4 flattened features narrowed to 53 logits.
        layers.append(nn.Linear(in_features=64 * 4 * 4, out_features=64 * 4))
        layers.append(nn.Linear(in_features=64 * 4, out_features=64))
        layers.append(nn.Linear(in_features=64, out_features=53))
        self.model = nn.Sequential(*layers)

    def forward(self, input):
        return self.model(input)
# Interactive hyper-parameter setup: epoch count, SGD learning rate, and the
# TensorBoard log directory are all read from stdin.
epoch = int(input('请输入训练轮数:'))
learning_rate = float(input('请输入学习率:'))
writer = SummaryWriter(input('请输入日志保存目录:'))
model = Genshin()
model = model.to(device)
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
max_accuracy = 0.0        # best test accuracy so far (tracking currently disabled below)
total_train_step = 0      # global count of optimizer steps
total_test_step = 0       # number of completed evaluation passes
start_time = time.time()  # wall-clock reference for progress logging
for i in range(epoch):
    print('-------第{}轮训练开始-------'.format(i+1))
    # FIX: put BatchNorm layers into training mode (per-batch statistics,
    # running stats updated) before each training epoch.
    model.train()
    for data in train_dataloader:
        imgs, targets = data
        imgs = imgs.to(device)
        targets = targets.to(device)
        output = model(imgs)
        loss = loss_fn(output, targets)
        # Optimizer update: clear stale grads, backpropagate, step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_step += 1
        if total_train_step % 50 == 0:
            end_time = time.time()
            print('训练时间:{}'.format(end_time - start_time))
            print('训练次数:{}, Loss:{}'.format(total_train_step, loss.item()))
            writer.add_scalar('train_loss', loss.item(), total_train_step)
    total_test_loss = 0
    total_accuracy = 0
    # Evaluation pass over the held-out test set.
    # FIX: eval mode makes BatchNorm use its running statistics instead of the
    # small test-batch statistics, which previously skewed the test metrics.
    model.eval()
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            imgs = imgs.to(device)
            targets = targets.to(device)
            output = model(imgs)
            loss = loss_fn(output, targets)
            total_test_loss += loss.item()
            # FIX: .item() keeps the accumulator a plain Python number, so the
            # accuracy prints as e.g. '0.83' rather than 'tensor(0.8300)'.
            accuracy = (output.argmax(1) == targets).sum().item()
            total_accuracy += accuracy
    print('整体测试集Loss:{}'.format(total_test_loss))
    print('整体测试集正确率:{}'.format(total_accuracy/test_data_size))
    writer.add_scalar('test_loss', total_test_loss, total_test_step)
    writer.add_scalar('test_accuracy', total_accuracy/test_data_size, total_test_step)
    total_test_step += 1
    # NOTE(review): the full model (not just state_dict) is saved every epoch,
    # unconditionally — the best-accuracy gate below is commented out.
    # if max_accuracy < (total_accuracy/test_data_size):
    #     max_accuracy = total_accuracy/test_data_size
    torch.save(model, 'ppppp_{}.pth'.format(total_test_step))
    print('模型已保存!')
    # print('当前最高精度为:{}'.format(max_accuracy))
writer.close()
#tensorboard命令:
#activate pytorch
#tensorboard --logdir=train_logs
1.一个pytorch模型训练的套路至少要包含数据、模型、训练、测试四个部分。对吗?
以我之见,我认为是正确的
从最开始重写dataset
到加载数据,把数据转换为张量
把转换好的张量放入模型中,进行训练
当然了在训练模型中,可以增加一些优化
将训练好的结果进行测试
2.数据:扑克数据集中有train test val三个数据集,他们分别有什么作用?
首先
根据英文的意思
训练、测试、验证(val 是 validation 的缩写,意为"验证"而非"有效")
第一个肯定是用来训练出模型
第二将训练出的模型进行测试
最后将有效的数据进行对比,也就是validation验证
主要作用是来验证是否过拟合、以及用来调节训练参数等
举个简单例子,如果训练过头了,validation可以用来调节
3.模型: 对于扑克图像,它是离散数据还是连续数据?所以你应该选择CNN还是RNN?
我认为首先我们得了解,离散和连续的意思
离散可以说是,在图上散布的点
连续,直观来说就是一张图
就拿时间这个变量来说,我们一般认为时间是连续的,它确实是连续的,但是,我们用科学的测量,是没法把时间测成连续的,也就是说,我们哪怕用微秒纳秒也不能严格满足连续的定义,然而,这并不妨碍我们进行计算。因为我们可以近似把以纳秒为间隔的时间看成连续,因为已经足够小啦。所以,我们现实生活中是没法找到严格意义上可以用数值衡量为连续的。哪怕我们肉眼见到的物质,不也是由原子组成,原子不仅仅不是最小的,而且还存在间隔。所以我们没必要细究这个问题,近似看作连续即可。
CNN是一种利用卷积计算的神经网络。它可以通过卷积计算将原像素很大的图片保留主要特征变成很小的像素图片
RNN 跟传统神经网络最大的区别在于每次都会将前一次的输出结果,带到下一次的隐藏层中,一起训练
RNN 之所以能够有效的处理序列数据,主要是基于他的比较特殊的运行原理
对于图片,可以把像素看成每一个点,所以毫无疑问应该选择CNN
RNN,会将之前的结果带入下一次,这个符合连续型的数据,所以这个用来处理视频
4.训练与测试:Loss和Accuracy是什么?用你的话来简述一下?
loss是损失函数
accuracy是准确率
预测值和真实值之间的差距,就是损失