问题描述
我在训练循环中遇到此错误.在我在 datasets.py 文件中添加增强函数之前,模型训练良好.我无法理解它是什么.我的模型正在训练图像.请往下看.
I am getting this error in the training loop. The model was training fine before I added an augmentation function in the datasets.py file. I can't understand what it is. My model is training on images. Please have a look below.
错误:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-9-efc5be094f2a> in <module>()
102 train.train_model(model, train_loader, val_loader, optimizer, criterion,
103 IMG_CLASS_NAMES, NUM_EPOCHS, project_name = "CSE5DL Assignment Task 1",
--> 104 ident_str= "Didn't used anything sp")
8 frames
/content/drive/My Drive/DL Assignment/train.py in train_model(model, train_loader, val_loader, optimizer, criterion, class_names, n_epochs, project_name, ident_str)
163 for epoch in tq.tqdm(range(n_epochs), total=n_epochs, desc='Epochs'):
164 _, _, train_metrics_dict = \
--> 165 train_epoch(epoch, model, optimizer, criterion, train_loader)
166 val_lbls, val_outs, val_metrics_dict = \
167 val_epoch(epoch, model, criterion, val_loader)
/content/drive/My Drive/DL Assignment/train.py in train_epoch(epoch, model, optimizer, criterion, loader)
61 optimizer.zero_grad()
62
---> 63 outputs = model(inputs)
64
65 loss = criterion(outputs, lbls)
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
/content/drive/My Drive/DL Assignment/models.py in forward(self, x)
40
41 def forward(self, x):
---> 42 x = self.seq(x)
43 # print(x.shape)
44 return x
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/container.py in forward(self, input)
117 def forward(self, input):
118 for module in self:
--> 119 input = module(input)
120 return input
121
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py in forward(self, input)
92
93 def forward(self, input: Tensor) -> Tensor:
---> 94 return F.linear(input, self.weight, self.bias)
95
96 def extra_repr(self) -> str:
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
1751 if has_torch_function_variadic(input, weight):
1752 return handle_torch_function(linear, (input, weight), input, weight, bias=bias)
-> 1753 return torch._C._nn.linear(input, weight, bias)
1754
1755
RuntimeError: mat1 dim 1 must match mat2 dim 0
datasets.py 代码:
datasets.py code:
import collections
import csv
from pathlib import Path
import os
import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image
import pandas as pd
# Both pipelines must emit tensors of the SAME spatial size (224x224) so the
# flattened feature count entering the model's first nn.Linear layer is
# constant. Previously the non-augmented path did no resize at all and the
# augmented path cropped un-resized images, which is what triggered
# "RuntimeError: mat1 dim 1 must match mat2 dim 0" in the forward pass.
to_tensor_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
from torch.utils.data import Dataset
from torchvision.datasets import ImageFolder
train_transforms = transforms.Compose([
    transforms.Resize((256, 256)),   # normalize size first so the 224 crop below is always valid
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(224),
    transforms.RandomVerticalFlip(0.5),
    transforms.ToTensor(),
])
class LesionDataset(Dataset):
    """Lesion image dataset backed by a directory of .jpg files and a CSV of
    one-hot encoded labels (first column: image id, remaining columns: classes).

    Args:
        img_dir: directory containing '<image_id>.jpg' files.
        labels_fname: path to the one-hot labels CSV.
        augment: when True, apply the training augmentation pipeline;
            otherwise apply the plain eval transform.
    """

    def __init__(self, img_dir, labels_fname, augment=False):
        self.img_dir = img_dir
        self.augment = augment
        # Kept as a DataFrame under this (admittedly odd) name because
        # external code reads columns from it, e.g. labels_fname["MEL"].
        self.labels_fname = pd.read_csv(labels_fname)
        # Precompute the integer class index for every row ONCE. The old
        # code dropped/argmax-ed the whole label matrix inside __getitem__,
        # i.e. O(dataset) work per sample fetched.
        one_hot = self.labels_fname.drop(['image'], axis=1)
        self._class_indices = np.argmax(np.array(one_hot), axis=1)

    def __len__(self):
        return len(self.labels_fname)

    def __getitem__(self, idx):
        image_id = self.labels_fname.iloc[idx, 0]
        image = Image.open(os.path.join(self.img_dir, image_id + '.jpg')).convert("RGB")
        label = self._class_indices[idx]
        transform = train_transforms if self.augment else to_tensor_transform
        return transform(image), label
train.py 代码:
train.py code:
from datetime import datetime
import numpy as np
import torch
import torch.nn as nn
import tqdm.notebook as tq
import sklearn.metrics
import wandb
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
# Prefer the first CUDA GPU when one is available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    torch.cuda.set_device(device)
else:
    device = torch.device("cpu")
def plot_confusion_matrix(all_lbls, all_outputs, class_names, normalize = True):
    """Plot a confusion-matrix heatmap of ground-truth vs predicted labels.

    Args:
        all_lbls: array-like of ground-truth class indices.
        all_outputs: array-like of predicted class indices.
        class_names: class names used to label both axes.
        normalize: when True, convert raw counts to per-true-class fractions.
            (Previously this flag was accepted but ignored.)
    """
    cm = confusion_matrix(all_lbls, all_outputs)
    if normalize:
        # Row-normalize so each true class sums to 1.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    df_cm = pd.DataFrame(cm, class_names, class_names)
    sn.heatmap(df_cm, annot=True, cmap='flare')
    # Task 1c - axis labels and show the plot.
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()
def count_classes(preds):
    """Count the number of predictions per class.

    `preds` is a tensor shaped [batch, n_classes]; the argmax over dim 1
    of each row is taken as that batch element's predicted class.

    Returns:
        A list of length n_classes whose entry c is how many batch
        elements were predicted as class c.
    """
    predicted = preds.argmax(dim=1)
    counts = []
    for cls in range(preds.shape[1]):
        counts.append((predicted == cls).sum().item())
    return counts
def train_epoch(epoch, model, optimizer, criterion, loader):
    """Run one training epoch over `loader`.

    Returns:
        (all_lbls, all_outputs, metrics_dict): ground-truth labels, the
        predicted class index per sample, and a dict with the mean epoch
        loss, accuracy and unweighted average recall (UAR).
    """
    epoch_loss = 0
    model.train()
    all_outputs = []  # raw model outputs, one row per sample
    all_lbls = []     # ground-truth labels, one per sample
    for i, (inputs, lbls) in enumerate(loader):
        inputs, lbls = inputs.to(device), lbls.to(device)
        # Task 1b - forward pass, backward pass, weight update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, lbls)
        loss.backward()
        optimizer.step()
        # TODO: Task 2d - Temporarily uncomment these lines
        # print(count_classes(outputs))
        # if i > 9:
        #     assert False
        # Collect metrics
        epoch_loss += loss.item()
        all_outputs.extend(outputs.tolist())
        all_lbls.extend(lbls.tolist())
    all_outputs = np.array(all_outputs)
    all_lbls = np.array(all_lbls)
    # Convert raw scores to the predicted class for each sample.
    all_outputs = np.argmax(all_outputs, axis=1)
    # sklearn metrics take (y_true, y_pred) in that order; the accuracy call
    # previously had the arguments swapped (harmless for accuracy, but now
    # consistent with the recall_score call below).
    acc = sklearn.metrics.accuracy_score(all_lbls, all_outputs)
    uar = sklearn.metrics.recall_score(all_lbls, all_outputs, average='macro')
    metrics_dict = {
        'Loss/train': (epoch_loss / len(loader)),
        'Accuracy/train': acc,
        'UAR/train': uar,
    }
    return all_lbls, all_outputs, metrics_dict
def val_epoch(epoch, model, criterion, loader):
    """Run one validation epoch over `loader` (no weight updates).

    Returns:
        (all_lbls, all_outputs, metrics_dict): ground-truth labels, the
        predicted class index per sample, and a dict with the mean epoch
        loss, accuracy and unweighted average recall (UAR).
    """
    epoch_loss = 0
    model.eval()
    all_outputs = []  # raw model outputs, one row per sample
    all_lbls = []     # ground-truth labels, one per sample
    for inputs, lbls in loader:
        inputs, lbls = inputs.to(device), lbls.to(device)
        # Task 1b - forward pass only; no gradients needed for validation.
        with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, lbls)
        # Collect metrics
        epoch_loss += loss.item()
        all_outputs.extend(outputs.tolist())
        all_lbls.extend(lbls.tolist())
    all_outputs = np.array(all_outputs)
    all_lbls = np.array(all_lbls)
    # Convert raw scores to the predicted class for each sample.
    all_outputs = np.argmax(all_outputs, axis=1)
    # sklearn metrics take (y_true, y_pred) in that order (kept consistent
    # with train_epoch).
    acc = sklearn.metrics.accuracy_score(all_lbls, all_outputs)
    uar = sklearn.metrics.recall_score(all_lbls, all_outputs, average='macro')
    metrics_dict = {
        'Loss/val': (epoch_loss / len(loader)),
        'Accuracy/val': acc,
        'UAR/val': uar,
    }
    return all_lbls, all_outputs, metrics_dict
def train_model(model, train_loader, val_loader, optimizer, criterion,
                class_names, n_epochs, project_name, ident_str=None):
    """Train `model` for `n_epochs`, logging per-epoch metrics to Weights &
    Biases and plotting a confusion matrix of the final validation epoch.

    Args:
        ident_str: run identifier appended to the model class name; defaults
            to a timestamp when None.
    """
    model.to(device)
    # Initialise Weights and Biases project
    if ident_str is None:
        ident_str = datetime.now().strftime("%Y%m%d_%H%M%S")
    exp_name = f"{model.__class__.__name__}_{ident_str}"
    run = wandb.init(project=project_name, name=exp_name)
    # Bug fix: these were previously only assigned inside the loop, so the
    # plot call raised NameError when n_epochs == 0.
    val_lbls, val_outs = None, None
    try:
        # Train by iterating over epochs
        for epoch in tq.tqdm(range(n_epochs), total=n_epochs, desc='Epochs'):
            _, _, train_metrics_dict = \
                train_epoch(epoch, model, optimizer, criterion, train_loader)
            val_lbls, val_outs, val_metrics_dict = \
                val_epoch(epoch, model, criterion, val_loader)
            wandb.log({**train_metrics_dict, **val_metrics_dict})
    finally:
        # Always close the wandb run, even if training raised.
        run.finish()
    # Only plot when at least one validation epoch actually completed.
    if val_lbls is not None:
        plot_confusion_matrix(val_lbls, val_outs, class_names, normalize=True)
模型训练单元代码:
import torch
import torch.nn as nn
import numpy as np  # bug fix: `np` was used below but never imported in this cell
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import datasets
import models
import train
from train import device
from train import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

torch.manual_seed(42)

NUM_EPOCHS = 5
BATCH_SIZE = 64

model = models.SimpleBNConv()
model.to(device)

# Task 1b - create the data loaders from LesionDatasets.
train_dataset = datasets.LesionDataset('/content/data/img',
                                       '/content/data/img/train.csv', augment=True)
val_dataset = datasets.LesionDataset('/content/data/img',
                                     '/content/data/img/val.csv', augment=False)

# Task 1d - the training data is heavily skewed (~70% of the labels are NV),
# so build a WeightedRandomSampler that down-weights the majority classes.
class_cols = ["MEL", "NV", "BCC", "AKIEC", "BKL", "DF", "VASC"]
one_hot = np.array(train_dataset.labels_fname[class_cols])
# Fraction of training samples belonging to each class.
proportions = one_hot.sum(axis=0) / len(one_hot)
print("Train data -", " ".join(
    f"{name} proportion: {p:.3f}" for name, p in zip(class_cols, proportions)))

# Sampling weight per CLASS: rarer classes get larger weights.
class_weights = 1.0 - proportions
# One weight PER SAMPLE. Bug fix: the original loop iterated the DataFrame's
# column names (strings), so no `i == k` branch ever matched — every entry got
# VASC's weight and the sampler received only ~8 weights instead of
# len(train_dataset), meaning it could only ever draw the first few indices.
sample_classes = np.argmax(one_hot, axis=1)
weights = class_weights[sample_classes].tolist()
sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(train_dataset))

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=1, sampler=sampler)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=1)

# Defining the optimizer and loss.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Task 1c: ident_str identifies this particular training run. The model class
# name is already prepended inside train_model, so record the run settings
# (lr, augmentation, sampler) here instead of a placeholder string.
train.train_model(model, train_loader, val_loader, optimizer, criterion,
                  IMG_CLASS_NAMES, NUM_EPOCHS, project_name="CSE5DL Assignment Task 1",
                  ident_str="lr0.001_aug_weighted_sampler")
我希望增强会提高模型的准确性.请建议我还能尝试提高模型的准确性吗?我正在使用以下图层:
I expect that the augmentation would improve the accuracy of the model. Please suggest what else can I try to improve the model accuracy? I am using the following layers:
10 个 nn.Conv2d 层,分别有 8、8、16、16、32、32、64、64、128、128 个输出通道5 nn.MaxPool2d 层散布在输出通道的每次更改之前,和 nn.BatchNorm2d,和 nn.ReLU() 用于激活函数
10 nn.Conv2d layers with 8, 8, 16, 16, 32, 32, 64, 64, 128 and 128 output channels respectively; 5 nn.MaxPool2d layers, one placed just before every change in output channels; nn.BatchNorm2d layers; and nn.ReLU() as the activation function.
谢谢.
推荐答案
更新:我通过在 datasets.py 代码中调整图像大小(resize)修复了这个错误。
Update: I fixed the error by resizing the image in the datasets.py code.
这篇关于RuntimeError: mat1 dim 1 must match mat2 dim 0 添加数据增强的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!