【深度学习实战（17）】计算语义分割的性能指标mIOU

一、指标介绍

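在训练语义分割模型时，我们不仅需要知道训练损失和验证损失，还想要知道模型在验证集上的性能指标。本文基于混淆矩阵 hist（行对应标签类别，列对应预测类别）计算三类逐类别指标：$\mathrm{IoU}_i = \dfrac{hist_{ii}}{\sum_j hist_{ij} + \sum_j hist_{ji} - hist_{ii}}$，$\mathrm{PA\_Recall}_i = \dfrac{hist_{ii}}{\sum_j hist_{ij}}$，$\mathrm{Precision}_i = \dfrac{hist_{ii}}{\sum_j hist_{ji}}$；mIoU 即各类别 IoU 的平均值。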

二、计算流程

（1）读取验证集的图片和标签（mask图）
（2）对模型预测的特征图进行解码，获得预测的mask图
（3）创建num_class x num_class尺寸的混淆矩阵hist
（4）将标签mask图和预测mask图转换为numpy数组
（5）将两个numpy数组展平为一维数组，使用np.bincount逐像素计算，再reshape，结果累计在混淆矩阵hist中
（6）根据hist混淆矩阵，计算语义分割指标mIOU，PA_Recall，Precision

三、相应代码

（1）读取验证集的图片和标签（mask图）

# Run the network over every image id and store each predicted
# class-index mask as <image_id>.png inside pred_dir.
for image_id in tqdm(self.image_ids):
    # Load the source picture from the dataset's JPEGImages folder.
    jpg_name = "JPEGImages/" + image_id + ".jpg"
    image = Image.open(os.path.join(self.dataset_path, jpg_name))
    # Convert the picture into a predicted mask and write it to disk.
    png_name = image_id + ".png"
    image = self.get_miou_png(image)
    image.save(os.path.join(pred_dir, png_name))

（2）对模型预测的特征图进行解码，获得预测的mask图

#------------------------------#
#   Get the predicted mask: get_miou_png returns an image holding one
#   class index per pixel, which is saved as <image_id>.png in pred_dir
#------------------------------#
image       = self.get_miou_png(image)
image.save(os.path.join(pred_dir, image_id + ".png"))


    def get_miou_png(self, image):
        """Predict a class-index mask for a single image.

        Args:
            image: Input picture; it is passed through ``cvtColor`` first
                (per the original notes this forces RGB so that grayscale
                inputs do not break prediction).

        Returns:
            A PIL image built from a uint8 array with the same height and
            width as the input, where each pixel stores the index of the
            highest-scoring class.
        """
        image = cvtColor(image)
        # Remember the source resolution so the prediction can be mapped back.
        src_h, src_w = np.array(image).shape[:2]

        # resize_image is expected to letterbox (pad with gray bars) to the
        # network input size and report the size of the un-padded content.
        net_h, net_w = self.input_shape[0], self.input_shape[1]
        image_data, nw, nh = resize_image(image, (net_w, net_h))

        # HWC -> CHW, then prepend the batch dimension.
        chw = np.transpose(preprocess_input(np.array(image_data, np.float32)), (2, 0, 1))
        image_data = np.expand_dims(chw, 0)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()

            # Forward pass; keep the only sample of the batch.
            pr = self.net(images)[0]
            # Channels last, then per-pixel class probabilities.
            pr = F.softmax(pr.permute(1, 2, 0), dim=-1).cpu().numpy()

            # Cut away the padding that was added around the content.
            pad_h = (net_h - nh) // 2
            pad_w = (net_w - nw) // 2
            pr = pr[int(pad_h):int(pad_h + nh), int(pad_w):int(pad_w + nw)]

            # Scale the probability maps back to the source resolution.
            pr = cv2.resize(pr, (src_w, src_h), interpolation=cv2.INTER_LINEAR)

            # Pick the most probable class for every pixel.
            pr = pr.argmax(axis=-1)

        return Image.fromarray(np.uint8(pr))

（3）创建num_class x num_class尺寸的混淆矩阵hist

print('Num classes', num_classes)  
#-----------------------------------------#
#   Create the confusion matrix: an all-zero array of shape
#   (num_classes, num_classes) that per-image counts are added to
#-----------------------------------------#
hist = np.zeros((num_classes, num_classes))

（4）将标签mask图和预测mask图转换为numpy数组

#------------------------------------------------#
#   Walk over every (prediction, label) file pair
#------------------------------------------------#
for ind in range(len(gt_imgs)): 
    #------------------------------------------------#
    #   Load one predicted segmentation mask as a numpy array
    #------------------------------------------------#
    pred = np.array(Image.open(pred_imgs[ind]))  
    #------------------------------------------------#
    #   Load the matching ground-truth label as a numpy array
    #------------------------------------------------#
    label = np.array(Image.open(gt_imgs[ind]))

（5）将两个numpy数组展平为一维数组，使用np.bincount逐像素计算，再reshape，结果累计在混淆矩阵hist中

# 如果图像分割结果与标签的大小不一样，这张图片就不计算
if len(label.flatten()) != len(pred.flatten()):  
    print(
        'Skipping: len(gt) = {:d}, len(pred) = {:d}, {:s}, {:s}'.format(
            len(label.flatten()), len(pred.flatten()), gt_imgs[ind],
            pred_imgs[ind]))
    continue

#------------------------------------------------#
#   对一张图片计算21×21的hist矩阵，并累加
#------------------------------------------------#
hist += fast_hist(label.flatten(), pred.flatten(), num_classes) 

def fast_hist(a, b, n):
    """Build the n x n confusion matrix for one pair of flattened masks.

    Args:
        a: 1-D array of ground-truth class indices, one entry per pixel.
        b: 1-D array of predicted class indices, same length as ``a``.
        n: Number of classes.

    Returns:
        Integer array of shape (n, n). Entry [i, j] counts the pixels whose
        label is i and whose prediction is j, so the diagonal holds the
        correctly classified pixels.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    # Keep only pixels whose label AND prediction are valid class indices.
    # Labels outside [0, n) (e.g. an ignore value such as 255) are dropped,
    # and so are out-of-range predictions: left in, n * a + b would either
    # fall into the wrong cell or exceed n**2 and break the reshape below.
    k = (a >= 0) & (a < n) & (b >= 0) & (b < n)
    # Encode each (label, prediction) pair as one integer in [0, n**2),
    # count the occurrences of every code, and fold the counts into (n, n).
    codes = n * a[k].astype(int) + b[k].astype(int)
    return np.bincount(codes, minlength=n ** 2).reshape(n, n)

（6）根据hist混淆矩阵，计算语义分割指标mIOU，PA_Recall，Precision

#------------------------------------------------#
#   Derive the per-class metrics from the confusion matrix hist that was
#   accumulated over all validation images
#   (the three helpers are defined outside this excerpt)
#------------------------------------------------#
IoUs        = per_class_iu(hist)
PA_Recall   = per_class_PA_Recall(hist)
Precision   = per_class_Precision(hist)

四、完整代码

class EvalCallback():
    """Evaluate a segmentation network's mIoU at the end of training epochs.

    Every ``period`` epochs the callback predicts a mask for each listed
    image, scores the masks with ``compute_mIoU``, appends the result to
    ``epoch_miou.txt`` and redraws ``epoch_miou.png`` inside ``log_dir``.
    """

    def __init__(self, net, input_shape, num_classes, image_ids, dataset_path, log_dir, cuda,
                 miou_out_path=".temp_miou_out", eval_flag=True, period=1):
        """Store the evaluation settings and start the mIoU log.

        Args:
            net: Network used for prediction (replaced on every evaluation).
            input_shape: Network input size, indexed as (height, width).
            num_classes: Number of segmentation classes.
            image_ids: Lines naming the images to evaluate; only the first
                whitespace-separated token of each line is kept.
            dataset_path: Root folder that contains ``JPEGImages`` and
                ``SegmentationClass``.
            log_dir: Folder receiving ``epoch_miou.txt`` / ``epoch_miou.png``.
            cuda: When true, inputs are moved to the GPU before prediction.
            miou_out_path: Temporary folder for predicted masks; it is
                deleted after each evaluation.
            eval_flag: Master switch; when false nothing is evaluated.
            period: Evaluate only on epochs divisible by this value.
        """
        super(EvalCallback, self).__init__()

        self.net = net
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.dataset_path = dataset_path
        self.log_dir = log_dir
        self.cuda = cuda
        self.miou_out_path = miou_out_path
        self.eval_flag = eval_flag
        self.period = period

        # Keep only the leading token of every entry as the image id.
        self.image_ids = [image_id.split()[0] for image_id in image_ids]
        # Both curves start with a 0 entry for "before training".
        self.mious = [0]
        self.epoches = [0]
        if self.eval_flag:
            with open(os.path.join(self.log_dir, "epoch_miou.txt"), 'a') as f:
                f.write(str(0))
                f.write("\n")

    def get_miou_png(self, image):
        """Predict a class-index mask for a single image.

        Args:
            image: Input picture; it is passed through ``cvtColor`` first
                (per the original notes this forces RGB so that grayscale
                inputs do not break prediction).

        Returns:
            A PIL image built from a uint8 array with the same height and
            width as the input, where each pixel stores the index of the
            highest-scoring class.
        """
        image = cvtColor(image)
        # Remember the source resolution so the prediction can be mapped back.
        src_h, src_w = np.array(image).shape[:2]

        # resize_image is expected to letterbox (pad with gray bars) to the
        # network input size and report the size of the un-padded content.
        net_h, net_w = self.input_shape[0], self.input_shape[1]
        image_data, nw, nh = resize_image(image, (net_w, net_h))

        # HWC -> CHW, then prepend the batch dimension.
        chw = np.transpose(preprocess_input(np.array(image_data, np.float32)), (2, 0, 1))
        image_data = np.expand_dims(chw, 0)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()

            # Forward pass; keep the only sample of the batch.
            pr = self.net(images)[0]
            # Channels last, then per-pixel class probabilities.
            pr = F.softmax(pr.permute(1, 2, 0), dim=-1).cpu().numpy()

            # Cut away the padding that was added around the content.
            pad_h = (net_h - nh) // 2
            pad_w = (net_w - nw) // 2
            pr = pr[int(pad_h):int(pad_h + nh), int(pad_w):int(pad_w + nw)]

            # Scale the probability maps back to the source resolution.
            pr = cv2.resize(pr, (src_w, src_h), interpolation=cv2.INTER_LINEAR)

            # Pick the most probable class for every pixel.
            pr = pr.argmax(axis=-1)

        return Image.fromarray(np.uint8(pr))

    def on_epoch_end(self, epoch, model_eval):
        """Run the mIoU evaluation if this epoch is due.

        Args:
            epoch: Index of the epoch that just finished.
            model_eval: Network to evaluate; it replaces ``self.net``.
        """
        if epoch % self.period != 0 or not self.eval_flag:
            return

        self.net = model_eval
        gt_dir = os.path.join(self.dataset_path, "SegmentationClass/")
        pred_dir = os.path.join(self.miou_out_path, 'detection-results')
        # pred_dir lives inside miou_out_path, so one call creates both;
        # exist_ok avoids the check-then-create race.
        os.makedirs(pred_dir, exist_ok=True)

        print("Get miou.")
        for image_id in tqdm(self.image_ids):
            image_path = os.path.join(self.dataset_path, "JPEGImages/" + image_id + ".jpg")
            # The context manager releases the source file handle as soon as
            # the prediction is done, even if prediction raises.
            with Image.open(image_path) as image:
                pred_png = self.get_miou_png(image)
            pred_png.save(os.path.join(pred_dir, image_id + ".png"))

        print("Calculate miou.")
        # Only the second value returned by compute_mIoU (per-class IoUs) is used.
        _, IoUs, _, _ = compute_mIoU(gt_dir, pred_dir, self.image_ids, self.num_classes, None)
        # nanmean skips classes whose IoU is undefined; scale to percent.
        temp_miou = np.nanmean(IoUs) * 100

        self.mious.append(temp_miou)
        self.epoches.append(epoch)

        with open(os.path.join(self.log_dir, "epoch_miou.txt"), 'a') as f:
            f.write(str(temp_miou))
            f.write("\n")

        # Redraw the whole curve from the accumulated history.
        plt.figure()
        plt.plot(self.epoches, self.mious, 'red', linewidth=2, label='train miou')

        plt.grid(True)
        plt.xlabel('Epoch')
        plt.ylabel('Miou')
        plt.title('A Miou Curve')
        plt.legend(loc="upper right")

        plt.savefig(os.path.join(self.log_dir, "epoch_miou.png"))
        plt.cla()
        plt.close("all")

        print("Get miou done.")
        # The predicted masks are only needed for scoring; drop them.
        shutil.rmtree(self.miou_out_path)

m0_51579041

【深度学习实战（17）】计算语义分割的性能指标mIOU

一、指标介绍

二、计算流程

三、相应代码

四、完整代码