最近,我一直在使用tensorflow对象检测api Zoo中的一些模型。
主要是coco ssd_inception和oidv4 ssd_mobilenet。问题是要标记图像上的检测结果。
作为示例,当我在this图像上运行oidv4检测器时,它会显示出现的标签。 This是我从oidv下载的类文件。
当我在this图像上使用coco模型时,会引发另一个问题:
label = "{}: {:.2f}%".format(CLASSES[idx],
IndexError: list index out of range
显然,当 cocoa 只有80个要检测的物体时,它将此图像标记为索引84。我也让我的 cocoa classes文件。
我当前用于检测的代码:
# import the necessary packages
from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import cv2
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
help="path to image")
ap.add_argument("-c", "--confidence", type=float, default=0.3,
help="minimum probability to filter weak detections")
args = vars(ap.parse_args())
# Leemos las clases disponibles en openImages
CLASSES = [line.rstrip('\n') for line in open('classes.txt', 'r')]
print(CLASSES)
# Le damos colores a las cajas para cada clase
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
# Importamos el modelo de red
cvNet = cv2.dnn.readNetFromTensorflow('frozen_inference_graph.pb', 'graph.pbtxt')
# Leemos una imagen
img = cv2.imread(args["image"])
# Obtenemos las dimensiones de la imagen
h = img.shape[0] # Alto
w = img.shape[1] # Ancho
cvNet.setInput(cv2.dnn.blobFromImage(img, size=(300, 300), swapRB=True, crop=False))
detections = cvNet.forward()
# loop over the detections
for i in np.arange(0, detections.shape[2]):
# extract the confidence (i.e., probability) associated with
# the prediction
confidence = detections[0, 0, i, 2]
# filter out weak detections by ensuring the `confidence` is
# greater than the minimum confidence
if confidence > args["confidence"]:
# extract the index of the class label from the
# `detections`, then compute the (x, y)-coordinates of
# the bounding box for the object
idx = int(detections[0, 0, i, 1])
print(idx )
box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
(startX, startY, endX, endY) = box.astype("int")
# draw the prediction on the frame
label = "{}: {:.2f}%".format(CLASSES[idx],
confidence * 100)
cv2.rectangle(img, (startX, startY), (endX, endY),
COLORS[idx], 2)
y = startY - 15 if startY - 15 > 15 else startY + 15
cv2.putText(img, label, (startX, y),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)
cv2.imshow('img', img)
cv2.waitKey()
在此先感谢您的帮助!
最佳答案
这里的问题是因为COCO dataset
将80 class names
放入90 class index
中。
您可以使用90类索引来创建标签,并使用预测的索引值进行映射。
以下是COCO dataset
的90个类的列表。
classes_90 = ["background", "person", "bicycle", "car", "motorcycle",
"airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant",
"unknown", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
"sheep", "cow", "elephant", "bear", "zebra", "giraffe", "unknown", "backpack",
"umbrella", "unknown", "unknown", "handbag", "tie", "suitcase", "frisbee", "skis",
"snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard",
"surfboard", "tennis racket", "bottle", "unknown", "wine glass", "cup", "fork", "knife",
"spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog",
"pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "unknown", "dining table",
"unknown", "unknown", "toilet", "unknown", "tv", "laptop", "mouse", "remote", "keyboard",
"cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "unknown",
"book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" ]
以下是修改后的代码,可以解决您的问题。
# import the necessary packages
from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import cv2
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
help="path to image")
ap.add_argument("-c", "--confidence", type=float, default=0.3,
help="minimum probability to filter weak detections")
args = vars(ap.parse_args())
# Leemos las clases disponibles en openImages
CLASSES = classes_90 #New list of classess with 90 classess.
print(CLASSES)
# Le damos colores a las cajas para cada clase
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
# Importamos el modelo de red
cvNet = cv2.dnn.readNetFromTensorflow('frozen_inference_graph.pb', 'graph.pbtxt')
# Leemos una imagen
img = cv2.imread(args["image"])
# Obtenemos las dimensiones de la imagen
h = img.shape[0] # Alto
w = img.shape[1] # Ancho
cvNet.setInput(cv2.dnn.blobFromImage(img, size=(300, 300), swapRB=True, crop=False))
detections = cvNet.forward()
# loop over the detections
for i in np.arange(0, detections.shape[2]):
# extract the confidence (i.e., probability) associated with
# the prediction
confidence = detections[0, 0, i, 2]
# filter out weak detections by ensuring the `confidence` is
# greater than the minimum confidence
if confidence > args["confidence"]:
# extract the index of the class label from the
# `detections`, then compute the (x, y)-coordinates of
# the bounding box for the object
idx = int(detections[0, 0, i, 1])
print(idx )
box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
(startX, startY, endX, endY) = box.astype("int")
# draw the prediction on the frame
label = "{}: {:.2f}%".format(CLASSES[idx],
confidence * 100)
cv2.rectangle(img, (startX, startY), (endX, endY),
COLORS[idx], 2)
y = startY - 15 if startY - 15 > 15 else startY + 15
cv2.putText(img, label, (startX, y),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)
cv2.imshow('img', img)
cv2.waitKey()
关于python - 如何从Zoo获得Tensorflow模型的类名,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/59397139/