python - Python:在视频中查找笔迹

您是否知道一种算法，可以判断图像上是否有手写内容？我对手写的具体内容不感兴趣，只想知道图像上是否存在手写。
我有一段视频，视频中有人在幻灯片上手写。我的目标是确定幻灯片已被手写内容填充了多少。

相关视频可以在此处下载:http://www.filedropper.com/00_6
对于这个特定的视频，Quantify how much a slide has been filled with handwriting中已经提出了一个很好的解决方案
该解决方案基于将用于手写的特定颜色的数量相加。但是，如果笔迹不是蓝色，而是非笔迹上也可以找到的任何其他颜色，则此方法将不起作用。
因此，我有兴趣知道，是否存在确定图像上是否存在笔迹的更通用的解决方案？
到目前为止，我所做的是:
我当时想提取图像的轮廓，然后根据轮廓的弯曲程度以某种方式检测手写部分(但我不知道该怎么做)。但是，这可能不是最好的主意，因为它并不总是正确的……

import cv2
import matplotlib.pyplot as plt



# Placeholder: replace with the path of the image (e.g. one video frame) to analyse.
image_path = "PATH TO IMAGE"

img = cv2.imread(image_path)
if img is None:
    # Fail early with a clear message instead of an AttributeError on `img.shape`.
    raise FileNotFoundError(f"Could not read image: {image_path}")
print("img shape=", img.shape)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

cv2.imshow("image", gray)
cv2.waitKey(1)

# Extract all contours, starting from the Canny edge map of the grayscale image.
edged = cv2.Canny(gray, 30, 200)
cv2.waitKey(0)

# Pass a copy because older OpenCV releases modify the image given to
# findContours. Taking the last two return values keeps the unpacking valid
# whether the installed version returns two values or three.
contours, hierarchy = cv2.findContours(
    edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2:]

cv2.imshow('Canny Edges After Contouring', edged)
cv2.waitKey(0)

print("Number of Contours found = " + str(len(contours)))

# Draw every contour (index -1) onto the colour image, in green, 3 px thick.
cv2.drawContours(img, contours, -1, (0, 255, 0), 3)

cv2.imshow('Contours', img)
cv2.waitKey(0)

最佳答案

您可以通过遮罩模板中的像素来识别手写所占用的空间，然后对其他帧与模板之间的差异进行相同的处理。为此，您可以使用膨胀（dilation）、开运算（opening）和阈值化（thresholding）。
让我们从template开始。让我们确定我们将要掩盖的部分:

import cv2
import numpy as np

# Load the reference ("template") slide image.
template = cv2.imread('template.jpg')
if template is None:
    # Fail early with a clear message rather than an obscure error in later cv2 calls.
    raise FileNotFoundError("Could not read 'template.jpg'")

现在，让我们扩大占用的像素以形成一个区域，稍后我们将对其进行遮罩(隐藏):

# Work on a single-channel (grayscale) version of the template.
template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
# 5x5 structuring element of ones used for the dilation below.
kernel = np.ones((5, 5),np.uint8)
# Invert the intensities (255 - value) and dilate 5 times so that the inverted
# content grows into a region. This presumably targets dark content on a light
# slide; confirm against the actual template image.
dilation = cv2.dilate(255 - template, kernel,iterations = 5)

然后，我们通过阈值化将其转换为黑白蒙版:

# Inverse binary threshold at 25 with max value 255: the result is a
# two-level (0 / 255) image. The returned threshold value is discarded.
_, thresh = cv2.threshold(dilation,25,255,cv2.THRESH_BINARY_INV)

在以后的帧中，我们将所有这些像素都变成白色，从而从图片中减去该蒙版。例如:

import numpy as np
import cv2
# Read up to the first 500 frames of the video into memory.
vidcap = cv2.VideoCapture('0_0.mp4')
success, image = vidcap.read()
count = 0
frames = []

# Stop as soon as a read fails (e.g. the video has fewer than 500 frames) so
# that no invalid frame is ever stored in `frames`.
while success and count < 500:
    frames.append(image)
    success, image = vidcap.read()
    count += 1

# The capture is not needed once the frames are in memory.
vidcap.release()

# Index arrays of every pixel where the thresholded template is 0; these are
# the pixels that get whited out in the frames.
mask = np.where(thresh == 0)

# Work on a copy so the frame stored in `frames` is not modified in place
# (frame 300 is later read again when frames are sampled for the history).
example = frames[300].copy()
example[mask] = [255, 255, 255]
cv2.imshow('', example)
cv2.waitKey(0)

现在，我们将创建一个函数，该函数返回模板与给定图片之间的差异。我们还将使用开运算（opening）来消除残留的孤立像素，否则这些像素会使结果变得难看。（由于结果图像是白底黑字，代码中对应的调用是 cv2.MORPH_CLOSE。）

def difference_with_mask(image):
    """Return a binary image of the content in *image* outside the template mask.

    The frame is converted to grayscale, inverted, dilated (5x5 kernel,
    5 iterations) and inverse-thresholded at 25. Every pixel indexed by the
    module-level ``mask`` is then forced to 255, and a morphological closing
    with the same kernel is applied before returning.

    Args:
        image: BGR frame, as accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.

    Returns:
        The single-channel result of the closing step.
    """
    structuring = np.ones((5, 5), np.uint8)
    inverted = 255 - cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    spread = cv2.dilate(inverted, structuring, iterations=5)
    binary = cv2.threshold(spread, 25, 255, cv2.THRESH_BINARY_INV)[1]
    # Force the template region to 255 so it is ignored.
    binary[mask] = 255
    return cv2.morphologyEx(binary, cv2.MORPH_CLOSE, structuring)

# Show the result for frame 400 and wait for a key press.
cv2.imshow('', difference_with_mask(frames[400]))
cv2.waitKey(0)

为了避免把手也检测为手写，我建议您不要对每个单独的帧直接使用遮罩，而是每隔 30 帧采样一帧，并对最近 15 个采样帧取第 95 百分位数……稍等，请看这个:

from collections import deque

# Rolling window of the 15 most recently sampled frames (one sample every 30 frames).
history = deque(maxlen=15)

results = []
percentile_frame = None
for ix, frame in enumerate(frames):
    if ix % 30 == 0:
        history.append(frame)
        # `history` only changes here, so the per-pixel 95th percentile is
        # recomputed only when a new sample arrives and reused in between.
        percentile_frame = np.quantile(history, 0.95, axis=0)
    results.append(percentile_frame)
    print(ix)

现在，示例帧变成了下面这样(手被去除了，因为在最近 15 个每隔 30 帧采样的帧中，手大多不在画面内):

如您所见，手写的一小部分丢失了。稍后会出现，因为我们正在执行与时间相关的百分位数转换。稍后您将看到:在我的第18400帧示例中，出现了上图中缺少的文本。然后，您可以使用我提供的功能，结果如下:

现在我们开始!请注意，这种解决方案(不包括手)将需要更长的时间进行计算，因为需要完成一些计算。仅使用不关心手的图像就可以立即进行计算，以至于您可以在网络摄像头feed上实时运行它。
最终示例:
这是框架18,400:

最终图片:

如果希望遮罩将文本更细地包裹起来，可以使用此功能:

完整代码:

import os
import numpy as np
import cv2
from collections import deque

# Read up to the first 500 frames of the video into a bounded buffer.
vidcap = cv2.VideoCapture('0_0.mp4')
success, image = vidcap.read()
count = 0
frames = deque(maxlen=700)

# Stop as soon as a read fails (e.g. the video has fewer than 500 frames) so
# that no invalid frame is ever stored in `frames`.
while success and count < 500:
    frames.append(image)
    success, image = vidcap.read()
    count += 1

# The capture is not needed once the frames are in memory.
vidcap.release()

# Load the reference ("template") slide image.
template = cv2.imread('template.jpg')
if template is None:
    # Fail early with a clear message rather than an obscure error in cvtColor.
    raise FileNotFoundError("Could not read 'template.jpg'")
template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

# Invert the grayscale template and dilate it (5x5 kernel, 5 iterations) so
# that its content grows into a region.
kernel = np.ones((5, 5), np.uint8)
dilation = cv2.dilate(255 - template, kernel, iterations=5)

cv2.imwrite('dilation.jpg', dilation)
cv2.imshow('', dilation)
cv2.waitKey(0)

# Inverse binary threshold at 25 turns the dilated image into a 0 / 255 mask image.
_, thresh = cv2.threshold(dilation, 25, 255, cv2.THRESH_BINARY_INV)
cv2.imwrite('thresh.jpg', thresh)
cv2.imshow('', thresh)
cv2.waitKey(0)

# Index arrays of every pixel where the thresholded template is 0; these are
# the pixels that get whited out in the frames.
mask = np.where(thresh == 0)

# Work on a copy so the frame stored in `frames` stays untouched when the
# example is masked below.
example = frames[400].copy()
cv2.imwrite('original.jpg', example)
cv2.imshow('', example)
cv2.waitKey(0)

# White out the template region of the example frame.
example[mask] = 255
cv2.imwrite('example_masked.jpg', example)
cv2.imshow('', example)
cv2.waitKey(0)

def difference_with_mask(image):
    """Return a binary image of the content in *image* outside the template mask.

    The frame is converted to grayscale, inverted, dilated (5x5 kernel,
    5 iterations) and inverse-thresholded at 25. Every pixel indexed by the
    module-level ``mask`` is then forced to 255, and a morphological closing
    with the same kernel is applied before returning.

    Args:
        image: BGR frame, as accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.

    Returns:
        The single-channel result of the closing step.
    """
    structuring = np.ones((5, 5), np.uint8)
    inverted = 255 - cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    spread = cv2.dilate(inverted, structuring, iterations=5)
    binary = cv2.threshold(spread, 25, 255, cv2.THRESH_BINARY_INV)[1]
    # Force the template region to 255 so it is ignored.
    binary[mask] = 255
    return cv2.morphologyEx(binary, cv2.MORPH_CLOSE, structuring)


# Compute the handwriting mask for frame 400 once, then display and save it.
masked_example = difference_with_mask(frames[400])
cv2.imshow('', masked_example)
cv2.waitKey(0)

cv2.imwrite('masked_example.jpg', masked_example)

from collections import deque

# Rolling window of the 15 most recently sampled frames (one sample every 30 frames).
history = deque(maxlen=15)

results = []
percentile_frame = None
for ix, frame in enumerate(frames):
    if ix % 30 == 0:
        history.append(frame)
        # `history` only changes here, so the per-pixel 95th percentile is
        # recomputed only when a new sample arrives and reused in between.
        percentile_frame = np.quantile(history, 0.95, axis=0)
    results.append(percentile_frame)
    print(ix)
    # Safety cap on the number of processed frames.
    if ix > 500:
        break


# Frame 400 as currently stored in `frames`.
cv2.imshow('', frames[400])
cv2.waitKey(0)

# Percentile-filtered version of frame 400, converted once to 8-bit for display and saving.
percentiled_frame = results[400].astype(np.uint8)
cv2.imshow('', percentiled_frame)
cv2.imwrite('percentiled_frame.jpg', percentiled_frame)
cv2.waitKey(0)

# Handwriting mask computed once from the percentile-filtered frame.
final_mask = difference_with_mask(percentiled_frame)
cv2.imshow('', final_mask)
cv2.imwrite('final.jpg', final_mask)
cv2.waitKey(0)