我正在使用Microsoft Computer Vision阅读收据。
我得到的结果被划分到按列分组的区域中,例如数量、产品名称、金额分别位于三个不同的区域。
我希望整个产品列表是一个区域,并且其中每一行对应一个产品。
有没有什么方法可以配置Computer Vision来完成此任务?或者更有可能的是,既然所有单词的位置都可用,有没有什么好的技术或库可以用于对结果进行后处理?
下面是收据的图像和Computer Vision的结果。
{
"language": "sv",
"textAngle": 2.0999999999999632,
"orientation": "Up",
"regions": [
{
"boundingBox": "1012,450,660,326",
"lines": [
{
"boundingBox": "1362,450,76,30",
"words": [
{
"boundingBox": "1362,450,76,30",
"text": "JULA"
}
]
},
{
"boundingBox": "1207,486,465,49",
"words": [
{
"boundingBox": "1207,502,172,33",
"text": "Ekslinsan"
},
{
"boundingBox": "1400,497,51,30",
"text": "3B,"
},
{
"boundingBox": "1479,491,95,33",
"text": "25467"
},
{
"boundingBox": "1595,486,77,32",
"text": "VALA"
}
]
},
{
"boundingBox": "1304,539,265,38",
"words": [
{
"boundingBox": "1304,539,265,38",
"text": "SE5S6944785601"
}
]
},
{
"boundingBox": "1245,584,369,44",
"words": [
{
"boundingBox": "1245,594,148,34",
"text": "Telefon:"
},
{
"boundingBox": "1421,584,193,37",
"text": "042-324040"
}
]
},
{
"boundingBox": "1012,695,269,35",
"words": [
{
"boundingBox": "1012,702,75,28",
"text": "Kund"
},
{
"boundingBox": "1109,695,172,33",
"text": "072202787"
}
]
},
{
"boundingBox": "1109,738,289,38",
"words": [
{
"boundingBox": "1109,744,133,32",
"text": "LILLVIK"
},
{
"boundingBox": "1265,738,133,32",
"text": "ANDREAS"
}
]
}
]
},
{
"boundingBox": "1085,845,14,516",
"lines": [
{
"boundingBox": "1090,845,9,29",
"words": [
{
"boundingBox": "1090,845,9,29",
"text": "1"
}
]
},
{
"boundingBox": "1087,1037,9,28",
"words": [
{
"boundingBox": "1087,1037,9,28",
"text": "1"
}
]
},
{
"boundingBox": "1086,1133,9,27",
"words": [
{
"boundingBox": "1086,1133,9,27",
"text": "I"
}
]
},
{
"boundingBox": "1085,1332,9,29",
"words": [
{
"boundingBox": "1085,1332,9,29",
"text": "1"
}
]
}
]
},
{
"boundingBox": "1122,839,454,573",
"lines": [
{
"boundingBox": "1128,839,173,33",
"words": [
{
"boundingBox": "1128,843,36,29",
"text": "ST"
},
{
"boundingBox": "1186,839,115,30",
"text": "661107"
}
]
},
{
"boundingBox": "1127,879,389,41",
"words": [
{
"boundingBox": "1127,887,232,33",
"text": "VERKTYGSLÅDR"
},
{
"boundingBox": "1382,883,36,28",
"text": "JC"
},
{
"boundingBox": "1441,882,16,26",
"text": "5"
},
{
"boundingBox": "1481,879,35,28",
"text": "ÅR"
}
]
},
{
"boundingBox": "1126,935,173,34",
"words": [
{
"boundingBox": "1126,940,36,29",
"text": "ST"
},
{
"boundingBox": "1187,935,112,32",
"text": "181460"
}
]
},
{
"boundingBox": "1126,967,450,50",
"words": [
{
"boundingBox": "1126,987,75,30",
"text": "BORR"
},
{
"boundingBox": "1224,977,193,35",
"text": "GLAS/KRKEL"
},
{
"boundingBox": "1440,974,16,27",
"text": "ø"
},
{
"boundingBox": "1482,971,34,27",
"text": "10"
},
{
"boundingBox": "1539,967,37,28",
"text": "MM"
}
]
},
{
"boundingBox": "1125,1027,173,37",
"words": [
{
"boundingBox": "1125,1036,36,28",
"text": "ST"
},
{
"boundingBox": "1185,1027,113,34",
"text": "181740"
}
]
},
{
"boundingBox": "1124,1062,432,49",
"words": [
{
"boundingBox": "1124,1071,252,40",
"text": "UNIVERSALBORR"
},
{
"boundingBox": "1400,1066,96,32",
"text": "8X120"
},
{
"boundingBox": "1519,1062,37,30",
"text": "MM"
}
]
},
{
"boundingBox": "1123,1125,175,34",
"words": [
{
"boundingBox": "1123,1129,36,30",
"text": "ST"
},
{
"boundingBox": "1183,1125,115,32",
"text": "181738"
}
]
},
{
"boundingBox": "1122,1164,416,44",
"words": [
{
"boundingBox": "1122,1170,255,38",
"text": "UNIVERSRLBORR"
},
{
"boundingBox": "1501,1164,37,31",
"text": "MM"
}
]
},
{
"boundingBox": "1123,1225,170,33",
"words": [
{
"boundingBox": "1123,1228,36,30",
"text": "ST"
},
{
"boundingBox": "1183,1225,110,32",
"text": "316401"
}
]
},
{
"boundingBox": "1123,1270,355,39",
"words": [
{
"boundingBox": "1123,1275,216,34",
"text": "LÅSCYLINDER"
},
{
"boundingBox": "1362,1270,116,33",
"text": "2-PACK"
}
]
},
{
"boundingBox": "1123,1327,177,34",
"words": [
{
"boundingBox": "1123,1330,37,31",
"text": "ST"
},
{
"boundingBox": "1183,1327,117,32",
"text": "396026"
}
]
},
{
"boundingBox": "1124,1373,356,39",
"words": [
{
"boundingBox": "1124,1377,216,35",
"text": "LÅSCYLINDER"
},
{
"boundingBox": "1363,1373,117,33",
"text": "2-PRCK"
}
]
}
]
},
{
"boundingBox": "1644,820,118,524",
"lines": [
{
"boundingBox": "1658,820,96,31",
"words": [
{
"boundingBox": "1658,820,96,31",
"text": "79,00"
}
]
},
{
"boundingBox": "1659,912,97,31",
"words": [
{
"boundingBox": "1659,916,50,27",
"text": "44,"
},
{
"boundingBox": "1719,912,37,28",
"text": "90"
}
]
},
{
"boundingBox": "1659,1004,98,32",
"words": [
{
"boundingBox": "1659,1007,51,29",
"text": "69,"
},
{
"boundingBox": "1720,1004,37,28",
"text": "90"
}
]
},
{
"boundingBox": "1661,1103,97,35",
"words": [
{
"boundingBox": "1661,1103,97,35",
"text": "49,90"
}
]
},
{
"boundingBox": "1644,1309,118,35",
"words": [
{
"boundingBox": "1644,1309,118,35",
"text": "299,00"
}
]
}
]
},
{
"boundingBox": "1064,1469,620,45",
"lines": [
{
"boundingBox": "1064,1469,620,45",
"words": [
{
"boundingBox": "1064,1481,237,33",
"text": "-Rabattcheck"
},
{
"boundingBox": "1324,1486,51,24",
"text": "nr:"
},
{
"boundingBox": "1384,1469,300,38",
"text": "935011035567095"
}
]
}
]
},
{
"boundingBox": "1123,1584,159,82",
"lines": [
{
"boundingBox": "1123,1584,159,33",
"words": [
{
"boundingBox": "1123,1584,159,33",
"text": "DELSUMMA"
}
]
},
{
"boundingBox": "1143,1635,116,31",
"words": [
{
"boundingBox": "1143,1635,116,31",
"text": "Rabatt"
}
]
}
]
},
{
"boundingBox": "1609,1570,180,189",
"lines": [
{
"boundingBox": "1609,1570,160,36",
"words": [
{
"boundingBox": "1609,1575,11,31",
"text": "|"
},
{
"boundingBox": "1648,1570,121,34",
"text": "041,70"
}
]
},
{
"boundingBox": "1690,1621,99,34",
"words": [
{
"boundingBox": "1690,1621,99,34",
"text": "50,00"
}
]
},
{
"boundingBox": "1651,1725,120,34",
"words": [
{
"boundingBox": "1651,1727,53,32",
"text": "991"
},
{
"boundingBox": "1715,1746,9,13",
"text": ","
},
{
"boundingBox": "1732,1725,39,32",
"text": "70"
}
]
}
]
},
{
"boundingBox": "992,1737,310,1226",
"lines": [
{
"boundingBox": "1123,1737,179,35",
"words": [
{
"boundingBox": "1123,1737,179,35",
"text": "SLUTSUMMA"
}
]
},
{
"boundingBox": "1036,2756,227,35",
"words": [
{
"boundingBox": "1036,2756,227,35",
"text": "Totalbelopp"
}
]
},
{
"boundingBox": "1140,2811,124,37",
"words": [
{
"boundingBox": "1140,2811,53,35",
"text": "991"
},
{
"boundingBox": "1207,2833,8,15",
"text": "/"
},
{
"boundingBox": "1225,2811,39,34",
"text": "70"
}
]
},
{
"boundingBox": "992,2927,271,36",
"words": [
{
"boundingBox": "992,2928,159,35",
"text": "Säljare:"
},
{
"boundingBox": "1182,2927,81,33",
"text": "7688"
}
]
}
]
},
{
"boundingBox": "1330,2754,145,92",
"lines": [
{
"boundingBox": "1330,2754,144,34",
"words": [
{
"boundingBox": "1330,2754,39,33",
"text": "Ex"
},
{
"boundingBox": "1394,2754,80,34",
"text": "Moms"
}
]
},
{
"boundingBox": "1352,2809,123,37",
"words": [
{
"boundingBox": "1352,2809,123,37",
"text": "793,36"
}
]
}
]
},
{
"boundingBox": "1563,2752,126,92",
"lines": [
{
"boundingBox": "1563,2752,125,33",
"words": [
{
"boundingBox": "1563,2752,82,33",
"text": "Moms"
},
{
"boundingBox": "1670,2755,18,27",
"text": "%"
}
]
},
{
"boundingBox": "1586,2808,103,36",
"words": [
{
"boundingBox": "1586,2808,103,36",
"text": "25,00"
}
]
}
]
},
{
"boundingBox": "1780,2751,123,93",
"lines": [
{
"boundingBox": "1820,2751,83,33",
"words": [
{
"boundingBox": "1820,2751,83,33",
"text": "Moms"
}
]
},
{
"boundingBox": "1780,2807,123,37",
"words": [
{
"boundingBox": "1780,2807,123,37",
"text": "198,34"
}
]
}
]
},
{
"boundingBox": "985,2924,966,573",
"lines": [
{
"boundingBox": "1523,2924,83,33",
"words": [
{
"boundingBox": "1523,2924,83,33",
"text": "7618"
}
]
},
{
"boundingBox": "1288,2926,167,33",
"words": [
{
"boundingBox": "1288,2939,17,7",
"text": "-"
},
{
"boundingBox": "1330,2926,125,33",
"text": "Sabina"
}
]
},
{
"boundingBox": "1182,2981,468,36",
"words": [
{
"boundingBox": "1182,2983,38,34",
"text": "24"
},
{
"boundingBox": "1245,2982,146,34",
"text": "oktober"
},
{
"boundingBox": "1416,2982,82,34",
"text": "2016"
},
{
"boundingBox": "1547,2982,10,33",
"text": "1"
},
{
"boundingBox": "1571,2981,79,34",
"text": "7:20"
}
]
},
{
"boundingBox": "991,2985,103,33",
"words": [
{
"boundingBox": "991,2985,103,33",
"text": "Datum"
}
]
},
{
"boundingBox": "1161,3040,403,34",
"words": [
{
"boundingBox": "1161,3040,96,34",
"text": "44601"
},
{
"boundingBox": "1288,3040,140,34",
"text": "Kvitto:"
},
{
"boundingBox": "1460,3040,104,34",
"text": "51756"
}
]
},
{
"boundingBox": "990,3042,103,33",
"words": [
{
"boundingBox": "990,3042,103,33",
"text": "Kassa"
}
]
},
{
"boundingBox": "1096,3157,728,40",
"words": [
{
"boundingBox": "1096,3159,105,38",
"text": "Spara"
},
{
"boundingBox": "1225,3157,163,39",
"text": "kvittot,"
},
{
"boundingBox": "1418,3157,127,39",
"text": "gäller"
},
{
"boundingBox": "1570,3169,63,26",
"text": "som"
},
{
"boundingBox": "1657,3158,167,39",
"text": "garanti."
}
]
},
{
"boundingBox": "1268,3217,388,39",
"words": [
{
"boundingBox": "1268,3217,103,39",
"text": "Öppet"
},
{
"boundingBox": "1397,3218,62,38",
"text": "köp"
},
{
"boundingBox": "1484,3218,41,37",
"text": "30"
},
{
"boundingBox": "1550,3218,106,38",
"text": "dager"
}
]
},
{
"boundingBox": "1290,3276,317,39",
"words": [
{
"boundingBox": "1290,3276,192,38",
"text": "VÄLKOMMEN"
},
{
"boundingBox": "1506,3278,101,37",
"text": "ÅTER!"
}
]
},
{
"boundingBox": "1116,3335,719,42",
"words": [
{
"boundingBox": "1116,3337,41,36",
"text": "Om"
},
{
"boundingBox": "1182,3335,82,38",
"text": "ditt"
},
{
"boundingBox": "1290,3346,84,28",
"text": "namn"
},
{
"boundingBox": "1398,3337,63,38",
"text": "och"
},
{
"boundingBox": "1485,3349,261,28",
"text": "personnummer"
},
{
"boundingBox": "1771,3338,64,37",
"text": "har"
}
]
},
{
"boundingBox": "1032,3395,894,42",
"words": [
{
"boundingBox": "1032,3397,146,36",
"text": "lämnats"
},
{
"boundingBox": "1204,3395,62,38",
"text": "för"
},
{
"boundingBox": "1290,3395,61,38",
"text": "att"
},
{
"boundingBox": "1377,3399,194,36",
"text": "genomföra"
},
{
"boundingBox": "1596,3399,61,36",
"text": "ett"
},
{
"boundingBox": "1685,3399,241,38",
"text": "JulaPro-köp"
}
]
},
{
"boundingBox": "985,3455,966,42",
"words": [
{
"boundingBox": "985,3456,193,37",
"text": "behandlar"
},
{
"boundingBox": "1203,3455,85,37",
"text": "Jula"
},
{
"boundingBox": "1312,3456,84,37",
"text": "dina"
},
{
"boundingBox": "1421,3458,195,39",
"text": "uppgifter"
},
{
"boundingBox": "1645,3462,12,33",
"text": "i"
},
{
"boundingBox": "1686,3458,173,38",
"text": "enlighet"
},
{
"boundingBox": "1886,3461,65,36",
"text": "med"
}
]
}
]
}
]
}
最佳答案
这不是计算机视觉问题,而更像是NLP/文本模式识别问题。换句话说,任何OCR都不会做您想做的事;它们只负责从图像中提取文本。
通常的方法是收集许多不同类型的收据,研究其结构,然后使用基于规则的方法或基于机器学习的方法对每条信息进行分类。分类器可以具有{项目名称,项目价格,小计,总计,标题,其他}之类的类别。您可以使用边界框来形成网格单元,并将相邻单元用作特征。
教程
Applying OCR Technology for Receipt Recognition。
云服务
开源项目