我正在使用Microsoft Computer Vision阅读收据。

我得到的结果被划分为按列分组的区域,例如数量、产品名称、金额分别位于三个不同的区域中。

我更希望整个产品列表是一个区域,其中每一行对应一个产品。

有没有什么方法可以配置Computer Vision来完成此任务?或者更有可能的是,既然所有单词的位置都可用,有没有什么好的技术或库可以用于对结果进行后处理?

下面是收据的图像和Computer Vision的识别结果。

c# - 处理OCR/计算机视觉结果以匹配收据结构-LMLPHP

{
  "language": "sv",
  "textAngle": 2.0999999999999632,
  "orientation": "Up",
  "regions": [
    {
      "boundingBox": "1012,450,660,326",
      "lines": [
        {
          "boundingBox": "1362,450,76,30",
          "words": [
            {
              "boundingBox": "1362,450,76,30",
              "text": "JULA"
            }
          ]
        },
        {
          "boundingBox": "1207,486,465,49",
          "words": [
            {
              "boundingBox": "1207,502,172,33",
              "text": "Ekslinsan"
            },
            {
              "boundingBox": "1400,497,51,30",
              "text": "3B,"
            },
            {
              "boundingBox": "1479,491,95,33",
              "text": "25467"
            },
            {
              "boundingBox": "1595,486,77,32",
              "text": "VALA"
            }
          ]
        },
        {
          "boundingBox": "1304,539,265,38",
          "words": [
            {
              "boundingBox": "1304,539,265,38",
              "text": "SE5S6944785601"
            }
          ]
        },
        {
          "boundingBox": "1245,584,369,44",
          "words": [
            {
              "boundingBox": "1245,594,148,34",
              "text": "Telefon:"
            },
            {
              "boundingBox": "1421,584,193,37",
              "text": "042-324040"
            }
          ]
        },
        {
          "boundingBox": "1012,695,269,35",
          "words": [
            {
              "boundingBox": "1012,702,75,28",
              "text": "Kund"
            },
            {
              "boundingBox": "1109,695,172,33",
              "text": "072202787"
            }
          ]
        },
        {
          "boundingBox": "1109,738,289,38",
          "words": [
            {
              "boundingBox": "1109,744,133,32",
              "text": "LILLVIK"
            },
            {
              "boundingBox": "1265,738,133,32",
              "text": "ANDREAS"
            }
          ]
        }
      ]
    },
    {
      "boundingBox": "1085,845,14,516",
      "lines": [
        {
          "boundingBox": "1090,845,9,29",
          "words": [
            {
              "boundingBox": "1090,845,9,29",
              "text": "1"
            }
          ]
        },
        {
          "boundingBox": "1087,1037,9,28",
          "words": [
            {
              "boundingBox": "1087,1037,9,28",
              "text": "1"
            }
          ]
        },
        {
          "boundingBox": "1086,1133,9,27",
          "words": [
            {
              "boundingBox": "1086,1133,9,27",
              "text": "I"
            }
          ]
        },
        {
          "boundingBox": "1085,1332,9,29",
          "words": [
            {
              "boundingBox": "1085,1332,9,29",
              "text": "1"
            }
          ]
        }
      ]
    },
    {
      "boundingBox": "1122,839,454,573",
      "lines": [
        {
          "boundingBox": "1128,839,173,33",
          "words": [
            {
              "boundingBox": "1128,843,36,29",
              "text": "ST"
            },
            {
              "boundingBox": "1186,839,115,30",
              "text": "661107"
            }
          ]
        },
        {
          "boundingBox": "1127,879,389,41",
          "words": [
            {
              "boundingBox": "1127,887,232,33",
              "text": "VERKTYGSLÅDR"
            },
            {
              "boundingBox": "1382,883,36,28",
              "text": "JC"
            },
            {
              "boundingBox": "1441,882,16,26",
              "text": "5"
            },
            {
              "boundingBox": "1481,879,35,28",
              "text": "ÅR"
            }
          ]
        },
        {
          "boundingBox": "1126,935,173,34",
          "words": [
            {
              "boundingBox": "1126,940,36,29",
              "text": "ST"
            },
            {
              "boundingBox": "1187,935,112,32",
              "text": "181460"
            }
          ]
        },
        {
          "boundingBox": "1126,967,450,50",
          "words": [
            {
              "boundingBox": "1126,987,75,30",
              "text": "BORR"
            },
            {
              "boundingBox": "1224,977,193,35",
              "text": "GLAS/KRKEL"
            },
            {
              "boundingBox": "1440,974,16,27",
              "text": "ø"
            },
            {
              "boundingBox": "1482,971,34,27",
              "text": "10"
            },
            {
              "boundingBox": "1539,967,37,28",
              "text": "MM"
            }
          ]
        },
        {
          "boundingBox": "1125,1027,173,37",
          "words": [
            {
              "boundingBox": "1125,1036,36,28",
              "text": "ST"
            },
            {
              "boundingBox": "1185,1027,113,34",
              "text": "181740"
            }
          ]
        },
        {
          "boundingBox": "1124,1062,432,49",
          "words": [
            {
              "boundingBox": "1124,1071,252,40",
              "text": "UNIVERSALBORR"
            },
            {
              "boundingBox": "1400,1066,96,32",
              "text": "8X120"
            },
            {
              "boundingBox": "1519,1062,37,30",
              "text": "MM"
            }
          ]
        },
        {
          "boundingBox": "1123,1125,175,34",
          "words": [
            {
              "boundingBox": "1123,1129,36,30",
              "text": "ST"
            },
            {
              "boundingBox": "1183,1125,115,32",
              "text": "181738"
            }
          ]
        },
        {
          "boundingBox": "1122,1164,416,44",
          "words": [
            {
              "boundingBox": "1122,1170,255,38",
              "text": "UNIVERSRLBORR"
            },
            {
              "boundingBox": "1501,1164,37,31",
              "text": "MM"
            }
          ]
        },
        {
          "boundingBox": "1123,1225,170,33",
          "words": [
            {
              "boundingBox": "1123,1228,36,30",
              "text": "ST"
            },
            {
              "boundingBox": "1183,1225,110,32",
              "text": "316401"
            }
          ]
        },
        {
          "boundingBox": "1123,1270,355,39",
          "words": [
            {
              "boundingBox": "1123,1275,216,34",
              "text": "LÅSCYLINDER"
            },
            {
              "boundingBox": "1362,1270,116,33",
              "text": "2-PACK"
            }
          ]
        },
        {
          "boundingBox": "1123,1327,177,34",
          "words": [
            {
              "boundingBox": "1123,1330,37,31",
              "text": "ST"
            },
            {
              "boundingBox": "1183,1327,117,32",
              "text": "396026"
            }
          ]
        },
        {
          "boundingBox": "1124,1373,356,39",
          "words": [
            {
              "boundingBox": "1124,1377,216,35",
              "text": "LÅSCYLINDER"
            },
            {
              "boundingBox": "1363,1373,117,33",
              "text": "2-PRCK"
            }
          ]
        }
      ]
    },
    {
      "boundingBox": "1644,820,118,524",
      "lines": [
        {
          "boundingBox": "1658,820,96,31",
          "words": [
            {
              "boundingBox": "1658,820,96,31",
              "text": "79,00"
            }
          ]
        },
        {
          "boundingBox": "1659,912,97,31",
          "words": [
            {
              "boundingBox": "1659,916,50,27",
              "text": "44,"
            },
            {
              "boundingBox": "1719,912,37,28",
              "text": "90"
            }
          ]
        },
        {
          "boundingBox": "1659,1004,98,32",
          "words": [
            {
              "boundingBox": "1659,1007,51,29",
              "text": "69,"
            },
            {
              "boundingBox": "1720,1004,37,28",
              "text": "90"
            }
          ]
        },
        {
          "boundingBox": "1661,1103,97,35",
          "words": [
            {
              "boundingBox": "1661,1103,97,35",
              "text": "49,90"
            }
          ]
        },
        {
          "boundingBox": "1644,1309,118,35",
          "words": [
            {
              "boundingBox": "1644,1309,118,35",
              "text": "299,00"
            }
          ]
        }
      ]
    },
    {
      "boundingBox": "1064,1469,620,45",
      "lines": [
        {
          "boundingBox": "1064,1469,620,45",
          "words": [
            {
              "boundingBox": "1064,1481,237,33",
              "text": "-Rabattcheck"
            },
            {
              "boundingBox": "1324,1486,51,24",
              "text": "nr:"
            },
            {
              "boundingBox": "1384,1469,300,38",
              "text": "935011035567095"
            }
          ]
        }
      ]
    },
    {
      "boundingBox": "1123,1584,159,82",
      "lines": [
        {
          "boundingBox": "1123,1584,159,33",
          "words": [
            {
              "boundingBox": "1123,1584,159,33",
              "text": "DELSUMMA"
            }
          ]
        },
        {
          "boundingBox": "1143,1635,116,31",
          "words": [
            {
              "boundingBox": "1143,1635,116,31",
              "text": "Rabatt"
            }
          ]
        }
      ]
    },
    {
      "boundingBox": "1609,1570,180,189",
      "lines": [
        {
          "boundingBox": "1609,1570,160,36",
          "words": [
            {
              "boundingBox": "1609,1575,11,31",
              "text": "|"
            },
            {
              "boundingBox": "1648,1570,121,34",
              "text": "041,70"
            }
          ]
        },
        {
          "boundingBox": "1690,1621,99,34",
          "words": [
            {
              "boundingBox": "1690,1621,99,34",
              "text": "50,00"
            }
          ]
        },
        {
          "boundingBox": "1651,1725,120,34",
          "words": [
            {
              "boundingBox": "1651,1727,53,32",
              "text": "991"
            },
            {
              "boundingBox": "1715,1746,9,13",
              "text": ","
            },
            {
              "boundingBox": "1732,1725,39,32",
              "text": "70"
            }
          ]
        }
      ]
    },
    {
      "boundingBox": "992,1737,310,1226",
      "lines": [
        {
          "boundingBox": "1123,1737,179,35",
          "words": [
            {
              "boundingBox": "1123,1737,179,35",
              "text": "SLUTSUMMA"
            }
          ]
        },
        {
          "boundingBox": "1036,2756,227,35",
          "words": [
            {
              "boundingBox": "1036,2756,227,35",
              "text": "Totalbelopp"
            }
          ]
        },
        {
          "boundingBox": "1140,2811,124,37",
          "words": [
            {
              "boundingBox": "1140,2811,53,35",
              "text": "991"
            },
            {
              "boundingBox": "1207,2833,8,15",
              "text": "/"
            },
            {
              "boundingBox": "1225,2811,39,34",
              "text": "70"
            }
          ]
        },
        {
          "boundingBox": "992,2927,271,36",
          "words": [
            {
              "boundingBox": "992,2928,159,35",
              "text": "Säljare:"
            },
            {
              "boundingBox": "1182,2927,81,33",
              "text": "7688"
            }
          ]
        }
      ]
    },
    {
      "boundingBox": "1330,2754,145,92",
      "lines": [
        {
          "boundingBox": "1330,2754,144,34",
          "words": [
            {
              "boundingBox": "1330,2754,39,33",
              "text": "Ex"
            },
            {
              "boundingBox": "1394,2754,80,34",
              "text": "Moms"
            }
          ]
        },
        {
          "boundingBox": "1352,2809,123,37",
          "words": [
            {
              "boundingBox": "1352,2809,123,37",
              "text": "793,36"
            }
          ]
        }
      ]
    },
    {
      "boundingBox": "1563,2752,126,92",
      "lines": [
        {
          "boundingBox": "1563,2752,125,33",
          "words": [
            {
              "boundingBox": "1563,2752,82,33",
              "text": "Moms"
            },
            {
              "boundingBox": "1670,2755,18,27",
              "text": "%"
            }
          ]
        },
        {
          "boundingBox": "1586,2808,103,36",
          "words": [
            {
              "boundingBox": "1586,2808,103,36",
              "text": "25,00"
            }
          ]
        }
      ]
    },
    {
      "boundingBox": "1780,2751,123,93",
      "lines": [
        {
          "boundingBox": "1820,2751,83,33",
          "words": [
            {
              "boundingBox": "1820,2751,83,33",
              "text": "Moms"
            }
          ]
        },
        {
          "boundingBox": "1780,2807,123,37",
          "words": [
            {
              "boundingBox": "1780,2807,123,37",
              "text": "198,34"
            }
          ]
        }
      ]
    },
    {
      "boundingBox": "985,2924,966,573",
      "lines": [
        {
          "boundingBox": "1523,2924,83,33",
          "words": [
            {
              "boundingBox": "1523,2924,83,33",
              "text": "7618"
            }
          ]
        },
        {
          "boundingBox": "1288,2926,167,33",
          "words": [
            {
              "boundingBox": "1288,2939,17,7",
              "text": "-"
            },
            {
              "boundingBox": "1330,2926,125,33",
              "text": "Sabina"
            }
          ]
        },
        {
          "boundingBox": "1182,2981,468,36",
          "words": [
            {
              "boundingBox": "1182,2983,38,34",
              "text": "24"
            },
            {
              "boundingBox": "1245,2982,146,34",
              "text": "oktober"
            },
            {
              "boundingBox": "1416,2982,82,34",
              "text": "2016"
            },
            {
              "boundingBox": "1547,2982,10,33",
              "text": "1"
            },
            {
              "boundingBox": "1571,2981,79,34",
              "text": "7:20"
            }
          ]
        },
        {
          "boundingBox": "991,2985,103,33",
          "words": [
            {
              "boundingBox": "991,2985,103,33",
              "text": "Datum"
            }
          ]
        },
        {
          "boundingBox": "1161,3040,403,34",
          "words": [
            {
              "boundingBox": "1161,3040,96,34",
              "text": "44601"
            },
            {
              "boundingBox": "1288,3040,140,34",
              "text": "Kvitto:"
            },
            {
              "boundingBox": "1460,3040,104,34",
              "text": "51756"
            }
          ]
        },
        {
          "boundingBox": "990,3042,103,33",
          "words": [
            {
              "boundingBox": "990,3042,103,33",
              "text": "Kassa"
            }
          ]
        },
        {
          "boundingBox": "1096,3157,728,40",
          "words": [
            {
              "boundingBox": "1096,3159,105,38",
              "text": "Spara"
            },
            {
              "boundingBox": "1225,3157,163,39",
              "text": "kvittot,"
            },
            {
              "boundingBox": "1418,3157,127,39",
              "text": "gäller"
            },
            {
              "boundingBox": "1570,3169,63,26",
              "text": "som"
            },
            {
              "boundingBox": "1657,3158,167,39",
              "text": "garanti."
            }
          ]
        },
        {
          "boundingBox": "1268,3217,388,39",
          "words": [
            {
              "boundingBox": "1268,3217,103,39",
              "text": "Öppet"
            },
            {
              "boundingBox": "1397,3218,62,38",
              "text": "köp"
            },
            {
              "boundingBox": "1484,3218,41,37",
              "text": "30"
            },
            {
              "boundingBox": "1550,3218,106,38",
              "text": "dager"
            }
          ]
        },
        {
          "boundingBox": "1290,3276,317,39",
          "words": [
            {
              "boundingBox": "1290,3276,192,38",
              "text": "VÄLKOMMEN"
            },
            {
              "boundingBox": "1506,3278,101,37",
              "text": "ÅTER!"
            }
          ]
        },
        {
          "boundingBox": "1116,3335,719,42",
          "words": [
            {
              "boundingBox": "1116,3337,41,36",
              "text": "Om"
            },
            {
              "boundingBox": "1182,3335,82,38",
              "text": "ditt"
            },
            {
              "boundingBox": "1290,3346,84,28",
              "text": "namn"
            },
            {
              "boundingBox": "1398,3337,63,38",
              "text": "och"
            },
            {
              "boundingBox": "1485,3349,261,28",
              "text": "personnummer"
            },
            {
              "boundingBox": "1771,3338,64,37",
              "text": "har"
            }
          ]
        },
        {
          "boundingBox": "1032,3395,894,42",
          "words": [
            {
              "boundingBox": "1032,3397,146,36",
              "text": "lämnats"
            },
            {
              "boundingBox": "1204,3395,62,38",
              "text": "för"
            },
            {
              "boundingBox": "1290,3395,61,38",
              "text": "att"
            },
            {
              "boundingBox": "1377,3399,194,36",
              "text": "genomföra"
            },
            {
              "boundingBox": "1596,3399,61,36",
              "text": "ett"
            },
            {
              "boundingBox": "1685,3399,241,38",
              "text": "JulaPro-köp"
            }
          ]
        },
        {
          "boundingBox": "985,3455,966,42",
          "words": [
            {
              "boundingBox": "985,3456,193,37",
              "text": "behandlar"
            },
            {
              "boundingBox": "1203,3455,85,37",
              "text": "Jula"
            },
            {
              "boundingBox": "1312,3456,84,37",
              "text": "dina"
            },
            {
              "boundingBox": "1421,3458,195,39",
              "text": "uppgifter"
            },
            {
              "boundingBox": "1645,3462,12,33",
              "text": "i"
            },
            {
              "boundingBox": "1686,3458,173,38",
              "text": "enlighet"
            },
            {
              "boundingBox": "1886,3461,65,36",
              "text": "med"
            }
          ]
        }
      ]
    }
  ]
}

最佳答案

这不是计算机视觉问题,而更像是一个NLP/文本模式识别问题。换句话说,任何OCR都不会直接做到您想要的;它们只负责从图像中提取文本。

通常的方法是收集许多不同类型的收据,研究其结构,然后使用基于规则的方法或基于机器学习的方法对每条信息进行分类。分类器可以具有{项目名称,项目价格,小计,总计,标题,其他}之类的类别。您可以使用边界框来形成网格单元,并将相邻单元用作特征。

教程

Applying OCR Technology for Receipt Recognition

云服务

  • Google Vision API for Receipt OCR
  • Predicting Expense Type from Receipts with Microsoft Cognitive Services
  • Infrrd ReceiptExtraction API
  • AWS Marketplace - Receipt OCR Level 2 Data Extraction

开源项目
  • receipt-scanner
  • Receipt scanner using AngularJS
  • iOS example
  • Using ad-supported library
  • 10-07 20:54