如何使用正则表达式或其他方法,从Firebase ML文本识别(Text Recognition)的结果中提取特定文本

时间:2019-07-28 07:29:01

标签: android firebase ocr firebase-mlkit android-vision

我正在使用Firebase ML从图像中获取文本,识别本身工作得很好。但问题在于如何从识别结果中提取出特定的文本。

我想从下面3张图像中获取文本值。

https://i.stack.imgur.com/vkFxk.jpg

https://i.stack.imgur.com/GrpTW.jpg

https://i.stack.imgur.com/CSbJ3.jpg

  

期望的输出(*针对第一张图像):

     

序列号:350813(可以是字母数字)

     

彩色印数(Color Impressions):487

     

黑白印数(Black Impressions):21166

     

彩色大幅面印数(Color Large Impressions):4

     

总印数(Total Impressions):21653

但是我卡在了如何从这些图像中获取正确的值上。Firebase ML Kit每次识别返回的文本格式(各行的顺序)都不一样,因此我无法用任何固定的正则表达式得到准确的结果。

那么,有人可以帮助我如何从这些图像中获得正确的结果吗?

我没有贴出全部代码。下面是我的主要方法,它会在从图像中识别出文本之后运行。

/**
 * Pulls a serial number and the numeric readings out of the recognized text and hands
 * them to {@code listener.getTextCamera(...)} via {@code callBackData(...)}.
 *
 * <p>The first token that mixes letters and digits is treated as the serial number.
 * Every line that consists solely of a number is collected, in the order it appears in
 * the recognized text.
 *
 * @param result the text-recognition result for the captured image
 */
private void processTextBlock(FirebaseVisionText result) {
    // [START mlkit_process_text_block]

    // The text to scan comes from the recognition result passed in; previously
    // `resultText` was read here without being derived from `result`.
    final String resultText = result.getText();

    // A run of letters/digits that contains at least one digit and at least one letter.
    final String alphaNumericPattern = "(?=\\S*[0-9])(?=\\S*[a-zA-Z])[a-zA-Z0-9]+";
    // A whole line (MULTILINE) holding an optionally signed integer/decimal with an
    // optional exponent, surrounded by optional whitespace.
    final String numericPattern = "^\\s*(-|\\+)?(\\d+|(\\d*(\\.\\d*)))([eE][+-]?\\d+)?\\s*$";

    String serialNumber = "";
    boolean isSerialNumberAlphaNumeric = false;

    final Matcher matcher = Pattern.compile(alphaNumericPattern).matcher(resultText);
    if (matcher.find()) {
        isSerialNumberAlphaNumeric = true;
        serialNumber = matcher.group();
    }

    final Matcher matcherNumeric =
            Pattern.compile(numericPattern, Pattern.MULTILINE).matcher(resultText);
    final Pattern anyDigit = Pattern.compile("\\d");

    final List<String> dataIntegerList = new ArrayList<>();
    while (matcherNumeric.find()) {
        // The surrounding \s* may swallow blank lines or padding, so strip it before
        // the value is stored.
        final String number = matcherNumeric.group(0).trim();
        // The decimal alternative also accepts a lone "." (no digits at all); skip it.
        if (!anyDigit.matcher(number).find()) {
            continue;
        }
        Log.e("vv", "groupCount: " + number);
        dataIntegerList.add(number);
    }

    listener.getTextCamera(callBackData(dataIntegerList, isSerialNumberAlphaNumeric, serialNumber));

    // NOTE: a commented-out traversal of result.getTextBlocks() -> getLines() ->
    // getElements() (text, confidence, languages, corner points, bounding box) used to
    // live here; it was unused and has been removed.
    // [END mlkit_process_text_block]
}

/**
 * Distributes the ordered numeric readings over the fields of a new {@code TextBean}.
 *
 * <p>Field order is: serial number, color impression, black impression, color large
 * impression, total impression. If an alphanumeric serial number was already found and
 * there are at most four readings, the given serial number is kept and the readings fill
 * the fields starting at color impression. In every other case the readings fill the
 * fields starting at the serial number itself. Readings beyond the last field are
 * ignored; fields without a reading stay empty. With an empty list the given serial
 * number is passed through unchanged.
 *
 * @param dataIntegerList            numeric readings in the order they were found
 * @param isSerialNumberAlphaNumeric whether an alphanumeric serial number was detected
 * @param serialNumber               the detected serial number (may be empty)
 * @return the populated bean
 */
private TextBean callBackData(List<String> dataIntegerList, boolean isSerialNumberAlphaNumeric, String serialNumber) {
    // Slot order: serial, color, black, color large, total.
    final String[] slots = {serialNumber, "", "", "", ""};

    // Readings start after the serial slot only when the serial is already known and
    // the list is short enough (<= 4); otherwise the first reading becomes the serial.
    final boolean keepGivenSerial = isSerialNumberAlphaNumeric && dataIntegerList.size() <= 4;
    final int firstSlot = keepGivenSerial ? 1 : 0;

    final int usable = Math.min(dataIntegerList.size(), slots.length - firstSlot);
    for (int i = 0; i < usable; i++) {
        slots[firstSlot + i] = dataIntegerList.get(i);
    }

    final TextBean bean = new TextBean();
    bean.setSerialNumber(slots[0]);
    bean.setColorImpression(slots[1]);
    bean.setBlackImpression(slots[2]);
    bean.setColorLargeImpression(slots[3]);
    bean.setTotalImpression(slots[4]);
    return bean;
}

FireBase ML Kit给了我以下输出(*第一张图片):

2019-07-28 13:06:16.544 20355-20355/com.textrecognizarionfirebase E/vv: e teedng
    16
2019-07-28 13:06:16.954 20355-20355/com.textrecognizarionfirebase E/vv: e Status
    Billing Information
    al Numnber
    Current Meter Reading
    0813
    21653
    F11
    F12
    5
    Local User
    Close
    Sun 1306 PM
    WhatsApp Image 2019-07-26 at 2.25.22 PMjpeg
    Color Impression
    Black Impressions
    Color Large
    Total Impressions
    487
    21166
    Impressic
2019-07-28 13:06:17.319 20355-20355/com.textrecognizarionfirebase E/vv: Sun 1206 PA
    P Image 2019-07-26 et 22522 PMjpeg
    Status
    rrent MeterR
    487
    21166
    Total Impres
    21653
    12
    Baling Infor rmetior
    Color Impressions
    Black Impressions
    Color Large
    3513
    Irmpressions
2019-07-28 13:06:17.678 20355-20355/com.textrecognizarionfirebase E/vv: Status
    Current Meter Reading
    21653
    DLL
    F11
    F12
    9%
    TS /
    6
    Close
    Insert
    Sun 1:o6 PM
    WhatsAp Imoge 2o19.07 .26 at 22s22 PMjpeg
    Color Impressions
    350813
    487
    Black Impressions f
    21166
    Color Large
    Impressions
    Total Impressions
    0004 Billing Information
    Serial Number
2019-07-28 13:06:18.084 20355-20355/com.textrecognizarionfirebase E/vv: Sun 1:0o6 PM
    WhotsApp Image 2019-07-26 at 2.25.22 PM.Jpeg
    e Status
    Current Meter Reading
    487
    21166
    21653
    D
    L
    F11
    F12
    6
    Local Us
    Close
    Color Impression
    Black fmpre ressions
    Color Large
    Total Impressions
    350813
    Impressio
    000 Billing Information
    Serial Number
2019-07-28 13:06:18.449 20355-20355/com.textrecognizarionfirebase E/vv: e Status
    Current Meter Reading
    Total Impressions
    21653
    DLL
    F11
    F12
    5
    6
    Local Us
    Close
    Sun 1:06 PM
    WhotsApp Image 2019-07-26 at 2.25.22 PMjpeg
    Color Impression
    Black ressions
    Color Large
    487
    350813
    21166
    Impressio
    000 Billing Information
    Serial Number
2019-07-28 13:06:18.757 20355-20355/com.textrecognizarionfirebase E/vv: Sun 1:06 PM
    Pp Image 2019-07-26 at 2.2s.22 PMjpeg
    eStatus
    EBilling Information
    Current Meter Reading
    Serial Number
    ons
    350813
    Total Impression
    21653
    DLL
    F11
    5
    Local User
    close
    487
    21166
    Color Large
    Impressio
    Color Impre
    Black Impression
2019-07-28 13:06:19.082 20355-20355/com.textrecognizarionfirebase E/vv: Sun 1306 PM
    Image
    at
    2 PM.jpeg
    eStatus
    Current Meter Reading
    350813
    21653
    DLL
    F12
    5
    Local Us
    Close
    Billing Information
    Serial Number
    487
    21166
    Impressio
    Color Large
    Total Impression
    Color Impressions
    Black pressions
2019-07-28 13:06:19.377 20355-20355/com.textrecognizarionfirebase E/vv: Machine StatuS
    Current Meter Reading
    21653
    DLL
    F10
    F11
    F12
    Local Us
    close
    SUn 1:06 PM
    WhatsApp Image 2019-07-26 at 2.25.22 PM jpeg9
    Color Impressions
    Black Impressions
    Color Large
    487
    350813
    21166
    Impressions
    Total Impressions
    800 Billing Information
    Serial Number
2019-07-28 13:06:19.697 20355-20355/com.textrecognizarionfirebase E/vv: Machine Status
    Current Meter Reading
    350813
    21653
    DLL
    FS
    F9
    F10
    F12
    5
    Local Us
    Close
    Sun 1:06 PM
    WhatsApp Image 2019-07-26 at 2.25.22 PMjpeg
    000 Billing Information
    Serial Number
    Color Impressions
    Black Impressions
    Color Large
    487
    21166
    Impressions
    Total Impressions
2019-07-28 13:06:20.043 20355-20355/com.textrecognizarionfirebase E/vv: e Status
    Current Meter Reading
    350813
    Total Impression
    21653
    4 T
    5
    Local Us
    close
    Sun 1:06 PM
    WhatsApp Image 2019-07-26 at 2.25.22 PM.jpeg
    000 Billing Information
    Serial Number
    Color Impression
    Black Impressions
    Color Large
    487
    21166
    Impressions
2019-07-28 13:06:21.161 20355-20355/com.textrecognizarionfirebase E/vv: Machine StatuS
    Current Meter Reading
    350813
    21653
    DLL
    F11
    F12
    5
    Local Us
    close
    Sun 1:06 PM
    WhatsApp Image 2019-07-26 at 2.25.22 PMjpeg
    000 Billing Information
    Serial Number
    Color Impressions
    Black Impressions
    Color Large
    487
    21166
    Impressions
    Total Impressions

0 个答案:

没有答案