PHP / JS 等语言中是否有可以解析带错误的日期时间的库?

时间:2016-06-24 21:20:02

标签: validation datetime ocr

我们正在进行OCR(光学字符识别),但大约每20次识别中就有1次得到的日期时间包含错误。

例如:

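SUU Oct 2S, 2016 l:30pm
SAT Sep l7, 2016 7:20pm
TUE Nov i5, 2016 1I:25am
SAA Dec 24, 2016 1:30pm
SUN Sep 11, 2016 3:34pm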

我认为应该有一种方法能让计算机理解这些日期时间。特别是考虑到星期几总是正确的,而且每个月只有4到5个星期。

在第一行示例中,有2个星期日的日期是"2X"的形式,即22日和29日。但"2S"看起来不像22,而更像25或29。所以综合考虑这些条件,它应该是29日……

有没有库可以做到这一点?或者有一些代码示例吗?谢谢!

1 个答案:

答案 0 :(得分:0)

用JS和jQuery解决。这很棘手。这是演示页面的代码,我在上面进行了测试:

SUU Oct 2S, 2016 l:30pm

SAT Sep l7, 2016 7:20pm

TUE Nov i5, 2016 1I:25am

SAA Dec 24, 2016 1:30pm

SUN Sep 11, 2016 3:34pm

TUE Nov i5, 2016 1i:25am

SAA Dec 24, 2016 1:30pm

<!doctype html>

<html lang="en">
<head>
    <meta charset="utf-8">

    <title>test</title>
    <meta name="description" content="test">
    <meta name="author" content="test">

    <style type="text/css">
        /* Full-width wrapper that holds the button and the two columns. */
        .grid {
            display: table;
            width: 100%;
            box-sizing: border-box;
        }
        /* Half-width, left-floated column; two of them sit side by side. */
        .col-1-2 {
            float: left;
            width: 50%;
            padding: 15px;
            box-sizing: border-box;
        }
        /* Text fields fill their column; the -1px bottom margin makes the
           borders of vertically stacked fields overlap. */
        input {
            width: 100%;
            box-sizing: border-box;
            padding: 10px 15px;
            font-size: 15px;
            margin-bottom: -1px;
            outline: none;
        }
        /* Horizontally centred (auto side margins on a block element) blue button. */
        button {
            display: block;
            margin: 20px auto 15px;
            background: linear-gradient(#4285f4,#3a78de);
            border-radius: 2px;
            box-shadow: 0 0 1px #bbb;
            border: none;
            color: white;
            cursor: pointer;
            font-size: 16px;
            padding: 10px 15px;
        }
    </style>
</head>

<body>
    <!-- Demo layout: a trigger button, a column of OCR-damaged sample strings
         and a column of initially empty fields that receive the repaired text. -->
    <div class="grid">
        <!-- Clicking runs fix(), defined in the inline script further down. -->
        <button type="" onclick='fix()'>Fix Dates</button>
        <!-- Source rows: fix() reads these through the ".inputs input" selector. -->
        <div class="col-1-2 inputs">
            <input type="text" name="" value="SUU Oct 2S, 2016 l:30pm" placeholder="">
            <input type="text" name="" value="SAT Sep l7, 2016 7:20pm" placeholder="">
            <input type="text" name="" value="TUE Nov i5, 2016 1I:25am" placeholder="">
            <input type="text" name="" value="SAA Dec 24, 2016 1:30pm" placeholder="">
            <input type="text" name="" value="SUN Sep 11, 2016 3:34pm" placeholder="">
            <input type="text" name="" value="TUE Nov i5, 2016 1i:25am" placeholder="">
            <input type="text" name="" value="SAA Dec 24, 2016 1:30pm" placeholder="">
        </div>
        <!-- Result rows: output() appends to the field with the same index as
             the source row, via the ".outputs input" selector. -->
        <div class="col-1-2 outputs">
            <input type="text" name="" value="" placeholder="">
            <input type="text" name="" value="" placeholder="">
            <input type="text" name="" value="" placeholder="">
            <input type="text" name="" value="" placeholder="">
            <input type="text" name="" value="" placeholder="">
            <input type="text" name="" value="" placeholder="">
            <input type="text" name="" value="" placeholder="">
        </div>
    </div>
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.0.0/jquery.min.js"></script>
    <script src="https://cdn.jsdelivr.net/momentjs/2.13.0/moment.min.js"></script>
    <script type="text/javascript" charset="utf-8">
        // Shared parser state. fix() stores the raw tokens of the row it is
        // processing here; the check* helpers store the repaired values in the
        // matching *Fixed variables. Every name the functions below assign to is
        // declared explicitly so none of them becomes an implicit global.
        var text = " ";           // full raw string of the current row
        var subtext = " ";        // remainder of `text` still to be tokenised
        var weekday = " ";        // raw weekday token
        var weekdayFixed = " ";   // repaired weekday, written by checkWeekday()
        var month = " ";          // raw month token
        var monthFixed = " ";     // repaired month, written by checkMonth()
        var day = " ";            // raw day-of-month token
        var timeH = " ";          // raw hour token
        var timeHFixed = " ";     // repaired hour, written by checkTimeH()
        var timeM = " ";          // raw minute token
        var timeMFixed = " ";     // repaired minute, written by checkTimeM()
        var timeAmPm = " ";       // raw am/pm token
        var timeAmPmFixed = " ";  // am/pm value, written by checkTimeAmPm()
        // Not referenced by any function in this script; kept so the set of
        // existing globals is unchanged.
        var i = 0;
        var today = new Date();

        /**
         * Repairs an OCR-damaged weekday abbreviation, appends it (followed by a
         * space) to output row `i` and stores it in the global `weekdayFixed`.
         *
         * Matching is case-insensitive and the result is always one of the
         * upper-case names MON..SUN when any rule applies, because checkDay()
         * compares the weekday against upper-cased text. A token that matches no
         * rule is passed through unchanged.
         *
         * @param {string} weekday - raw weekday token, e.g. "SUU" or "sun".
         * @param {number} i - index of the output row to append to.
         * @returns {string} the repaired (or unchanged) weekday.
         */
        function checkWeekday(weekday,i) {
            // Tried in order; the first pattern found in the token wins.
            // Two-letter fragments come first because they are far less
            // ambiguous than the single-letter fallbacks at the end.
            var fallbackRules = [
                [/MO|M0/, 'MON'],
                [/TU|UE/, 'TUE'],
                [/WE|ED/, 'WED'],
                [/TH|HU/, 'THU'],
                [/FR|RI/, 'FRI'],
                [/SA|AT/, 'SAT'],
                [/SU|UN/, 'SUN'],
                [/M|O|0/, 'MON'],
                [/W/, 'WED'],
                [/F|R/, 'FRI'],
                [/A/, 'SAT']
            ];
            var token = String(weekday).toUpperCase();
            var exact = token.match(/MON|TUE|WED|THU|FRI|SAT|SUN/);
            var fixed = weekday;

            if (exact) {
                // Keep only the valid name, dropping any stray characters around it.
                fixed = exact[0];
            } else {
                for (var k = 0; k < fallbackRules.length; k++) {
                    if (fallbackRules[k][0].test(token)) {
                        fixed = fallbackRules[k][1];
                        break;
                    }
                }
            }

            output(fixed+' ',i);
            weekdayFixed = fixed;
            return fixed;
        }

        /**
         * Repairs an OCR-damaged month abbreviation, appends it (followed by a
         * space) to output row `i` and stores it in the global `monthFixed`.
         *
         * Matching is case-insensitive and a recognised month is always written
         * with a leading capital ("Oct"). A token that matches no rule is passed
         * through unchanged.
         *
         * @param {string} month - raw month token, e.g. "Oct", "OCT" or "0ct".
         * @param {number} i - index of the output row to append to.
         * @returns {string} the repaired (or unchanged) month.
         */
        function checkMonth(month,i) {
            // Tried in order against the lower-cased token; first hit wins.
            // Mar/May and Jun/Jul have no leading-fragment rule because "ma" and
            // "ju" are shared between two months.
            var fallbackRules = [
                [/ja|an/, 'Jan'],
                [/fe|eb/, 'Feb'],
                [/ar/, 'Mar'],
                [/ap|pr/, 'Apr'],
                [/ay/, 'May'],
                [/un/, 'Jun'],
                [/ul/, 'Jul'],
                [/au|ug/, 'Aug'],
                [/se|ep/, 'Sep'],
                [/oc|ct/, 'Oct'],
                [/no|ov/, 'Nov'],
                [/de|ec/, 'Dec']
            ];
            var token = String(month).toLowerCase();
            var exact = token.match(/jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec/);
            var fixed = month;

            if (exact) {
                // Valid name found: normalise its capitalisation and drop any
                // stray characters around it.
                fixed = exact[0].charAt(0).toUpperCase() + exact[0].slice(1);
            } else {
                for (var k = 0; k < fallbackRules.length; k++) {
                    if (fallbackRules[k][0].test(token)) {
                        fixed = fallbackRules[k][1];
                        break;
                    }
                }
            }

            output(fixed+' ',i);
            monthFixed = fixed;
            return fixed;
        }

        /**
         * Resolves the day-of-month token of the current row and appends the day
         * plus the inferred year (as ", YYYY ") to output row `i`.
         *
         * Candidate dates run from day 0 of the reference month (which Date
         * normalises to the last day of the previous month) through day 999,
         * using Date's day overflow to roll into later months. Only candidates
         * whose weekday and month equal the repaired globals `weekdayFixed` and
         * `monthFixed` (compared case-insensitively) are considered.
         *
         * - Clean token ("1".."31"): it is written as-is, then the first
         *   candidate with exactly that day number supplies the year.
         * - Damaged token ("2S", "l7"): the surviving digit is taken as the tens
         *   digit when it is the first character, otherwise as the ones digit;
         *   the first candidate whose two-digit day agrees supplies both the
         *   day (zero-padded) and the year. A token with no digit at all accepts
         *   the first candidate.
         *
         * If no candidate qualifies within the window, no year is written (and,
         * for a damaged token, no day either).
         *
         * @param {string} day - raw day token.
         * @param {number} i - index of the output row to append to.
         * @param {Date} [referenceDate] - date whose month starts the search;
         *     defaults to the current date.
         */
        function checkDay(day,i,referenceDate) {
            var WEEKDAY_NAMES = ['SUN','MON','TUE','WED','THU','FRI','SAT'];
            var MONTH_NAMES = ['JAN','FEB','MAR','APR','MAY','JUN',
                               'JUL','AUG','SEP','OCT','NOV','DEC'];

            var start = referenceDate instanceof Date ? referenceDate : new Date();
            var startYear = start.getFullYear();
            var startMonth = start.getMonth();

            // Use the repaired values, not the raw tokens: a month that had to
            // be corrected would otherwise never match any candidate.
            var wantedWeekday = String(weekdayFixed).toUpperCase();
            var wantedMonth = String(monthFixed).toUpperCase();

            var token = String(day);
            var isCleanDay = /^([1-9]|[12][0-9]|3[01])$/.test(token);
            var digits = token.replace(/\D/g,'');
            var digitLeads = token.search(/\d/) === 0;

            if (isCleanDay)
                output(token,i);

            for (var j=0; j<=999; j++) {
                var candidate = new Date(startYear, startMonth, j);

                // Compare via Date getters rather than searching toString(),
                // whose time-zone name could contain a weekday or month name.
                if (WEEKDAY_NAMES[candidate.getDay()] !== wantedWeekday ||
                    MONTH_NAMES[candidate.getMonth()] !== wantedMonth) {
                    continue;
                }

                var dd = ('0' + candidate.getDate()).slice(-2);
                var matches;
                if (isCleanDay) {
                    // Exact numeric comparison: "1" must not match the 10th,
                    // 11th, 21st, ...
                    matches = candidate.getDate() === Number(token);
                } else {
                    var knownDigit = digitLeads ? dd.charAt(0) : dd.charAt(1);
                    matches = digits === '' || digits === knownDigit;
                }

                if (matches) {
                    if (!isCleanDay)
                        output(dd,i);
                    output(', '+candidate.getFullYear()+' ', i);
                    break;
                }
            }
        }

        /**
         * Repairs the hour token by turning the look-alike letters l, i and I
         * into the digit 1, appends it followed by ':' to output row `i` and
         * records it in the global `timeHFixed`.
         *
         * @param {string} timeH - raw hour token, e.g. "l" or "1I".
         * @param {number} i - index of the output row to append to.
         */
        function checkTimeH (timeH, i) {
            var hasLookalike = /l|i|I/.test(timeH);
            var hours = hasLookalike ? timeH.replace(/l|i|I/g, "1") : timeH;
            timeHFixed = hours;
            output(hours+':',i);
        }

        /**
         * Repairs the minute token by turning the look-alike letters l, i and I
         * into the digit 1, appends it to output row `i` and records it in the
         * global `timeMFixed`.
         *
         * @param {string} timeM - raw two-character minute token, e.g. "3l".
         * @param {number} i - index of the output row to append to.
         */
        function checkTimeM (timeM, i) {
            var minutes = timeM;
            if (/l|i|I/.test(minutes)) {
                minutes = minutes.replace(/l|i|I/g, "1");
            }
            timeMFixed = minutes;
            output(minutes,i);
        }

        /**
         * Passes the am/pm token through unmodified: records it in the global
         * `timeAmPmFixed` and appends it to output row `i`.
         *
         * @param {string} timeAmPm - raw am/pm token (may be empty).
         * @param {number} i - index of the output row to append to.
         */
        function checkTimeAmPm (timeAmPm, i) {
            timeAmPmFixed = timeAmPm;
            output(timeAmPmFixed,i);
        }

        /**
         * Click handler: for every ".inputs input" field, clears the
         * ".outputs input" field with the same index, splits the raw string
         * "WEEKDAY Month day, year h:mm(am|pm)" into tokens and hands each token
         * to its check* helper, which appends the repaired text to that output.
         *
         * The tokens are stored in the shared globals (text, subtext, weekday,
         * month, day, timeH, timeM, timeAmPm) because the helpers read some of
         * them. The year written in the input is not used; checkDay() infers it.
         */
        function fix() {
            var $sources = $('.inputs input');
            var $targets = $('.outputs input');
            var rowCount = $sources.length;
            var cut;

            for (var row = 0; row < rowCount; row++) {
                $targets.eq(row).val('');
                text = $sources.eq(row).val();

                // Weekday: everything before the first space.
                cut = text.indexOf(' ');
                weekday = text.substr(0, cut);
                subtext = text.slice(cut + 1);
                checkWeekday(weekday, row);

                // Month: up to the next space; that space stays in subtext.
                cut = subtext.indexOf(' ');
                month = subtext.substring(0, cut);
                subtext = subtext.slice(cut);
                checkMonth(month, row);

                // Day: between that leading space and the comma.
                cut = subtext.indexOf(',');
                day = subtext.substring(1, cut);
                subtext = subtext.slice(cut + 2);
                checkDay(day, row);

                // Time: the last space-separated word, split at the colon.
                subtext = text.substring(text.lastIndexOf(' ') + 1);
                cut = subtext.indexOf(':');
                timeH = subtext.substring(0, cut);
                subtext = subtext.slice(cut + 1);
                checkTimeH(timeH, row);

                timeM = subtext.substring(0, 2);
                subtext = subtext.slice(2);
                checkTimeM(timeM, row);

                timeAmPm = subtext.substring(0, 2);
                checkTimeAmPm(timeAmPm, row);
            }
        }

        /**
         * Appends a fragment to the current value of the ".outputs input" field
         * at index `i`.
         *
         * @param {string} output - text to append.
         * @param {number} i - index of the output field.
         */
        function output(output,i) {
            var $field = $('.outputs input').eq(i);
            var current = $field.val();
            $field.val(current + output);
        }
    </script>
</body>
</html>