我们正在进行OCR(光学字符识别),但每识别20次,得到的日期时间中就会有包含错误的情况。
例如:
isJunk
我认为应该有一种让计算机理解日期时间的方法。特别是考虑到星期几总是正确的,而且每个月只有4-5个星期。
在第一行示例中,日期形如2X的星期日有2个,即22和29。2S与22不相似,但它与25或29相似。因此综合考虑,它应该是29日……
有没有库可以做到这一点?或者有一些代码示例吗?谢谢!
答案 0 :(得分:0)
用JS和jQuery解决。这很棘手。这是演示页面的代码,我在上面进行了测试:
SUU Oct 2S, 2016 l:30pm
SAT Sep l7, 2016 7:20pm
TUE Nov i5, 2016 1I:25am
SAA Dec 24, 2016 1:30pm
SUN Sep 11, 2016 3:34pm
TUE Nov i5, 2016 1i:25am
SAA Dec 24, 2016 1:30pm
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>test</title>
<meta name="description" content="test">
<meta name="author" content="test">
<style type="text/css">
.grid {
display: table;
width: 100%;
box-sizing: border-box;
}
.col-1-2 {
float: left;
width: 50%;
padding: 15px;
box-sizing: border-box;
}
input {
width: 100%;
box-sizing: border-box;
padding: 10px 15px;
font-size: 15px;
margin-bottom: -1px;
outline: none;
}
button {
display: block;
margin: 20px auto 15px;
background: linear-gradient(#4285f4,#3a78de);
border-radius: 2px;
box-shadow: 0 0 1px #bbb;
border: none;
color: white;
cursor: pointer;
font-size: 16px;
padding: 10px 15px;
}
</style>
</head>
<body>
<div class="grid">
<button type="button" onclick='fix()'>Fix Dates</button>
<div class="col-1-2 inputs">
<input type="text" name="" value="SUU Oct 2S, 2016 l:30pm" placeholder="">
<input type="text" name="" value="SAT Sep l7, 2016 7:20pm" placeholder="">
<input type="text" name="" value="TUE Nov i5, 2016 1I:25am" placeholder="">
<input type="text" name="" value="SAA Dec 24, 2016 1:30pm" placeholder="">
<input type="text" name="" value="SUN Sep 11, 2016 3:34pm" placeholder="">
<input type="text" name="" value="TUE Nov i5, 2016 1i:25am" placeholder="">
<input type="text" name="" value="SAA Dec 24, 2016 1:30pm" placeholder="">
</div>
<div class="col-1-2 outputs">
<input type="text" name="" value="" placeholder="">
<input type="text" name="" value="" placeholder="">
<input type="text" name="" value="" placeholder="">
<input type="text" name="" value="" placeholder="">
<input type="text" name="" value="" placeholder="">
<input type="text" name="" value="" placeholder="">
<input type="text" name="" value="" placeholder="">
</div>
</div>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.0.0/jquery.min.js"></script>
<script src="https://cdn.jsdelivr.net/momentjs/2.13.0/moment.min.js"></script>
<script type="text/javascript" charset="utf-8">
// Shared state for the date-repair demo. fix() fills the raw tokens for the
// row being processed; each check*() helper stores its repaired token in the
// matching *Fixed variable.
var text = " ";          // full raw string of the current input row
var subtext = " ";       // remainder of `text` still to be tokenised
var weekday = " ";       // raw weekday token
var weekdayFixed = " ";  // weekday token after checkWeekday()
var month = " ";         // raw month token
var monthFixed = " ";    // month token after checkMonth()
// The variables below are assigned elsewhere in this script but were never
// declared, which made them implicit globals (a ReferenceError in strict
// mode). Declaring them keeps the same global names and behaviour.
var day = " ";           // raw day-of-month token
var timeH = " ";         // raw hour token
var timeHFixed = " ";    // hour token after checkTimeH()
var timeM = " ";         // raw minute token
var timeMFixed = " ";    // minute token after checkTimeM()
var timeAmPm = " ";      // raw am/pm suffix
var timeAmPmFixed = " "; // am/pm suffix after checkTimeAmPm()
var i = 0;
var today = new Date();
/**
 * Normalises an OCR-read weekday token to one of MON..SUN and records it.
 *
 * The first rule whose pattern matches decides the result, so rule order is
 * significant (two-letter fragments are tried before single letters). A token
 * that already contains a valid abbreviation, or that matches no rule at all,
 * is passed through unchanged.
 *
 * Side effects: appends the result plus a trailing space to output row `i`
 * via output(), and stores the result in the global `weekdayFixed`.
 *
 * @param {string} weekday - weekday token as read by OCR (e.g. "SUU").
 * @param {number} i - row index handed through to output().
 */
function checkWeekday(weekday,i) {
  // [pattern, replacement]; a null replacement means "keep the token as is".
  var rules = [
    [/MON|TUE|WED|THU|FRI|SAT|SUN/, null],
    [/MO|M0/, 'MON'],
    [/TU|UE/, 'TUE'],
    [/WE|ED/, 'WED'],
    [/TH|HU/, 'THU'],
    [/FR|RI/, 'FRI'],
    [/SA|AT/, 'SAT'],
    [/SU|UN/, 'SUN'],
    [/M|O|o|0/, 'MON'],
    [/W/, 'WED'],
    [/F|R/, 'FRI'],
    [/A/, 'SAT']
  ];
  var repaired = weekday;
  for (var k = 0; k < rules.length; k++) {
    if (rules[k][0].test(weekday)) {
      if (rules[k][1] !== null) {
        repaired = rules[k][1];
      }
      break;
    }
  }
  output(repaired+' ',i);
  weekdayFixed = repaired;
}
/**
 * Normalises an OCR-read month token to a three-letter English abbreviation.
 *
 * A token that already contains a valid abbreviation is kept unchanged.
 * Otherwise the repair table is scanned in order and the first entry whose
 * two-letter fragment occurs in the token wins. A token matching nothing is
 * passed through as it is.
 *
 * Side effects: appends the result plus a trailing space to output row `i`
 * via output(), and stores the result in the global `monthFixed`.
 *
 * @param {string} month - month token as read by OCR (e.g. "0ct").
 * @param {number} i - row index handed through to output().
 */
function checkMonth(month,i) {
  var alreadyValid = /Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec/;
  var repairs = [
    { fragment: /Ja|an/, name: 'Jan' },
    { fragment: /Fe|eb/, name: 'Feb' },
    { fragment: /ar/,    name: 'Mar' },
    { fragment: /Ap|pr/, name: 'Apr' },
    { fragment: /ay/,    name: 'May' },
    { fragment: /un/,    name: 'Jun' },
    { fragment: /ul/,    name: 'Jul' },
    { fragment: /Au|ug/, name: 'Aug' },
    { fragment: /Se|ep/, name: 'Sep' },
    { fragment: /Oc|ct/, name: 'Oct' },
    { fragment: /No|ov/, name: 'Nov' },
    { fragment: /De|ec/, name: 'Dec' }
  ];
  var repaired = month;
  if (!alreadyValid.test(month)) {
    for (var k = 0; k < repairs.length; k++) {
      if (repairs[k].fragment.test(month)) {
        repaired = repairs[k].name;
        break;
      }
    }
  }
  output(repaired+' ',i);
  monthFixed = repaired;
}
function checkDay(day,i) {
if ([
'1','2','3','4','5','6','7','8','9','10',
'11','12','13','14','15','16','17','18','19','20',
'21','22','23','24','25','26','27','28','29','30','31'
].indexOf(day) >= 0) {
output(day,i);
for (var j=0; j<=999; j++) {
var temp = moment().date(j)._d;
if ((temp.toString().indexOf(month) > -1)&&
(temp.toString().substr(8,2).indexOf(day) > -1)&&
(temp.toString().toUpperCase().indexOf(weekdayFixed) > -1)) {
output(', '+temp.toString().substr(11,4)+' ', i);
break;
}
}
} else {
for (var j=0; j<=999; j++) {
var temp = moment().date(j)._d;
if ((temp.toString().indexOf(month) > -1)&&
(temp.toString().toUpperCase().indexOf(weekdayFixed) > -1)) {
var firstDigit = day.match(/\d/);
var indexOfFirstDigit = day.indexOf(firstDigit);
var dayOnlyNumbers = day.replace(/\D/g,'');
if (indexOfFirstDigit == 0) {
if (temp.toString().substr(8,1).indexOf(dayOnlyNumbers) > -1) {
day = temp.toString().substr(8,2);
output(day,i);
output(', '+temp.toString().substr(11,4)+' ', i);
break;
}
} else {
if (temp.toString().substr(9,1).indexOf(dayOnlyNumbers) > -1) {
day = temp.toString().substr(8,2);
output(day,i);
output(', '+temp.toString().substr(11,4)+' ', i);
break;
}
}
}
}
}
}
/**
 * Repairs the hour token by turning the look-alike letters l, i and I into
 * the digit 1.
 *
 * Side effects: appends the result followed by ":" to output row `i` via
 * output(), and stores the result in the global `timeHFixed`.
 *
 * @param {string} timeH - hour token as read by OCR (e.g. "1I").
 * @param {number} i - row index handed through to output().
 */
function checkTimeH (timeH, i) {
  var hours = /l|i|I/.test(timeH)
    ? timeH.replace(/l|i|I/g, "1")
    : timeH;
  output(hours+':',i);
  timeHFixed = hours;
}
/**
 * Repairs the minute token by turning the look-alike letters l, i and I into
 * the digit 1.
 *
 * Side effects: appends the result to output row `i` via output(), and
 * stores the result in the global `timeMFixed`.
 *
 * @param {string} timeM - minute token as read by OCR (e.g. "2l").
 * @param {number} i - row index handed through to output().
 */
function checkTimeM (timeM, i) {
  var minutes = /l|i|I/.test(timeM)
    ? timeM.replace(/l|i|I/g, "1")
    : timeM;
  output(minutes,i);
  timeMFixed = minutes;
}
/**
 * Passes the am/pm suffix through without any repair.
 *
 * Side effects: appends the suffix to output row `i` via output(), and
 * stores it in the global `timeAmPmFixed`.
 *
 * @param {string} timeAmPm - the characters following the minutes.
 * @param {number} i - row index handed through to output().
 */
function checkTimeAmPm (timeAmPm, i) {
  var meridiem = timeAmPm; // no correction is attempted for this token
  output(meridiem, i);
  timeAmPmFixed = meridiem;
}
/**
 * Click handler for the "Fix Dates" button.
 *
 * For every input under `.inputs` it clears the output field at the same
 * index under `.outputs`, splits the text (expected shape:
 * "WWW Mon DD, YYYY H:MMam") into weekday, month, day, hour, minute and am/pm
 * tokens, and hands each token to its check*() helper, which appends the
 * repaired token to that output field.
 *
 * The helpers must run in this order: checkDay() relies on the weekday and
 * month results recorded by checkWeekday() and checkMonth().
 *
 * Changes over the previous version: the token variables `day`, `timeH`,
 * `timeM` and `timeAmPm` are now locals instead of implicit globals; the two
 * jQuery collections are looked up once instead of on every iteration; two
 * dead assignments were removed; rows that are blank are skipped (their
 * output is left empty) instead of being fed to the parsers.
 */
function fix() {
  var $inputs = $('.inputs input');
  var $outputs = $('.outputs input');

  for (var i = 0; i < $inputs.length; i++) {
    $outputs.eq(i).val('');

    text = $inputs.eq(i).val();
    if (typeof text !== 'string' || text.trim() === '') {
      continue; // nothing to parse for this row
    }

    // Weekday: everything before the first space.
    weekday = text.substr(0,text.indexOf(' '));
    subtext = text.slice(text.indexOf(' ')+1);
    checkWeekday(weekday,i);

    // Month: next space-delimited token. `month` stays a global because it
    // is part of the script's shared state.
    month = subtext.substring(0,subtext.indexOf(' '));
    subtext = subtext.slice(subtext.indexOf(' '));
    checkMonth(month,i);

    // Day: between the leading space and the comma.
    var day = subtext.substring(1,subtext.indexOf(','));
    checkDay(day,i);

    // Time: the last space-delimited token, "H:MM" followed by am/pm.
    // The year token in between is not parsed here.
    subtext = text.substring(text.lastIndexOf(' ')+1);
    var timeH = subtext.substring(0,subtext.indexOf(':'));
    subtext = subtext.slice(subtext.indexOf(':')+1);
    checkTimeH(timeH,i);

    var timeM = subtext.substring(0,2);
    subtext = subtext.slice(2);
    checkTimeM(timeM,i);

    var timeAmPm = subtext.substring(0,2);
    checkTimeAmPm(timeAmPm,i);
  }
}
/**
 * Appends a text fragment to the i-th input inside `.outputs`.
 *
 * @param {*} output - fragment to append (concatenated onto the current value).
 * @param {number} i - zero-based index of the output field.
 */
function output(output,i) {
  var $target = $('.outputs input').eq(i);
  var current = $target.val();
  $target.val(current + output);
}
</script>
</body>
</html>