我正在做一个简单的英语测验。在测验中,我们需要验证用户在输入框中输入的答案。在当前的实现中,我把正确答案与用户的答案做精确比较,如下所示:
HTML
<input type="text" id="answer" />
<button onclick="validate()">Validate</button>
JavaScript
// The question text and the expected answer string used by validate().
var question = "Do you like movies?";
var answer = "No, I don't like movies.";
/**
 * Reads the current value of the element with id "answer" and logs
 * "correct" when it is exactly equal to the expected `answer`, otherwise
 * "wrong".
 *
 * @returns {void}
 */
function validate() {
  const userInput = document.getElementById('answer').value;
  // Strict equality: compare the two values without implicit type coercion.
  if (answer === userInput) {
    console.log("correct");
  } else {
    console.log("wrong");
  }
}
但是我不想做这种完全精确的验证,而是希望忽略大小写、逗号、撇号等差异。例如,如果用户输入:
No i dont like movies
答案也应该被判定为正确。我不知道该如何开始、从哪里入手。有人能帮忙吗?
答案 0 :(得分:1)
一种选择是删除所有既不是单词字符也不是空格的字符,然后比较两个字符串处理后的小写形式:
// Question shown to the user and the reference answer to compare against.
var question = "Do you like movies?";
var answer = "No, I don't like movies.";
/**
 * Reduces a string to a canonical form for lenient comparison.
 *
 * Removes every character that is neither a word character (`\w`) nor
 * whitespace, collapses each run of whitespace into a single space, trims the
 * ends and lower-cases the result.
 *
 * @param {string} str - Text to normalize.
 * @returns {string} The normalized text.
 */
const normalize = (str) => str
  // Keep whitespace here so tabs/newlines still separate words.
  .replace(/[^\w\s]/g, '')
  // Stripping punctuation such as " , " can leave several adjacent spaces.
  .replace(/\s+/g, ' ')
  .trim()
  .toLowerCase();
/**
 * Logs "correct" when `userInput` equals the expected `answer` after both
 * have been passed through `normalize`, and "wrong" otherwise.
 *
 * @param {string} userInput - The text to check against `answer`.
 * @returns {void}
 */
function validate(userInput) {
  const normalizedInput = normalize(userInput);
  const normalizedAnswer = normalize(answer);
  if (normalizedInput === normalizedAnswer) {
    console.log("correct");
  } else {
    console.log("wrong");
  }
}
// Sample inputs: each is passed to validate() in turn.
for (const attempt of [
  'No i dont like movies',
  "NO!!!!! I DON''t like movies.",
]) {
  validate(attempt);
}
另一种选择是遍历 `userInput` 的所有可能的子字符串,找出其中与期望的 `answer` 重叠最多的那个,但这要复杂得多。
一个更简单的选择是检查有多少重叠的单词:
// Question text and the reference answer whose words are matched against.
var question = "Do you like movies?";
var answer = "No, I don't like movies.";
/**
 * Splits a string into lower-case word tokens.
 *
 * Removes every character that is neither a word character (`\w`) nor
 * whitespace, lower-cases the rest and splits it on runs of whitespace.
 *
 * @param {string} str - Text to tokenize.
 * @returns {string[]} The words of `str`, in order; empty when there are none.
 */
const normalize = (str) => str
  // Keep all whitespace (not just spaces) so tabs/newlines still split words.
  .replace(/[^\w\s]/g, '')
  .toLowerCase()
  .split(/\s+/)
  // Leading/trailing whitespace and empty input would otherwise yield "".
  .filter(Boolean);
/**
 * Judges `userInput` by how many distinct words it shares with the expected
 * `answer`.
 *
 * Both texts are tokenized with `normalize`. The number of distinct shared
 * words is logged first, followed by "correct" when that number reaches
 * `minOverlap` and "wrong" otherwise.
 *
 * @param {string} userInput - The text to check against `answer`.
 * @param {number} [minOverlap=4] - Minimum number of distinct shared words
 *   required for the input to be accepted.
 * @returns {void}
 */
function validate(userInput, minOverlap = 4) {
  const answerWords = new Set(normalize(answer));
  // A Set ensures a word repeated in the input is only counted once, so
  // repeating a single matching word cannot reach the threshold.
  const sharedWords = new Set(
    normalize(userInput).filter((word) => answerWords.has(word)),
  );
  const overlapCount = sharedWords.size;
  console.log(overlapCount);
  if (overlapCount >= minOverlap) {
    console.log("correct");
  } else {
    console.log("wrong");
  }
}
// Sample inputs: each is passed to validate() in turn, as a single argument.
for (const attempt of [
  'No i dont like movies',
  "NO!!!!! I DON''t like movies.",
  "i dont like movies.",
  "Yes I like movies.",
]) {
  validate(attempt);
}
答案 1 :(得分:1)
如果您只想容忍拼写错误和细微的差异,标准的度量方法是编辑距离(也称 Levenshtein 距离),即把一个文本变成另一个文本所需的最少删除、插入或替换操作的次数。像 "No I dont lik moves" 和 "No i dont like movies" 这样的字符串,编辑距离就很小。
下面是一个简单粗糙的编辑距离函数示例,可以让您了解大致思路:
/**
 * Computes the edit (Levenshtein) distance between two texts after simple
 * preprocessing.
 *
 * Both inputs are lower-cased and every character outside `a`-`z` is removed
 * (so spaces, punctuation and digits are ignored). The result is the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one preprocessed string into the other.
 *
 * @param {string} text - The text to evaluate.
 * @param {string} pattern - The text to compare against.
 * @returns {number} The edit distance; 0 means the preprocessed strings are
 *   identical.
 */
function validate(text, pattern) {
  // some simple preprocessing
  const keepLetters = (s) => s.toLowerCase().replace(/[^a-z]+/g, '');
  const t = keepLetters(text);
  const p = keepLetters(pattern);

  // Bottom-up dynamic programming that keeps only two rows of the table.
  // `previous[j]` is the distance between the first i-1 characters of `t`
  // and the first j characters of `p`; row 0 is the cost of inserting j chars.
  let previous = Array.from({ length: p.length + 1 }, (_, j) => j);
  for (let i = 1; i <= t.length; i++) {
    // Column 0: deleting all i characters of `t`.
    const current = [i];
    for (let j = 1; j <= p.length; j++) {
      const substitutionCost = t[i - 1] === p[j - 1] ? 0 : 1;
      current[j] = Math.min(
        previous[j - 1] + substitutionCost, // match or substitute
        previous[j] + 1, // delete from t
        current[j - 1] + 1, // insert into t
      );
    }
    previous = current;
  }
  return previous[p.length];
}
// Sample comparisons: each [text, pattern] pair is passed to validate() and
// the returned value is logged.
for (const [text, pattern] of [
  // similar strings have smaller edit distances
  ["No I dont lik moves", "No i dont like movies"],
  // a little less similar
  ["Yes I like movies", "No i dont like movies"],
  // totally different
  ["Where is the bathroom", "No i dont like movies"],
  // careful -- small edit distance !== close meaning
  ["I do like tacos", "I don't like tacos"],
]) {
  console.log(validate(text, pattern));
}
对于只有少量拼写错误的字符串,选定一个可接受的距离阈值,效果就很好。当然,如果您要判断用户的意图,那么这类简单的启发式方法就都不管用了。像 "I love tacos"(我爱炸玉米饼)和 "I hate tacos"(我讨厌炸玉米饼)这样的字符串编辑距离很小,但如果不理解语言本身,就无法判断它们的含义恰好相反。如果您需要做到这种程度的检查,可以尝试使用 Watson Conversation 之类的服务,它会返回输入内容所对应的用户意图。