如何在Javascript中对对象数组执行模式匹配?

时间:2019-02-03 12:07:09

标签: javascript arrays node.js algorithm pattern-matching

在NodeJS中使用NLP时,我收到了大量具有以下样式的令牌。实际输入包含数千个令牌。

[
    {
        "i": 0,
        "text": "How",
        "lemma": "how",
        "dependencyEdge": null,
        "pos": "ADVERB",
        "tag": "ADV",
        "is": {
            "stopword": true,
            "quantifier": false
        }
    },
    {
        "i": 1,
        "text": "to",
        "lemma": "to",
        "dependencyEdge": null,
        "pos": "PARTICLE",
        "tag": "PART",
        "is": {
            "stopword": true,
            "quantifier": false
        }
    },
    {
        "i": 2,
        "text": "ask",
        "lemma": "ask",
        "dependencyEdge": null,
        "pos": "VERB",
        "tag": "VERB",
        "is": {
            "stopword": true,
            "quantifier": false
        }
    },
    {
        "i": 3,
        "text": "a",
        "lemma": "a",
        "dependencyEdge": null,
        "pos": "DETERMINER",
        "tag": "DET",
        "is": {
            "stopword": true,
            "quantifier": false
        }
    },
    {
        "i": 4,
        "text": "javascript",
        "lemma": "javascript",
        "dependencyEdge": null,
        "pos": "NOUN",
        "tag": "NOUN",
        "is": {
            "stopword": false,
            "quantifier": false
        }
    },
    {
        "i": 5,
        "text": "question",
        "lemma": "question",
        "dependencyEdge": null,
        "pos": "NOUN",
        "tag": "NOUN",
        "is": {
            "stopword": false,
            "quantifier": false
        }
    },
    {
        "i": 6,
        "text": "on",
        "lemma": "on",
        "dependencyEdge": null,
        "pos": "ADPOSITION",
        "tag": "ADP",
        "is": {
            "stopword": true,
            "quantifier": false
        }
    },
    {
        "i": 7,
        "text": "StackOverflow",
        "lemma": "stackoverflow",
        "dependencyEdge": null,
        "pos": "NOUN",
        "tag": "PROPN",
        "is": {
            "stopword": false,
            "quantifier": false
        }
    },
    {
        "i": 8,
        "text": "?",
        "lemma": "?",
        "dependencyEdge": null,
        "pos": "PUNCTUATION",
        "tag": "PUNCT",
        "is": {
            "stopword": false,
            "quantifier": false
        }
    }
]

我想检索与特定模式匹配的数组索引切片:

模式1

  • 以pos: NOUN或pos: ADJECTIVE开头
  • 然后可以有任意数量的pos: NOUN /ADJECTIVE
  • 并且必须以pos: NOUN且is.stopword: false结尾

模式2

  • 以pos: VERB开头
  • 后跟pos: PARTICLE
  • 然后可以有任意数量的pos: NOUN /ADJECTIVE
  • 并且必须以pos: NOUN且is.stopword: false结尾

我不介意针对每个模式分别遍历整个数组。是否有一个JS库可以帮助对对象数组(array of objects)执行模式匹配?类似于RegEx,但作用于数组而不是字符串(string)?

理想情况下,该函数的返回值要么是令牌的切片,要么只是它们的索引,例如:

[
    [ 3, 4, 5 ],
    [ 9, 10],
    [ 22 ]
]

1 个答案:

答案 0 :(得分:0)

以下是简单的匹配器作为起点:

/**
 * Checks an object against a matcher whose values are RegExps or nested matchers.
 *
 * Rules applied to every enumerable key of `obj`:
 *   - the key must have a truthy entry in `matcher`, otherwise the match fails;
 *   - a RegExp entry is tested against the string form of the value;
 *   - when both the value and the entry are objects, the pair is matched recursively.
 * Any other truthy entry (e.g. a plain string or number) is not compared and
 * accepts every value.
 *
 * @param {*} obj - Value to inspect. Falsy values and non-objects always match.
 * @param {Object} matcher - Pattern keyed like `obj`.
 * @returns {boolean} `true` when every key of `obj` satisfies its rule.
 */
function match(obj, matcher) {
    if (obj && typeof obj === 'object') {
        for (const key in obj) {
            const rule = matcher[key];
            const value = obj[key];
            if (!rule) {
                return false;
            }
            if (rule instanceof RegExp && !rule.test('' + value)) {
                return false;
            }
            // Only bail out when the nested match fails; a successful nested
            // match must not stop the remaining keys from being checked.
            if (typeof value === 'object' && typeof rule === 'object' && !match(value, rule)) {
                return false;
            }
        }
    }
    return true;
}

// Sample pattern for match(): each key names a field to check. A RegExp is
// tested against the field's string form; a nested object describes a nested field.
const matcher = {
    foo: /^[0-9]/,
    bar: /baz|foo/,
    quux: { lorem: /^[a-z][0-9]/i },
};

// Run the matcher against two sample objects and print each verdict.
for (const sample of [
    // Every field satisfies its pattern, so this prints true.
    { foo: 10, bar: 'baz', quux: { lorem: 'A1' } },
    // 'AA' has no digit in second position, so quux.lorem fails: prints false.
    { foo: 10, bar: 'baz', quux: { lorem: 'AA' } },
]) {
    console.log(match(sample, matcher));
}

在代码中,您可以遍历数组并在每个元素上运行匹配器。

您可以通过检查匹配器值的类型来扩展匹配器,使其同样支持布尔值、精确字符串和数字。