Question

这可能有些困难，因为除了使用标准运算符的做法之外，我还没见过有人尝试这样做。

我有一个约 5 万行对象的表，每一行都需要运行一组（最多 30 个）表达式，每个表达式返回 true 或 false。我已经用大量的 if/then 语句成功完成了这项任务，但代码很快就会变得混乱。

有人有更好的方法吗？下面是最小示例代码——问题出在 validate 函数内部。


// Sample rows: each object is one table row keyed by column title.
const objects = [
    {
        'First Name': 'Chris',
        'Age': 18,
        'Major': 'Mathematics',
        'College Department': 'Mathematics'
    },
    {
        'First Name': 'null',
        'Age': 21,
        'Major': 'Mathematics',
        'College Department': 'Science'
    }
];

/**
 * Evaluates one rule string against one row.
 *
 * Not implemented yet: the body is the open question, so the function
 * currently returns undefined for every call.
 *
 * Intended return value:
 * {Rule: <rule text>, Flag: false/true, ...rest of the original object's key/value pairs}
 *
 * @param {Object} object - the row to check.
 * @param {string} rule - rule text that references columns as [Column Name].
 */
const validate = (object, rule) => {
    // logic to convert rule to logical expression
};

// Run every rule against every row; each row yields one array of rule results.
// NOTE(review): '[Fist Name]' looks like a typo for '[First Name]'; it is kept
// unchanged because the expected output further down echoes the same rule text.
const results = objects.map((object) => {
    const flags = [
        validate(object, '[Fist Name] is null'),
        validate(object, '[Age] < [Required Age]'),
        validate(object, '[Major] === [College Department] and [Age] > [Required Age]'),
    ];

    // ... Return array of flags
    return flags;
});
// The terminating semicolon above matters: without it, an array literal that
// starts the next statement is parsed as a subscript of the map() call.

// Result should look like this
// lets say required age is 18
[
    [
        {Rule: '[Fist Name] is null', Flag: false, 'First Name': 'Chris', 'Age': 18, 'Major': 'Mathematics', 'College Department': 'Mathematics'},
        {Rule: '[Age] < [Required Age]', Flag: false, 'First Name': 'Chris', 'Age': 18, 'Major': 'Mathematics', 'College Department': 'Mathematics'},
        {Rule: '[Major] === [College Department] and [Age] > [Required Age]', Flag: true, 'First Name': 'Chris', 'Age': 18, 'Major': 'Mathematics', 'College Department': 'Mathematics'}   
    ],
    [
        {Rule: '[Fist Name] is null', Flag: true, 'First Name': 'null', 'Age': 21, 'Major': 'Mathematics', 'College Department': 'Science'},
        {Rule: '[Age] < [Required Age]', Flag: false, 'First Name': 'null', 'Age': 21, 'Major': 'Mathematics', 'College Department': 'Science'},
        {Rule: '[Major] === [College Department] and [Age] > [Required Age]', Flag: false, 'First Name': 'null', 'Age': 21, 'Major': 'Mathematics', 'College Department': 'Science'}  
    ]
]

// I know how to concat the arrays into one, 
// so either the above output works or the one below works
[
    {Rule: '[Fist Name] is null', Flag: false, 'First Name': 'Chris', 'Age': 18, 'Major': 'Mathematics', 'College Department': 'Mathematics'},
    {Rule: '[Age] < [Required Age]', Flag: false, 'First Name': 'Chris', 'Age': 18, 'Major': 'Mathematics', 'College Department': 'Mathematics'},
    {Rule: '[Major] === [College Department] and [Age] > [Required Age]', Flag: true, 'First Name': 'Chris', 'Age': 18, 'Major': 'Mathematics', 'College Department': 'Mathematics'}, 
    {Rule: '[Fist Name] is null', Flag: true, 'First Name': 'null', 'Age': 21, 'Major': 'Mathematics', 'College Department': 'Science'},
    {Rule: '[Age] < [Required Age]', Flag: false, 'First Name': 'null', 'Age': 21, 'Major': 'Mathematics', 'College Department': 'Science'},
    {Rule: '[Major] === [College Department] and [Age] > [Required Age]', Flag: false, 'First Name': 'null', 'Age': 21, 'Major': 'Mathematics', 'College Department': 'Science'}  
]

更新：这是我目前的进展，但它只能处理 <、<=、>，至少这些是我到目前为止测试过的全部运算符。我将在不久后添加更多注释。


// Sample row used to try the rules out; the generated expressions refer to it by the name `object`.
const object = {Age: 10, Required: 18};
// Rule descriptors: R is the rule text with bracketed field references,
// O lists the operator(s) for the rule (validateRule reads only O[0]).
const rules = [
    {R: '["Age"] < ["Required"]', O: ['<']},
    {R: '["Age"] <= ["Required"]', O: ['<=']},
    {R: '["Age"] > ["Required"]', O: ['>']},
]

// String#toIndices(d): scans the string and returns the index of every
// character that is strictly equal to d (empty array when there is none).
// Intended for locating a marker character such as "[" so that text can be
// inserted in front of it.
String.prototype.toIndices = function (d) {
    const indices = [];
    // Single pass with push; building the result with concat() inside reduce()
    // would copy the accumulator on every match.
    for (let i = 0; i < this.length; i++) {
        if (this[i] === d) {
            indices.push(i);
        }
    }
    return indices;
};

// String#splice(idx, rem, str): returns a new string in which |rem| characters
// starting at idx are removed and str is inserted at that position.
String.prototype.splice = function (idx, rem, str) {
    const head = this.slice(0, idx);
    const tail = this.slice(idx + Math.abs(rem));
    return head + str + tail;
};

/**
 * Builds a JavaScript expression string from a rule descriptor.
 *
 * Every bracketed field reference in r.R (e.g. ["Age"]) is prefixed with the
 * identifier `object`, and the references are joined with the first operator
 * in r.O. For example {R: '["Age"] < ["Required"]', O: ['<']} becomes
 * 'object["Age"] < object["Required"]'. Text outside the brackets is dropped.
 *
 * The receiver (`this`) is not read: the result refers to a variable literally
 * named `object`, which must be in scope wherever the string is evaluated.
 *
 * SECURITY: the result is meant to be passed to eval(), which executes it as
 * code. Only use this with trusted rule text.
 *
 * @param {{R: string, O: string[]}} r - rule text and operator list (only O[0] is used).
 * @returns {string} source text of the expression.
 * @throws {TypeError} if r.R contains no bracketed field reference.
 */
Object.defineProperty(Object.prototype, 'validateRule', {
    // Non-enumerable, so the method does not appear in for...in loops over
    // unrelated objects (a plain assignment to Object.prototype would).
    enumerable: false,
    writable: true,
    configurable: true,
    value: function validateRule(r) {
        // Match whole [ ... ] references instead of splitting on spaces, so
        // field names that contain spaces stay in one piece.
        const fields = r['R'].match(/\[[^\]]*\]/g);
        if (fields === null) {
            throw new TypeError(`Rule contains no [field] reference: ${r['R']}`);
        }

        const operator = r['O'][0];
        const parts = [];
        fields.forEach((field, i) => {
            if (i > 0) {
                parts.push(operator);
            }
            parts.push(`object${field}`);
        });
        return parts.join(" ");
    },
});


// Evaluate the first rule only.
// NOTE: eval runs the generated string as code, so the rule text must be trusted.
console.log({ Rule: rules[0].R, Flag: eval(object.validateRule(rules[0])) });
// output
/*
{ Rule: '["Age"] < ["Required"]', Flag: true }
*/

// Evaluate every rule and collect one {Rule, Flag} record per rule.
console.log(rules.map((rule) => ({ Rule: rule.R, Flag: eval(object.validateRule(rule)) })));
// output
/*
[ { Rule: '["Age"] < ["Required"]', Flag: true },
  { Rule: '["Age"] <= ["Required"]', Flag: true },
  { Rule: '["Age"] > ["Required"]', Flag: false } ]
*/

Answer 1

如果您预先知道需要评估哪些表达式，请直接在代码中把这些表达式写出来。

// Rule text -> predicate written directly in code; each predicate receives the row object.
const rules = new Map([
    ["[First Name] is null", (object) => object.firstName === null],
]);

创建自己的迷你语言需要大量工作。如果需要让用户添加自定义规则，则构建规则解析器和评估引擎是有意义的。

在解析代码中，评估表达式的方法通常是将输入分成令牌，然后在第二步中评估令牌。

/**
 * Sketch of the parsing step: turns rule text into an array of tokens.
 * Placeholder only: `arrayOfTokens` is not defined in this snippet and stands
 * for the token array a real implementation would build from textInput.
 */
function parse(textInput) {
    return arrayOfTokens
}

示例文本输入：[Fist Name] is null

令牌示例数组：[ new Field("First Name"), new OpIsNull() ]

为不同的文本输入编写大量的单元测试，并确保解析器返回期望的令牌。当解析部分正常工作后，下一步是评估令牌。针对令牌数组、对象以及期望的输出编写大量的单元测试。

/**
 * Sketch of the evaluation step: applies a parsed token list to one object.
 * Only the "<field> <is-null operator>" shape is shown; handling of other
 * token kinds is left out (see the trailing "etc" marker).
 */
function evaluate(object, tokens) {
    // Resolve the left-hand value only when the first token is a field reference
    const leftHand = tokens[0] instanceof Field
        ? getFieldValue(object, tokens[0])
        : null;

    if (tokens[1] instanceof OpIsNull) {
        return leftHand === null;
    }
    // etc
}

/**
 * Maps a field token to the matching property of the object.
 * Only the "First Name" field is handled in this sketch.
 *
 * @param {Object} object - the row being evaluated.
 * @param {{name: string}} field - field token; its name is the label used in the rule text.
 * @returns {*} the value of object.firstName, or undefined for any other field name.
 */
function getFieldValue(object, field) {
    // Strict comparison: no type coercion when matching the field label
    if (field.name === "First Name") {
        return object.firstName;
    }
    return undefined;
}

Answer 2

经过几天的编写和修改，我开发出了一种将字符串转换为逻辑表达式的解决方案，并把它发布成了一个 npm 包：

https://www.npmjs.com/package/validate-table-rules

安装

# NOTE(review): this Python snippet appears unrelated to the npm package described
# around it; confirm whether it belongs under this heading.
# Assumes `pd` is pandas and `df` is a DataFrame; neither is defined in this snippet.
from sklearn.preprocessing import MultiLabelBinarizer as MLB

mlb = MLB()
# Build one list per zipped tuple of df's values, keeping only entries for which
# pd.notna is true, then fit the binarizer and transform in a single call.
out = mlb.fit_transform([[*filter(pd.notna, x)] for x in zip(*map(df.get, df))])

# Wrap the result using df's index for the rows and the fitted classes as column labels.
pd.DataFrame(out, df.index, mlb.classes_)

   A1  A2  A3  A4  A5  A6
0   1   1   0   1   0   0
1   1   1   1   0   0   0
2   1   1   1   0   0   0
3   1   0   1   0   1   0
4   0   0   0   1   0   0
5   0   0   0   0   1   1

快速入门

npm install validate-table-rules

享受！

将动态字符串转换为JavaScript中的逻辑表达式

2 个答案: