在像这样的示例数据框中:
// Sample input written in the extended language. It nests `MyStatement` blocks in all three
// accepted forms -- no parentheses, `(true)`, and empty `()` -- next to an ordinary `if`
// statement. The text between the backticks is the parser's input; do not reformat it.
var program =
`
MyStatement {
MyStatement(true) {
MyStatement() {
var a = 1;
}
}
if (1) {
var c = 0;
}
}
`;
const acorn = require("acorn");
const Parser = acorn.Parser;
const tt = acorn.tokTypes; // standard token types, e.g. tt.parenL for "("
const TokenType = acorn.TokenType; // constructor used to create new kinds of tokens
// Register a token type for the new keyword in the keyword table reached through Parser.acorn.
// This is a load-time mutation of that table, so it runs before any parser is created below.
Parser.acorn.keywordTypes["MyStatement"] = new TokenType("MyStatement",{keyword: "MyStatement"});
//const isIdentifierStart = acorn.isIdentifierStart;
/**
 * Build a regular expression that matches exactly one word from a list.
 *
 * @param {string} words - Candidate words separated by single spaces.
 * @returns {RegExp} Anchored pattern of the form `^(?:w1|w2|...)$`.
 */
function wordsRegexp(words) {
  // Each space becomes an alternation bar; the words themselves are inserted verbatim.
  const alternatives = words.split(" ").join("|");
  return new RegExp(`^(?:${alternatives})$`);
}
/**
 * Acorn plugin that adds a `MyStatement` keyword to the parser.
 *
 * `MyStatement` may be written as `MyStatement { ... }`, `MyStatement() { ... }` or
 * `MyStatement(expr) { ... }`. It is emitted as an "IfStatement" node carrying an extra
 * `isMyStatement: true` flag so later stages can tell it apart from a real `if`.
 *
 * @param {Function} Parser - The parser class to extend. It must expose
 *   `Parser.acorn.keywordTypes["MyStatement"]` by the time statements are parsed.
 * @returns {Function} A subclass of `Parser` with the `MyStatement` hooks installed.
 */
const bruceware = function(Parser) {
  return class extends Parser {
    /**
     * Install the extended keyword pattern on this instance, then run the base parse.
     *
     * @param {*} program - Forwarded unchanged to the base class `parse`.
     * @returns {*} Whatever the base class `parse` returns.
     */
    parse(program) {
      console.log("hooking parse.");
      // Registering the token type alone is not enough: the keyword pattern on the
      // instance must also be replaced so that it includes the new keyword.
      const baseKeywords = "break case catch continue debugger default do else finally for function if return switch throw try var while with null true false instanceof typeof void delete new in this const class extends export import super";
      this.keywords = wordsRegexp(`${baseKeywords} MyStatement`);
      return super.parse(program);
    }

    /**
     * Statement hook: route `MyStatement` tokens to `parseMyStatement` and delegate
     * everything else to the base class.
     *
     * @param {*} context - Forwarded to the base class.
     * @param {*} topLevel - Forwarded to the base class.
     * @param {*} exports - Forwarded to the base class.
     * @returns {object} The parsed statement node.
     */
    parseStatement(context, topLevel, exports) {
      const starttype = this.type;
      console.log("!!!hooking parseStatement", starttype);
      // Token types are objects, so compare by identity. The lookup stays at call time
      // so that it sees the keyword table as it is when the statement is parsed.
      if (starttype !== Parser.acorn.keywordTypes["MyStatement"]) {
        return super.parseStatement(context, topLevel, exports);
      }
      console.log("Parse MyStatement");
      return this.parseMyStatement(this.startNode());
    }

    /**
     * Parse one `MyStatement`, with the current token being the keyword itself.
     *
     * @param {object} node - Freshly started node to populate.
     * @returns {object} The finished node, typed "IfStatement", with `test`,
     *   `isMyStatement` and `consequent` set. No `else` branch is parsed and
     *   `alternate` is left unset.
     */
    parseMyStatement(node) {
      console.log("parse MyStatement");
      this.next();
      // The parameter list is optional: `MyStatement { ... }` is legal. With no
      // parentheses the test is the placeholder 0, which is not an AST node and may
      // break code generation later.
      node.test = this.type === tt.parenL ? this.parseOptionalParenExpression() : 0;
      // Flag the node so it can be told apart from a genuine `if` statement.
      node.isMyStatement = true;
      // For now the body is parsed exactly like the consequent of an `if`.
      node.consequent = this.parseStatement("if");
      return this.finishNode(node, "IfStatement");
    }

    /**
     * Parse a parenthesised argument that is allowed to be empty.
     *
     * @returns {object|number} The parsed expression node, or the placeholder 0 for
     *   `()` (which may break code generation later).
     */
    parseOptionalParenExpression() {
      this.expect(tt.parenL);
      // Log the token that follows "(" to show which form was written.
      console.log("Type: ", this.type);
      // An immediate ")" means the argument was left blank.
      const val = this.type === tt.parenR ? 0 : this.parseExpression();
      this.expect(tt.parenR);
      return val;
    }
  };
};
// Clear the terminal by writing ESC followed by "c". The escape is spelled \x1B because the
// legacy octal form \033 is a SyntaxError in strict mode and in ES modules; the bytes written
// are the same.
process.stdout.write('\x1Bc'); //cls
// Parse the sample program with the extended parser and print the resulting AST.
const result2 = Parser.extend(bruceware).parse(program); //attempt to parse
console.log(JSON.stringify(result2, null, ' ')); //show the results.
我想删除互为匹配的行(即 Qid 与 Sid 互换、L1 与 L2 也随之互换的行只保留一条),因此输出应为:
Qid Sid L1 L2
id01 id02 74 72
id01 id03 74 68
id02 id01 72 74
id02 id03 72 68
在我的真实数据集中,我有成千上万的行,上面只是为了解释这个想法。
答案 0 :(得分:2)
这是另一个想法:
import pandas as pd
import numpy as np
# Sample frame: rows 0 and 2 hold the same four values with Qid/Sid and L1/L2 swapped.
data = {'Qid':['id01','id01','id02','id02'],'Sid':['id02','id02','id01','id03'],'L1':[74,74,72,72],'L2':[72,68,74,68]}
df = pd.DataFrame(data)

# Build an order-insensitive key per row: the four values as strings, sorted. Two rows that
# contain the same combination of Qid, Sid, L1 and L2 in any order get the same key.
# The key lives in a separate Series, so L1/L2 keep their numeric dtype and no helper column
# is added to the frame.
key_columns = ['Qid', 'Sid', 'L1', 'L2']
row_keys = pd.Series(
    [tuple(sorted(map(str, row))) for row in df[key_columns].itertuples(index=False, name=None)],
    index=df.index,
    dtype=object,
)

# Keep the first row of every key and drop the later repeats.
df = df[~row_keys.duplicated()]
print(df)
输出:
Qid Sid L1 L2
0 id01 id02 74 72
1 id01 id02 74 68
3 id02 id03 72 68