Question

在像这样的示例数据框中：

// Sample input for the extended parser: nested `MyStatement` blocks (without
// parentheses, with `(true)`, and with empty `()`) next to an ordinary `if`.
// Declared with `const` because the binding is never reassigned.
const program =
`
  MyStatement {
    MyStatement(true) {
      MyStatement() {
        var a = 1;
      }
    }
    if (1) {
      var c = 0;
    }
  }
`;

// Load Acorn and pick out the parts of its API that the plugin below relies on.
const acorn = require("acorn");
const {
  Parser,
  TokenType, // constructor used to create new kinds of tokens
  tokTypes: tt, // standard token types such as "("
} = acorn;

// Register "MyStatement" as a new keyword token type in the keywordTypes
// table reachable through Parser.acorn.
Parser.acorn.keywordTypes.MyStatement = new TokenType("MyStatement", { keyword: "MyStatement" });

//const isIdentifierStart = acorn.isIdentifierStart;

/**
 * Builds a regular expression that matches exactly one of the given words.
 *
 * @param {string} words - Candidate words separated by single spaces.
 * @returns {RegExp} Anchored alternation of the form `^(?:w1|w2|...)$`.
 */
function wordsRegexp(words) {
  // Every single space is a separator and becomes an alternation bar.
  const alternatives = words.split(" ").join("|");
  return new RegExp(`^(?:${alternatives})$`);
}

/**
 * Acorn plugin that adds a `MyStatement` statement to the parser.
 *
 * Accepted forms are `MyStatement { ... }`, `MyStatement() { ... }` and
 * `MyStatement(expr) { ... }`. Each one is emitted as an "IfStatement" node
 * that carries an extra `isMyStatement: true` flag.
 *
 * @param {Function} Parser - Parser class to extend; it must expose
 *   `Parser.acorn.keywordTypes["MyStatement"]`.
 * @returns {Function} Subclass of `Parser` that understands `MyStatement`.
 */
const bruceware = function (Parser) {
  return class extends Parser {
    /**
     * Installs a keyword regexp that includes `MyStatement`, then delegates
     * to the base parser.
     *
     * NOTE(review): Acorn's static `Parser.parse(input, options)` appears to
     * call this instance method without arguments, so `program` may be
     * undefined here — confirm before relying on it.
     *
     * @param {string} [program] - Forwarded unchanged to the base `parse`.
     * @returns {object} Whatever the base `parse` returns.
     */
    parse(program) {
      console.log("hooking parse.");

      // Registering the token type alone appears not to be enough: the
      // keyword regexp on the parser instance must list the new word too.
      const baseKeywords = "break case catch continue debugger default do else finally for function if return switch throw try var while with null true false instanceof typeof void delete new in this const class extends export import super";
      this.keywords = wordsRegexp(`${baseKeywords} MyStatement`);

      return super.parse(program);
    }

    /**
     * Routes `MyStatement` tokens to `parseMyStatement`; every other
     * statement is handled by the base parser.
     *
     * @returns {object} The parsed statement node.
     */
    parseStatement(context, topLevel, exports) {
      const starttype = this.type;
      console.log("!!!hooking parseStatement", starttype);

      // Token types are singleton objects, so strict identity is the right test.
      if (starttype === Parser.acorn.keywordTypes["MyStatement"]) {
        console.log("Parse MyStatement");
        return this.parseMyStatement(this.startNode());
      }

      return super.parseStatement(context, topLevel, exports);
    }

    /**
     * Parses one `MyStatement`, with the current token being the keyword.
     *
     * @param {object} node - Fresh node created by `startNode()`.
     * @returns {object} The node, finished as an "IfStatement".
     */
    parseMyStatement(node) {
      console.log("parse MyStatement");
      this.next(); // step past the `MyStatement` keyword

      // The parenthesised parameter is optional (`MyStatement { ... }`).
      // Without one the test is the placeholder 0, which is not an AST node
      // and may break code generation later.
      node.test = this.type === tt.parenL ? this.parseOptionalParenExpression() : 0;

      // Flag that distinguishes this node from a real `if` statement.
      node.isMyStatement = true;

      // The body is parsed the same way as the consequent of an `if`.
      node.consequent = this.parseStatement("if");

      // There is no `else` branch, but an IfStatement is expected to carry an
      // explicit `alternate: null` rather than leaving the field undefined.
      node.alternate = null;

      return this.finishNode(node, "IfStatement");
    }

    /**
     * Parses `(` [expression] `)`, where the expression may be omitted.
     *
     * @returns {object|number} The parsed expression, or the placeholder 0
     *   for an empty `()` (this may break code generation later).
     */
    parseOptionalParenExpression() {
      this.expect(tt.parenL);

      // Log the token that follows the opening parenthesis.
      console.log("Type: ", this.type);

      // An immediate `)` means the parameter was left blank.
      const val = this.type === tt.parenR ? 0 : this.parseExpression();
      this.expect(tt.parenR);

      return val;
    }
  };
};

// Clear the terminal by writing ESC followed by "c". The escape is spelled
// \x1B because the legacy octal form '\033' is a SyntaxError in strict mode
// and in ES modules.
process.stdout.write('\x1Bc'); //cls

// Attempt to parse the sample program with the extended parser.
const result2 = Parser.extend(bruceware).parse(program);

// Show the resulting tree as indented JSON.
console.log(JSON.stringify(result2, null, ' '));

我想删除相互匹配的行，因此输出应为：

Qid     Sid     L1  L2
id01    id02    74  72
id01    id03    74  68
id02    id01    72  74
id02    id03    72  68

在我的真实数据集中，我有成千上万的行，上面只是为了解释这个想法。

Answer 1

这是另一个想法：

import pandas as pd
import numpy as np
data = {'Qid':['id01','id01','id02','id02'],'Sid':['id02','id02','id01','id03'],'L1':[74,74,72,72],'L2':[72,68,74,68]}
df = pd.DataFrame(data)
cols = ['Qid', 'Sid', 'L1', 'L2']
# Build an order-independent key per row: cast a temporary copy of the four
# columns to str (so ids and numbers can be sorted together) and sort the
# values within each row. The real L1/L2 columns keep their numeric dtype.
key = df[cols].astype(str).apply(lambda row: tuple(sorted(row)), axis=1)
# Keep the first row for every key; later rows holding the same combination
# of Qid, Sid, L1 and L2 (in any order) are dropped.
df = df[~key.duplicated()]
print(df)

输出：

    Qid   Sid  L1  L2
0  id01  id02  74  72
1  id01  id02  74  68
3  id02  id03  72  68

Pandas - 比较行中的列 ID 并有条件地删除

1 个答案: