根据另一个数据框中的列在一个数据框中应用正则表达式

时间:2018-09-17 17:21:52

标签: r for-loop apply lapply

我有两个数据框:表A是模式表,表B是名称表。我想对表B进行子集化,只保留与表A中的模式相匹配的行。

/**
 * SQLite helper that owns the single question/answer table described by
 * {@code QuestionTable}.
 *
 * <p>On first creation the table is seeded from the bundled asset
 * {@code dictDB.txt}. The helper keeps its database handle open between calls:
 * none of the query methods close it, so the public {@link #db} field stays
 * usable after a read.
 */
public class DbHelper extends SQLiteOpenHelper {

    private static final String DATABASE_NAME = "MyDataBase";
    private static final int DATABASE_VERSION = 4;

    /** Asset whose non-blank lines are each spliced into an {@code INSERT ... VALUES (...)}. */
    private static final String SEED_ASSET = "dictDB.txt";

    /** Database handle obtained in the constructor (or handed to {@link #onCreate}). */
    public SQLiteDatabase db;

    // The two public fields below are never assigned inside this class; they are
    // kept only so that external code referencing them keeps compiling.
    public List<Question> getQuestion;
    public String[] Flashcard;

    /** Context used to read the seed asset while the database is being created. */
    private final Context ctx;

    /**
     * Opens (and, if necessary, creates or upgrades) the database.
     *
     * @param context any context; its application context is retained when available
     */
    public DbHelper(Context context) {
        super(context, DATABASE_NAME, null, DATABASE_VERSION);
        // Must be assigned BEFORE getWritableDatabase(): that call may invoke
        // onCreate(), which needs the context to read the seed asset.
        Context appContext = context.getApplicationContext();
        ctx = (appContext != null) ? appContext : context;
        db = getWritableDatabase();
    }

    /**
     * Creates the question table and seeds it from the bundled asset.
     *
     * <p>Seeding is best-effort: if the asset cannot be read the table is left
     * empty and the failure is reported on the error stream.
     */
    @Override
    public void onCreate(SQLiteDatabase db) {
        // InsertTable() writes through the field, so point it at the database
        // that is currently being created.
        this.db = db;

        String createTable = "CREATE TABLE " + QuestionTable.TABLE_NAME + " ( "
                + QuestionTable._ID + " INTEGER PRIMARY KEY AUTOINCREMENT, "
                + QuestionTable.ANSWER + " TEXT, "
                + QuestionTable.QUESTION + " TEXT )";
        db.execSQL(createTable);

        try {
            InsertTable(ctx);
        } catch (IOException e) {
            // Best-effort seeding: keep the (empty) table rather than abort creation.
            e.printStackTrace();
        }
    }

    /**
     * Inserts one row per non-blank line of the {@code dictDB.txt} asset.
     *
     * <p>Each line is placed verbatim between {@code VALUES (} and {@code )}, so the
     * asset must contain ready-made SQL value lists (answer first, then question).
     * NOTE(review): because lines are spliced into SQL unescaped, the asset must be
     * trusted, bundled content.
     *
     * <p>The whole asset is read before any row is written, so an I/O failure
     * leaves the table untouched.
     *
     * @param ctx context whose assets contain the seed file
     * @throws IOException if the asset cannot be opened or read
     */
    public void InsertTable(Context ctx) throws IOException {
        List<String> valueLists = new ArrayList<>();

        BufferedReader reader =
                new BufferedReader(new InputStreamReader(ctx.getAssets().open(SEED_ASSET)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                // A blank line would produce "VALUES ()", which is invalid SQL.
                if (line.trim().length() > 0) {
                    valueLists.add(line);
                }
            }
        } finally {
            reader.close();
        }

        String insertPrefix = "INSERT INTO " + QuestionTable.TABLE_NAME + " ( "
                + QuestionTable.ANSWER + ", " + QuestionTable.QUESTION + " ) VALUES (";
        SQLiteDatabase target = writableDb();
        for (String valueList : valueLists) {
            target.execSQL(insertPrefix + valueList + ")");
        }
    }

    /**
     * Adds a single row to the question table.
     *
     * @param Eng value stored in the answer column
     * @param Pl  value stored in the question column
     */
    public void AddRow(String Eng, String Pl) {
        // Bound parameters: the values are never parsed as SQL, so quotes in the
        // text cannot break or alter the statement.
        writableDb().execSQL(
                "INSERT INTO " + QuestionTable.TABLE_NAME + " ( "
                        + QuestionTable.ANSWER + ", " + QuestionTable.QUESTION
                        + " ) VALUES (?, ?)",
                new Object[] {Eng, Pl});
    }

    /** Drops the question table and rebuilds (and re-seeds) it from scratch. */
    @Override
    public void onUpgrade(SQLiteDatabase db, int oldVersion, int newVersion) {
        db.execSQL("DROP TABLE IF EXISTS " + QuestionTable.TABLE_NAME);
        onCreate(db);
    }

    /**
     * Returns up to ten rows picked in random order.
     *
     * <p>NOTE(review): nothing in this body appears to need API 24; the
     * annotation is kept as found — confirm before removing it.
     *
     * @return the selected questions; empty when the table has no rows
     */
    @RequiresApi(api = Build.VERSION_CODES.N)
    public List<Question> getQuestion() {
        List<Question> questions = new ArrayList<>();

        Cursor cursor = getReadableDatabase().rawQuery(
                "SELECT " + QuestionTable.ANSWER + ", " + QuestionTable.QUESTION
                        + " FROM " + QuestionTable.TABLE_NAME
                        + " ORDER BY RANDOM() LIMIT 10",
                null);
        try {
            int answerColumn = cursor.getColumnIndexOrThrow(QuestionTable.ANSWER);
            int questionColumn = cursor.getColumnIndexOrThrow(QuestionTable.QUESTION);
            while (cursor.moveToNext()) {
                Question question = new Question();
                question.setAnswer(cursor.getString(answerColumn));
                question.setQuestion(cursor.getString(questionColumn));
                questions.add(question);
            }
        } finally {
            cursor.close();
        }
        // The database is intentionally left open: the helper caches the handle,
        // and closing it here would invalidate the public db field.
        return questions;
    }

    /**
     * Picks one random row.
     *
     * @return a two-element array {@code {answer, question}}, or an empty array
     *         when the table has no rows
     */
    public String[] Flashcard() {
        // One query returns both columns of the same randomly chosen row.
        Cursor cursor = getReadableDatabase().rawQuery(
                "SELECT " + QuestionTable.ANSWER + ", " + QuestionTable.QUESTION
                        + " FROM " + QuestionTable.TABLE_NAME
                        + " ORDER BY RANDOM() LIMIT 1",
                null);
        try {
            if (!cursor.moveToFirst()) {
                return new String[0];
            }
            String engWord = cursor.getString(cursor.getColumnIndexOrThrow(QuestionTable.ANSWER));
            String polWord = cursor.getString(cursor.getColumnIndexOrThrow(QuestionTable.QUESTION));
            return new String[] {engWord, polWord};
        } finally {
            cursor.close();
        }
    }

    /**
     * Returns the handle held in {@link #db}, reopening it if it is missing or closed.
     * While onCreate/onUpgrade run, the field already holds the open database being
     * set up, so no nested getWritableDatabase() call is made there.
     */
    private SQLiteDatabase writableDb() {
        if (db == null || !db.isOpen()) {
            db = getWritableDatabase();
        }
        return db;
    }
}

我试图做一个for循环,就像:

A <- data.frame(pattern = c("aa", "bb", "cc", "dd"))
B <- data.frame(name = "aa1", "bb1", "abc", "def" ,"ddd")

我希望我的结果表 DT(由循环 for (i in 1:nrow(A)){ for (j in 1:nrow(B)){ DT <- data.frame(grep(A$pattern[i], B$name[j], ignore.case = T, value = T)) }} 生成)仅包含 aa1 和 bb1。

但这太慢了。我只是想知道是否还有更有效的方法?多谢!

2 个答案:

答案 0 :(得分:1)

不需要双循环,以下仅使用sapply循环。

inx <- unlist(sapply(A$pattern, grep, B$name))
B[inx, , drop = FALSE]
#  name
#1  aa1
#2  bb1
#5  ddd

答案 1 :(得分:1)

在您的示例输入数据中似乎有一个小错误(B$name 的声明不正确,缺少 c();并且两个 data.frame 对象都需要包含 stringsAsFactors = F):

> A <- data.frame(pattern = c("aa", "bb", "cc", "dd"), stringsAsFactors = F)
> B <- data.frame(name = c("aa1", "bb1", "abc", "def" ,"ddd"), stringsAsFactors = F)

代码

# using sapply with grepl
> indices <- sapply(1:nrow(A), function(z) grepl(A$pattern[z], B$name[z]))
> indices
[1]  TRUE  TRUE FALSE FALSE

> B[indices, ]
[1] "aa1" "bb1" "ddd"