如何获取包含特定数据的行的行号?

时间:2018-10-08 15:40:56

标签: r

我有data.frame

          COL1         COL2 
1          X            X
2          X            X
3          X            Y
4          X            X
5          Z            X

如果某一行中有任何一列的值不是X,我需要获取该行的行号(在本例中为3和5)。我尝试用'sapply'配合“!=”来检查数据,但我觉得还缺少其他步骤。

5 个答案:

答案 0 :(得分:2)

以下是使用which的解决方案,该解决方案可以在任意数量的列上使用:

# example data
df <- data.frame(
    col1 = c("X", "X", "X", "X", "Z"),
    col2 = c("X", "X", "Z", "X", "X"),
    stringsAsFactors = FALSE
)

# how to get rows without X
# lapply() + unlist() is used instead of sapply(): sapply() returns a vector,
# a matrix or a list depending on how many non-"X" cells each column has, so
# unique() on its result is a plain vector of row numbers only when every
# column has exactly one hit. Flattening the per-column hits first works for
# any data, and sort() reports the rows in ascending order.
hits_per_column <- lapply(df, function(x) which(x != "X"))
rows_without_x <- sort(unique(unlist(hits_per_column, use.names = FALSE)))
rows_without_x

答案 1 :(得分:1)

这将起作用:

import pymysql, time, discord
from datetime import datetime

import warnings
warnings.simplefilter("ignore")

class BotEvents:

def __init__(self, *args, **kwargs):
    """Register the ``on_ready`` handler on ``self.bot``.

    ``*args`` and ``**kwargs`` are accepted but not used in this body.
    """
    separator = '-----------------------------------------'

    @self.bot.event
    async def on_ready():
        # Startup banner, then one line per guild listed in self.bot.guilds.
        self.log('\n', save=True)
        self.log(separator, save=True)
        self.log('Authorize successfully', save=True)
        self.log('Authorized servers: ', save=True)
        for guild in self.bot.guilds:
            self.log('  Name (id): %s (%s)' % (guild.name, guild.id), save=True)
        self.log(separator, save=True)

        # Schedule female_logic() on the bot's loop and wait for that task.
        await self.bot.loop.create_task(self.female_logic())


def counter(self, server):
    """Keep the ``users`` rows of ``server`` current while ``self.run_c`` is truthy.

    The ``user_id`` values already stored for this server are loaded once.
    Then, for as long as ``self.run_c`` stays truthy, every pass:

    * re-reads the ``auto_roles`` thresholds ordered by ``min_exp``; the
      lowest one gates the ``member`` flag/role, the remaining ones are the
      level roles;
    * inserts a ``users`` row for each member that has none yet;
    * for known members, increments ``online_time`` when the status is
      online/idle/dnd and adds ``self.gold_per_minut`` /
      ``self.exp_per_minut`` when ``member.voice`` is set (each at most once
      per 60 seconds), then grants or swaps roles based on the experience
      value that was read at the start of the pass.

    All values are sent to the database as bound parameters (``%s``
    placeholders) instead of being formatted into the SQL text, and every
    connection is closed in a ``finally`` block so an exception cannot leak it.

    :param server: object exposing ``id``, ``name``, ``members`` and ``roles``.
    """
    # Ids are bound as strings, matching the quoted literals the queries used
    # when they were assembled with str.format().
    server_id = str(server.id)

    db_conn, db_cursor = self.connect_db()
    try:
        db_cursor.execute(
            "SELECT `user_id` FROM `users` WHERE server_id = %s",
            (server_id,))
        # Normalise to str so the membership test below works whatever type
        # the driver returns for the column.
        members = [str(row[0]) for row in db_cursor.fetchall()]
    finally:
        db_conn.close()

    while self.run_c:
        db_conn, db_cursor = self.connect_db()
        try:
            db_cursor.execute(
                "SELECT `role_id`, `min_exp` FROM `auto_roles` "
                "WHERE server_id = %s ORDER BY `min_exp`",
                (server_id,))
            thresholds = [[str(row[0]), int(row[1])]
                          for row in db_cursor.fetchall()]
        finally:
            db_conn.close()
        # First (lowest min_exp) row is the "member" threshold; the rest are
        # the level roles, still in ascending min_exp order.
        member_role = thresholds[0] if thresholds else []
        roles = thresholds[1:]

        for member in server.members:
            if member != self.bot.user:
                member_id = str(member.id)
                db_conn, db_cursor = self.connect_db()
                try:
                    if member_id not in members:
                        created = str(time.time())
                        # The apostrophe stripping is kept so stored names stay
                        # identical to rows written before parameter binding.
                        db_cursor.execute(
                            "INSERT INTO `users` (`server_id`, `user_name`, `user_id`, "
                            "`last_update_online`, `last_update`) "
                            "VALUES (%s, %s, %s, %s, %s)",
                            (server_id, member.name.replace("'", ''), member_id,
                             created, created))
                        db_conn.commit()
                        members.append(member_id)
                        self.log(' [%s] Added user: %s' % (server.name, member.name), save=True)
                    else:
                        db_cursor.execute(
                            "SELECT `member`, `balance`, `online_time`, `last_update_online`, "
                            "`experience`, `last_update`, `autorole` FROM `users` "
                            "WHERE (user_id = %s AND server_id = %s)",
                            (member_id, server_id))
                        info = db_cursor.fetchone()
                        if info is not None:
                            info = {
                                'member': bool(info[0]),
                                'balance': int(info[1]),
                                'online_time': int(info[2]),
                                'last_update_online': int(info[3]),
                                'experience': int(info[4]),
                                'last_update': int(info[5]),
                                'autorole': info[6]
                            }

                            # Online-time counter: at most one increment per 60 s.
                            if (int(time.time()) - info['last_update_online'] > 60
                                    and str(member.status) in ['online', 'idle', 'dnd']):
                                db_cursor.execute(
                                    "UPDATE `users` SET online_time = %s, last_update_online = %s "
                                    "WHERE (user_id = %s AND server_id = %s)",
                                    (info['online_time'] + 1, time.time(), member_id, server_id))
                                db_conn.commit()
                                self.log('[%s] Updated online for user: %s ' % (server.name, member.name), save=True)

                            # Balance/experience reward: at most once per 60 s and
                            # only while member.voice is set.
                            if (int(time.time()) - info['last_update'] > 60
                                    and member.voice is not None):
                                db_cursor.execute(
                                    "UPDATE `users` SET balance = %s, experience = %s, last_update = %s "
                                    "WHERE (user_id = %s AND server_id = %s)",
                                    (info['balance'] + self.gold_per_minut,
                                     info['experience'] + self.exp_per_minut,
                                     time.time(),
                                     member_id,
                                     server_id))
                                db_conn.commit()
                                self.log('[%s] Updated balance, experience for user: %s ' % (server.name, member.name), save=True)

                            if len(member_role) != 0:
                                if not info['member'] and info['experience'] >= member_role[1]:
                                    role_o = discord.utils.get(server.roles, id=int(member_role[0]))
                                    # utils.get() gives None when the configured role is
                                    # missing on the server; skip instead of scheduling
                                    # add_roles(None) and flagging the user as a member.
                                    if role_o is not None and role_o not in member.roles:
                                        self.bot.loop.create_task(member.add_roles(role_o))
                                        db_cursor.execute(
                                            "UPDATE `users` SET member = %s "
                                            "WHERE (user_id = %s AND server_id = %s)",
                                            (True, member_id, server_id))
                                        db_conn.commit()

                                # Highest level role whose threshold is reached
                                # (roles are in ascending min_exp order).
                                new_role = ''
                                for role in roles:
                                    if role[1] <= info['experience']:
                                        new_role = role[0]

                                if new_role != '' and new_role != info['autorole']:
                                    if info['autorole'] != '0':
                                        role_o = discord.utils.get(server.roles, id=int(info['autorole']))
                                        if role_o in member.roles:
                                            self.bot.loop.create_task(member.remove_roles(role_o))
                                            self.log('[%s] For user %s deleted old role %s(%s)' % (
                                                server.name,
                                                member.name,
                                                role_o.name,
                                                role_o.id), save=True)

                                            # NOTE(review): blocks the calling thread for 2 s
                                            # before the new role is requested.
                                            time.sleep(2)

                                    role_o = discord.utils.get(server.roles, id=int(new_role))
                                    if role_o is not None and role_o not in member.roles:
                                        db_cursor.execute(
                                            "UPDATE `users` SET autorole = %s "
                                            "WHERE (user_id = %s AND server_id = %s)",
                                            (new_role, member_id, server_id))
                                        db_conn.commit()
                                        self.bot.loop.create_task(member.add_roles(role_o))
                                        self.log('[%s] For user %s added role %s(%s)' % (server.name, member.name, role_o.name, role_o.id), save=True)
                finally:
                    db_conn.close()

答案 2 :(得分:1)

您接近了,将sapply与原始函数!=一起使用的想法基本上是正确的。

以下内容返回逻辑矩阵。

# Compare every column of `dat` with "X". The operator `!=` itself is passed
# as the function, so sapply() evaluates `!=`(column, "X") for each column and
# binds the logical results into one matrix (one matrix column per data column).
sapply(dat, `!=`, "X")
#      COL1  COL2
#[1,] FALSE FALSE
#[2,] FALSE FALSE
#[3,] FALSE  TRUE
#[4,] FALSE FALSE
#[5,]  TRUE FALSE

如果需要行号,请将其包装在which中,并将参数arr.ind设置为TRUE

# arr.ind = TRUE makes which() report each TRUE cell as a (row, col) index pair
# instead of a single linear position; the "row" column holds the row numbers.
which(sapply(dat, `!=`, "X"), arr.ind = TRUE)
#     row col
#[1,]   5   1
#[2,]   3   2

编辑。

发布了几种解决方案,这是比较测试。
由于OP表示可能有100列的数据集,所以我测试了两个不同的数据集,即问题中发布的数据集和更大的数据集。

函数RuiJaap是在Rui的基础上采纳了Jaap评论中的建议而得到的版本。

# Rui: build the logical "differs from `value`" matrix column by column, then
# return the (row, col) index of every TRUE cell.
Rui <- function(DF, value = "X") {
  differs <- sapply(DF, `!=`, value)
  which(differs, arr.ind = TRUE)
}

# DanY: row numbers of all rows that contain at least one cell different from
# `value`, in ascending order.
#
# The per-column hits are collected with lapply() and flattened with unlist()
# rather than simplified by sapply(): sapply() yields a vector, a matrix or a
# list depending on how many hits each column has, and unique() on a matrix or
# list does not produce row numbers.
DanY <- function(DF, value = "X") {
  hits_per_column <- lapply(DF, function(x) which(x != value))
  sort(unique(unlist(hits_per_column, use.names = FALSE)))
}

# Jaap: count the cells that differ from `value` in each row and return the
# positions of the rows whose count is non-zero.
Jaap <- function(DF, value = "X") {
  mismatches_per_row <- rowSums(DF != value)
  which(mismatches_per_row != 0)
}

# RuiJaap: like Rui, but compares the whole data frame at once instead of
# column by column; returns the (row, col) index of every cell != `value`.
#
# The comparison uses the function's own arguments `DF` and `value`, so the
# result (and any timing) reflects the data that was actually passed in.
RuiJaap <- function(DF, value = "X") {
  which(DF != value, arr.ind = TRUE)
}

library(ggplot2)
library(microbenchmark)

# Benchmark the four functions on the small example data `dat`. Each named
# argument is one timed expression; `times` is the number of evaluations.
mb1 <- microbenchmark(Rui = Rui(dat),
                      RuiJaap = RuiJaap(dat),
                      Jaap = Jaap(dat),
                      DanY = DanY(dat),
                      times = 1e3)

# Same comparison on the larger `dat2`, with fewer repetitions (1e2 vs 1e3).
mb2 <- microbenchmark(Rui = Rui(dat2),
                      RuiJaap = RuiJaap(dat2),
                      Jaap = Jaap(dat2),
                      DanY = DanY(dat2),
                      times = 1e2)

# Plot the timings collected by each benchmark.
autoplot(mb1)
autoplot(mb2)

对于小型数据集,DanY更快,而对于大型数据集,RuiJaap是最快的。

数据。

# Example data from the question. The header line has one field fewer than the
# data lines, so read.table() uses the first field of each line as row names
# and the table ends up with the two columns COL1 and COL2.
dat <- read.table(
  header = TRUE,
  text = "
          COL1         COL2 
1          X            X
2          X            X
3          X            Y
4          X            X
5          Z            X                  
"
)


# Larger data set: 20 rows x 100 columns of "X" in which 100 randomly chosen
# cells are overwritten with "Y". The seed makes the selection reproducible.
set.seed(1)
dat2 <- local({
  cells <- matrix("X", nrow = 20, ncol = 100)
  cells[sample(length(cells), 100)] <- "Y"
  as.data.frame(cells)
})

答案 3 :(得分:1)

这是一个tidyverse解决方案,可以在任意数量的列上使用

library(tidyverse)

df <- tibble(
  col1 = c("X", "X", "X", "X", "Z"),
  col2 = c("X", "X", "Z", "X", "X"),
  col3 = c("X", "X", "Z", "Z", "X")
)

# Keep the rows in which at least one column differs from "X".
# filter() with if_any() replaces the superseded filter_all()/any_vars() pair
# (if_any() is available from dplyr 1.0.4 on) and returns the same rows.
filter(df, if_any(everything(), ~ .x != "X"))

返回结果:

# A tibble: 3 x 3
 col1  col2  col3
  <chr> <chr> <chr>
  1     X     Z     Z
  2     X     X     Z
  3     Z     X     X

答案 4 :(得分:1)

这是基本的R解决方案,适用于任意数量的列-

# Count the "X" cells in each row; a row is reported as soon as that count
# differs from the total number of columns.
n_cols <- ncol(df)
which(rowSums(df == "X") != n_cols)