Question

是否有R函数可以查找位于某个特定值下方（即下一行）的值？示例：下面是输入表。我需要新增一列（COl3），其内容是第二列（COl2）中“Age”下方那一行的值

COl1    COl2    
James   Age 
James   23  
Andrew  Age 
Andrew  24

我需要另一列

COl1    COl2    COl3
James   Age     23
James   23      23
Andrew  Age     24
Andrew  24      24

Answer 1

使用dplyr：

 # Convert factor columns to character first, so values can be compared and
 # copied as plain strings; then, on every "Age" row, take the COl2 value of
 # the following row, and keep COl2 unchanged everywhere else.
 df %>%
   mutate(across(where(is.factor), as.character)) %>%
   mutate(COL3 = if_else(COl2 == "Age", lead(COl2), COl2))
    COl1 COl2 COL3
1  James  Age   23
2  James   23   23
3 Andrew  Age   24
4 Andrew   24   24

使用base，我们可以执行以下操作并删除不需要的列：

 # Start from COl2 as plain character, then overwrite every "Age" row with the
 # value found in the row directly below it. This works for any number of
 # "Age" rows and creates no helper columns that would have to be removed.
 age_rows <- which(df$COl2 == "Age")
 df$COL3 <- as.character(df$COl2)
 df$COL3[age_rows] <- df$COL3[age_rows + 1L]
 df
 #     COl1 COl2 COL3
 # 1  James  Age   23
 # 2  James   23   23
 # 3 Andrew  Age   24
 # 4 Andrew   24   24

Answer 2

先按COl2筛选出数据框的子集（去掉值为"Age"的行），然后再与原始数据框连接。

base R

# Self-merge on COl1: the right-hand table keeps only the rows whose COl2 is
# not "Age", so every row of df is paired with the value row(s) of its group.
merge(x = df, y = df[which(df$COl2 != "Age"), ], by = "COl1")

dplyr

library(dplyr)

# Left-join df to the subset of its own rows whose COl2 is not "Age",
# matching on COl1.
left_join(df, filter(df, COl2 != "Age"), by = "COl1")

sqldf

library(sqldf)

# Left-join df to the subset of its own rows whose COl2 is not 'Age'.
# The SQL string literal is single-quoted: double quotes denote identifiers in
# SQL, so "Age" would be read as a column name if such a column ever existed.
sqldf("SELECT *
      FROM df
      LEFT JOIN (SELECT *
      FROM df WHERE COl2 != 'Age') USING (COl1)")

输出

    COl1 COl2.x COl2.y
1 Andrew    Age     24
2 Andrew     24     24
3  James    Age     23
4  James     23     23

数据

# Example input: two factor columns, four rows. The factor levels are spelled
# out explicitly so they do not depend on the locale's sort order.
df <- data.frame(
  COl1 = factor(
    c("James", "James", "Andrew", "Andrew"),
    levels = c("Andrew", "James")
  ),
  COl2 = factor(
    c("Age", "23", "Age", "24"),
    levels = c("23", "24", "Age")
  )
)

Answer 3

使用dplyr的一种方法是：先用cumsum创建分组，然后在每个组中选取"Age"之后的下一个COl2值。

library(dplyr)

# Number the groups with a running count of "Age" rows, then inside each group
# pick the COl2 value that sits right after the first "Age" row. The helper
# column is dropped again at the end.
df %>%
  mutate(group = cumsum(COl2 == "Age")) %>%
  group_by(group) %>%
  mutate(Col3 = COl2[which.max(COl2 == "Age") + 1]) %>%
  ungroup() %>%
  select(-group)

 #  COl1   COl2  Col3 
 #  <chr>  <chr> <chr>
 #1 James  Age   23   
 #2 James  23    23   
 #3 Andrew Age   24   
 #4 Andrew 24    24

或者，由于分组编号在每个"Age"处递增（即每个组都以"Age"开头），我们可以直接选取每组中的第二个值

library(dplyr)

# Each group starts at an "Age" row, so the wanted value is the group's second
# element. Ungroup and drop the helper column afterwards so the result is a
# plain, ungrouped table with only the original columns plus Col3.
df %>%
  group_by(group = cumsum(COl2 == "Age")) %>%
  mutate(Col3 = COl2[2L]) %>%
  ungroup() %>%
  select(-group)

或使用base R的ave

# Group rows by the running count of "Age" markers and repeat each group's
# second COl2 value across the whole group.
ave(df$COl2, cumsum(df$COl2 == "Age"), FUN = function(x) x[2L])
#[1] "23" "23" "24" "24"

Answer 4

一种解决方案是使用sqldf包，将数据框df按指定的约束条件与自身连接：

library(sqldf)
# Self-join of df on Col1: df_origin supplies every row, df_age is a subquery
# that keeps only the rows whose Col2, cast to an integer, is greater than 0.
# The matching df_age.Col2 is returned as the new column Col3.
# NOTE(review): presumably a non-numeric string such as 'Age' casts to 0 and is
# therefore filtered out -- confirm for the SQL backend in use.
# NOTE(review): the query spells the columns Col1/Col2 while the example data
# elsewhere on this page uses COl1/COl2; this presumably relies on
# case-insensitive column-name matching in the backend -- confirm.
result <- sqldf("SELECT df_origin.*, df_age.Col2 as Col3 FROM 
       df df_origin join
          (SELECT Col1, Col2, cast(Col2 as int) as Col2Int FROM df WHERE Col2Int > 0) df_age 
       on (df_origin.Col1 = df_age.Col1)")

Answer 5

再次使用dplyr / tidyr ¹：

library(tidyverse)

# Copy COl2 into COl3 with every "Age" entry turned into NA, then fill those
# gaps upwards, i.e. with the next non-missing value further down the column.
fill(
  mutate(dat, COl3 = na_if(COl2, "Age")),
  COl3,
  .direction = "up"
)

数据：

#dat <- read.table(
#  text = "COl1    COl2
#  James   Age
#  James   23
#  Andrew  Age
#  Andrew  24",
#  header = T,
#  stringsAsFactors = F
#)

输出：

#    COl1 COl2 COl3
#1  James  Age   23
#2  James   23   23
#3 Andrew  Age   24
#4 Andrew   24   24

¹仅当 !any(is.na(dat$COl2)) 成立（即COl2中没有缺失值）时，结果才是正确的。

Answer 6

使用base R：

# Read the example table. header = TRUE treats the first line as column names;
# stringsAsFactors = FALSE keeps both columns as character vectors.
df <- read.table(text = "COl1    COl2
James   Age
James   23
Andrew  Age
Andrew  24", header = TRUE, stringsAsFactors = FALSE)

# Within each COl1 group, repeat the group's last COl2 value on every row.
transform(df, COl3 = ave(COl2, COl1, FUN = function(x) tail(x, 1)))
#     COl1 COl2 COl3
# 1  James  Age   23
# 2  James   23   23
# 3 Andrew  Age   24
# 4 Andrew   24   24

是否有R函数可以查找位于某个特定值下方的值

6 个答案: