我的数据如下:
# Example data from the question: 13 rows, 10 columns.
# `me`, `ls`, `lg` and `lo` are 0/1 indicator columns; in this sample every row
# has exactly one of them set to 1.
df <- data.frame(
  hhid = c(
    5668, 5595, 4724, 4756, 4856, 4730, 4757,
    6320, 4758, 6319, 6311, 5477, 6322
  ),
  pid = c(
    93660, 93660, 100960, 100960, 100960, 100960, 100960,
    100962, 100960, 100962, 100962, 93814, 100962
  ),
  pname = c(
    "AB", "HG", "DC", "DC", "DC", "DC", "DC",
    "BA", "DC", "BA", "BA", "BH", "BA"
  ),
  vid = c(
    462962, 608757, 772284, 772284, 772284, 293475, 293475,
    656323, 293475, 656323, 81533, 465612, 656323
  ),
  vname = c(
    "ABCD", "KJUI", "HIND", "HIND", "HIND", "KAJA", "KAJA",
    "ADAR", "KAJA", "ADAR", "SANG", "NARI", "ADAR"
  ),
  me = c(1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0),
  ls = c(0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1),
  lg = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0),
  lo = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0),
  amt = c(
    20000, 20000, 14000, 14000, 14000, 14000, 14000,
    27000, 14000, 27000, 27000, 20000, 23000
  )
)
实际数据有2万多行。列名为 hhid、pid、pname、vid、vname、me、ls、lg、lo、amt。每个 hhid 都是唯一的。
我需要的输出是这样:
hhid pid pname vid vname LOS amt
5668 93660 AB 462962 ABCD me 20000
5595 93660 HG 608757 KJUI me 20000
4724 100960 DC 772284 HIND ls 14000
4756 100960 DC 772284 HIND ls 14000
4856 100960 DC 772284 HIND lo 14000
以此类推。
也就是说,我想把 me、ls、lg、lo 这四列合并成一个新列 LOS:对每一行,找出取值为 1 的那一列,并把该列的列名(me、ls、lg 或 lo)保存到新的 LOS 列中,用它来替换原来的四列。
答案 0 :(得分:2)
您可以这样做:
# Label each row with the name of the indicator column that holds a 1.
# max.col() works on the whole logical matrix at once and always yields one
# index per row. apply(..., 1, which) instead returns a list as soon as any row
# has zero or several 1s, and the subsetting then fails.
is_one <- df[c("me", "ls", "lg", "lo")] == 1
# ties.method = "first": if a row has several 1s, the leftmost column wins.
df$LOS <- c("me", "ls", "lg", "lo")[max.col(is_one, ties.method = "first")]
# Rows without any 1 get NA rather than silently falling back to "me".
df$LOS[rowSums(is_one, na.rm = TRUE) == 0] <- NA
# > df
# hhid pid pname vid vname me ls lg lo amt LOS
# 1 5668 93660 AB 462962 ABCD 1 0 0 0 20000 me
# 2 5595 93660 HG 608757 KJUI 1 0 0 0 20000 me
# 3 4724 100960 DC 772284 HIND 0 1 0 0 14000 ls
# 4 4756 100960 DC 772284 HIND 0 1 0 0 14000 ls
# 5 4856 100960 DC 772284 HIND 0 0 0 1 14000 lo
# 6 4730 100960 DC 293475 KAJA 0 1 0 0 14000 ls
# 7 4757 100960 DC 293475 KAJA 0 0 1 0 14000 lg
# 8 6320 100962 BA 656323 ADAR 1 0 0 0 27000 me
# 9 4758 100960 DC 293475 KAJA 0 1 0 0 14000 ls
# 10 6319 100962 BA 656323 ADAR 0 0 1 0 27000 lg
# 11 6311 100962 BA 81533 SANG 0 0 0 1 27000 lo
# 12 5477 93814 BH 465612 NARI 1 0 0 0 20000 me
# 13 6322 100962 BA 656323 ADAR 0 1 0 0 23000 ls
或(定义列名的向量)
# Names of the 0/1 indicator columns to collapse into `LOS`.
cols <- c("me", "ls", "lg", "lo")
# Logical matrix: TRUE where an indicator equals 1.
is_one <- df[cols] == 1
# max.col() always returns exactly one index per row, so this keeps working
# when a row has no 1 or more than one 1 (apply(..., 1, which) would return a
# list there and break the subsetting). With several 1s the leftmost wins.
df$LOS <- cols[max.col(is_one, ties.method = "first")]
# Rows without any 1 get NA rather than silently falling back to cols[1].
df$LOS[rowSums(is_one, na.rm = TRUE) == 0] <- NA
答案 1 :(得分:0)
您可以先将数据转换为长格式,然后用 filter 筛选出 value == 1 的行:
library(dplyr)
# Stack the four indicator columns into (LOS, value) pairs, keep only the pair
# whose value is 1, then drop the now-constant `value` column.
df %>%
  tidyr::pivot_longer(
    cols = c(me, ls, lg, lo),
    names_to = "LOS"
  ) %>%
  filter(value == 1) %>%
  select(-value)
# hhid pid pname vid vname amt LOS
# <dbl> <dbl> <fct> <dbl> <fct> <dbl> <chr>
# 1 5668 93660 AB 462962 ABCD 20000 me
# 2 5595 93660 HG 608757 KJUI 20000 me
# 3 4724 100960 DC 772284 HIND 14000 ls
# 4 4756 100960 DC 772284 HIND 14000 ls
# 5 4856 100960 DC 772284 HIND 14000 lo
# 6 4730 100960 DC 293475 KAJA 14000 ls
# 7 4757 100960 DC 293475 KAJA 14000 lg
# 8 6320 100962 BA 656323 ADAR 27000 me
# 9 4758 100960 DC 293475 KAJA 14000 ls
#10 6319 100962 BA 656323 ADAR 27000 lg
#11 6311 100962 BA 81533 SANG 27000 lo
#12 5477 93814 BH 465612 NARI 20000 me
#13 6322 100962 BA 656323 ADAR 23000 ls
答案 2 :(得分:0)
Base R解决方案:
# Replace the four 0/1 indicator columns with a single column `name` holding,
# for each row, the name of the indicator column with the largest value.
# local() keeps the helper variables out of the calling environment.
df <- local({
  is_flag <- names(df) %in% c("me", "ls", "lg", "lo")
  flag_names <- names(df)[is_flag]
  # which.max() gives the first position of the row maximum.
  winner <- apply(df[is_flag], 1, which.max)
  transform(df[!is_flag], name = flag_names[winner])
})
数据:
# Sample data used by the answer: 13 rows, 10 columns.
# `me`, `ls`, `lg` and `lo` are 0/1 indicator columns; in this sample every row
# has exactly one of them set to 1.
df <- data.frame(
  hhid = c(
    5668, 5595, 4724, 4756, 4856, 4730, 4757,
    6320, 4758, 6319, 6311, 5477, 6322
  ),
  pid = c(
    93660, 93660, 100960, 100960, 100960, 100960, 100960,
    100962, 100960, 100962, 100962, 93814, 100962
  ),
  pname = c(
    "AB", "HG", "DC", "DC", "DC", "DC", "DC",
    "BA", "DC", "BA", "BA", "BH", "BA"
  ),
  vid = c(
    462962, 608757, 772284, 772284, 772284, 293475, 293475,
    656323, 293475, 656323, 81533, 465612, 656323
  ),
  vname = c(
    "ABCD", "KJUI", "HIND", "HIND", "HIND", "KAJA", "KAJA",
    "ADAR", "KAJA", "ADAR", "SANG", "NARI", "ADAR"
  ),
  me = c(1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0),
  ls = c(0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1),
  lg = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0),
  lo = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0),
  amt = c(
    20000, 20000, 14000, 14000, 14000, 14000, 14000,
    27000, 14000, 27000, 27000, 20000, 23000
  )
)