当我要使用pivot_longer将多列从宽列转换为长列时,出现错误消息
我有一些代码,可以使用gather将数据从宽格式转换为长格式,但是我必须逐组列进行。我想使用pivot_longer一次性转换多列,而不是逐列处理。
这是一些输入数据:
-- Combine the matching users from both schemas, tagging each row with its
-- source ('ta' = attendence.users, 'ka' = kaina_test.users).
-- The filter uses IN: the previous form compared company_name to two different
-- literals joined by AND, which no single row can satisfy.
-- In a UNION ALL the output column names come from the first SELECT, so the
-- tag column is named "attendence" for every row.
(SELECT *
, 'ta' as attendence
from attendence.users
where company_name in ('MCC-INDIA', 'Ceekay Logistics Private Limited.')
)
UNION ALL
(SELECT *
, 'ka' as kaina_php
from kaina_test.users
where company_name in ('MCC-INDIA', 'Ceekay Logistics Private Limited.')
);
示例输入数据(dput 输出)为:
# Sample input in wide format: dput() output of a 6-row data.frame.
# All columns are character vectors. The indicator families hf1-hf6, ac1-ac6,
# cs1-cs8 and se1-se8 hold "1" or the empty string "" (not NA).
structure(list(id = c("81", "83", "85", "88", "1", "2"), look_work = c("yes",
"yes", "yes", "yes", "yes", "yes"), current_work = c("no", "yes",
"no", "no", "no", "no"), before_work = c("no", "NULL", "yes",
"yes", "yes", "yes"), keen_move = c("yes", "yes", "no", "no",
"no", "no"), city_size = c("village", "more than 500k inhabitants",
"more than 500k inhabitants", "village", "city up to 20k inhabitants",
"100k - 199k inhabitants"), gender = c("male", "female", "female",
"male", "female", "male"), age = c("18 - 24 years", "18 - 24 years",
"more than 50 years", "18 - 24 years", "31 - 40 years", "more than 50 years"
), education = c("secondary", "vocational", "secondary", "secondary",
"secondary", "secondary"), hf1 = c("", "", "", "1", "1", "1"),
hf2 = c("", "1", "1", "", "", ""), hf3 = c("", "", "", "",
"", ""), hf4 = c("", "", "", "", "", ""), hf5 = c("", "",
"", "", "", ""), hf6 = c("", "", "", "", "", ""), ac1 = c("",
"", "", "", "", "1"), ac2 = c("", "1", "1", "", "1", ""),
ac3 = c("", "", "", "", "1", ""), ac4 = c("", "", "", "",
"", ""), ac5 = c("", "", "", "", "", ""), ac6 = c("", "",
"", "", "", ""), cs1 = c("", "", "", "", "", ""), cs2 = c("",
"1", "1", "", "1", ""), cs3 = c("", "", "", "", "", "1"),
cs4 = c("", "", "", "1", "", ""), cs5 = c("", "", "", "",
"", ""), cs6 = c("", "", "", "", "", ""), cs7 = c("", "",
"", "", "", ""), cs8 = c("", "", "", "", "", ""), se1 = c("",
"", "1", "1", "", ""), se2 = c("", "", "", "", "1", ""),
se3 = c("", "1", "", "", "1", "1"), se4 = c("", "", "", "",
"", ""), se5 = c("", "", "", "", "", ""), se6 = c("", "",
"", "", "", ""), se7 = c("", "", "", "", "", ""), se8 = c("",
"", "", "1", "", "")), row.names = c(NA, 6L), class = "data.frame")
使用gather的代码为:
# Reshape wide -> long one column family at a time with gather().
# Two consecutive gather() calls produce every combination of the two
# families (e.g. hf1 x ac1..ac6), so filter() keeps only the rows whose
# suffix after the two-letter prefix matches (hf1 with ac1, hf2 with ac2, ...).
df1 <- df %>%
  gather(key = "hf_com", value = "hf_com_freq", hf1:hf6) %>%
  gather(key = "ac_com", value = "ac_com_freq", ac1:ac6) %>%
  filter(substring(hf_com, 3) == substring(ac_com, 3))

# Same pattern for the cs*/se* families (cs1 with se1, ...).
df1 <- df1 %>%
  gather(key = "curr_sal", value = "curr_sal_freq", cs1:cs8) %>%
  gather(key = "exp_sal", value = "exp_sal_freq", se1:se8) %>%
  filter(substring(curr_sal, 3) == substring(exp_sal, 3))
使用pivot_longer的代码如下(运行时出现错误消息):
# pivot_longer() attempt from the question, kept as written because it is the
# call that raises the error.
# NOTE: `na.rm` is the argument name used by gather(); pivot_longer() names
# the equivalent option `values_drop_na`, so `na.rm = TRUE` is rejected.
df_longer <- df %>%
pivot_longer(
cols = starts_with("hf"),
names_to = "hf_com",
values_to = "hf_freq",
names_prefix = "hf",
na.rm = TRUE)
我期望得到的结果(即使用gather的代码所得到的输出)是:
# Expected long-format result: dput() output of a 6-row data.frame.
# The wide indicator columns are replaced by key/value pairs:
# hf_com/hf_com_freq, ac_com/ac_com_freq, curr_sal/curr_sal_freq and
# exp_sal/exp_sal_freq. The rows shown all carry the "1" suffix keys
# (hf1, ac1, cs1, se1).
structure(list(id = c("81", "83", "85", "88", "1", "2"), look_work = c("yes",
"yes", "yes", "yes", "yes", "yes"), current_work = c("no", "yes",
"no", "no", "no", "no"), before_work = c("no", "NULL", "yes",
"yes", "yes", "yes"), keen_move = c("yes", "yes", "no", "no",
"no", "no"), city_size = c("village", "more than 500k inhabitants",
"more than 500k inhabitants", "village", "city up to 20k inhabitants",
"100k - 199k inhabitants"), gender = c("male", "female", "female",
"male", "female", "male"), age = c("18 - 24 years", "18 - 24 years",
"more than 50 years", "18 - 24 years", "31 - 40 years", "more than 50 years"
), education = c("secondary", "vocational", "secondary", "secondary",
"secondary", "secondary"), hf_com = c("hf1", "hf1", "hf1", "hf1",
"hf1", "hf1"), hf_com_freq = c("", "", "", "1", "1", "1"), ac_com = c("ac1",
"ac1", "ac1", "ac1", "ac1", "ac1"), ac_com_freq = c("", "", "",
"", "", "1"), curr_sal = c("cs1", "cs1", "cs1", "cs1", "cs1",
"cs1"), curr_sal_freq = c("", "", "", "", "", ""), exp_sal = c("se1",
"se1", "se1", "se1", "se1", "se1"), exp_sal_freq = c("", "",
"1", "1", "", "")), row.names = c(NA, 6L), class = "data.frame")
此外,如果没有使用pivot_longer的解决方案,那么使用data.table的解决方案将不胜感激。
答案 0 :(得分:0)
我已经解决了问题:
这需要从以下更改:
# Before (fails): pivot_longer() has no `na.rm` argument; that name belongs
# to gather().
df_longer <- df %>%
pivot_longer(
cols = starts_with("hf"),
names_to = "hf_com",
values_to = "hf_freq",
names_prefix = "hf",
na.rm = TRUE)
改为:
# After (works): `values_drop_na = TRUE` is pivot_longer()'s counterpart of
# gather()'s `na.rm = TRUE`; it drops rows whose value is NA.
# `names_prefix = "hf"` strips the leading "hf" from the column names, so
# hf_com holds "1".."6" rather than "hf1".."hf6".
# NOTE(review): the sample data stores blanks as "" rather than NA, so no rows
# are dropped unless blanks are converted to NA first — confirm against the
# real data.
df_longer <- df %>%
pivot_longer(
cols = starts_with("hf"),
names_to = "hf_com",
values_to = "hf_freq",
names_prefix = "hf",
values_drop_na = TRUE)