此问题是早先一个问题(Filter values from list in R)的延伸。我有一个类似下面所列的长列表。其中一个名称“issues.fields.customfield_10400”出现的次数比其他所有名称都少。我要处理的任务之一,就是检查每条记录中是否存在这个名称。缺失处用 NULL 值填充完全可以接受。
# Sample data in long format: one row per (name, value) pair, both columns
# stored as factors. Rows 1-14 hold two records with all seven names; rows
# 15-26 hold two records that lack "issues.fields.customfield_10400".
DF = structure(list(name = structure(c(7L, 3L, 1L, 6L, 4L, 2L, 5L,
7L, 3L, 1L, 6L, 4L, 2L, 5L, 7L, 3L, 1L, 6L, 4L, 5L, 7L, 3L, 1L,
6L, 4L, 5L), .Label = c("issues.fields.created", "issues.fields.customfield_10400",
"issues.fields.issuetype.name", "issues.fields.status.name",
"issues.fields.summary", "issues.fields.updated", "issues.key"
), class = "factor"), value = structure(c(18L, 13L, 4L, 4L, 11L,
7L, 10L, 17L, 14L, 3L, 6L, 11L, 7L, 9L, 16L, 13L, 2L, 2L, 11L,
8L, 15L, 14L, 1L, 5L, 11L, 12L), .Label = c("2017-05-05T13:09:12.381-0700",
"2017-06-07T07:03:11.155-0700", "2017-07-26T11:15:03.074-0700",
"2017-08-01T09:00:44.956-0700", "2017-08-14T13:47:21.612-0700",
"2017-08-14T13:47:30.419-0700", "AA1234567", "Acquire replacement files from XYZ",
"Add measurement ", "Ingest changed file location ", "Open",
"Re-classify \"Generic Assays\" (n=24)", "Sub-task", "Task",
"TEST-1030", "TEST-1192", "TEST-1357", "TEST-1358"), class = "factor")), .Names = c("name",
"value"), row.names = c(NA, 26L), class = "data.frame")
name value
1 issues.key TEST-1358
2 issues.fields.issuetype.name Sub-task
3 issues.fields.created 2017-08-01T09:00:44.956-0700
4 issues.fields.updated 2017-08-01T09:00:44.956-0700
5 issues.fields.status.name Open
6 issues.fields.customfield_10400 AA1234567
7 issues.fields.summary Ingest changed file location
8 issues.key TEST-1357
9 issues.fields.issuetype.name Task
10 issues.fields.created 2017-07-26T11:15:03.074-0700
11 issues.fields.updated 2017-08-14T13:47:30.419-0700
12 issues.fields.status.name Open
13 issues.fields.customfield_10400 AA1234567
14 issues.fields.summary Add measurement
15 issues.key TEST-1192
16 issues.fields.issuetype.name Sub-task
17 issues.fields.created 2017-06-07T07:03:11.155-0700
18 issues.fields.updated 2017-06-07T07:03:11.155-0700
19 issues.fields.status.name Open
20 issues.fields.summary Acquire replacement files from XYZ
21 issues.key TEST-1030
22 issues.fields.issuetype.name Task
23 issues.fields.created 2017-05-05T13:09:12.381-0700
24 issues.fields.updated 2017-08-14T13:47:21.612-0700
25 issues.fields.status.name Open
26 issues.fields.summary Re-classify "Generic Assays" (n=24)
当我对该列表执行 unstack 时,收到以下错误消息:
Error in (function (..., row.names = NULL, check.rows = FALSE, check.names = TRUE, :
arguments imply differing number of rows:
有人能建议如何处理这种情况吗?
我需要创建如下所示的数据框。
# Desired wide-format result: one row per issue and one factor column per
# name. issues.fields.customfield_10400 holds the empty string "" in rows 3
# and 4, the two issues that have no such field.
res = structure(list(issues.fields.created = structure(c(4L, 3L, 2L,
1L), .Label = c("2017-05-05T13:09:12.381-0700", "2017-06-07T07:03:11.155-0700",
"2017-07-26T11:15:03.074-0700", "2017-08-01T09:00:44.956-0700"
), class = "factor"), issues.fields.issuetype.name = structure(c(1L,
2L, 1L, 2L), .Label = c("Sub-task", "Task"), class = "factor"),
issues.fields.status.name = structure(c(1L, 1L, 1L, 1L), .Label = "Open", class = "factor"),
issues.fields.customfield_10400 = structure(c(2L, 2L, 1L,
1L), .Label = c("", "AA1234567"), class = "factor"), issues.fields.summary = structure(c(3L,
2L, 1L, 4L), .Label = c("Acquire replacement files from XYZ",
"Add measurement ", "Ingest changed file location", "Re-classify \"Generic Assays\" (n=24)"
), class = "factor"), issues.fields.updated = structure(c(2L,
4L, 1L, 3L), .Label = c("2017-06-07T07:03:11.155-0700", "2017-08-01T09:00:44.956-0700",
"2017-08-14T13:47:21.612-0700", "2017-08-14T13:47:30.419-0700"
), class = "factor"), issues.key = structure(c(4L, 3L, 2L,
1L), .Label = c("TEST-1030", "TEST-1192", "TEST-1357", "TEST-1358"
), class = "factor")), .Names = c("issues.fields.created",
"issues.fields.issuetype.name", "issues.fields.status.name",
"issues.fields.customfield_10400", "issues.fields.summary", "issues.fields.updated",
"issues.key"), row.names = c(NA, 4L), class = "data.frame")
issues.fields.created issues.fields.issuetype.name issues.fields.status.name
1 2017-08-01T09:00:44.956-0700 Sub-task Open
2 2017-07-26T11:15:03.074-0700 Task Open
3 2017-06-07T07:03:11.155-0700 Sub-task Open
4 2017-05-05T13:09:12.381-0700 Task Open
issues.fields.customfield_10400 issues.fields.summary
1 AA1234567 Ingest changed file location
2 AA1234567 Add measurement
3 Acquire replacement files from XYZ
4 Re-classify "Generic Assays" (n=24)
issues.fields.updated issues.key
1 2017-08-01T09:00:44.956-0700 TEST-1358
2 2017-08-14T13:47:30.419-0700 TEST-1357
3 2017-06-07T07:03:11.155-0700 TEST-1192
4 2017-08-14T13:47:21.612-0700 TEST-1030
答案 0 :(得分:5)
使用标题中提到的 `unstack` 函数:
# Reshape the long name/value pairs into one row per issue with unstack().
# Simply padding the short columns with NA at their tail would misalign values
# whenever a field is missing from a record that is not among the last ones,
# so each value is written to the row of the record it belongs to instead.
# Assumes every record begins with its "issues.key" row.
rec <- cumsum(DF$name == "issues.key") # record number of every input row
us <- unstack(DF, value ~ name)        # values grouped by field name
pos <- split(rec, DF$name)             # record numbers, grouped the same way
data.frame(Map(
  function(vals, rows) {
    col <- rep(NA, max(rec)) # logical NA is coerced to the type of `vals`
    col[rows] <- vals
    col
  },
  us, pos
))
这给出了
issues.fields.created issues.fields.customfield_10400 issues.fields.issuetype.name issues.fields.status.name
1 2017-08-01T09:00:44.956-0700 AA1234567 Sub-task Open
2 2017-07-26T11:15:03.074-0700 AA1234567 Task Open
3 2017-06-07T07:03:11.155-0700 <NA> Sub-task Open
4 2017-05-05T13:09:12.381-0700 <NA> Task Open
issues.fields.summary issues.fields.updated issues.key
1 Ingest changed file location 2017-08-01T09:00:44.956-0700 TEST-1358
2 Add measurement 2017-08-14T13:47:30.419-0700 TEST-1357
3 Acquire replacement files from XYZ 2017-06-07T07:03:11.155-0700 TEST-1192
4 Re-classify "Generic Assays" (n=24) 2017-08-14T13:47:21.612-0700 TEST-1030
缺失的值用 `NA`(R 中表示缺失值的标准方式)填充,而不是空字符串。
答案 1 :(得分:2)
# Number the records: each "issues.key" row opens a new one (assumes every
# record starts with its key row).
record <- cumsum(DF$name == "issues.key")
field <- as.factor(DF$name)
# Start from an all-NA character matrix with one row per record and one column
# per field name. Filling by (record, field) position keeps values aligned even
# when a field is missing from a record in the middle of the data, which
# padding each column at its tail would not.
output <- matrix(
  NA_character_,
  nrow = if (length(record) > 0) max(record) else 0L,
  ncol = nlevels(field),
  dimnames = list(NULL, levels(field))
)
# Drop every value (second column of `DF`) into its own cell.
output[cbind(record, as.integer(field))] <- as.character(DF[[2]])
output <- as.data.frame(output)
答案 2 :(得分:1)
这只是把数据从“长”格式转换为“宽”格式。使用 `dplyr` 和 `tidyr`……
library(dplyr)
library(tidyr)
# `case` numbers the issues: it increases by one at every "issues.key" row, so
# all rows of one issue share the same value and end up on the same output row.
# The input is `DF` (R is case-sensitive; a bare `df` is the F-density function
# from stats). pivot_wider() replaces the superseded spread(); names_sort = TRUE
# keeps the new columns in sorted order rather than order of first appearance.
df2 <- DF %>%
  mutate(case = cumsum(name == "issues.key")) %>%
  pivot_wider(names_from = name, values_from = value, names_sort = TRUE) %>%
  select(-case)
df2
issues.fields.created issues.fields.customfield_10400 issues.fields.issuetype.name issues.fields.status.name issues.fields.summary issues.fields.updated issues.key
1 2017-08-01T09:00:44.956-0700 AA1234567 Sub-task Open Ingest changed file location 2017-08-01T09:00:44.956-0700 TEST-1358
2 2017-07-26T11:15:03.074-0700 AA1234567 Task Open Add measurement 2017-08-14T13:47:30.419-0700 TEST-1357
3 2017-06-07T07:03:11.155-0700 <NA> Sub-task Open Acquire replacement files from XYZ 2017-06-07T07:03:11.155-0700 TEST-1192
4 2017-05-05T13:09:12.381-0700 <NA> Task Open Re-classify "Generic Assays" (n=24) 2017-08-14T13:47:21.612-0700 TEST-1030
答案 3 :(得分:1)
使用 `data.table`(或 `reshape2`)的 `dcast` 函数,您可以执行以下操作:
# create ID variable: each "issues.key" row starts a new issue. Keying on the
# field name, rather than searching the values for "TEST-", keeps the count
# right even if other text (e.g. a summary) contains "TEST-" or an issue key
# uses a different prefix.
dat$id <- cumsum(dat$name == "issues.key")
现在,以 id 为行、以 name 为列对数据进行重塑:
library(data.table) # or library(reshape2)
# Cast to wide form: one row per id, one column per distinct name, filled from
# the value column; combinations with no matching row get NA.
dcast(
  dat,
  id ~ name,
  value.var = "value",
  fill = NA
)
这将返回如下所示的所需结果。
id issues.fields.created issues.fields.customfield_10400 issues.fields.issuetype.name
1 1 2017-08-01T09:00:44.956-0700 AA1234567 Sub-task
2 2 2017-07-26T11:15:03.074-0700 AA1234567 Task
3 3 2017-06-07T07:03:11.155-0700 <NA> Sub-task
4 4 2017-05-05T13:09:12.381-0700 <NA> Task
issues.fields.status.name issues.fields.summary issues.fields.updated issues.key
1 Open Ingest changed file location 2017-08-01T09:00:44.956-0700 TEST-1358
2 Open Add measurement 2017-08-14T13:47:30.419-0700 TEST-1357
3 Open Acquire replacement files from XYZ 2017-06-07T07:03:11.155-0700 TEST-1192
4 Open Re-classify "Generic Assays" (n=24) 2017-08-14T13:47:21.612-0700 TEST-1030