如何处理这个数据过程?

时间:2018-10-15 10:52:27

标签: r

enter image description here

这是示例数据“ex”和“data”。

我想在右侧创建一个新的数据集。

所以,我想在“ex”和“data”之间按相同的行名进行匹配,

此外,我还想把“ex”中各行的取值与“data”的行名进行匹配。

解释起来太复杂了。

因此,我随便附上了图片。

这是我的代码,如下所示。不幸的是,我在创建新数据集时遇到麻烦。

我应该修改我的代码吗?

谢谢。

# Example input.
# `ex` is a 3 x 4 numeric table (matrix() fills column by column). Row i
# describes group i: every value in that row is the row name of a row of
# `data` that belongs to the group.
ex <- data.frame(matrix(c(5, 12, 14, 20,
                          4, 19, 17, 9,
                          11, 15, 8, 10), ncol = 4))

# `data` has 20 rows (default row names "1".."20") and two identical columns
# holding the letters A..T.
data <- data.frame(matrix(c("A", "B", "C", "D", "E", "F", "G",
                            "H", "I", "J", "K", "L", "M", "N",
                            "O", "P", "Q", "R", "S", "T",
                            "A", "B", "C", "D", "E", "F", "G",
                            "H", "I", "J", "K", "L", "M", "N",
                            "O", "P", "Q", "R", "S", "T"), ncol = 2))

# Label every row of `data` with its group in the new column `group`.
# A row belongs to group i when its row name equals the row name of row i of
# `ex`, or equals any value stored in row i of `ex`. Rows that match nothing
# keep NA.
data$group <- NA_integer_
for (i in seq_len(nrow(ex))) {
  # Every row name that belongs to group i, as character so it can be
  # compared with row.names(data).
  member_ids <- c(row.names(ex)[i], as.character(unlist(ex[i, ])))
  # %in% yields one TRUE/FALSE per row of `data`, so all members of the group
  # are labelled at once instead of testing a single row inside `if`.
  data$group[row.names(data) %in% member_ids] <- i
}

2 个答案:

答案 0 :(得分:1)

这是一个tidyverse解决方案,将为您提供帮助:

library(tidyverse)

# Reshape `ex` into a long lookup table with one row per (Group, row_id) pair.
# `Group` is the row name of `ex` (kept as character); `row_id` is a value
# taken from that row of `ex`.
# pivot_longer() replaces the superseded gather() (needs tidyr >= 1.0.0).
ex_upd <- ex %>%
  rownames_to_column("Group") %>%   # add row names as a column
  pivot_longer(
    -Group,
    names_to = "x",
    values_to = "row_id"
  ) %>%                             # reshape to long format
  select(-x)                        # drop the source column name

# Attach the group to each row of `data` by matching its row name to `row_id`.
data %>%
  rownames_to_column("row_id") %>%           # add row names as a column
  mutate(row_id = as.numeric(row_id)) %>%    # numeric, to match ex_upd$row_id
  left_join(ex_upd, by = "row_id") %>%       # join the lookup table
  column_to_rownames("row_id")               # restore row names from row_id

#    X1 X2 Group
# 1   A  A  <NA>
# 2   B  B  <NA>
# 3   C  C  <NA>
# 4   D  D     2
# 5   E  E     1
# 6   F  F  <NA>
# 7   G  G  <NA>
# 8   H  H     2
# 9   I  I     2
# 10  J  J     3
# 11  K  K     3
# 12  L  L     2
# 13  M  M  <NA>
# 14  N  N     3
# 15  O  O     1
# 16  P  P  <NA>
# 17  Q  Q     1
# 18  R  R  <NA>
# 19  S  S     3
# 20  T  T     1

注意:我仍然不清楚为什么在您期望的输出中,前两行具有Group值,而第三行却没有。

如果您像这样更新ex数据集:

# Long lookup table of (Group, row_id) pairs built from `ex`. The extra `id`
# column holds each row's own row number, so it is reshaped into `row_id`
# together with the values of that row.
# pivot_longer() replaces the superseded gather() (needs tidyr >= 1.0.0).
ex_upd <- ex %>%
  rownames_to_column("Group") %>%
  mutate(id = as.numeric(Group)) %>%  # (new code added to previous one)
  pivot_longer(-Group, names_to = "x", values_to = "row_id") %>%
  select(-x)

这样,前三行也会得到相应的 Group 值。

答案 1 :(得分:1)

基本的R解决方案可以是:

# Base R approach: give every row of `ex` its own row number both as an extra
# member (X5) and as the group label (Group), then write that label into the
# rows of `data` listed in columns 1..5 of `ex`.
# NOTE: this adds the columns `X5` and `Group` to `ex`, and it indexes `data`
# by position, which equals the row name only for default 1..n row names.
ex$X5 <- as.numeric(rownames(ex))
ex$Group <- ex$X5
data$Group <- numeric(nrow(data))  # 0 marks rows that belong to no group

# seq_len() yields an empty sequence when `ex` has no rows, whereas
# 1:nrow(ex) would iterate over c(1, 0).
for (i in seq_len(nrow(ex))) {
  select_rows <- unlist(ex[i, 1:5])  # positions in `data` for group i
  data$Group[select_rows] <- ex$Group[i]
}

data

#    X1 X2 Group
# 1   A  A     1
# 2   B  B     2
# 3   C  C     3
# 4   D  D     2
# 5   E  E     1
# 6   F  F     0
# 7   G  G     0
# 8   H  H     2
# 9   I  I     2
# 10  J  J     3
# 11  K  K     3
# 12  L  L     2
# 13  M  M     0
# 14  N  N     3
# 15  O  O     1
# 16  P  P     0
# 17  Q  Q     1
# 18  R  R     0
# 19  S  S     3
# 20  T  T     1