Question

我想知道如何根据分数将列值分成三个不同的范围。

以下是我的数据：

Name    V1.1    V1.2    V2.1    V2.2    V3.1    V3.2
John    French  86      Math    78      English 56
Sam     Math    97      French  86      English 79
Viru    English 93      Math    44      French  34

假设有三个分数范围：第一个范围为 0–60，第二个范围为 61–90，第三个范围为 91–100。我希望得到每个人在各个范围（等级）内对应的科目名称。

预期结果将是

Name   Level1    Level2      Level3
 John  English   Math,French  Null
 Sam   Null      French,Eng   Math
 Viru Math,Fren  Null        English

Answer 1

首先需要将数据转换为长格式，即每个观测占一行（这里一个观测就是一个单独的分数）。你需要使用 melt，但情况稍微复杂一些，因为你的宽格式数据不仅包含观测值（分数），还包含观测的类别（科目）。一种方法是调用两次 data.table 的 melt（即 melt.data.table）；不过你可能更熟悉 dplyr，它的语法更易于使用。

# First convert the wide table to long form: one row per (Name, exam) pair.
# `df` is the wide input table from the question (Name, V1.1 ... V3.2).

library("data.table")
setDT(df)

# The scores sit in the columns matching "\\.2"; melt them into a single
# `score` column. Argument names are spelled out in full (no partial
# matching) and the melt() generic is used rather than the method itself.
lhs <- melt(df, id.vars = "Name", measure.vars = patterns("\\.2"),
            variable.name = "obs", value.name = "score")
# Reduce e.g. "V1.2" to the exam id "V1" so both halves share a join key.
lhs[, obs := gsub("(V\\d+)\\.\\d+", "\\1", obs)]
lhs

# The subjects sit in the columns matching "V\\d\\.1"; melt them the same way
# into a single `subject` column.
rhs <- melt(df, id.vars = "Name", measure.vars = patterns("V\\d\\.1"),
            variable.name = "obs", value.name = "subject")

rhs[, obs := gsub("(V\\d+)\\.\\d+", "\\1", obs)]
rhs

# Join scores and subjects back together on student name and exam id.
df2 <- merge(lhs, rhs, by = c("Name", "obs"))

#    Name obs score1 subject1
# 1: John  V1     86   French
# 2: John  V2     78     Math
# 3: John  V3     56  English
# 4:  Sam  V1     97     Math
# 5:  Sam  V2     86   French
# 6:  Sam  V3     79  English
# 7: Viru  V1     93  English
# 8: Viru  V2     44     Math
# 9: Viru  V3     34   French

然后需要使用 cut 或其他函数，根据 score1 创建三个等级。

然后，按这些等级进行分组，并对科目名称进行拼接，例如 paste(..., collapse = ",")。

然后需要使用 cast 或 spread 将其转换回宽格式。

请付出一些努力，并根据您的尝试编辑您的问题，并尝试提出更具体的问题，而不仅仅是“请为我做这件事”。

Answer 2

另一个选项是使用 splitstackshape 和嵌套的 ifelse：

library(data.table)  # setDT() and the `:=` syntax used below
library(splitstackshape)
library(tidyr)

# prepare data to convert in long format: collapse the subject columns
# (names ending in ".1") and the mark columns (names ending in ".2") into one
# comma-separated string per row. drop = FALSE keeps a data frame even when
# only a single column matches, so do.call(paste, ...) always gets a list.
subject_cols <- grep("\\.1$", colnames(data))
mark_cols <- grep("\\.2$", colnames(data))
data$subjects <- do.call(
  paste,
  c(data[, subject_cols, drop = FALSE], sep = ",")
)
data$marks <- do.call(
  paste,
  c(data[, mark_cols, drop = FALSE], sep = ",")
)
# drop exactly the wide columns that were just collapsed
keep_cols <- setdiff(seq_along(data), c(subject_cols, mark_cols))
data <- data[, keep_cols, drop = FALSE]

# use cSplit to convert wide to long: both collapsed columns are split on ","
# in parallel, giving one row per (Name, subject, mark)
out <- cSplit(setDT(data), splitCols = c("subjects", "marks"), sep = ",",
              direction = "long")

# nested ifelse to assign level based on the score range:
# <= 60 -> level1, 61-90 -> level2, above 90 -> level3
out[, level := ifelse(marks <= 60, "level1",
               ifelse(marks <= 90, "level2", "level3"))]

# one comma-separated subject string per student and level; the unnamed
# aggregate lands in a column called V1
req <- out[, toString(subjects), by = c("Name", "level")]

这将给出

#> req
#   Name  level              V1
#1: John level2    French, Math
#2: John level1         English
#3:  Sam level3            Math
#4:  Sam level2 French, English
#5: Viru level3         English
#6: Viru level1    Math, French

您可以使用 dcast 或 tidyr 中的 spread 将其重塑为宽格式

# reshape to wide: one column per level, filled with the subject strings in V1
# (tidyr's newer pivot_wider() covers the same kind of reshaping)
spread(data = req, key = level, value = V1)

#   Name       level1          level2  level3
#1: John      English    French, Math      NA
#2:  Sam           NA French, English    Math
#3: Viru Math, French              NA English

数据

# Example input in wide form: one row per student; for each of the three exams
# a subject column (V*.1, factor) followed by its integer mark column (V*.2).
data <- data.frame(
  Name = factor(c("John", "Sam", "Viru")),
  V1.1 = factor(c("French", "Math", "English")),
  V1.2 = c(86L, 97L, 93L),
  V2.1 = factor(c("Math", "French", "Math")),
  V2.2 = c(78L, 86L, 44L),
  V3.1 = factor(c("English", "English", "French")),
  V3.2 = c(56L, 79L, 34L)
)

Answer 3

这种方法不是很直观，但能得到所需的输出。需要 sjmisc 包！

library(sjmisc)
# Example input in wide form: one row per student, each exam contributing a
# subject column (V*.1) and the matching score column (V*.2).
mydat <- data.frame(
  Name = c("John", "Sam", "Viru"),
  V1.1 = c("French", "Math", "English"),    V1.2 = c(86, 97, 93),
  V2.1 = c("Math", "French", "Math"),       V2.2 = c(78, 86, 44),
  V3.1 = c("English", "English", "French"), V3.2 = c(56, 79, 34)
)

# recode into groups: the score columns 3, 5 and 7 (V1.2, V2.2, V3.2) are
# overwritten in place with the codes given in the recode string
# (min:60 -> 1, 61:90 -> 2, 91:max -> 3)
# NOTE(review): relies on sjmisc's `rec<-` replacement form -- confirm the
# installed sjmisc version still provides it
rec(mydat[, c(3,5,7)]) <- "min:60=1; 61:90=2; 91:max=3"

# convert to long format: the subject columns V1.1/V2.1/V3.1 and the score
# columns V1.2/V2.2/V3.2 are passed as two column groups, with "subject" and
# "score" as the names of the resulting value columns; the key column
# "no_use" is dropped again right away and rows are sorted by Name and score
# NOTE(review): `%>%`, select() and arrange() are presumably magrittr/dplyr
# functions -- confirm they are attached in the session
newdf <- to_long(mydat, "no_use", 
    c("subject", "score"),
    c("V1.1", "V2.1", "V3.1"),
    c("V1.2", "V2.2", "V3.2")) %>% 
  select(-no_use) %>% 
  arrange(Name, score)

# at this point we are at a similar stage as described in the 
# other answers, so we have our data in a long format 
# (printed below for inspection)

newdf

# Collapse the long data into one row per student with one column per level.
# `newdf` must hold the columns Name, subject and score, where score has
# already been recoded to the level codes 1, 2 and 3.
student_names <- unique(newdf$Name)
level_codes <- 1:3

# Comma-separated subjects of one student at one level. paste0() on an empty
# selection yields "", which marks a level without any subject.
subjects_at_level <- function(student, level) {
  hits <- newdf$subject[newdf$Name == student & newdf$score == level]
  paste0(hits, collapse = ",")
}

# One row per student, one column per level. vapply() fixes the type and
# shape of the result up front, so nothing is grown inside a loop and the
# empty-input case still yields a matrix with one column per level.
level_matrix <- t(vapply(
  as.character(student_names),
  function(student) {
    vapply(
      level_codes,
      function(level) subjects_at_level(student, level),
      character(1)
    )
  },
  character(length(level_codes)),
  USE.NAMES = FALSE
))

# matrix to data frame, student names first
finaldf <- data.frame(student_names, level_matrix)
# proper row/col names
colnames(finaldf) <- c("Names", "Level1", "Level2", "Level3")
rownames(finaldf) <- seq_len(nrow(finaldf))

finaldf

如何将列值分成范围

3 个答案: