使用R将逗号分隔数据转换为矩阵

时间:2016-06-20 15:19:03

标签: r

我的数据:

A/11:36/0,A/11:36/1,A/11:36/2,A/23:01/0,A/23:01/1,A/23:01/2,B/15:07/0,B/15:07/1,B/15:07/2
1,26,2,1,10,2,1,0,0

期望输出:

Name 0 1 2
A/11:36 1 26 2
A/23:01 1 10 2
B/15:07 1 0 0 

我的代码:

# Attempt from the question: read the CSV, transpose it, split the
# slash-delimited label column, then cast the result to wide format.
library(reshape)
# NOTE(review): the nested library() call looks like a typo for
# library(splitstackshape) -- confirm.
library(library(splitstackshape))
input <- read.csv("D:/input.csv")
# Transpose so that each column of the CSV becomes one row.
t_input <- t(input)
colnames(t_input)<- c("Name","Val")
# Split on "/" and keep the pieces as text (type.convert=FALSE).
# NOTE(review): the columns were just renamed to "Name"/"Val", yet the split
# targets 'V1' -- confirm which column name is intended.
data<-cSplit(t_input, 'V1', sep="/", type.convert=FALSE)
# This is where it goes wrong: the script splits column 1 into 3 columns.
final_data <- cast(data, X1~X2)

我需要帮助将我的第1列分成两部分,如下所示:

A/11:36 0
A/11:36 1
A/11:36 2
A/23:01 0
A/23:01 1
A/23:01 2
B/15:07 0
B/15:07 1
B/15:07 2

有人可以帮我解决这个问题吗?

2 个答案:

答案 0 :(得分:0)

这是一个tidyr解决方案:

# Read the sample data. header = FALSE keeps the first line (the
# "group/time/x" labels) as data instead of consuming it as column names.
# FALSE is spelled out because the shorthand F is an ordinary, reassignable
# variable.
data <- read.csv("input.csv", header = FALSE)
# Transpose so that each label/value pair becomes one row.
tdata <- t(data)
colnames(tdata) <- c("name", "value")
df <- data.frame(tdata)

library(tidyr)

# NOTE: spread() is superseded in current tidyr; the modern equivalent is
# pivot_wider(names_from = x, values_from = value, names_prefix = "x").
new_df <- df %>%
    # extract the variables stored in 'name' to their own columns
    separate(name, c("group", "time", "x"), "/") %>%
    # transform to wide format; sep = "" names the new columns x0, x1, x2
    spread(x, value, sep = "")

# final result
new_df
#       group  time x0 x1 x2
#     1     A 11:36  1 26  2
#     2     A 23:01  1 10  2
#     3     B 15:07  1  0  0

# if, for some reason, you really want the group and time columns together
new_df %>% unite(name, group, time, sep = "/")
#          name x0 x1 x2
#     1 A/11:36  1 26  2
#     2 A/23:01  1 10  2
#     3 B/15:07  1  0  0

# or if you want them together and skip the unite step, you can separate directly
# by splitting at a / that is not followed by another / anywhere in the string
# (the negative look-ahead (?!.*/) matches only the last slash)
df %>%
    separate(name, c("name", "x"), "/(?!.*/)") %>%
    spread(x, value, sep = "")
#          name x0 x1 x2
#     1 A/11:36  1 26  2
#     2 A/23:01  1 10  2
#     3 B/15:07  1  0  0

答案 1 :(得分:0)

    # Read the sample data. header=FALSE keeps the first line (the
    # "name/time/index" labels) as data rather than as column names.
    input <- read.csv("input.csv", header=FALSE)
    # Transpose so that each label/value pair becomes one row.
    t_input <- t(input)
    colnames(t_input) <- c("name", "value")
    df <- data.frame(t_input)

    library(splitstackshape)

    # Split 'name' on "/" into name_1, name_2, name_3, keeping the pieces as
    # text. The data frame is passed (not the character matrix t_input) so the
    # call does not depend on how cSplit() coerces a matrix.
    new_df <- cSplit(df, 'name', sep="/", type.convert=FALSE)
    # Long -> wide: one row per name_1/name_2 pair, one value.<name_3> column
    # per distinct name_3. 'direction' is spelled out instead of relying on
    # partial argument matching.
    df1 <- reshape(new_df, timevar=c("name_3"), idvar = c("name_1",'name_2'), direction="wide")
    # Re-join the two identifier parts into a single "A/11:36"-style label.
    df2 <- within(df1, Name <- paste(name_1, name_2, sep='/'))
    # Drop the now-redundant parts (data.table delete-by-reference syntax).
    df2[,c("name_1","name_2"):=NULL]
    # Put Name first, followed by the value columns in order.
    Finaldf <- subset(df2, select=c(Name,value.0:value.2))
    write.csv(Finaldf, "output.csv", row.names = FALSE)

输出

Name value.0 value.1 value.2
A/11:36       1      26       2
A/23:01       1      10       2
B/15:07       1       0       0