这是我的虚拟数据集:
dataset<-data.frame(a=c(1,2,3,4),b=c('a','b','c','d'), c=c("HI","DD","gg","ff"))
g=list(c("a","b"),c(2,3,4), c(44,33,11,22),c("chr","ID","i","II"))
dataset$l<-g
dataset
a b c l
1 1 a HI a, b
2 2 b DD 2, 3, 4
3 3 c gg 44, 33, 11, 22
4 4 d ff chr, ID, i, II
> mode(dataset$l)
[1] "list"
当我尝试将数据集写入文件时:
> write.table(dataset, "dataset.txt", quote=F, sep="\t")
Error in write.table(x, file, nrow(x), p, rnames, sep, eol, na, dec, as.integer(quote), :
unimplemented type 'list' in 'EncodeElement'
我该如何解决这个问题?
答案 0 :(得分:3)
保存输出无法读取。 dump或dput的输出是ASCII,对于理解R对象结构的人来说是可读的,但我猜你想要它更传统的安排。
> apply(dataset, 1, function(x) paste(x, sep=",", collapse=","))
[1] "1,a,HI,c(\"a\", \"b\")"
[2] "2,b,DD,c(2, 3, 4)"
[3] "3,c,gg,c(44, 33, 11, 22)"
[4] "4,d,ff,c(\"chr\", \"ID\", \"i\", \"II\")"
反斜杠不会出现在文本文件输出中:
writeLines(con="test.txt", apply(dataset, 1, function(x) paste(x, sep=",", collapse=",")))
#-------output-----
1,a,HI,c("a", "b")
2,b,DD,c(2, 3, 4)
3,c,gg,c(44, 33, 11, 22)
4,d,ff,c("chr", "ID", "i", "II")
答案 1 :(得分:2)
我可以考虑一些选择,具体取决于你想要实现的目标。
如果仅用于展示,那么您可能只需要capture.output()
或sink()
;这些都不是非常方便回读R:
capture.output(dataset, file="myfile.txt")
### Result is a text file that looks like this:
# a b c l
# 1 1 a HI a, b
# 2 2 b DD 2, 3, 4
# 3 3 c gg 44, 33, 11, 22
# 4 4 d ff chr, ID, i, II
sink("myfile.txt")
dataset
sink()
## Same result as `capture.output()` approach
如果您希望能够将结果表读回R(虽然不保留列“l
”是列表的事实),您可以采用类似于@DWin建议的方法。 / p>
在下面的代码中,dataset2[sapply...
行标识哪些变量是列表并将它们连接成一个字符串。因此,它们成为简单的字符变量,允许您使用write.table()
。
dataset2 <- dataset # make a copy just to be on the safe side
dataset2[sapply(dataset2, is.list)] <- apply(dataset2[sapply(dataset2, is.list)],
1, function(x)
paste(unlist(x),
sep=", ", collapse=", "))
str(dataset2)
# 'data.frame': 4 obs. of 4 variables:
# $ a: num 1 2 3 4
# $ b: Factor w/ 4 levels "a","b","c","d": 1 2 3 4
# $ c: Factor w/ 4 levels "DD","ff","gg",..: 4 1 3 2
# $ l: chr "a, b" "2, 3, 4" "44, 33, 11, 22" "chr, ID, i, II"
write.table(dataset2, "myfile.txt", quote=FALSE, sep="\t")
# can be read back in with: dataset3 <- read.delim("myfile.txt")
答案 2 :(得分:2)
如果其中一个要求是保留excel的格式等,这可能会有所帮助:
writableTable <- tableFlatten(dataset, filler="")
# a b c l.01 l.02 l.03 l.04
# 1 a HI a b
# 2 b DD 2 3 4
# 3 c gg 44 33 11 22
# 4 d ff chr ID i II
write.csv(writableTable, "myFile.csv")
tableFlatten
使用函数listFlatten
,顾名思义,它使用嵌套列表并展平它们。
但是,如果列表中的元素大小不同,则会添加填充符(可以是NA
s,空格或任何其他用户定义的选项)
它的代码如下。
tableFlatten <- function(tableWithLists, filler="") {
# takes as input a table with lists and returns a flat table
# empty spots in lists are filled with value of `filler`
#
# depends on: listFlatten(.), findGroupRanges(.), fw0(.)
# index which columns are lists
listCols <- sapply(tableWithLists, is.list)
tableWithLists[listCols]
tableWithLists[!listCols]
# flatten lists into table
flattened <- sapply(tableWithLists[listCols], listFlatten, filler=filler, simplify=FALSE)
# fix names
for (i in 1:length(flattened)) colnames(flattened[[i]]) <- fw0(ncol(flattened[[i]]), 2)
# REASSEMBLE, IN ORDER
# find pivot point counts
pivots <- sapply(findGroupRanges(listCols), length)
#index markers
indNonList <- indList <- 1
# nonListGrp <- (0:(length(pivots)/2)) * 2 + 1
# ListGrp <- (1:(length(pivots)/2)) * 2
final <- data.frame(row.names=row.names(tableWithLists))
for (i in 1:length(pivots)) {
if(i %% 2 == 1) {
final <- cbind(final,
tableWithLists[!listCols][indNonList:((indNonList<-indNonList+pivots[[i]])-1)]
)
} else {
final <- cbind(final,
flattened[indList:((indList<-indList+pivots[[i]])-1)]
)
}
}
return(final)
}
#=====================================
listFlatten <- function(obj, filler=NA) {
## Flattens obj like rbind, but if elements are of different length, plugs in value filler
# Initialize Vars
bind <- FALSE
# IF ALL ELEMENTS ARE MATRIX-LIKE OR VECTORS, MAKE SURE SAME NUMBER OF COLUMNS
matLike <- sapply(obj, function(x) !is.null(dim(x)))
vecLike <- sapply(obj, is.vector)
# If all matrix-like.
if (all(matLike)) {
maxLng <- max(sapply(obj[matLike], ncol))
obj[matLike] <- lapply(obj[matLike], function(x) t(apply(x, 1, c, rep(filler, maxLng - ncol(x)))))
bind <- TRUE
# If all vector-like
} else if (all(vecLike)) {
maxLng <- max(sapply(obj[vecLike], length))
obj[vecLike] <- lapply(obj[vecLike], function(x) c(x, rep(filler, maxLng - length(x))))
bind <- TRUE
# If all are either matrix- or vector-like
} else if (all(matLike & vecLike)) {
maxLng <- max(sapply(obj[matLike], ncol), sapply(obj[vecLike], length))
# Add in filler's as needed
obj[matLike] <-
lapply(obj[matLike], function(x) t(apply(x, 1, c, rep(filler, maxLng - ncol(x)))))
obj[vecLike] <-
lapply(obj[vecLike], function(x) c(x, rep(filler, maxLng - length(x))))
bind <- TRUE
}
# If processed and ready to be returned, then just clean it up
if(bind) {
ret <- (do.call(rbind, obj))
colnames(ret) <- paste0("L", fw0(1:ncol(ret), digs=2))
return(ret)
}
# Otherwise, if obj is sitll a list, continue recursively
if (is.list(obj)) {
return(lapply(obj, listFlatten))
}
# If none of the above, return an error.
stop("Unknown object type")
}
#--------------------------------------------
findGroupRanges <- function(booleanVec) {
# returns list of indexes indicating a series of identical values
pivots <- which(sapply(2:length(booleanVec), function(i) booleanVec[[i]] != booleanVec[[i-1]]))
### THIS ISNT NEEDED...
# if (identical(pivots, numeric(0)))
# pivots <- length(booleanVec)
pivots <- c(0, pivots, length(booleanVec))
lapply(seq(2, length(pivots)), function(i)
seq(pivots[i-1]+1, pivots[i])
)
}
#--------------------------------------------
fw0 <- function(num, digs=NULL, mkSeq=TRUE) {
## formats digits with leading 0's.
## num should be an integer or range of integers.
## if mkSeq=T, then an num of length 1 will be expanded to seq(1, num).
# TODO 1: put more error check
if (is.list(num))
lapply(num, fw0)
if (!is.vector(num)) {
stop("num should be integer or vector")
}
# convert strings to numbers
num <- as.numeric(num)
# If num is a single number and mkSeq is T, expand to seq(1, num)
if(mkSeq && !length(num)>1)
num <- (1:num)
# number of digits is that of largest number or digs, whichever is max
digs <- max(nchar(max(abs(num))), digs)
# if there are a mix of neg & pos numbers, add a space for pos numbs
posSpace <- ifelse(sign(max(num)) != sign(min(num)), " ", "")
# return: paste appropriate 0's and preface neg/pos mark
sapply(num, function(x) ifelse(x<0,
paste0("-", paste0(rep(0, max(0, digs-nchar(abs(x)))), collapse=""), abs(x)),
paste0(posSpace, paste0(rep(0, max(0, digs-nchar(abs(x)))), collapse=""), x)
))
}
#-----------------------------------------------
答案 3 :(得分:1)
您可以使用dput。
dput(dataset, "dataset.txt")
答案 4 :(得分:1)
您也可以使用save()
save(dataset, file="dataset.RData")
答案 5 :(得分:0)
@Ananda提供的answer非常好,但是当我有一个包含两列列表的数据框时,我遇到了一个问题。
dataset<-data.frame(a=c(1,2,3,4),b=c('a','b','c','d'), c=c("HI","DD","gg","ff"))
g=list(c("a","b"),c(2,3,4), c(44,33,11,22),c("chr","ID","i","II"))
dataset$l<-g
dataset$l2<-g
dataset
a b c l l2
1 1 a HI a, b a, b
2 2 b DD 2, 3, 4 2, 3, 4
3 3 c gg 44, 33, 11, 22 44, 33, 11, 22
4 4 d ff chr, ID, i, II chr, ID, i, II
使用原始答案,两个列表列都包含两列的连接内容。
a b c l l2
1 1 a HI a, b, a, b a, b, a, b
2 2 b DD 2, 3, 4, 2, 3, 4 2, 3, 4, 2, 3, 4
3 3 c gg 44, 33, 11, 22, 44, 33, 11, 22 44, 33, 11, 22, 44, 33, 11, 22
4 4 d ff chr, ID, i, II, chr, ID, i, II chr, ID, i, II, chr, ID, i, II
相反,请尝试此修改版本:
dataset2 <- dataset # make a copy just to be on the safe side
dataset2[sapply(dataset2, is.list)] <-
sapply(dataset2[sapply(dataset2, is.list)],
function(x)sapply(x, function(y) paste(unlist(y),collapse=", ") ) )
dataset2
a b c l l2
1 1 a HI a, b a, b
2 2 b DD 2, 3, 4 2, 3, 4
3 3 c gg 44, 33, 11, 22 44, 33, 11, 22
4 4 d ff chr, ID, i, II chr, ID, i, II
答案 6 :(得分:-1)
我偶然发现了这一点,虽然有很多很棒的答案,但最后还是做了别的事情。为后代分享。
library(dplyr)
flatten_list = function(x){
if (typeof(x) != "list") {
return(x)
}
sapply(x, function(y) paste(y, collapse = " | "))
}
data %>%
mutate_each(funs(flatten_list)) ->
write_csv("data.csv")