我想循环遍历所有可用(即已安装)软件包中的数据集,找出其中哪些数据集有 6 列或更多列。这是我的尝试:
# First attempt: find the data sets with 6 or more columns among all data sets
# of all installed packages. The loop is disabled and a single index is
# hard-coded in order to debug why loading fails.
dat.list <- data(package=.packages(all.available=TRUE))$results # character matrix: one row per data set of every installed package
colnames(dat.list) # "Package" "LibPath" "Item" (= name of data set) "Title" (= description)
idx <- c() # will collect the row indices of data sets with >= 6 columns
i <- 3 # fixed row used for debugging instead of the loop below
## for(i in nrow(dat.list)) {
## NOTE(review): nrow(dat.list) is a single number, so this loop would visit
## only the last row; 1:nrow(dat.list) would visit every row.
nme <- dat.list[[i,"Item"]] # data set as string (may carry a suffix such as " (BJsales)")
data(list=nme, package=dat.list[[i,"Package"]]) # load the data
## => fails with warning: In data(list = nme, package = dat.list[[i, "Package"]]) :
## data set 'BJsales.lead (BJsales)' not found
## (the whole "Item" string, including the parenthetical part, was passed as
## the data set name)
dat <- eval(as.name(nme)) # assign the data to the variable dat
ncl <- ncol(dat) # NULL for objects without a dim attribute
if(!is.null(ncl) && ncl >= 6) idx <- c(idx, i)
## }
显然,这段代码不起作用,所以我固定了一个索引(这里是 3)来查看失败的原因。如果不能像上面那样通过 nme 得到数据集的名称,我该如何确定它,以便把数据集存入变量,然后获取其列数?
更新:结合 jeremycg 和 nico 的回答,我写出了下面的代码(同样,在确定数据集名称这一点上并不完美),但它能完成任务:
# Find every data set with 6 or more columns among the data sets of all
# installed packages.

# Catalogue of all data sets of all installed packages: a character matrix
# with the columns "Package", "LibPath", "Item" and "Title".
dat.list <- data(package = .packages(all.available = TRUE))$results

# An "Item" is either "name" or "name (file)", e.g. "BJsales.lead (BJsales)":
# the object BJsales.lead is stored in the data file BJsales. data() must be
# asked for the file, while the object is fetched under its own name.
raw.name <- dat.list[, "Item"]
obj.name <- sub("\\s.*$", "", raw.name)
data.file <- obj.name
has.file <- grepl("\\(.+\\)\\s*$", raw.name)
data.file[has.file] <- sub("^.*\\((.+)\\)\\s*$", "\\1", raw.name[has.file])

# One flag per catalogue row: TRUE if the data set has at least 6 columns.
# Each data set is loaded into a throw-away environment, so no package has to
# be attached and nothing is looked up on (or leaked into) the search path.
is.wide <- vapply(seq_len(nrow(dat.list)), function(i) {
  env <- new.env()
  dat <- tryCatch({
    # A data set that cannot be found only raises a warning here; the
    # subsequent get() then fails and is reported by the handler below.
    suppressWarnings(
      data(list = data.file[i], package = dat.list[i, "Package"],
           lib.loc = dat.list[i, "LibPath"], envir = env)
    )
    get(obj.name[i], envir = env, inherits = FALSE)
  }, error = function(e) {
    warning("Element ", i, " threw an error: ", conditionMessage(e),
            call. = FALSE)
    NULL
  })
  # ncol() is NULL without a dim attribute and NA for 1-d arrays; isTRUE()
  # maps both cases to FALSE.
  isTRUE(ncol(dat) >= 6)
}, logical(1))

# Row indices of the matching data sets, then their package and item names
idx <- which(is.wide)
dat.list[idx, c("Package", "Item")]
答案 0 :(得分:2)
我猜你需要加载包来访问数据。
所以你需要在循环的开头添加:
require(dat.list[[i, "Package"]], character.only = TRUE)
(请参阅 this question,了解为什么需要使用 character.only 参数)
请注意,您还需要更改循环:
for(i in nrow(dat.list))
到
for(i in 1:nrow(dat.list))
还有另一个问题:返回的某些数据集条目在名称后面还带有一个括号中的名称。例如:
wine.classes (wine)
所以我们需要去掉括号部分。这很容易做到:
dat.list[,3] <- sapply(strsplit(dat.list[,3], " "), function(x){x[1]})
最后,dat.list 可以直接用 [] 访问,无需使用 [[]](这样更易于阅读!)。
所以,最后:
# All data sets of all installed packages: a character matrix with the
# columns "Package", "LibPath", "Item" and "Title".
dat.list <- data(package = .packages(all.available = TRUE))$results

# Items such as "wine.classes (wine)" give the object name followed, in
# parentheses, by the data file that contains the object. Remember the file
# (it is what data() needs), then keep only the object name in "Item".
items <- dat.list[, "Item"]
data.file <- ifelse(grepl("\\(.+\\)\\s*$", items),
                    sub("^.*\\((.+)\\)\\s*$", "\\1", items),
                    sub("\\s.*$", "", items))
dat.list[, "Item"] <- vapply(strsplit(items, " "),
                             function(x) x[1], character(1))

# Number of columns of one catalogued data set; NULL if the data set cannot
# be loaded or has no dim attribute. The data are loaded into a throw-away
# environment: the package does not have to be attached, and a data set that
# happens to be called e.g. "idx" or "i" cannot overwrite the variables here.
count.columns <- function(object, file, package, lib) {
  env <- new.env()
  tryCatch({
    # A missing data set only raises a warning in data(); get() then fails
    # and the failure is reported once by the handler below.
    suppressWarnings(
      data(list = file, package = package, lib.loc = lib, envir = env)
    )
    ncol(get(object, envir = env, inherits = FALSE))
  }, error = function(e) {
    warning("Could not load '", object, "' from package '", package, "': ",
            conditionMessage(e), call. = FALSE)
    NULL
  })
}

# Flag every row whose data set has at least 6 columns
is.wide <- logical(nrow(dat.list))
for (i in seq_len(nrow(dat.list))) {
  ncl <- count.columns(dat.list[i, "Item"], data.file[i],
                       dat.list[i, "Package"], dat.list[i, "LibPath"])
  # ncl may be NULL (no dim) or NA (1-d array); isTRUE() treats both as FALSE
  is.wide[i] <- isTRUE(ncl >= 6)
}
idx <- which(is.wide)
结果如下:
> dat.list[idx, "Item"]
[1] "Seatbelts" "USJudgeRatings" "WorldPhones" "airquality"
[5] "anscombe" "attitude" "crimtab" "euro.cross"
[9] "infert" "longley" "mtcars" "occupationalStatus"
[13] "state.x77" "swiss" "volcano" "car.test.frame"
[17] "car90" "solder" "stagec" "bladder"
[21] "bladder1" "bladder2" "cancer" "cgd"
[25] "cgd0" "colon" "flchain" "heart"
[29] "jasa" "jasa1" "kidney" "lung"
[33] "mgus" "mgus1" "mgus2" "nwtco"
[37] "ovarian" "pbc" "pbcseq" "rats2"
[41] "transplant" "veteran" "soldat" "patch"
[45] "tooth"