我目前正在尝试使用 ggplot2 来展示简单电流-电压实验的结果。对于单组数据,我已经成功得到了不错的图。
但是,我有许多电流-电压数据集,我在 R 中递归地导入它们,得到如下的组织结构(参见最小示例代码):
data.frame(cbind(batch(string list), sample(string list), dataset(data.frame list)))
编辑:我的数据存储在名为 batchname_samplenumber.txt 的文本文件中,每个文件包含电压和电流两列。我用来导入它们的代码是:
require(plyr)
require(ggplot2)
#VARIABLES
# Pattern for "<stem>.<ext>": capture group 1 is the file stem, group 2 the
# extension. The dot is escaped so it matches only a literal "." separator
# (an unescaped "." matches any character and mis-parses dot-less names).
regex <- "([[:alnum:]_]+)\\.([[:alpha:]]+)"
# Pattern for "G5_<batch>_<sample>.<ext>": capture groups 1-3 are the batch,
# the sample and the extension.
regex2 <- "G5_([[:alnum:]]+)_([[:alnum:]]+)\\.([[:alpha:]]+)"
#FUNCTIONS
# For a two-level list, return a list of the same shape in which every
# innermost entry is replaced by its element `k` (looked up via getElement()).
getJ <- function(list, k) {
  pick_entry <- function(entry, indix) getElement(entry, indix)
  llply(list, function(group) llply(group, pick_entry, indix = k))
}
#FILES
# Full paths of every entry in the data directory. TRUE is spelled out
# because `T` is an ordinary variable that can be reassigned.
files <- list.files("Data/", full.names = TRUE)
#NAMES FOR FILES
# Match each base file name against both patterns; every entry of `paths` /
# `paths2` is the regmatches() result for one file.
paths <- llply(llply(files, basename), function(i) regmatches(i, regexec(regex, i)))
paths2 <- llply(llply(files, basename), function(i) regmatches(i, regexec(regex2, i)))
# Element 1 of a match is the whole string, so index 2 / 3 select capture
# groups 1 / 2. The inner llply() call without a function is kept as in the
# original -- NOTE(review): presumably a no-op list coercion, confirm.
names <- llply(llply(getJ(paths, 2)), unlist)
batches <- llply(llply(getJ(paths2, 2)), unlist)
samples <- llply(llply(getJ(paths2, 3)), unlist)
#SETS OF DATA, NAMED
# Read every file as a headerless table and label its columns right away.
# Naming at read time avoids looking entries up by name afterwards, which
# would rename only the first entry when two files share the same stem.
sets <- llply(files, function(path) {
  d <- read.table(path, skip = 0, header = FALSE)
  names(d) <- c("voltage", "current")
  d
})
names(sets) <- names
# Combine the batch labels, sample labels and per-file tables column-wise.
df <- data.frame(cbind(batches, samples, sets))
可以通过以下方式生成最小数据:
require(plyr)
# Minimal example input: batch labels "A"/"B" with sample numbers 1/2, and
# one random 10-row voltage/current table for each of the four combinations.
batch <- as.list(rep(c("A", "B"), each = 2))
sample <- as.list(rep(c(1, 2), times = 2))
set <- replicate(
  4,
  data.frame(voltage = runif(10), current = runif(10)),
  simplify = FALSE
)
df <- data.frame(cbind(batch, sample, set))
我的问题是:是否可以保持数据的现有结构,直接用类似下面的代码进行绘图(下面这段代码无法运行)?
# NOTE(review): the question itself presents this call as non-working. The
# snippets above create `df`, not objects named `data` or `dataset`, and no
# geom layer is added here; it is kept verbatim as the asker's sketch.
ggplot(data, aes(x = dataset$current, y = dataset$voltage, colour = sample)) + facet_wrap(~batch)
更一般的问法是:ggplot2 能否处理原始的物理测量数据,而不仅仅是离散的统计数据(如 diamonds、cars 这类示例数据集)?
答案 0 :(得分:1)
目前尚不清楚样本名称与数据集是如何对应的。ggplot2 的一般思路是:应当把所有数据合并到一个融合后的(melted,长格式)data.frame 中。
library(ggplot2)
library(plyr)
library(reshape2)
## Three synthetic batches. Each holds a batch label, four sample labels
## ("s1".."s4") and a 40-row data frame of random current/voltage values.
l1 <- list(
  batch = "b1",
  sample = paste0("s", 1:4),
  dataset = data.frame(current = rnorm(40), voltage = rnorm(40))
)
l2 <- list(
  batch = "b2",
  sample = paste0("s", 1:4),
  dataset = data.frame(current = rnorm(40), voltage = rnorm(40))
)
l3 <- list(
  batch = "b3",
  sample = paste0("s", 1:4),
  dataset = data.frame(current = rnorm(40), voltage = rnorm(40))
)
## Flatten one batch list into a long data frame.
##   l: list with elements "dataset" (data frame), "batch" and "sample"
##   n: how many consecutive rows of the dataset get each sample label
## Returns the dataset with `batch` and `sample` columns appended.
list_to_df <- function(l, n=10){
  sample_labels <- rep(l[["sample"]], each = n)
  long <- l[["dataset"]]
  long$batch <- l[["batch"]]
  long$sample <- sample_labels
  long
}
## list_to_df(l1)
## Convert each batch with list_to_df() and stack the results, then draw one
## current-voltage path per sample with one facet per batch.
m <- ldply(list(l1, l2, l3), list_to_df)
ggplot(m) +
  geom_path(aes(x = current, y = voltage, colour = sample)) +
  facet_wrap(~batch)
答案 1 :(得分:1)
针对重新表述后的问题(名为“batchname_samplenumber.txt”的两列文件),我建议采用以下策略:
## Read one file named "<batch>_<sample>.txt" and tag its rows.
##   f:   path to the file
##   ...: further arguments passed on to read.table()
## Returns the table with its columns renamed to "V" and "I", plus `batch`
## and `sample` columns taken from the file name.
read_custom <- function(f, ...) {
  d <- read.table(f, ...)
  names(d) <- c("V", "I")
  ## extract sample and batch from the base filename: drop any directory
  ## part and only a literal trailing ".txt", so underscores in the path or
  ## "txt" inside the name cannot corrupt the ids
  stem <- sub("\\.txt$", "", basename(f))
  ids <- strsplit(stem, "_", fixed = TRUE)
  d$batch <- ids[[1]][1]
  d$sample <- ids[[1]][2]
  d
}
## list files to read; list.files() treats `pattern` as a regular expression,
## so escape the dot and anchor it to keep only names ending in ".txt"
files <- list.files(pattern = "\\.txt$")
## read them all in a single data.frame
m <- ldply(files, read_custom)