无法使用csv文件中的r解析数据

时间:2014-10-31 00:10:05

标签: r

我需要从csv文件中检索数据并对其进行图形化。数据如下所示:

S096-A91    11525   11492   11526   11514   11570   11563   11570   11562   11575   11594   11586   11592   11597   11596   11603   11608   11592   11605   11607   11599   11610   11601   11598   11610   11602   11611   11607   11608   11621   11614   11609   11616   11602   11610   11623



   Raw Data for Probe FAM-MGB                                                                                                                                           
    Chamber ID  1   2   3   4   5   6   7   8   9   10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
    S235-A01    4210    4260    4273    4274    4290    4310    4324    4331    4357    4367    4371    4381    4396    4393    4409    4416    4424    4430    4443    4457    4490    4538    4604    4669    4719    4779    4812    4853    4908    4938    4986    5019    5039    5073    5110
    S235-A02    4166    4224    4250    4253    4252    4275    4266    4272    4290    4294    4295    4300    4300    4301    4308    4310    4309    4320    4325    4346    4368    4435    4545    4688    4887    5107    5329    5541    5754    5944    6123    6282    6418    6541    6651

我需要来自(探测器FAM-MGB的原始数据)的数据,并且ID会继续保持大约40k以上的数据。我不能包括最左边的列S235-A01,因为我必须绘制点图。到目前为止,我几乎所有的工作都在工作,下面是代码:

#------DATA---------------------------
#|Raw Data for Probe FAM-MGB         |
#|Raw Data for Passive Reference ROX |
#|Bkgd Data for Passive Reference ROX|
#|Bkgd Data for Probe FAM-MGB        |
#-------------------------------------

#---- BEGIN file operations ------
getData <- function(file,name) {
  #lines <- readLines("Baseline Poly Raw.csv") #get each full row
  for(i in 1:length(file)) {
    lines <- readLines(file[i])
    indx <- grep(name,  lines) #row number for anything with search term in it

    counter <- 3

    dat <- read.table(text=lines[(indx+2)],
                      sep="", header=FALSE, stringsAsFactors=FALSE, check.names=FALSE)

    return(dat) #only getting 1 row for now for testing
    while(dat != " ") {#get all the data up to the whitespace
      print(dat)
      dat <- read.table(text=lines[(indx+counter)],  #read only one line per loop
                        sep=",", header=FALSE, stringsAsFactors=FALSE, check.names=FALSE)
      counter <- counter + 1
    }
  }
}  

readFiles <- function() { #gets all the files in directory ending in .csv, returns them as temp[x]
  temp = list.files(pattern="*.csv")
  #for (i in 1:length(temp)) print(temp[i])
  return(temp)
}

onStart <- function() { #function to get everything moving
  temp <- readFiles()   #gets all the files in directory ending in .csv, returns them as temp[x]
  getData(temp,"Raw Data for Probe FAM-MGB") #gets all data for this section form all the .csv files in directory
}

#------END file operations


#------BEGIN Plotting functions

quickPlot <- function(xs,ys) { #quickly plot any points with a regression line
  plot(xs,ys, xlab="x", ylab="y")
  abline(lm(ys~xs))
  title("Test graph")
}

polyFit <- function(xs,ys,degree) { #polynomial fitting a data set degree 3
  fit3 <- lm(ys~poly(xs,degree,raw=TRUE))
  xx <- seq(0,160, length=50)
  plot(xs,ys,pch='@')
  lines(xx, predict(fit3, data.frame(xs=xx)), col="green")
}

logFit <- function(xs,ys) { #graph the data set with log(x), y
  logEstimate = lm(ys ~ log(xs))
  plot(xs,ys,pch='@')
  lines(xs,predict(logEstimate),col='green')
}

eFit <- function(xs,ys) {
  expEstimate = lm(log(ys) ~ xs)
  plot(xs, ys,pch='@')
  lines(xs, exp(predict(expEstimate)),col='green')
}

sigmoidalFit <- function(xs,ys) {

}
#------END plotting functions

getNormalization <- function(data) {

}

askYesNo <- function() {
  n <- readline("Would you like to save the image? (Y/N):")
  if(n == 'Y' | n == 'y' | n == 'yes' | n == 'Yes') {
    print("Image saved")
    return(1)
  } else {
    print("Image not saved")
    return(0)
  }
}

# x <- c(32,64,96,118,126,144,152.5,158)
# y <- c(99.5,104.8,108.5,100,86,64,35.3,15)
# 
# polyFit(x,y,3)

x <- onStart() #only get 1 data row for now, just for testing

print(x)

print(x)yield:

V1
1 S235-A01,4210,4260,4273,4274,4290,4310,4324,4331,4357,4367,4371,4381,4396,4393,4409,4416,4424,4430,4443,4457,4490,4538,4604,4669,4719,4779,4812,4853,4908,4938,4986,5019,5039,5073,5110,

我无法绘制,因为它包含S235-A01。我不知道如何摆脱它。任何帮助都会被贬低,谢谢。 -SAM

0 个答案:

没有答案