在R中将chs转换为csv

时间:2016-11-27 07:27:38

标签: r

我有一个任务,需要在R中将扩展名为“.chs”的文件转换为csv,但谷歌搜索没有多大帮助。或者请告诉我,有没有办法直接在R中读取“.chs”文件?

1 个答案:

答案 0 :(得分:0)

假设这是一个ASCII bufkit集合模型(例如http://weather.uncc.edu/data/bufkit/ascii/2016021917_hrrr.prof.chs):

library(purrr)

# Example ASCII bufkit profile. It is downloaded once and cached in the
# working directory under its base name.
URL <- "http://weather.uncc.edu/data/bufkit/ascii/2016021917_hrrr.prof.chs"
fil <- basename(URL)
if (!file.exists(fil)) download.file(URL, fil)

l <- readLines(fil)

# Every record begins on a line that starts with "STATION".
starts <- grep("^STATION", l)

# Only every second "***" line is kept as a record end. Logical recycling
# picks the even positions and, unlike seq(2, length(ends), 2), does not
# error when the file holds fewer than two "***" lines.
ends <- grep("^\\*\\*\\*", l)
ends <- ends[c(FALSE, TRUE)]

# Fail early with a readable message instead of passing mismatched start/end
# vectors on to the record parser.
if (length(starts) != length(ends)) {
  stop(
    "Unbalanced records in ", fil, ": ", length(starts),
    " STATION lines but ", length(ends), " record terminators",
    call. = FALSE
  )
}

# Parse every record into a data frame (one row per table line) and row-bind
# the results. `start` and `end` are line indices into `l` for one record.
# glimpse() prints a summary; to save a CSV, assign the result of this
# pipeline and pass it to write.csv().
map2_df(starts, ends, function(start, end) {

  dat <- l[start:end]

  # The table sits between the record's two "***" lines: it begins two lines
  # after the first one and stops two lines before the last one.
  dat_se <- grep("^\\*\\*\\*", dat) + c(2, -2)
  tab <- dat[dat_se[1]:dat_se[2]]

  df <- read.table(text = tab, header = TRUE, stringsAsFactors = FALSE)

  # Whitespace-separated fields 3-5 of the first record line are stored as
  # latitude, longitude and station id (kept as character).
  st <- scan(text = dat[1], what = character(), quiet = TRUE)[3:5]
  df$lat <- st[[1]]
  df$lon <- st[[2]]
  df$station <- st[[3]]

  # Second record line: a 10-character stamp followed by a second field that
  # is stored verbatim as X1.
  hdr <- scan(text = dat[2], what = character(), quiet = TRUE)
  stamp <- hdr[[1]]

  # The stamp is read as two-digit year, month, day, hour, minute. The time
  # zone is pinned so the result does not depend on the machine's locale
  # (local-time parsing can shift the instant or yield NA in DST gaps).
  # NOTE(review): model times are presumably UTC -- confirm for your source.
  df$timestamp <- as.POSIXct(
    sprintf(
      "20%s-%s-%s %s:%s",
      substr(stamp, 1, 2),
      substr(stamp, 3, 4),
      substr(stamp, 5, 6),
      substr(stamp, 7, 8),
      substr(stamp, 9, 10)
    ),
    tz = "UTC"
  )
  df$X1 <- hdr[[2]]

  # Last field of the fourth record line, with the "PROJECTION=" label removed.
  proj_fields <- scan(text = dat[4], what = character(), quiet = TRUE)
  df$projection <- gsub("PROJECTION=", "", tail(proj_fields, 1))

  # The next-to-last record line supplies three numbers, taken in order of
  # appearance. Base regmatches()/gregexpr() replaces stri_match_all_regex(),
  # which needed the stringi package although the script never loaded it.
  # A line with fewer than three numbers yields NA for the missing ones.
  summary_line <- dat[length(dat) - 1]
  prcp <- as.numeric(
    regmatches(summary_line, gregexpr("[0-9.]+", summary_line))[[1]]
  )
  df$prcp_hr <- prcp[1]
  df$prcp_tot <- prcp[2]
  # Column name `sf_pres` is kept as-is so existing output/consumers match.
  df$sf_pres <- prcp[3]

  # Lower-case all column names (table headers and X1 included).
  setNames(df, tolower(colnames(df)))

}) %>% dplyr::glimpse()
## Observations: 800
## Variables: 17
## $ lyr        <int> 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 3...
## $ temp       <dbl> -56.3, -57.8, -59.3, -61.3, -62.9, -64.6, -67.7, ...
## $ depr       <dbl> 30.3, 27.6, 25.1, 22.1, 99.0, 99.0, 99.0, 99.0, 9...
## $ kts        <dbl> 7.2, 9.0, 7.6, 5.7, 6.0, 8.1, 15.4, 27.5, 36.9, 4...
## $ dir        <dbl> 88, 62, 23, 329, 291, 303, 313, 305, 289, 293, 29...
## $ pres       <dbl> 22, 27, 33, 38, 44, 51, 58, 65, 73, 82, 93, 106, ...
## $ rh         <dbl> 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 18,...
## $ omeg       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ lat        <chr> "32.90N", "32.90N", "32.90N", "32.90N", "32.90N",...
## $ lon        <chr> "80.03W", "80.03W", "80.03W", "80.03W", "80.03W",...
## $ station    <chr> "CHS", "CHS", "CHS", "CHS", "CHS", "CHS", "CHS", ...
## $ timestamp  <dttm> 2016-02-19 17:00:00, 2016-02-19 17:00:00, 2016-0...
## $ x1         <chr> "722080", "722080", "722080", "722080", "722080",...
## $ projection <chr> "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",...
## $ prcp_hr    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ prcp_tot   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ sf_pres    <dbl> 1033, 1033, 1033, 1033, 1033, 1033, 1033, 1033, 1...

如果这确实是您所说的文件类型,我可以进一步详细解释这个答案(如果这不是正确的数据,我就不在解释上花费时间了)。