我有一个任务,需要在R中将扩展名为“.chs”的文件转换为csv。我在谷歌上没有找到多少有用的信息。或者请告诉我,有没有办法直接在R中读取“.chs”文件?
答案 0 :(得分:0)
假设这是一个ASCII格式的bufkit集合模型文件(例如http://weather.uncc.edu/data/bufkit/ascii/2016021917_hrrr.prof.chs):
library(purrr)
# Read an ASCII BUFKIT profile file and flatten it into one data frame:
# one row per table line of every "STATION" record, with the record-level
# header fields repeated on each row. The result is printed via glimpse().
URL <- "http://weather.uncc.edu/data/bufkit/ascii/2016021917_hrrr.prof.chs"
fil <- basename(URL)

# Cache the download: only fetch when no local copy exists.
if (!file.exists(fil)) download.file(URL, fil)

l <- readLines(fil)

# A record starts at a "STATION" line and contains "***" delimiter lines;
# every second delimiter is taken as the end of a record.
starts <- grep("^STATION", l)
ends <- grep("^\\*\\*\\*", l)
if (length(starts) == 0 || length(ends) < 2) {
  stop("No BUFKIT station records found in ", fil, call. = FALSE)
}
ends <- ends[seq(2, length(ends), 2)]
stopifnot(length(starts) == length(ends))

map2_df(starts, ends, function(start, end) {
  dat <- l[start:end]

  # The table runs from two lines after the first "***" line (its header
  # row) to two lines before the second "***" line.
  dat_se <- grep("^\\*\\*\\*", dat) + c(2, -2)
  tab <- dat[dat_se[1]:dat_se[2]]
  df <- read.table(text = tab, header = TRUE, stringsAsFactors = FALSE)

  # Record line 1: whitespace-separated fields 3-5 are latitude, longitude
  # and station id.
  st <- as.list(setNames(
    scan(text = dat[1], what = character(), quiet = TRUE)[3:5],
    c("lat", "lon", "station")
  ))
  df$lat <- st$lat
  df$lon <- st$lon
  df$station <- st$station

  # Record line 2: a YYMMDDHHMM stamp followed by one more field (kept
  # verbatim as X1).
  hdr <- as.list(setNames(
    scan(text = dat[2], what = character(), quiet = TRUE),
    c("timestamp", "X1")
  ))
  # NOTE(review): as.POSIXct() parses in the session's local time zone;
  # the stamp is presumably UTC -- confirm before comparing across zones.
  df$timestamp <- as.POSIXct(sprintf(
    "20%s-%s-%s %s:%s",
    substr(hdr$timestamp, 1, 2),
    substr(hdr$timestamp, 3, 4),
    substr(hdr$timestamp, 5, 6),
    substr(hdr$timestamp, 7, 8),
    substr(hdr$timestamp, 9, 10)
  ))
  df$X1 <- hdr$X1

  # Record line 4: the last field carries "PROJECTION=<value>".
  proj_field <- tail(scan(text = dat[4], what = character(), quiet = TRUE), 1)
  df$projection <- gsub("PROJECTION=", "", proj_field)

  # The line just before the closing "***" holds three numbers. Base
  # regmatches()/gregexpr() is used so that no package beyond those already
  # loaded is required.
  summary_line <- dat[length(dat) - 1]
  prcp_vals <- regmatches(
    summary_line,
    gregexpr("[[:digit:].]+", summary_line)
  )[[1]]
  prcp <- as.list(setNames(
    as.numeric(prcp_vals),
    c("prcp_hr", "prcp_tot", "sfc_pres")
  ))
  df$prcp_hr <- prcp$prcp_hr
  df$prcp_tot <- prcp$prcp_tot
  # The output column is named "sf_pres" (not "sfc_pres"); kept as is so the
  # resulting column names do not change.
  df$sf_pres <- prcp$sfc_pres

  setNames(df, tolower(colnames(df)))
}) %>% dplyr::glimpse()
## Observations: 800
## Variables: 17
## $ lyr <int> 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 3...
## $ temp <dbl> -56.3, -57.8, -59.3, -61.3, -62.9, -64.6, -67.7, ...
## $ depr <dbl> 30.3, 27.6, 25.1, 22.1, 99.0, 99.0, 99.0, 99.0, 9...
## $ kts <dbl> 7.2, 9.0, 7.6, 5.7, 6.0, 8.1, 15.4, 27.5, 36.9, 4...
## $ dir <dbl> 88, 62, 23, 329, 291, 303, 313, 305, 289, 293, 29...
## $ pres <dbl> 22, 27, 33, 38, 44, 51, 58, 65, 73, 82, 93, 106, ...
## $ rh <dbl> 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 18,...
## $ omeg <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ lat <chr> "32.90N", "32.90N", "32.90N", "32.90N", "32.90N",...
## $ lon <chr> "80.03W", "80.03W", "80.03W", "80.03W", "80.03W",...
## $ station <chr> "CHS", "CHS", "CHS", "CHS", "CHS", "CHS", "CHS", ...
## $ timestamp <dttm> 2016-02-19 17:00:00, 2016-02-19 17:00:00, 2016-0...
## $ x1 <chr> "722080", "722080", "722080", "722080", "722080",...
## $ projection <chr> "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",...
## $ prcp_hr <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ prcp_tot <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ sf_pres <dbl> 1033, 1033, 1033, 1033, 1033, 1033, 1033, 1033, 1...
如果这确实是你所说的文件类型,我可以再补充说明(如果这不是你要的数据,我就不在说明上多花精力了)。