我获得了182个用户的GPS轨迹数据,存放在182个文件夹中,文件夹名称为用户ID,例如'000','001','002'.....'181'。在每个文件夹中有另一个名为“Trajectory”的文件夹,其中包含许多.plt文件,每个文件表示该用户记录的一条轨迹。以下是.plt文件的示例:
lat,lon,default,altitude,num of days,date,time
39.921712,116.472343,0,13,39298.1462037037,2007-08-04,03:30:32
39.921705,116.472343,0,13,39298.1462152778,2007-08-04,03:30:33
39.921695,116.472345,0,13,39298.1462268519,2007-08-04,03:30:34
39.921683,116.472342,0,13,39298.1462384259,2007-08-04,03:30:35
39.921672,116.472342,0,13,39298.14625,2007-08-04,03:30:36
39.921583,116.472315,0,13,39298.1462731481,2007-08-04,03:30:38
39.921572,116.472315,0,13,39298.1462847222,2007-08-04,03:30:39
39.92156,116.47229,0,13,39298.1462962963,2007-08-04,03:30:40
39.921565,116.47229,0,13,39298.1463078704,2007-08-04,03:30:41
39.92157,116.472288,0,13,39298.1463194444,2007-08-04,03:30:42
39.921577,116.4723,0,13,39298.1463310185,2007-08-04,03:30:43
39.92158,116.472307,0,13,39298.1463425926,2007-08-04,03:30:44
39.921562,116.472303,0,13,39298.1463541667,2007-08-04,03:30:45
39.921545,116.472308,0,13,39298.1463657407,2007-08-04,03:30:46
39.921532,116.472313,0,13,39298.1463773148,2007-08-04,03:30:47
39.921505,116.472317,0,13,39298.146400463,2007-08-04,03:30:49
39.921492,116.472317,0,13,39298.146412037,2007-08-04,03:30:50
39.921485,116.472323,0,13,39298.1464351852,2007-08-04,03:30:52
39.921487,116.472323,0,13,39298.1464930556,2007-08-04,03:30:57
39.921487,116.472318,0,13,39298.1465046296,2007-08-04,03:30:58
现在我要做的是计算每个用户的平均轨迹距离,也就是说,我需要先计算用户文件夹中每个.plt文件的轨迹距离,把它们相加后除以.plt文件的数量,从而得到平均距离。
我是R的新手,目前只写了下面这些代码:
# Root directory that holds one sub-folder per user id.
dir <- "Geolife Trajectories 1.3/Data"
# Every entry directly under the root (expected to be the user folders).
subdirs <- list.files(dir)
# NOTE(review): setdiff() takes two arguments (x and y); called with only
# `subdirs` this line will fail. Presumably some entries were meant to be
# excluded here -- confirm the intent.
numbersubdirs <- setdiff(subdirs)
# NOTE(review): df is initialised but not used anywhere in the visible snippet.
df <- NULL
# Outer loop: one iteration per user folder.
for(i in 1:length(numbersubdirs)) {
# Path of the current user's trajectory folder.
path <- paste0(dir,"/",numbersubdirs[i],"/Trajectory")
# Names of the files found in that folder.
pltfiles <- list.files(path)
# Inner loop: one iteration per trajectory file of the current user.
for(j in pltfiles) {
pltpath <- paste0(path,"/",j)
# Skip the first 6 lines and read the rest as header-less, comma-separated
# values.
data <- read.table(pltpath, skip = 6, header = FALSE, sep =",",stringsAsFactors = FALSE)
# NOTE(review): the snippet ends here; both loops are left unclosed.
我真的不知道如何完成这个循环......有人可以给我一些帮助吗?非常感谢!
答案 0 :(得分:1)
下面的函数使用 haversine 公式计算相邻两个GPS坐标点之间的距离:
#' Distance between consecutive GPS points (haversine formula).
#'
#' @param lat Numeric vector of latitudes in degrees.
#' @param lon Numeric vector of longitudes in degrees, same length as `lat`.
#' @return Numeric vector with the same length as `lat`. Element i is the
#'   great-circle distance in meters from point i - 1 to point i; the first
#'   element is NA because it has no predecessor. Empty input returns
#'   `numeric(0)`.
gps_distance <- function(lat, lon) {
  stopifnot(length(lat) == length(lon))
  len <- length(lat)

  # Guard short inputs: for len < 2 the ranges `2:len` / `1:(len - 1)` count
  # downwards and would yield a result of the wrong length.
  if (len == 0L) {
    return(numeric(0))
  }
  if (len == 1L) {
    return(NA_real_)
  }

  earth_radius <- 6.371 * 10^6 # m

  # Convert degrees to radians
  latrad <- lat * pi / 180 # rad
  lonrad <- lon * pi / 180 # rad

  # Index pairs (from, to) of consecutive points.
  from <- seq_len(len - 1L)
  to <- from + 1L

  dlat <- latrad[to] - latrad[from]
  dlon <- lonrad[to] - lonrad[from]

  a <- sin(dlat / 2)^2 +
    sin(dlon / 2)^2 * cos(latrad[from]) * cos(latrad[to])
  # Clamp rounding noise so that sqrt(1 - a) cannot become NaN when `a`
  # drifts marginally outside [0, 1].
  a <- pmin(1, pmax(0, a))
  central_angle <- 2 * atan2(sqrt(a), sqrt(1 - a))

  # Return in meters; the leading NA keeps the output aligned with the input.
  c(NA, central_angle) * earth_radius
}
使用 dplyr 包可以简化代码,不过不用它也可以得到类似的结果。您的示例不易重现,但以下代码应该可行。
library(dplyr)
data_path <- "Geolife Trajectories 1.3/Data"

# One entry per user directory, as full paths so each can be searched
# directly. Naming the vector by the folder name makes lapply() return a list
# labelled by user id.
users <- dir(data_path, full.names = TRUE)
names(users) <- basename(users)

# For each user: total the distance of every trajectory file, then average
# those totals. Yields a named list of one-row data frames (`avg_distance`).
lapply(users, function(user) {
  # Find all .plt files in THIS user's 'Trajectory' directory. `user` is
  # already a full path, so it is not combined with `data_path` again.
  plt_files <- file.path(user, "Trajectory") %>%
    dir(pattern = "\\.plt$", full.names = TRUE)

  # A user without any trajectory file has no average to report; without this
  # guard the summarize() below fails on the missing `total_distance` column.
  if (length(plt_files) == 0) {
    return(data.frame(avg_distance = NA_real_))
  }

  plt_files %>%
    lapply(function(file) {
      # Read one trajectory and return its total distance.
      # NOTE(review): this expects a header row naming `lat` and `lon`. If
      # the files start with preamble lines and have no column names,
      # presumably `skip`, `header = FALSE` and `col.names` are needed --
      # confirm against a real file.
      read.csv(file, stringsAsFactors = FALSE) %>%
        mutate(distance = gps_distance(lat, lon)) %>%
        summarize(total_distance = sum(distance, na.rm = TRUE))
    }) %>%
    # then calculate the average distance that the user traveled
    # over all of their trajectories
    bind_rows() %>%
    summarize(avg_distance = mean(total_distance, na.rm = TRUE))
})