在数据帧中按行程计算距离

时间:2017-06-07 08:35:18

标签: r location distance latitude-longitude distribution

我有一个由福克兰群岛的企鹅制造的46次旅行的数据框,我计算了一些参数,假设每次觅食行程是企鹅离海岸距离超过1000米,并且当回到< 1000米penguin 1 data in .xlsx .Rda al dataframe stored

library(sp)
>p.1 

 date               | lon          | lat          | trip | distancetocoast
2014-06-11 19:02:00 | -58,3508585  | -51,88438373 | 1    | 2236.067977
2014-06-12 01:02:00 | -58,35589725 | -51,88349529 | 1    | 1000
2014-06-12 13:02:00 | -58,27224941 | -51,91903677 | 1    | 7211,102551
2014-06-12 19:02:00 | -58,27974654 | -51,90535003 | 1    | 5830,951895
2014-06-13 01:02:00 | -58,32331901 | -51,89410464 | 1    | 3605,551275
2014-06-13 07:02:00 | -58,35833139 | -51,88809227 | 1    | 1414,213562
2014-06-13 13:02:00 | -58,35617673 | -51,88156281 | 1    | 1000
2014-06-13 19:02:00 | -58,34055711 | -51,89002367 | 1    | 2236,067977
2014-06-14 01:02:00 | -58,34982536 | -51,8715761  | 2    | 1000
2014-06-14 13:02:00 | -58,3073814  | -51,92722937 | 2    | 7071,067812
2014-06-14 19:02:00 | -58,34581314 | -51,86761133 | 3    | 1000
2014-06-15 01:02:00 | -58,34050624 | -51,88382088 | 3    | 1414,213562
2014-06-15 13:02:00 | -58,2974691  | -51,91795326 | 3    | 6324,55532
2014-06-15 19:02:00 | -58,19881901 | -51,95172233 | 3    | 13000
2014-06-16 01:02:00 | -58,1348416  | -51,98673766 | 3    | 18788,29423
2014-06-16 07:02:00 | -57,99399544 | -52,06988191 | 3    | 28861,73938
2014-06-16 13:02:00 | -58,00469754 | -52,02795069 | 3    | 26627,05391
2014-06-16 19:02:00 | -57,92758675 | -52,02184666 | 3    | 29000
2014-06-17 01:02:00 | -57,91658235 | -51,99748699 | 3    | 28284,27125
2014-06-17 07:02:00 | -57,77015528 | -51,99031797 | 3    | 30805,8436
2014-06-17 13:02:00 | -57,99601712 | -51,91519551 | 3    | 17804,49381
2014-06-17 19:02:00 | -58,06820013 | -51,92972737 | 3    | 14866,06875
2014-06-18 01:02:00 | -58,19845185 | -51,89522513 | 3    | 7615,773106
2014-06-18 07:02:00 | -58,35241361 | -51,88015998 | 3    | 1000
2014-06-18 13:02:00 | -58,35603546 | -51,88336878 | 3    | 1000
2014-06-18 19:02:00 | -58,33350332 | -51,87308427 | 3    | 1000
2014-06-19 01:02:00 | -58,33839581 | -51,87846631 | 3    | 1414,213562
2014-06-19 07:02:00 | -58,42661519 | -51,80902388 | 4    | 0
2014-06-19 13:02:00 | -58,30461883 | -51,93745837 | 4    | 7810,249676
2014-06-19 19:02:00 | -58,18362875 | -51,96475914 | 4    | 14317,82106

我使用此代码计算矩阵中每个点的距离:

distancebetwenpoints=spDists(locs1_utm, longlat=FALSE)
p.1$dist=distancebetwenpoints   #INCLUDE COLUMN DISTANCE TO THE COLONY OF EACH POINT

然而,这包括数据帧p.1中的779值的巨大矩阵,我尝试了其他方法,但我无法使其工作,并且使用矩阵我无法正确地研究统计数据,因此,我该如何计算企鹅在每次旅行中的距离?

FalklandCRS = CRS("+proj=utm +zone=21 +south +datum=WGS84 +units=m +no_defs") 
locs1 = sp::SpatialPointsDataFrame(coords = cbind(subset(p.1$lon, p.1$id == 1), subset(p.1$lat, p.1$id == 1)), data = subset(p.1, p.1$id == 1), proj4string=CRS("+proj=longlat +datum=WGS84"))
locs1_utm = sp::spTransform(locs1, FalklandCRS)

head(p.1)
id                date       lon       lat   lon.025     lon.5   lon.975   lat.025     lat.5   lat.975
1  1 2014-06-11 19:02:00 -58.35086 -51.88438 -58.35466 -58.35112 -58.34588 -51.88563 -51.88442 -51.88295
2  1 2014-06-12 01:02:00 -58.35590 -51.88350 -58.36226 -58.35594 -58.34932 -51.88602 -51.88350 -51.88068
4  1 2014-06-12 13:02:00 -58.27225 -51.91904 -58.32643 -58.26749 -58.24544 -51.93304 -51.91970 -51.90113
5  1 2014-06-12 19:02:00 -58.27975 -51.90535 -58.51877 -58.27893 -58.02546 -52.05056 -51.90636 -51.75887
6  1 2014-06-13 01:02:00 -58.32332 -51.89410 -58.56753 -58.32094 -58.09604 -52.04776 -51.89361 -51.74586
7  1 2014-06-13 07:02:00 -58.35833 -51.88809 -58.37411 -58.35856 -58.34158 -51.89519 -51.88818 -51.88029
  bathy    bathy2 distancetocoast
1    -1  4.080409        2236.068
2    -1  4.080409        1000.000
4    -6 -5.849781        7211.103
5    -4 -5.308407        5830.952
6    -1 -2.060174        3605.551
7    -1 -1.450360        1414.214

2 个答案:

答案 0 :(得分:0)

我冒昧地从头开始创建了一个解决方法

library(lubridate)
library(data.table)

dt<-data.table(
  date=ymd_hms(c("2014-06-11 19:02:00"
             ,"2014-06-12 01:02:00"
             ,"2014-06-12 13:02:00"
             ,"2014-06-12 19:02:00"
             ,"2014-06-13 01:02:00"
             ,"2014-06-13 07:02:00"
             ,"2014-06-13 13:02:00"
             ,"2014-06-13 19:02:00"
             ,"2014-06-14 01:02:00"
             ,"2014-06-14 13:02:00"
             ,"2014-06-14 19:02:00"
             ,"2014-06-15 01:02:00"
             ,"2014-06-15 13:02:00"
             ,"2014-06-15 19:02:00"
             ,"2014-06-16 01:02:00"
             ,"2014-06-16 07:02:00"
             ,"2014-06-16 13:02:00"
             ,"2014-06-16 19:02:00"
             ,"2014-06-17 01:02:00"
             ,"2014-06-17 07:02:00"
             ,"2014-06-17 13:02:00"
             ,"2014-06-17 19:02:00"
             ,"2014-06-18 01:02:00"
             ,"2014-06-18 07:02:00"
             ,"2014-06-18 13:02:00"
             ,"2014-06-18 19:02:00"
             ,"2014-06-19 01:02:00"
             ,"2014-06-19 07:02:00"
             ,"2014-06-19 13:02:00"
             ,"2014-06-19 19:02:00"))
  ,lon=c(-58.3508585 
         ,-58.35589725
         ,-58.27224941
         ,-58.27974654
         ,-58.32331901
         ,-58.35833139
         ,-58.35617673
         ,-58.34055711
     ,-58.34982536
     ,-58.3073814 
     ,-58.34581314
     ,-58.34050624
     ,-58.2974691 
     ,-58.19881901
     ,-58.1348416 
     ,-57.99399544
     ,-58.00469754
     ,-57.92758675
     ,-57.91658235
     ,-57.77015528
     ,-57.99601712
     ,-58.06820013
     ,-58.19845185
     ,-58.35241361
     ,-58.35603546
     ,-58.33350332
     ,-58.33839581
     ,-58.42661519
     ,-58.30461883
     ,-58.18362875)
  ,lat=c(-51.88438373 
     ,-51.88349529 
     ,-51.91903677 
     ,-51.90535003 
     ,-51.89410464 
     ,-51.88809227 
     ,-51.88156281 
     ,-51.89002367 
     ,-51.8715761  
     ,-51.92722937 
     ,-51.86761133 
     ,-51.88382088 
     ,-51.91795326 
     ,-51.95172233 
     ,-51.98673766 
     ,-52.06988191 
     ,-52.02795069 
     ,-52.02184666 
     ,-51.99748699 
     ,-51.99031797 
     ,-51.91519551 
     ,-51.92972737 
     ,-51.89522513 
     ,-51.88015998 
     ,-51.88336878 
     ,-51.87308427 
     ,-51.87846631 
     ,-51.80902388 
     ,-51.93745837 
     ,-51.96475914 )
  ,trip=c( 1 
       , 1 
       , 1 
       , 1 
       , 1 
       , 1 
       , 1 
       , 1 
       , 2 
       , 2 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 4 
       , 4 
       , 4 )
  ,distance2coast=c(2236.067977
                ,1000
                ,7211.102551
                ,5830.951895
                ,3605.551275
                ,1414.213562
                ,1000
                ,2236.067977
                ,1000
                ,7071.067812
                ,1000
                ,1414.213562
                ,6324.55532
                ,13000
                ,18788.29423
                ,28861.73938
                ,26627.05391
                ,29000
                ,28284.27125
                ,30805.8436
                ,17804.49381
                ,14866.06875
                ,7615.773106
                ,1000
                ,1000
                ,1000
                ,1414.213562
                ,0
                ,7810.249676
                ,14317.82106)

计算球体上点的距离的函数:

distanceOfPoints <- function(long1, lat1, long2, lat2) {
  R <- 6371 
  d <- acos(sin(lat1)*sin(lat2) + cos(lat1)*cos(lat2) * cos(long2-long1)) * R
  return(d) #km
}
##yet another function for the distance calculation between geodetic coordinates

gcd.hf <- function(long1, lat1, long2, lat2) {
  R <- 6371 # Earth mean radius [km]
   delta.long <- (long2 - long1)
   delta.lat <- (lat2 - lat1)
  a <- sin(delta.lat/2)^2 + cos(lat1) * cos(lat2) * sin(delta.long/2)^2
  c <- 2 * asin(min(1,sqrt(a)))
  d = R * c
  return(d) # Distance in km
}

此外:

dt<-dt[order(date)]#order chronologically

然后我们根据“旅行”将数据分成列表:

dt_split<-split(dt,by="trip")
names(dt_split)<-paste0("trip",names(dt_split))#and we give name to each element

一个简单的函数,它考虑连续的行并计算累积距离。

calcDistancePerTrip<-function(y){#y is dataframe-element of list
  l1<-lapply(seq_along(1:nrow(y)),function(x){
    dist<-distanceOfPoints(long1 = y[,.(lon)][x],long2= y[,.(lon)][x+1],lat1 = y[,.(lat)][x],lat2 = y[,.(lat)][x+1])
    #you can always use the gcd.hf for the dist calculation
    return(dist)
  })
  l1<-as.numeric(unlist(l1))#list to numeric vector
  tot_dist<-sum(l1,na.rm = T)#cumulative distance per trip 
  return(tot_dist)
}

最后我们将它“循环”在初始列表上:

lapply(dt_split,calcDistancePerTrip)

屈服于:

$trip1
[1] 531.6949

$trip2
[1] 354.8975

$trip3
[1] 3067.865

$trip4
[1] 1012.005

答案 1 :(得分:0)

我发现从data.frame获取所有信息的最佳方法是使用adehabitat包。 因此,我使用名为locs_utm的“SpatialPointDataframe”(与上面我称之为locs1_utm相同,将其转换为带有“x”和“y”名称的坐标,并创建了一个名为BirdTrips的列,其中包含唯一值,以便稍后使用当爆发和分裂获得每次旅行的距离和时间,将id列与旅行列相结合。

gent$trip[gent$trip<10]=paste(0,gent$trip[gent$trip<10],sep="")#so that trip 1,2,3 becomes 01,02,03...
gent$BirdTrip=paste(gent$id,gent$trip,sep=".")  #combine the animal id with the trip so you have 1.01,1.02, 1.03 and soon
gent$BirdTrip=as.factor(gent$BirdTrip)

BirdTrip
  1.01
  1.01
  ...
  2.01


foo = as.data.frame(locs_utm) #transform locs_utm-->SpatialPointDataframe into data.frame
foo = cbind.data.frame(foo$coords.x1, foo$coords.x2)  #convert lon lat, columns in coordinatse
names(foo) = c("x", "y")  #convert the columns in "x" and "y" to be able to use the as.ltraj and convert thedata in ltraj forat

tr=as.ltraj(xy=foo,
            date=p.1$date,
            id=p.1$id,
            burst=p.1$BirdTrip)

class(tr)    #[1] "ltraj" "list" 
tr=ld(tr)   #transform the ltraj into data.frame

为了收集一个data.frame中的所有信息,我使用as.POSIXct创建了一些向量,并使用tappply函数来计算行程的长度,行程的开始和结束日期以及最大位移。

ts = as.data.frame(as.POSIXct(tapply(tr$date, tr$burst, min, na.rm = T), origin = "1970-01-01")) #trip start
te = as.data.frame(as.POSIXct(tapply(tr$date, tr$burst, max, na.rm = T), origin = "1970-01-01")) #trip end
th = as.data.frame(tapply(tr$dt, tr$burst, sum, na.rm = T)/3600) #trip hours
tl = as.data.frame(tapply(tr$dist, list(tr$burst), sum, na.rm = T)/1000) #trip kms-->LENGTH
td = as.data.frame(sqrt(tapply(tr$R2n, list(tr$burst), max, na.rm = T))/1000) #max displacement kms

resumen = cbind.data.frame(ts[,1], te[,1], as.numeric(th[,1]), as.numeric(tl[,1]), as.numeric(td[,1]))
names(resumen) = c("Start", "End", "Hours", "Length", "Displacement")
View(resumen)