Question

的所有人。我正在尝试独立和同时根据距离（UTM）和时间（H：M：S）标准过滤GPS位置数据。这是数据结构：

head(collar)
FID  animal      date       time        zone easting northing
1    URAM01_2012 6/24/2012  10:00:00 AM 13S  356664  3971340
2    URAM01_2012 6/24/2012  1:02:00 PM  13S  356760  3971480
3    URAM01_2012 6/24/2012  4:01:00 PM  13S  357482  3972325
4    URAM01_2012 6/24/2012  7:01:00 PM  13S  356882  3971327
5    URAM01_2012 6/25/2012  4:01:00 AM  13S  356574  3971765
6    URAM01_2012 6/25/2012  7:01:00 AM  13S  357796  3972231

现在我只按距离过滤，但我遇到了一些问题。代码应该计算FID [1]和FID [2]之间的距离，然后在新列（$ step.length）中将该距离分配给FID [1]。在计算了所有距离之后，然后基于距离规则对数据进行子集化。现在我把它设置到我希望所有位置相距> 200米的位置。一旦子集化，则重复该过程，直到所有后续位置之间的距离> 200m。这是我编写的代码，它只完成了我想做的部分：

reps <- 10
 #Begin loop for the number of reps. Right now it's at 10 just to see if the code works.
for(rep in 1:reps){

  #Begin loop for the number of GPS locations in the file
  for(i in 1:length(collar$FID)){

    #Calculate the distance between a GPS location and the next GPS locations. the formula is the hypotenuse of the Pythagorean theorem. 
    collar$step.length[i] <- sqrt(((collar$easting[i] - collar$easting[i+1])^2) + ((collar$northing[i] - collar$northing[i+1])^2))

  }

  #Subset the data. Select all locations that are >200m from the next GPS location.
  collar <- subset(collar, step.length >200) 

}

现在，代码并不完美，我想在代码中添加两个条件。

1。）不考虑动物ID。因此，当距离应为NA时，将使用新动物的第一位置产生动物最后位置的距离。我认为使用（我在1：独特（衣领$动物））可能会工作，但它没有（令人震惊），我不知道该怎么做，因为（我的长度（领$动物））没有' t仅使用唯一值。

2。）当所有位置都> 200米时，我还想在for循环中插入一个中断。我确信必须有一个更好的方法来做到这一点，但我认为我会将代表设置为大的（例如10000），一旦满足标准，R就会破坏：

if(collar$step.length > 200){
   break }

然而，由于if条件> 1，因此仅使用第一个元素。我还没有想过时间或距离/时间，但如果有人对这些努力有任何建议，我会很感激这个建议。感谢您的帮助和指导。

Answer 1

我不太明白你要对代表做什么，但你可以利用split和unsplit功能来关注每一只动物。

首先，我创建了一个distance()函数，该函数从对象中找到名为easting和northing的列，以创建距离向量。然后我们将动物分开，并将distance函数应用于每只动物。我们将这个距离列表添加到动物列表中，其中包含一些mapply代码，然后unsplit结果将所有内容重新组合在一起。

让我知道你想用“＆gt; 200”步骤做什么。

distance <- function(x){
  easting <- x$easting
  northing <- x$northing
  easting2 <- c(easting[-1], NA)
  northing2 <- c(northing[-1], NA)
  sqrt((easting - easting2)^2 + (northing - northing2)^2)
}
s <- split(collar, collar$animal)
distances <- lapply(s, distance)
s2 <- mapply(cbind, s, "Distance" = distances, SIMPLIFY = F)
collar.new <- unsplit(s2, collar$animal)

修改

道歉，如果这很麻烦，我相信我可以缩短它但是现在让我知道它是否适合你。我也很想知道它在编制自己的数据时运行速度有多快。

filterout <- function(input, value = NULL){ # requirements of the input object stopifnot(all(c("FID","animal","easting","northing") %in% colnames(input))) distance <- function(x){ # internal distance function e1 <- x$easting; e2 <- c(NA, e1[-nrow(x)]) n1 <- x$northing; n2 <- c(NA, n1[-nrow(x)]) sqrt((e1 - e2)^2 + (n1 - n2)^2) } nc <- ncol(input) # save so we can "rewrite" Distance values each reiteration f <- function(input){ # the recursive function (will run until condition is met) z <- split(input[,-(nc+1)], input$animal) # split by animal & remove (if any) prior Distance column distances <- lapply(z, distance) # collect distances z2 <- mapply(cbind, z, "Distance" = distances, SIMPLIFY = F) # attach distances r1 <- lapply(z2, function(x) { # delete first row under criteria a <- x$Distance < value # CRITERIA a[is.na(a)] <- FALSE # Corrects NA values into FALSE so we don't lose them first <- which(a == T)[1] # we want to remove one at a time `if`(is.na(first), integer(0), x$FID[first]) # returns FIDs to remove }) z3 <- unsplit(z2, input$animal) # Whether to keep going or not if(length(unlist(r1)) != 0){ # if list of rows under criteria is not empty remove <- which(z3$FID %in% unlist(r1, use.names = F)) # remove them print(unlist(r1, use.names = F)) # OPTIONAL*** printing removed FIDs f(z3[-remove,]) # and run again } else { return(z3) # otherwise return the final list } } f(input) }

该功能可以按如下方式使用：

filterout(input = collar, value = 200) filterout(input = collar, value = 400) filterout(input = collar, value = 600)

<强> EDIT2：

我打开了一个赏金问题来弄清楚如何做某个步骤，但希望这个答案有所帮助。做37k行可能需要一点点〜一分钟，但让我知道〜

x <- collar skipdistance <- function(x, value = 200){ d <- as.matrix(dist(x[,c("easting","northing")])) d[lower.tri(d)] <- 0 pick <- which(d > value, arr.ind = T) # pick[order(pick[,"row"]),] # visual clarity findConnectionsBase <- function(m) { n <- nrow(m) myConnections <- matrix(integer(0), nrow = n, ncol = 2) i <- j <- 1L k <- 2L while (i <= n) { myConnections[j, ] <- m[i, ] while (k <= n && m[i, 2] != m[k, 1]) {k <- k + 1L} i <- k j <- j + 1L } myConnections[!is.na(myConnections[,1]), ] } keep.ind <- findConnectionsBase(pick) keep.row <- unique(c(keep.ind)) cbind(x[keep.row,], Distance = c(NA,d[keep.ind])) } a <- do.call(rbind,lapply(split(x, x$animal), skipdistance, value = 200)) dim(a)

编辑＃3：

library(lubridate) # great package for string -> dates # changed to give just rows that satisfy greater than value criteria skip <- function(dist.var, value = 200){ d <- as.matrix(dist(dist.var)) d[lower.tri(d)] <- 0 pick <- which(d > value, arr.ind = T) # pick[order(pick[,"row"]),] # visual clarity findConnectionsBase <- function(m) { n <- nrow(m) myConnections <- matrix(integer(0), nrow = n, ncol = 2) i <- j <- 1L k <- 2L while (i <= n) { myConnections[j, ] <- m[i, ] while (k <= n && m[i, 2] != m[k, 1]) {k <- k + 1L} i <- k j <- j + 1L } myConnections[!is.na(myConnections[,1]), ] } unique(c(findConnectionsBase(pick))) } collar <- structure(list(FID = 1:8, animal = c("URAM01_2012", "URAM01_2012", "URAM01_2012", "URAM01_2012", "URAM01_2013", "URAM01_2013", "URAM01_2013", "URAM01_2013"), date = c("6/24/2012", "6/24/2012", "6/24/2012", "6/24/2012", "6/25/2012", "6/25/2012", "6/25/2012", "6/25/2012" ), time = c("10:00:00AM", "1:02:00PM", "4:01:00PM", "7:01:00PM", "4:01:00AM", "7:01:00AM", "7:01:00AM", "7:01:00AM"), zone = c("13S", "13S", "13S", "13S", "13S", "13S", "13S", "13S"), easting = c(356664L, 356760L, 356762L, 356882L, 356574L, 357796L, 357720L, 357300L), northing = c(3971340L, 3971480L, 3971498L, 3971498L, 3971765L, 3972231L, 3972230L, 3972531L)), .Names = c("FID", "animal", "date", "time", "zone", "easting", "northing"), class = "data.frame", row.names = c(NA, -8L)) collar[skip(dist.var = collar[,c("easting","northing")], value = 200),] # dist function works on dates, but it makes sense to convert to hours dist(lubridate::mdy_hms(paste(collar$date, collar$time))) hours <- 2.99 collar[ skip(dist.var = lubridate::mdy_hms(paste(collar$date, collar$time)), value = hours * 3600), ]

Answer 2

非常感谢Evan的辛勤工作。显然，他生成的代码与我提出的代码略有不同，但这对于这个社区来说是件好事;我们自己分享独特的解决方案可能不会想到。请参阅编辑＃2以获取最终代码，该代码按连续点之间的距离过滤GPS项圈数据。

使用多个for循环计算距离和子集

2 个答案: