Question

我有一个数据框 df，其中记录了车辆在各个位置的首次和最后一次记录时间。示例原始数据的链接见下面的代码。

# Load the example data. `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
df <- read.csv('https://raw.githubusercontent.com/smitty1788/Personal-Website/master/example.csv', header = TRUE)


    address                                   fuel  name    Long        Lat Time
1   625-627 S St NW, Washington, DC 20001, USA  87  EC6502  -77.02081   38.91411    5/18/2017 13:36
2   625-627 S St NW, Washington, DC 20001, USA  87  EC6502  -77.02081   38.91411    5/18/2017 15:28
3   1301-1327 Howard Rd SE, Washington, DC 20020, USA   87  EC6502  -76.99312   38.86101    5/18/2017 16:03
4   1301-1327 Howard Rd SE, Washington, DC 20020, USA   87  EC6502  -76.99312   38.86101    5/18/2017 20:17
5   821 Whittier Pl NW, Washington, DC 20012, USA   81  EC6502  -77.02542   38.97149    5/18/2017 21:03
6   821 Whittier Pl NW, Washington, DC 20012, USA   81  EC6502  -77.02542   38.97149    5/19/2017 8:35
7   1327 Allison St NW, Washington, DC 20011, USA   81  EC6502  -77.03118   38.94508    5/19/2017 8:50
8   1327 Allison St NW, Washington, DC 20011, USA   81  EC6502  -77.03118   38.94508    5/19/2017 8:55
9   815 Whittier Pl NW, Washington, DC 20012, USA   81  EC6502  -77.02481   38.97148    5/19/2017 9:11
10  1655-1699 N Rhodes St, Arlington, VA 22201, USA 100 EP0253  -77.08  38.89306    5/18/2017 13:36
11  1655-1699 N Rhodes St, Arlington, VA 22201, USA 100 EP0253  -77.08  38.89306    5/18/2017 15:02
12  2617 N Stuart St, Arlington, VA 22207, USA  100 EP0253  -77.11257   38.9066 5/18/2017 15:28
13  2617 N Stuart St, Arlington, VA 22207, USA  100 EP0253  -77.11257   38.9066 5/18/2017 16:54
14  1432-1488 N Quincy St, Arlington, VA 22201, USA 100 EP0253  -77.10842   38.8887 5/18/2017 17:14
15  1432-1488 N Quincy St, Arlington, VA 22201, USA 100 EP0253  -77.10842   38.8887 5/18/2017 18:30
16  1020-1028 N Stafford St, Arlington, VA 22201, USA   84  EP0253  -77.11047   38.88278    5/18/2017 23:15
17  1020-1028 N Stafford St, Arlington, VA 22201, USA   84  EP0253  -77.11047   38.88278    5/19/2017 13:53

数据表明，对于 "name" 列中的每个取值，第2行和第3行之间、第4行和第5行之间、第6行和第7行之间各有一次行程，依此类推。

我试图找出一种有效的方法来重新组织数据，以便一行显示起始位置和结束位置（end_address，end_fuel，end_long，end_lat，end_time）。基本上，每一行都是一次旅行。理想情况下，新的df将像这样组织

name, st_address, st_fuel, st_long, st_lat, st_time, end_address, end_fuel,  end_long,  end_lat,  end_time

有人能帮我确定一种方法吗？谢谢！

Answer 1

一个基于 dplyr 的解决方案，使用 group_by 按车辆名称（name）分组。

library(dplyr)

# Pair each departure with the following arrival, separately per `name`.
# Assumes rows are already in chronological order within each `name` and that
# each stop contributes two consecutive rows (first and last record), as in
# the sample data -- TODO confirm for the full data set.
# The first row of a group is the arrival at the first stop, so a trip runs
# from an even row (last record at a stop) to the next odd row (first record
# at the following stop): rows 2 -> 3, 4 -> 5, 6 -> 7, ...
#   trip_id: integer division by 2 gives rows 2k and 2k + 1 the same id k
#   from_to: 1 = from (even row), 0 = to (odd row)
df <- df %>% 
  group_by(name) %>% 
  mutate(trip_id = seq_along(address) %/% 2,
         from_to = (seq_along(address) + 1) %% 2) %>% 
  ungroup()

# separate into from and to
df_from <- df %>% filter(from_to == 1) %>% select(-from_to)
df_to   <- df %>% filter(from_to == 0) %>% select(-from_to)

# join the result: columns suffixed `.x` describe the start of the trip and
# columns suffixed `.y` describe the end. The inner join drops rows without a
# partner, i.e. the first row of each group (trip_id 0 has no "from") and a
# trailing departure that has no later arrival.
result <- inner_join(df_from, df_to, by = c("name", "trip_id"))

Answer 2

library(tidyverse)
library(lubridate)

# Load the example data. `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
df <- read.csv('https://raw.githubusercontent.com/smitty1788/Personal-Website/master/example.csv', 
           header = TRUE)

# Parse the timestamps, sort chronologically within each `name`, and remove
# the first row of each group (the arrival at the first stop, which has no
# preceding departure to pair with).
# The last row of a group is deliberately kept: when a group has an odd number
# of rows it is the arrival that completes the final trip, and when the count
# is even it ends up as an unmatched departure that the inner join below
# discards anyway.
df_clean <- df %>%
  mutate(Time = mdy_hm(Time)) %>% 
  arrange(name, Time) %>%
  group_by(name) %>%
  filter(row_number() != 1) %>%
  ungroup()


# Number the remaining rows per `name` so that each consecutive pair shares a
# trip_id; from_to is 1 for the departure (from) and 0 for the arrival (to).
df_tripID <- df_clean %>% 
      group_by(name) %>% 
      mutate(trip_id  = (1 + seq_along(address)) %/% 2,
             from_to  = seq_along(address) %% 2) %>% 
      ungroup()

# separate into from and to
df_from <- df_tripID %>% 
  filter(from_to == 1) %>% 
  select(-from_to)
df_to   <- df_tripID %>% 
  filter(from_to == 0) %>% 
  select(-from_to)

# join the result: one row per trip; columns suffixed `.x` describe the start
# and columns suffixed `.y` describe the end. Unpaired rows are dropped.
car2go_trips <- inner_join(df_from, df_to, by = c("name", "trip_id"))

重新排序时间/位置数据以列出点到点行程

2 个答案: