我正在尝试为数据生成行程编号(trip number)。数据包含车辆 ID 和时间戳。对于同一个 ID,彼此接近的所有时间戳被视为同一次行程的一部分。如果同一 ID 的相邻两个时间戳之间的时间差大于 1 小时,我希望将其视为一次新的行程。
示例数据:
{
"name": "rh-react",
"version": "1.0.0",
"description": "Pages accessible for outside world",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"dev": "webpack -d --watch",
"build": "sh -ac '. .env; webpack -d'",
"build:test": "sh -ac '. .env.test; webpack -p'",
"build:prod": "sh -ac '. .env.prod; webpack -p'"
},
"author": "",
"license": "ISC",
"dependencies": {
"ag-grid": "^13.3.1",
"ag-grid-react": "^13.3.0",
"axios": "^0.17.1",
"babel-loader": "^7.1.2",
"babel-plugin-add-module-exports": "^0.2.1",
"babel-preset-es2015": "^6.24.1",
"babel-preset-react": "^6.24.1",
"bootstrap": "^3.3.7",
"classnames": "^2.2.5",
"dotenv": "^4.0.0",
"fsevents": "^1.1.3",
"jquery": "^3.2.1",
"moment": "^2.19.1",
"query-string": "^5.0.0",
"react": "^16.2.0",
"react-async-script-loader": "^0.3.0",
"react-click-outside": "^3.0.0",
"react-cookies": "^0.1.0",
"react-datepicker": "^0.61.0",
"react-datetime": "^2.11.1",
"react-dom": "^16.2.0",
"react-dom-factories": "^1.0.2",
"react-dropzone": "^4.2.3",
"react-dropzone-component": "^3.0.0",
"react-moment": "^0.6.5",
"react-notifications": "^1.4.3",
"react-nvd3": "^0.5.7",
"react-router": "^2.8.1",
"react-router-dom": "^4.2.2",
"react-scripts": "^1.0.17",
"react-select": "^1.0.0-rc.10",
"react-select2-wrapper": "^1.0.4-beta5",
"reactjs-localstorage": "0.0.5",
"webpack": "^3.9.1",
"whatwg-fetch": "^2.0.3"
},
"devDependencies": {
"babel-core": "^6.26.0",
"babel-loader": "^7.1.2",
"babel-preset-env": "^1.6.0",
"babel-preset-stage-0": "^6.24.1",
"compression-webpack-plugin": "^1.0.1",
"css-loader": "^0.28.7",
"eslint": "^4.11.0",
"eslint-plugin-react": "^7.5.1",
"style-loader": "^0.19.0",
"uglifyjs-webpack-plugin": "^1.1.2",
"webpack": "^3.6.0",
"webpack-bundle-analyzer": "^2.9.1"
}
}
示例数据(R 代码;预期产出见下方答案输出中的 Trip 列):
# Sample input: 18 timestamped observations for two vehicles ("A" and "B"),
# nine rows each. Timestamps are strings written as year-day-month h:m:s.
data <- data.frame(
  ID = rep(c("A", "B"), each = 9),
  date = c(
    # vehicle A
    "2018-13-3 09:20:25", "2018-13-3 09:23:42", "2018-13-3 09:34:08",
    "2018-13-3 11:25:25", "2018-13-3 11:32:25", "2018-13-3 11:33:42",
    "2018-13-3 11:34:08", "2018-13-3 11:36:25", "2018-13-3 11:40:25",
    # vehicle B
    "2018-13-3 11:23:42", "2018-13-3 11:24:08", "2018-13-3 11:25:25",
    "2018-13-3 12:32:25", "2018-13-3 12:33:42", "2018-13-3 15:34:08",
    "2018-13-3 15:36:25", "2018-13-3 15:37:25", "2018-13-3 15:38:25"
  )
)
我目前使用的是 for 循环,但这在 R 中效率不高。有人能提供一种更高效的方法吗?
答案 0 :(得分:1)
我们先将 'date' 列转换为日期时间类,按 'ID' 分组,检查 'date' 相邻元素之间的差值是否大于 1 小时,然后对得到的逻辑向量求累积和(再加 1),即可得到 Trip 编号。
library(dplyr)
library(lubridate)
# Assign a per-vehicle trip number: a new trip starts whenever the gap to the
# previous timestamp of the same ID exceeds one hour.
# Rows are compared in their existing order, so timestamps are presumably
# already sorted within each ID -- verify before using on other inputs.
data %>%
  # The timestamp strings are written as year-day-month, hence ydm_hms().
  mutate(date = ydm_hms(date)) %>%
  group_by(ID) %>%
  mutate(
    # The first row of each ID is compared with itself (gap of 0), so it never
    # opens a new trip. cumsum() counts the > 1 hour gaps seen so far, and
    # + 1 makes the numbering start at 1.
    # `units = "hours"` is spelled out in full rather than relying on partial
    # matching of the argument name and of its value.
    Trip = cumsum(
      difftime(date, lag(date, default = date[1]), units = "hours") > 1
    ) + 1
  )
# A tibble: 18 x 3
# Groups: ID [2]
# ID date Trip
# <fctr> <dttm> <dbl>
# 1 A 2018-03-13 09:20:25 1.00
# 2 A 2018-03-13 09:23:42 1.00
# 3 A 2018-03-13 09:34:08 1.00
# 4 A 2018-03-13 11:25:25 2.00
# 5 A 2018-03-13 11:32:25 2.00
# 6 A 2018-03-13 11:33:42 2.00
# 7 A 2018-03-13 11:34:08 2.00
# 8 A 2018-03-13 11:36:25 2.00
# 9 A 2018-03-13 11:40:25 2.00
#10 B 2018-03-13 11:23:42 1.00
#11 B 2018-03-13 11:24:08 1.00
#12 B 2018-03-13 11:25:25 1.00
#13 B 2018-03-13 12:32:25 2.00
#14 B 2018-03-13 12:33:42 2.00
#15 B 2018-03-13 15:34:08 3.00
#16 B 2018-03-13 15:36:25 3.00
#17 B 2018-03-13 15:37:25 3.00
#18 B 2018-03-13 15:38:25 3.00