使用小时/分钟和 seq 创建标签,用于划分区间(分箱)

时间:2019-02-27 13:13:53

标签: r dplyr lubridate

我有一些小时/分钟的数据。number 是对 hour_min 执行 as.numeric 得到的数值。hour_min 是 hms 对象。

我想创建30分钟的间隔。数据如下:

library(dplyr)
library(lubridate)

df <- structure(list(hour_min = structure(c(NA, 69300, 46800, 35100, 52200,
37800, 52200, NA, 45300, 42300, NA, 29700, 46800, 34200, 32400, 43200, 36000,
41400, 29700, 36000), units = "secs", class = c("hms", "difftime")),
number = c(NA, 69300, 46800, 35100, 52200, 37800, 52200, NA, 45300, 42300, NA,
29700, 46800, 34200, 32400, 43200, 36000, 41400, 29700, 36000)),
class = "data.frame", row.names = c(NA, -20L),
.Names = c("hour_min", "number"))

   hour_min number
1        NA     NA
2  19:15:00  69300
3  13:00:00  46800
4  09:45:00  35100
5  14:30:00  52200
6  10:30:00  37800
7  14:30:00  52200
8        NA     NA
9  12:35:00  45300
10 11:45:00  42300
11       NA     NA
12 08:15:00  29700
13 13:00:00  46800
14 09:30:00  34200
15 09:00:00  32400
16 12:00:00  43200
17 10:00:00  36000
18 11:30:00  41400
19 08:15:00  29700
20 10:00:00  36000

所以我使用了下面的 cut 调用。如果我不使用 labels 参数,那么它似乎可以正常工作...但如何获得漂亮的标签?

也就是说,不使用 labels 时我可以得到结果:我想计数,但要以30分钟为间隔。

# Asker's attempt: bin df$number into 30-minute intervals between 07:00:00 and
# 23:00:00 (step of 1800) and label each bin with a clock time.
# NOTE(review): `breaks` and `labels` are both generated over the same
# 07:00:00-23:00:00 range with the same 1800 step, so `labels` appears to have
# one more element than there are intervals; base::cut() expects exactly
# length(breaks) - 1 labels. This looks like the reason the labelled call
# fails while the unlabelled one works -- confirm against the error message.
df$interval <- cut(df$number,
                          breaks = seq(as.numeric(hms::as.hms("07:00:00")), 
                                       as.numeric(hms::as.hms("23:00:00")), 1800),
                          labels = as.character(seq(hms::as.hms("07:00:00"), 
                                       hms::as.hms("23:00:00"), 1800)))

但是我需要标签。解决方案?

2 个答案:

答案 0 :(得分:1)

这是我自己的解决方案:我需要在 labels 参数中使用 hms::as.hms

# Bin df$number (seconds since midnight) into 30-minute intervals between
# 07:00:00 and 23:00:00 and label every bin with its start time.
#
# The break points are computed once so that breaks and labels cannot drift
# apart: the labels are simply all breaks except the last one, which gives the
# length(breaks) - 1 labels that cut() requires.
bin_breaks <- seq(as.numeric(hms::as.hms("07:00:00")),
                  as.numeric(hms::as.hms("23:00:00")),
                  by = 1800)

df$interval <- cut(
  df$number,
  breaks = bin_breaks,
  labels = hms::as.hms(bin_breaks[-length(bin_breaks)]),
  # Bins are labelled by their START time, so they must be left-closed
  # [start, end). With cut()'s default (right = TRUE) a value that sits exactly
  # on a boundary, e.g. 09:00:00, would fall into (08:30, 09:00] and be
  # labelled "08:30:00".
  right = FALSE,
  # Keep a value of exactly 23:00:00 in the last bin instead of turning it
  # into NA now that the intervals are left-closed.
  include.lowest = TRUE
)
# NOTE(review): newer hms releases deprecate hms::as.hms() in favour of
# hms::as_hms(); kept as-is here to match the rest of this file -- switch if
# the installed hms version warns.

答案 1 :(得分:1)

利用 chron 的 times 对象以一天的分数(小数)表示这一事实,将 number 转换为 chron 的 times 类,得到 times 列。这样我们就可以先使用 trunc.times,然后再使用 count

library(chron)
library(dplyr)
library(lubridate)
library(tidyr)

# Convert the seconds in `number` to chron times (fractions of a day), floor
# them to 30-minute steps, drop incomplete rows and count rows per step.
df %>%
  mutate(times = trunc(times(number / (24 * 60 * 60)), "00:30:00")) %>%
  drop_na() %>%
  count(times)

结果:

# A tibble: 11 x 2
   times           n
   <S3: times> <int>
 1 08:00:00        2
 2 09:00:00        1
 3 09:30:00        2
 4 10:00:00        2
 5 10:30:00        1
 6 11:30:00        2
 7 12:00:00        1
 8 12:30:00        1
 9 13:00:00        2
10 14:30:00        2
11 19:00:00        1

仅使用 chron

请注意,也可以只使用 chron,改写为如下形式:

library(chron)

# Express the seconds in df$number as chron times (fractions of a day), then
# floor them to 30-minute steps and tabulate how often each step occurs.
tt <- times(df$number / (24 * 60 * 60))
tt <- trunc(tt, "00:30:00")
table(tt)

结果:

08:00:00 09:00:00 09:30:00 10:00:00 10:30:00 11:30:00 12:00:00 12:30:00 
       2        1        2        2        1        2        1        1 
13:00:00 14:30:00 19:00:00 
       2        2        1 

或使用aggregate代替table

# Count the entries of `tt` per distinct truncated time; the result has one
# row per time with the count in column `n`.
aggregate(
  x = list(n = tt),
  by = list(times = tt),
  FUN = length
)

结果:

      times n
1  08:00:00 2
2  09:00:00 1
3  09:30:00 2
4  10:00:00 2
5  10:30:00 1
6  11:30:00 2
7  12:00:00 1
8  12:30:00 1
9  13:00:00 2
10 14:30:00 2
11 19:00:00 1