从字符串到数字的持续时间

时间:2018-07-23 09:12:30

标签: r

我有一份数据,其中的时间/持续时间以字符串格式存储,例如 "1 day, 4 hours, 58 minutes, 52 seconds"、"1 week, 1 day, 20 hours, 30 minutes, 49 seconds" 等。如何转换这些持续时间,使之以天数表示?难点在于各行包含的单位并不一致:有些行只有秒,有些行只有分钟和秒,等等。谢谢!

数据示例:

# Sample data: two vectors of human-readable duration strings. Each string
# lists only the units it needs, so the set of units varies from row to row.
Duration_1 <- c(
  "43 weeks, 1 day, 18 hours, 59 minutes, 13 seconds",
  "12 seconds",
  "33 minutes, 58 seconds",
  "1 hour, 54 minutes, 3 seconds",
  "55 minutes, 4 seconds"
)
Duration_2 <- c(
  "55 seconds",
  "21 hours, 16 minutes, 40 seconds",
  "2 days, 46 minutes, 55 seconds",
  "13 hours, 53 minutes, 8 seconds",
  "15 weeks, 6 days, 5 hours, 37 minutes, 6 seconds"
)
# The standalone vectors are kept because later snippets use them directly.
Duration <- data.frame(Duration_1, Duration_2)

2 个答案:

答案 0 :(得分:1)

好吧,您需要使用一些简单的正则表达式编写解析器:

# Convert duration strings such as "1 day, 4 hours, 58 minutes, 52 seconds"
# to a total number of seconds.
#
# x: character vector, or anything as.character() can coerce (e.g. a factor
#    column of a data frame). Each element may mention any subset of the
#    units week/day/hour/minute/second, singular or plural.
#
# Returns an unnamed numeric vector with one value per element of x: the
# duration in seconds. Units that do not occur in an element contribute 0.
foo <- function(x) {
  x <- as.character(x)

  # Seconds per unit; the names double as the words searched for in the text.
  mult <- c(
    week = 7 * 24 * 3600, day = 24 * 3600, hour = 3600,
    minute = 60, second = 1
  )

  # Total count of `unit` found in every element of x.
  count_unit <- function(unit) {
    # Lookahead regex: digits followed by a space and the unit name.
    pattern <- paste0("\\d+(?= ", unit, ")")
    hits <- regmatches(x, gregexpr(pattern, x, perl = TRUE))
    # Summing per element always yields exactly one value per input string,
    # whether the unit is absent (sum of nothing is 0) or occurs repeatedly,
    # so the per-unit vectors can never get out of step with x.
    vapply(hits, function(h) sum(as.numeric(h)), numeric(1), USE.NAMES = FALSE)
  }

  total <- numeric(length(x))
  for (unit in names(mult)) {
    total <- total + mult[[unit]] * count_unit(unit)
  }
  total
}

# Run foo over every column; assigning into Duration[] replaces the column
# contents while keeping the data frame and its column names.
Duration[] <- Map(foo, Duration)
# Resulting data frame (all values in seconds):
#   Duration_1 Duration_2
# 1   26161153         55
# 2         12      76600
# 3       2038     175615
# 4       6843      49988
# 5       3304    9610626

答案 1 :(得分:0)

  

如何转换持续时间,使之显示为天数?

作为另一种解决方案,我们可以利用 difftime,例如:

# Lookup table from the unit word used in the text (singular or plural) to
# the unit name accepted by as.difftime().
unitnames <- c(
  week = "weeks", weeks = "weeks", day = "days", days = "days",
  hour = "hours", hours = "hours", minute = "mins", minutes = "mins",
  second = "secs", seconds = "secs"
)

# Convert duration strings such as "1 day, 4 hours, 58 minutes" to days.
#
# w: character vector (other types, e.g. factors, are coerced with
#    as.character()). Each element is a ", "-separated list of
#    "<number> <unit>" quantities, with <unit> one of names(unitnames).
#
# Returns a numeric vector with one value per element of w: the duration in
# days. NA elements give NA. A quantity whose unit is not in unitnames raises
# an error naming the offending text.
converdays <- function(w) {
  w <- as.character(w)

  # Convert one c("<number>", "<unit>") pair to a plain number of days.
  quantity_days <- function(y) {
    if (!(y[2] %in% names(unitnames))) {
      stop("unrecognised duration quantity: '", paste(y, collapse = " "), "'",
           call. = FALSE)
    }
    z <- as.difftime(as.numeric(y[1]), units = unitnames[[y[2]]])
    units(z) <- "days"  # let difftime do the unit conversion
    as.numeric(z)
  }

  # Sum all quantities of one already-split duration string.
  string_days <- function(x) {
    if (length(x) == 1L && is.na(x)) {
      return(NA_real_)
    }
    sum(vapply(strsplit(x, " "), quantity_days, numeric(1)))
  }

  # vapply (unlike sapply) guarantees a numeric vector even for empty input.
  vapply(strsplit(w, ", "), string_days, numeric(1))
}

# Durations of the first sample vector, expressed in days.
converdays(Duration_1)
# [1] 3.027911e+02 1.388889e-04 2.358796e-02 7.920139e-02 3.824074e-02
# Durations of the second sample vector, expressed in days.
converdays(Duration_2)
# [1] 6.365741e-04 8.865741e-01 2.032581e+00 5.785648e-01 1.112341e+02

另一种变体:如果希望输出保留 difftime 类,以便之后能够轻松转换为不同的单位,可以这样写:

# Lookup table from the unit word used in the text (singular or plural) to
# the unit name accepted by as.difftime().
unitnames <- c(
  week = "weeks", day = "days", hour = "hours", minute = "mins",
  second = "secs",
  weeks = "weeks", days = "days", hours = "hours", minutes = "mins",
  seconds = "secs"
)

# Helper: split every element of x on separator s, apply f to each vector of
# pieces, and combine the results with c().
csplit <- function(x, s, f) {
  do.call(c, lapply(strsplit(x, s), f))
}

# Convert duration strings such as "1 day, 4 hours, 58 minutes" to a
# "difftime" vector, so the caller can pick the unit afterwards.
#
# w: character vector (other types, e.g. factors, are coerced with
#    as.character()). Each element is a ", "-separated list of
#    "<number> <unit>" quantities, with <unit> one of names(unitnames).
#
# Returns a difftime with one value per element of w. Its units are whatever
# c() and sum() choose for the parsed quantities, so set them explicitly with
# `units(d) <- "days"` or read them with as.numeric(d, units = "days").
# Empty input gives a zero-length difftime. A quantity whose unit is not in
# unitnames raises an error naming the offending text.
convertds <- function(w) {
  w <- as.character(w)
  if (length(w) == 0L) {
    # do.call(c, list()) would give NULL; keep the return type stable instead.
    return(as.difftime(numeric(0), units = "secs"))
  }

  # Convert one c("<number>", "<unit>") pair to a difftime in that unit.
  as_quantity <- function(y) {
    if (!(y[2] %in% names(unitnames))) {
      stop("unrecognised duration quantity: '", paste(y, collapse = " "), "'",
           call. = FALSE)
    }
    as.difftime(as.integer(y[1]), units = unitnames[y[2]])
  }

  # Outer split: one duration string -> its quantities; inner split: one
  # quantity -> magnitude and unit. The quantities of a string are summed.
  csplit(w, ", ", function(x) sum(csplit(x, " ", as_quantity)))
}
# Convert the first sample vector, keep the difftime result, and show it.
d1 <- convertds(Duration_1)
print(d1)
# Time differences in secs
# [1] 26161153       12     2038     6843     3304

# Re-express the same durations in days and show them again.
units(d1) <- "days"
d1
# Time differences in days
# [1] 3.027911e+02 1.388889e-04 2.358796e-02 7.920139e-02 3.824074e-02