在Google BigQuery(Standard SQL)中将HH:MM:SS格式的字符串转换为秒

时间:2019-03-27 20:34:21

标签: sql google-bigquery

我在Google Analytics(分析)的表格中有一个称为“视频长度”的自定义维度。数据是字符串值,但是其中一些采用以下三种格式:

1)HH:MM:SS(例如1:54:55)

2)MM:SS(例如2:26)

3)秒数(例如,长度为2:26的视频记为146)

  • 我尝试使用case语句,根据值是HH:MM:SS格式还是MM:SS格式以及字符长度来分别处理。
  • 对于MM:SS格式的值,我还在前面补上了额外的零。
  • 我必须将各部分CAST为INT64,这样才能把这些值相加得到总秒数。

查询语法有效,但运行时出现错误消息“无法解析输入字符串”(Failed to parse input string)。

-- Query from the question: tries to turn videoLength into a number of seconds
-- by padding the value towards HH:MM:SS and summing
-- seconds + minutes*60 + hours*3600. The question reports that running it
-- fails with a "failed to parse input string" error.
-- NOTE(review): old_video_length is selected while every branch reads
-- videoLength; confirm both columns exist in vid_length_table.
select old_video_length,
 case 
  -- Contains ":" and is longer than 7 characters: parsed without padding.
  -- NOTE(review): each parse_time call gets a one-element format ("%E*S", "%M", "%H")
  -- for a multi-part string; presumably that mismatch causes the reported error. TODO confirm.
  when videoLength like "%:%" and length(videoLength) > 7 then cast(cast(parse_time("%E*S",videoLength) as string) as int64)+cast(cast(parse_time("%M",videoLength) as string) as int64)*60+cast(cast(parse_time("%H",videoLength) as string) as int64)*3600
  -- Contains ":" and is 6 or 7 characters long: "0" is prepended before parsing.
  when videoLength like "%:%" and length(videoLength) between 6 and 7 then cast(cast(parse_time("%E*S",concat("0",videoLength)) as string) as int64)+cast(cast(parse_time("%M",concat("0",videoLength)) as string) as int64)*60+cast(cast(parse_time("%H",concat("0",videoLength)) as string) as int64)*3600
  -- Contains ":" and is at most 5 characters long: "00:" is prepended before parsing.
  when videoLength like "%:%" and length(videoLength) <= 5 then cast(cast(parse_time("%E*S",concat("00:",videoLength)) as string) as int64)+cast(cast(parse_time("%M",concat("00:",videoLength)) as string) as int64)*60+cast(cast(parse_time("%H",concat("00:",videoLength)) as string) as int64)*3600
  -- No ":" at all: the string is cast straight to INT64.
  else cast(videoLength as int64) end as video_length_converted
  from vid_length_table


理想情况下,我希望表格如下所示:

old_video_length | video_length_converted
1:54:55            6895
2:26               146
146                146

2 个答案:

答案 0 :(得分:2)

以下适用于BigQuery标准SQL

# Drop any grouping, then add `word`: an ordered factor built from `word_`
# whose level order is taken from ageGenderFLowRank$word_.
ageGenderF_ <- mutate(
    ungroup(ageGenderF_),
    word = factor(word_, levels = ageGenderFLowRank$word_, ordered = TRUE)
)

# Slopegraph of `rank` across `genAge`, one line per `word_`.
# Layout adapted from https://ibecav.github.io/slopegraph/
ggplot(ageGenderF_, aes(x = genAge, y = rank, group = word_)) +
    # One line per word, coloured by word
    geom_line(aes(color = word_, alpha = 1), size = 1.5) +
    # Print the rank value at every (genAge, rank) point
    geom_label(
        aes(label = rank),
        size = 2.5,
        label.padding = unit(0.15, "lines"),
        label.size = 0.0
    ) +
    scale_x_discrete(position = "top", expand = c(0, .05)) +
    # Reversed y axis; the primary axis is ticked and labelled from the
    # "Women, 15-19" rows, the duplicated axis from the "Women, 36+" rows
    scale_y_reverse(
        breaks = ageGenderF_ %>% filter(genAge == "Women, 15-19") %>% pull(rank),
        labels = ageGenderF_ %>% filter(genAge == "Women, 15-19") %>% pull(word),
        sec.axis = dup_axis(
            ~.,
            breaks = ageGenderF_ %>% filter(genAge == "Women, 36+") %>% pull(rank),
            labels = ageGenderF_ %>% filter(genAge == "Women, 36+") %>% pull(word)
        )
    ) +
    theme_bw() +
    theme(
        # No legend
        legend.position    = "none",
        # No panel border
        panel.border       = element_blank(),
        # Strip the y axis title and the horizontal grid lines
        axis.title.y       = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        # Strip the x axis title and major vertical grid lines; set top axis text size
        axis.title.x       = element_blank(),
        panel.grid.major.x = element_blank(),
        axis.text.x.top    = element_text(size = 10),
        # No tick marks on either axis
        axis.ticks         = element_blank(),
        axis.ticks.length  = unit(0, "cm"),
        # Title: size 10, bold, centred; subtitle: centred
        plot.title         = element_text(size = 10, face = "bold", hjust = 0.5),
        plot.subtitle      = element_text(hjust = 0.5)
    )

您可以使用问题中的示例数据来测试和试验该查询,如下例所示

#standardSQL
-- Convert videoLength (H:MM:SS, MM:SS, or a plain number of seconds) to seconds.
-- The SAFE. prefix and SAFE_CAST are used so that, per the BigQuery docs,
-- a value that cannot be parsed yields NULL rather than an error.
SELECT
  v.videoLength AS old_video_length,
  CASE
    -- Ends in ":dd:dd": parse with %T, then count seconds since 00:00:00
    WHEN REGEXP_CONTAINS(v.videoLength, r':\d\d:\d\d$')
      THEN TIME_DIFF(SAFE.PARSE_TIME('%T', v.videoLength), TIME '00:00:00', SECOND)
    -- Ends in ":dd": parse as minutes:seconds
    WHEN REGEXP_CONTAINS(v.videoLength, r':\d\d$')
      THEN TIME_DIFF(SAFE.PARSE_TIME('%M:%S', v.videoLength), TIME '00:00:00', SECOND)
    -- Anything else: attempt an integer cast
    ELSE SAFE_CAST(v.videoLength AS INT64)
  END AS video_length_converted
FROM `project.dataset.vid_length_table` AS v

有结果

#standardSQL
-- Self-contained demo: the CTE stands in for the real table and supplies
-- one sample row per input format from the question.
WITH `project.dataset.vid_length_table` AS (
  SELECT videoLength
  FROM UNNEST(['1:54:55', '2:26', '146']) AS videoLength
)
SELECT
  v.videoLength AS old_video_length,
  CASE
    -- Ends in ":dd:dd": parse with %T, then count seconds since 00:00:00
    WHEN REGEXP_CONTAINS(v.videoLength, r':\d\d:\d\d$')
      THEN TIME_DIFF(SAFE.PARSE_TIME('%T', v.videoLength), TIME '00:00:00', SECOND)
    -- Ends in ":dd": parse as minutes:seconds
    WHEN REGEXP_CONTAINS(v.videoLength, r':\d\d$')
      THEN TIME_DIFF(SAFE.PARSE_TIME('%M:%S', v.videoLength), TIME '00:00:00', SECOND)
    -- Anything else: attempt an integer cast
    ELSE SAFE_CAST(v.videoLength AS INT64)
  END AS video_length_converted
FROM `project.dataset.vid_length_table` AS v

答案 1 :(得分:1)

首先,使用与列相同的格式将字符串转换为时间戳,然后使用 UNIX_SECONDS 函数将时间戳转换为秒。

解决方案:

#standardSQL
-- Convert videoLength (H:MM:SS, MM:SS, or a plain number of seconds) to seconds
-- by parsing it as a timestamp and reading the Unix-seconds value.
-- SAFE.PARSE_TIMESTAMP and SAFE_CAST replace the unguarded PARSE_TIMESTAMP / CAST
-- so that one malformed value produces NULL for its row instead of aborting the
-- whole query with "Failed to parse input string".
-- NOTE(review): %H and %M are clock fields, so durations such as "25:00:00" or
-- "75:30" presumably come back as NULL; confirm whether such values occur.
SELECT
  CASE
    -- Two colons: hours:minutes:seconds
    WHEN videoLength LIKE '%:%:%'
      THEN UNIX_SECONDS(SAFE.PARSE_TIMESTAMP('%H:%M:%S', videoLength))
    -- One colon: minutes:seconds
    WHEN videoLength LIKE '%:%'
      THEN UNIX_SECONDS(SAFE.PARSE_TIMESTAMP('%M:%S', videoLength))
    -- No colon: already a number of seconds
    ELSE SAFE_CAST(videoLength AS INT64)
  END AS video_length_converted
FROM vid_length_table

示例:

#standardSQL
-- Self-contained demo: the CTE supplies one sample row per input format
-- from the question.
WITH vid_length_table AS (
  SELECT '1:54:55' AS videoLength UNION ALL
  SELECT '2:26' UNION ALL
  SELECT '146'
)
-- SAFE.PARSE_TIMESTAMP and SAFE_CAST replace the unguarded PARSE_TIMESTAMP / CAST
-- so that one malformed value produces NULL for its row instead of aborting the
-- whole query with "Failed to parse input string".
SELECT
  videoLength AS old_video_length,
  CASE
    -- Two colons: hours:minutes:seconds
    WHEN videoLength LIKE '%:%:%'
      THEN UNIX_SECONDS(SAFE.PARSE_TIMESTAMP('%H:%M:%S', videoLength))
    -- One colon: minutes:seconds
    WHEN videoLength LIKE '%:%'
      THEN UNIX_SECONDS(SAFE.PARSE_TIMESTAMP('%M:%S', videoLength))
    -- No colon: already a number of seconds
    ELSE SAFE_CAST(videoLength AS INT64)
  END AS video_length_converted
FROM vid_length_table

结果:

Row old_video_length    video_length_converted   
1   1:54:55             6895     
2   2:26                146  
3   146                 146