请耐心等待,我的英语不够完美
我已经设置了一个mysql表来将日志聚合到其中:
create table logs(
id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
player_id MEDIUMINT UNSIGNED NOT NULL,
`in` DATETIME NOT NULL,
`out` DATETIME NOT NULL,
channel_id INT UNSIGNED NOT NULL,
frame_id INT UNSIGNED NOT NULL,
media_id INT UNSIGNED NOT NULL
PRIMARY KEY (id)
我在in
和out
之间使用timediff来查找文件播放时间和播放时间。
它变得有点大,所以我决定将所需信息汇总到新表中:
create table reports(
id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
year SMALLINT UNSIGNED NOT NULL,
month TINYINT UNSIGNED NOT NULL,
day TINYINT UNSIGNED NOT NULL,
hour TINYINT UNSIGNED NOT NULL,
player_id MEDIUMINT UNSIGNED NOT NULL,
channel_id INT UNSIGNED NOT NULL,
frame_id INT UNSIGNED NOT NULL,
media_id INT UNSIGNED NOT NULL,
count MEDIUMINT UNSIGNED NOT NULL,
duration MEDIUMINT UNSIGNED NOT NULL
我使用以下查询填写报告表:
insert into reports (year, month, day, hour, player_id, media_id, channel_id, frame_id, count, duration)
select year(logs.in), month(logs.in) as Month, day(logs.in) as Day, hour(logs.in) as Hour, logs.player_id, logs.media_id, logs.channel_id, logs.frame_id, count(logs.media_id) as Count, SUM(TIME_TO_SEC(TIMEDIFF(logs.out, logs.in))) as Duration
FROM logs
where (logs.player_id=1 OR logs.player_id=2)
GROUP BY year(logs.in), month(logs.in), day(logs.in), hour(logs.in), player_id, media_id, channel_id, frame_id;
如果timediff不到一小时,它就可以了。但是有一种媒体可以播放的时间远远超过几分钟:
+-------+------+------+-----------+------------+----------+----------+----------------------+--------------+
| Month | Day | Hour | player_id | channel_id | frame_id | media_id | count(logs.media_id) | Duration |
+-------+------+------+-----------+------------+----------+----------+----------------------+--------------+
| 6 | 19 | 14 | 5 | 4 | 18 | 54 | 1 | 275h 48m 24s |
| 6 | 20 | 8 | 4 | 3 | 18 | 54 | 1 | 78h 45m 28s |
因此,如果我检查文件是否在6月20日的player_id 5上播放,我的查询会告诉我它不是,尽管它是。我想要实现的目标就是填写我的报告表:
+-------+------+------+-----------+------------+----------+----------+----------------------+--------------+
| Month | Day | Hour | player_id | channel_id | frame_id | media_id | count(logs.media_id) | Duration |
+-------+------+------+-----------+------------+----------+----------+----------------------+--------------+
| 6 | 19 | 14 | 5 | 4 | 18 | 54 | 1 | 48m 24s |
| 6 | 19 | 15 | 5 | 4 | 18 | 54 | 1 | 1h 00m 00s |
| 6 | 19 | 16 | 5 | 4 | 18 | 54 | 1 | 1h 00m 00s |
有办法吗?提前谢谢。
答案 0 :(得分:0)
CREATE DEFINER=`root`@`localhost` PROCEDURE `make_intervals`(startdate
timestamp, enddate timestamp, intval integer, unitval varchar(10))
BEGIN
declare thisDate timestamp;
declare nextDate timestamp;
set thisDate = startdate;
drop temporary table if exists time_intervals;
create temporary table if not exists time_intervals
(
interval_start timestamp,
interval_end timestamp
);
repeat
select
case unitval
when 'MICROSECOND' then timestampadd
(MICROSECOND, intval, thisDate)
when 'SECOND' then timestampadd(SECOND, intval, thisDate)
when 'MINUTE' then timestampadd(MINUTE, intval, thisDate)
when 'HOUR' then timestampadd(HOUR, intval, thisDate)
when 'DAY' then timestampadd(DAY, intval, thisDate)
when 'WEEK' then timestampadd(WEEK, intval, thisDate)
when 'MONTH' then timestampadd(MONTH, intval, thisDate)
when 'QUARTER' then timestampadd(QUARTER, intval, thisDate)
when 'YEAR' then timestampadd(YEAR, intval, thisDate)
end into nextDate;
insert into time_intervals select thisDate, timestampadd(MICROSECOND, -1, nextDate);
set thisDate = nextDate;
until thisDate >= enddate
end repeat;
END
然后我针对这个临时表处理我的主表
SELECT DATE(time_intervals.interval_start) AS Date, EXTRACT(HOUR FROM time_intervals.interval_start) AS Hour, player_id, channel_id, frame_id, media_id, count(media_id),
sum(CASE
WHEN time_intervals.interval_start > TIMESTAMPADD(HOUR,HOUR(logs.in), DATE(logs.in))
AND time_intervals.interval_start < TIMESTAMPADD(HOUR,HOUR(logs.out), DATE(logs.out))
THEN 3600
WHEN time_intervals.interval_start = TIMESTAMPADD(HOUR,HOUR(logs.in), DATE(logs.in))
AND time_intervals.interval_start < TIMESTAMPADD(HOUR,HOUR(logs.out), DATE(logs.out))
THEN 3600 - EXTRACT(MINUTE FROM logs.in)*60 - EXTRACT(SECOND FROM logs.in)
WHEN time_intervals.interval_start = TIMESTAMPADD(HOUR,HOUR(logs.out), DATE(logs.out))
AND time_intervals.interval_start > TIMESTAMPADD(HOUR,HOUR(logs.in), DATE(logs.in))
THEN EXTRACT(SECOND FROM logs.out) + EXTRACT(MINUTE FROM logs.out)*60
WHEN time_intervals.interval_start = TIMESTAMPADD(HOUR,HOUR(logs.in), DATE(logs.in))
AND time_intervals.interval_start = TIMESTAMPADD(HOUR,HOUR(logs.out), DATE(logs.out))
THEN EXTRACT(MINUTE FROM logs.out)*60 + EXTRACT(SECOND FROM logs.out) - EXTRACT(MINUTE FROM logs.in)*60 - EXTRACT(SECOND FROM logs.in)
ELSE 0
END) AS duration
FROM time_intervals
LEFT JOIN logs
Force index(media_id, player_id, channel_id, frame_id)
ON time_intervals.interval_start >= TIMESTAMPADD(HOUR,HOUR(logs.in), DATE(logs.in))
AND time_intervals.interval_start <= TIMESTAMPADD(HOUR,HOUR(logs.out), DATE(logs.out))
GROUP BY media_id, player_id, date, hour, channel_id, frame_id;
需要一段时间来处理,特别是数十亿行,就像我的情况一样,但是有效。