SQL - 基于DateTime查询多个聚合 - MySQL

时间:2011-09-25 17:40:54

标签: mysql sql aggregation

这是一个复杂的问题。但我有一个表有一个DATETIME字段,还有一些其他int和float字段需要求和和平均。我们希望根据时间戳对此表进行求和和平均,并最终旨在开发3个查询,这些查询在某种意义上将相互构建。

所以能够看起来像这样

     TIMESTAMP      |subj_diff| SCR2  | SCR3
2011-09-20 09:01:37 |  1      | 0.02  | 1.6
2011-09-20 09:04:18 |  3      | 0.09  | 1.8
2011-09-20 14:24:55 |  5      | 0.21  | 1.2
2011-09-21 18:50:47 |  8      | 0.08  | 0.9
2011-09-21 18:54:21 |  9      | 0.12  | 2.1

我们想要生成的三个查询是:

1. 将以前数据中的所有前面的项目加起来,包括当前选择的记录。还应该有另一个列的总数所以说,例如,如果我们想要在20和21之间的结果,返回的表将如下所示:

     TIMESTAMP      |subj_diff| SCR2  | SCR3  | COUNT
2011-09-20 09:01:37 |  1      | 0.02  | ...   |  1
2011-09-20 09:04:18 |  4      | 0.11  |       |  2
2011-09-20 14:24:55 |  9      | 0.32  |       |  3
2011-09-21 18:50:47 |  17     | ...
2011-09-21 18:54:21 |  26     |  

2. 以5分钟的时间间隔汇总结果 - 与上述类似,但查询将返回3行作为行1&在图2中,行4和5将以与上述相同的方式加在一起。在该查询中,如果对于每5分钟间隔没有任何0,则返回计数为0。

     TIMESTAMP      |subj_diff| SCR2  | SCR3  | COUNT
2011-09-20 09:05:00 |  4      | 0.11  | 3.4   |   2
2011-09-20 14:25:00 |  5      | 0.21  | 1.2   |   1
2011-09-21 18:55:00 |  17     | 0.20  | 3.0   |   2

3. 在查询编号1中,对于当天每5分钟一次的查询编号2的结果集执行相同的操作(即从00:05:00到24:00:00 )。

这是一个相当棘手的问题,我不知道如何开始这个。有人能写SQL来解决这个问题吗?

使用游标和存储过程的一些基本代码,但它并没有真正起作用。

DROP PROCEDURE curdemo;
DELIMITER $$ 
CREATE PROCEDURE curdemo()
BEGIN
DECLARE done INT DEFAULT 0;

DECLARE a datetime;
DECLARE b,c FLOAT;

DECLARE cur1 CURSOR FOR 
SELECT  msgDate, subj_diff FROM classifier_results
WHERE DATE(msgDate) >= DATE('2011-09-25');

DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;

CREATE TEMPORARY TABLE IF NOT EXISTS temp_scores (d datetime, acc float);

OPEN cur1;

read_loop: LOOP
    FETCH cur1 INTO a, b;
    IF done THEN
      LEAVE read_loop;
    END IF;
    INSERT temp_scores(d,acc)
    SELECT a, SUM(subj_diff) FROM classifier_results 
    WHERE DATE(msgDate) >= DATE('2011-09-25')
    AND msgDate <= a;

END LOOP;

CLOSE cur1;

SELECT * FROM temp_scores;

END;

干杯!

4 个答案:

答案 0 :(得分:1)

我没有对此进行测试,但请告诉我这是否适合您:

1

SET @csum:=0;
SELECT  a.msgDate, (@csum:=@csum + a.subj_diff) AS subj_diff
FROM classifier_results a

2

SELECT FROM_UNIXTIME(FLOOR(UNIX_TIMESTAMP(a.msgDate)/(60*5))*(60*5)) as msgDate, sum(a.subj_diff) AS subj_diff
FROM classifier_results a

3

SET @csum:=0;
SELECT b.msgDate, (@csum:=@csum + b.subj_diff) AS subj_diff
FROM (
SELECT FROM_UNIXTIME(FLOOR(UNIX_TIMESTAMP(a.msgDate)/(60*5))*(60*5)) as msgDate, sum(a.subj_diff) AS subj_diff
FROM classifier_results a
) b

答案 1 :(得分:1)

让我们看看......

1:

SELECT TIMESTAMP, 
    (@var_subj_diff := @var_subj_diff + subj_diff) AS subj_diff, 
    (@var_SCR2 := @var_SCR2 + SCR2) AS SCR2, 
    (@var_SCR3 := @var_SCR3 + SCR3) AS SCR3,
    (@rownum := @rownum + 1) AS COUNT
FROM classifier_results, 
    (SELECT @var_subj_diff := 0, @var_SCR2 := 0, @var_SCR3 := 0, @rownum := 0) AS vars
WHERE TIMESTAMP BETWEEN '2011-09-20' AND '2011-09-21'
ORDER BY TIMESTAMP ASC

2:

SELECT FROM_UNIXTIME(ROUND(UNIX_TIMESTAMP(TIMESTAMP) / (60 * 5)) * (60 * 5)) AS TIMESTAMP, 
    SUM(subj_diff) AS subj_diff, SUM(SCR2) AS SCR2, SUM(SCR3) AS SCR3, COUNT(*) AS COUNT
FROM classifer_results
GROUP BY TIMESTAMP
ORDER BY TIMESTAMP ASC

3:

SELECT FROM_UNIXTIME(ROUND(UNIX_TIMESTAMP(TIMESTAMP) / (60 * 5)) * (60 * 5)) AS TIMESTAMP, 
    (@var_subj_diff := @var_subj_diff + SUM(subj_diff)) AS subj_diff, 
    (@var_SCR2 := @var_SCR2 + SUM(SCR2)) AS SCR2, 
    (@var_SCR3 := @var_SCR3 + SUM(SCR3)) AS SCR3,
    (@rownum := @rownum + 1) AS COUNT
FROM classifier_results, 
    (SELECT @var_subj_diff := 0, @var_SCR2 := 0, @var_SCR3 := 0, @rownum := 0) AS vars
GROUP BY TIMESTAMP
WHERE TIMESTAMP BETWEEN '2011-09-20' AND '2011-09-21'
ORDER BY TIMESTAMP ASC

希望我理解正确,让我知道一些问题出现了。 :)

答案 2 :(得分:1)

试试这段代码 -

创建并填充表:

CREATE TABLE classifier_results(
  `TIMESTAMP` DATETIME NOT NULL,
  subj_diff INT(11) DEFAULT NULL,
  scr2 FLOAT(10, 5) DEFAULT NULL,
  scr3 FLOAT(10, 5) DEFAULT NULL
);

INSERT INTO classifier_results VALUES 
  ('2011-09-20 09:01:37', 1, 0.02000, 1.60000),
  ('2011-09-20 09:04:18', 3, 0.09000, 1.80000),
  ('2011-09-20 14:24:55', 5, 0.21000, 1.20000),
  ('2011-09-21 18:50:47', 8, 0.08000, 0.90000),
  ('2011-09-21 18:54:21', 9, 0.12000, 2.10000);

执行这些查询:

-- 1 query
SET @subj_diff = 0;
SET @scr2 = 0;
SET @scr3 = 0;
SET @cnt = 0;
SELECT timestamp,
  @subj_diff:=IF(@subj_diff IS NULL, subj_diff, @subj_diff + subj_diff) subj_diff,
  @scr2:=IF(@scr2 IS NULL, scr2, @scr2 + scr2) scr2,
  @scr3:=IF(@scr3 IS NULL, scr3, @scr3 + scr3) scr3,
  @cnt:=@cnt+1 count
FROM classifier_results;

+---------------------+-----------+---------+---------+-------+
| timestamp           | subj_diff | scr2    | scr3    | count |
+---------------------+-----------+---------+---------+-------+
| 2011-09-20 09:01:37 |         1 | 0.02000 | 1.60000 |     1 |
| 2011-09-20 09:04:18 |         4 | 0.11000 | 3.40000 |     2 |
| 2011-09-20 14:24:55 |         9 | 0.32000 | 4.60000 |     3 |
| 2011-09-21 18:50:47 |        17 | 0.40000 | 5.50000 |     4 |
| 2011-09-21 18:54:21 |        26 | 0.52000 | 7.60000 |     5 |
+---------------------+-----------+---------+---------+-------+

-- 2 query
SELECT
  DATE(timestamp) + INTERVAL 5  * (12 * HOUR(timestamp) + FLOOR(MINUTE(timestamp) / 5)) MINUTE new_timestamp,
  SUM(subj_diff) subj_diff,
  SUM(scr2) scr2,
  SUM(scr3) scr3,
  COUNT(*) count
FROM classifier_results
GROUP BY new_timestamp;

+---------------------+-----------+---------+---------+-------+
| new_timestamp       | subj_diff | scr2    | scr3    | count |
+---------------------+-----------+---------+---------+-------+
| 2011-09-20 09:00:00 |         4 | 0.11000 | 3.40000 |     2 |
| 2011-09-20 14:20:00 |         5 | 0.21000 | 1.20000 |     1 |
| 2011-09-21 18:50:00 |        17 | 0.20000 | 3.00000 |     2 |
+---------------------+-----------+---------+---------+-------+

-- 3 query
SET @subj_diff = 0;
SET @scr2 = 0;
SET @scr3 = 0;
SET @cnt = 0;
SELECT new_timestamp timestamp,
  @subj_diff:=IF(@subj_diff IS NULL, subj_diff, @subj_diff + subj_diff) subj_diff,
  @scr2:=IF(@scr2 IS NULL, scr2, @scr2 + scr2) scr2,
  @scr3:=IF(@scr3 IS NULL, scr3, @scr3 + scr3) scr3,
  @cnt:=@cnt+1 count
FROM (
  SELECT
    DATE(timestamp) + INTERVAL 5  * (12 * HOUR(timestamp) + FLOOR(MINUTE(timestamp) / 5)) MINUTE new_timestamp,
    SUM(subj_diff) subj_diff,
    SUM(scr2) scr2,
    SUM(scr3) scr3,
    COUNT(*) count
  FROM classifier_results
  GROUP BY new_timestamp
) t;

+---------------------+-----------+---------+---------+-------+
| timestamp           | subj_diff | scr2    | scr3    | count |
+---------------------+-----------+---------+---------+-------+
| 2011-09-20 09:00:00 |         4 | 0.11000 | 3.40000 |     1 |
| 2011-09-20 14:20:00 |         9 | 0.32000 | 4.60000 |     2 |
| 2011-09-21 18:50:00 |        26 | 0.52000 | 7.60000 |     3 |
+---------------------+-----------+---------+---------+-------+
祝你好运!

答案 3 :(得分:0)

在我看来,您可能希望使用存储过程和游标深入研究数据库编程。

存储过程:http://dev.mysql.com/doc/refman/5.5/en/stored-programs-defining.html

游标:http://dev.mysql.com/doc/refman/5.5/en/cursors.html

这是一个巨大的球形蜡,超出了这篇文章的范围。