MySQL中值查询,用于检索每个分组ID的中位数

时间:2017-01-20 17:49:31

标签: mysql group-by median

中值查询返回类似于以下内容的结果:

vendor_id | median_invoice_total
97 ............. | 418

我希望中值查询的结果看起来类似于以下avg函数查询:

-- Reference query: one result row per vendor_id holding the mean of that
-- vendor's invoice_total values. The median query should have this shape.
SELECT
    vendor_id,
    avg(invoice_total)
FROM
    invoices
GROUP BY
    vendor_id;

中位数查询:

-- Median invoice_total for a single vendor (vendor_id = 97).
--
-- Each invoice is ranked inside the vendor by counting the vendor's invoices
-- that sort at or before it on (invoice_total, invoice_id). invoice_id is the
-- primary key, so ties on invoice_total still receive distinct, repeatable
-- ranks. This replaces the @row := @row + 1 counter: MySQL documents the
-- evaluation order of user-variable assignments within a statement as
-- undefined, so the counter was not guaranteed to follow the ORDER BY.
--
-- Output columns: vendor_id, median.
-- GROUP BY is required because vendor_id is selected next to AVG(); without
-- it the statement is rejected when ONLY_FULL_GROUP_BY is enabled. As a
-- consequence, a vendor with no invoices yields an empty result set.
--
-- Cost note: the correlated COUNT is quadratic in the number of invoices of
-- the vendor. On servers with window functions (MySQL 8.0+),
-- ROW_NUMBER() OVER (ORDER BY invoice_total, invoice_id) is the cheaper
-- equivalent.
SELECT
    ranked.vendor_id,
    AVG(ranked.invoice_total) AS median
FROM (
    SELECT
        iv.vendor_id,
        iv.invoice_total,
        (
            SELECT COUNT(*)
            FROM invoices AS lo
            WHERE lo.vendor_id = iv.vendor_id
              AND (
                  lo.invoice_total < iv.invoice_total
                  OR (lo.invoice_total = iv.invoice_total
                      AND lo.invoice_id <= iv.invoice_id)
              )
        ) AS row_num
    FROM invoices AS iv
    WHERE iv.vendor_id = 97
) AS ranked
CROSS JOIN (
    SELECT COUNT(*) AS cnt
    FROM invoices
    WHERE vendor_id = 97
) AS totals
-- cnt / 2 is a decimal division in MySQL, so this window keeps exactly one
-- row for an odd count (e.g. 5 -> row 3) and two rows for an even count
-- (e.g. 4 -> rows 2 and 3); AVG() then yields the median.
WHERE ranked.row_num >= totals.cnt / 2
  AND ranked.row_num <= (totals.cnt / 2) + 1
GROUP BY ranked.vendor_id;

我认为最关键的部分是第 3 个(也就是嵌套的)SELECT 语句。

-- Exploratory attempt at numbering invoice rows; the only output column is `row`.
-- @row is incremented both inside the derived table `t` and in the outer
-- SELECT, and nothing in this statement resets it, so the counter never
-- restarts when vendor_id changes.
-- NOTE(review): @row is not initialised in this statement; if the session has
-- not set it beforehand, @row + 1 evaluates to NULL -- confirm it is seeded
-- (e.g. SET @row := 0) before running.
-- The comma join pairs every row of `t` with every invoices row that has the
-- same vendor_id, so the outer query emits one row per matching pair.
SELECT @row:=@row+1 as `row`
FROM (SELECT @row:=@row+1 as `row`, vendor_id, invoice_total
      FROM invoices
      ORDER BY vendor_id, invoice_total) t, invoices inv
WHERE inv.vendor_id = t.vendor_id;

如果每次查询切换到不同的 vendor_id 时都能重置 @row 计数器,那将是一个巨大的进步。

表:

-- Invoice table: one row per invoice, keyed by invoice_id.
-- References `vendors` (vendor_id) and `terms` (terms_id) through foreign keys.
CREATE TABLE IF NOT EXISTS `invoices` (
    -- Columns
    `invoice_id`       INT          NOT NULL AUTO_INCREMENT,
    `vendor_id`        INT          NOT NULL,
    `invoice_number`   VARCHAR(50)  NOT NULL,
    `invoice_date`     DATE         NOT NULL,
    `invoice_total`    DECIMAL(9,2) NOT NULL,
    `payment_total`    DECIMAL(9,2) NOT NULL DEFAULT '0.00',
    `credit_total`     DECIMAL(9,2) NOT NULL DEFAULT '0.00',
    `terms_id`         INT          NOT NULL,
    `invoice_due_date` DATE         NOT NULL,
    `payment_date`     DATE         DEFAULT NULL,  -- the only nullable column

    -- Keys and secondary indexes
    PRIMARY KEY (`invoice_id`),
    KEY `invoices_fk_vendors` (`vendor_id`),
    KEY `invoices_fk_terms` (`terms_id`),
    KEY `invoices_invoice_date_ix` (`invoice_date`),

    -- Referential integrity
    CONSTRAINT `invoices_fk_terms`
        FOREIGN KEY (`terms_id`) REFERENCES `terms` (`terms_id`),
    CONSTRAINT `invoices_fk_vendors`
        FOREIGN KEY (`vendor_id`) REFERENCES `vendors` (`vendor_id`)
)
ENGINE = InnoDB
AUTO_INCREMENT = 119
DEFAULT CHARSET = latin1;

插入内容:

-- Sample rows for `invoices` (vendors 97, 123, 37 and 110).
-- The column list is spelled out so the statement does not depend on the
-- table's physical column order and keeps working if columns are added.
INSERT INTO `invoices` (
    `invoice_id`,
    `vendor_id`,
    `invoice_number`,
    `invoice_date`,
    `invoice_total`,
    `payment_total`,
    `credit_total`,
    `terms_id`,
    `invoice_due_date`,
    `payment_date`
)
VALUES
    (118,  97, '456792',    '2011-08-03',   565.60,   0.00, 0.00, 2, '2011-09-02', NULL),
    (117,  97, '456791',    '2011-08-03',  4390.00,   0.00, 0.00, 2, '2011-09-02', NULL),
    (116,  97, '456701',    '2011-08-02',   270.50,   0.00, 0.00, 2, '2011-09-01', NULL),
    (115,  97, '456789',    '2011-08-01',  8344.50,   0.00, 0.00, 2, '2011-08-31', NULL),
    (114, 123, '963253249', '2011-08-02',   127.75, 127.75, 0.00, 3, '2011-09-01', '2011-09-04'),
    (113,  37, '547480102', '2011-08-01',   224.00,   0.00, 0.00, 3, '2011-08-31', NULL),
    (112, 110, '0-2436',    '2011-07-31', 10976.06,   0.00, 0.00, 3, '2011-08-30', NULL),
    (111, 123, '263253257', '2011-07-30',    22.57,  22.57, 0.00, 3, '2011-08-29', '2011-09-03');

1 个答案:

答案 0 :(得分:1)

尝试使用以下查询,在每个 vendor_id 内分配行号:

-- Assigns a row number (rn) to every invoice within its vendor_id, counting
-- upward from 1 in ascending invoice_total order.
--
-- rn is the number of invoices of the same vendor that sort at or before the
-- current one on (invoice_total, invoice_id). invoice_id is the primary key,
-- so equal totals still get distinct, repeatable numbers.
--
-- This avoids the @rn / @prev_vid user variables: MySQL documents the
-- evaluation order of user-variable assignments within a statement as
-- undefined, and an ORDER BY inside a derived table may be discarded by the
-- optimizer, so the variable-based numbering was not guaranteed to follow
-- the intended order.
--
-- Output: every invoices column followed by rn.
-- Cost note: the correlated COUNT is quadratic per vendor. On MySQL 8.0+,
-- ROW_NUMBER() OVER (PARTITION BY vendor_id ORDER BY invoice_total, invoice_id)
-- is the cheaper equivalent.
SELECT
    iv.invoice_id,
    iv.vendor_id,
    iv.invoice_number,
    iv.invoice_date,
    iv.invoice_total,
    iv.payment_total,
    iv.credit_total,
    iv.terms_id,
    iv.invoice_due_date,
    iv.payment_date,
    (
        SELECT COUNT(*)
        FROM invoices AS lo
        WHERE lo.vendor_id = iv.vendor_id
          AND (
              lo.invoice_total < iv.invoice_total
              OR (lo.invoice_total = iv.invoice_total
                  AND lo.invoice_id <= iv.invoice_id)
          )
    ) AS rn
FROM invoices AS iv
ORDER BY
    iv.vendor_id,
    rn;

最终查询:

-- Median invoice_total per vendor_id: one output row per vendor with the
-- columns vendor_id and median.
--
-- Step 1 (ranked): number each invoice inside its vendor by counting the
--   vendor's invoices that sort at or before it on (invoice_total,
--   invoice_id). invoice_id is the primary key, so ties on invoice_total get
--   distinct, repeatable numbers. This replaces the @rn / @prev_vid user
--   variables, whose evaluation order MySQL documents as undefined, and the
--   unquoted alias `row`, which is a reserved word from MySQL 8.0.2 onward.
-- Step 2 (totals): count the invoices of each vendor.
-- Step 3: keep only the middle row(s) of each vendor and average them.
--
-- Cost note: the correlated COUNT is quadratic per vendor. On MySQL 8.0+,
-- ROW_NUMBER() OVER (PARTITION BY vendor_id ORDER BY invoice_total, invoice_id)
-- is the cheaper equivalent.
SELECT
    ranked.vendor_id,
    AVG(ranked.invoice_total) AS median
FROM (
    SELECT
        iv.vendor_id,
        iv.invoice_total,
        (
            SELECT COUNT(*)
            FROM invoices AS lo
            WHERE lo.vendor_id = iv.vendor_id
              AND (
                  lo.invoice_total < iv.invoice_total
                  OR (lo.invoice_total = iv.invoice_total
                      AND lo.invoice_id <= iv.invoice_id)
              )
        ) AS row_num
    FROM invoices AS iv
) AS ranked
INNER JOIN (
    SELECT
        vendor_id,
        COUNT(*) AS cnt
    FROM invoices
    GROUP BY vendor_id
) AS totals
    ON totals.vendor_id = ranked.vendor_id
-- cnt / 2 is a decimal division in MySQL, so this window keeps exactly one
-- row for an odd count (e.g. 5 -> row 3) and two rows for an even count
-- (e.g. 4 -> rows 2 and 3).
WHERE ranked.row_num >= totals.cnt / 2
  AND ranked.row_num <= (totals.cnt / 2) + 1
GROUP BY ranked.vendor_id
ORDER BY ranked.vendor_id;