中位数查询目前返回类似下面的结果（只包含一个供应商）:
vendor_id | median_invoice_total
97        | 418
我希望中位数查询的结果形式与下面的 avg 函数查询类似，即每个 vendor_id 返回一行:
-- Mean invoice_total for every vendor: one output row per distinct vendor_id.
-- The aggregate is left unaliased so its column label stays as written.
SELECT
    vendor_id,
    avg(invoice_total)
FROM
    invoices
GROUP BY
    vendor_id;
当前的中位数查询（只针对 vendor_id = 97 这一个供应商）:
-- Median invoice_total for one hard-coded vendor (vendor_id = 97).
--
-- Approach: number the vendor's invoices in ascending invoice_total order,
-- keep only the middle row (odd count) or the two middle rows (even count),
-- and average what is left.
--
-- GROUP BY makes vendor_id a grouping column, so the select list no longer
-- mixes a bare column with an aggregate. Consequence: when the vendor has no
-- invoices the query returns no row instead of a single all-NULL row.
--
-- NOTE(review): the numbering relies on @row being incremented in ORDER BY
-- order inside the same SELECT; MySQL does not document that ordering as
-- guaranteed -- confirm on the target server version.
SELECT
    t3.vendor_id,
    AVG(t3.middle_values) AS median
FROM (
    SELECT
        t1.invoice_total AS middle_values,
        t1.vendor_id
    FROM (
        -- Running row number over the vendor's invoices; the one-row derived
        -- table r only exists to reset @row to 0 for this statement.
        SELECT
            @row := @row + 1 AS `row`,
            iv.invoice_total,
            iv.vendor_id
        FROM invoices AS iv
        CROSS JOIN (SELECT @row := 0) AS r
        WHERE iv.vendor_id = 97
        ORDER BY iv.invoice_total
    ) AS t1
    INNER JOIN (
        -- Number of invoices for the same vendor.
        SELECT COUNT(*) AS cnt
        FROM invoices AS iv
        WHERE iv.vendor_id = 97
    ) AS t2
        -- Matches 1 row for odd-sized sets and 2 rows for even-sized sets.
        ON t1.`row` >= t2.cnt / 2
        AND t1.`row` <= (t2.cnt / 2) + 1
) AS t3
GROUP BY t3.vendor_id;
我认为最关键的部分是第 3 个 SELECT 语句（以及嵌套在其中的子查询）。
-- Experimental row-numbering attempt.
-- @row := @row + 1 appears both in the derived table t and in the outer
-- select list, so both levels advance the same counter.
-- @row is not initialised anywhere in this statement, and nothing resets it
-- when vendor_id changes.
SELECT @row:=@row+1 as `row`
FROM (SELECT @row:=@row+1 as `row`, vendor_id, invoice_total
FROM invoices
ORDER BY vendor_id, invoice_total) t, invoices inv
-- Comma join filtered on vendor_id: every row of t is paired with every
-- invoices row that has the same vendor_id.
WHERE inv.vendor_id = t.vendor_id;
如果每当查询切换到不同的 vendor_id 时都能重置 @row 计数器，那将是一个巨大的进步。
表:
-- invoices table, keyed by invoice_id, with foreign keys to terms and vendors.
-- AUTO_INCREMENT=119 sets the next generated invoice_id.
CREATE TABLE IF NOT EXISTS `invoices` (
    -- Primary key, generated automatically.
    `invoice_id`       INT(11)      NOT NULL AUTO_INCREMENT,
    -- References vendors.vendor_id (constraint invoices_fk_vendors).
    `vendor_id`        INT(11)      NOT NULL,
    `invoice_number`   VARCHAR(50)  NOT NULL,
    `invoice_date`     DATE         NOT NULL,
    -- Exact decimal amounts with two fractional digits.
    `invoice_total`    DECIMAL(9,2) NOT NULL,
    `payment_total`    DECIMAL(9,2) NOT NULL DEFAULT '0.00',
    `credit_total`     DECIMAL(9,2) NOT NULL DEFAULT '0.00',
    -- References terms.terms_id (constraint invoices_fk_terms).
    `terms_id`         INT(11)      NOT NULL,
    `invoice_due_date` DATE         NOT NULL,
    -- The only nullable column.
    `payment_date`     DATE         DEFAULT NULL,

    PRIMARY KEY (`invoice_id`),
    KEY `invoices_fk_vendors` (`vendor_id`),
    KEY `invoices_fk_terms` (`terms_id`),
    KEY `invoices_invoice_date_ix` (`invoice_date`),

    CONSTRAINT `invoices_fk_terms`
        FOREIGN KEY (`terms_id`) REFERENCES `terms` (`terms_id`),
    CONSTRAINT `invoices_fk_vendors`
        FOREIGN KEY (`vendor_id`) REFERENCES `vendors` (`vendor_id`)
)
    ENGINE = InnoDB
    AUTO_INCREMENT = 119
    DEFAULT CHARSET = latin1;
插入内容:
-- Sample rows for invoices (invoice_id 111-118).
-- The explicit column list ties each value to its column by name, so the
-- statement no longer depends on the physical column order of the table.
INSERT INTO `invoices` (
    `invoice_id`,
    `vendor_id`,
    `invoice_number`,
    `invoice_date`,
    `invoice_total`,
    `payment_total`,
    `credit_total`,
    `terms_id`,
    `invoice_due_date`,
    `payment_date`
) VALUES
    (118, 97,  '456792',    '2011-08-03', 565.60,   0.00,   0.00, 2, '2011-09-02', NULL),
    (117, 97,  '456791',    '2011-08-03', 4390.00,  0.00,   0.00, 2, '2011-09-02', NULL),
    (116, 97,  '456701',    '2011-08-02', 270.50,   0.00,   0.00, 2, '2011-09-01', NULL),
    (115, 97,  '456789',    '2011-08-01', 8344.50,  0.00,   0.00, 2, '2011-08-31', NULL),
    (114, 123, '963253249', '2011-08-02', 127.75,   127.75, 0.00, 3, '2011-09-01', '2011-09-04'),
    (113, 37,  '547480102', '2011-08-01', 224.00,   0.00,   0.00, 3, '2011-08-31', NULL),
    (112, 110, '0-2436',    '2011-07-31', 10976.06, 0.00,   0.00, 3, '2011-08-30', NULL),
    (111, 123, '263253257', '2011-07-30', 22.57,    22.57,  0.00, 3, '2011-08-29', '2011-09-03');
答案 0 :(得分:1)
尝试使用下面的查询，在每个 vendor_id 分组
中分配行号：
-- Number every invoice 1..n within its vendor, in ascending invoice_total
-- order. @prev_vid holds the vendor_id of the previously processed row:
-- when the current vendor_id equals it, @rn is incremented; otherwise
-- @prev_vid is updated and @rn restarts at 1.
SELECT
    t.*,
    @rn := IF(t.vendor_id = @prev_vid,
              @rn + 1,
              -- Both branches return 1; the IF is used only for the
              -- assignment to @prev_vid in its condition.
              IF(@prev_vid := t.vendor_id, 1, 1)) AS rn
FROM (
    SELECT *
    FROM invoices
    ORDER BY vendor_id, invoice_total
    -- NOTE(review): the LIMIT is intended to stop MySQL 5.7+ from merging
    -- this derived table into the outer query and discarding its ORDER BY;
    -- verify against the derived-table merging rules of the target version.
    LIMIT 18446744073709551615
) AS t
-- One-row derived table that resets both variables for this statement.
CROSS JOIN (SELECT @rn := 0, @prev_vid := -1) AS t2;
最终查询（按 vendor_id 分别计算中位数）:
-- Median invoice_total per vendor: one output row (vendor_id, median) per
-- vendor_id.
--
-- Steps:
--   1. sorted : invoices ordered by vendor_id, then invoice_total.
--   2. ranked : per-vendor row number rn, restarted whenever vendor_id changes.
--   3. counts : number of invoices per vendor.
--   4. Keep the middle row (odd count) or the two middle rows (even count)
--      of each vendor and average them.
--
-- The row-number alias is rn rather than row: ROW is a reserved word from
-- MySQL 8.0.2 on, so an unquoted alias named row is a syntax error there.
SELECT
    ranked.vendor_id,
    AVG(ranked.invoice_total) AS median
FROM (
    SELECT
        sorted.vendor_id,
        sorted.invoice_total,
        @rn := IF(sorted.vendor_id = @prev_vid,
                  @rn + 1,
                  -- Both branches return 1; the IF is used only for the
                  -- assignment to @prev_vid in its condition.
                  IF(@prev_vid := sorted.vendor_id, 1, 1)) AS rn
    FROM (
        SELECT
            vendor_id,
            invoice_total
        FROM invoices
        ORDER BY vendor_id, invoice_total
        -- NOTE(review): the LIMIT is intended to stop MySQL 5.7+ from merging
        -- this derived table into the enclosing query and discarding its
        -- ORDER BY; verify against the target version's merging rules.
        LIMIT 18446744073709551615
    ) AS sorted
    -- One-row derived table that resets both variables for this statement.
    CROSS JOIN (SELECT @rn := 0, @prev_vid := -1) AS vars
) AS ranked
INNER JOIN (
    SELECT
        vendor_id,
        COUNT(*) AS cnt
    FROM invoices
    GROUP BY vendor_id
) AS counts
    ON ranked.vendor_id = counts.vendor_id
    -- Matches 1 row for odd-sized groups and 2 rows for even-sized groups.
    AND ranked.rn >= counts.cnt / 2
    AND ranked.rn <= (counts.cnt / 2) + 1
GROUP BY ranked.vendor_id;