-- Demo table for the sampling question: one row per recorded sale.
CREATE TABLE sales (
    id           SERIAL PRIMARY KEY,  -- auto-generated surrogate key
    event_date   DATE,                -- date attached to the sale
    customer     VARCHAR,             -- customer label, e.g. 'Customer_A'
    orderID      VARCHAR,             -- order label; the unquoted name is folded to "orderid" by PostgreSQL
    sales_volume DECIMAL              -- volume of the sale (no precision/scale declared)
);
-- Fixture rows for the sampling examples: 20 sales spread over January, April and June 2020.
-- sales_volume is written as numeric literals so nothing relies on implicit
-- string-to-DECIMAL coercion.
-- NOTE(review): Customer_B reuses Order_005 / Order_006 in June although both ids
-- already appear in April — confirm this duplication is intentional.
INSERT INTO sales
    (event_date, customer, orderID, sales_volume)
VALUES
    ('2020-01-08', 'Customer_A', 'Order_001', 130),
    ('2020-01-12', 'Customer_A', 'Order_002', 120),
    ('2020-01-18', 'Customer_B', 'Order_001', 115),
    ('2020-01-22', 'Customer_B', 'Order_002', 300),
    ('2020-01-23', 'Customer_B', 'Order_003', 540),
    ('2020-01-24', 'Customer_C', 'Order_001', 421),
    ('2020-01-26', 'Customer_D', 'Order_001', 198),
    ('2020-04-08', 'Customer_B', 'Order_004', 325),
    ('2020-04-09', 'Customer_B', 'Order_005', 325),
    ('2020-04-11', 'Customer_B', 'Order_006', 425),
    ('2020-04-15', 'Customer_D', 'Order_002', 914),
    ('2020-04-18', 'Customer_D', 'Order_003', 418),
    ('2020-04-20', 'Customer_E', 'Order_001', 723),
    ('2020-04-30', 'Customer_C', 'Order_002', 665),
    ('2020-06-01', 'Customer_B', 'Order_005', 982),
    ('2020-06-15', 'Customer_B', 'Order_006', 100),
    ('2020-06-19', 'Customer_C', 'Order_003', 250),
    ('2020-06-20', 'Customer_C', 'Order_004', 322),
    ('2020-06-30', 'Customer_E', 'Order_002', 924),
    ('2020-06-25', 'Customer_A', 'Order_003', 445);
预期结果:
customer | orderid | event_date | sales_volume
-------------|---------------|--------------------|----------------------
Customer_A | Order_002 | 2020-01-12 | 120
Customer_A | Order_001 | 2020-01-08 | 130
Customer_B | Order_002 | 2020-01-22 | 300
-------------|---------------|--------------------|------------------------
Customer_E | Order_001 | 2020-04-20 | 723
-------------|---------------|--------------------|------------------------
Customer_B | Order_005 | 2020-06-01 | 982
Customer_A | Order_003 | 2020-06-25 | 445
我有一个庞大的数据库,需要从中提取一些数据用于案例研究。
问题是我需要提取全年的数据,因为我希望能够在案例研究中进行月度分析。因此,我不能简单地用日期条件或 LIMIT 来限制提取范围。
因此,我的想法是编写一个查询,针对每个月、基于 customer 列随机提取 1-3 行。
应满足以下条件:
- 同一客户在同一个月内可以被抽中多次(例如示例中的 Customer_A);
- 抽取应当是随机的(例如使用 ORDER BY random())。
你知道这是否可能吗?
如果是,我需要如何修改以下查询?
-- Starting point from the question: total volume per customer / order / day,
-- with the result rows returned in random order.
SELECT
    s.customer,
    s.orderID,
    s.event_date,
    SUM(s.sales_volume) AS sales_volume
FROM sales AS s
GROUP BY
    s.customer,
    s.orderID,
    s.event_date
ORDER BY random();
答案 0 :(得分:0)
下面的查询能满足你的需求吗?
-- Sample up to three random rows for every calendar month.
-- The sample is partitioned by month (not by customer) so that each month of
-- the year is represented, as the monthly analysis requires.
with volumes as (
    -- One row per customer / order / day with the summed volume.
    select
        s.customer,
        s.orderid,
        s.event_date,
        sum(s.sales_volume) as sales_volume
    from sales as s
    group by
        s.customer,
        s.orderid,
        s.event_date
),
shuffled as (
    -- Number the rows of each month in random order.
    select
        v.customer,
        v.orderid,
        v.event_date,
        v.sales_volume,
        date_trunc('month', v.event_date) as sample_month,
        row_number() over (
            partition by date_trunc('month', v.event_date)
            order by random()
        ) as seq
    from volumes as v
)
select
    customer,
    orderid,
    event_date,
    sales_volume
from shuffled
where seq <= 3  -- keep at most three rows per month
order by sample_month, seq
答案 1 :(得分:0)
我修改了您的 DB-Fiddle。
将每个月内的行随机排序,然后从结果中提取 1-3 行:
-- Shuffle the rows inside each calendar month, then keep a random 1-3 of them.
WITH shuffled AS (
    SELECT
        customer,
        orderID,
        event_date,
        sales_volume,
        -- Position of the row within its month after random ordering.
        rank() OVER (
            PARTITION BY date_trunc('month', event_date)
            ORDER BY random()
        ) AS rand
    FROM sales
)
SELECT
    customer,
    orderID,
    event_date,
    sales_volume,
    rand
FROM shuffled
-- random() is evaluated for each row, so every position is tested on its own:
-- position 1 practically always passes, positions 2 and 3 pass with decreasing
-- probability, and positions 4 and above never pass.
WHERE rand < 1 + random() * 3
ORDER BY event_date
我按 event_date 排序,以便把同一个月的行放在一起;您可以根据需要调整排序方式。
此外,您可以增大系数来提高取到 3 行的概率,同时仍将上限限制为 3 行,例如:
-- Variant filter for the query above: the larger factor (6 instead of 3) makes
-- positions 2 and 3 more likely to pass, while "rand <= 3" still caps the
-- result at three rows per month.
where rand < 1 + random() * 6 and rand <= 3