Question

我的表格结构与此类似

Customer_id Country item_type   Order_Size  Dates      Codes
A401           US   Fruit        Small       3/14/2016  11
A401           US   Fruit        Big         5/22/2016  12
A401           US   Vegetable   Small        7/12/2016  11
B509           US   Vegetable   Small        3/25/2015  92
B509           US   Vegetable   Big          3/15/2014  11
B509           US   Vegetable   Small        3/1/2014   34
A402           CA   Fruit       Small        3/14/2016  56
A402           CA   Fruit       Big          5/22/2016  76
A402           CA   Fruit       Small       7/12/2016   85
A403           CA   Vegetable   Small       7/12/2016   11
A403           CA   Vegetable   Small       3/25/2015   16
A403           CA   Vegetable   Big         3/15/2014   17
A403           CA   Vegetable   Small       3/1/2014    12

我想统计每个国家/地区、每个 item_type 下有多少回头客：只统计客户在购买 Order_size = Big 之后，以 order_size <> Big 购买的商品。为此，我写了这段代码。

 -- Per country and item type, count the customers that have at least one
 -- purchase dated after their earliest 'Big' order of that item type.
 SELECT
     d.country,
     d.item_type,
     count(customer_id)
 FROM (
     -- One row per (country, customer, item type) that has later purchases;
     -- repeats is the number of such purchases.
     SELECT
         f.country,
         f.customer_id,
         f.item_type,
         COUNT(*) AS repeats
     FROM (
         -- Tag every row with the earliest 'Big' order date of its
         -- (customer, item type) partition; NULL when there is none.
         SELECT
             src.*,
             MIN(CASE WHEN src.order_size = 'Big' THEN src.dates END)
                 OVER (PARTITION BY src.customer_id, src.item_type) AS min_big
         FROM data_test AS src
     ) AS f
     WHERE f.dates > f.min_big
     GROUP BY f.country, f.customer_id, f.item_type
 ) AS d
 GROUP BY d.country, d.item_type

结果：

Country item_type   Count(Distinct(Customer_id))
CA  Vegetable   1
US  Vegetable   1
CA  Fruit   1

这段代码现在可以正常运行，但我想再增加一个条件：只有当 Codes 出现在另一张表中满足特定条件的记录里时才计入。因此我修改了代码，在 CASE WHEN 中加入了多个条件，其中一个是子查询。

-- Asker's modified attempt: the same query as the working version, except
-- that the CASE inside the windowed MIN additionally requires Codes to be
-- returned by a subquery on table1.
-- As reported right after this query, it is rejected with an
-- "illegal expression in WHEN clause of CASE" error.
SELECT Country,item_type,count(customer_id) from
   (select Country,customer_id, t.item_type, count(*)  as REPEATS
     from (select t.*,
         -- the IN (subquery) below is the newly added condition
         min(case when (Order_Size = 'Big' and Codes IN (SELECT CODES from table1 where type='TRUE' group by 1)) then dates end) over (partition by customer_id, item_type) as min_big
  from data_test as t
 ) t
where dates > min_big
group by 1,2,3) D
group by 1,2

这段代码会报错：CASE 语句的 WHEN 子句中出现非法表达式。我还读到，在 CASE 中既不能使用子查询，也不能使用 IN。我已经阅读了许多相关问题，但仍然不清楚如何在 CASE 中避免使用子查询。我该如何修改代码，使其既不报错，又能快速运行？因为我的表非常大。

Answer 1

您可以修改代码，先连接到那张决定“此代码是否应被包含？”的表，再应用其他规则。请确保外连接的数据集在代码字段上是唯一的，以防止产生重复行。

这种方法会在应用其他规则之前先得到该条件的结果，并且不会引发错误。下面我创建了一个带有唯一主索引的 volatile 表来实现这一点，但您也可以连接一个派生表来获得类似的结果。

-- Volatile sample table mirroring the column layout shown in the question.
CREATE VOLATILE TABLE vt_fruit_exp (
    Customer_id CHAR(4),
    Country     CHAR(2),
    item_type   VARCHAR(20),
    Order_Size  CHAR(5),
    Dates       DATE,
    Codes       BYTEINT
)
PRIMARY INDEX (Customer_id)
ON COMMIT PRESERVE ROWS;

-- Load the sample rows from the question into vt_fruit_exp.
-- Columns are listed explicitly so the statements do not depend on the
-- table's column order, and dates use ANSI DATE literals so they do not
-- rely on the session's date format for implicit string-to-date conversion.
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes)
VALUES ('A401', 'US', 'Fruit',     'Small', DATE '2016-03-14', 11);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes)
VALUES ('A401', 'US', 'Fruit',     'Big',   DATE '2016-05-22', 12);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes)
VALUES ('A401', 'US', 'Vegetable', 'Small', DATE '2016-07-12', 11);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes)
VALUES ('B509', 'US', 'Vegetable', 'Small', DATE '2015-03-25', 92);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes)
VALUES ('B509', 'US', 'Vegetable', 'Big',   DATE '2014-03-15', 11);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes)
VALUES ('B509', 'US', 'Vegetable', 'Small', DATE '2014-03-01', 34);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes)
VALUES ('A402', 'CA', 'Fruit',     'Small', DATE '2016-03-14', 56);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes)
VALUES ('A402', 'CA', 'Fruit',     'Big',   DATE '2016-05-22', 76);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes)
VALUES ('A402', 'CA', 'Fruit',     'Small', DATE '2016-07-12', 85);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes)
VALUES ('A403', 'CA', 'Vegetable', 'Small', DATE '2016-07-12', 11);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes)
VALUES ('A403', 'CA', 'Vegetable', 'Small', DATE '2015-03-25', 16);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes)
VALUES ('A403', 'CA', 'Vegetable', 'Big',   DATE '2014-03-15', 17);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes)
VALUES ('A403', 'CA', 'Vegetable', 'Small', DATE '2014-03-01', 12);

-- Lookup table of codes; Code_In_flg = 1 marks a code as included.
-- The unique primary index on Codes guarantees at most one row per code.
CREATE VOLATILE TABLE Table1 (
    Codes       BYTEINT,
    Code_In_flg BYTEINT
)
UNIQUE PRIMARY INDEX (Codes)
ON COMMIT PRESERVE ROWS;

INSERT INTO Table1 (Codes, Code_In_flg) VALUES (11, 1);
INSERT INTO Table1 (Codes, Code_In_flg) VALUES (76, 1);
INSERT INTO Table1 (Codes, Code_In_flg) VALUES (12, 1);

-- Each country-> how many repeated customers for each item_type are present AFTER they purchased Order_size=Big.  Only items purchased with order_size<>Big
-- Country item_type   Count(Distinct(Customer_id))
-- CA  Vegetable   1
-- US  Vegetable   1
-- CA  Fruit       1

-- Baseline: per country and item type, count the customers that have at
-- least one purchase dated after their earliest 'Big' order of that item type.
SELECT
    d.country,
    d.item_type,
    COUNT(d.customer_id)
FROM (
    -- One row per (country, customer, item type) with later purchases;
    -- repeats is the number of such purchases.
    SELECT
        f.country,
        f.customer_id,
        f.item_type,
        COUNT(*) AS repeats
    FROM (
        SELECT
            t.country,
            t.customer_id,
            t.item_type,
            t.dates,
            -- Earliest 'Big' order date in the (customer, item type)
            -- partition; NULL when the partition has no 'Big' order.
            -- The literal is spelled exactly as stored ('Big') so the
            -- match does not depend on case-insensitive comparison.
            MIN(CASE WHEN t.order_size = 'Big' THEN t.dates END)
                OVER (PARTITION BY t.customer_id, t.item_type) AS min_big
        FROM vt_fruit_exp AS t
    ) AS f
    -- A NULL min_big makes this comparison unknown, so customers without
    -- a 'Big' order are dropped here.
    WHERE f.dates > f.min_big
    GROUP BY f.country, f.customer_id, f.item_type
) AS d
GROUP BY d.country, d.item_type;

-- This works now but I wanted to add one more condition as to only when the codes are within certain table with condition so I wanted to add multiple conditions with one being subquery with the case when I modified my code.

-- Use a join to the table that says whether a code is to be included, instead of attempting a subquery within an ordered analytic function
-- Same count as the baseline query, but a 'Big' order only qualifies as the
-- starting point when its code is flagged in Table1. The flag is brought in
-- with a join, which avoids a subquery inside the CASE of the window function.
SELECT
    d.country,
    d.item_type,
    COUNT(d.customer_id)
FROM (
    -- One row per (country, customer, item type) with later purchases;
    -- repeats is the number of such purchases.
    SELECT
        f.country,
        f.customer_id,
        f.item_type,
        COUNT(*) AS repeats
    FROM (
        SELECT
            t.country,
            t.customer_id,
            t.item_type,
            t.dates,
            -- Earliest 'Big' order date whose code is flagged in Table1;
            -- NULL when the partition has no such order. For codes missing
            -- from Table1, b.Code_In_flg is NULL and the CASE yields NULL.
            -- The literal is spelled exactly as stored ('Big') so the
            -- match does not depend on case-insensitive comparison.
            MIN(CASE
                    WHEN t.order_size = 'Big' AND b.Code_In_flg = 1
                    THEN t.dates
                END)
                OVER (PARTITION BY t.customer_id, t.item_type) AS min_big
        FROM vt_fruit_exp AS t
        -- Outer join: purchases whose code is not in Table1 must stay in
        -- the set, because they can still count as later purchases.
        LEFT JOIN Table1 AS b
            ON t.Codes = b.Codes
    ) AS f
    WHERE f.dates > f.min_big
    GROUP BY f.country, f.customer_id, f.item_type
) AS d
GROUP BY d.country, d.item_type;

使用子查询（subquery）时，如何替代 CASE 以避免错误（CASE 语句的 WHEN 子句中出现非法表达式）

1 个答案: