Hive:表达式不在 GROUP BY 键中(Expression not in GROUP BY key)

时间:2016-12-03 06:07:21

标签: hadoop hive

我需要在hive中执行此查询:

-- Builds one summary row per waybill_no for collections dated 2016-07-11 by
-- summing the rows of three UNIONed sub-selects (ticket_data,
-- manual_ticket_collection, manual_ticket_details), each inner-joined to
-- waybillprogramming.
-- NOTE(review): every SELECT here lists bare columns (vehicle_no,
-- WAYBILL_TYPE, etim_no, trip_no, route_no, ...) that are neither aggregated
-- nor named in its GROUP BY. Hive rejects that with
-- "Expression not in GROUP BY key".
-- NOTE(review): format() and str_to_date() look like MySQL functions. Hive's
-- documented built-ins are format_number() and unix_timestamp()/to_date() -
-- confirm against the target Hive version.
SELECT 
    waybill_no,
    vehicle_no, 
    WAYBILL_TYPE, 
    etim_no, 
    trip_no, 
    route_no, 
    -- Ticket counts: plain sums over the rows coming out of the UNION.
    sum(full_ticket_count) as full_ticket_count, 
    sum(half_ticket_count) as half_ticket_count, 
    sum(BOT_ticket_count) as BOT_ticket_count, 
    sum(manual_ticket_count) as manual_ticket_count, 
    sum(window_booking_count) as window_booking_count, 
    sum(pass_ticket_count) as pass_ticket_count, 
    sum(luggage_ticket_count) as luggage_ticket_count, 
    -- Amounts: summed, divided by 100 and formatted with 2 decimals
    -- (presumably the inputs are in minor currency units - TODO confirm).
    format((sum(base_fare)/100),2) as base_fare,
    -- Reported asc_amt is net of ysy_amt.
    format((sum(asc_amt-ysy_amt)/100),2) as asc_amt,
    format((sum(ysy_amt)/100),2) as ysy_amt, 
    format((sum(bsc)/100),2) as bsc,
    format((sum(toll)/100),2) as toll,
    format((sum(it)/100),2) as it,
    format((sum(sleeper)/100),2) as sleeper,
    format((sum(toll_fee)/100),2) as toll_fee,
    format((sum(bus_stand_fee)/100),2) as bus_stand_fee,
    sum(insp_ticket_count) as insp_ticket_count,
    -- NOTE(review): the next three amounts are summed without the /100
    -- scaling and formatting applied to the other amounts - confirm intent.
    sum(insp_ticket_amt) as insp_ticket_amt,
    sum(dhaba_collection) as dhaba_collection,
    sum( window_booking) as window_booking,
    format((sum(penalty_amt)/100),2) as penalty_amt,
    format((sum(luggage_amt)/100),2) as luggage_amt, 
    format((sum(total_amount)/100),2) as total_amount,
    format((sum(total_expenses)/100),2) as total_expenses 
FROM (
    -- Source 1: totals from ticket_data, excluding ticket_type '51'.
    -- Rows with ticket_code TO/BS/FX/CR contribute 0 to the full/half
    -- ticket counts and to the fare components.
    SELECT 
        -- NOTE(review): td.waybill_no is selected, but the GROUP BY below
        -- names wb.waybill_no. The join makes the values equal, yet they are
        -- different expressions - presumably what the reported error names.
        td.waybill_no, 
        td.vehicle_no,
        wb.WAYBILL_TYPE,
        td.etim_no,
        td.trip_no,
        route_no,
        sum( if (ticket_code IN('TO','BS','FX','CR'),0,full_ticket_count)) as full_ticket_count,
        sum( if (ticket_code IN('TO','BS','FX','CR'),0,half_ticket_count)) as half_ticket_count,
        -- Counts rows (not tickets) whose code is not FX/CR.
        sum( if (ticket_code IN('FX','CR'),0,1)) as BOT_ticket_count,
        -- Constant '0' (a string literal): this branch adds nothing to
        -- manual_ticket_count.
        '0' as manual_ticket_count,
        sum( if (ticket_code='WI',1,0)) as window_booking_count,
        sum( if (ticket_code='PS',(full_ticket_count+half_ticket_count),0)) as pass_ticket_count,
        sum(luggage_ticket_count) as luggage_ticket_count,
        sum( if (td.ticket_code IN('TO','BS','FX','CR'),0,((full_ticket_count*adult_basic_fare)+(half_ticket_count*child_basic_fare)))) as base_fare,
        sum( if (td.ticket_code IN('TO','BS','FX','CR'),0,((full_ticket_count*td.asc_amt)+(half_ticket_count*td.asc_amt)))) as asc_amt,
        -- ysy_amt: state_code_10 per ticket, skipping only TO/BS rows.
        -- A WI row is counted as exactly one full ticket and no half tickets.
        sum( 
            if (
                ticket_code IN('TO','BS'),0,((
                    if(ticket_code IN('WI'),1,full_ticket_count) * state_code_10
                )+(
                    if(ticket_code IN('WI'),0,half_ticket_count)*state_code_10
                ))
            )
        ) as ysy_amt,
        sum( if(td.ticket_code IN('TO','BS','FX','CR'),0,((full_ticket_count*td.bsc_amt)+(half_ticket_count*td.bsc_amt)))) as bsc,
        sum( if(td.ticket_code IN('TO','BS','FX','CR'),0,((full_ticket_count*td.toll_amt)+(half_ticket_count*td.toll_amt)))) as toll,
        sum( if(td.ticket_code IN('TO','BS','FX','CR'),0,((full_ticket_count*td.it_amt)+(half_ticket_count*td.it_amt)))) as it,
        sum( if(td.ticket_code IN('TO','BS','FX','CR'),0,((full_ticket_count*td.sleeper_amt)+(half_ticket_count*td.sleeper_amt)))) as sleeper,
        -- Per-code totals: each column sums total_amount for one ticket_code
        -- (insp_ticket_count sums full_ticket_count instead).
        sum( if(td.ticket_code='TO',(td.total_amount),0)) as toll_fee,
        sum( if(td.ticket_code='BS',(td.total_amount),0)) as bus_stand_fee,
        sum( if(td.ticket_code='IP',full_ticket_count,0)) as insp_ticket_count, 
        sum( if(td.ticket_code='IP',(td.total_amount),0)) as insp_ticket_amt,
        sum( if(td.ticket_code='DC',(td.total_amount),0)) as dhaba_collection,
        sum( if(td.ticket_code='WI',(td.total_amount),0)) as window_booking,
        sum( if(td.ticket_code='PN',(td.total_amount),0)) as penalty_amt,
        sum( if(td.ticket_code='LG',(td.total_amount),0)) as luggage_amt,
        -- total_amount: total_amount of rows other than TO/BS/FX/CR, plus
        -- adult_basic_fare for PA rows, minus total_amount of DC rows whose
        -- docket_no is 'SBICARD'.
        sum( 
            if(
                td.ticket_code IN('TO','BS','FX','CR'),0,td.total_amount
            ) + if(
                ticket_code='PA',adult_basic_fare,0
            ) - if(
                ticket_code='DC',if(docket_no='SBICARD',(total_amount),0),0
            )
        ) as total_amount,
        -- total_expenses: total_amount of TO/BS/WI rows, plus the base fare
        -- of PA rows, plus total_amount of rows with e_purse_flag = '1'.
        sum( 
            if(
                td.ticket_code NOT IN('TO','BS','WI'),0,td.total_amount
            )+if(
                td.ticket_code='PA',((full_ticket_count*adult_basic_fare)+(half_ticket_count*child_basic_fare)),0
            )+if(
                e_purse_flag='1',total_amount,0
            )
        ) as total_expenses,
        -- NOTE(review): bare ticket_code is not in the GROUP BY and is never
        -- read by the outer query.
        td.ticket_code
    FROM ticket_data as td
    INNER JOIN waybillprogramming wb on wb.waybill_no=td.waybill_no
    WHERE wb.collection_tm  between str_to_date('2016-07-11 00:00:00','%Y-%m-%d %H:%i:%s') and str_to_date('2016-07-11 23:59:59','%Y-%m-%d %H:%i:%s')   AND td.`ticket_type` != '51'
    GROUP BY wb.waybill_no

    -- NOTE(review): plain UNION is UNION DISTINCT, so identical rows from
    -- different branches collapse before the outer SUM. UNION ALL is
    -- probably what is intended - confirm.
    UNION

    -- Source 2: totals from manual_ticket_collection. Only BOT_ticket_count,
    -- total_amount and total_expenses are computed. Every other measure is
    -- the constant '0'. Amounts are multiplied by 100, presumably to match
    -- the unit used by source 1 - TODO confirm.
    SELECT
        wb.waybill_no,
        '' as vehicle_no,
        wb.WAYBILL_TYPE,
        wb.etim_no ,
        '0' as trip_no,
        '0' as route_no,
        '0' as full_ticket_count,
        '0' as half_ticket_count,sum(NO_OF_PASSENGER) as BOT_ticket_count,
        '0' as manual_ticket_count,
        '0' as window_booking_count,
        '0' as pass_ticket_count,
        '0' as luggage_ticket_count,
        '0' as base_fare,
        '0' as asc_amt,
        '0' as ysy_amt,
        '0' as bsc,
        '0' as toll,
        '0' as it,
        '0' as sleeper,
        '0' as toll_fee,
        '0' as bus_stand_fee,
        '0' as insp_ticket_count,
        '0' as insp_ticket_amt,
        '0' as dhaba_collection,
        '0' as window_booking,
        '0' as penalty_amt,
        '0' as luggage_amt,
        ( sum( TICKET_SALE_AMOUNT + RECOVERY_AMT + DHABA_COLLECTION ) * 100 ) as total_amount,
        ( sum( mtc.BRIDGE_FEE + mtc.BUS_STAND_FEE + mtc.REFUND_AMT + mtc.DIESEL_PERCHASED + mtc.OTHER_EXPENSES + mtc.OTHER_BOOKING + mtc.PART_CASH ) * 100 ) as  total_expenses,
        -- Fixed marker code for rows coming from this branch.
        'NN' as ticket_code
    FROM manual_ticket_collection mtc
    INNER JOIN waybillprogramming wb on mtc.WAYBILL_NO = wb.WAYBILL_NO
    WHERE collection_tm between str_to_date('2016-07-11 00:00:00','%Y-%m-%d %H:%i:%s') and str_to_date('2016-07-11 23:59:59','%Y-%m-%d %H:%i:%s')
    GROUP BY wb.waybill_no 

    UNION

    -- Source 3: totals from manual_ticket_details. Fare components skip
    -- TO/BS rows and are multiplied by 100.
    SELECT 
        wb.waybill_no,
        '' as vehicle_no,
        wb.WAYBILL_TYPE,
        wb.etim_no,
        '0' as trip_no,
        '0' as route_no,
        '0' as full_ticket_count,
        '0' as half_ticket_count,
        '0' as BOT_ticket_count,
        -- count() skips rows whose ticket_code is NULL.
        count(ticket_code) as manual_ticket_count,
        sum(if(ticket_code='WI',(full_ticket+half_ticket),0)) as window_booking_count,
        sum(if(ticket_code='PS',(full_ticket+half_ticket),0)) as pass_ticket_count,
        sum(lugg_ticket) as luggage_ticket_count,
        sum(if(mtd.ticket_code IN('TO','BS'),0,((full_ticket*full_base_fare)+(half_ticket*half_base_fare)))*100) as base_fare,
        sum(if(mtd.ticket_code IN('TO','BS'),0,((full_ticket*mtd.asc_amt)+(half_ticket*mtd.asc_amt)))*100) as asc_amt,
        '0' as ysy_amt,
        sum(if(mtd.ticket_code IN('TO','BS'),0,((full_ticket*mtd.bsc_amt)+(half_ticket*mtd.bsc_amt)))*100) as bsc,
        sum(if(mtd.ticket_code IN('TO','BS'),0,((full_ticket*mtd.toll_amt)+(half_ticket*mtd.toll_amt)))*100) as toll,
        sum(if(mtd.ticket_code IN('TO','BS'),0,((full_ticket*mtd.it_amt)+(half_ticket*mtd.it_amt)))*100) as it,
        sum(if(mtd.ticket_code IN('TO','BS'),0,((full_ticket*mtd.sleeper_amt)+(half_ticket*mtd.sleeper_amt)))*100) as sleeper,
        -- NOTE(review): toll_fee is not multiplied by 100 here, unlike
        -- bus_stand_fee below, yet the outer query divides both by 100 -
        -- confirm.
        sum(if(mtd.ticket_code='TO',(mtd.total_amt),0)) as toll_fee,
        sum(if(mtd.ticket_code='BS',(mtd.total_amt),0)*100) as bus_stand_fee,
        sum(if(mtd.ticket_code='IP',full_ticket,0)) as insp_ticket_count,
        sum(if(mtd.ticket_code='IP',(mtd.total_amt),0)*100) as insp_ticket_amt,
        -- dhaba_collection and window_booking are left unscaled here. The
        -- outer query does not divide them by 100 either.
        sum(if(mtd.ticket_code='DC',(mtd.total_amt),0)) as dhaba_collection,
        sum(if(mtd.ticket_code='WI',(mtd.total_amt),0)) as window_booking,
        sum(if(mtd.ticket_code='PN',(mtd.total_amt),0)*100) as penalty_amt,
        sum(if(mtd.ticket_code='LG',(mtd.total_amt),0)*100) as luggage_amt,
        sum(total_amt*100) as total_amount,
        -- total_expenses: total_amt of TO and BS rows, multiplied by 100.
        sum((if(ticket_code='TO', total_amt,0)+if(ticket_code='BS', total_amt,0))*100) as total_expenses,
        -- NOTE(review): bare ticket_code, not in the GROUP BY.
        mtd.ticket_code
    FROM manual_ticket_details mtd
    INNER JOIN waybillprogramming wb on mtd.WAYBILL_NO = wb.WAYBILL_NO
    WHERE collection_tm between str_to_date('2016-07-11 00:00:00','%Y-%m-%d %H:%i:%s') and str_to_date('2016-07-11 23:59:59','%Y-%m-%d %H:%i:%s') 
    GROUP BY wb.waybill_no
)abc
-- Collapses the rows from all three sources into one row per waybill.
GROUP BY waybill_no;

但是我在执行该查询时遇到了以下错误:

  

FAILED: SemanticException [Error 10025]: Line 35:2 Expression not in GROUP BY key 'waybill_no'

2 个答案:

答案 0 :(得分:1)

您选择的是 td.waybill_no,却按 wb.waybill_no 进行分组(GROUP BY wb.waybill_no)。由于这是内连接,两列的值相同,请尝试改为选择 wb.waybill_no。

答案 1 :(得分:1)

以下内容引用自 Apache Hive wiki:


但是,下面的查询不起作用:

-- Deliberately invalid example: b is selected but is neither wrapped in an
-- aggregate function nor listed in the GROUP BY clause.
SELECT
   a,
   b
FROM
   t1
GROUP BY
   a;

这是因为select子句还有一个附加列(b),它不包含在group by子句中(并且它也不是聚合函数)。这是因为,如果表t1看起来像:

| a   | b |
| 100 | 1 |
| 100 | 2 |
| 100 | 3 |

由于仅对 a 进行分组,Hive 应该为 a = 100 的组显示 b 的哪个值?有人会说应该是第一个值或最小值,但我们都同意存在多种可能的选择。为了消除这种猜测,Hive 规定:如果 select 子句中含有未包含在 group by 子句中的列,该查询就是无效的 SQL(准确地说是 HQL)。


可以把它理解为 MapReduce 范式中的 reduce 步骤:select 子句中的每一列要么使用聚合函数,要么必须包含在 GROUP BY 子句中。