在UNION中使用Order By NULL

时间:2011-03-30 13:20:37

标签: mysql sql

我有一个查询(见下文),其中使用了一个自定义开发的UDF,用于计算某些点是否位于多边形(UNION中的第一个查询)或圆形(UNION中的第二个查询)内。

-- Branch 1: polygon boundaries (geo_boundary_type_id = 3).
-- GeoBoundaryContains is the custom UDF; here it receives the vertex
-- coordinates and vertex id of each geo_boundary_vertex row.
SELECT
    e.inquiry_match_type_id,
    a.geo_boundary_id,
    GeoBoundaryContains(
        c.tpi_geo_boundary_coverage_type_id,
        29.287437,
        -95.055807,
        a.lat,
        a.lon,
        a.geo_boundary_vertex_id
    ) AS in_out,
    e.inquiry_id,
    e.external_id,
    COALESCE(f.inquiry_device_id, 0) AS inquiry_device_id,
    b.external_info1,
    b.external_info2,
    b.geo_boundary_id,
    b.geo_boundary_type_id
FROM geo_boundary_vertex AS a
INNER JOIN geo_boundary AS b
    ON b.geo_boundary_id = a.geo_boundary_id
INNER JOIN trackpoint_index_geo_boundary_mem AS c
    ON c.geo_boundary_id = b.geo_boundary_id
INNER JOIN trackpoint_index_mem AS d
    ON d.trackpoint_index_id = c.trackpoint_index_id
INNER JOIN inquiry_mem AS e
    ON e.inquiry_id = b.inquiry_id
-- The device filter stays in ON so inquiries without device 3201 are kept.
LEFT JOIN inquiry_device_mem AS f
    ON f.inquiry_id = e.inquiry_id
    AND f.device_id = 3201
WHERE d.trackpoint_index_id = 3127
    AND b.geo_boundary_type_id = 3
    AND e.expiration_date >= NOW()
GROUP BY a.geo_boundary_id
UNION
-- Branch 2: circular boundaries (geo_boundary_type_id = 2).
-- The UDF receives the centroid and radius stored on geo_boundary itself.
SELECT
    e.inquiry_match_type_id,
    b.geo_boundary_id,
    GeoBoundaryContains(
        c.tpi_geo_boundary_coverage_type_id,
        29.287437,
        -95.055807,
        b.centroid_lat,
        b.centoid_lon,
        b.radius
    ) AS in_out,
    e.inquiry_id,
    e.external_id,
    COALESCE(f.inquiry_device_id, 0) AS inquiry_device_id,
    b.external_info1,
    b.external_info2,
    b.geo_boundary_id,
    b.geo_boundary_type_id
FROM geo_boundary AS b
INNER JOIN trackpoint_index_geo_boundary_mem AS c
    ON c.geo_boundary_id = b.geo_boundary_id
INNER JOIN trackpoint_index_mem AS d
    ON d.trackpoint_index_id = c.trackpoint_index_id
INNER JOIN inquiry_mem AS e
    ON e.inquiry_id = b.inquiry_id
LEFT JOIN inquiry_device_mem AS f
    ON f.inquiry_id = e.inquiry_id
    AND f.device_id = 3201
WHERE d.trackpoint_index_id = 3127
    AND b.geo_boundary_type_id = 2
    AND e.expiration_date >= NOW()
GROUP BY b.geo_boundary_id

当我对该查询运行EXPLAIN时,得到以下结果:

 id      select_type     table       type     possible_keys                                                                                                                                              key                                  key_len     ref                       rows     Extra                           
 ------  --------------  ----------  -------  ---------------------------------------------------------------------------------------------------------------------------------------------------------  -----------------------------------  ----------  ------------------------  -------  ------------------------------- 
 1       PRIMARY         d           const    PRIMARY                                                                                                                                                    PRIMARY                              4           const                     1        Using temporary; Using filesort 
 1       PRIMARY         c           ref      PRIMARY,fk_mtp_idx_geo_boundary_mtp_idx,fk_mtp_idx_geo_boundary_geo_boundary,fk_mtp_idx_geo_boundary_mtp_mem_idx,fk_mtp_idx_geo_boundary_geo_boundary_mem  fk_mtp_idx_geo_boundary_mtp_idx      4           const                     9                                        
 1       PRIMARY         b           eq_ref   PRIMARY,fk_geo_boundary_inquiry,fk_geo_boundary_geo_boundary_type                                                                                          PRIMARY                              4           gothim.c.geo_boundary_id  1        Using where                     
 1       PRIMARY         e           eq_ref   PRIMARY                                                                                                                                                    PRIMARY                              4           gothim.b.inquiry_id       1        Using where                     
 1       PRIMARY         f           ref      fk_inquiry_device_mem_inquiry                                                                                                                              fk_inquiry_device_mem_inquiry        4           gothim.e.inquiry_id       2                                        
 1       PRIMARY         a           ref      fk_geo_boundary_vertex_geo_boundary                                                                                                                        fk_geo_boundary_vertex_geo_boundary  4           gothim.b.geo_boundary_id  11       Using where                     
 2       UNION           d           const    PRIMARY                                                                                                                                                    PRIMARY                              4           const                     1        Using temporary; Using filesort 
 2       UNION           c           ref      PRIMARY,fk_mtp_idx_geo_boundary_mtp_idx,fk_mtp_idx_geo_boundary_geo_boundary,fk_mtp_idx_geo_boundary_mtp_mem_idx,fk_mtp_idx_geo_boundary_geo_boundary_mem  fk_mtp_idx_geo_boundary_mtp_idx      4           const                     9                                        
 2       UNION           b           eq_ref   PRIMARY,fk_geo_boundary_inquiry,fk_geo_boundary_geo_boundary_type                                                                                          PRIMARY                              4           gothim.c.geo_boundary_id  1        Using where                     
 2       UNION           e           eq_ref   PRIMARY                                                                                                                                                    PRIMARY                              4           gothim.b.inquiry_id       1        Using where                     
 2       UNION           f           ref      fk_inquiry_device_mem_inquiry                                                                                                                              fk_inquiry_device_mem_inquiry        4           gothim.e.inquiry_id       2                                        
 (null)  UNION RESULT    <union1,2>  ALL      (null)                                                                                                                                                     (null)                               (null)      (null)                    (null)   Using filesort                  

 12 record(s) selected [Fetch MetaData: 1ms] [Fetch Data: 5ms] 

现在,我可以拆分查询并使用ORDER BY NULL技巧来消除filesort(文件排序),但是当我尝试将它添加到UNION的末尾时,它不起作用。

我正在考虑将查询分成2个查询,或者完全重写它而不使用UNION(当然这会困难一些)。另一个对我不利的因素是我们已经在使用这个查询了,我想限制更改的范围——我本来希望只要在查询末尾加上ORDER BY NULL就能解决问题,但它对UNION不起作用。

非常感谢任何帮助。

3 个答案:

答案 0 :(得分:7)

通常情况下,ORDER BY可用于UNION内的各个查询,如下所示:

-- Sketch only ("…" stands for the rest of the FROM list): ORDER BY NULL is
-- attached to each parenthesized branch of the UNION ALL rather than to the
-- UNION as a whole.
-- Per the MySQL documentation quoted after this snippet, an ORDER BY without
-- LIMIT inside a UNION branch is optimized away.
(
SELECT  *
FROM    table1, …
GROUP BY
        id
ORDER BY 
        NULL
)
UNION ALL
(
SELECT  *
FROM    table2, …
GROUP BY
        id
ORDER BY 
        NULL
)

但是,正如文档所述:

  

但是,对单个SELECT语句使用ORDER BY并不能决定行在最终结果中出现的顺序,因为默认情况下UNION会生成一组无序的行。因此,在这种情况下ORDER BY通常与LIMIT结合使用,用来确定该SELECT要检索的所选行的子集,即使它不一定会影响这些行在最终UNION结果中的顺序。如果ORDER BY在SELECT中出现时没有带LIMIT,它会被优化掉,因为它无论如何都不会产生任何效果。

这当然是一个聪明的做法,但还不够聪明,因为他们忘了把GROUP BY的排序行为也一并优化掉。

目前,您应该为各个子查询添加一个非常大的LIMIT:

-- Workaround sketch ("…" stands for the rest of the FROM list): each branch
-- pairs ORDER BY NULL with a LIMIT.
-- The quoted MySQL documentation says ORDER BY is only optimized away when it
-- appears without LIMIT; 100000000 is simply a very high bound, as the answer
-- text recommends.
(
SELECT  *
FROM    table1, …
GROUP BY
        id
ORDER BY 
        NULL
LIMIT 100000000
)
UNION ALL
(
SELECT  *
FROM    table2, …
GROUP BY
        id
ORDER BY 
        NULL
LIMIT 100000000
)

我会把它作为一个错误发布到MySQL,希望他们能在下一个版本中修复它,但同时你可以使用这个解决方案。

请注意,类似的解决方案(使用TOP 100%)曾被用来在SQL Server 2000中强制子查询排序,但它在SQL Server 2005中不再有效(在带有TOP 100%的子查询中,ORDER BY对优化器不起作用)。

使用它是安全的,因为即使优化器行为在下一个版本中发生变化,它也不会破坏您的查询,但只会使它们像现在一样慢。

答案 1 :(得分:0)

也许可以尝试类似下面这样的写法:
-- Template: wrap the complete UNION query in a derived table and apply
-- ORDER BY NULL to the outer SELECT.
-- NOTE(review): the answer's author states they have never used MySQL, so
-- whether this removes the filesort is unverified.
SELECT *
FROM
(
    [your entire query here]
) DerivedTable
ORDER BY NULL

(我从未使用过MySQL,如果我没抓住重点,请见谅。)

编辑:如果单独运行每个单独的查询(正如您所说,可以工作),但将数据插入临时表会怎样。然后,最后只需从临时表中进行选择。

答案 2 :(得分:0)

您是否尝试过将UNION更改为UNION ALL?

UNION尝试删除重复的行。为了做到这一点,它必须对中间结果进行排序,这可能会解释您在执行计划中看到的内容。

引自MySQL Union:

  

默认情况下,即使你没有在关键字UNION之后明确使用DISTINCT,MySQL UNION也会删除结果集中所有重复的行。

     

如果你明确使用UNION ALL,那么重复的行会保留在结果集中。只有在你想保留重复的行,或者确定结果集中没有重复的行时,才使用它。

修改

我不确定这会不会带来任何改善(甚至可能更糟),但你可以试试下面这个“等效”查询:

-- "Equivalent" rewrite of the UNION query: derived table s computes the
-- in_out flag per geo_boundary_id (circles and polygons combined with
-- UNION ALL), derived table r carries the inquiry/boundary attributes, and
-- the two are joined on geo_boundary_id.
-- The select list is explicit and follows the column order of the original
-- UNION query instead of relying on select *.
select  r.inquiry_match_type_id
        , s.geo_boundary_id
        , s.in_out
        , r.inquiry_id
        , r.external_id
        , r.inquiry_device_id
        , r.external_info1
        , r.external_info2
        , r.geo_boundary_id
        , r.geo_boundary_type_id
from    (
          -- Circular boundaries: centroid and radius come from geo_boundary.
          select  b.geo_boundary_id      
                  , GeoBoundaryContains( c.tpi_geo_boundary_coverage_type_id, 29.287437, -95.055807, b.centroid_lat, b.centoid_lon, b.radius ) in_out      
          from    geo_boundary b 
                  join trackpoint_index_geo_boundary_mem c on c.geo_boundary_id = b.geo_boundary_id 
          where   b.geo_boundary_type_id = 2  
                  -- Restrict c to the same trackpoint index the original query
                  -- selects through trackpoint_index_mem; without this the
                  -- coverage type passed to the UDF can come from another index.
                  and c.trackpoint_index_id = 3127
          group by
                  b.geo_boundary_id 
          union all        
          -- Polygon boundaries: coordinates come from geo_boundary_vertex.
          select  a.geo_boundary_id 
                  , GeoBoundaryContains(c.tpi_geo_boundary_coverage_type_id, 29.287437, -95.055807, a.lat, a.lon, a.geo_boundary_vertex_id ) in_out 
          from    geo_boundary_vertex a 
                  join geo_boundary b on b.geo_boundary_id = a.geo_boundary_id 
                  join trackpoint_index_geo_boundary_mem c on c.geo_boundary_id = b.geo_boundary_id 
          where   b.geo_boundary_type_id = 3  
                  and c.trackpoint_index_id = 3127
          group by 
                  a.geo_boundary_id 
        ) s
        inner join (                  
          -- NOTE(review): r is not grouped, so if several inquiry_device_mem
          -- rows match one inquiry this returns one row per match, unlike the
          -- grouped original — confirm whether that can happen.
          select  e.inquiry_match_type_id 
                  , e.inquiry_id 
                  , e.external_id 
                  , COALESCE(f.inquiry_device_id,0) inquiry_device_id 
                  , b.external_info1 
                  , b.external_info2 
                  , b.geo_boundary_id 
                  , b.geo_boundary_type_id 
          from    geo_boundary b 
                  join trackpoint_index_geo_boundary_mem c on c.geo_boundary_id = b.geo_boundary_id 
                  join trackpoint_index_mem d on d.trackpoint_index_id = c.trackpoint_index_id 
                  join inquiry_mem e on e.inquiry_id = b.inquiry_id 
                  left outer join inquiry_device_mem f on f.inquiry_id = e.inquiry_id and f.device_id = 3201 
          where   d.trackpoint_index_id = 3127  
                  and b.geo_boundary_type_id IN (2, 3)
                  and e.expiration_date >= now() 
        ) r on r.geo_boundary_id = s.geo_boundary_id