BigQuery - 查找最近的区域

时间:2018-02-14 06:45:11

标签: sql google-bigquery

我有两张表,对于A中的每个区域,我想找到B中最近的区域。

A:
------------------------
ID | Start | End | Color 
------------------------
 1 |  400  | 500 | White
------------------------
 1 |  10   | 20  | Red 
------------------------
 2 |   2   |  10 | Blue 
------------------------
 4 |   88  |  90 | Color 
------------------------

B:
------------------------
ID | Start | End | Name 
------------------------
 1 |  1    | 2   | XYZ1 
------------------------
 1 |  50   | 60  | XYZ4 
------------------------
 2 |  150  | 160 | ABC1 
------------------------
 2 |  50   | 60  | ABC2 
------------------------
 4 |  100  | 120 | EFG 
------------------------

RS:
---------------------------------------
ID | Start | End | Color | Closest Name
---------------------------------------
 1 |  400  | 500 | White |   XYZ4
---------------------------------------
 1 |  10   | 20  | Red   |   XYZ1
---------------------------------------
 2 |   2   |  10 | Blue  |   ABC2
---------------------------------------
 4 |   88  |  90 | Color |   EFG
---------------------------------------

目前,我首先通过连接(JOIN)两个表来找到最小距离:

MinDist 表:

-- MinDist: for each A row (grouped by ID, Start, End), the minimum over all
-- joined B rows of the smaller of the two gaps |A.End - B.Start| and
-- |A.Start - B.End|.
-- NOTE(review): the FROM subquery is elided ("...") and the parentheses do not
-- balance as written, so this is a sketch rather than a runnable query.
SELECT   A.ID,   A.Start,  A.End,   
MIN(CASE  
WHEN (ABS(A.End-B.Start)>=ABS(A.Start - B.End)) 
THEN ABS(A.Start-B.End)     
ELSE ABS(A.End - B.Start) 
END) AS distance 
FROM ( Select A ... ) 
Join B On A.ID=B.ID) 
Group By A.ID,   A.Start,  A.End

然后通过再次连接表A和B重新计算距离, GlobDist 表(注意,查询在这种情况下检索B.Name):

-- GlobDist: one row per joined (A, B) pair carrying the same distance
-- expression as MinDist (the smaller of |A.End - B.Start| and
-- |A.Start - B.End|), plus B.Name, with no aggregation.
-- NOTE(review): the FROM subquery is elided ("...") and the parentheses do not
-- balance as written, so this is a sketch rather than a runnable query.
SELECT   A.ID,   A.Start,  A.End,   
CASE  
WHEN (ABS(A.End-B.Start)>=ABS(A.Start - B.End)) 
THEN ABS(A.Start-B.End)     
ELSE ABS(A.End - B.Start) 
END AS distance,
B.Name 
FROM ( Select A ... ) 
Join B On A.ID=B.ID) 

最后,按以下条件连接 MinDist 和 GlobDist 这两个表:
GlobDist.ID= MinDist.ID, 
GlobDist.Start=MinDist.Start, 
GlobDist.End= MinDist.End, 
GlobDist.distance= MinDist.distance.

我测试了ROW_NUMBER()和PARTITION BY(ID,Start,End),但花了更长的时间。那么,解决这个问题的最快,最有效的方法是什么?如何减少重复计算?

谢谢!

1 个答案:

答案 0 :(得分:1)

以下解决方案适用于BigQuery Standard SQL,简单如下

    package main

    import (
        "encoding/json"
        "fmt"
        "log"
    )

    // Icons holds the decoded form of a bank's JSON "icon" value: one primary
    // URL plus alternative image URLs keyed by a size label.
    type Icons struct {
        // URL is taken from the first element of the JSON "icon" array.
        URL    string
        // BySize maps a size label (e.g. "48x48") to the image URL for that size.
        BySize map[string]string
    }



    // CatalogBank describes one bank entry decoded from a JSON document.
    //
    // Icon is deliberately hidden from encoding/json: the "icon" value in the
    // input is a heterogeneous array that cannot be mapped onto Icons by struct
    // tags alone, so it is filled in by the Unmarshal method instead.
    type CatalogBank struct {
        Advice      string `json:"advice"`
        BankCode    string `json:"bank_code"`
        BankName    string `json:"bank_name"`
        BIC         string `json:"bic"`
        Credentials []struct {
            Label  string `json:"label"`
            Masked bool   `json:"masked"`
        } `json:"credentials"`
        // The tag must be exactly "-" to exclude the field. The form "-," does
        // the opposite: it maps the field to a JSON key literally named "-".
        Icon *Icons `json:"-"`
        // NOTE(review): encoding/json encodes and decodes []byte as a
        // base64 string; confirm that is the intended wire format here.
        Language []byte `json:"language"`
    }

    // Unmarshal decodes data into p, including the heterogeneous "icon" array
    // that struct tags alone cannot map onto Icons.
    //
    // The "icon" value is expected to have the shape [url, {size: url, ...}].
    // Decoding of that array is lenient: a missing or wrongly-typed element is
    // skipped and leaves the corresponding part of p.Icon empty. After a
    // successful call p.Icon is always non-nil and its BySize map is initialised.
    //
    // It returns the error reported by json.Unmarshal, in which case p.Icon is
    // left untouched.
    func (p *CatalogBank) Unmarshal(data []byte) error {
        // Transient lets every ordinary key land in *p through the embedded
        // pointer while capturing the raw "icon" array in a field of its own.
        //
        // This method must not be renamed to UnmarshalJSON: the embedded pointer
        // would promote it onto Transient and json.Unmarshal would recurse.
        type Transient struct {
            *CatalogBank
            Icon []interface{} `json:"icon"`
        }

        transient := Transient{CatalogBank: p}
        if err := json.Unmarshal(data, &transient); err != nil {
            return err
        }

        icons := &Icons{BySize: make(map[string]string)}
        p.Icon = icons

        raw := transient.Icon
        if len(raw) > 0 {
            if url, ok := raw[0].(string); ok {
                icons.URL = url
            }
        }
        // Check for the second element separately: an array that carries only
        // the URL must not trigger an out-of-range access.
        if len(raw) > 1 {
            if bySize, ok := raw[1].(map[string]interface{}); ok {
                for size, v := range bySize {
                    // Skip non-string values instead of panicking on a failed
                    // type assertion, matching the lenient handling above.
                    if s, ok := v.(string); ok {
                        icons.BySize[size] = s
                    }
                }
            }
        }
        return nil
    }

    // main decodes a sample bank document and prints its advice text, its icon
    // URL, and one line per size-specific icon URL.
    func main() {
        // Sample payload: "icon" is a two-element array of [url, {size: url}].
        data := `
            {
            "Advice":"abc",
            "icon": [
                    "https://api.figo.me/assets/images/accounts/postbank.png",
                    {
                      "48x48": "https://api.figo.me/assets/images/accounts/postbank_48.png",
                      "60x60": "https://api.figo.me/assets/images/accounts/postbank_60.png",
                      "72x72": "https://api.figo.me/assets/images/accounts/postbank_72.png",
                      "84x84": "https://api.figo.me/assets/images/accounts/postbank_84.png",
                      "96x96": "https://api.figo.me/assets/images/accounts/postbank_96.png",
                      "120x120": "https://api.figo.me/assets/images/accounts/postbank_120.png",
                      "144x144": "https://api.figo.me/assets/images/accounts/postbank_144.png",
                      "192x192": "https://api.figo.me/assets/images/accounts/postbank_192.png",
                      "256x256": "https://api.figo.me/assets/images/accounts/postbank_256.png"
                    }
                  ]
            }
            `

        bank := &CatalogBank{}
        if err := bank.Unmarshal([]byte(data)); err != nil {
            log.Fatal(err)
        }

        fmt.Printf("advice: %v\n", bank.Advice)
        fmt.Printf("icon: %v\n", bank.Icon.URL)

        // Map iteration order is unspecified, so these lines may print in any order.
        for size, icon := range bank.Icon.BySize {
            // The format ends at the newline; a trailing space after "\n"
            // would indent every following line by one column.
            fmt.Printf("%v =>  %v\n", size, icon)
        }
    }

您可以使用问题中的示例数据对上述查询进行测试和试验

结果如下

#standardSQL
-- For each A row, return the single B name (among B rows with the same id)
-- that is nearest, ranking candidates by the squared Euclidean distance
-- between the (start, end) pairs. The inner JOIN drops A rows that have no
-- B row with a matching id.
-- NOTE(review): the a_*/b_* column names are not the ID/Start/End columns
-- shown in the question's tables; presumably A and B are defined with
-- prefixed columns elsewhere. Confirm before running.
SELECT a_id, a_start, a_end, color,  
  -- ORDER BY ... LIMIT 1 keeps only the closest name; [SAFE_OFFSET(0)]
  -- extracts that element from the one-element array.
  ARRAY_AGG(name ORDER BY POW(ABS(a_start - b_start), 2) + POW(ABS(a_end - b_end), 2) LIMIT 1)[SAFE_OFFSET(0)] name
FROM A JOIN B ON a_id = b_id
GROUP BY a_id, a_start, a_end, color
-- ORDER BY a_id