SQL Geometry 与 decimal(8,6) Lat,Long 的性能对比

时间:2016-01-13 10:14:22

标签: sql sql-server database-performance sqlgeography geographic-distance

我正在研究在给定坐标附近选择最近点的性能。

可选方案有两种:要么使用两个 decimal(8,6) 列(lat、long),要么使用单个 geography 列。

我只对哪个更快感兴趣?

1 个答案:

答案 0 :(得分:2)

TL;DR:geography 类型快约 10 倍。

好的,我已经设置了测试:

一张表包含 id, lat, long (int, decimal(8,6), decimal(8,6)),另一张表包含 id, coord (int, geography)。

然后插入47k随机数据。

索引方面:第一张表在 (lat, long) 上建立非聚集升序索引,填充因子为 95;第二张表建立空间索引,GRIDS =(LEVEL_1 = LOW,LEVEL_2 = MEDIUM,LEVEL_3 = LOW,LEVEL_4 = LOW),填充因子同样为 95。

-- Geography test table: an identity key plus one geography value per row.
CREATE TABLE [dbo].[Temp]
(
    [Id]    INT       IDENTITY(1, 1) NOT NULL,
    [Coord] GEOGRAPHY NOT NULL
)
ON [PRIMARY]
TEXTIMAGE_ON [PRIMARY];
GO

-- Clustered primary key on Id, added as a separate step after table creation.
ALTER TABLE [dbo].[Temp]
    ADD CONSTRAINT [PK_Temp] PRIMARY KEY CLUSTERED ([Id])
    WITH (
        STATISTICS_NORECOMPUTE = OFF,
        IGNORE_DUP_KEY         = OFF,
        ALLOW_ROW_LOCKS        = ON,
        ALLOW_PAGE_LOCKS       = ON
    )
    ON [PRIMARY];

GO


-- Fill Temp with 47,000 random points, one INSERT per loop iteration.
-- Each coordinate is drawn as (0.9 - RAND() * 1.8) * 100, i.e. roughly -90 .. 90,
-- and stored as a geography point with SRID 4326.
DECLARE @i    INT           = 0,
        @lat  DECIMAL(8, 6) = 0.0,
        @long DECIMAL(8, 6) = 0.0;

WHILE @i < 47000
BEGIN
    -- Latitude is drawn first, then longitude (two separate RAND() calls).
    SET @lat  = (0.9 - RAND() * 1.8) * 100;
    SET @long = (0.9 - RAND() * 1.8) * 100;

    INSERT INTO Temp (Coord)
    SELECT geography::Point(@lat, @long, 4326);

    SET @i = @i + 1;
END;

GO


-- Spatial index over the geography column of dbo.Temp, using the geography grid
-- with LOW / MEDIUM / LOW / LOW densities for grid levels 1-4 and fill factor 95.
CREATE SPATIAL INDEX [SpatialIndex_1]
    ON [dbo].[Temp] ([coord])
    USING GEOGRAPHY_GRID
    WITH (
        GRIDS = (LEVEL_1 = LOW, LEVEL_2 = MEDIUM, LEVEL_3 = LOW, LEVEL_4 = LOW),
        CELLS_PER_OBJECT       = 16,
        PAD_INDEX              = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB         = OFF,
        DROP_EXISTING          = OFF,
        ONLINE                 = OFF,
        ALLOW_ROW_LOCKS        = OFF,
        ALLOW_PAGE_LOCKS       = ON,
        FILLFACTOR             = 95
    )
    ON [PRIMARY];

GO

-- Plain lat/long test table: an identity key plus two decimal(8,6) coordinate columns,
-- with the clustered primary key declared inline.
CREATE TABLE [dbo].[Temp2]
(
    [Id]   INT           IDENTITY(1, 1) NOT NULL,
    [Lat]  DECIMAL(8, 6) NOT NULL,
    [Long] DECIMAL(8, 6) NOT NULL,
    CONSTRAINT [PK_Temp2] PRIMARY KEY CLUSTERED ([Id] ASC)
        WITH (
            PAD_INDEX              = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY         = OFF,
            ALLOW_ROW_LOCKS        = ON,
            ALLOW_PAGE_LOCKS       = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY];

GO


-- Fill Temp2 with 47,000 random lat/long pairs, one INSERT per loop iteration.
-- Each coordinate is drawn as (0.9 - RAND() * 1.8) * 100, i.e. roughly -90 .. 90.
DECLARE @i    INT           = 0,
        @lat  DECIMAL(8, 6) = 0,
        @long DECIMAL(8, 6) = 0;

WHILE @i < 47000
BEGIN
    -- Latitude is drawn first, then longitude (two separate RAND() calls).
    SET @lat  = (0.9 - (RAND() * 1.8)) * 100;
    SET @long = (0.9 - (RAND() * 1.8)) * 100;

    INSERT INTO Temp2 (Lat, Long)
    SELECT @lat, @long;

    SET @i = @i + 1;
END;

GO
-- Composite nonclustered index on (Lat, Long), both ascending, fill factor 95.
CREATE NONCLUSTERED INDEX [Coord_IX]
    ON [dbo].[Temp2] ([Lat] ASC, [Long] ASC)
    WITH (
        PAD_INDEX              = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB         = OFF,
        IGNORE_DUP_KEY         = OFF,
        DROP_EXISTING          = OFF,
        ONLINE                 = OFF,
        ALLOW_ROW_LOCKS        = ON,
        ALLOW_PAGE_LOCKS       = ON,
        FILLFACTOR             = 95
    )
    ON [PRIMARY];
GO

然后我做了几个测试:

首先是Lat,Long。

-- Benchmark: 100 nearest-point lookups against dbo.Temp2, computing the distance
-- from a random query point to every row with the spherical law of cosines.
-- Prints the total elapsed time formatted as hh:mi:ss (CONVERT style 108).
DECLARE @lat   DECIMAL(8, 6) = 0.0,
        @lon   DECIMAL(8, 6) = 0.0,
        @i     INT           = 0,
        @start DATETIME      = GETDATE(),
        @lat_s FLOAT,
        @lat_c FLOAT;

WHILE @i < 100
BEGIN
    -- New random query point for every iteration.
    SET @lat = (0.9 - RAND() * 1.8) * 100;
    SET @lon = (0.9 - (RAND() * 1.8)) * 100.0;

    -- sin/cos of the query latitude are row-invariant, so compute them once per lookup.
    SET @lat_s = SIN(@lat * PI() / 180);
    SET @lat_c = COS(@lat * PI() / 180);

    SELECT DISTINCT TOP 1000
        @lat,
        @lon,
        t.lat,
        t.long,
        t.dist
    FROM (
        SELECT
            p.lat,
            p.long,
            -- Clamp the cosine to [-1, 1] before ACOS: floating-point rounding can push
            -- it marginally outside that range when the query point (almost) coincides
            -- with a row, and ACOS raises a domain error for such input.
            -- The 60 * 1.1515 factor presumably converts degrees of arc to statute
            -- miles (60 nautical miles per degree) -- confirm the intended unit.
            ((ACOS(CASE
                       WHEN c.cos_angle >  1 THEN  1
                       WHEN c.cos_angle < -1 THEN -1
                       ELSE c.cos_angle
                   END) * 180 / PI()) * 60 * 1.1515) AS dist
        FROM dbo.Temp2 AS p
        CROSS APPLY (
            SELECT @lat_s * SIN(p.lat * PI() / 180)
                 + @lat_c * COS(p.lat * PI() / 180) * COS((@lon - p.long) * PI() / 180) AS cos_angle
        ) AS c
    ) AS t
    ORDER BY t.dist;

    SET @i = @i + 1;
END;
PRINT CONVERT(varchar, (GETDATE() - @start), 108);
GO

其次是地理位置。

 -- Benchmark: 100 nearest-point lookups against Temp using geography.STDistance.
-- Prints the total elapsed time formatted as hh:mi:ss (CONVERT style 108).
DECLARE @g     GEOGRAPHY,
        @i     INT           = 0,
        @lat   DECIMAL(8, 6) = 0.0,
        @long  DECIMAL(8, 6) = 0.0,
        @start DATETIME      = GETDATE();

WHILE @i < 100
BEGIN
    -- New random query point for every iteration.
    SET @lat  = (0.9 - RAND() * 1.8) * 100;
    SET @long = (0.9 - RAND() * 1.8) * 100;

    -- Build the point with geography::Point(lat, long, srid), the same constructor
    -- used to populate Temp. The previous WKT text 'POINT(<lat> <long>)' was parsed
    -- as POINT(longitude latitude), so the query point was transposed relative to
    -- the @lat/@long values echoed in the result set.
    SET @g = geography::Point(@lat, @long, 4326);

    SELECT TOP 1000
        @lat,
        @long,
        @g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)],
        st.[coord],
        st.id
    FROM Temp AS st
    ORDER BY @g.STDistance(st.[coord]) ASC;

    SET @i = @i + 1;
END;
PRINT CONVERT(varchar, (GETDATE() - @start), 108);
GO

结果:

  • Lat,Long - 00:00:10
  • 地理 - 00:02:21

对于想知道为什么 geography 方案表现如此糟糕的人,下面是执行计划——请注意它没有使用空间索引,而且由于行大小为 4047 字节(decimal 方案为 25 字节),排序耗时极长。尝试强制使用索引会导致运行时错误。

enter image description here

PS:我还测试了平面距离的版本,但与球面版本的差异非常小,约 0.5 秒(耗时 9.5–10.0 秒,看起来稍快一点)。为了把所有内容集中在一处,脚本如下:

-- Benchmark: 100 nearest-point lookups against dbo.Temp2 using planar (Euclidean)
-- distance in degrees. Prints the total elapsed time formatted as hh:mi:ss.
PRINT 'flat';
DECLARE @lat   DECIMAL(8, 6) = 0.0,
        @lon   DECIMAL(8, 6) = 0.0,
        @i     INT           = 0,
        @start DATETIME      = GETDATE();

WHILE @i < 100
BEGIN
    -- New random query point for every iteration.
    SET @lat = (0.9 - RAND() * 1.8) * 100;
    SET @lon = (0.9 - (RAND() * 1.8)) * 100.0;

    SELECT DISTINCT TOP 1000
        @lat,
        @lon,
        t.lat,
        t.long,
        t.dist
    FROM (
        SELECT
            p.lat,
            p.long,
            -- Cast the differences to FLOAT before squaring: POWER on a decimal
            -- argument returns a decimal with the same scale (6 here), so squares of
            -- small differences were cut to 6 decimal places and nearby points
            -- collapsed to a distance of 0.
            SQRT(POWER(CAST(@lat - p.lat  AS FLOAT), 2)
               + POWER(CAST(@lon - p.long AS FLOAT), 2)) AS dist
        FROM dbo.Temp2 AS p
    ) AS t
    ORDER BY t.dist;

    SET @i = @i + 1;
END;
PRINT CONVERT(varchar, (GETDATE() - @start), 108);
GO

更新:

切换到SQL 2014并强制使用带有10M记录的索引:

  • Lat,Long 耗时 00:00:22.935
  • Flat 耗时 00:00:22.988
  • Geography 耗时 00:00:02.427

使用的地理脚本:

-- Single nearest-point lookup against Temp that forces the spatial index.
-- @i and @start are declared but not used in the visible part of this script;
-- they are kept in case later statements in the same batch rely on them.
DECLARE @g     GEOGRAPHY,
        @i     INT           = 0,
        @lat   DECIMAL(8, 6) = 0.0,
        @long  DECIMAL(8, 6) = 0.0,
        @start DATETIME      = GETDATE();

-- Random query point.
SET @lat  = (0.9 - RAND() * 1.8) * 100;
SET @long = (0.9 - RAND() * 1.8) * 100;

-- Build the point with geography::Point(lat, long, srid), the same constructor used
-- to populate Temp. The previous WKT text 'POINT(<lat> <long>)' was parsed as
-- POINT(longitude latitude), so the query point was transposed relative to the
-- @lat/@long values echoed in the result set.
SET @g = geography::Point(@lat, @long, 4326);

SELECT TOP 1000
    @lat,
    @long,
    @g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)],
    st.[coord],
    st.id
FROM Temp AS st WITH (INDEX ([SpatialIndex_1]))
-- NOTE(review): the STDistance IS NOT NULL predicate appears to be what lets the
-- optimizer use the spatial index for this TOP ... ORDER BY distance query;
-- do not remove it without re-checking the execution plan.
WHERE @g.STDistance(st.[coord]) IS NOT NULL
ORDER BY @g.STDistance(st.[coord]) ASC;