我正在研究"在给定坐标附近选择最近点"的查询性能。
可选方案有两种:要么使用两个 decimal(8,6) 列(lat、long),
要么使用单个 geography 列并基于该列查询。
我只关心哪一种更快。
答案 0 :(得分:2)
TL;DR:geography 方案快约 10 倍。
好的,我搭建了如下测试环境:
一张表:id, lat, long (int, decimal(8,6), decimal(8,6));
另一张表:id, coord (int, geography)。
然后向两张表各插入 4.7 万行随机数据。
索引方面:第一张表在 (lat, long) 上建立非聚集升序索引,填充因子为 95;
第二张表建立空间索引,GRIDS = (LEVEL_1 = LOW, LEVEL_2 = MEDIUM, LEVEL_3 = LOW, LEVEL_4 = LOW),
填充因子同样为 95。
-- Benchmark table for the geography approach: one spatial value per row.
CREATE TABLE [dbo].[Temp]
(
    [Id]    INT IDENTITY(1, 1) NOT NULL,  -- auto-numbered, starting at 1 in steps of 1
    [Coord] GEOGRAPHY          NOT NULL   -- the stored location
)
ON [PRIMARY]
TEXTIMAGE_ON [PRIMARY]
GO
-- Make Id the clustered primary key of dbo.Temp.
ALTER TABLE [dbo].[Temp]
    ADD CONSTRAINT [PK_Temp]
    PRIMARY KEY CLUSTERED ([Id])
    WITH (
        STATISTICS_NORECOMPUTE = OFF,
        IGNORE_DUP_KEY = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Fill dbo.Temp with 47,000 random points (SRID 4326).
-- Each coordinate is computed as (0.9 - RAND() * 1.8) * 100, i.e. roughly
-- -90 to 90, so both values fit in decimal(8,6) and are valid latitudes.
DECLARE @i INT = 0;
DECLARE @lat DECIMAL(8, 6) = 0.0;
DECLARE @long DECIMAL(8, 6) = 0.0;

WHILE (@i < 47000)
BEGIN
    SET @lat = (0.9 - RAND() * 1.8) * 100;
    SET @long = (0.9 - RAND() * 1.8) * 100;

    -- Explicit schema and column list: the insert no longer depends on the
    -- caller's default schema or on the physical column order of the table.
    INSERT INTO dbo.Temp (Coord)
    VALUES (geography::Point(@lat, @long, 4326));

    SET @i = @i + 1;
END
GO
-- Spatial index over the geography column of dbo.Temp.
-- Grid density per level is LOW / MEDIUM / LOW / LOW, up to 16 cells per object.
CREATE SPATIAL INDEX [SpatialIndex_1]
    ON [dbo].[Temp] ([coord])
    USING GEOGRAPHY_GRID
    WITH (
        GRIDS = (
            LEVEL_1 = LOW,
            LEVEL_2 = MEDIUM,
            LEVEL_3 = LOW,
            LEVEL_4 = LOW
        ),
        CELLS_PER_OBJECT = 16,
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = OFF,
        ALLOW_PAGE_LOCKS = ON,
        FILLFACTOR = 95
    )
    ON [PRIMARY]
GO
-- Benchmark table for the plain-columns approach: latitude and longitude
-- stored as two separate decimals, with Id as the clustered primary key.
-- DECIMAL(8, 6) leaves two digits before the decimal point, so each column
-- can only hold values strictly between -100 and 100.
CREATE TABLE [dbo].[Temp2]
(
    [Id]   INT IDENTITY(1, 1) NOT NULL,
    [Lat]  DECIMAL(8, 6)      NOT NULL,
    [Long] DECIMAL(8, 6)      NOT NULL,
    CONSTRAINT [PK_Temp2]
        PRIMARY KEY CLUSTERED ([Id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY]
GO
-- Fill dbo.Temp2 with 47,000 random (Lat, Long) pairs.
-- Each coordinate is computed as (0.9 - RAND() * 1.8) * 100, i.e. roughly
-- -90 to 90, which fits the DECIMAL(8, 6) columns.
DECLARE @i INT = 0;
DECLARE @lat DECIMAL(8, 6) = 0;
DECLARE @long DECIMAL(8, 6) = 0;

WHILE (@i < 47000)
BEGIN
    SET @lat = (0.9 - (RAND() * 1.8)) * 100;
    SET @long = (0.9 - (RAND() * 1.8)) * 100;

    -- Explicit schema and column list: the insert no longer depends on the
    -- caller's default schema or on the physical column order of the table.
    INSERT INTO dbo.Temp2 (Lat, Long)
    VALUES (@lat, @long);

    SET @i = @i + 1;
END
GO
-- Composite non-clustered index on (Lat, Long), both ascending, fill factor 95.
CREATE NONCLUSTERED INDEX [Coord_IX]
    ON [dbo].[Temp2] ([Lat] ASC, [Long] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON,
        FILLFACTOR = 95
    )
    ON [PRIMARY]
GO
然后我做了几个测试:
首先是Lat,Long。
-- Benchmark 1: nearest rows using the plain Lat/Long columns of dbo.Temp2.
-- Runs 100 searches, each from a fresh random origin, returning the 1000
-- closest distinct rows by spherical distance, then prints the total elapsed
-- time formatted as hh:mi:ss.
DECLARE @lat   DECIMAL(8, 6) = 0.0,
        @lon   DECIMAL(8, 6) = 0.0,
        @i     INT = 0,
        @start DATETIME = GETDATE(),
        @lat_s FLOAT,   -- SIN of the origin latitude, in radians
        @lat_c FLOAT;   -- COS of the origin latitude, in radians

WHILE (@i < 100)
BEGIN
    SET @lat = (0.9 - RAND() * 1.8) * 100;
    SET @lon = (0.9 - (RAND() * 1.8)) * 100.0;

    -- These depend only on the origin, so compute them once per search
    -- instead of once per row.
    SET @lat_s = SIN(@lat * PI() / 180);
    SET @lat_c = COS(@lat * PI() / 180);

    SELECT DISTINCT TOP 1000
        @lat,
        @lon,
        t.lat,
        t.long,
        t.dist
    FROM (
        SELECT
            p.lat,
            p.long,
            -- Float rounding can push the cosine a hair outside [-1, 1] when a
            -- row is (almost) identical to the origin; ACOS then fails with a
            -- domain error, so clamp the argument first.
            -- The trailing * 60 * 1.1515 presumably converts degrees of arc to
            -- nautical miles and then to statute miles - TODO confirm units.
            (ACOS(
                CASE
                    WHEN c.cos_angle > 1.0E0 THEN 1.0E0
                    WHEN c.cos_angle < -1.0E0 THEN -1.0E0
                    ELSE c.cos_angle
                END
            ) * 180 / PI()) * 60 * 1.1515 AS dist
        FROM dbo.Temp2 AS p
        CROSS APPLY (
            SELECT
                @lat_s * SIN(p.lat * PI() / 180)
                + @lat_c * COS(p.lat * PI() / 180) * COS((@lon - p.long) * PI() / 180) AS cos_angle
        ) AS c
    ) AS t
    ORDER BY t.dist;

    SET @i = @i + 1;
END

PRINT CONVERT(VARCHAR, (GETDATE() - @start), 108);
GO
其次是 geography 方案。
-- Benchmark 2: nearest rows using the geography column of dbo.Temp.
-- Runs 100 searches, each from a fresh random origin, returning the 1000
-- closest rows by STDistance, then prints the total elapsed time as hh:mi:ss.
DECLARE @g geography;
DECLARE @i     INT = 0,
        @lat   DECIMAL(8, 6) = 0.0,
        @long  DECIMAL(8, 6) = 0.0,
        @start DATETIME = GETDATE();

WHILE (@i < 100)
BEGIN
    SET @lat = (0.9 - RAND() * 1.8) * 100;
    SET @long = (0.9 - RAND() * 1.8) * 100;

    -- Build the origin with geography::Point(lat, long, srid), the same call
    -- used to load dbo.Temp. The previous WKT string 'POINT(<lat> <long>)' had
    -- the axes transposed: WKT expects 'POINT(<longitude> <latitude>)'.
    SET @g = geography::Point(@lat, @long, 4326);

    -- NOTE(review): Microsoft's nearest-neighbor guidance says the spatial
    -- index is only considered when the query also filters on
    -- "STDistance(...) IS NOT NULL". The query shape is left unchanged here so
    -- the measurement stays comparable - confirm before reusing elsewhere.
    SELECT TOP 1000
        @lat,
        @long,
        @g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)],
        st.[coord],
        st.id
    FROM dbo.Temp AS st
    ORDER BY @g.STDistance(st.[coord]) ASC;

    SET @i = @i + 1;
END

PRINT CONVERT(VARCHAR, (GETDATE() - @start), 108);
GO
<strong>结果:</strong>
如果你想知道 geography 方案为什么这么慢,可以查看它的执行计划:注意它没有使用空间索引;而且由于行大小为 4047 字节(decimal 方案为 25 字节),排序耗时极长。尝试强制使用该索引则会导致运行时错误。
PS:我还做了一个平面(欧氏)距离的版本,但与球面版本的差异很小,约 0.5 秒(耗时降到 9.5–10.0 秒,看起来只是略快)。为了把所有内容放在一处,脚本如下:
-- Benchmark 3: same search as the Lat/Long test, but ranking rows by flat
-- (Euclidean) distance in degrees rather than spherical distance.
-- Runs 100 searches from random origins and prints the elapsed time as hh:mi:ss.
PRINT 'flat';

DECLARE @lat   DECIMAL(8, 6) = 0.0,
        @lon   DECIMAL(8, 6) = 0.0,
        @i     INT = 0,
        @start DATETIME = GETDATE();

WHILE (@i < 100)
BEGIN
    SET @lat = (0.9 - RAND() * 1.8) * 100;
    SET @lon = (0.9 - (RAND() * 1.8)) * 100.0;

    -- Distance of every stored point from the current origin.
    WITH distances AS (
        SELECT
            lat,
            long,
            SQRT(POWER((@lat - lat), 2) + (POWER((@lon - long), 2))) AS dist
        FROM dbo.Temp2
    )
    SELECT DISTINCT TOP 1000
        @lat,
        @lon,
        d.lat,
        d.long,
        d.dist
    FROM distances AS d
    ORDER BY d.dist;

    SET @i += 1;
END

PRINT CONVERT(VARCHAR, (GETDATE() - @start), 108);
GO
<strong>更新</strong>
切换到 SQL Server 2014,并在 1000 万条记录上强制使用空间索引:
所用的 geography 脚本:
-- Single nearest-neighbor search against dbo.Temp with the spatial index
-- forced via a table hint: returns the 1000 rows closest to a random origin.
DECLARE @g geography;
-- @i and @start are not used in the visible part of this script; they are
-- kept in case later statements in the same batch rely on them.
DECLARE @i     INT = 0,
        @lat   DECIMAL(8, 6) = 0.0,
        @long  DECIMAL(8, 6) = 0.0,
        @start DATETIME = GETDATE();

SET @lat = (0.9 - RAND() * 1.8) * 100;
SET @long = (0.9 - RAND() * 1.8) * 100;

-- Build the origin with geography::Point(lat, long, srid). The previous WKT
-- string 'POINT(<lat> <long>)' had the axes transposed: WKT expects
-- 'POINT(<longitude> <latitude>)'.
SET @g = geography::Point(@lat, @long, 4326);

SELECT TOP 1000
    @lat,
    @long,
    @g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)],
    st.[coord],
    st.id
FROM Temp AS st WITH (INDEX([SpatialIndex_1]))
WHERE @g.STDistance(st.[coord]) IS NOT NULL
ORDER BY @g.STDistance(st.[coord]) ASC