我有一个包含大量记录的数据库(n_building / n_residence 表中有 100 个记录,而 n_buildinggeo 表中有数百万条记录)。这是数据库的简化版本:
这是导出到SQL时的样子:
-- District lookup table; the column comments point at the CBS "wijk" code and
-- description fields. Each district row references exactly one municipality,
-- and municipality deletes / key updates cascade to its districts.
CREATE TABLE IF NOT EXISTS `district` (
    `districtid`     INT(20)      NOT NULL COMMENT 'cbs_wijk_cd',
    `description`    VARCHAR(255) NOT NULL COMMENT 'cbs_wijk_oms',
    `municipalityid` INT(20)      NOT NULL COMMENT 'FK gemeente',

    PRIMARY KEY (`districtid`),

    -- Backs the foreign key to `municipality` (ascending order is the default).
    INDEX `wijk_gemeente_fk` (`municipalityid`),

    CONSTRAINT `fk_district_municipality`
        FOREIGN KEY (`municipalityid`)
        REFERENCES `municipality` (`municipalityid`)
        ON DELETE CASCADE
        ON UPDATE CASCADE
) ENGINE = InnoDB;
-- Neighborhood lookup table; the column comments point at the CBS "buurt" code
-- and description fields. A neighborhood references both its district and its
-- municipality; deletes / key updates on either parent cascade to this table.
CREATE TABLE IF NOT EXISTS `neighborhood` (
    `neighborhoodid` INT(20)      NOT NULL COMMENT 'cbs_buurt_cd',
    `description`    VARCHAR(255) NOT NULL COMMENT 'cbs_buurt_oms',
    `districtid`     INT(20)      NOT NULL COMMENT 'FK wijk',
    `municipalityid` INT(20)      NOT NULL COMMENT 'FK gemeente',

    PRIMARY KEY (`neighborhoodid`),

    -- One single-column index per foreign key.
    INDEX `buurt_gemeente_fk` (`municipalityid`),
    INDEX `buurt_wijk_fk` (`districtid`),

    -- No ASC/DESC here: a sort direction is meaningless for a FULLTEXT index.
    -- Older MySQL versions silently ignore it, while MySQL 8.0 rejects it with
    -- error 1221 ("Incorrect usage of spatial/fulltext/hash index and explicit
    -- index order").
    FULLTEXT INDEX `index_neighborhood_description` (`description`),

    CONSTRAINT `fk_neighborhood_municipality`
        FOREIGN KEY (`municipalityid`)
        REFERENCES `municipality` (`municipalityid`)
        ON DELETE CASCADE
        ON UPDATE CASCADE,

    CONSTRAINT `fk_neighborhood_district`
        FOREIGN KEY (`districtid`)
        REFERENCES `district` (`districtid`)
        ON DELETE CASCADE
        ON UPDATE CASCADE
) ENGINE = InnoDB;
-- One row per building, attached to a neighborhood. Deleting or re-keying the
-- neighborhood cascades to its buildings.
CREATE TABLE IF NOT EXISTS `n_building` (
    `buildingid`       BIGINT(20) NOT NULL,
    `neighborhoodid`   INT(10)    NOT NULL,
    `constructionyear` INT(4)     NOT NULL,
    `height`           INT(3)     NOT NULL DEFAULT 9,
    `multifamily`      TINYINT(1) NOT NULL DEFAULT 0,

    PRIMARY KEY (`buildingid`),

    -- NOTE(review): `fk_building_buurt_idx` is a left prefix of
    -- `index_neighborhoodid_buildingid`, so one of the two is probably
    -- redundant -- confirm before dropping either.
    INDEX `fk_building_buurt_idx` (`neighborhoodid`),
    INDEX `index_neighborhoodid_buildingid` (`neighborhoodid`, `buildingid`),

    CONSTRAINT `fk_building_neighborhood`
        FOREIGN KEY (`neighborhoodid`)
        REFERENCES `neighborhood` (`neighborhoodid`)
        ON DELETE CASCADE
        ON UPDATE CASCADE
) ENGINE = InnoDB;
-- Coordinate rows belonging to a building, keyed by (buildingid, order).
-- Presumably `order` is the position of the point within the building
-- outline -- TODO confirm. `order` is a reserved word and must stay quoted.
CREATE TABLE IF NOT EXISTS `n_buildinggeo` (
    `buildingid` BIGINT(20)    NOT NULL,
    `order`      INT(5)        NOT NULL,
    `lat`        DECIMAL(11,8) NOT NULL,
    `lon`        DECIMAL(11,8) NOT NULL,

    -- The composite primary key also serves as the index for the foreign key.
    PRIMARY KEY (`buildingid`, `order`),

    CONSTRAINT `fk_buildinggeo_building`
        FOREIGN KEY (`buildingid`)
        REFERENCES `n_building` (`buildingid`)
        ON DELETE CASCADE
        ON UPDATE CASCADE
) ENGINE = InnoDB;
-- One row per residence, attached to a building and carrying a single
-- (geolat, geolon) coordinate. Building deletes / key updates cascade here.
CREATE TABLE IF NOT EXISTS `n_residence` (
    `residenceid` BIGINT(20)    NOT NULL,
    `buildingid`  BIGINT(20)    NOT NULL,
    `geolat`      DECIMAL(11,8) NOT NULL,
    `geolon`      DECIMAL(11,8) NOT NULL,

    PRIMARY KEY (`residenceid`),

    -- Backs the foreign key to `n_building`.
    INDEX `fk_residence_building_idx` (`buildingid`),

    -- Coordinate indexes.
    -- NOTE(review): `index_geolat` is a left prefix of
    -- `index_geoloat_geolon_residenceid` and is probably redundant -- confirm
    -- before dropping it.
    INDEX `index_geoloat_geolon_residenceid` (`geolat`, `geolon`, `residenceid`),
    INDEX `index_geolat` (`geolat`),
    INDEX `index_geolon` (`geolon`),

    CONSTRAINT `fk_residence_building`
        FOREIGN KEY (`buildingid`)
        REFERENCES `n_building` (`buildingid`)
        ON DELETE CASCADE
        ON UPDATE CASCADE
) ENGINE = InnoDB;
-- Optional extra attributes for a residence: at most one row per residence,
-- because `residenceid` is both the primary key and the foreign key. All
-- attribute columns are nullable. `index` is a reserved word and must stay
-- quoted.
CREATE TABLE IF NOT EXISTS `n_vabidata` (
    `residenceid` BIGINT(20)   NOT NULL,
    `index`       FLOAT        NULL COMMENT ' ',
    `indexdate`   VARCHAR(25)  NULL,
    `type`        VARCHAR(100) NULL,
    `subtype`     VARCHAR(150) NULL,
    `rooftype`    VARCHAR(50)  NULL,

    PRIMARY KEY (`residenceid`),

    CONSTRAINT `fk_vabidata_residence`
        FOREIGN KEY (`residenceid`)
        REFERENCES `n_residence` (`residenceid`)
        ON DELETE CASCADE
        ON UPDATE CASCADE
) ENGINE = InnoDB;
我的目标是创建此数据库某部分内容的JSON表示(这显然是匿名数据):
[
{
"buildingid": "632100000000000",
"buurtid": "6320103",
"constructionyear": "1969",
"height": "9",
"multifamily": "0",
"gemeenteid": "632",
"geo": [
{
"lat": "52.000",
"lon": "4.000"
},
{
"lat": "52.000",
"lon": "4.000"
},
{
"lat": "52.000",
"lon": "4.000"
},
{
"lat": "52.000",
"lon": "4.000"
},
{
"lat": "52.000",
"lon": "4.000"
}
],
"res": [
{
"residenceid": "632010000000000",
"surface": "159",
"postalcode": "3400AA",
"streetname": "Streetname",
"housenumber": "00",
"clusternr": "6320103533",
"owner": "onbekend",
"usageelec": "2463",
"usagegas": "2006",
"nomupd": "0",
"cpwin": "0",
"cpble": "0",
"enet": "0",
"gnet": "0",
"type": null
}
]
}
]
有两种方法可以过滤数据库:按街区(neighborhood)过滤(返回该街区内的所有建筑物等),或按边界框过滤(返回落在边界框内的所有建筑物等)。起初我用一种非常简单的方式实现:
// Variant 1: fetch the matching buildings first, then run one geometry query
// and one residence query per building, and echo the result as JSON.
//
// Path segments 2-5 hold either a bounding box (minlat, minlon, maxlat,
// maxlon) or a filter keyword in segment 2 followed by a numeric
// neighborhood id in segment 3.
$path2 = Config::Path(2);//minlat
$path3 = Config::Path(3);//minlon
$path4 = Config::Path(4);//maxlat
$path5 = Config::Path(5);//maxlon

// is_numeric() is used on its own (no extra truthiness test) so that a
// coordinate of "0" is accepted; the string "0" is falsy in PHP and was
// rejected by the previous `$x && is_numeric($x)` checks.
$isBoundingBox = is_numeric($path2) && is_numeric($path3) &&
                 is_numeric($path4) && is_numeric($path5);

// The outer guard used to test for "district" while the inner branch tested
// for "neighborhood": the neighborhood query could never run, and a
// "district" request fell through to the bounding-box query. Both keywords
// now select the neighborhood filter.
$byNeighborhood = ($path2 === "district" || $path2 === "neighborhood") &&
                  is_numeric($path3);

if ($isBoundingBox || $byNeighborhood) {
    // Every path value interpolated below has passed is_numeric(), so it
    // cannot contain a quote character.
    if ($byNeighborhood) {
        // The join on n_residence plus GROUP BY keeps one row per building
        // that has at least one residence.
        $buildings = DBUtils::FetchQuery("
            SELECT b.`buildingid`, b.`neighborhoodid` AS buurtid, b.`constructionyear`,
                   b.`height`, b.`multifamily`, n.`municipalityid` AS gemeenteid
            FROM `neighborhood` n
            INNER JOIN `n_building` b ON b.`neighborhoodid` = n.`neighborhoodid`
            INNER JOIN `n_residence` r ON r.`buildingid` = b.`buildingid`
            WHERE b.`neighborhoodid` = '$path3'
            GROUP BY b.`buildingid`;
        ");
    } else {
        // A building matches when at least one of its residences lies inside
        // the bounding box.
        $buildings = DBUtils::FetchQuery("
            SELECT b.`buildingid`, b.`neighborhoodid` AS buurtid, b.`constructionyear`,
                   b.`height`, b.`multifamily`, n.`municipalityid` AS gemeenteid
            FROM `neighborhood` n
            INNER JOIN `n_building` b ON b.`neighborhoodid` = n.`neighborhoodid`
            INNER JOIN `n_residence` r ON r.`buildingid` = b.`buildingid`
            WHERE r.`geolat` >= '$path2'
              AND r.`geolon` >= '$path3'
              AND r.`geolat` <= '$path4'
              AND r.`geolon` <= '$path5'
            GROUP BY b.`buildingid`;
        ");
    }

    if ($buildings && count($buildings) > 0) {
        foreach ($buildings as $i => $building) {
            $buildingId = $building['buildingid'];

            // ORDER BY makes the point sequence deterministic; without it the
            // row order of the result is not guaranteed by SQL.
            $buildinggeo = DBUtils::FetchQuery("
                SELECT bg.`lat`, bg.`lon`
                FROM `n_buildinggeo` bg
                WHERE bg.`buildingid` = '$buildingId'
                ORDER BY bg.`order`;
            ");

            // Residences are only attached to buildings that have geometry
            // (same nesting as before).
            if ($buildinggeo && count($buildinggeo) > 0) {
                $buildings[$i]['geo'] = $buildinggeo;

                $buildingresidences = DBUtils::FetchQuery("
                    SELECT r.`residenceid`, r.`surface`, r.`postalcode`, r.`streetname`,
                           r.`housenumber`, r.`clusternr`, r.`owner`, r.`usageelec`,
                           r.`usagegas`, r.`nomupd`, r.`cpwin`, r.`cpble`, r.`enet`,
                           r.`gnet`, v.`type`
                    FROM `n_residence` r
                    LEFT OUTER JOIN `n_vabidata` v ON r.`residenceid` = v.`residenceid`
                    WHERE r.`buildingid` = '$buildingId';
                ");
                if ($buildingresidences && count($buildingresidences) > 0) {
                    $buildings[$i]['res'] = $buildingresidences;
                }
            }
        }
        echo json_encode($buildings);
    }
}
后来我决定在一个查询中获取所有建筑/住宅/ vabidata信息并从中创建所需的JSON结构,因为每个请求(> 5个建筑物)中的大部分时间都用于获取住宅数据。
// Variant 2: fetch building, residence and vabidata columns in one joined
// query, regroup the flat rows into one entry per building in PHP, then add
// the geometry per building and echo the result as JSON.
//
// Path segments 2-5 hold either a bounding box (minlat, minlon, maxlat,
// maxlon) or a filter keyword in segment 2 followed by a numeric
// neighborhood id in segment 3.
$path2 = Config::Path(2);//minlat
$path3 = Config::Path(3);//minlon
$path4 = Config::Path(4);//maxlat
$path5 = Config::Path(5);//maxlon

// is_numeric() is used on its own (no extra truthiness test) so that a
// coordinate of "0" is accepted; the string "0" is falsy in PHP and was
// rejected by the previous `$x && is_numeric($x)` checks.
$isBoundingBox = is_numeric($path2) && is_numeric($path3) &&
                 is_numeric($path4) && is_numeric($path5);

// "district" is the keyword this variant has always used; "neighborhood" is
// accepted as well because the filter is applied to `neighborhoodid`.
$byNeighborhood = ($path2 === "district" || $path2 === "neighborhood") &&
                  is_numeric($path3);

if ($isBoundingBox || $byNeighborhood) {
    // Every path value interpolated below has passed is_numeric(), so it
    // cannot contain a quote character.
    if ($byNeighborhood) {
        $results = DBUtils::FetchQuery("
            SELECT b.`buildingid`, b.`neighborhoodid`, b.`constructionyear`,
                   b.`height`, b.`multifamily`, n.`municipalityid`, r.`residenceid`,
                   r.`surface`, r.`postalcode`, r.`streetname`, r.`housenumber`,
                   r.`clusternr`, r.`owner`, r.`usageelec`, r.`usagegas`,
                   r.`nomupd`, r.`cpwin`, r.`cpble`, r.`enet`, r.`gnet`,
                   v.`type`
            FROM `neighborhood` n
            INNER JOIN `n_building` b ON b.`neighborhoodid` = n.`neighborhoodid`
            INNER JOIN `n_residence` r ON r.`buildingid` = b.`buildingid`
            LEFT OUTER JOIN `n_vabidata` v ON r.`residenceid` = v.`residenceid`
            WHERE b.`neighborhoodid` = '$path3';
        ");
    } else {
        $results = DBUtils::FetchQuery("
            SELECT b.`buildingid`, b.`neighborhoodid`, b.`constructionyear`,
                   b.`height`, b.`multifamily`, n.`municipalityid`, r.`residenceid`,
                   r.`surface`, r.`postalcode`, r.`streetname`, r.`housenumber`,
                   r.`clusternr`, r.`owner`, r.`usageelec`, r.`usagegas`,
                   r.`nomupd`, r.`cpwin`, r.`cpble`, r.`enet`, r.`gnet`,
                   v.`type`
            FROM `neighborhood` n
            INNER JOIN `n_building` b ON b.`neighborhoodid` = n.`neighborhoodid`
            INNER JOIN `n_residence` r ON r.`buildingid` = b.`buildingid`
            LEFT OUTER JOIN `n_vabidata` v ON r.`residenceid` = v.`residenceid`
            WHERE r.`geolat` >= '$path2'
              AND r.`geolon` >= '$path3'
              AND r.`geolat` <= '$path4'
              AND r.`geolon` <= '$path5';
        ");
    }

    if ($results && count($results) > 0) {
        // Regroup the flat rows: one entry per building, keyed by buildingid
        // while collecting, each holding the list of its residences.
        $buildings = array();
        foreach ($results as $row) {
            $buildingId = $row['buildingid'];
            if (!array_key_exists($buildingId, $buildings)) {
                $buildings[$buildingId] = array(
                    "buildingid" => $row['buildingid'],
                    "buurtid" => $row['neighborhoodid'],
                    "constructionyear" => $row['constructionyear'],
                    "height" => $row['height'],
                    "multifamily" => $row['multifamily'],
                    "gemeenteid" => $row['municipalityid'],
                    "res" => array()
                );
            }
            $buildings[$buildingId]['res'][] = array(
                "residenceid" => $row['residenceid'],
                "surface" => $row['surface'],
                "postalcode" => $row['postalcode'],
                "streetname" => $row['streetname'],
                "housenumber" => $row['housenumber'],
                "clusternr" => $row['clusternr'],
                "owner" => $row['owner'],
                "usageelec" => $row['usageelec'],
                "usagegas" => $row['usagegas'],
                "nomupd" => $row['nomupd'],
                "cpwin" => $row['cpwin'],
                "cpble" => $row['cpble'],
                "enet" => $row['enet'],
                "gnet" => $row['gnet'],
                "type" => $row['type']
            );
        }

        // Drop the buildingid keys so json_encode() emits a JSON array.
        $buildings = array_values($buildings);

        // NOTE(review): this is still one geometry query per building; a
        // single `IN (...)` query grouped in PHP would remove these round
        // trips.
        foreach ($buildings as $i => $building) {
            $buildingId = $building['buildingid'];

            // ORDER BY makes the point sequence deterministic; without it the
            // row order of the result is not guaranteed by SQL.
            $buildinggeo = DBUtils::FetchQuery("
                SELECT bg.`lat`, bg.`lon`
                FROM `n_buildinggeo` bg
                WHERE bg.`buildingid` = '$buildingId'
                ORDER BY bg.`order`;
            ");
            if ($buildinggeo && count($buildinggeo) > 0) {
                $buildings[$i]['geo'] = $buildinggeo;
            }
        }
        echo json_encode($buildings);
    }
}
然而,这种方法似乎比之前的做法慢了 30-70%。我的问题是:有人能找出这两种查询方式性能都这么差的原因(最好还有解决方案)吗?如果您有任何疑问或需要其他信息,请告诉我。
修改
这是EXPLAIN对第二个查询(一体化)的边界框版本的结果:
答案 0 :(得分:2)
WHERE b.`neighborhoodid` = '$path3'
GROUP BY b.`buildingid`
表 b(n_building)需要 INDEX(neighborhoodid, buildingid)——列按此顺序。
表 r(n_residence)需要 INDEX(buildingid)。
表 r(n_residence)还需要 INDEX(geolat, geolon, residenceid)——列按此顺序。
(请提供 SHOW CREATE TABLE 的输出,以便我们查看您已有的索引。)
WHERE r.`geolat` >= '$path2'
AND r.`geolon` >= '$path3'
AND r.`geolat` <= '$path4'
AND r.`geolon` <= '$path5'
按现有写法,这组条件无法得到有效优化:B-tree 复合索引只能对第一个范围列(geolat)做范围扫描,geolon 上的范围条件无法进一步缩小扫描范围。有关如何通过 lat / lon 处理大型搜索,请参阅application。