我正在使用 MySQL 版的生命目录(Catalogue of Life)数据库(http://www.catalogueoflife.org/services/index.html)。该数据库包含物种科学名称的信息,既包括当前有效的接受名,也包括无效或过时的同义词。这份 PDF(http://www.catalogueoflife.org/services/Database_documentation.pdf)解释了数据库的工作方式,以下是用于创建表并导入数据的代码:
-- Create the schema for the Catalogue of Life 2011 Annual Checklist import and
-- make it the default database for the statements that follow.
--   * IF NOT EXISTS keeps the script re-runnable; the tables below are already
--     recreated with DROP TABLE IF EXISTS.
--   * utf8 matches the DEFAULT CHARSET declared on every table in this script.
CREATE DATABASE IF NOT EXISTS icol2011ac DEFAULT CHARACTER SET utf8;
USE icol2011ac;
-- Save the session settings that are restored from the @OLD_* user variables
-- after the table definitions. Without these saves the variables are NULL and
-- the restore statements would try to assign NULL to the session settings.
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES */;
--
-- `author_string`: lookup table of author citations.
-- The UNIQUE index on `string` means each distinct citation is stored once.
--
DROP TABLE IF EXISTS `author_string`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `author_string` (
    `id`     INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `string` VARCHAR(255)     NOT NULL COMMENT 'Name of author(s), who described the taxon or published the current combination and the year when appropriate.',
    PRIMARY KEY (`id`),
    UNIQUE INDEX `string` (`string`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 79193
    DEFAULT CHARSET = utf8
    COMMENT = 'Author citations of taxa and synonyms';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- `hybrid`: one row per (hybrid taxon, parent taxon) pair.
-- The composite primary key prevents the same parent being listed twice for
-- a hybrid; the secondary index supports lookups by parent.
--
DROP TABLE IF EXISTS `hybrid`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `hybrid` (
    `taxon_id`        INT(10) UNSIGNED NOT NULL,
    `parent_taxon_id` INT(10) UNSIGNED NOT NULL COMMENT 'References two (or three) parent taxon ids',
    PRIMARY KEY (`taxon_id`, `parent_taxon_id`),
    INDEX `parent_taxon_id` (`parent_taxon_id`)
)
    ENGINE = MyISAM
    DEFAULT CHARSET = utf8
    COMMENT = 'Links to parent taxa of hybrids';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- `reference`: bibliographic references.
-- All descriptive columns are nullable; `authors`, `year` and `uri_id` are
-- indexed for lookups. `year` is a string, not a numeric or date type.
--
DROP TABLE IF EXISTS `reference`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `reference` (
    `id`      INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `authors` VARCHAR(255)     DEFAULT NULL COMMENT 'Complete author string',
    `year`    VARCHAR(25)      DEFAULT NULL COMMENT 'Year(s) of publication',
    `title`   VARCHAR(255)     DEFAULT NULL COMMENT 'Title of the publication',
    `text`    TEXT             COMMENT 'Additional information pertaining to the publication',
    `uri_id`  INT(10) UNSIGNED DEFAULT NULL COMMENT 'Link to downloadable version',
    PRIMARY KEY (`id`),
    INDEX `authors` (`authors`),
    INDEX `year` (`year`),
    INDEX `uri_id` (`uri_id`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 60462
    DEFAULT CHARSET = utf8
    COMMENT = 'References used for taxa, common names and synonyms';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- `reference_to_synonym`: junction table pairing references with synonyms.
-- The composite primary key makes each (reference, synonym) pair unique; the
-- extra index serves lookups that start from the synonym side.
--
DROP TABLE IF EXISTS `reference_to_synonym`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `reference_to_synonym` (
    `reference_id` INT(10) UNSIGNED NOT NULL,
    `synonym_id`   INT(10) UNSIGNED NOT NULL,
    PRIMARY KEY (`reference_id`, `synonym_id`),
    INDEX `synonym_id` (`synonym_id`)
)
    ENGINE = MyISAM
    DEFAULT CHARSET = utf8
    COMMENT = 'Links references to synonyms';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- `reference_to_taxon`: junction table pairing references with taxa.
-- The composite primary key makes each (reference, taxon) pair unique; the
-- extra index serves lookups that start from the taxon side.
--
DROP TABLE IF EXISTS `reference_to_taxon`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `reference_to_taxon` (
    `reference_id` INT(10) UNSIGNED NOT NULL,
    `taxon_id`     INT(10) UNSIGNED NOT NULL,
    PRIMARY KEY (`reference_id`, `taxon_id`),
    INDEX `taxon_id` (`taxon_id`)
)
    ENGINE = MyISAM
    DEFAULT CHARSET = utf8
    COMMENT = 'Links references to taxa';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- `scientific_name_element`: dictionary of the individual words that make up
-- scientific names. The UNIQUE index stores each element once, so a full
-- name is assembled by joining several rows of this table.
--
DROP TABLE IF EXISTS `scientific_name_element`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `scientific_name_element` (
    `id`           INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `name_element` VARCHAR(100)     NOT NULL COMMENT 'Basic element of a scientific name; e.g. the epithet argentatus as used in Larus argentatus argenteus',
    PRIMARY KEY (`id`),
    UNIQUE INDEX `name_element` (`name_element`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 204459
    DEFAULT CHARSET = utf8
    COMMENT = 'Individual elements used to generate a scientific name';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- `scientific_name_status`: small lookup table of name statuses.
-- The key is a TINYINT and each status label is unique.
--
DROP TABLE IF EXISTS `scientific_name_status`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `scientific_name_status` (
    `id`          TINYINT(2) UNSIGNED NOT NULL AUTO_INCREMENT,
    `name_status` VARCHAR(50)         NOT NULL COMMENT 'Name status of a taxon',
    PRIMARY KEY (`id`),
    UNIQUE INDEX `name_status` (`name_status`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 6
    DEFAULT CHARSET = utf8
    COMMENT = 'Predetermined list of name statuses';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- `scrutiny`: verification records (date plus specialist).
-- `scrutiny_date` holds the parsed date, `original_scrutiny_date` the raw
-- text from the source. The `unique` key covers the combination of date,
-- specialist and original date text.
-- Note: the `scrutiny_date` index repeats the leading column of `unique`.
--
DROP TABLE IF EXISTS `scrutiny`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `scrutiny` (
    `id`                     INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `scrutiny_date`          DATE             DEFAULT NULL COMMENT 'Most recent date a taxon name was verified; must parse correctly',
    `original_scrutiny_date` VARCHAR(100)     DEFAULT NULL COMMENT 'Date as used in the original database; may be incomplete',
    `specialist_id`          INT(10) UNSIGNED NOT NULL COMMENT 'Link to the specialist who examined the validity of a taxon',
    PRIMARY KEY (`id`),
    UNIQUE INDEX `unique` (`scrutiny_date`, `specialist_id`, `original_scrutiny_date`),
    INDEX `scrutiny_date` (`scrutiny_date`),
    INDEX `specialist_id` (`specialist_id`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 1271
    DEFAULT CHARSET = utf8
    COMMENT = 'Latest scrutiny date of a taxon';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- `source_database`: metadata about each contributing source database.
-- Only `name` is mandatory; the pair (`name`, `abbreviated_name`) is unique.
--
DROP TABLE IF EXISTS `source_database`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `source_database` (
    `id`                    INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `name`                  VARCHAR(255)     NOT NULL COMMENT 'Full name of the source database',
    `abbreviated_name`      VARCHAR(50)      DEFAULT NULL COMMENT 'Abbreviated name of the source database',
    `group_name_in_english` VARCHAR(255)     DEFAULT NULL COMMENT 'Name in English of the group(s) treated in the database',
    `authors_and_editors`   VARCHAR(255)     DEFAULT NULL COMMENT 'Optional author(s) and editor(s) of the source database',
    `organisation`          VARCHAR(255)     DEFAULT NULL COMMENT 'Optional organisation which has compiled or is owning the source database',
    `contact_person`        VARCHAR(255)     DEFAULT NULL COMMENT 'Optional contact person of the source database',
    `version`               VARCHAR(25)      DEFAULT NULL COMMENT 'Optional version number of the source database',
    `release_date`          DATE             DEFAULT NULL COMMENT 'Optional most recent release date of the source database',
    `abstract`              TEXT             COMMENT 'Optional free text field describing the source database',
    -- A `taxonomic_coverage` TEXT column was commented out at this position in
    -- the original dump and is intentionally not created.
    PRIMARY KEY (`id`),
    UNIQUE INDEX `name` (`name`, `abbreviated_name`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 79
    DEFAULT CHARSET = utf8
    COMMENT = 'Information about source databases';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- `specialist`: lookup table of specialist names; each name is unique.
--
DROP TABLE IF EXISTS `specialist`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `specialist` (
    `id`   INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `name` VARCHAR(100)     NOT NULL,
    PRIMARY KEY (`id`),
    UNIQUE INDEX `name` (`name`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 182
    DEFAULT CHARSET = utf8
    COMMENT = 'Specialists who have verified the validity of taxa';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- `synonym`: one row per synonym, pointing at the valid taxon it belongs to.
-- The name itself is not stored here; only the links to the taxon, the author
-- citation and the name status are. Each link column has its own index.
--
DROP TABLE IF EXISTS `synonym`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `synonym` (
    `id`                        INT(10) UNSIGNED    NOT NULL AUTO_INCREMENT,
    `taxon_id`                  INT(10) UNSIGNED    NOT NULL COMMENT 'Link to valid taxon to which the synonym relates',
    `author_string_id`          INT(10) UNSIGNED    DEFAULT NULL COMMENT 'Link to author citation of the synonym',
    `scientific_name_status_id` TINYINT(2) UNSIGNED NOT NULL COMMENT 'Link to the name status of the synonym',
    `original_id`               VARCHAR(100)        DEFAULT NULL,
    PRIMARY KEY (`id`),
    INDEX `taxon_id` (`taxon_id`),
    INDEX `author_string_id` (`author_string_id`),
    INDEX `scientific_name_status_id` (`scientific_name_status_id`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 7618428
    DEFAULT CHARSET = utf8
    COMMENT = 'Synonym details linked to a valid taxon';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- `synonym_name_element`: the name elements that spell out each synonym.
-- A synonym has at most one element per taxonomic rank (enforced by the
-- `unique` key). The table has no primary key.
-- Note: the `taxonomic_rank_id` index repeats the leading column of `unique`.
--
DROP TABLE IF EXISTS `synonym_name_element`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `synonym_name_element` (
    `taxonomic_rank_id`          TINYINT(3) UNSIGNED NOT NULL,
    `scientific_name_element_id` INT(10) UNSIGNED    NOT NULL,
    `synonym_id`                 INT(10) UNSIGNED    NOT NULL,
    `hybrid_order`               TINYINT(1) UNSIGNED DEFAULT NULL COMMENT 'Order of parents if synonym is a hybrid; see documentation for details',
    UNIQUE INDEX `unique` (`taxonomic_rank_id`, `synonym_id`),
    INDEX `taxonomic_rank_id` (`taxonomic_rank_id`),
    INDEX `scientific_name_element_id` (`scientific_name_element_id`),
    INDEX `synonym_id` (`synonym_id`)
)
    ENGINE = MyISAM
    DEFAULT CHARSET = utf8
    COMMENT = 'Name elements of a complete synonym';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- `taxon`: one row per taxon, carrying its rank and (optionally) the source
-- database it came from. Both link columns are indexed.
--
DROP TABLE IF EXISTS `taxon`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `taxon` (
    `id`                 INT(10) UNSIGNED    NOT NULL AUTO_INCREMENT,
    `taxonomic_rank_id`  TINYINT(3) UNSIGNED NOT NULL,
    `source_database_id` INT(10) UNSIGNED    DEFAULT NULL,
    `original_id`        VARCHAR(100)        DEFAULT NULL,
    PRIMARY KEY (`id`),
    INDEX `taxonomic_rank_id` (`taxonomic_rank_id`),
    INDEX `source_database_id` (`source_database_id`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 7618427
    DEFAULT CHARSET = utf8
    COMMENT = 'Scientific name elements and hierarchy of a taxon';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table structure for table `taxonomic_coverage`
--
-- Associates a source database with a taxon, plus a `sector` number and a
-- `point_of_attachment` flag (default 0).
-- NOTE(review): the meaning of `sector` and `point_of_attachment` is not
-- visible in this script; see the Catalogue of Life documentation.
--
-- The two id columns are declared UNSIGNED so that they have exactly the same
-- type as the `id` columns of `source_database` and `taxon`, like every other
-- link column in this schema. Identical type and signedness is also required
-- should these ever become real foreign keys.
--
DROP TABLE IF EXISTS `taxonomic_coverage`;
CREATE TABLE `taxonomic_coverage` (
    `source_database_id` int(10) UNSIGNED NOT NULL,
    `taxon_id` int(10) UNSIGNED NOT NULL,
    `sector` tinyint(2) NOT NULL,
    `point_of_attachment` tinyint(1) NOT NULL DEFAULT '0',
    KEY `source_database_id` (`source_database_id`),
    KEY `sector` (`sector`),
    KEY `taxon_id` (`taxon_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
--
-- `taxon_detail`: at most one detail row per taxon (`taxon_id` is the primary
-- key), linking it to an author citation, a name status and a scrutiny record.
-- Note: the index on `scientific_name_status_id` is named
-- `taxononomic_status_id` (sic); the name is kept exactly as in the dump.
--
DROP TABLE IF EXISTS `taxon_detail`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `taxon_detail` (
    `taxon_id`                  INT(10) UNSIGNED    NOT NULL,
    `author_string_id`          INT(10) UNSIGNED    DEFAULT NULL COMMENT 'Link to author citation of the taxon',
    `scientific_name_status_id` TINYINT(2) UNSIGNED NOT NULL,
    `scrutiny_id`               INT(10) UNSIGNED    DEFAULT NULL,
    `additional_data`           TEXT                COMMENT 'Optional free text field describing the taxon',
    PRIMARY KEY (`taxon_id`),
    INDEX `author_string_id` (`author_string_id`),
    INDEX `taxononomic_status_id` (`scientific_name_status_id`),
    INDEX `scrutiny_id` (`scrutiny_id`)
)
    ENGINE = MyISAM
    DEFAULT CHARSET = utf8
    COMMENT = 'Details pertaining to species and infraspecies';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- `taxon_name_element`: gives each taxon exactly one name element
-- (`taxon_id` is the primary key) and an optional `parent_id`.
-- NOTE(review): `parent_id` presumably holds the taxon_id of the parent taxon,
-- so a full name is built by walking up the parents; confirm against the
-- Catalogue of Life documentation.
--
DROP TABLE IF EXISTS `taxon_name_element`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `taxon_name_element` (
    `taxon_id`                   INT(10) UNSIGNED NOT NULL,
    `scientific_name_element_id` INT(10) UNSIGNED NOT NULL,
    `parent_id`                  INT(10) UNSIGNED DEFAULT NULL,
    PRIMARY KEY (`taxon_id`),
    INDEX `scientific_name_element_id` (`scientific_name_element_id`),
    INDEX `parent_id` (`parent_id`)
)
    ENGINE = MyISAM
    DEFAULT CHARSET = utf8;
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- `taxonomic_rank`: lookup table of rank names; each rank name is unique.
-- `marker_displayed` is optional and `standard` is a flag defaulting to 0.
--
DROP TABLE IF EXISTS `taxonomic_rank`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `taxonomic_rank` (
    `id`               TINYINT(3) UNSIGNED NOT NULL AUTO_INCREMENT,
    `rank`             VARCHAR(50)         NOT NULL COMMENT 'Taxonomic rank (e.g. family, subspecies)',
    `marker_displayed` VARCHAR(50)         DEFAULT NULL,
    `standard`         TINYINT(1)          NOT NULL DEFAULT '0',
    PRIMARY KEY (`id`),
    UNIQUE INDEX `rank` (`rank`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 132
    DEFAULT CHARSET = utf8
    COMMENT = 'Predetermined list of taxonomic ranks';
/*!40101 SET character_set_client = @saved_cs_client */;
-- Restore the session settings (SQL mode, key checks, character sets,
-- collation, notes) from the @OLD_* user variables.
-- These statements rely on each @OLD_* variable having been assigned earlier
-- in the same session. A user variable that was never assigned is NULL, and
-- the server rejects NULL for settings such as SQL_MODE.
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
-- Dump completed on 2010-12-16 15:47:12
-- Quick fix so that non-ISO countries and languages can be stored in the ISO
-- lookup tables:
--   * `language` and `country` gain a `standard` flag defaulting to 1
--     (presumably 1 = genuine ISO entry; confirm against the data);
--   * the country code columns are redefined as CHAR(3).
-- NOTE(review): `language`, `country` and `common_name` are not created in the
-- visible part of this script, so these statements need those tables to exist.
-- NOTE(review): the `iso` column comment still says "Alpha-2" although the
-- column is three characters wide; the text is kept exactly as in the dump.
ALTER TABLE `language`
    ADD COLUMN `standard` TINYINT(1) NOT NULL DEFAULT '1';
ALTER TABLE `country`
    ADD COLUMN `standard` TINYINT(1) NOT NULL DEFAULT '1';
ALTER TABLE `country`
    MODIFY COLUMN `iso` CHAR(3) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT 'ISO 3166-1-Alpha-2 code';
ALTER TABLE `common_name`
    MODIFY COLUMN `country_iso` CHAR(3) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT 'Optional country code if usage is restricted to a particular country';
-- PART 2: import the tab-separated dump files into the tables.
--
-- Each file has one row per line ('\n') and tab-separated fields, in the same
-- column order as the target table.
-- CHARACTER SET utf8 is stated explicitly: without it the server decodes the
-- file using character_set_database, which need not be utf8, and non-ASCII
-- text (e.g. author names) would then be stored incorrectly in the utf8 tables.
-- NOTE(review): assumes the dump files are UTF-8 encoded; confirm.
-- NOTE(review): no CREATE TABLE for `region` is visible in this script, so the
-- load into `region` needs that table to be defined elsewhere.
-- NOTE(review): `taxonomic_coverage` is created by this script but no file is
-- loaded into it here.
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/author_string.txt'
    INTO TABLE author_string CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/hybrid.txt'
    INTO TABLE hybrid CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/reference.txt'
    INTO TABLE reference CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/reference_to_synonym.txt'
    INTO TABLE reference_to_synonym CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/reference_to_taxon.txt'
    INTO TABLE reference_to_taxon CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/region.txt'
    INTO TABLE region CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/scientific_name_element.txt'
    INTO TABLE scientific_name_element CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/scientific_name_status.txt'
    INTO TABLE scientific_name_status CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/scrutiny.txt'
    INTO TABLE scrutiny CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/source_database.txt'
    INTO TABLE source_database CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/specialist.txt'
    INTO TABLE specialist CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/synonym.txt'
    INTO TABLE synonym CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/synonym_name_element.txt'
    INTO TABLE synonym_name_element CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/taxon.txt'
    INTO TABLE taxon CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/taxon_detail.txt'
    INTO TABLE taxon_detail CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/taxon_name_element.txt'
    INTO TABLE taxon_name_element CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/taxonomic_rank.txt'
    INTO TABLE taxonomic_rank CHARACTER SET utf8
    FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
我还有一张博物馆标本长表,其中包含科学名称(有效名称和无效名称),我需要根据生命目录数据库进行检查。因此,对于我的表中的每个名称,我需要检查它是否是有效名称,如果它是无效的同义词,请为其指定当前有效的名称。
博物馆标本表中的名称格式如下:属名 种加词 亚种加词 作者/年份,例如:Bellis perennis - L. 每一行都包含属和种的信息,而亚种和作者/年份的信息并不总是提供。
我发现很难想清楚这种分类匹配所需的查询代码,因为一个有效名称或同义词,必须把同一张表中最多三个名称元素(属名元素、种名元素,有时还有亚种名元素)组合起来,再结合其他表中关于分类单元ID、分类单元组和分类单元名称状态的信息,才能构造出来。
我的博物馆标本存储在一个大表中,其中包括属名,物种名称,有时亚种名称和/或作者各自存储在不同的列中。
相应地构建查询代码的解决方案将非常感谢!!非常感谢您花时间来完成这个!!
答案 0 :(得分:0)
我不会尝试用MySQL查询解决您的问题 - 我会使用过程编程语言编写一个程序来处理您描述的复杂性。根据您(或您周围的人)熟悉的内容,您可以使用perl,python甚至php来编写代码。
这个应用程序可以遍历博物馆标本表的每一行,并针对每一行尝试在生命目录中查找匹配。您可能会匹配到多行,可能需要通过添加子类来消除歧义,也可能找不到完全匹配、而只能找到部分匹配的内容。您的程序可以全面地处理这些情况中的每一种,为您提供最合适的输出。