这是我的第一个 Solr 索引项目,需要从 OpenCart 电子商务网站导入产品数据。导入时获取到的记录数是正确的(10910 条),但只处理了 173 条。如果有人能帮我找出原因,不胜感激。
"Total Requests made to DataSource":"1",
"Total Rows Fetched":"10910",
"Total Documents Processed":"173",
"Total Documents Skipped":"0",
"Full Dump Started":"2016-07-27 11:22:58",
"":"Indexing completed. Added/Updated: 173 documents. Deleted 0
data-config.xml
<dataConfig>
  <!-- JDBC connection used by every entity below (MySQL, database "dbname"). -->
  <dataSource type="JdbcDataSource"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://localhost/dbname"
              user="root"
              password="password" />
  <document name="doc">
    <!--
      One Solr document per row returned by "query".
      RegexTransformer is enabled so that the fields carrying splitBy
      can be broken into multiple values on the given separator.
      NOTE(review): the query and deltaImportQuery values look like elided
      placeholders; confirm the real SQL before using this file.
    -->
    <entity name="dbname"
            transformer="RegexTransformer"
            query="SELECT Query "
            deltaImportQuery="SELECT "
            deltaQuery=" SELECT p.product_id as id, p.date_modified FROM oc_product AS p WHERE p.date_modified > '${dataimporter.last_index_time}'">
      <!-- Single-valued columns copied straight from the result set. -->
      <field column="id" sourceColName="id" />
      <field column="model" sourceColName="model" />
      <field column="price" sourceColName="price" />
      <field column="selling_price" sourceColName="selling_price" />
      <field column="stock_status" sourceColName="stock_status" />
      <field column="name" sourceColName="name" />
      <field column="set_description" sourceColName="set_description" />
      <field column="description" sourceColName="description" />
      <!--
        Comma-separated columns, split on "," by RegexTransformer.
        The matching schema fields must accept multiple values.
        NOTE(review): the error log for doc 11856 shows filter_ids reaching
        Solr as the unsplit string "12,13,19,24,43,58,62"; verify that the
        split is actually applied to the columns the real query returns.
      -->
      <field column="categories" sourceColName="categories" splitBy="," />
      <field column="category_ids" sourceColName="category_ids" splitBy="," />
      <field column="filter_ids" sourceColName="filter_ids" splitBy="," />
      <field column="filters" sourceColName="filters" splitBy="," />
      <field column="store_ids" sourceColName="store_ids" splitBy="," />
    </entity>
  </document>
</dataConfig>
managed-schema —— 基于 Solr 6.1.0 自带 configset 的默认配置,做了以下更改:
<!-- Product fields populated by the DataImportHandler entity in data-config.xml. -->

<!-- Identity and single-valued product attributes. -->
<field name="id" type="int" indexed="true" stored="true" required="true" multiValued="false" />
<field name="model" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="price" type="float" indexed="true" stored="true" multiValued="false" />
<field name="selling_price" type="float" indexed="true" stored="true" />
<field name="stock_status" type="string" indexed="true" stored="true" />
<field name="name" type="string" indexed="true" stored="true" />
<field name="set_description" type="text_general" indexed="true" stored="true" />
<field name="description" type="text_general" indexed="true" stored="true" />

<!--
  Fields that data-config.xml splits on "," (splitBy). A split column yields
  several values per document, so each of these is declared multiValued;
  categories and filters previously were not, unlike the *_ids fields.
-->
<field name="categories" type="string" indexed="true" stored="true" multiValued="true" />
<field name="category_ids" type="int" indexed="true" stored="true" multiValued="true" />
<field name="filter_ids" type="int" indexed="true" stored="true" multiValued="true" />
<field name="filters" type="string" indexed="true" stored="true" multiValued="true" />
<field name="store_ids" type="int" indexed="true" stored="true" multiValued="true" />
solrconfig.xml —— 在默认配置的基础上做了以下更改:
<!-- Load the DataImportHandler jars (and their extras) from the Solr distribution. -->
<lib dir="../../../contrib/dataimporthandler/lib/"
     regex=".*\.jar" />
<lib dir="../../../contrib/dataimporthandler-extras/lib/"
     regex=".*\.jar" />
<lib dir="../../../dist/"
     regex="solr-dataimporthandler-\d.*\.jar" />

<!-- Expose the importer at /dataimport, configured by data-config.xml. -->
<requestHandler name="/dataimport"
                class="org.apache.solr.handler.dataimport.DataImportHandler">
  <lst name="defaults">
    <str name="config">data-config.xml</str>
  </lst>
</requestHandler>
错误日志:
Error creating document : SolrInputDocument(fields: [selling_price=299.0000,​ filter_ids=12,​ 13,​ 19,​ 24,​ 43,​ 58,​ 62,​ stock_status=In Stock,​ store_ids=0,​ 2,​ description=Kurti length = 44 inches. No color bleed. Interlock stitching done. Side slit protection stitching done. Double bottom fold stitching done.,​ filters=Long,​ लॉंग,​ Straight,​ स्ट्रेट,​ Full Sleeve,​ फुल स्लीव,​ Solid,​ सॉलिड,​ Rayon,​ रेयॉन,​ V Neck,​ वी नेक,​ Size Set,​ साइज़ सेट,​ set_description=1 Set = Total 5 pieces,​ 1 each of 36,​ 38,​ 40,​ 42,​ 44,​ price=290.0000,​ name=Green Rayon Straight Solid Long V Neck Kurti,​ model=GNM_JP_GMI026,​ id=11856,​ category_ids=0,​ 61,​ categories=Green Rayon Straight Solid Long V Neck Kurti,​ ग्रीन रेयॉन स्ट्रेट सॉलिड लॉंग वी नेक कुर्ती,​ _version_=1541015877146640386])
org.apache.solr.common.SolrException: ERROR: [doc=11856] Error adding field 'filter_ids'='12,13,19,24,43,58,62' msg=For input string: "12,13,19,24,43,58,62"
at org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:177)
at org.apache.solr.update.AddUpdateCommand.getLuceneDocument(AddUpdateCommand.java:82)
at org.apache.solr.update.DirectUpdateHandler2.doNormalUpdate(DirectUpdateHandler2.java:280)
at org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:214)
at org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:169)
at org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:68)
at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48)
at org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:939)
at org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1094)
at org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:720)
at org.apache.solr.update.processor.LogUpdateProcessorFactory$LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:103)
at org.apache.solr.handler.dataimport.SolrWriter.upload(SolrWriter.java:74)
at org.apache.solr.handler.dataimport.DataImportHandler$1.upload(DataImportHandler.java:260)
at org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:524)
at org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:414)
at org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:329)
at org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:232)
at org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:416)
at org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:480)
at org.apache.solr.handler.dataimport.DataImporter$1.run(DataImporter.java:461)
Caused by: java.lang.NumberFormatException: For input string: "12,13,19,24,43,58,62"
at java.lang.NumberFormatException.forInputString(Unknown Source)
at java.lang.Integer.parseInt(Unknown Source)
at java.lang.Integer.parseInt(Unknown Source)
at org.apache.solr.schema.TrieField.createField(TrieField.java:702)
at org.apache.solr.schema.TrieField.createFields(TrieField.java:741)
at org.apache.solr.update.DocumentBuilder.addField(DocumentBuilder.java:47)
at org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:122)
答案 0 :(得分:0)
问题找到了:我把字段 category_ids、filter_ids 定义成了整数类型,却给它们传入了逗号分隔的值。虽然我设置了 multiValued="true" 并使用 splitBy 按逗号拆分,但这样仍然不起作用。
因此,只有这些字段仅含单个值的产品才被编入了索引。
我将数据类型改为字符串后,所有记录都成功编入了索引。
索引已完成。添加/更新:10910 个文档。删除:0 个文档。(耗时:28 秒)请求数:1,提取:10,910(390/s),跳过:0,已处理:10,910(390/s),开始于:2 分钟前
感谢 Uwe Allner 提供的帮助。