Solr 6 dataimport获取了10000条记录但仅处理了173条记录

时间:2016-07-27 11:51:26

标签: solr

这是我的第一个 Solr 索引项目,从 OpenCart 电子商务网站导入产品数据。它获取了正确数量的记录(10910 条),但只处理了 173 条。如果有人能帮我找出原因,不胜感激。

"Total Requests made to DataSource":"1",
"Total Rows Fetched":"10910",
"Total Documents Processed":"173",
"Total Documents Skipped":"0",
"Full Dump Started":"2016-07-27 11:22:58",
"":"Indexing completed. Added/Updated: 173 documents. Deleted 0 documents."

data-config.xml

<!--
  DataImportHandler configuration for importing product rows over JDBC.
  The entity enables RegexTransformer so that the fields carrying
  splitBy="," are split on commas into multiple values; the matching
  schema fields therefore need multiValued="true".
  NOTE(review): query and deltaImportQuery look abbreviated in this listing;
  confirm the real statements return every column named in the fields below.
-->
<dataConfig>
    <dataSource type="JdbcDataSource"
                driver="com.mysql.jdbc.Driver"
                url="jdbc:mysql://localhost/dbname"
                user="root"
                password="password" />
    <document name="doc">
        <!-- deltaQuery selects the ids of products modified after the last index time. -->
        <entity name="dbname"
                transformer="RegexTransformer"
                query="SELECT Query "
                deltaImportQuery="SELECT "
                deltaQuery="        SELECT p.product_id as id, p.date_modified         FROM oc_product AS p         WHERE p.date_modified &gt; '${dataimporter.last_index_time}'">
            <!-- Single-valued columns, copied as-is. -->
            <field column="id" sourceColName="id" />
            <field column="model" sourceColName="model" />
            <field column="price" sourceColName="price" />
            <field column="selling_price" sourceColName="selling_price" />
            <field column="stock_status" sourceColName="stock_status" />
            <field column="name" sourceColName="name" />
            <field column="set_description" sourceColName="set_description" />
            <field column="description" sourceColName="description" />
            <!-- Comma-separated columns, split into one value per item by RegexTransformer. -->
            <field column="categories" sourceColName="categories" splitBy="," />
            <field column="category_ids" sourceColName="category_ids" splitBy="," />
            <field column="filter_ids" sourceColName="filter_ids" splitBy="," />
            <field column="filters" sourceColName="filters" splitBy="," />
            <field column="store_ids" sourceColName="store_ids" splitBy="," />
        </entity>
    </document>
</dataConfig>

managed-schema - 使用 Solr 6.1.0 中 configtest 提供的默认配置,并做了以下更改

<!-- Product fields added to the default managed-schema. -->
<!-- Single-valued fields. -->
<field name="id" type="int" indexed="true" stored="true" required="true" multiValued="false" />
<field name="model" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="price" type="float" indexed="true" stored="true" multiValued="false" />
<field name="selling_price" type="float" indexed="true" stored="true" />
<field name="stock_status" type="string" indexed="true" stored="true" />
<field name="name" type="string" indexed="true" stored="true" />
<field name="set_description" type="text_general" indexed="true" stored="true" />
<field name="description" type="text_general" indexed="true" stored="true" />
<!--
  Fields below are populated through splitBy="," in data-config.xml, so each
  document can carry several values; every one of them must be multiValued.
  categories and filters were previously single-valued.
-->
<field name="categories" type="string" indexed="true" stored="true" multiValued="true" />
<field name="category_ids" type="int" indexed="true" stored="true" multiValued="true" />
<field name="filter_ids" type="int" indexed="true" stored="true" multiValued="true" />
<field name="filters" type="string" indexed="true" stored="true" multiValued="true" />
<field name="store_ids" type="int" indexed="true" stored="true" multiValued="true" />

solrconfig.xml - 使用默认配置,并做了以下更改

<!-- Put the DataImportHandler jars on the classpath: contrib libraries plus the handler jar from dist. -->
<lib dir="../../../contrib/dataimporthandler/lib/"
     regex=".*\.jar" />
<lib dir="../../../contrib/dataimporthandler-extras/lib/"
     regex=".*\.jar" />
<lib dir="../../../dist/"
     regex="solr-dataimporthandler-\d.*\.jar" />

<!-- Register the import handler at /dataimport; its default config file is data-config.xml. -->
<requestHandler name="/dataimport"
                class="org.apache.solr.handler.dataimport.DataImportHandler">
  <lst name="defaults">
    <str name="config">data-config.xml</str>
  </lst>
</requestHandler>

错误日志:

Error creating document : SolrInputDocument(fields: [selling_price=299.0000,&#8203; filter_ids=12,&#8203; 13,&#8203; 19,&#8203; 24,&#8203; 43,&#8203; 58,&#8203; 62,&#8203; stock_status=In Stock,&#8203; store_ids=0,&#8203; 2,&#8203; description=Kurti length = 44 inches. No color bleed. Interlock stitching done. Side slit protection stitching done. Double bottom fold stitching done.,&#8203; filters=Long,&#8203; लॉंग,&#8203; Straight,&#8203; स्ट्रेट,&#8203; Full Sleeve,&#8203; फुल स्लीव,&#8203; Solid,&#8203; सॉलिड,&#8203; Rayon,&#8203; रेयॉन,&#8203; V Neck,&#8203; वी नेक,&#8203; Size Set,&#8203; साइज़ सेट,&#8203; set_description=1 Set = Total 5 pieces,&#8203; 1 each of 36,&#8203; 38,&#8203; 40,&#8203; 42,&#8203; 44,&#8203; price=290.0000,&#8203; name=Green Rayon Straight Solid Long V Neck Kurti,&#8203; model=GNM_JP_GMI026,&#8203; id=11856,&#8203; category_ids=0,&#8203; 61,&#8203; categories=Green Rayon Straight Solid Long V Neck Kurti,&#8203; ग्रीन रेयॉन स्ट्रेट सॉलिड लॉंग वी नेक कुर्ती,&#8203; _version_=1541015877146640386])

org.apache.solr.common.SolrException: ERROR: [doc=11856] Error adding field 'filter_ids'='12,13,19,24,43,58,62' msg=For input string: "12,13,19,24,43,58,62"

at org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:177)

at org.apache.solr.update.AddUpdateCommand.getLuceneDocument(AddUpdateCommand.java:82)

at org.apache.solr.update.DirectUpdateHandler2.doNormalUpdate(DirectUpdateHandler2.java:280)

at org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:214)

at org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:169)

at org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:68)

at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48)

at org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:939)

at org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1094)

at org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:720)

at org.apache.solr.update.processor.LogUpdateProcessorFactory$LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:103)

at org.apache.solr.handler.dataimport.SolrWriter.upload(SolrWriter.java:74)

at org.apache.solr.handler.dataimport.DataImportHandler$1.upload(DataImportHandler.java:260)

at org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:524)

at org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:414)

at org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:329)

at org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:232)

at org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:416)

at org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:480)

at org.apache.solr.handler.dataimport.DataImporter$1.run(DataImporter.java:461)

Caused by: java.lang.NumberFormatException: For input string: "12,13,19,24,43,58,62"

at java.lang.NumberFormatException.forInputString(Unknown Source)

at java.lang.Integer.parseInt(Unknown Source)

at java.lang.Integer.parseInt(Unknown Source)

at org.apache.solr.schema.TrieField.createField(TrieField.java:702)

at org.apache.solr.schema.TrieField.createFields(TrieField.java:741)

at org.apache.solr.update.DocumentBuilder.addField(DocumentBuilder.java:47)

at org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:122)

1 个答案:

答案 0 :(得分:0)

找到问题了:我把字段 category_ids、filter_ids 的数据类型设为整数(int),却给它们赋了逗号分隔的值。虽然我设置了 multiValued="true" 并用 splitBy 按逗号拆分,但并没有生效。

因此,只有这些字段为单个值的产品才被编入了索引。

我将数据类型更改为字符串,并将所有记录编入索引。

索引已完成。添加/更新:10910文档。删除了0个文件。 (时长:28秒) 请求数:1,提取:10,910 390 / s,跳过:0,已处理:10,910 390 / s 开始于:2分钟前

感谢 Uwe Allner 的帮助。