无法在Solr中搜索字段

时间:2013-09-25 13:08:41

标签: search solr field

我无法从solr索引中搜索名为doc_id的特定字段。 我得到的数据来自DB2,而doc_id是表中的PK。

这里是数据导入和架构xml

请帮忙

<!--
  DataImportHandler configuration: builds one Solr document per row of
  DOC_DETAILS joined with AUTHOR_DETAILS, then runs one sub-query per child
  entity for every parent row to attach annotation-derived values.
-->
<dataConfig>
  <!-- DB2 JDBC connection, opened read-only with READ_UNCOMMITTED isolation.
       NOTE(review): user and password are stored here in plain text. -->
  <dataSource driver="com.ibm.db2.jcc.DB2Driver" url="jdbc:db2://infbz1051.in.ibm.com:50000/SCION" user="db2admin" password="db2admin" batchSize="10000" readOnly="true" autoCommit="true" transactionIsolation="TRANSACTION_READ_UNCOMMITTED" connectionTimeout="5000000" />
  <document name="socialpost">        
    <!-- Root entity "post" (pk="doc_id").
         query:            full import, DOC_DETAILS joined to AUTHOR_DETAILS on AUTHOR_ID.
         deltaQuery:       selects DOC_ID of rows whose lastupdatedtime is later than
                           ${dataimporter.last_index_time}.
         deltaImportQuery: re-reads the joined row for ${dataimporter.delta.DOC_ID}.
         NOTE(review): "query" aliases d.CUSTOMER as customer, deltaImportQuery does not.
         NOTE(review): AUTHOR_ID is selected twice (d.AUTHOR_ID and a.AUTHOR_ID).
         NOTE(review): PARENT_ID, doc_url, LIKES, FWD, COMMENTS and the author_* aliases
         have no same-named field in the schema snippet shown with this config
         (it declares parentid, url, likes_fwd_comments); confirm the intended mapping. -->
    <entity name="post" pk="doc_id" query="select d.DOC_ID,d.SRC_DOC_ID,d.PARENT_ID,d.SRC_PARENT_ID,d.AUTHOR_ID,d.FORUM,d.CUSTOMER as customer,d.TEXT as post_text,d.text as raw_text,d.URL as doc_url,d.LIKES,d.FWD,d.COMMENTS,a.AUTHOR_ID,a.AUTHOR_NAME,a.EMAIL as author_email,a.AGE as author_age,a.GENDER as author_gender from DOC_DETAILS d,AUTHOR_DETAILS a where d.AUTHOR_ID=a.AUTHOR_ID" deltaImportQuery="select d.DOC_ID,d.SRC_DOC_ID,d.PARENT_ID,d.SRC_PARENT_ID,d.AUTHOR_ID,d.FORUM,d.CUSTOMER,d.TEXT as post_text,d.text as raw_text,d.URL as doc_url,d.LIKES,d.FWD,d.COMMENTS,a.AUTHOR_ID,a.AUTHOR_NAME,a.EMAIL as author_email,a.AGE as author_age,a.GENDER as author_gender from DOC_DETAILS d,AUTHOR_DETAILS a where  d.AUTHOR_ID=a.AUTHOR_ID AND d.doc_id='${dataimporter.delta.DOC_ID}'  with ur" deltaQuery="select DOC_ID from doc_details where lastupdatedtime &gt; '${dataimporter.last_index_time}' with ur">
       <!-- Child entities below are each keyed by the parent's ${post.DOC_ID}.
            None of their output columns (concept, category, hcategory, tcategory,
            isparent, rankscore, posttype, sentimentinfo) appears in the schema
            snippet shown with this config; presumably declared elsewhere, verify. -->
       <!-- concept: concept values from SENTIMENT_AND_CONCEPTS for this document. -->
       <entity name="concept" query="SELECT S.concept as concept FROM SENTIMENT_AND_CONCEPTS S where S.DOC_ID='${post.DOC_ID}' with ur" >
       </entity>
       <!-- category: ANNOTATIONMETAVALUES of annotator 125 for this document. -->
       <entity name="category" query="SELECT ann.doc_id, ann.ANNOTATIONMETAVALUES as category FROM annotations ann where ann.annotator_id=125  and ann.doc_id='${post.DOC_ID}' with ur" >
       </entity>
       <!-- hcategory: ANNOTATIONMETAVALUES of annotator 129 for this document. -->
       <entity name="hcategory" query="SELECT ann.doc_id, ann.ANNOTATIONMETAVALUES as hcategory FROM annotations ann where ann.annotator_id=129  and ann.doc_id='${post.DOC_ID}' with ur" >
       </entity>
       <!-- tcategory: ANNOTATIONMETAVALUES of annotator 127 for this document. -->
       <entity name="tcategory" query="SELECT ann.doc_id, ann.ANNOTATIONMETAVALUES as tcategory FROM annotations ann where ann.annotator_id=127  and ann.doc_id='${post.DOC_ID}' with ur" >
       </entity>
       <!-- isparent: count of doc_details rows for this document whose parent_id equals its own doc_id. -->
       <entity name="isparent" query="SELECT count(*) as isparent  FROM db2admin.doc_details where doc_id='${post.DOC_ID}' and parent_id=doc_id with ur" >
       </entity>
       <!-- rankscore: CATEGORY_RANK_SCORE from CATEGORY_ASSIGNMENT_TABLE, matched to the
            annotator-125 annotation by doc_id and category name. -->
       <entity name="rankscore" query="select c.CATEGORY_RANK_SCORE as rankscore from CATEGORY_ASSIGNMENT_TABLE c,ANNOTATIONS A where a.doc_id=c.doc_id AND a.ANNOTATIONMETAVALUES =c.category_name AND a.ANNOTATOR_ID=125 and  a.doc_id='${post.DOC_ID}' with ur" >
       </entity>
       <!-- posttype: ANNOTATIONMETAVALUES of annotator 35 for this document. -->
       <entity name="posttype" query="SELECT ann.doc_id, ann.ANNOTATIONMETAVALUES as posttype FROM annotations ann where ann.annotator_id=35  and ann.doc_id='${post.DOC_ID}' with ur" >
       </entity>
       <!-- sentimentinfo: CATEGORY_SENTIMENT obtained with the same join as rankscore. -->
       <entity name="sentimentinfo" query="select c.CATEGORY_SENTIMENT as sentimentinfo from CATEGORY_ASSIGNMENT_TABLE c,ANNOTATIONS A where a.doc_id=c.doc_id AND a.ANNOTATIONMETAVALUES =c.category_name AND a.ANNOTATOR_ID=125 and  a.doc_id='${post.DOC_ID}' with ur" >
       </entity>
    </entity>
  </document>
</dataConfig>

schema.xml 的内容如下:

<!-- Field declarations for the imported social-post documents.
     NOTE(review): most fields use type "string"; assuming it maps to solr.StrField
     as in the example schema, values are indexed verbatim (no tokenization), so
     only whole-value or wildcard matches are possible on them. -->
<!-- Mandatory document identifier; the import config declares pk="doc_id". -->
<field name="doc_id" type="string" indexed="true" stored="true" required="true" multiValued="false"/>
<field name="customer" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="src_doc_id" type="string" indexed="true" stored="true" omitNorms="true" multiValued="false"/>
<field name="parentid" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="src_parent_id" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="author_id" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="forum" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="timeposted" type="date" indexed="true" stored="true" multiValued="false"/>
<!-- Post body, with term vectors, positions and offsets enabled. -->
<field name="post_text" type="string" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" multiValued="false"/>
<!-- Raw text copy for retrieval convenience; multiValued so it can hold both the
     imported raw_text column and the value copied from post_text. -->
<field name="raw_text" type="string" indexed="true" stored="true" omitNorms="true" termVectors="true" termPositions="true" termOffsets="true" multiValued="true"/>
<!-- Solr only recognises the case-sensitive element copyField with a "source"
     attribute; the earlier spelling (copyfield / src) was never applied.
     The destination must be a declared field, so it targets raw_text. -->
<copyField source="post_text" dest="raw_text"/>
<field name="url" type="string" indexed="true" stored="true" omitNorms="true" multiValued="false"/>
<field name="likes_fwd_comments" type="int" indexed="true" stored="true" multiValued="false"/>
<!-- Last modification time of the document. -->
<field name="lastupdatedtime" type="date" indexed="true" stored="true" omitNorms="true" multiValued="false"/>

1 个答案:

答案 0 :(得分:0)

SQL 是严格按字面处理的——这里各个 SQL 查询返回的结果会包含名为“d.DOC_ID”或“ann.doc_id”的列……而不是 Solr schema 中所写的“doc_id”。

在某些 SQL 字段上,您确实使用了“as customer”之类的别名,这意味着这些特定字段可以正常工作——前提是整个文档通过验证,例如标记为 required(或 uniqueKey)的字段都存在,等等。

您可能需要在select语句中为每个字段添加“AS xxx”。

如果要搜索那些使用了“as”语法的字段,请注意其中大多数的类型都设置为“string”。在示例 schema 中,“string”类型是 StrField,它不做任何分析(分词),因此只能进行整个字段的精确匹配或通配符匹配。对于 StrField 类型的字段,您无法搜索包含多个单词的字符串中的单个单词。