无法搜索Solr字段类型=“text_general_maxlength”?

时间:2015-07-22 12:00:35

标签: solr

我是SOLR的新手,他是我无法搜索的字段 type="text_general_maxlength" 。我尝试使用 <copyField> ,其中我提到字段类型为 "string" ,但它不起作用。我正在给我的schema.xml 请让我知道我在哪里做错了,或者我提出了更具体的问题。 提前致谢

<schema name="hadoop-logs-schema" version="1.5">
<fields>
<field name="msg" type="string" indexed="true" stored="false" multiValued="true"/>
   <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
    

<field name="cluster" type="string" multiValued="false" docValues="true"/>
  <field name="rowtype" type="string" multiValued="false" docValues="true"/>
  <field name="level" type="string" docValues="true" multiValued="false"/>
  <field name="line_number" type="tint" omitNorms="false"/>
  <field name="log_message" type="text_general_maxlength" omitNorms="false" multiValued="false"/>
  <field name="file" type="string" docValues="true" multiValued="false"/>
  <field name="host" type="string" docValues="true" multiValued="false"/>
  <field name="logger_name" type="string" docValues="true" multiValued="false"/>
  <field name="logtime" type="tdate" multiValued="false"/>
  <field name="logtype" type="string" docValues="true" multiValued="false"/>
  <field name="message" type="string" indexed="true" stored="true"/>
  <field name="method" type="string" omitNorms="false" multiValued="false"/>
  <field name="path" type="string" docValues="true" multiValued="false"/>
  <field name="seq_num" type="tlong" omitNorms="false" multiValued="false"/>
  <field name="tags" type="string" multiValued="true"/>
  <field name="thread_name" type="string" omitNorms="false" multiValued="false"/>
  <field name="type" type="string" docValues="true" multiValued="false"/>



</fields> <copyField source="log_message" dest="msg"/>

<uniqueKey>id</uniqueKey>

<types> <!-- Custom --> <fieldType name="logLevel" class="solr.EnumField" enumsConfig="enumsConfig.xml" enumName="log_levels"/> <fieldType name="text_general_maxlen" class="solr.TextField" positionIncrementGap="100" multiValued="false"> <analyzer> <tokenizer class="solr.WhitespaceTokenizerFactory"/> <filter class="solr.LengthFilterFactory" min="0" max="32766"/> </analyzer> <analyzer type="index"> <tokenizer class="solr.StandardTokenizerFactory"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <!-- in this example, we will only use synonyms at query time <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> --> <filter class="solr.LowerCaseFilterFactory"/> </analyzer> <analyzer type="query"> <tokenizer class="solr.StandardTokenizerFactory"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter class="solr.LowerCaseFilterFactory"/> </analyzer> </fieldType>

`

<!-- boolean type: "true" or "false" -->
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>

<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>


<fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>


<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>

<fieldType name="tints" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tfloats" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tlongs" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tdoubles" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>


<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>

<!-- A Trie based date field for faster date range queries and date faceting. -->
<fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>

<fieldType name="tdates" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0" multiValued="true"/>


<!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->
<fieldtype name="binary" class="solr.BinaryField"/>


<fieldType name="pint" class="solr.IntField"/>
<fieldType name="plong" class="solr.LongField"/>
<fieldType name="pfloat" class="solr.FloatField"/>
<fieldType name="pdouble" class="solr.DoubleField"/>
<fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>


<fieldType name="random" class="solr.RandomSortField" indexed="true" />




<!-- A text field that only splits on whitespace for exact matching of words -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  </analyzer>
</fieldType>

<!-- A general text field that has reasonable, generic
     cross-language defaults: it tokenizes with StandardTokenizer,
 removes stop words from case-insensitive "stopwords.txt"
 (empty by default), and down cases.  At query time only, it
 also applies synonyms. -->
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
    <!-- in this example, we will only use synonyms at query time
    <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
    -->
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>


<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- in this example, we will only use synonyms at query time
    <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
    -->
    <!-- Case insensitive stop word removal.
    -->
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_en.txt"
        />
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
-->
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_en.txt"
        />
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
-->
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
</fieldType>


<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <!-- in this example, we will only use synonyms at query time
    <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
    -->
    <!-- Case insensitive stop word removal.
    -->
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_en.txt"
        />
    <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_en.txt"
        />
    <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
</fieldType>

<!-- Less flexible matching, but less false matches.  Probably not ideal for product names,
     but may be good for SKUs.  Can insert dashes in the wrong place and still match. -->
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
    <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
         possible with WordDelimiterFilter in conjuncton with stemming. -->
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>

<!-- Just like text_general except it reverses the characters of
 each token, to enable more efficient leading wildcard queries. -->
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
            maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>



<fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
  <analyzer>
    <!-- KeywordTokenizer does no actual tokenizing, so the entire
         input string is preserved as a single token
      -->
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <!-- The LowerCase TokenFilter does what you expect, which can be
         when you want your sorting to be case insensitive
      -->
    <filter class="solr.LowerCaseFilterFactory" />
    <!-- The TrimFilter removes any leading or trailing whitespace -->
    <filter class="solr.TrimFilterFactory" />

    <filter class="solr.PatternReplaceFilterFactory"
            pattern="([^a-z])" replacement="" replace="all"
        />
  </analyzer>
</fieldType>

<fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
  </analyzer>
</fieldtype>

<fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <!--
    The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
    a token of "foo|1.4"  would be indexed as "foo" with a payload of 1.4f
    Attributes of the DelimitedPayloadTokenFilterFactory : 
     "delimiter" - a one character delimiter. Default is | (pipe)
 "encoder" - how to encode the following value into a playload
    float -> org.apache.lucene.analysis.payloads.FloatEncoder,
    integer -> o.a.l.a.p.IntegerEncoder
    identity -> o.a.l.a.p.IdentityEncoder
        Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
     -->
    <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
  </analyzer>
</fieldtype>

<!-- lowercases the entire field value, keeping it as a single token.  -->
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory" />
  </analyzer>
</fieldType>

<!-- 
  Example of using PathHierarchyTokenizerFactory at index time, so
  queries for paths match documents at that path, or in descendent paths
-->
<fieldType name="descendent_path" class="solr.TextField">
  <analyzer type="index">
    <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.KeywordTokenizerFactory" />
  </analyzer>
</fieldType>

<fieldType name="ancestor_path" class="solr.TextField">
  <analyzer type="index">
    <tokenizer class="solr.KeywordTokenizerFactory" />
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
  </analyzer>
</fieldType>

<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />


<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>

<!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>


<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
           geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />


<fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />

`

1 个答案:

答案 0 :(得分:1)

您尚未提及indexed="true" stored="true"

更改

<field name="log_message" type="text_general_maxlength" omitNorms="false" multiValued="false"/>

<field name="log_message" type="text_general_maxlength" omitNorms="false" indexed="true" stored="true" multiValued="false"/>

添加如上所示的条目,重新索引并尝试搜索。

只要您想搜索字段,就需要将其编入索引。