在Apache Java SOLR API中,读取TermVectorComponent响应的正确Java对象是什么? http://lucene.apache.org/solr/api/index-all.html
例如,要将SOLR中的文档列表响应解析为名为SolrDocumentList的Java对象,该对象由SOLR Apache 3.5 API参考中指定的SolrDocument类型的对象组成,
// Build the request explicitly, send it, then look the document list up by its section name.
QueryRequest queryRequest = new QueryRequest(solrQuery);
NamedList<Object> solrResponse = solrServer.request(queryRequest);
SolrDocumentList solrDocumentList = (SolrDocumentList) solrResponse.get("response");
使用Java SOLR 3.5 API时,把术语向量(term vector)响应转换为TermVectorComponent列表、并从中取出各个TermVector组件对象的等效方法是什么?
此外,应该使用哪个Java对象,才能从查询响应中读取termVector列表?
例如,响应如下所示:
<lst name="termVectors">
<lst name="doc-2">
<str name="uniqueKey">49667.16</str>
<lst name="contents">
<lst name="15">
<int name="tf">2</int>
<lst name="offsets">
<int name="start">147</int>
<int name="end">149</int>
<int name="start">278</int>
<int name="end">280</int>
</lst>
<lst name="positions">
<int name="position">23</int>
<int name="position">47</int>
</lst>
<int name="df">9</int>
<double name="tf-idf">0.2222222222222222</double>
</lst>
<lst name="15,">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">1193</int>
<int name="end">1196</int>
</lst>
<lst name="positions">
<int name="position">188</int>
</lst>
<int name="df">3</int>
<double name="tf-idf">0.3333333333333333</double>
</lst>
<lst name="15.">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">1019</int>
<int name="end">1022</int>
</lst>
<lst name="positions">
<int name="position">161</int>
</lst>
<int name="df">5</int>
<double name="tf-idf">0.2</double>
</lst>
<lst name="2">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">1751</int>
<int name="end">1752</int>
</lst>
<lst name="positions">
<int name="position">276</int>
</lst>
<int name="df">10</int>
<double name="tf-idf">0.1</double>
</lst>
<lst name="22a">
<int name="tf">2</int>
<lst name="offsets">
<int name="start">1110</int>
<int name="end">1113</int>
<int name="start">1373</int>
<int name="end">1376</int>
</lst>
<lst name="positions">
<int name="position">174</int>
<int name="position">213</int>
</lst>
<int name="df">4</int>
<double name="tf-idf">0.5</double>
</lst>
<lst name="22b">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">1118</int>
<int name="end">1121</int>
</lst>
<lst name="positions">
<int name="position">176</int>
</lst>
<int name="df">4</int>
<double name="tf-idf">0.25</double>
</lst>
<lst name="22b.">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">1381</int>
<int name="end">1385</int>
</lst>
<lst name="positions">
<int name="position">215</int>
</lst>
<int name="df">3</int>
<double name="tf-idf">0.3333333333333333</double>
</lst>
<lst name="acceptable">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">1556</int>
<int name="end">1566</int>
</lst>
<lst name="positions">
<int name="position">246</int>
</lst>
<int name="df">1</int>
<double name="tf-idf">1.0</double>
</lst>
<lst name="achieve">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">883</int>
<int name="end">890</int>
</lst>
<lst name="positions">
<int name="position">138</int>
</lst>
<int name="df">4</int>
<double name="tf-idf">0.25</double>
</lst>
<lst name="allow">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">1550</int>
<int name="end">1555</int>
</lst>
<lst name="positions">
<int name="position">245</int>
</lst>
<int name="df">3</int>
<double name="tf-idf">0.3333333333333333</double>
</lst>
<lst name="also">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">224</int>
<int name="end">228</int>
</lst>
<lst name="positions">
<int name="position">38</int>
</lst>
<int name="df">9</int>
<double name="tf-idf">0.1111111111111111</double>
</lst>
<lst name="also,">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">780</int>
<int name="end">785</int>
</lst>
<lst name="positions">
<int name="position">123</int>
</lst>
<int name="df">1</int>
<double name="tf-idf">1.0</double>
</lst>
<lst name="amplified">
<int name="tf">2</int>
<lst name="offsets">
<int name="start">1583</int>
<int name="end">1592</int>
<int name="start">1656</int>
<int name="end">1665</int>
</lst>
<lst name="positions">
<int name="position">250</int>
<int name="position">262</int>
</lst>
<int name="df">4</int>
<double name="tf-idf">0.5</double>
</lst>
<lst name="amplifier">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">1741</int>
<int name="end">1750</int>
</lst>
<lst name="positions">
<int name="position">275</int>
</lst>
<int name="df">22</int>
<double name="tf-idf">0.045454545454545456</double>
</lst>
<lst name="amplifier.">
<int name="tf">2</int>
<lst name="offsets">
<int name="start">57</int>
<int name="end">67</int>
<int name="start">647</int>
<int name="end">657</int>
</lst>
<lst name="positions">
<int name="position">7</int>
<int name="position">104</int>
</lst>
<int name="df">4</int>
<double name="tf-idf">0.5</double>
</lst>
<lst name="amplitude">
<int name="tf">3</int>
<lst name="offsets">
<int name="start">72</int>
<int name="end">81</int>
<int name="start">759</int>
<int name="end">768</int>
<int name="start">848</int>
<int name="end">857</int>
</lst>
<lst name="positions">
<int name="position">9</int>
<int name="position">121</int>
<int name="position">134</int>
</lst>
<int name="df">1</int>
<double name="tf-idf">3.0</double>
</lst>
<lst name="appear">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">110</int>
<int name="end">117</int>
</lst>
<lst name="positions">
<int name="position">16</int>
</lst>
<int name="df">1</int>
<double name="tf-idf">1.0</double>
</lst>
<lst name="between">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">934</int>
<int name="end">941</int>
</lst>
<lst name="positions">
<int name="position">146</int>
</lst>
<int name="df">7</int>
<double name="tf-idf">0.14285714285714285</double>
</lst>
<lst name="c4">
<int name="tf">2</int>
<lst name="offsets">
<int name="start">1033</int>
<int name="end">1035</int>
<int name="start">1242</int>
<int name="end">1244</int>
</lst>
<lst name="positions">
<int name="position">163</int>
<int name="position">195</int>
</lst>
<int name="df">4</int>
<double name="tf-idf">0.5</double>
</lst>
<lst name="c4,">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">1471</int>
<int name="end">1474</int>
</lst>
<lst name="positions">
<int name="position">229</int>
</lst>
<int name="df">2</int>
<double name="tf-idf">0.5</double>
</lst>
<lst name="c5">
<int name="tf">3</int>
<lst name="offsets">
<int name="start">210</int>
<int name="end">212</int>
<int name="start">715</int>
<int name="end">717</int>
<int name="start">993</int>
<int name="end">995</int>
</lst>
<lst name="positions">
<int name="position">34</int>
<int name="position">113</int>
<int name="position">155</int>
</lst>
<int name="df">5</int>
<double name="tf-idf">0.6</double>
</lst>
<lst name="c5,">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">1475</int>
<int name="end">1478</int>
</lst>
<lst name="positions">
<int name="position">230</int>
</lst>
<int name="df">2</int>
<double name="tf-idf">0.5</double>
</lst>
<lst name="c6">
<int name="tf">4</int>
<lst name="offsets">
<int name="start">217</int>
<int name="end">219</int>
<int name="start">722</int>
<int name="end">724</int>
<int name="start">1000</int>
<int name="end">1002</int>
<int name="start">1483</int>
<int name="end">1485</int>
</lst>
<lst name="positions">
<int name="position">36</int>
<int name="position">115</int>
<int name="position">157</int>
<int name="position">232</int>
</lst>
<int name="df">5</int>
<double name="tf-idf">0.8</double>
</lst>
<lst name="can">
<int name="tf">2</int>
<lst name="offsets">
<int name="start">558</int>
<int name="end">561</int>
<int name="start">1486</int>
<int name="end">1489</int>
</lst>
<lst name="positions">
<int name="position">89</int>
<int name="position">233</int>
</lst>
<int name="df">9</int>
<double name="tf-idf">0.2222222222222222</double>
</lst>
<lst name="capacitance">
<int name="tf">2</int>
<lst name="offsets">
<int name="start">665</int>
<int name="end">677</int>
<int name="start">1216</int>
<int name="end">1228</int>
</lst>
<lst name="positions">
<int name="position">107</int>
<int name="position">192</int>
</lst>
<int name="df">6</int>
<double name="tf-idf">0.3333333333333333</double>
</lst>
<lst name="capacitor">
<int name="tf">8</int>
<lst name="offsets">
<int name="start">199</int>
<int name="end">209</int>
<int name="start">704</int>
<int name="end">714</int>
<int name="start">982</int>
<int name="end">992</int>
<int name="start">1023</int>
<int name="end">1032</int>
<int name="start">1057</int>
<int name="end">1067</int>
<int name="start">1232</int>
<int name="end">1241</int>
<int name="start">1266</int>
<int name="end">1276</int>
<int name="start">1460</int>
<int name="end">1470</int>
</lst>
<lst name="positions">
<int name="position">33</int>
<int name="position">112</int>
<int name="position">154</int>
<int name="position">162</int>
<int name="position">167</int>
<int name="position">194</int>
<int name="position">199</int>
<int name="position">228</int>
</lst>
<int name="df">16</int>
<double name="tf-idf">0.5</double>
</lst>
<lst name="cause">
<int name="tf">3</int>
<lst name="offsets">
<int name="start">506</int>
<int name="end">511</int>
<int name="start">562</int>
<int name="end">567</int>
<int name="start">1122</int>
<int name="end">1127</int>
</lst>
<lst name="positions">
<int name="position">84</int>
<int name="position">90</int>
<int name="position">177</int>
</lst>
<int name="df">5</int>
<double name="tf-idf">0.6</double>
</lst>
<lst name="characteristics,">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">536</int>
<int name="end">552</int>
</lst>
<lst name="positions">
<int name="position">87</int>
</lst>
<int name="df">1</int>
<double name="tf-idf">1.0</double>
</lst>
<lst name="chopper-stabilized">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">38</int>
<int name="end">56</int>
</lst>
<lst name="positions">
<int name="position">6</int>
</lst>
<int name="df">9</int>
<double name="tf-idf">0.1111111111111111</double>
</lst>
<lst name="chopping">
<int name="tf">6</int>
<lst name="offsets">
<int name="start">236</int>
<int name="end">244</int>
<int name="start">793</int>
<int name="end">801</int>
<int name="start">942</int>
<int name="end">950</int>
<int name="start">1390</int>
<int name="end">1398</int>
<int name="start">1507</int>
<int name="end">1515</int>
<int name="start">1608</int>
<int name="end">1616</int>
</lst>
<lst name="positions">
<int name="position">41</int>
<int name="position">126</int>
<int name="position">147</int>
<int name="position">217</int>
<int name="position">238</int>
<int name="position">254</int>
</lst>
<int name="df">19</int>
<double name="tf-idf">0.3157894736842105</double>
</lst>
<lst name="circuitry.">
<int name="tf">1</int>
<lst name="offsets">
<int name="start">446</int>
<int name="end">456</int>
</lst>
<lst name="positions">
<int name="position">74</int>
</lst>
<int name="df">1</int>
<double name="tf-idf">1.0</double>
</lst>
<str name="uniqueKeyFieldName">documentPageId</str>
</lst>
答案 0 :(得分:5)
我对您的应用程序设计了解不够,但是从您的代码来看,我猜测您正在使用Solrj
客户端来查询和处理Solr响应。您可以尝试以下方法,
// Execute the query through the SolrJ server object and keep the typed response.
QueryResponse queryResponse = server.query(solrQuery);
// Typed wrapper around the terms component's part of the response; it may not carry
// the complete term vector details (offsets, positions, ...).
TermsResponse termsResponse = queryResponse.getTermsResponse();
TermsResponse
封装了来自术语组件的响应,也许TermsResponse
不会包含来自响应的完整术语矢量信息,在这种情况下,以下选项可能值得探索,
// The "termVectors" section holds one nested list per document, but it can also hold plain
// string entries such as "uniqueKeyFieldName". Casting those to NamedList would throw a
// ClassCastException, so every level is type-checked before descending into it.
Object termVectorsSection = solrResponse.get("termVectors");
if (termVectorsSection instanceof NamedList) {
    for (Entry<String, ?> docTermVector : (NamedList<?>) termVectorsSection) {
        if (!(docTermVector.getValue() instanceof NamedList)) {
            continue; // metadata entry, not a per-document list
        }
        for (Entry<String, ?> fieldEntry : (NamedList<?>) docTermVector.getValue()) {
            // Only the "contents" field is reported; "uniqueKey" and any other field are skipped.
            // The constant-first equals() keeps this safe for entries without a name.
            if (!"contents".equals(fieldEntry.getKey()) || !(fieldEntry.getValue() instanceof NamedList)) {
                continue;
            }
            for (Entry<String, ?> tvInfo : (NamedList<?>) fieldEntry.getValue()) {
                if (!(tvInfo.getValue() instanceof NamedList)) {
                    continue; // not a per-term statistics list
                }
                // Each term maps to a list of statistics (tf, df, tf-idf, offsets, positions).
                NamedList<?> tv = (NamedList<?>) tvInfo.getValue();
                System.out.println("Vector Info: " + tvInfo.getKey() + " tf: " + tv.get("tf") + " df: " + tv.get("df") + " tf-idf: " + tv.get("tf-idf"));
            }
        }
    }
}
这应该产生,
Vector Info: 15 tf: 2 df: 9 tf-idf: 0.2222222222222222
Vector Info: 15, tf: 1 df: 3 tf-idf: 0.3333333333333333
........
您可以根据需要将其处理到您自己的TermVector域对象中,希望这会有所帮助。
答案 1 :(得分:0)
这是我从SolrJ提供的QueryResponse对象中获取术语向量数据的尝试。为了便于自己理解,我不得不把这个对象拆解成各个组成部分......
由于知识产权问题,我没有包括POJO类,但它们应该相当容易推断......一个向量(vector)包含一个或多个信息项(info)。
希望这有帮助。
SCOTT
/**
 * Reads the "termVectors" section of a query response and attaches the parsed term vectors to
 * the given models.
 * <p>
 * Vectors are paired with models by position: the first parsed vector goes to the first model,
 * the second to the second, and so on. This assumes the caller passes the models in the same
 * order as the documents appear in the response (not verified here). If the two sizes differ,
 * only the leading pairs are assigned and the remainder is left untouched.
 *
 * @param aQueryResponse the SolrJ response to read the term vectors from
 * @param list the models that receive the term vectors; nothing happens when the response has
 *            no "termVectors" section
 */
protected void extractTermVectorData(final QueryResponse aQueryResponse, final List<? extends BaseModel> list)
{
    final NamedList<Object> response = aQueryResponse.getResponse();

    // Look the section up by name; unlike a manual scan this also tolerates unnamed entries.
    @SuppressWarnings("unchecked")
    final NamedList<Object> termVectorsObject = (NamedList<Object>)response.get("termVectors");
    if ( null == termVectorsObject )
    {
        return;
    }

    final List<IMGTermVector> termVectorList = process(termVectorsObject);

    // Advance the index with every model; a fixed index would hand the first vector to all of them.
    int index = 0;
    for ( final BaseModel model : list )
    {
        if ( index >= termVectorList.size() )
        {
            break; // fewer vectors than models: nothing left to assign
        }
        model.setTermVector(termVectorList.get(index));
        index++;
    }
}
/**
 * Turns a term-vector named list into vector objects by first skipping its leading header
 * entry and then converting every remaining entry.
 *
 * @param termVectorNamedList the named list to convert
 * @return one vector per entry that follows the header
 */
private ArrayList<IMGTermVector> process(final NamedList<Object> termVectorNamedList)
{
    // The header step hands back the iterator already positioned after the first entry.
    return consumeVectorsFromList(consumeTopListHeader(termVectorNamedList));
}
/**
 * Reads the first entry of the list, prints it as {@code key=value}, and returns the iterator
 * positioned on the entry after it. The first entry's value is cast to {@code String}.
 *
 * @param termVectorNamedList the named list whose first entry is treated as the header
 * @return the list's iterator, advanced past the header entry
 */
private Iterator<Entry<String, Object>> consumeTopListHeader(final NamedList<Object> termVectorNamedList)
{
    final Iterator<Entry<String, Object>> entries = termVectorNamedList.iterator();
    final Entry<String, Object> header = entries.next();
    final String headerValue = (String)header.getValue();
    System.out.println(header.getKey() + "=" + headerValue);
    return entries;
}
/**
 * Drains the iterator and builds one vector per remaining entry. The entry key is stored as
 * the vector's field entry and the entry value is converted into the vector's infos.
 *
 * @param termVectorIteratior iterator over the entries to convert; each value is cast to a
 *            named list
 * @return the vectors in iteration order
 */
@SuppressWarnings("unchecked")
private ArrayList<IMGTermVector> consumeVectorsFromList(final Iterator<Entry<String, Object>> termVectorIteratior)
{
    final ArrayList<IMGTermVector> result = new ArrayList<IMGTermVector>();
    while ( termVectorIteratior.hasNext() )
    {
        final IMGTermVector current = new IMGTermVector();
        final Entry<String, Object> entry = termVectorIteratior.next();

        current.setFieldEntry(entry.getKey());
        System.out.println("processing vector #" + entry.getKey());

        convertIntoVector((NamedList<Object>)entry.getValue(), current);
        result.add(current);
    }
    return result;
}
/**
 * Fills a vector from one entry's named list: the leading header entry is skipped, then the
 * following entry is converted into the vector's info list.
 *
 * @param termVectorEntryValue the named list belonging to one vector entry
 * @param vector the vector that receives the parsed infos
 */
private void convertIntoVector(final NamedList<Object> termVectorEntryValue, final IMGTermVector vector)
{
    // Step 1: move past the header entry.
    final Iterator<Entry<String, Object>> afterHeader = consumeVectorHeader(termVectorEntryValue, vector);

    // Step 2: parse the terms and store them on the vector.
    vector.setInfos(convertFilteredTextIntoInfos(afterHeader));
}
/**
 * Takes the next entry from the iterator, treats its value as a named list of terms, and
 * parses every term into an info object. Only that single entry is consumed.
 *
 * @param termVectorEntryValueIteratior iterator positioned on the entry holding the terms
 * @return the parsed infos in the order the terms appear
 */
@SuppressWarnings("unchecked")
private List<IMGTermVectorInfo> convertFilteredTextIntoInfos(
    final Iterator<Entry<String, Object>> termVectorEntryValueIteratior)
{
    final List<IMGTermVectorInfo> infos = new ArrayList<IMGTermVectorInfo>();

    final Entry<String, Object> fieldEntry = termVectorEntryValueIteratior.next();
    final String fieldName = fieldEntry.getKey();
    final NamedList<Object> terms = (NamedList<Object>)fieldEntry.getValue();
    System.out.println("processing components of key " + fieldName);

    for ( final Entry<String, Object> termEntry : terms )
    {
        final NamedList<Object> termStats = (NamedList<Object>)termEntry.getValue();
        infos.add(parseTermVectorInfo(termEntry, termStats));
    }
    return infos;
}
/**
 * Builds an info object for a single term from its statistics list and prints the values read.
 *
 * @param tvInfo the term entry; its key is stored as the info text
 * @param tv the term's statistics, read under the names "tf", "df" and "tf-idf"
 * @return the populated info object
 */
private IMGTermVectorInfo parseTermVectorInfo(final Entry<String, Object> tvInfo, final NamedList<Object> tv)
{
    final IMGTermVectorInfo result = new IMGTermVectorInfo();

    final String term = tvInfo.getKey();
    final Object tf = tv.get("tf");
    final Object df = tv.get("df");
    final Object tfIdf = tv.get("tf-idf");
    System.out.println("Vector Info: " + term + " tf: " + tf + " df: " + df + " tf-idf: " + tfIdf);

    result.setInfo(term);
    result.setTf((Integer)tf);
    result.setDf((Integer)df);
    result.setTfidf((Double)tfIdf);
    return result;
}
/**
 * Reads the first entry of a vector's named list, prints it, and returns the iterator
 * positioned on the entry after it. The header value is cast to {@code String} and is
 * otherwise ignored.
 *
 * @param termVectorEntryValue the named list belonging to one vector entry
 * @param vector the vector being built; not used by this method
 * @return the list's iterator, advanced past the header entry
 */
private Iterator<Entry<String, Object>> consumeVectorHeader(final NamedList<Object> termVectorEntryValue,
    final IMGTermVector vector)
{
    final Iterator<Entry<String, Object>> fields = termVectorEntryValue.iterator();
    final Entry<String, Object> header = fields.next();
    final String headerValue = (String)header.getValue();
    System.out.println(" " + header.getKey() + "=" + headerValue + " <<<--- ignoring this data for now");
    return fields;
}