/// Helper for driving the iOS home screen (SpringBoard) from a UI test.
///
/// NOTE(review): `XCUIApplication(privateWithPath:bundleID:)` and `resolve()` are not part of
/// the public XCTest interface; presumably they are exposed to Swift through a bridging
/// header elsewhere in the test target — confirm before changing Xcode versions.
class Springboard {
    /// Application proxy created for the SpringBoard bundle identifier.
    static let springboard = XCUIApplication(privateWithPath: nil, bundleID: "com.apple.springboard")

    /// Taps the "Health" icon on the home screen when that icon exists.
    ///
    /// - Returns: An application proxy created for the `com.apple.Health` bundle identifier,
    ///   usable for querying elements after the tap, or `nil` when no "Health" icon was found
    ///   and nothing was tapped. The result may be ignored.
    @discardableResult
    class func launchHealth() -> XCUIApplication? {
        springboard.resolve()

        let icon = springboard.icons["Health"]
        guard icon.exists else { return nil }
        icon.tap()

        // Hand the Health proxy back to the caller; previously it was created and then
        // dropped, so there was no way to actually query elements in the Health app.
        return XCUIApplication(privateWithPath: nil, bundleID: "com.apple.Health")
    }
}
结果: doc1 = "I got the new Apple iPhone 8";
doc2 = "have you seen the new Apple iPhone 8?";
doc3 = "the Apple iPhone 8 is out";
doc4 = "another doc without the common words";
find_commons(["doc1", "doc2", "doc3", "doc4"]);
或者类似的写法。
另一个问题:对于存放在 Lucene 中的数据,是否有更好的库或系统可以实现这一目标?
答案 0 :(得分:1)
是的,您可以使用 TermVector(词项向量)来检索此信息。
首先,您需要确保TermVectors存储在索引中,例如:
/**
 * Builds a document with a stored "title" field and a tokenized "content" field
 * that keeps term vectors.
 *
 * @param title   value of the "title" field; stored in the index
 * @param content value of the "content" field; tokenized, not stored, term vectors enabled
 * @return the populated document
 */
private static Document createDocument(String title, String content) {
    // Field type for "content": term vectors must be enabled at index time,
    // otherwise they cannot be read back per document later.
    FieldType contentType = new FieldType();
    contentType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    contentType.setStored(false);
    contentType.setStoreTermVectors(true);
    contentType.setTokenized(true);

    Document document = new Document();
    document.add(new StringField("title", title, Field.Store.YES));
    document.add(new Field("content", content, contentType));
    return document;
}
然后,您可以读取给定文档 ID 的词项向量(term vector),并取出其中的所有词项:
/**
 * Collects the terms recorded in the term vector of one field of one document.
 *
 * @param docId  document id to look up in {@code reader}
 * @param field  name of the field whose term vector is read
 * @param reader index reader used to fetch the term vector
 * @return a new, mutable list holding each term decoded as a UTF-8 string; empty when
 *         the reader has no term vector for this document and field
 * @throws IOException if reading the term vector or iterating its terms fails
 */
private static List<String> getTermsForDoc(int docId, String field, IndexReader reader) throws IOException {
    List<String> result = new ArrayList<>();
    Terms terms = reader.getTermVector(docId, field);
    if (terms == null) {
        // getTermVector returns null when no term vector exists for this document/field
        // (e.g. the field was indexed without term vectors); report "no terms" rather
        // than failing with a NullPointerException. The list stays mutable because
        // callers may modify it in place.
        return result;
    }
    TermsEnum termsEnum = terms.iterator();
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
        result.add(term.utf8ToString());
    }
    return result;
}
最后,您可以求出两个文档共有的词项:
/**
 * Returns the terms that occur in the "content" field of both given documents.
 *
 * @param docId1   id of the first document
 * @param docId2   id of the second document
 * @param searcher searcher whose underlying index reader is used to read term vectors
 * @return the terms of the first document that are also present in the second one,
 *         in the order they were returned for the first document
 * @throws IOException if reading either term vector fails
 */
private static List<String> getCommonTerms(int docId1, int docId2, IndexSearcher searcher) throws IOException {
    // getTermsForDoc expects an IndexReader, not an IndexSearcher, so use the reader
    // that backs this searcher (passing the searcher itself does not compile).
    IndexReader reader = searcher.getIndexReader();
    // Using the field "content" is just an example here.
    List<String> termList1 = getTermsForDoc(docId1, "content", reader);
    List<String> termList2 = getTermsForDoc(docId2, "content", reader);
    // Keep only the terms of the first document that also appear in the second.
    termList1.retainAll(termList2);
    return termList1;
}
当然,这可以很容易地扩展到允许任意数量的文档。