实际上我正在尝试使用ASP.net Core 1.0中的Elasticsearch和NEST客户端索引某些文件(最重要的PDF)。
我找到了一些代码段,并试图将它用于我的目的。
这些是我用来设置Elasticsearch索引和Pipeline的代码段:
private void SetupElasticSearch()
{
ConnectionSettings settings = new ConnectionSettings(new Uri(_appSettings.ElasticSearchSettings.Url))
.MapDefaultTypeIndices(m => m
.Add(typeof(FSDocumentFile), _appSettings.ElasticSearchSettings.IndexName)
).OnRequestCompleted(response =>
{
_logger.LogInformation(String.Format("{0} {1}", response.HttpMethod, response.Uri));
if (response.RequestBodyInBytes != null)
{
_logger.LogInformation(Encoding.UTF8.GetString(response.RequestBodyInBytes));
}
// log out the response and the response body, if one exists for the type of response
_logger.LogInformation(String.Format("{0}", response.HttpStatusCode));
if (response.ResponseBodyInBytes != null)
{
_logger.LogInformation(Encoding.UTF8.GetString(response.ResponseBodyInBytes));
}
});
ElasticClient client = new ElasticClient(settings);
CreateElasticSearchIndex(client);
CreatePipeline(client);
}
private void CreateElasticSearchIndex(ElasticClient Client)
{
Client.DeleteIndex(_appSettings.ElasticSearchSettings.IndexName);
Client.CreateIndex(_appSettings.ElasticSearchSettings.IndexName, c => c
.Settings(s => s
.Analysis(a => a
.Analyzers(ad => ad
.Custom("windows_path_hierarchy_analyzer", ca => ca
.Tokenizer("windows_path_hierarchy_tokenizer")
)
)
.Tokenizers(t => t
.PathHierarchy("windows_path_hierarchy_tokenizer", ph => ph
.Delimiter('\\')
)
)
)
)
.Mappings(m => m
.Map<FSDocumentFile>(mp => mp
.AllField(all => all
.Enabled(false)
)
.Properties(ps => ps
.Number(n => n
.Name(nn => nn.Id)
)
.Text(s => s
.Name(n => n.Comment)
//.Analyzer("windows_path_hierarchy_analyzer")
)
//.Text(s => s
// .Name(n => n.Content)
//)
.Object<Attachment>(a => a
.Name(n => n.FileData)
.Properties(p => p
.Text(t => t
.Name(n => n.Name)
)
.Text(t => t
.Name(n => n.Content)
)
.Text(t => t
.Name(n => n.ContentType)
)
.Number(n => n
.Name(nn => nn.ContentLength)
)
.Date(d => d
.Name(n => n.Date)
)
.Text(t => t
.Name(n => n.Author)
)
.Text(t => t
.Name(n => n.Title)
)
.Text(t => t
.Name(n => n.Keywords)
)
)
)
)
)
)
);
}
private void CreatePipeline(ElasticClient Client)
{
Client.PutPipeline("attachments", p => p
.Description("Document attachment pipeline")
.Processors(pr => pr
.Attachment<FSDocumentFile>(a => a
.Field(f => f.FileData.Content)
.TargetField(f => f.Content)
)
.Remove<FSDocumentFile>(r => r
.Field(f => f.FileData)
)
)
);
}
这是用于索引的FSFileInfo classe的定义:
[ElasticsearchType(Name = "FSDocumentFile")]
public class FSDocumentFile
{
public int Id { get; set; }
/// <summary>
/// FileData Base64 encoded
/// </summary>
public string Content { get; set; }
[Attachment(Store = true)]
public Attachment FileData { get; set; }
public string Comment { get; set; }
}
这是我用来索引文件的代码:
FSDocumentFile fsFile = new FSDocumentFile()
{
Id = df.DocumentFileID,
FileData = new Attachment()
{
Content = Convert.ToBase64String(fd.FileBytes),
ContentType = "application/pdf",
ContentLength = fd.FileBytes.Count(),
Name = fileName
},
Comment = "TEst Comment" + df.DocumentFileID.ToString()
};
ElasticClient client = new ElasticClient(settings);
Result callResult = client.Index<FSDocumentFile>(fsFile, fi => fi.Pipeline("attachments")).Result;
它总是会导致Elasticsearch上出现错误:
[2016-11-28T15:40:28,311] [错误] [o.e.a.i.IngestActionFilter] [mU3hlQ7]无法执行管道[附件] org.elasticsearch.ElasticsearchException:java.lang.IllegalArgumentException:ElasticsearchParseException [解析字段[fileData.content]]中的文档时出错;嵌套:IllegalArgumentException [field [c ontent]不作为path [fileData.content]]的一部分出现; 在org.elasticsearch.ingest.CompoundProcessor.newCompoundProcessorException(CompoundProcessor.java:156)〜[elasticsearch-5.0.0-rc1.jar:5.0.0-rc1] 在org.elasticsearch.ingest.CompoundProcessor.execute(CompoundProcessor.java:107)〜[elasticsearch-5.0.0-rc1.jar:5.0.0-rc1] 在org.elasticsearch.ingest.Pipeline.execute(Pipeline.java:58)〜[elasticsearch-5.0.0-rc1.jar:5.0.0-rc1] at org.elasticsearch.ingest.PipelineExecutionService.innerExecute(PipelineExecutionService.java:166)~ [elasticsearch-5.0.0-rc1.jar:5.0.0-rc1] 在org.elasticsearch.ingest.PipelineExecutionService.access $ 000(PipelineExecutionService.java:41)〜[elasticsearch-5.0.0-rc1.jar:5.0.0-rc1] 在org.elasticsearch.ingest.PipelineExecutionService $ 1.doRun(PipelineExecutionService.java:65)[elasticsearch-5.0.0-rc1.jar:5.0.0-rc1] at org.elasticsearch.common.util.concurrent.ThreadContext $ ContextPreservingAbstractRunnable.doRun(ThreadContext.java:504)[elasticsearch-5.0.0-rc1.jar:5.0.0-rc1] 在org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37)[elasticsearch-5.0.0-rc1.jar:5.0.0-rc1] at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)[?:1.8.0_112] at java.util.concurrent.ThreadPoolExecutor $ Worker.run(Unknown Source)[?:1.8.0_112] 在java.lang.Thread.run(未知来源)[?:1.8.0_112] 引起:java.lang.IllegalArgumentException:ElasticsearchParseException [解析字段[fileData.content]]中的文档时出错;嵌套:IllegalArgumentException [field [content]不作为一部分出现 path [fileData.content]];
有人可以帮助我吗?或者也许可以指出一个很好的教程如何一起使用Elasticsearch,Ingest-Mapper和NEST客户端?
提前多多感谢。