我正在尝试根据我在此处学到的内容编制索引并搜索PDF: ElasticSearch & attachment type (NEST C#)
但是我收到了“状态代码400,没有提供内容(No content is provided)”错误。该PDF的大小约为7KB,是我自己制作的,里面只有一些文字。
有什么想法吗?代码和输出如下。谢谢!
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Nest;
using System.IO;
using System.Threading;
namespace SearchPDFConsole
{
/// <summary>
/// Console harness that recreates an Elasticsearch index, indexes two
/// <see cref="Document"/> instances (one carrying a base64-encoded PDF in its
/// <see cref="Document.File"/> property) and then runs a query-string search.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point: deletes and recreates the index, indexes the two test
    /// documents, waits one second, then prints the number of search hits and
    /// the title of each hit.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        // create es client
        string index = "pdftestitems";
        Console.WriteLine("client stuff...");
        var node = new Uri("http://tns-dev.pts-eden.org:9200");
        var settings = new ConnectionSettings(node).SetDefaultIndex(index);
        var client = new ElasticClient(settings);

        Console.WriteLine("deleting index...");
        // delete index if any
        var di = client.DeleteIndex(index);
        Console.WriteLine(di.ConnectionStatus.HttpStatusCode);

        Console.WriteLine("creating index...");
        // Create your index explicitly before you index any instances of your class.
        // If you don't do this, it will use dynamic mapping and ignore your attribute
        // mapping. If you change your mapping in the future, always recreate the index.
        var ci = client.CreateIndex(index, c => c.AddMapping<Document>(m => m.MapFromAttributes()));
        // Report the status of the create call itself. The original printed the
        // delete response (di) here, which hid any failure to create the mapping.
        Console.WriteLine(ci.ConnectionStatus.HttpStatusCode);

        // Read the PDF from disk and wrap it as a base64 attachment payload.
        string path = "test2.pdf";
        var attachment = new Attachment();
        attachment.Content = Convert.ToBase64String(File.ReadAllBytes(path));
        attachment.ContentType = "application/pdf";
        attachment.Name = "test2.pdf";

        // Plain document without an attachment.
        var doc = new Document()
        {
            ID = 2,
            Title = "test2",
            Content = "This is a test."
        };

        // Document that carries the PDF attachment.
        var doc2 = new Document()
        {
            ID = 1,
            Title = "test",
            Content = "good luck",
            File = attachment
        };

        Console.WriteLine("Indexing document 1...");
        var status = client.Index<Document>(doc);
        Console.WriteLine(status.ConnectionStatus);

        Console.WriteLine("Indexing document 2...");
        var status2 = client.Index<Document>(doc2);
        Console.WriteLine(status2.ConnectionStatus);

        // Pause before searching; presumably this gives the index time to
        // refresh so the new documents are searchable — TODO confirm.
        Console.WriteLine("sleeping 1s...");
        Thread.Sleep(1000);

        string stringsearch = "test";
        // Alternative kept from the original, restricting the search to the "document" type:
        //var searchResults = client.Search<Document>(s => s.Type("document").Query(qs => qs.QueryString(q => q.Query(stringsearch))));
        var searchResults = client.Search<Document>(s => s.Query(qs => qs.QueryString(q => q.Query(stringsearch))));

        Console.WriteLine(searchResults.Documents.Count());
        foreach (var sd in searchResults.Documents)
        {
            Console.WriteLine(sd.Title);
        }
    }
}
/// <summary>
/// Item stored in the index under the Elasticsearch type name "document".
/// The mapping is declared through the attributes on this class and its properties.
/// </summary>
[ElasticType(Name = "document")]
public class Document
{
    /// <summary>Numeric identifier of the document.</summary>
    public int ID { get; set; }

    /// <summary>Title text; mapped with Store = true.</summary>
    [ElasticProperty(Store = true)]
    public string Title { get; set; }

    /// <summary>Body text; mapped with Store = true.</summary>
    [ElasticProperty(Store = true)]
    public string Content { get; set; }

    /// <summary>
    /// Optional file payload, mapped as an attachment field with Store = true
    /// and term vectors that include positions and offsets.
    /// </summary>
    [ElasticProperty(Type = FieldType.Attachment, Store = true, TermVector = TermVectorOption.WithPositionsOffsets)]
    public Attachment File { get; set; }
}
/// <summary>
/// File payload embedded in a <see cref="Document"/>. Each property is
/// mapped to an underscore-prefixed field name.
/// </summary>
public class Attachment
{
    /// <summary>File content as a string; mapped to the "_content" field.</summary>
    [ElasticProperty(Name = "_content")]
    public string Content { get; set; }

    /// <summary>Content type of the file; mapped to the "_content_type" field.</summary>
    [ElasticProperty(Name = "_content_type")]
    public string ContentType { get; set; }

    /// <summary>File name; mapped to the "_name" field.</summary>
    [ElasticProperty(Name = "_name")]
    public string Name { get; set; }
}
}
我的程序输出:
C:\PROGRAMMING\SearchPDFTest\SearchPDFConsole\bin\Debug>SearchPDFConsole.exe
client stuff...
deleting index...
200
creating index...
200
Indexing document 1...
StatusCode: 201,
Method: PUT,
Url: http://tns-dev.pts-eden.org:9200/pdftestitems/document/2,
Request: {
"iD": 2,
"title": "test2",
"content": "This is a test."
},
Response: <Response stream not captured or already read to completion by
serializer, set ExposeRawResponse() on connectionsettings to force it to be set
on>
Indexing document 2...
StatusCode: 400,
Method: PUT,
Url: http://tns-dev.pts-eden.org:9200/pdftestitems/document/1,
Request: {
"iD": 1,
"title": "test",
"content": "good luck",
"file": {
"_content": "JVBERi0xLjYNJeLjz9MNCjE5IDAgb2JqDTw8L0ZpbHRlci9GbGF0ZURlY29kZS9
GaXJzdCA5L0xlbmd0aCAxMzkvTiAyL1R5cGUvT2JqU3RtPj5zdHJlYW0NCmjeTM3BCsIwDAbgV/mfwLQ
brQijB3cUYQxvY4figuzSjrYDfXvbenCH/JCEL2khoKAVuo56v7sESbd1iZPKi3GmOy+rvfr3JE4CpbR
scp4vOWd6fDamwb44GlMPuMQuRcimaOqD36pFdUUdLx6ngw1Zoq1u5Oj38OQI/et9sokh/v+M+QowAEh
[LOTS MORE BASE64 ENCODED STUFF]
mL1dbMSAyIDFdPj5zdHJlYW0NCmjeYmIAAiZGpmUMTAwMPkDWP0UGpv/8a4Gsj8GMQDHG/yACxGIAsRi
YIaz/Aun/gKwaoDamDJDeqSBWIZBgfAoiZoAIRyDx8g1I9iWIuwpISGkyAAQYAArYEhcNCmVuZHN0cmV
hbQ1lbmRvYmoNc3RhcnR4cmVmDQo3MTE2DQolJUVPRg0K",
"_content_type": "application/pdf",
"_name": "test2.pdf"
}
},
Response: <Response stream not captured or already read to completion by
serializer, set ExposeRawResponse() on connectionsettings to force it to be set
on>
ExceptionMessage: No content is provided.
StackTrace:
sleeping 1s...
1
test2
答案 0 :(得分:1)
我使用在Word中创建的简单PDF来尝试您的代码,它似乎对我来说很好。
我正在使用elasticsearch-mapper-attachments 2.4.3运行ES 1.4.4。 两个索引操作都返回正确的2xx状态代码,之后我就可以搜索PDF了。
您能确认这个PDF文件本身没有问题吗?可以尝试用另一个程序重新创建一个PDF再试试。