将Node.js流中的数据上传到ElasticSearch数据库

时间:2015-07-12 21:19:27

标签: javascript node.js elasticsearch

我当前的Node.js代码从一个非常大的USPTO专利XML文件(大约100MB)创建一个流,并在解析XML流时为每条专利创建一个patentGrant对象。patentGrant对象包括出版号、出版国、出版日期和专利种类。我正在尝试使用ElasticSearch创建一个包含所有patentGrant对象的数据库。我已成功添加代码以连接到本地ElasticSearch数据库,但我无法理解ElasticSearch-js API,不知道如何将patentGrant对象上传到数据库。根据这个tutorial以及我之前在stackoverflow上提出的问题(here),我似乎应该使用bulk api。以下是我的ParseXml.js代码:

// ParseXml.js: parses a USPTO patent-grant XML file as a stream and logs a
// small summary object for every <us-patent-grant> element encountered.
var CreateParsableXml = require('./CreateParsableXml.js');
var XmlParserStream = require('xml-stream');
// var Upload2ES = require('./Upload2ES.js');
var es = require('elasticsearch');

// ElasticSearch client for the local node; it is constructed here but the
// handlers in this snippet do not call it yet.
var client = new es.Client({ host: 'localhost:9200' });

// Hand whatever CreateParsableXml.concatXmlStream returns for the input file
// to the xml-stream parser.
var parseXml = new XmlParserStream(CreateParsableXml.concatXmlStream('ipg140107.xml'));

/**
 * Builds the patentGrant summary for one parsed element and prints it.
 *
 * @param {Object} patentGrantElement - element passed by xml-stream to the
 *     'endElement: us-patent-grant' listener.
 */
function logPatentGrant(patentGrantElement) {
    // All four summary fields are read from the same document-id node.
    var bibliographicData = patentGrantElement['us-bibliographic-data-grant'];
    var documentId = bibliographicData['publication-reference']['document-id'];

    var summary = {
        pubNo: documentId['doc-number'],
        pubCountry: documentId.country,
        kind: documentId.kind,
        pubDate: documentId.date
    };

    console.log(summary);
}

/** Reports that the parser emitted its 'end' event. */
function logCompletion() {
    console.log('all done');
}

parseXml.on('endElement: us-patent-grant', logPatentGrant);
parseXml.on('end', logCompletion);

1 个答案:

答案 0 :(得分:1)

批量api(bulk API),正如您所链接的文档中所述,用于"index"和"delete"操作。

请改用create方法:https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/api-reference.html#api-create

// For every parsed <us-patent-grant> element, build a patentGrant summary and
// send it to ElasticSearch with client.create.
parseXml.on('endElement: us-patent-grant', function(patentGrantElement) {
    // All four summary fields are read from the same document-id node, so
    // resolve it once instead of repeating the full lookup chain.
    var documentId = patentGrantElement['us-bibliographic-data-grant']['publication-reference']['document-id'];
    var patentGrant = {
        pubNo: documentId['doc-number'],
        pubCountry: documentId['country'],
        kind: documentId['kind'],
        pubDate: documentId['date']
    };

    // No `id` is supplied with the document.
    // NOTE(review): this relies on ElasticSearch generating an id
    // automatically - confirm against the client version in use.
    client.create({
        index: 'myindex',
        type: 'mytype',
        body: patentGrant
    }, function(err) {
        // Report indexing failures; an empty callback would hide every
        // rejected or undelivered document.
        if (err) {
            console.error('failed to index patent grant ' + patentGrant.pubNo, err);
        }
    });

    console.log(patentGrant);
});

如果不指定ID,ElasticSearch应该会自动创建一个ID,参见https://www.elastic.co/guide/en/elasticsearch/reference/1.6/docs-index_.html#_automatic_id_generation