使用pdf-table-extractor包循环文件

时间:2018-03-10 07:27:33

标签: json node.js

我有一个pdf文件列表,我想从这些文件中提取表格。 所以我使用pdf-table-extractor来做到这一点。

如果我只有一个pdf文件,我可以使用此代码:

// Metadata for the single PDF handled by the one-file version of the script:
// a display title, the local file name, and the link of the document.
var url = {
    title: 'Italy-2016-Vaccine coverage 24 months',
    filename: 'ita-2016-24m.pdf',
    link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_0_fileAllegati_itemFile_5_file.pdf'
};

/**
 * Runs pdf_table_extractor on the single PDF described by the global `url`.
 *
 * The input path is './output/ita/' followed by `url.filename`; the
 * `success` and `error` functions are handed over as the two callbacks.
 */
function pdfToJson() {
    pdf_table_extractor('./output/ita/' + url.filename, success, error);
}

/**
 * Success callback for pdf_table_extractor: serialises the extraction result
 * with JSON.stringify and writes it to a file in './output/ita/'.
 *
 * The output name is the global `url.filename` with a trailing '.pdf'
 * replaced by '.json'. The previous version concatenated './output/ita' and
 * the file name without a separator (yielding './output/itaita-...pdf') and
 * kept the '.pdf' extension for JSON content.
 *
 * @param {*} result - Value delivered by pdf_table_extractor on success.
 */
function success(result) {
    // Swap the extension so the JSON never lands on top of the source PDF,
    // which pdfToJson reads from this same directory.
    var jsonName = url.filename.replace(/\.pdf$/i, '') + '.json';
    var out = './output/ita/' + jsonName;
    var stream = fs.createWriteStream(out);
    stream.once('open', function(fd) {
        stream.write(JSON.stringify(result));
        stream.end();
    });
}

/**
 * Error callback handed to pdf_table_extractor.
 * Reports the failure through console.error with an 'Error: ' prefix.
 *
 * @param {*} err - Value reported by the extractor; appended to the prefix.
 */
function error(err) {
    var message = 'Error: ' + err;
    console.error(message);
}

但我有17个pdf文件,所以必须循环处理这些文件。 问题是:在success()函数中如何确定输出文件名? 这是我的代码:

// PDFs to process, newest year first. Each row is
// [year, number after 'iitemAllegati_', number after 'itemFile_'] and is
// expanded into a {title, filename, link} record.
var urls = [
    [2016, 0, 5],
    [2015, 0, 3],
    [2014, 0, 2],
    [2013, 0, 1],
    [2012, 5, 0],
    [2011, 6, 0],
    [2010, 7, 0],
    [2009, 8, 0],
    [2008, 15, 0],
    [2007, 14, 0],
    [2006, 13, 0],
    [2005, 12, 0],
    [2004, 11, 0],
    [2003, 10, 0],
    [2002, 9, 0],
    [2001, 1, 0],
    [2000, 0, 0]
].map(function (row) {
    return {
        title: 'Italy-' + row[0] + '-Vaccine coverage 24 months',
        filename: 'ita-' + row[0] + '-24m.pdf',
        link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_' + row[1] + '_fileAllegati_itemFile_' + row[2] + '_file.pdf'
    };
});

/**
 * Starts a pdf_table_extractor run for every entry of the global `urls`.
 *
 * Each input path is './output/ita/' followed by the entry's `filename`.
 * The same `success` and `error` functions are passed for every file, so the
 * callbacks are not told which entry they belong to.
 */
function pdfToJson() {
    urls.forEach(function (entry) {
        pdf_table_extractor('./output/ita/' + entry.filename, success, error);
    });
}

// Success callback passed to pdf_table_extractor by pdfToJson; it writes the
// extraction result as JSON through a write stream.
// NOTE: `???` below is the question's placeholder and is not valid
// JavaScript. This callback receives only `result`, so it cannot tell which
// entry of `urls` the result belongs to.
// NOTE(review): './output/ita' has no trailing '/', unlike the input path
// built in pdfToJson — confirm whether the separator is missing.
function success(result) {
    var out = './output/ita' + ???; // <-- PROBLEM HERE
    var stream = fs.createWriteStream(out);
    // Write only after the stream reports 'open', then close it.
    stream.once('open', function(fd) {
        stream.write(JSON.stringify(result));
        stream.end();
    });
} 

/**
 * Failure callback given to pdf_table_extractor.
 * Logs the reported value via console.error, prefixed with 'Error: '.
 *
 * @param {*} err - Value reported by the extractor when a file fails.
 */
function error(err) {
    var line = 'Error: ' + err;
    console.error(line);
}

这段代码的结构是我参照在此处(here)找到的示例编写的,但我并不十分喜欢这种代码组织方式。

有人知道如何帮助我吗?

1 个答案:

答案 0 :(得分:1)

我希望以下答案可以解决您的问题。

// PDFs to process, newest year first. Every record has the same shape, so the
// list is built from (year, number after 'iitemAllegati_', number after
// 'itemFile_') triples by a small private builder.
var urls = (function () {
  var LINK_HEAD = 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_';

  // Builds one {title, filename, link} record.
  function entry(year, attachment, file) {
    return {
      title: 'Italy-' + year + '-Vaccine coverage 24 months',
      filename: 'ita-' + year + '-24m.pdf',
      link: LINK_HEAD + attachment + '_fileAllegati_itemFile_' + file + '_file.pdf'
    };
  }

  return [
    entry(2016, 0, 5),
    entry(2015, 0, 3),
    entry(2014, 0, 2),
    entry(2013, 0, 1),
    entry(2012, 5, 0),
    entry(2011, 6, 0),
    entry(2010, 7, 0),
    entry(2009, 8, 0),
    entry(2008, 15, 0),
    entry(2007, 14, 0),
    entry(2006, 13, 0),
    entry(2005, 12, 0),
    entry(2004, 11, 0),
    entry(2003, 10, 0),
    entry(2002, 9, 0),
    entry(2001, 1, 0),
    entry(2000, 0, 0)
  ];
}());

/**
 * Starts a pdf_table_extractor run for every entry of the global `urls`.
 *
 * Each run gets its own pair of callbacks that close over the current `url`
 * entry, so `success` and `error` are told which file the outcome belongs to.
 * The input path is './output/ita/' followed by the entry's `filename`.
 */
function pdfToJson() {
  urls.forEach(function(url) {
    var inputFile = './output/ita/' + url.filename;
    pdf_table_extractor(inputFile, function(result) {
      success(url, result);
    }, function(err) {
      // The parameter must not be named `error`: that shadows the outer
      // error() handler, so the call would invoke the reported error value
      // instead of the handler and throw a TypeError.
      error(url, err);
    });
  });
}

/*function pdfToJson() {
   urls.forEach(function(url) {
      var inputFile = './output/ita/' + url.filename;
      pdf_table_extractor(inputFile, success, error);
   });
}*/

/**
 * Per-file success callback: serialises the extraction result with
 * JSON.stringify and writes it to a file in './output/ita/'.
 *
 * The output name is `url.filename` with a trailing '.pdf' replaced by
 * '.json'. The previous version concatenated './output/ita' and the file name
 * without a separator (yielding './output/itaita-...pdf') and kept the '.pdf'
 * extension for JSON content.
 *
 * @param {{filename: string}} url - Entry of `urls` the result belongs to.
 * @param {*} result - Value delivered by pdf_table_extractor on success.
 */
function success(url,result) {
  // Swap the extension so the JSON never lands on top of the source PDF,
  // which pdfToJson reads from this same directory.
  var jsonName = url.filename.replace(/\.pdf$/i, '') + '.json';
  var out = './output/ita/' + jsonName;
  var stream = fs.createWriteStream(out);
  stream.once('open', function(fd) {
    stream.write(JSON.stringify(result));
    stream.end();
  });
}

 /**
  * Per-file failure callback.
  * Logs the reported value via console.error, prefixed with 'Error: '.
  *
  * @param {*} url - Entry of `urls` that failed; not used in the message.
  * @param {*} err - Value reported by the extractor.
  */
 function error(url,err) {
  var message = 'Error: ' + err;
  console.error(message);
 }