我有一个需要自动化验证PDF内容的场景。如何在 Node.js 中读取PDF文件的内容?
我完全卡住了。虽然有几篇关于 pdf2json 和 jsonreader 的帖子,但这些帖子中的方法对我都不起作用。任何帮助将不胜感激。
// Create the parser instance. PDFParser is not defined in this snippet
// (presumably imported from pdf2json — confirm).
var pdfParser = new PDFParser();
// Read the file at pdfFilePath and hand its contents to the parser.
// NOTE(review): two callbacks are passed to fs.readFile, which takes a single
// callback; presumably only one of them is ever invoked — verify against the
// Node.js fs documentation.
fs.readFile(pdfFilePath, function(err, pdfBuffer) {
// err is never checked, so pdfBuffer may not hold file data if the read failed.
pdfParser.parseBuffer(pdfBuffer);
}, function(pdfBuffer){
// NOTE(review): this function's only parameter sits in the first-argument
// position, where Node-style callbacks receive the error. pdfBuffer here is
// therefore likely the error (or null) rather than the file contents, which
// would explain the reported "need .data or .url" failure — confirm.
pdfParser.parseBuffer(pdfBuffer);
})
错误:Invalid parameter array, need either .data or .url(参数数组无效,需要 .data 或 .url),位于 FSReqWrap.readFileAfterClose [as oncomplete] (fs.js:445:3)
答案 0 :(得分:1)
const fs = require("fs");
const { PdfReader } = require('pdfreader');

// Load the whole PDF into memory, then feed the buffer to pdfreader and print
// every text item it reports.
fs.readFile("E://file streaming in node js//demo//read.pdf", (readErr, pdfBuffer) => {
  // Stop early when the file could not be read; otherwise pdfBuffer is
  // undefined and the parser would fail with a misleading error.
  if (readErr) {
    console.error(readErr);
    return;
  }

  // pdfBuffer contains the file content
  new PdfReader().parseBuffer(pdfBuffer, (parseErr, item) => {
    // The original called an undefined `callback` here, which threw a
    // ReferenceError; report the parse error directly instead.
    if (parseErr) {
      console.error(parseErr);
      return;
    }
    // A falsy item was treated as "done" by the original code (presumably
    // pdfreader's end-of-buffer signal — confirm); there is nothing to print.
    if (!item) {
      return;
    }
    if (item.text) {
      console.log(item.text);
    }
  });
});
答案 1 :(得分:0)
我找到了答案,并且运行正常。通过运行以下命令安装 fs 和 pdf2json:`npm install pdf2json` 和 `npm install fs`(注:fs 是 Node.js 的内置模块,实际上无需单独安装)。
const fs = require('fs');
const os = require('os');
const path = require('path');
const PDFParser = require('pdf2json');

// Checks that a PDF in the current user's Downloads folder contains
// `textToVerify`. `filename`, `browser` and `textToVerify` are not defined in
// this snippet and must be provided by the surrounding test code.
//
// The original built the path with an undefined `osHomedir()` helper, called
// `.sep` on a string, and had an unbalanced quote around `.pdf`. The path is
// now assembled with os.homedir() and path.join.
// NOTE(review): assumes `filename` holds the base name without extension.
const pdfFilePath = path.join(os.homedir(), 'Downloads', `${filename}.pdf`);

if (fs.existsSync(pdfFilePath)) {
  // Read the content of the pdf from the downloaded path.
  // The constructor arguments are kept from the original; presumably the
  // second one (1) enables the raw text used by getRawTextContent() — confirm
  // against the pdf2json documentation.
  const pdfParser = new PDFParser(browser, 1);

  pdfParser.on("pdfParser_dataError", (errData) => {
    console.error(errData.parserError);
  });

  pdfParser.on("pdfParser_dataReady", () => {
    //console.log('here is the content: '+pdfParser.getRawTextContent());
    browser.assert.ok(pdfParser.getRawTextContent().includes(textToVerify));
  });

  pdfParser.loadPDF(pdfFilePath);
} else {
  console.log('OOPs file not present in the downloaded folder');
  // Fail the assertion when the file is not found at the expected path.
  browser.assert.ok(fs.existsSync(pdfFilePath));
}