使用HTML5 File API读取pdf文件

时间:2018-08-28 09:41:11

标签: javascript html5 reactjs

我想用React制作表格并上传pdf文件。我必须执行到这里,但是现在我的应用程序需要从pdf读取数据而不保存在后端数据库等中。整个功能可以作为预检查器。 有什么建议吗?

1 个答案:

答案 0 :(得分:4)

您可以使用PDF.js通过javascript / jQuery读取PDF文件的内容。这是我的工作示例。

$("#file").on("change", function(evt){


var file = evt.target.files[0];

//Read the file using file reader
var fileReader = new FileReader();

fileReader.onload = function () {

//Turn array buffer into typed array
var typedarray = new Uint8Array(this.result);

//calling function to read from pdf file
getText(typedarray).then(function (text) {

/*Selected pdf file content is in the variable text. */
$("#content").html(text);
}, function (reason) //Execute only when there is some error while reading pdf file
{
alert('Seems this file is broken, please upload another file');
console.error(reason);
});

//getText() function definition. This is the pdf reader function.
function getText(typedarray) {

//PDFJS should be able to read this typedarray content

var pdf = PDFJS.getDocument(typedarray);
return pdf.then(function (pdf) {

// get all pages text
var maxPages = pdf.pdfInfo.numPages;
var countPromises = [];
// collecting all page promises
for (var j = 1; j <= maxPages; j++) {
var page = pdf.getPage(j);

var txt = "";
countPromises.push(page.then(function (page) {
// add page promise
var textContent = page.getTextContent();

return textContent.then(function (text) {
// return content promise
return text.items.map(function (s) {
return s.str;
}).join(''); // value page text
});
}));
}

// Wait for all pages and join text
return Promise.all(countPromises).then(function (texts) {
return texts.join('');
});
});
}
};
            //Read the file as ArrayBuffer
fileReader.readAsArrayBuffer(file);

});
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.0.87/pdf.js"></script>

<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

<body>
<input type="file" id="file" name="file" accept="application/pdf">
<br>
<p id="content"></p>
</body>