如何使用 pdf.js 从通过文件输入框选择的 PDF 中提取文本

时间:2019-05-27 14:25:38

标签: javascript pdf.js

我想从通过文件输入框(file input)在本地设备上选择的 PDF 中提取文本。我尝试使用 pdf.js,也阅读了许多相关问题,但仍然无法在自己的代码中实现。我从这个链接获得了一些代码:http://hublog.hubmed.org/archives/001948.html ,并且正在使用这项服务:http://hubgit.github.com/2011/11/pdftotext/ 。我已经修改了该代码以使用文件输入,我的代码如下:

<!-- edit this; the PDF file must be on the same domain as this page -->
<iframe id="input" style="display:none"></iframe>

<!-- embed the pdftotext service as an iframe -->
<iframe id="processor" src="http://hubgit.github.com/2011/11/pdftotext/"></iframe>

<!-- a container for the output -->
<div id="output"></div>

<button onclick="chk()">click</button>
<script>
var input = document.getElementById("input");
var processor = document.getElementById("processor");
var output = document.getElementById("output");

var files = document.getElementById("files");

function chk() {
input.src = URL.createObjectURL(files.files[0]);
// listen for messages from the processor
window.addEventListener("message", function(event){
if (event.source != processor.contentWindow) return;

switch (event.data){
// "ready" = the processor is ready, so fetch the PDF file
case "ready":
var xhr = new XMLHttpRequest;
xhr.open('GET', input.getAttribute("src"), true);
xhr.responseType = "arraybuffer";
xhr.onload = function(event) {
processor.contentWindow.postMessage(this.response, "*");
};
xhr.send();
break;

// anything else = the processor has returned the text of the PDF
default:
output.textContent = event.data.replace(/\s+/g, " ");
break;
}
}, true);
//alert("and");

}
</script>

问题是:选择文件后,每当我单击按钮,浏览器就会停止响应。请帮我更正这段代码并给予指导,谢谢。

0 个答案:

没有答案