如何使PDF中的文字可选?
在这里尝试过。 PDF写得很好,但没有文字选择
https://github.com/mozilla/pdf.js
https://github.com/mozilla/pdf.js/blob/master/web/text_layer_builder.css
https://github.com/mozilla/pdf.js/blob/master/web/text_layer_builder.js
'use strict';
PDFJS.getDocument('file.pdf').then(function(pdf){
var page_num = 1;
pdf.getPage(page_num).then(function(page){
var scale = 1.5;
var viewport = page.getViewport(scale);
var canvas = document.getElementById('the-canvas');
var context = canvas.getContext('2d');
canvas.height = viewport.height;
canvas.width = viewport.width;
var canvasOffset = $(canvas).offset();
var $textLayerDiv = $('#text-layer').css({
height : viewport.height+'px',
width : viewport.width+'px',
top : canvasOffset.top,
left : canvasOffset.left
});
page.render({
canvasContext : context,
viewport : viewport
});
page.getTextContent().then(function(textContent){
var textLayer = new TextLayerBuilder({
textLayerDiv : $textLayerDiv.get(0),
pageIndex : page_num - 1,
viewport : viewport
});
textLayer.setTextContent(textContent);
textLayer.render();
});
});
});
<body>
<div>
<canvas id="the-canvas" style="border:1px solid black;"></canvas>
<div id="text-layer" class="textLayer"></div>
</div>
</body>
答案 0 :(得分:13)
您的JavaScript代码非常完美。 您只需要包含Text Layer Builder所依赖的UI实用程序:
https://github.com/mozilla/pdf.js/blob/master/web/ui_utils.js
或在HTML中:
<script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/ui_utils.js"></script>
如果您运行代码(没有ui_utils)并检查debug console,
你会看到ReferenceError: CustomStyle is not defined
。
PDFjs quick search中的repo将显示它在ui_utils.js中定义。
这是我的最小但完整的代码供您参考。 我正在使用PDFjs的演示pdf here。 请注意,在生产中,您不应链接到raw.github。
<!DOCTYPE html><meta charset="utf-8">
<link rel="stylesheet" href="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/text_layer_builder.css" />
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js"></script>
<script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/ui_utils.js"></script>
<script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/text_layer_builder.js"></script>
<script src="https://mozilla.github.io/pdf.js/build/pdf.js"></script>
<body>
<div>
<canvas id="the-canvas" style="border:1px solid black;"></canvas>
<div id="text-layer" class="textLayer"></div>
</div>
<script>
'use strict';
PDFJS.getDocument('file.pdf').then(function(pdf){
var page_num = 1;
pdf.getPage(page_num).then(function(page){
var scale = 1.5;
var viewport = page.getViewport(scale);
var canvas = $('#the-canvas')[0];
var context = canvas.getContext('2d');
canvas.height = viewport.height;
canvas.width = viewport.width;
var canvasOffset = $(canvas).offset();
var $textLayerDiv = $('#text-layer').css({
height : viewport.height+'px',
width : viewport.width+'px',
top : canvasOffset.top,
left : canvasOffset.left
});
page.render({
canvasContext : context,
viewport : viewport
});
page.getTextContent().then(function(textContent){
console.log( textContent );
var textLayer = new TextLayerBuilder({
textLayerDiv : $textLayerDiv.get(0),
pageIndex : page_num - 1,
viewport : viewport
});
textLayer.setTextContent(textContent);
textLayer.render();
});
});
});
</script>
答案 1 :(得分:6)
在 pdf.js 版本 2.8.61 上,检查的答案不再起作用,因为 renderTextLayer() 已集成到 pdf.js,不再需要外部源,也不需要 jQuery。
以下代码将使 PDF 文本可选。它加载以下PDF文档为例,请替换为您自己的:
https://raw.githubusercontent.com/mozilla/pdf.js/ba2edeae/web/compressed.tracemonkey-pldi-09.pdf
主要使用两个html元素:
<canvas id="the-canvas"></canvas>
<div class="textLayer"></div>
canvas 用于显示不可选择的文档,.textLayer div 用于可选文本。 .textLayer div 上的文字都是透明的,所以不可见,只提供选择效果。
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="viewport" content="width=device-width,initial-scale=1.0,maximum-scale=1.0,user-scalable=no">
<script src="//mozilla.github.io/pdf.js/build/pdf.js" crossorigin="anonymous"></script>
<link href="//mozilla.github.io/pdf.js/web/viewer.css" rel="stylesheet" type="text/css" />
<style type="text/css">
#the-canvas {
border: 1px solid black;
direction: ltr;
}
</style>
</head>
<body>
<h1>PDF.js Previous/Next example</h1>
<div>
<button id="prev">Previous</button>
<button id="next">Next</button>
<span>Page: <span id="page_num"></span> / <span id="page_count"></span></span>
</div>
<canvas id="the-canvas"></canvas>
<div class="textLayer"></div>
<script>
// If absolute URL from the remote server is provided, configure the CORS
// header on that server.
var url = '//raw.githubusercontent.com/mozilla/pdf.js/ba2edeae/web/compressed.tracemonkey-pldi-09.pdf';
// Loaded via <script> tag, create shortcut to access PDF.js exports.
var pdfjsLib = window['pdfjs-dist/build/pdf'];
// The workerSrc property shall be specified.
pdfjsLib.GlobalWorkerOptions.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';
var pdfDoc = null,
pageNum = 1,
pageRendering = false,
pageNumPending = null,
//scale = 0.8,
scale = 1,
canvas = document.getElementById('the-canvas'),
ctx = canvas.getContext('2d');
/**
* Get page info from document, resize canvas accordingly, and render page.
* @param num Page number.
*/
function renderPage(num) {
pageRendering = true;
// Using promise to fetch the page
pdfDoc.getPage(num).then(function(page) {
var viewport = page.getViewport({scale: scale});
canvas.height = viewport.height;
canvas.width = viewport.width;
// Render PDF page into canvas context
var renderContext = {
canvasContext: ctx,
viewport: viewport
};
var renderTask = page.render(renderContext);
// Wait for rendering to finish
renderTask.promise.then(function() {
pageRendering = false;
if (pageNumPending !== null) {
// New page rendering is pending
renderPage(pageNumPending);
pageNumPending = null;
}
}).then(function() {
// Returns a promise, on resolving it will return text contents of the page
return page.getTextContent();
}).then(function(textContent) {
// Assign CSS to the textLayer element
var textLayer = document.querySelector(".textLayer");
textLayer.style.left = canvas.offsetLeft + 'px';
textLayer.style.top = canvas.offsetTop + 'px';
textLayer.style.height = canvas.offsetHeight + 'px';
textLayer.style.width = canvas.offsetWidth + 'px';
// Pass the data to the method for rendering of text over the pdf canvas.
pdfjsLib.renderTextLayer({
textContent: textContent,
container: textLayer,
viewport: viewport,
textDivs: []
});
});
});
// Update page counters
document.getElementById('page_num').textContent = num;
}
/**
* If another page rendering in progress, waits until the rendering is
* finised. Otherwise, executes rendering immediately.
*/
function queueRenderPage(num) {
if (pageRendering) {
pageNumPending = num;
} else {
renderPage(num);
}
}
/**
* Displays previous page.
*/
function onPrevPage() {
if (pageNum <= 1) {
return;
}
pageNum--;
queueRenderPage(pageNum);
}
document.getElementById('prev').addEventListener('click', onPrevPage);
/**
* Displays next page.
*/
function onNextPage() {
if (pageNum >= pdfDoc.numPages) {
return;
}
pageNum++;
queueRenderPage(pageNum);
}
document.getElementById('next').addEventListener('click', onNextPage);
/**
* Asynchronously downloads PDF.
*/
pdfjsLib.getDocument(url).promise.then(function(pdfDoc_) {
pdfDoc = pdfDoc_;
document.getElementById('page_count').textContent = pdfDoc.numPages;
// Initial/first page rendering
renderPage(pageNum);
});
</script>
</body>
</html>
答案 2 :(得分:3)
经过几个小时的努力,我发现这篇文章对于选择文本和使用没有节点的pdf.js非常有帮助。 Custom PDF Rendering in JavaScript with Mozilla’s PDF.Js
答案 3 :(得分:-1)
您好,您已在HTML内容中创建了画布。
Canvas不支持文本选择,因此您需要将画布更改为其他方式。