/**
 * Writes a reconstructed copy of the input PDF to
 * {@code _outputFolder + <input name without extension> + "_ocr.pdf"}.
 *
 * <p>Every page of the input is rendered to an image. For each run of words
 * in {@code _listOfWords} a new A4 page is started whenever a word's page
 * number is greater than that of the word before it. The rendered image of
 * that page is drawn as background, and each word's bounding box and its
 * characters are drawn on top.
 *
 * <p>If {@code _listOfWords} is {@code null} or empty, an error is logged and
 * nothing is written.
 *
 * @throws IOException if the input cannot be loaded, or drawing or saving the
 *         output document fails
 */
protected void printOCROverlay() throws IOException {
    if (_listOfWords == null || _listOfWords.isEmpty()) {
        __log.error("No words in PDF.");
        return;
    }

    // Get all pages as image (the input document is closed again afterwards)
    ArrayList<BufferedImage> pageImages = renderInputPagesForOverlay();

    // try-with-resources: the output document is closed even when drawing or saving fails
    try (PDDocument pdDocument = new PDDocument()) {
        int i = 0;
        while (i < _listOfWords.size()) {
            int pageNumber = _listOfWords.get(i).page;

            // Create new page, init new content stream
            PDPage page = new PDPage(PDRectangle.A4);
            pdDocument.addPage(page);
            try (PDPageContentStream contentStream =
                    new PDPageContentStream(pdDocument, page, true, true)) {
                contentStream.setStrokingColor(_lightGrayColor); // Bounding box color
                drawOverlayPageBackground(pdDocument, contentStream, pageImages, pageNumber);

                // Stay on this output page until a word reports a higher page
                // number than the word before it.
                do {
                    drawOverlayWord(contentStream, _listOfWords.get(i));
                    i++;
                } while (i < _listOfWords.size()
                        && _listOfWords.get(i).page <= _listOfWords.get(i - 1).page);
            }
        }

        String outputPath = _outputFolder + getFileNameWithoutExt(_inputFileName_PDF) + "_ocr.pdf";
        __log.info("Trying to save reconstructed file to: " + outputPath);
        pdDocument.save(outputPath);
    }
}

/**
 * Renders every page of the input PDF at {@code RENDER_DPI} as an RGB image.
 *
 * <p>The returned list always has one entry per input page, so that
 * {@code pageNumber - 1} is a valid index for that page. A page that could
 * not be rendered is logged and represented by {@code null}.
 *
 * @return the rendered pages in document order; entries may be {@code null}
 * @throws IOException if the input document cannot be loaded or closed
 */
private ArrayList<BufferedImage> renderInputPagesForOverlay() throws IOException {
    ArrayList<BufferedImage> pageImages = new ArrayList<BufferedImage>();
    try (PDDocument pdfDocIn = PDDocument.load(new File(_inputFileName_PDF))) {
        PDFRenderer pdfRenderer = new PDFRenderer(pdfDocIn);
        int pageCount = pdfDocIn.getNumberOfPages();
        for (int n = 0; n < pageCount; n++) {
            BufferedImage pageImage = null;
            try {
                pageImage = pdfRenderer.renderImageWithDPI(n, RENDER_DPI, ImageType.RGB);
            } catch (IOException e) {
                // Best effort: keep going, but record the failure instead of
                // silently dropping the page.
                __log.error("Could not render page " + (n + 1) + " of " + _inputFileName_PDF + ": " + e);
            }
            // Always add an entry (possibly null) so indices stay aligned with page numbers.
            pageImages.add(pageImage);
        }
    }
    return pageImages;
}

/**
 * Draws the rendered image of the given page so that it fills an A4 page.
 *
 * <p>If no image is available for {@code pageNumber} (index out of range or
 * rendering failed), an error is logged and nothing is drawn.
 *
 * @param targetDocument the document being written; the image object is created for it
 * @param contentStream  content stream of the page to draw on
 * @param pageImages     rendered input pages, indexed by {@code pageNumber - 1}
 * @param pageNumber     1-based page number of the input page
 * @throws IOException if the image cannot be embedded or drawn
 */
private void drawOverlayPageBackground(PDDocument targetDocument, PDPageContentStream contentStream,
        ArrayList<BufferedImage> pageImages, int pageNumber) throws IOException {
    int imageIndex = pageNumber - 1;
    BufferedImage pageImage = null;
    if (imageIndex >= 0 && imageIndex < pageImages.size()) {
        pageImage = pageImages.get(imageIndex);
    }
    if (pageImage == null) {
        __log.error("No rendered image available for page " + pageNumber
                + "; drawing overlay without background.");
        return;
    }
    // The image must be created for the document it is drawn into, not for
    // the (already closed) input document.
    PDImageXObject pdImage = LosslessFactory.createFromImage(targetDocument, pageImage);
    contentStream.drawImage(pdImage, 0, 0, PDRectangle.A4.getWidth(), PDRectangle.A4.getHeight());
}

/**
 * Strokes the bounding box of one word and shows each of its characters using
 * the character's own font and text matrix.
 *
 * <p>If a character cannot be shown, the failure is logged at debug level and
 * {@code "[?]"} is shown in {@code _warnColor} with {@code _font} instead.
 *
 * @param contentStream content stream of the current output page
 * @param currentWord   the word to draw
 * @throws IOException if writing to the content stream fails
 */
private void drawOverlayWord(PDPageContentStream contentStream, DocElements.Word currentWord)
        throws IOException {
    // Draw bounding box
    contentStream.addRect(currentWord.bBox);
    contentStream.stroke();

    // Draw extracted text
    contentStream.beginText();
    for (int c = 0; c < currentWord.word.size(); c++) {
        TextPosition currChar = currentWord.word.get(c);
        PDFont wordFont = currChar.getFont();
        // Colour and font are set per character because the fallback below changes both.
        contentStream.setNonStrokingColor(_ocrColor);
        contentStream.setFont(wordFont, 1.0f);
        contentStream.setTextMatrix(currChar.getTextMatrix());
        // Sometimes there are unprintable symbols...
        try {
            contentStream.showText(currChar.toString());
        } catch (IllegalArgumentException | NullPointerException | UnsupportedOperationException ex) {
            __log.debug("Non-supported character [" + ex.getClass().getSimpleName() + "]: "
                    + currChar.toString());
            contentStream.setNonStrokingColor(_warnColor);
            contentStream.setFont(_font, 1.0f);
            contentStream.showText("[?]");
        }
    }
    contentStream.endText();
}
此重定向规则无法读取 # 之后的字符串,请帮帮我。
答案 0 :(得分:0)
要在重写后的网址中使用锚点链接(或一般的特殊字符),可以使用 NE(noescape)标志。
RewriteRule ^adminDashboard/(.*)$ "PL_5_25_2015/index.php#admin/$1" [NE]