Jasper报告PDF不符合PDF / UA

时间:2019-06-12 15:46:26

标签: jasper-reports itext pdf-generation

我想让Jasper导出的PDF符合PDF / UA标准,但是Jasper的局限性阻止了我这样做。客户敦促我们正确完成此任务。

PDF / UA有很多要求,包括但不限于显示标题和语言、嵌入字体以及为图像添加替代文本。到目前为止,我已经在Jaspersoft Studio中设置了所有Section 508 PDF标签,设置了显示标题、语言和嵌入字体的属性,并为图像添加了替代文本。我还通过Apache PDFBox在生成PDF之后将PDF / UA标识符附加到了输出的PDF上。我们使用Jaspersoft Studio v6.6.0和Jasper Reports Library v6.4.0,数据库为Oracle。根据我的阅读,Jasper在这方面的功能有限,原因是出于许可问题,其使用的iText被降级到了v2.1.7.js6。

<jasperReport xmlns=...>
        ... // other properties
        <!-- Report-level export properties: font handling, XLS options, PDF metadata and tagging -->
        <property name="net.sf.jasperreports.awt.ignore.missing.font" value="false"/>
        <property name="net.sf.jasperreports.export.xls.detect.cell.type" value="false"/>
        <property name="net.sf.jasperreports.export.xls.sheet.names.all" value="REPORT SHEET NAME"/>
        <!-- NOTE(review): Times-Roman is a built-in PDF font name; confirm it can actually be embedded as requested by default.pdf.embedded below -->
        <property name="net.sf.jasperreports.default.pdf.font.name" value="Times-Roman"/>
        <property name="net.sf.jasperreports.export.xls.ignore.graphics" value="false"/>
        <property name="net.sf.jasperreports.default.pdf.embedded" value="true"/>
        <property name="net.sf.jasperreports.export.pdf.metadata.title" value="MY REPORT TITLE"/>
        <property name="net.sf.jasperreports.export.pdf.display.metadata.title" value="true"/>
        <property name="net.sf.jasperreports.export.pdf.tagged" value="true"/>
        <property name="net.sf.jasperreports.export.pdf.tag.language" value="EN-US"/>
        ... // parameters, stored proc call, headings, etc.
        <!-- Possible PDF 508 tags to be set on text fields -->
        <!-- Every tag opened with "start" must be closed with a matching "end"; "full" opens and closes on the same element -->
        <property name="net.sf.jasperreports.export.pdf.tag.table" value="start"/>
        <property name="net.sf.jasperreports.export.pdf.tag.th" value="full"/>
        <property name="net.sf.jasperreports.export.pdf.tag.tr" value="start"/>
        <property name="net.sf.jasperreports.export.pdf.tag.td" value="full"/>
        <property name="net.sf.jasperreports.export.pdf.tag.tr" value="end"/>
        <property name="net.sf.jasperreports.export.pdf.tag.table" value="end"/>
        ...
</jasperReport>
... // other imports
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.XMPSchema;
import org.apache.xmpbox.xml.XmpSerializer;
... // more imports

public class ReportResult {
   ... // other methods

    /**
     * Attaches XMP metadata (Dublin Core title/description plus a PDF/UA
     * extension-schema description) to the given document and returns the
     * saved result.
     *
     * <p>The document is always closed before this method returns, including
     * when building the metadata or saving fails, so the caller must not use
     * it afterwards.
     *
     * @param pdf   the PDF instance created from the report output; closed by this method
     * @param title text stored as both the Dublin Core title and description
     * @return a new stream containing the saved PDF with the metadata attached
     * @throws TransformerException declared by the XMP serialization step
     * @throws IOException if importing the metadata, saving or closing the document fails
     */
    private ByteArrayOutputStream appendXMPMetaData(PDDocument pdf, String title) throws TransformerException, IOException {
        try {
            XMPMetadata xmp = XMPMetadata.createXMPMetadata();
            xmp.createAndAddDublinCoreSchema();
            xmp.getDublinCoreSchema().setTitle(title);
            xmp.getDublinCoreSchema().setDescription(title);
            xmp.createAndAddPDFAExtensionSchemaWithDefaultNS();
            xmp.getPDFExtensionSchema().addNamespace("http://www.aiim.org/pdfa/ns/schema#", "pdfaSchema");
            xmp.getPDFExtensionSchema().addNamespace("http://www.aiim.org/pdfa/ns/property#", "pdfaProperty");
            xmp.getPDFExtensionSchema().addNamespace("http://www.aiim.org/pdfua/ns/id/", "pdfuaid");

            // Describe the pdfuaid schema inside the PDF/A extension schema.
            XMPSchema uaSchema = new XMPSchema(XMPMetadata.createXMPMetadata(),
                    "pdfaSchema", "pdfaSchema", "pdfaSchema");
            uaSchema.setTextPropertyValue("schema", "PDF/UA Universal Accessibility Schema");
            uaSchema.setTextPropertyValue("namespaceURI", "http://www.aiim.org/pdfua/ns/id/");
            uaSchema.setTextPropertyValue("prefix", "pdfuaid");

            // Describe the single "part" property of that schema.
            XMPSchema uaProp = new XMPSchema(XMPMetadata.createXMPMetadata(), "pdfaProperty", "pdfaProperty", "pdfaProperty");
            uaProp.setTextPropertyValue("name", "part");
            uaProp.setTextPropertyValue("valueType", "Integer");
            uaProp.setTextPropertyValue("category", "internal");
            uaProp.setTextPropertyValue("description", "Indicates, which part of ISO 14289 standard is followed");
            uaSchema.addUnqualifiedSequenceValue("property", uaProp);

            xmp.getPDFExtensionSchema().addBagValue("schemas", uaSchema);
            // NOTE(review): switching the extension schema's prefix to "pdfuaid" before
            // writing "part" looks like a workaround to emit pdfuaid:part=1 - confirm the
            // serialized packet really contains that property in the pdfuaid namespace.
            xmp.getPDFExtensionSchema().setPrefix("pdfuaid");
            xmp.getPDFExtensionSchema().setTextPropertyValue("part", "1");

            // Serialize the XMP packet and attach it to the document catalog.
            XmpSerializer serializer = new XmpSerializer();
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            serializer.serialize(xmp, baos, true);

            PDMetadata metadata = new PDMetadata(pdf);
            metadata.importXMPMetadata(baos.toByteArray());
            pdf.getDocumentCatalog().setMetadata(metadata);

            ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
            pdf.save(byteArrayOutputStream);

            return byteArrayOutputStream;
        } finally {
            // Release the document even when metadata creation or saving fails.
            pdf.close();
        }
    }

    /**
     * Generates the report as PDF, attaches the XMP metadata via
     * {@code appendXMPMetaData} and writes the result to the HTTP response.
     * When no report data is produced, {@code displayError("PDF")} is called instead.
     *
     * @param reportConfig supplies the report parameters, the print flag and the display name
     * @throws IOException declared for the PDF loading and response writing steps
     * @throws TransformerException propagated from {@code appendXMPMetaData}
     */
    protected void getJasperPDFDoc(ReportConfig reportConfig) throws IOException, TransformerException {

        List<ReportParameter> reportParams = reportConfig.getReportParams();

        ... // cookies and printer config

        Map imagesMap = new HashMap();
        request.getSession(true).setAttribute("IMAGES_MAP", imagesMap);

        ByteArrayOutputStream reportBytes = ReportAccess.Instance.getInstance().generateJasperReport(
                getCurrentUserId(), getCurrentUserName(), reportConfig, "PDF",
                reportParams, getTmpImageUri(),
                imagesMap, rptTemplateLoc);

        // Nothing was generated: report the failure and stop.
        if (reportBytes == null) {
            displayError("PDF");
            return;
        }

        if (!reportConfig.doPrint) {
            log.debug("Got PDF report data");
            String fileName = getReportFileName(reportConfig) + ".pdf";
            response.setContentType("application/pdf");
            response.setHeader("Content-disposition", "attachment; filename=" + fileName);
        } else {
            response.setContentType("text/html");
        }

        // Re-open the exported bytes so the metadata can be attached.
        PDDocument exportedPdf = PDDocument.load(new ByteArrayInputStream(reportBytes.toByteArray()));
        ByteArrayOutputStream taggedBytes = appendXMPMetaData(exportedPdf, reportConfig.getDisplayName());

        response.setHeader("Content-length", String.valueOf(taggedBytes.size()));
        ServletOutputStream responseStream = response.getOutputStream();
        taggedBytes.writeTo(responseStream);

        responseStream.flush();
        responseStream.close();
    }

     ... // other methods
}
/* REPORT MANAGER CLASS */
/**
 * Exports the given print object to the supplied stream as a compressed,
 * tagged PDF with tag language "EN-US".
 *
 * <p>Any exception raised during the export is logged and not rethrown.
 *
 * @param jasperPrint the filled report; passed through {@code moveTableOfContents} before export
 * @param f           the stream that receives the PDF bytes
 */
private static void generatePDFDoc(JasperPrint jasperPrint, ByteArrayOutputStream f) {
    try {
        JasperPrint preparedPrint = moveTableOfContents(jasperPrint);

        // Exporter settings: compression plus tagging.
        SimplePdfExporterConfiguration pdfConfig = new SimplePdfExporterConfiguration();
        pdfConfig.setCompressed(true);
        pdfConfig.setTagged(true);
        pdfConfig.setTagLanguage("EN-US");

        // Wire input, output and settings, then run the export.
        JRPdfExporter pdfExporter = new JRPdfExporter();
        pdfExporter.setExporterInput(new SimpleExporterInput(preparedPrint));
        pdfExporter.setExporterOutput(new SimpleOutputStreamExporterOutput(f));
        pdfExporter.setConfiguration(pdfConfig);
        pdfExporter.exportReport();
    } catch (Exception e) {
        log.error(e.getMessage(), e);
    }
}

我注意到Adobe的Preflight检查器以及我们的客户都报告了一些错误,如下所示:

  1. 存在非标准标签
  2. 循环角色映射(circular role map)
  3. 未知锚点单元格附加在每个页面的左上角
  4. 在表编辑器视图中无法正确识别表

图片展示了我遇到的问题。非常感谢在这方面提供的任何帮助。

1 个答案:

答案 0 :(得分:0)

如果您愿意换一种更简单的方案,可以选择PD4ML v4。页面上有一个简约的示例:https://pd4ml.tech/pdf-ua/

它使用来自输入HTML / CSS的可用结构和元信息来生成有效的带标签的PDF / UA。

如果目标是仅通过PDF / UA文件格式验证(例如通过Adobe的Preflight检查器),则只需选择Constants.PDFUA作为输出格式就足够了。

// Write the document with Constants.PDFUA selected as the output format.
pd4ml.writePDF(fos, Constants.PDFUA);

如果目标是生成兼容Matterhorn协议的PDF(并通过PAC3 https://www.access-for-all.ch/en/pdf-lab/pdf-accessibility-checker-pac.html 进行验证),则很可能还需要调整输入的HTML:添加TITLE、ALT和LANG属性,并确保表结构和标题层次结构一致。