在 iText 7 中,是否存在与 PdfReader.getJavaScript() 等效的方法?我们希望使用 iText 7 对 PDF 文档进行恶意内容清理。
答案 0 :(得分:2)
据我所知,在iText 7中没有专门的方法。
基本上,旧的 PdfReader.getJavaScript() 方法只是查找 JavaScript 名称树,并将其中所有的值放入一个字符串缓冲区。
您可以在iText 7中输出这些值:
// Print every document-level script registered in the catalog's JavaScript name tree:
// for each entry, the entry name followed by the script text found under /JS.
PdfNameTree javascript = pdfDocument.getCatalog().getNameTree(PdfName.JavaScript);
Map<String, PdfObject> objs2 = javascript.getNames();
for (Map.Entry<String, PdfObject> entry : objs2.entrySet()) {
    System.out.println();
    System.out.println(entry.getKey());
    System.out.println();

    PdfObject action = entry.getValue();
    if (action != null && action.isDictionary()) {
        // The script sits under the /JS key, either as a string or as a stream.
        // An entry without /JS yields null here; skip it instead of failing with a
        // NullPointerException, since malformed files are expected when sanitizing.
        PdfObject script = ((PdfDictionary) action).get(PdfName.JS);
        if (script != null && script.isString()) {
            System.out.println(((PdfString) script).getValue());
        } else if (script != null && script.isStream()) {
            // Decoded with the JVM's default charset, exactly as before.
            System.out.println(new String(((PdfStream) script).getBytes()));
        }
    }
    System.out.println();
}
(参见 ShowDocumentLevelJavaScript 测试中的 testREJECT_ContainsJavaScript)
显然,您可以以类似的方式将JavaScript片段收集到某个字符串缓冲区中。
James 在评论中说:
我尝试使用(并扩展)您的答案,但无法检测打开示例PDF时触发的JavaScript弹出窗口
将上述代码应用于@James提供的PDF文件,我得到输出:
e.pdf Freeware Hinweis
if (app.viewerVersion>=5)
{
var result=app.alert(
"Diese Datei wurde mit der Freeware Version von CIB e.pdf erzeugt.\n\nMöchten Sie nähere Informationen?"
, 3
, 2
, "e.pdf Freeware Hinweis"
);
if (result==4)
getURL("http://www.cib.de/deutsch/products/pdfplugin/epdfbeta.asp", false);
}
在这里可以清楚地看到 JavaScript 弹出窗口,即 app.alert 调用。因此,我无法重现这个问题。
答案 1 :(得分:0)
对于能够使用 iText 7.1.1 或更新版本的人,我认为 mkl 的解决方案更好。如果您像我一样不得不使用 iText 7.0.5,以下代码对我的参考 PDF 有效:
/**
 * Reports whether the document contains JavaScript.
 *
 * <p>Two places are inspected, in this order: the action ({@code /A}) of every non-link
 * annotation on every page, and the catalog's Names/JavaScript dictionary (via
 * {@link #getJavaScriptFromPdfDocument(PdfDocument)}).
 *
 * @param pdfDoc  the opened document to inspect
 * @param theFile not used by this method; kept so existing callers keep compiling
 * @return {@code true} as soon as any non-empty script is found, {@code false} otherwise
 */
private static boolean hasJavascript(PdfDocument pdfDoc, String theFile) {
    int n = pdfDoc.getNumberOfPages();
    for (int i = 1; i <= n; i++) {
        PdfPage pdfPage = pdfDoc.getPage(i);
        List<PdfAnnotation> annotList = pdfPage.getAnnotations();
        if (!ListUtility.hasData(annotList)) {
            continue;
        }
        for (PdfAnnotation annot : annotList) {
            // Constant-first comparison: an annotation without a subtype must not cause an NPE.
            // NOTE(review): link annotations are skipped as in the original code, although a
            // link can also carry a JavaScript action - confirm this exclusion is intended.
            if (PdfName.Link.equals(annot.getSubtype())) {
                continue;
            }
            PdfDictionary annotationAction = annot.getPdfObject().getAsDictionary(PdfName.A);
            if (annotationAction == null || !PdfName.JavaScript.equals(annotationAction.get(PdfName.S))) {
                continue;
            }
            // /JS may be a string or a stream. getAsString() returns null for a stream, which
            // previously caused a NullPointerException; the shared helper handles both forms.
            StringBuilder script = new StringBuilder();
            getJavaScriptFromPdfDictionary(annotationAction, script);
            String javascript = script.toString();
            if (StringUtility.hasData(javascript)) {
                log.debug("JavaScript found in PDF on page " + i);
                log.trace(javascript);
                return true;
            }
        }
    }
    // No annotation-level script: fall back to the document-level scripts.
    String javaScriptInPdf = getJavaScriptFromPdfDocument(pdfDoc);
    if (StringUtility.hasData(javaScriptInPdf)) {
        log.debug("JavaScript found using iText 7");
        log.trace(javaScriptInPdf);
        return true;
    }
    log.debug("JavaScript not found in PDF");
    return false;
}
//
/**
 * Gathers the script text reachable from the catalog's Names -> JavaScript dictionary.
 *
 * @param pdfDoc the opened document to inspect
 * @return the concatenated script text (possibly empty); {@code null} when the catalog
 *         dictionary, its Names dictionary or the JavaScript dictionary is missing. If an
 *         exception occurs it is logged and whatever was collected so far is returned.
 */
private static String getJavaScriptFromPdfDocument(PdfDocument pdfDoc) {
    StringBuilder collected = new StringBuilder();
    try {
        PdfDictionary catalog = pdfDoc.getCatalog().getPdfObject();
        if (null == catalog) {
            log.trace("getJavaScriptFromPdfDocument(): pdfDictionaryCatalog null; return null");
            return null;
        }

        PdfDictionary names = catalog.getAsDictionary(PdfName.Names);
        if (null == names) {
            log.trace("getJavaScriptFromPdfDocument(): PdfDictionary for PdfName.Names null; return null");
            return null;
        }

        PdfDictionary scripts = names.getAsDictionary(PdfName.JavaScript);
        if (null == scripts) {
            log.trace("getJavaScriptFromPdfDocument(): PdfDictionary for PdfName.JavaScript null; return null");
            return null;
        }

        for (Entry<PdfName, PdfObject> item : scripts.entrySet()) {
            PdfObject value = item.getValue();
            if (value.isDictionary()) {
                getJavaScriptFromPdfDictionary((PdfDictionary) value, collected);
                continue;
            }
            if (value.isArray()) {
                getJavaScriptFromPdfArray((PdfArray) value, collected);
                continue;
            }
            // A bare string only counts when it is stored directly under the JS key.
            if (value.isString() && item.getKey().getValue().equals(PdfName.JS.getValue())) {
                collected.append(((PdfString) value).getValue());
            }
        }
    } catch (Exception e) {
        log.debug(e, e);
    }
    return collected.toString();
}
//
/**
 * Walks an array and collects script text from every dictionary or nested array in it.
 *
 * <p>String elements of the array are deliberately not appended, so that the output
 * matches getJavaScriptUsingiText559().
 *
 * @param pdfArray the array to walk; {@code null} is ignored
 * @param strBuf   buffer that receives any script text found
 */
private static void getJavaScriptFromPdfArray(PdfArray pdfArray, StringBuilder strBuf) {
    if (pdfArray != null) {
        for (PdfObject element : pdfArray) {
            boolean present = element != null;
            if (present && element.isDictionary()) {
                getJavaScriptFromPdfDictionary((PdfDictionary) element, strBuf);
            } else if (present && element.isArray()) {
                getJavaScriptFromPdfArray((PdfArray) element, strBuf);
            }
        }
    }
}
//
/**
 * Appends the script stored under the dictionary's JS key to the buffer.
 *
 * <p>The JS value may be a string (appended as is), a stream (converted through
 * getStringFromPdfStream with TRUNCATE_PDF_STREAM_AT), or a nested dictionary or array,
 * which is searched recursively. Any other value type is ignored.
 *
 * @param pdfDict the dictionary to inspect; {@code null} is ignored
 * @param strBuf  buffer that receives any script text found
 */
private static void getJavaScriptFromPdfDictionary(PdfDictionary pdfDict, StringBuilder strBuf) {
    PdfObject script = (pdfDict == null) ? null : pdfDict.get(PdfName.JS);
    if (script == null) {
        return;
    }

    if (script.isString()) {
        strBuf.append(((PdfString) script).getValue());
        return;
    }
    if (script.isStream()) {
        strBuf.append(getStringFromPdfStream((PdfStream) script, TRUNCATE_PDF_STREAM_AT));
        return;
    }
    if (script.isDictionary()) {
        getJavaScriptFromPdfDictionary((PdfDictionary) script, strBuf);
        return;
    }
    if (script.isArray()) {
        getJavaScriptFromPdfArray((PdfArray) script, strBuf);
    }
}
答案 2 :(得分:0)
我知道这是一个很老的帖子,但我仍想分享我的代码,作为对 mkl 和 James 所写内容的补充。
// iText V 7.0.2
// Prints the JavaScript found in four places of the document: the OpenAction, the
// Names/JavaScript dictionary, the JavaScript name tree, and the annotation actions.
// try-with-resources closes the document even when one of the lookups throws; the
// original snippet never closed it.
try (PdfDocument srcPdf = new PdfDocument(new PdfReader(srcFilePath))) {
    PdfDictionary pdfDictionaryCatalog = srcPdf.getCatalog().getPdfObject();

    // To get javascript that is added through OpenAction.
    // getAsDictionary() yields null when OpenAction is absent or is not a dictionary.
    PdfDictionary openAction = pdfDictionaryCatalog.getAsDictionary(PdfName.OpenAction);
    if (openAction != null && openAction.get(PdfName.JS) != null) {
        StringBuilder strBuf = new StringBuilder();
        // The helper reads /JS itself and copes with string, stream, dictionary and array
        // values; the previous inline checks silently skipped a script stored as a stream.
        getJavaScriptFromPdfDictionary(openAction, strBuf);
        System.out.println("*****OPENACTION****** " + strBuf.toString());
    }

    // To get java script available from NAMES dictionary
    PdfDictionary namesDictionary = pdfDictionaryCatalog.getAsDictionary(PdfName.Names);
    if (namesDictionary != null) {
        PdfDictionary javascriptDictionary = namesDictionary.getAsDictionary(PdfName.JavaScript);
        if (javascriptDictionary != null) {
            StringBuilder strBuf = new StringBuilder();
            for (Entry<PdfName, PdfObject> entry : javascriptDictionary.entrySet()) {
                PdfObject pdfObj = entry.getValue();
                if (pdfObj.isDictionary()) {
                    getJavaScriptFromPdfDictionary((PdfDictionary) pdfObj, strBuf);
                } else if (pdfObj.isArray()) {
                    getJavaScriptFromPdfArray((PdfArray) pdfObj, strBuf);
                } else if (pdfObj.isString() && entry.getKey().getValue().equals(PdfName.JS.getValue())) {
                    // A bare string only counts when it is stored directly under the JS key.
                    strBuf.append(((PdfString) pdfObj).getValue());
                }
            }
            System.out.println("*****JAVASCRIPT****** " + strBuf.toString());
        }
    }

    // To get java script from name tree JAVASCRIPT
    PdfNameTree nameTree = srcPdf.getCatalog().getNameTree(PdfName.JavaScript);
    if (nameTree != null) {
        Map<String, PdfObject> objs = nameTree.getNames();
        if (objs != null) {
            StringBuilder strBuf = new StringBuilder();
            for (Entry<String, PdfObject> entry : objs.entrySet()) {
                PdfObject pdfObj = entry.getValue();
                if (pdfObj.isDictionary()) {
                    getJavaScriptFromPdfDictionary((PdfDictionary) pdfObj, strBuf);
                } else if (pdfObj.isArray()) {
                    getJavaScriptFromPdfArray((PdfArray) pdfObj, strBuf);
                } else if (pdfObj.isString() && entry.getKey().equals(PdfName.JS.getValue())) {
                    strBuf.append(((PdfString) pdfObj).getValue());
                }
            }
            System.out.println("*****JAVASCRIPT NAMED TREE****** " + strBuf.toString());
        }
    }

    // To get java script at the annotation action level
    for (int i = 1; i <= srcPdf.getNumberOfPages(); i++) {
        PdfPage page = srcPdf.getPage(i);
        List<PdfAnnotation> annotList = page.getAnnotations();
        if (annotList == null) {
            continue;
        }
        for (PdfAnnotation pdfAnnotation : annotList) {
            if (pdfAnnotation.getPdfObject() == null) {
                continue;
            }
            PdfDictionary annotationAction = pdfAnnotation.getPdfObject().getAsDictionary(PdfName.A);
            boolean isJavaScriptAction = annotationAction != null
                    && PdfName.JavaScript.equals(annotationAction.get(PdfName.S));
            if (isJavaScriptAction && annotationAction.get(PdfName.JS) != null) {
                // getAsString() would return null for a script stored as a stream and hide
                // it; the helper extracts both string and stream forms.
                StringBuilder strBuf = new StringBuilder();
                getJavaScriptFromPdfDictionary(annotationAction, strBuf);
                System.out.println("ANNOTATION " + strBuf.toString());
            }
        }
    }
}
/* The getJavaScriptFromPdfDictionary() and getJavaScriptFromPdfArray() methods are the same as in James's answer. */