答案 0 :(得分:1)
答案 1 :(得分:1)
/// <summary>
/// Event listener that records the bounding rectangles of the images and text
/// chunks it is notified about, so that <see cref="foundOverlappingText"/> can
/// afterwards report text overlapping other text or images.
/// </summary>
class OverlappingTextSearchingStrategy : IEventListener
{
    // Corners of the unit square with a third coordinate of 1; each one is
    // multiplied with the image CTM to obtain the corners of the drawn image.
    static List<Vector> UNIT_SQUARE_CORNERS = new List<Vector> { new Vector(0, 0, 1), new Vector(1, 0, 1), new Vector(1, 1, 1), new Vector(0, 1, 1) };

    ICollection<Rectangle> imageRectangles = new HashSet<Rectangle>();
    ICollection<Rectangle> textRectangles = new HashSet<Rectangle>();

    /// <summary>
    /// Collects the bounding box of every image and every non-empty text chunk.
    /// Path events are accepted but not evaluated; anything else is only logged.
    /// </summary>
    /// <param name="data">The event payload; may be null.</param>
    /// <param name="type">The event type, used for logging only.</param>
    public void EventOccurred(IEventData data, EventType type)
    {
        if (data is ImageRenderInfo)
        {
            ImageRenderInfo imageData = (ImageRenderInfo)data;
            Matrix ctm = imageData.GetImageCtm();
            // Represent each transformed corner as a zero-sized rectangle so
            // that GetCommonRectangle can compute the enclosing box.
            List<Rectangle> cornerRectangles = new List<Rectangle>(UNIT_SQUARE_CORNERS.Count);
            foreach (Vector unitCorner in UNIT_SQUARE_CORNERS)
            {
                Vector corner = unitCorner.Cross(ctm);
                cornerRectangles.Add(new Rectangle(corner.Get(Vector.I1), corner.Get(Vector.I2), 0, 0));
            }
            Rectangle boundingBox = Rectangle.GetCommonRectangle(cornerRectangles.ToArray());
            Console.WriteLine("Adding image bounding rectangle {0}.", boundingBox);
            // Without this the text/image comparison below never has anything to check.
            imageRectangles.Add(boundingBox);
        }
        else if (data is TextRenderInfo)
        {
            TextRenderInfo textData = (TextRenderInfo)data;
            Rectangle ascentRectangle = textData.GetAscentLine().GetBoundingRectangle();
            Rectangle descentRectangle = textData.GetDescentLine().GetBoundingRectangle();
            Rectangle boundingBox = Rectangle.GetCommonRectangle(ascentRectangle, descentRectangle);
            if (boundingBox.GetHeight() == 0 || boundingBox.GetWidth() == 0)
            {
                Console.WriteLine("Ignoring empty text bounding rectangle {0} for \"{1}\".", boundingBox, textData.GetText());
            }
            else
            {
                Console.WriteLine("Adding text bounding rectangle {0} for \"{1}\" with 0.5 margins.", boundingBox, textData.GetText());
                // NOTE(review): the margins presumably shrink the box so that
                // merely adjacent chunks do not count as overlapping - confirm
                // against the ApplyMargins documentation.
                textRectangles.Add(boundingBox.ApplyMargins<Rectangle>(0.5f, 0.5f, 0.5f, 0.5f, false));
            }
        }
        else if (data is PathRenderInfo)
        {
            // Vector graphics are not evaluated.
        }
        else if (data != null)
        {
            Console.WriteLine("Ignored {0} event, class {1}.", type, data.GetType().Name);
        }
        else
        {
            Console.WriteLine("Ignored {0} event with null data.", type);
        }
    }

    /// <summary>Returns null to signal that all event types are supported.</summary>
    public ICollection<EventType> GetSupportedEvents()
    {
        // Support all events
        return null;
    }

    /// <summary>
    /// Compares every collected text rectangle with every other text rectangle
    /// and with every collected image rectangle, logging each intersection.
    /// </summary>
    /// <returns>true if at least one intersection was found.</returns>
    public bool foundOverlappingText()
    {
        bool result = false;
        List<Rectangle> textRectangleList = new List<Rectangle>(textRectangles);
        // Take one rectangle off the end per round so every pair of text
        // rectangles is compared exactly once.
        while (textRectangleList.Count > 0)
        {
            Rectangle testRectangle = textRectangleList[textRectangleList.Count - 1];
            textRectangleList.RemoveAt(textRectangleList.Count - 1);
            foreach (Rectangle rectangle in textRectangleList)
            {
                if (intersect(testRectangle, rectangle))
                {
                    Console.WriteLine("Found text intersecting text with bounding boxes {0} at {1},{2} and {3} at {4},{5}.",
                        testRectangle, testRectangle.GetX(), testRectangle.GetY(), rectangle, rectangle.GetX(), rectangle.GetY());
                    result = true;// if only the fact counts, do instead: return true
                }
            }
            foreach (Rectangle rectangle in imageRectangles)
            {
                if (intersect(testRectangle, rectangle))
                {
                    Console.WriteLine("Found text intersecting image with bounding boxes {0} at {1},{2} and {3} at {4},{5}.",
                        testRectangle, testRectangle.GetX(), testRectangle.GetY(), rectangle, rectangle.GetX(), rectangle.GetY());
                    result = true;// if only the fact counts, do instead: return true
                }
            }
        }
        return result;
    }

    /// <summary>Two rectangles intersect when both their horizontal and vertical extents do.</summary>
    bool intersect(Rectangle a, Rectangle b)
    {
        return intersect(a.GetLeft(), a.GetRight(), b.GetLeft(), b.GetRight()) &&
            intersect(a.GetBottom(), a.GetTop(), b.GetBottom(), b.GetTop());
    }

    /// <summary>
    /// Interval test: the interval that starts later must start no later than
    /// the other one ends. Intervals that merely touch count as intersecting.
    /// </summary>
    bool intersect(float start1, float end1, float start2, float end2)
    {
        if (start1 < start2)
        {
            return start2 <= end1;
        }
        return start1 <= end2;
    }
}
// Runs the overlap search on one page and makes sure the document is closed
// afterwards, even if content parsing throws.
PdfReader reader = new PdfReader(pdf);
PdfDocument document = new PdfDocument(reader);
bool foundOverlaps;
try
{
    PdfDocumentContentParser contentParser = new PdfDocumentContentParser(document);
    OverlappingTextSearchingStrategy strategy = contentParser.ProcessContent(page, new OverlappingTextSearchingStrategy());
    foundOverlaps = strategy.foundOverlappingText();
}
finally
{
    // NOTE(review): closing the document is expected to release the reader
    // as well - confirm against the iText version in use.
    document.Close();
}
public class OverlappingTextSearchingStrategy implements IEventListener {
static List<Vector> UNIT_SQUARE_CORNERS = Arrays.asList(new Vector(0,0,1), new Vector(1,0,1), new Vector(1,1,1), new Vector(0,1,1));
Set<Rectangle> imageRectangles = new HashSet<>();
Set<Rectangle> textRectangles = new HashSet<>();
public void eventOccurred(IEventData data, EventType type) {
if (data instanceof ImageRenderInfo) {
ImageRenderInfo imageData = (ImageRenderInfo) data;
Matrix ctm = imageData.getImageCtm();
List<Rectangle> cornerRectangles = new ArrayList<>(UNIT_SQUARE_CORNERS.size());
for (Vector unitCorner : UNIT_SQUARE_CORNERS) {
Vector corner = unitCorner.cross(ctm);
cornerRectangles.add(new Rectangle(corner.get(Vector.I1), corner.get(Vector.I2), 0, 0));
Rectangle boundingBox = Rectangle.getCommonRectangle(cornerRectangles.toArray(new Rectangle[cornerRectangles.size()]));
logger.info(String.format("Adding image bounding rectangle %s.", boundingBox));
} else if (data instanceof TextRenderInfo) {
TextRenderInfo textData = (TextRenderInfo) data;
Rectangle ascentRectangle = textData.getAscentLine().getBoundingRectangle();
Rectangle descentRectangle = textData.getDescentLine().getBoundingRectangle();
Rectangle boundingBox = Rectangle.getCommonRectangle(ascentRectangle, descentRectangle);
if (boundingBox.getHeight() == 0 || boundingBox.getWidth() == 0)
logger.info(String.format("Ignoring empty text bounding rectangle %s for '%s'.", boundingBox, textData.getText()));
else {
logger.info(String.format("Adding text bounding rectangle %s for '%s' with 0.5 margins.", boundingBox, textData.getText()));
textRectangles.add(boundingBox.applyMargins(0.5f, 0.5f, 0.5f, 0.5f, false));
} else if (data instanceof PathRenderInfo) {
// TODO: vector graphics
} else if (data != null) {
logger.fine(String.format("Ignored %s event, class %s.", type, data.getClass().getSimpleName()));
} else {
logger.fine(String.format("Ignored %s event with null data.", type));
public Set<EventType> getSupportedEvents() {
// Support all events
return null;
public boolean foundOverlappingText() {
boolean result = false;
List<Rectangle> textRectangleList = new ArrayList<>(textRectangles);
while (!textRectangleList.isEmpty())
Rectangle testRectangle = textRectangleList.remove(textRectangleList.size() - 1);
for (Rectangle rectangle : textRectangleList) {
if (intersect(testRectangle, rectangle)) {
logger.info(String.format("Found text intersecting text with bounding boxes %s at %s,%s and %s at %s,%s.",
testRectangle, testRectangle.getX(), testRectangle.getY(), rectangle, rectangle.getX(), rectangle.getY()));
result = true;// if only the fact counts, do instead: return true
for (Rectangle rectangle : imageRectangles) {
if (intersect(testRectangle, rectangle)) {
logger.info(String.format("Found text intersecting image with bounding boxes %s at %s,%s and %s at %s,%s.",
testRectangle, testRectangle.getX(), testRectangle.getY(), rectangle, rectangle.getX(), rectangle.getY()));
result = true;// if only the fact counts, do instead: return true
return result;
boolean intersect(Rectangle a, Rectangle b) {
return intersect(a.getLeft(), a.getRight(), b.getLeft(), b.getRight()) &&
intersect(a.getBottom(), a.getTop(), b.getBottom(), b.getTop());
boolean intersect(float start1, float end1, float start2, float end2) {
if (start1 < start2)
return start2 <= end1;
return start1 <= end2;
Logger logger = Logger.getLogger(OverlappingTextSearchingStrategy.class.getName());
PdfReader reader = new PdfReader(pdf);
PdfDocument document = new PdfDocument(reader);
PdfDocumentContentParser contentParser = new PdfDocumentContentParser(document);
OverlappingTextSearchingStrategy strategy = contentParser.processContent(pageNumber, new OverlappingTextSearchingStrategy());
boolean foundOverlaps = strategy.foundOverlappingText();
boundingBox.applyMargins(0.5f, 0.5f, 0.5f, 0.5f, false),
答案 2 :(得分:0)
您好,我有一个代码示例,它使用的不是免费库,但我认为其他库应该也具有类似的功能,因此您可以把它当作思路来参考。在使用以下代码示例之前,请确保使用最新版本的 Apitron PDF Kit。
using System;
using System.Collections.Generic;
using System.IO;
using Apitron.PDF.Kit.FixedLayout;
using Apitron.PDF.Kit.FixedLayout.Content;
using Apitron.PDF.Kit.FixedLayout.PageProperties;
using FixedLayout.Resources;
using FixedLayout.ContentElements;
/// <summary>
/// Gets all text boundaries.
/// Walks the given content elements, appends the boundary of every text
/// segment to <paramref name="boundaries"/> and recurses into form elements.
/// </summary>
/// <param name="elements">The elements to scan; null is treated as "nothing to scan".</param>
/// <param name="boundaries">The list that receives the text boundaries found.</param>
/// <param name="offset">
/// Optional boundary whose Left and Bottom values are added to every boundary
/// found; pass null to use the boundaries as they are.
/// </param>
public void GetAllTextBoundaries(IContentElementsEnumerator elements, IList<Boundary> boundaries, Boundary offset)
{
    // We dont count drawings and images here - only text;
    if (elements == null)
    {
        return;
    }

    foreach (IContentElement element in elements)
    {
        TextContentElement text = element as TextContentElement;
        if (text != null)
        {
            foreach (TextSegment segment in text.Segments)
            {
                Boundary currentBoundary = segment.Boundary;
                if (offset != null)
                {
                    currentBoundary = new Boundary(currentBoundary.Left + offset.Left, currentBoundary.Bottom + offset.Bottom, currentBoundary.Right + offset.Left, currentBoundary.Top + offset.Bottom);
                }
                // Record the boundary; without this the caller never sees any text.
                boundaries.Add(currentBoundary);
            }
        }
        else if (element is FormContentElement)
        {
            Boundary currentBoundary = (element as FormContentElement).Boundary;
            if (offset != null)
            {
                currentBoundary = new Boundary(currentBoundary.Left + offset.Left, currentBoundary.Bottom + offset.Bottom, currentBoundary.Right + offset.Left, currentBoundary.Top + offset.Bottom);
            }
            // The (shifted) form boundary becomes the offset for the form's own content.
            this.GetAllTextBoundaries((element as FormContentElement).FormXObject.Elements, boundaries, currentBoundary);
        }
    }
}
/// <summary>
/// Checks if text is overlapped.
/// For every page the text boundaries of the annotations' normal appearances
/// and of the page content are collected and compared pairwise.
/// </summary>
/// <param name="fileName">Path of the PDF file to inspect.</param>
/// <returns>
/// true if two text boundaries on the same page intersect and the
/// intersection is at least <c>overlapMax</c> wide or high; otherwise false.
/// </returns>
public bool CheckIfTextIsOverlapped(string fileName)
{
    const double overlapMax = 5;
    // NOTE(review): the file is only read here, so FileAccess.Read may be
    // sufficient - confirm that FixedDocument does not need a writable stream.
    using (System.IO.Stream stream = new FileStream(fileName, FileMode.Open, FileAccess.ReadWrite))
    {
        using (FixedDocument document = new FixedDocument(stream))
        {
            foreach (Page page in document.Pages)
            {
                IList<Boundary> boundaries = new List<Boundary>();
                foreach (Annotation annotation in page.Annotations)
                {
                    // Actually we need only Normal state, but will check all - to be sure.
                    if (annotation.Appearance.Normal != null)
                    {
                        this.GetAllTextBoundaries(annotation.Appearance.Normal.Elements, boundaries, annotation.Boundary);
                    }
                }

                IContentElementsEnumerator elements = page.Elements;
                this.GetAllTextBoundaries(elements, boundaries, null);

                for (int i = 0; i < boundaries.Count; i++)
                {
                    for (int j = i + 1; j < boundaries.Count; j++)
                    {
                        Boundary b1 = boundaries[i];
                        Boundary b2 = boundaries[j];
                        // (x1, y1)-(x2, y2) is the intersection rectangle of b1 and b2.
                        double x1 = Math.Max(b1.Left, b2.Left);
                        double y1 = Math.Max(b1.Bottom, b2.Bottom);
                        double x2 = Math.Min(b1.Right, b2.Right);
                        double y2 = Math.Min(b1.Top, b2.Top);

                        // So we have intersection
                        if (x1 < x2 && y1 < y2)
                        {
                            // Here x2 > x1 and y2 > y1, so the extent is x2 - x1 and
                            // y2 - y1; the reversed subtraction is always negative
                            // and could never reach the threshold.
                            if (x2 - x1 >= overlapMax || y2 - y1 >= overlapMax)
                            {
                                return true;
                            }
                        }
                    }
                }
            }
        }
    }

    return false;
}