我正在使用 iText7 读取 PDF 文件数据,并根据这些数据在 PDF 文件中的位置创建一个 XML 文件。我的问题是:iText7 程序集中有一个 LocationTextExtractionStrategy 类,如下所示:
// Signature-only view of the iText7 class (no method bodies are shown here).
public class LocationTextExtractionStrategy : ITextExtractionStrategy, IEventListener
{
// Constructor that accepts the chunk location strategy to use.
public LocationTextExtractionStrategy(ITextChunkLocationStrategy strat);
// Event callback; virtual, so derived classes may override it.
public virtual void EventOccurred(IEventData data, EventType type);
// Nested interface: a factory that builds an ITextChunkLocation for a piece of rendered text.
public interface ITextChunkLocationStrategy
{
// Creates the location object for the given render info and its baseline segment.
ITextChunkLocation CreateLocation(TextRenderInfo renderInfo, LineSegment baseline);
}
}
现在我在自己的项目中从这个类派生了一个子类,如下所示:
/// <summary>
/// Extraction strategy that handles <c>RENDER_TEXT</c> events glyph by glyph and
/// records one <c>UTextChunk</c> for every glyph whose text contains no space.
/// </summary>
public class TextLocationExtractionStrategy : LocationTextExtractionStrategy
{
    // Factory used to build an ITextChunkLocation for each recorded glyph.
    // Guaranteed non-null once the constructor has completed.
    private readonly ITextChunkLocationStrategy locationstrategy;

    /// <summary>
    /// Creates the strategy and hands the same location strategy to the base class,
    /// so that base and derived class work with one and the same instance.
    /// </summary>
    /// <param name="strategy">The chunk location strategy; must not be null.</param>
    /// <exception cref="System.ArgumentNullException">
    /// Thrown when <paramref name="strategy"/> is null. Failing here gives a clear error
    /// instead of a NullReferenceException later, when the first text event arrives.
    /// </exception>
    public TextLocationExtractionStrategy(ITextChunkLocationStrategy strategy)
        : base(strategy)
    {
        if (strategy == null)
        {
            throw new System.ArgumentNullException("strategy");
        }
        locationstrategy = strategy;
    }

    /// <summary>
    /// Ignores every event except <c>EventType.RENDER_TEXT</c>. For a text event, splits the
    /// render info into per-character infos and adds a chunk for each character whose text
    /// does not contain a space.
    /// </summary>
    /// <param name="data">The event payload; cast to <c>TextRenderInfo</c> for text events.</param>
    /// <param name="type">The kind of event being reported.</param>
    public override void EventOccurred(IEventData data, EventType type)
    {
        if (!type.Equals(EventType.RENDER_TEXT))
        {
            return;
        }

        TextRenderInfo renderInfo = (TextRenderInfo)data;
        foreach (TextRenderInfo t in renderInfo.GetCharacterRenderInfos())
        {
            string letter = t.GetText();
            // A text equal to " " also contains ' ', so this single test covers both cases.
            if (letter.Contains(' '))
            {
                continue;
            }

            ITextChunkLocation loc = locationstrategy.CreateLocation(t, t.GetBaseline());
            // NOTE(review): m_locationResult is not declared in this snippet; it is assumed
            // to be a collection of UTextChunk declared elsewhere - confirm.
            m_locationResult.Add(new UTextChunk(t, letter, loc));
        }
    }
}
在 EventOccurred 方法中,我(在 if 条件内)调用了基类的 CreateLocation 方法。
现在我的问题是:如何将一个接口实例作为参数传递给派生类的构造函数,以便可以方便地调用基类的 CreateLocation 方法?
我尝试了如下写法,但我不明白应该如何创建一个合适的对象(或其他可行的东西)传递给构造函数,以便通过它调用 CreateLocation 方法。
// Passing null compiles but cannot work at run time: the extraction strategy needs a real
// ITextChunkLocationStrategy implementation on which CreateLocation can be called.
LocationTextExtractionStrategy.ITextChunkLocationStrategy locst = null;
TextLocationExtractionStrategy strategy = new TextLocationExtractionStrategy(locst);
// Runs the extraction for the page; the strategy receives the text render events.
PdfTextExtractor.GetTextFromPage(page, strategy);
由于我们无法创建接口的实例,因此我不知道如何将值传递给类型为interface的参数。在上面的几行中,我只是将其分配为null并调用构造函数,但是为它分配了null值,它将说“对象引用未设置为对象的实例”。
您能帮我解决这个问题吗?
如果我的提问方式有任何不妥之处,请告诉我或帮我纠正。
答案 0 :(得分:1)
如果您查看基类LocationTextExtractionStrategy
的源代码(它是开放源代码,那么请看一下源代码!),您会看到它不仅具有带有ITextChunkLocationStrategy
参数的构造函数,而且也具有不带参数的构造函数。
该构造函数实际上实例化了该接口的实现,并将其转发给您引用的同一构造函数:
/// <summary>
/// Creates the strategy with the built-in chunk location strategy by delegating to the
/// constructor overload that accepts an <c>ITextChunkLocationStrategy</c>.
/// </summary>
public LocationTextExtractionStrategy()
    : this(new _ITextChunkLocationStrategy_85())
{
}
/// <summary>
/// Built-in location strategy: wraps the baseline end points and the single-space width
/// of the rendered text in a <c>TextChunkLocationDefaultImp</c>.
/// </summary>
private sealed class _ITextChunkLocationStrategy_85 : LocationTextExtractionStrategy.ITextChunkLocationStrategy
{
    /// <summary>Builds the default location object for one piece of rendered text.</summary>
    /// <param name="renderInfo">Render info supplying the single-space width.</param>
    /// <param name="baseline">Baseline segment supplying the start and end points.</param>
    /// <returns>A new <c>TextChunkLocationDefaultImp</c> describing the chunk.</returns>
    public ITextChunkLocation CreateLocation(TextRenderInfo renderInfo, LineSegment baseline)
    {
        Vector chunkStart = baseline.GetStartPoint();
        Vector chunkEnd = baseline.GetEndPoint();
        float spaceWidth = renderInfo.GetSingleSpaceWidth();
        return new TextChunkLocationDefaultImp(chunkStart, chunkEnd, spaceWidth);
    }
}
由于要使用ITextChunkLocationStrategy
实现,并且基类没有为此提供getter,因此不能简单地使用没有参数的其他构造函数。而且您无法实例化该_ITextChunkLocationStrategy_85
类,因为它是 private 。而且您不能简单地将_ITextChunkLocationStrategy_85
复制到代码中,因为TextChunkLocationDefaultImp
是internal
。
但是,您可以这样做:先将 TextChunkLocationDefaultImp 复制到您的代码中,再将 _ITextChunkLocationStrategy_85 复制到您的代码中,并把其中对基类所用的 TextChunkLocationDefaultImp 的引用替换为您自己复制的那个类;然后实例化您复制的 _ITextChunkLocationStrategy_85 类,最终得到一个 ITextChunkLocationStrategy 实现的实例。
或者,您可以尝试使用反射(reflection)和内省。不过,这可能会导致维护问题。
如果库提供了概括某些内容然后隐藏其默认实现的方法,这确实是一件令人讨厌的事情。
作为参考,TextChunkLocationDefaultImp
当前的实现方式如下
/// <summary>
/// Default <c>ITextChunkLocation</c> implementation. From the start and end points of a
/// chunk it precomputes an orientation vector, an integer orientation key, and the
/// perpendicular / parallel distances that are later used to compare chunks.
/// </summary>
internal class TextChunkLocationDefaultImp : ITextChunkLocation
{
    /// <summary>
    /// Largest perpendicular difference still accepted by <c>SameLine</c> and
    /// <c>ContainsMark</c> when comparing locations.
    /// </summary>
    private const float DIACRITICAL_MARKS_ALLOWED_VERTICAL_DEVIATION = 2;

    /// <summary>Starting location of the chunk.</summary>
    private readonly Vector startLocation;

    /// <summary>Ending location of the chunk.</summary>
    private readonly Vector endLocation;

    /// <summary>Unit vector in the orientation of the chunk.</summary>
    private readonly Vector orientationVector;

    /// <summary>The orientation as a scalar, for quick sorting.</summary>
    private readonly int orientationMagnitude;

    /// <summary>
    /// Perpendicular distance to the orientation unit vector (i.e. the Y position in an
    /// unrotated coordinate system), stored as an integer to handle the fuzziness of
    /// comparing floats.
    /// </summary>
    private readonly int distPerpendicular;

    /// <summary>
    /// Distance of the start of the chunk parallel to the orientation unit vector
    /// (i.e. the X position in an unrotated coordinate system).
    /// </summary>
    private readonly float distParallelStart;

    /// <summary>
    /// Distance of the end of the chunk parallel to the orientation unit vector
    /// (i.e. the X position in an unrotated coordinate system).
    /// </summary>
    private readonly float distParallelEnd;

    /// <summary>Width of a single space character in the font of the chunk.</summary>
    private readonly float charSpaceWidth;

    /// <summary>Stores the end points and derives the orientation and distance values.</summary>
    /// <param name="startLocation">Start point of the chunk.</param>
    /// <param name="endLocation">End point of the chunk.</param>
    /// <param name="charSpaceWidth">Width of a single space character for this chunk.</param>
    public TextChunkLocationDefaultImp(Vector startLocation, Vector endLocation, float charSpaceWidth)
    {
        this.startLocation = startLocation;
        this.endLocation = endLocation;
        this.charSpaceWidth = charSpaceWidth;

        // A zero-length chunk has no direction of its own; fall back to the X axis.
        Vector direction = endLocation.Subtract(startLocation);
        if (direction.Length() == 0)
        {
            direction = new Vector(1, 0, 0);
        }
        orientationVector = direction.Normalize();

        double angle = Math.Atan2(orientationVector.Get(Vector.I2), orientationVector.Get(Vector.I1));
        orientationMagnitude = (int)(angle * 1000);

        // see http://mathworld.wolfram.com/Point-LineDistance2-Dimensional.html
        // Both vectors being crossed lie in the same plane, so the result points purely
        // along the z-axis (out of plane); only the I3 component is therefore needed.
        Vector origin = new Vector(0, 0, 1);
        Vector offsetFromOrigin = startLocation.Subtract(origin);
        distPerpendicular = (int)offsetFromOrigin.Cross(orientationVector).Get(Vector.I3);

        distParallelStart = orientationVector.Dot(startLocation);
        distParallelEnd = orientationVector.Dot(endLocation);
    }

    /// <returns>the orientation as an integer key</returns>
    public virtual int OrientationMagnitude()
    {
        return orientationMagnitude;
    }

    /// <returns>the perpendicular distance as an integer</returns>
    public virtual int DistPerpendicular()
    {
        return distPerpendicular;
    }

    /// <returns>the parallel distance of the chunk start</returns>
    public virtual float DistParallelStart()
    {
        return distParallelStart;
    }

    /// <returns>the parallel distance of the chunk end</returns>
    public virtual float DistParallelEnd()
    {
        return distParallelEnd;
    }

    /// <returns>the start location of the text</returns>
    public virtual Vector GetStartLocation()
    {
        return startLocation;
    }

    /// <returns>the end location of the text</returns>
    public virtual Vector GetEndLocation()
    {
        return endLocation;
    }

    /// <returns>the width of a single space character as rendered by this chunk</returns>
    public virtual float GetCharSpaceWidth()
    {
        return charSpaceWidth;
    }

    /// <summary>Decides whether this location and another one are on the same line.</summary>
    /// <param name="as">the location to compare to</param>
    /// <returns>true if this location is on the same line as the other</returns>
    public virtual bool SameLine(ITextChunkLocation @as)
    {
        // Different orientation keys can never share a line.
        if (OrientationMagnitude() != @as.OrientationMagnitude())
        {
            return false;
        }

        float perpendicularGap = DistPerpendicular() - @as.DistPerpendicular();
        if (perpendicularGap == 0)
        {
            return true;
        }

        LineSegment ownSegment = new LineSegment(startLocation, endLocation);
        LineSegment otherSegment = new LineSegment(@as.GetStartLocation(), @as.GetEndLocation());

        // A small perpendicular gap is tolerated only when one of the two chunks has zero length.
        if (Math.Abs(perpendicularGap) > DIACRITICAL_MARKS_ALLOWED_VERTICAL_DEVIATION)
        {
            return false;
        }
        return ownSegment.GetLength() == 0 || otherSegment.GetLength() == 0;
    }

    /// <summary>
    /// Computes the distance between the end of 'other' and the beginning of this chunk
    /// in the direction of this chunk's orientation vector.
    /// </summary>
    /// <remarks>
    /// It is a bad idea to call this for chunks that are not on the same line and
    /// orientation, but that condition is not checked explicitly for performance reasons.
    /// </remarks>
    /// <param name="other">the location whose end is measured from</param>
    /// <returns>this chunk's parallel start distance minus the other chunk's parallel end distance</returns>
    public virtual float DistanceFromEndOf(ITextChunkLocation other)
    {
        float ownStart = DistParallelStart();
        return ownStart - other.DistParallelEnd();
    }

    /// <summary>Decides whether a word boundary lies between the previous chunk and this one.</summary>
    /// <param name="previous">the location of the preceding chunk</param>
    /// <returns>true if the gap between the two chunks exceeds half a space width</returns>
    public virtual bool IsAtWordBoundary(ITextChunkLocation previous)
    {
        // A zero-length chunk probably is a mark character, and no space
        // should be inserted in that case.
        if (startLocation.Equals(endLocation))
        {
            return false;
        }
        if (previous.GetEndLocation().Equals(previous.GetStartLocation()))
        {
            return false;
        }

        float gap = DistanceFromEndOf(previous);
        if (gap < 0)
        {
            gap = previous.DistanceFromEndOf(this);
            // Negative in both directions: the chunks intersect, so no space is needed.
            if (gap < 0)
            {
                return false;
            }
        }
        return gap > GetCharSpaceWidth() / 2.0f;
    }

    /// <summary>
    /// Checks whether the mark location lies within the horizontal (I1) extent of the base
    /// location and within the allowed perpendicular deviation from it.
    /// </summary>
    /// <param name="baseLocation">the location that may contain the mark</param>
    /// <param name="markLocation">the location of the candidate mark</param>
    /// <returns>true if all three containment conditions hold</returns>
    internal static bool ContainsMark(ITextChunkLocation baseLocation, ITextChunkLocation markLocation)
    {
        bool startsInsideBase = baseLocation.GetStartLocation().Get(Vector.I1) <= markLocation.GetStartLocation().Get(Vector.I1);
        if (!startsInsideBase)
        {
            return false;
        }

        bool endsInsideBase = baseLocation.GetEndLocation().Get(Vector.I1) >= markLocation.GetEndLocation().Get(Vector.I1);
        if (!endsInsideBase)
        {
            return false;
        }

        int perpendicularGap = Math.Abs(baseLocation.DistPerpendicular() - markLocation.DistPerpendicular());
        return perpendicularGap <= DIACRITICAL_MARKS_ALLOWED_VERTICAL_DEVIATION;
    }
}