我正在尝试使用 GhostScript 9.10 版的 gsdll32.dll 从 PostScript 文件中提取纯文本,但始终无法让它正常工作,已经试过多种参数组合。
{ Extracts plain text from a PostScript file by running ps2ascii.ps      }
{ through the Ghostscript DLL.                                           }
{ input  : path of the PostScript file handed to Ghostscript.            }
{ output : target name; '.txt' is appended when the last argument is     }
{          built.                                                        }
{ Raises Exception when gsapi_new_instance or gsapi_init_with_args       }
{ returns a negative code.                                               }
{ NOTE(review): this is the version reported as not working. Likely      }
{ causes are marked below - confirm each against the Ghostscript API     }
{ documentation.                                                         }
procedure PS2TXT(input : AnsiString; output: AnsiString);
var
code:integer;
instance: Pointer;
argv: array of PAnsiChar;
RunFile: string;
begin
{ ps2ascii.ps is expected in the same folder as the running executable }
RunFile:= ExtractFilePath(ParamStr(0)) + 'ps2ascii.ps';
code := gsapi_new_instance(instance, nil);
if code < 0 then
raise Exception.Create('Impossible to open an instance of ghostscript. Error code: '+IntToStr(code));
try
SetLength(argv, 8);
{ NOTE(review): several switches are packed into one entry and most    }
{ entries carry trailing spaces; presumably Ghostscript expects exactly }
{ one switch per argv entry - TODO confirm.                             }
{ NOTE(review): argv[0] is presumably treated as the program name, as   }
{ with C main(), so these switches may never be applied - TODO confirm. }
argv[0] := PAnsiChar('-q -dNODISPLAY -dSAFER -dDELAYBIND ');
argv[1] := PAnsiChar('-dWRITESYSTEMDICT ');
argv[2] := PAnsiChar('-dSIMPLE ');
argv[3] := PAnsiChar('-c save ');
{ NOTE(review): RunFile is declared as string; on a Unicode Delphi this }
{ cast would reinterpret UTF-16 data as ANSI - verify compiler version. }
argv[4] := PAnsiChar('-f ' + RunFile);
argv[5] := PAnsiChar(input);
argv[6] := PAnsiChar('-c quit ');
{ NOTE(review): '>' is shell redirection syntax; presumably the DLL     }
{ does not interpret it and sees this as another file name - confirm.   }
argv[7] := PAnsiChar('> ' + output + '.txt');
// argv[7] := PAnsiChar('-sOutputFile='+ output + '.txt' );
code := gsapi_init_with_args(instance, Length(argv), @argv[0]);
if code < 0 then raise Exception.Create('ERROR: init_args: '+IntToStr(code));
{ only reached when init succeeded; the raise above skips gsapi_exit }
gsapi_exit(instance);
finally
{ the instance is released on every path once it has been created }
gsapi_delete_instance(instance);
end;
end;
RunFile 变量只是取得正在运行的 exe 所在的路径,然后在其后附加 ps2ascii.ps 文件名。
或者,我也想通过同一个 ps2ascii.ps 解释器把 PDF 文件转换为纯文本文件。输出必须是纯文本,这样我才能解析它、提取特定的关键字段,再把这些字段写入数据库。然而,通过 txtwrite 设备从 PDF 中提取文本似乎只对部分 PDF 文件有效,并非全部,所以我才想用这种方式来绕过这个问题。有人有能与该 DLL 一起正常工作的代码吗?
答案 0 :(得分:1)
其他同样为了找到正确参数、让 GhostScript DLL 在 Delphi 中运行而苦苦挣扎的人,可能会觉得这段代码有用。它写得又快又糙,但确实能用。请根据需要自行整理。祝使用愉快!
GhostTools.pas:用于调用 GSDLL32.DLL 的类文件
// GhostTools.pas v.03, 12/2013, Marvi mail: phantomlord@embarqmail.com
//
// Open source, modify to whatever extent
// Class to interact with GhostScript gsdll32.dll for simple PDF manipulation
// i.e. PDF to Jpeg, PDF to PNG, PDF to PS, PDF to Text, PS to PDF, etc.
// Requires GhostScript GSDLL32.DLL to reside in .EXE project folder
// or at the very least somewhere your app can get to it.
// If you use the ps2ascii.ps interpreter, do same as well.
//
// include GhostTools in your uses section, and use as needed
unit GhostTools;
interface
uses SysUtils, gsapi; {gsapi.pas file required as well}
procedure PDF2PNG(input : AnsiString; output: AnsiString);
{ renders a PDF with the pngalpha device at 300 dpi; output + ' Page-%02d.png' is passed as the output file template }
procedure PDF2JPEG(input : AnsiString; output: AnsiString);
{ renders a PDF with the jpeg device at 300x300 dpi; output + ' Page-%02d.jpeg' is passed as the output file template }
procedure PDF2PS(input : AnsiString; output: AnsiString);
{ converts a PDF file to PostScript with the ps2write device; writes output + '.ps' }
procedure PS2PDF(input : AnsiString; output: AnsiString);
{ converts a PostScript file to PDF with the pdfwrite device; writes output + '.pdf' }
procedure PDF2TXT(input : AnsiString; output: AnsiString);
{ extracts plain text with the txtwrite device - Method #1; writes output + '.txt' }
procedure PDS2TXT(input : AnsiString; output: AnsiString);
{ extracts plain text by running ps2ascii.ps (looked up next to the .EXE) - Method #2; the target name is built from the path part of input plus output + '.txt' }
function SlashSwap(PathVar: string): string;
{ returns PathVar with every backslash replaced by a forward slash - unix style }
implementation
implementation
{ Exchanges every backslash for a forward slash - unix path format.     }
{ Resolves a path issue in ps2ascii.ps running on the Windows platform. }
{ PathVar : path that may contain backslashes; it is not modified.      }
{ Returns : a copy of PathVar with each '\' replaced by '/'.            }
function SlashSwap(PathVar: string): string;
begin
  { One pass with SysUtils.StringReplace instead of rescanning the      }
  { whole string with Pos for every backslash found.                    }
  Result := StringReplace(PathVar, '\', '/', [rfReplaceAll]);
end;
{ Generates a PostScript file from a PDF file (ps2write device).         }
{ input  : path of the source PDF file.                                  }
{ output : target path without extension; '.ps' is appended.             }
{ Raises Exception when the Ghostscript instance cannot be created or    }
{ when Ghostscript reports a failure while processing the arguments.     }
procedure PDF2PS(input : AnsiString; output: AnsiString);
const
  { Ghostscript's documented "quit" code: a normal exit, not a failure }
  GS_ERROR_QUIT = -101;
var
  ExitCode: Integer;
  Instance: Pointer;
  Switches: array of AnsiString;
  Arg: array of PAnsiChar;
  I: Integer;
begin
  { The switches live in AnsiString variables so the PAnsiChar pointers  }
  { stay valid during the call; a pointer cast from a temporary string   }
  { expression is only guaranteed within its own statement.              }
  SetLength(Switches, 8);
  { Ghostscript ignores argv[0] (program-name slot, as in C main), so a  }
  { placeholder goes first; otherwise '-q' would be silently dropped.    }
  Switches[0] := 'gs';
  Switches[1] := '-q';
  Switches[2] := '-dSAFER';
  Switches[3] := '-dNOPAUSE';
  Switches[4] := '-dBATCH';
  Switches[5] := '-sOutputFile=' + output + '.ps';
  Switches[6] := '-sDEVICE=ps2write';
  Switches[7] := input;

  SetLength(Arg, Length(Switches));
  for I := 0 to High(Switches) do
    Arg[I] := PAnsiChar(Switches[I]);

  ExitCode := gsapi_new_instance(Instance, nil);
  if ExitCode < 0 then
    raise Exception.Create('Impossible to open an instance of ghostscript. Error ExitCode: '+IntToStr(ExitCode));
  try
    ExitCode := gsapi_init_with_args(Instance, Length(Arg), @Arg[0]);
    { gsapi_exit has to follow gsapi_init_with_args even when it failed, }
    { so it is called before the result is turned into an exception.     }
    gsapi_exit(Instance);
    if (ExitCode < 0) and (ExitCode <> GS_ERROR_QUIT) then
      raise Exception.Create('ERROR: init_args: '+IntToStr(ExitCode));
  finally
    gsapi_delete_instance(Instance);
  end;
end;
{ Generates a PDF file from a PostScript file (pdfwrite device).         }
{ input  : path of the source PostScript file.                           }
{ output : target path without extension; '.pdf' is appended.            }
{ Raises Exception when the Ghostscript instance cannot be created or    }
{ when Ghostscript reports a failure while processing the arguments.     }
procedure PS2PDF(input : AnsiString; output: AnsiString);
const
  { Ghostscript's documented "quit" code: a normal exit, not a failure }
  GS_ERROR_QUIT = -101;
var
  ExitCode: Integer;
  Instance: Pointer;
  Switches: array of AnsiString;
  Arg: array of PAnsiChar;
  I: Integer;
begin
  { The switches live in AnsiString variables so the PAnsiChar pointers  }
  { stay valid during the call; a pointer cast from a temporary string   }
  { expression is only guaranteed within its own statement.              }
  SetLength(Switches, 9);
  Switches[0] := 'ps2pdf';           { program-name slot, ignored by Ghostscript }
  Switches[1] := '-dNOPAUSE';
  Switches[2] := '-dBATCH';
  Switches[3] := '-dSAFER';
  Switches[4] := '-sDEVICE=pdfwrite';
  Switches[5] := '-sOutputFile='+ output+'.pdf';
  Switches[6] := '-c';
  { NOTE(review): .setpdfwrite is kept as in the original, which targets }
  { Ghostscript 9.10; verify it still exists in the version deployed.    }
  Switches[7] := '.setpdfwrite';
  Switches[8] := '-f' + input;       { -f ends the -c token list and names the file }

  SetLength(Arg, Length(Switches));
  for I := 0 to High(Switches) do
    Arg[I] := PAnsiChar(Switches[I]);

  ExitCode := gsapi_new_instance(Instance, nil);
  if ExitCode < 0 then
    raise Exception.Create('Impossible to open an instance of ghostscript. Error ExitCode: '+IntToStr(ExitCode));
  try
    ExitCode := gsapi_init_with_args(Instance, Length(Arg), @Arg[0]);
    { gsapi_exit has to follow gsapi_init_with_args even when it failed, }
    { so it is called before the result is turned into an exception.     }
    gsapi_exit(Instance);
    if (ExitCode < 0) and (ExitCode <> GS_ERROR_QUIT) then
      raise Exception.Create('ERROR: init_args: '+IntToStr(ExitCode));
  finally
    gsapi_delete_instance(Instance);
  end;
end;
{ Generates JPEG images from a PDF - 1 image per page (jpeg device,      }
{ 300x300 dpi).                                                          }
{ input  : path of the source PDF file.                                  }
{ output : file-name prefix; ' Page-%02d.jpeg' is appended and passed to }
{          Ghostscript as the output file template.                      }
{ Raises Exception when the Ghostscript instance cannot be created or    }
{ when Ghostscript reports a failure while processing the arguments.     }
procedure PDF2JPEG(input : AnsiString; output: AnsiString);
const
  { Ghostscript's documented "quit" code: a normal exit, not a failure }
  GS_ERROR_QUIT = -101;
var
  ExitCode: Integer;
  Instance: Pointer;
  Switches: array of AnsiString;
  Arg: array of PAnsiChar;
  I: Integer;
begin
  { The switches live in AnsiString variables so the PAnsiChar pointers  }
  { stay valid during the call; a pointer cast from a temporary string   }
  { expression is only guaranteed within its own statement.              }
  SetLength(Switches, 17);
  { Ghostscript ignores argv[0] (program-name slot, as in C main), so a  }
  { placeholder goes first; otherwise '-q' would be silently dropped.    }
  Switches[0]  := 'gs';
  Switches[1]  := '-q';
  Switches[2]  := '-dQUIET';
  Switches[3]  := '-dPARANOIDSAFER';
  Switches[4]  := '-dBATCH';
  Switches[5]  := '-dNOPAUSE';
  Switches[6]  := '-dNOPROMPT';
  Switches[7]  := '-dMaxBitmap=500000000';
  Switches[8]  := '-dFirstPage=1';
  Switches[9]  := '-dAlignToPixels=0';
  Switches[10] := '-dGridFitTT=0';
  Switches[11] := '-sDEVICE=jpeg';
  Switches[12] := '-dTextAlphaBits=4';
  Switches[13] := '-dGraphicsAlphaBits=4';
  Switches[14] := '-r300x300';
  Switches[15] := '-sOutputFile='+ output + ' Page-%02d.jpeg';
  Switches[16] := input;

  SetLength(Arg, Length(Switches));
  for I := 0 to High(Switches) do
    Arg[I] := PAnsiChar(Switches[I]);

  ExitCode := gsapi_new_instance(Instance, nil);
  if ExitCode < 0 then
    raise Exception.Create('Impossible to open an instance of ghostscript. Error ExitCode: '+IntToStr(ExitCode));
  try
    ExitCode := gsapi_init_with_args(Instance, Length(Arg), @Arg[0]);
    { gsapi_exit has to follow gsapi_init_with_args even when it failed, }
    { so it is called before the result is turned into an exception.     }
    gsapi_exit(Instance);
    if (ExitCode < 0) and (ExitCode <> GS_ERROR_QUIT) then
      raise Exception.Create('ERROR: init_args: '+IntToStr(ExitCode));
  finally
    gsapi_delete_instance(Instance);
  end;
end;
{ Extracts plain text from a PDF file via the ps2ascii.ps interpreter.   }
{ Another interpreter is pstotxt.ps, floating around on the internet.    }
{ input  : path of the source file handed to ps2ascii.ps.                }
{ output : bare target name; the text goes to                            }
{          <folder of input> + output + '.txt'.                          }
{ ps2ascii.ps must be placed in the folder of the running .EXE.          }
{ Raises Exception when the Ghostscript instance cannot be created or    }
{ when Ghostscript reports a failure while processing the arguments.     }
procedure PDS2TXT(input : AnsiString; output: AnsiString);
const
  { Ghostscript's documented "quit" code: a normal exit, not a failure }
  GS_ERROR_QUIT = -101;
var
  ExitCode: Integer;
  Instance: Pointer;
  Switches: array of AnsiString;
  Arg: array of PAnsiChar;
  PSInterpreter: AnsiString;
  I: Integer;
begin
  { Build the output path BEFORE the slashes of input are swapped:       }
  { ExtractFilePath only recognises '\' and ':' as delimiters on         }
  { Windows, so on an already swapped path it would return at most the   }
  { drive part and the text file would not land next to the input file.  }
  output := AnsiString(SlashSwap(ExtractFilePath(string(input)) + string(output) + '.txt'));
  input := AnsiString(SlashSwap(string(input)));

  { Held as AnsiString: a PAnsiChar cast of a 'string' value would pass  }
  { UTF-16 data to the DLL on Unicode versions of Delphi.                }
  PSInterpreter := AnsiString(SlashSwap(ExtractFilePath(ParamStr(0)) + 'ps2ascii.ps'));

  { The switches live in AnsiString variables so the PAnsiChar pointers  }
  { stay valid during the call; a pointer cast from a temporary string   }
  { expression is only guaranteed within its own statement.              }
  SetLength(Switches, 10);
  { Ghostscript ignores argv[0] (program-name slot, as in C main), so a  }
  { placeholder goes first; otherwise '-q' would be silently dropped.    }
  Switches[0] := 'gs';
  Switches[1] := '-q';
  Switches[2] := '-sstdout='+ output;           { Your_TXT_File_Out.txt }
  Switches[3] := '-dSIMPLE';
  Switches[4] := '-sFONTPATH=c:/windows/fonts';
  Switches[5] := '-dNODISPLAY';
  Switches[6] := '-dDELAYBIND';
  Switches[7] := '-dWRITESYSTEMDICT';
  Switches[8] := '-f'+ PSInterpreter;           { path/to/ps2ascii.ps }
  Switches[9] := input;                         { Your_PDF_File_In.pdf }

  SetLength(Arg, Length(Switches));
  for I := 0 to High(Switches) do
    Arg[I] := PAnsiChar(Switches[I]);

  ExitCode := gsapi_new_instance(Instance, nil);
  if ExitCode < 0 then
    raise Exception.Create('Impossible to open an instance of ghostscript. Error ExitCode: '+IntToStr(ExitCode));
  try
    ExitCode := gsapi_init_with_args(Instance, Length(Arg), @Arg[0]);
    { gsapi_exit has to follow gsapi_init_with_args even when it failed, }
    { so it is called before the result is turned into an exception.     }
    gsapi_exit(Instance);
    if (ExitCode < 0) and (ExitCode <> GS_ERROR_QUIT) then
      raise Exception.Create('ERROR: init_args: '+IntToStr(ExitCode));
  finally
    gsapi_delete_instance(Instance);
  end;
end;
{ Extracts plain text from a PDF file via the txtwrite device.           }
{ input  : path of the source PDF file.                                  }
{ output : target path without extension; '.txt' is appended.            }
{ Raises Exception when the Ghostscript instance cannot be created or    }
{ when Ghostscript reports a failure while processing the arguments.     }
procedure PDF2TXT(input : AnsiString; output: AnsiString);
const
  { Ghostscript's documented "quit" code: a normal exit, not a failure }
  GS_ERROR_QUIT = -101;
var
  ExitCode: Integer;
  Instance: Pointer;
  Switches: array of AnsiString;
  Arg: array of PAnsiChar;
  I: Integer;
begin
  { The switches live in AnsiString variables so the PAnsiChar pointers  }
  { stay valid during the call; a pointer cast from a temporary string   }
  { expression is only guaranteed within its own statement.              }
  SetLength(Switches, 6);
  { Ghostscript ignores argv[0] (program-name slot, as in C main), so a  }
  { placeholder goes first; otherwise '-dBATCH' would be silently        }
  { dropped and the run would not be in batch mode.                      }
  Switches[0] := 'gs';
  Switches[1] := '-dBATCH';
  Switches[2] := '-dNOPAUSE';
  Switches[3] := '-sDEVICE=txtwrite';
  Switches[4] := '-sOutputFile='+ output + '.txt';
  Switches[5] := input;

  SetLength(Arg, Length(Switches));
  for I := 0 to High(Switches) do
    Arg[I] := PAnsiChar(Switches[I]);

  ExitCode := gsapi_new_instance(Instance, nil);
  if ExitCode < 0 then
    raise Exception.Create('Impossible to open an instance of ghostscript. Error ExitCode: '+IntToStr(ExitCode));
  try
    ExitCode := gsapi_init_with_args(Instance, Length(Arg), @Arg[0]);
    { gsapi_exit has to follow gsapi_init_with_args even when it failed, }
    { so it is called before the result is turned into an exception.     }
    gsapi_exit(Instance);
    if (ExitCode < 0) and (ExitCode <> GS_ERROR_QUIT) then
      raise Exception.Create('ERROR: init_args: '+IntToStr(ExitCode));
  finally
    gsapi_delete_instance(Instance);
  end;
end;
{ Generates PNG images from a PDF - 1 image per page (pngalpha device,   }
{ 300 dpi).                                                              }
{ input  : path of the source PDF file.                                  }
{ output : file-name prefix; ' Page-%02d.png' is appended and passed to  }
{          Ghostscript as the output file template.                      }
{ Raises Exception when the Ghostscript instance cannot be created or    }
{ when Ghostscript reports a failure while processing the arguments.     }
procedure PDF2PNG(input : AnsiString; output: AnsiString);
const
  { Ghostscript's documented "quit" code: a normal exit, not a failure }
  GS_ERROR_QUIT = -101;
var
  ExitCode: Integer;
  Instance: Pointer;
  Switches: array of AnsiString;
  Arg: array of PAnsiChar;
  I: Integer;
begin
  { The switches live in AnsiString variables so the PAnsiChar pointers  }
  { stay valid during the call; a pointer cast from a temporary string   }
  { expression is only guaranteed within its own statement.              }
  SetLength(Switches, 11);
  Switches[0]  := 'ps2pdf';          { program-name slot, ignored by Ghostscript }
  Switches[1]  := '-dNOPAUSE';
  Switches[2]  := '-dBATCH';
  Switches[3]  := '-dSAFER';
  Switches[4]  := '-sDEVICE=pngalpha';
  Switches[5]  := '-r300';
  Switches[6]  := '-dTextAlphaBits=4';
  Switches[7]  := '-sOutputFile='+output+' Page-%02d.png';
  Switches[8]  := '-c';
  { NOTE(review): .setpdfwrite is kept as in the original; it looks      }
  { pdfwrite-specific and may be unnecessary for pngalpha - verify.      }
  Switches[9]  := '.setpdfwrite';
  Switches[10] := '-f'+ input;       { -f ends the -c token list and names the file }

  SetLength(Arg, Length(Switches));
  for I := 0 to High(Switches) do
    Arg[I] := PAnsiChar(Switches[I]);

  ExitCode := gsapi_new_instance(Instance, nil);
  if ExitCode < 0 then
    raise Exception.Create('Impossible to open an instance of ghostscript. Error ExitCode: '+IntToStr(ExitCode));
  try
    ExitCode := gsapi_init_with_args(Instance, Length(Arg), @Arg[0]);
    { gsapi_exit has to follow gsapi_init_with_args even when it failed, }
    { so it is called before the result is turned into an exception.     }
    gsapi_exit(Instance);
    if (ExitCode < 0) and (ExitCode <> GS_ERROR_QUIT) then
      raise Exception.Create('ERROR: init_args: '+IntToStr(ExitCode));
  finally
    gsapi_delete_instance(Instance);
  end;
end;
end.
如果你把它改进得更好,请发我一份副本 ;-)