获取任何文件的文字内容?

时间:2016-04-04 13:49:33

标签: delphi delphi-10-seattle ifilter

使用Delphi 10 Seattle,我需要获取任何文件的文本内容。

我已经GetFileContentsFromIFilter SPFilter.pas来自http://develop.shorterpath.com/spfree/default.asp,但我无法让它发挥作用:

uses SPFilter;

procedure TForm1.btnTestClick(Sender: TObject);
var
  FilterStream: TStringStream;
begin
  FilterStream := TStringStream.Create;
  try
    if SPFilter.GetFileContentsFromIFilter(Trim(edtFile.Text), FilterStream) then // D:\Readme.txt
    begin
      FilterStream.Seek(0, soFromBeginning);
      Memo1.Lines.LoadFromStream(FilterStream);
    end;
  finally
    FilterStream.Free;
  end;
end;

这是SPFilter.pas单位:

(******************************************************************************)
(* SPFilter - Read file content using IFilter interface                       *)
(* Shorter Path Free Components 1.0                                           *)
(*                                                                            *)
(* Copyright (c) 2003 Shorter Path Software                                   *)
(* http://develop.shorterpath.com                                             *)
(******************************************************************************)

unit SPFilter;

interface

uses
  CodeSiteLogging,
  Classes;

function GetFileContentsFromIFilter(const FileName: string; OutData: TStream): Boolean;

implementation

uses
  Windows, SysUtils, Registry, ActiveX, Filter;

function GetFileContentsFromIFilter(const FileName: string;
  OutData: TStream): Boolean;
var
  Reg: TRegistry;
  DocType, DocClass, HandlerClass, PersistClass, FilterDLL: string;
  DLLHandle: THandle;
  ClassFactory: IClassFactory;
  FilterObj: IFilter;
  PersistFile: IPersistFile;
  DllGetClassObject: TDllGetClassObject;
  DllCanUnloadNow: TDLLCanUnloadNow;
  Res, ChunkRes: HResult;
  pFlags: ULONG;
  WFileName: WideString;
  StatChunk: TStatChunk;
  cwcBuffer: ULONG;
  awcBuffer: PWideChar;
  Txt: WideString;
  AnsiTxt: string;
  t: string;
  EndOfChunksCount: Integer;
begin
  Result := False;

  { Find filter DLL }
  FilterDLL := EmptyStr;

  { Step 1: Determine the CLSID }
  Reg := TRegistry.Create(KEY_READ);
  Reg.RootKey := HKEY_LOCAL_MACHINE;

  { A. Locate document type }
  DocType := 'Software\Classes\' + ExtractFileExt(FileName);
  if Reg.OpenKey(DocType, False) then
  begin
    DocType := Reg.ReadString(EmptyStr);
    Reg.CloseKey;
    CodeSite.Send('document type', DocType); // -> txtfile

    if Length(DocType) > 0 then
    begin
      { B. Locate document class }
      if Reg.OpenKey('Software\Classes\' + DocType + '\CLSID', False) then // FALSE HERE!!
      begin
        CodeSite.Send('CLSID');
        DocClass := Reg.ReadString(EmptyStr);
        Reg.CloseKey;
      end;
    end;

    { Step 2: Determine the Persistent Handler }
    if Length(DocClass) > 0 then
    begin
      if Reg.OpenKey('Software\Classes\CLSID\' + DocClass + '\PersistentHandler', False) then
      begin
        HandlerClass := Reg.ReadString(EmptyStr);
        Reg.CloseKey;
      end;
    end;

    { Step 3: Determine the IFilter Persistent Handler GUID }
    if Length(HandlerClass) > 0 then
    begin
      if Reg.OpenKey('Software\Classes\CLSID\' + HandlerClass +
        '\PersistentAddinsRegistered\' + GUIDToString(IID_IFilter), False) then
      begin
        PersistClass := Reg.ReadString(EmptyStr);
        Reg.CloseKey;
      end;
    end;

    { Step 4: Determine the Filter DLL }
    if Length(PersistClass) > 0 then
    begin
      if Reg.OpenKey('Software\Classes\CLSID\' + PersistClass + '\InprocServer32', False) then
      begin
        FilterDLL := Reg.ReadString(EmptyStr);
        Reg.CloseKey;
      end;
    end;
  end;
  Reg.Free;

  { Use Filter DLL to read the file }
  if Length(FilterDLL) > 0 then
  begin
    DLLHandle := LoadLibrary(PChar(FilterDLL));
    if DLLHandle <> 0 then
    begin
      @DllGetClassObject := GetProcAddress(DLLHandle, 'DllGetClassObject');
      @DllCanUnloadNow := GetProcAddress(DLLHandle, 'DllCanUnloadNow');
      if Assigned(DllGetClassObject) then
      begin
        { Get Class Factory }
        DllGetClassObject(StringToGUID(PersistClass), IClassFactory, ClassFactory);
        if Assigned(ClassFactory) then
        begin
          { Get IFilter object }
          ClassFactory.CreateInstance(nil, IFilter, FilterObj);
          if Assigned(FilterObj) then
          begin
            FilterObj.QueryInterface(IPersistFile, PersistFile);
            if Assigned(PersistFile) then
            begin
              WFileName := FileName;
              PersistFile.Load(PWideChar(WFileName), 0);
              EndOfChunksCount := 0;
              Res := FilterObj.Init(0, 0, nil, pFlags);
              if Res = S_OK then
              //if FilterObj.Init(0, 0, nil, pFlags) = S_OK then
              begin
                repeat
                  ChunkRes := FilterObj.GetChunk(StatChunk);
                  if ChunkRes = S_OK then
                  begin
                    if (StatChunk.flags and CHUNK_TEXT) <> 0 then
                    begin
                      GetMem(awcBuffer, 16384*2);
                      repeat
                        cwcBuffer := 16384;
                        FillChar(awcBuffer^, cwcBuffer, 0);
                        Res := FilterObj.GetText(cwcBuffer, awcBuffer);
                        if cwcBuffer > 0 then
                        begin
                          SetLength(Txt, cwcBuffer*2);
                          FillChar(PWideChar(Txt)^, cwcBuffer*2, 0);
                          Move(awcBuffer^, PWideChar(Txt)^, cwcBuffer*2);

                          SetLength(AnsiTxt, cwcBuffer*2);
                          FillChar(PChar(AnsiTxt)^, cwcBuffer*2, 0);
                          WideCharToMultiByte(CP_ACP, 0, PWideChar(Txt),
                            cwcBuffer, PAnsiChar(AnsiTxt), cwcBuffer*2, ' ', nil);

                          SetLength(AnsiTxt, StrLen(PChar(AnsiTxt)));
                          t := t + AnsiTxt;
                        end;
                      until Res = FILTER_E_NO_MORE_TEXT;
                      FreeMem(awcBuffer);
                    end;
                  end;
                  if ChunkRes = FILTER_E_END_OF_CHUNKS then
                    Inc(EndOfChunksCount) else
                    EndOfChunksCount := 0;
                until EndOfChunksCount > 1;
                { Return True for success }
                Result := True;
              end;
            end;
            { Release filter }
            PersistFile := nil;
            FilterObj := nil;
          end;
          { Release Class Factory }
          ClassFactory := nil;
        end;
      end;
      if Assigned(DllCanUnloadNow) then
      begin
        if DllCanUnloadNow = S_OK then
          FreeLibrary(DLLHandle) else
      end else
        FreeLibrary(DLLHandle);
    end;
  end;

  { Write data to stream }
  if Result then
  try
    OutData.Write(t[1], Length(t));
  except
    Result := False;
  end;
end;

end.

从评论中可以看出&#34; FALSE HERE&#34;它无法获取.TXT文件的CLSID。但是,txtfile注册表项中没有CLSID项。那么这里有什么问题?

然而,来自Citeknet的IFilter Explorer确实向我展示了.TXT文件的有效iFilter!

有人知道如何获取任何文件的内容吗?

0 个答案:

没有答案