Delphi Xe - Thread和WebBrowser在AV中获得源代码

时间:2014-04-14 12:33:25

标签: delphi

我想创建一个不断监视我的WebBrowser的Thread,以检查页面源中是否有某些文本。我在表单上有多个WebBrowsers,但是我只用一个WebBrowser创建了一个SSCCE(AV仍然存在)。

我甚至制作了一个计时器,在文档加载后等待10秒,并在计时器事件上创建线程(以确保文档完成),但似乎不是问题。无论文档是否加载,都会出现AV。因此,下面的示例中不存在计时器。我已逐步执行,而AV则发生在评论为iall.outerHTML的位置。 iall似乎包含正确的东西,但调用outerHTML结果是AV。

你能找到错误吗?

单位源代码如下:

unit Unit1;

interface

uses
  Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes, Vcl.Graphics,
  Vcl.Controls, Vcl.Forms, Vcl.Dialogs, Vcl.OleCtrls, SHDocVw, ActiveX, StrUtils, MSHTML,
  Vcl.StdCtrls;

type
  TWebBrowser = class(SHDocVw.TWebBrowser, IOleCommandTarget)
    CurDispatch: IDispatch;
  private
    TFDocLoaded: Boolean;
    TFedAddress: String;
    function QueryStatus(CmdGroup: PGUID; cCmds: Cardinal; prgCmds: POleCmd;
      CmdText: POleCmdText): HRESULT; stdcall;
    function Exec(CmdGroup: PGUID; nCmdID, nCmdexecopt: DWORD;
      const vaIn: OleVariant; var vaOut: OleVariant): HRESULT; stdcall;
  published
    property FDocLoaded: Boolean read TFDocLoaded write TFDocLoaded;
    property FedAddress: String read TFedAddress write TFedAddress;
  end;

  TForm1 = class;

  TWatcherThread = class(TThread)
  private
    THBrowser: TForm1;
  protected
    procedure Execute; override;
  public
    constructor Create(ABrowser: TForm1);
  end;

  TForm1 = class(TForm)
    WebBrowser1: TWebBrowser;
    Label1: TLabel;
    procedure FormCreate(Sender: TObject);
    procedure WebBrowser1NavigateComplete2(ASender: TObject;
      const pDisp: IDispatch; const URL: OleVariant);
  private
    { Private declarations }
    MyWatcher: TWatcherThread;
    function checkIfThereIs(AWebBrowser: TWebBrowser): Boolean;
    function GetWebBrowserHTML(const ABrowser: TWebBrowser): String;
  public
    { Public declarations }
  end;

var
  Form1: TForm1;

implementation

{$R *.dfm}

function TWebBrowser.QueryStatus(CmdGroup: PGUID; cCmds: Cardinal; prgCmds: POleCmd; CmdText: POleCmdText): HRESULT; stdcall;
begin
  Result := S_OK;
end;

function TWebBrowser.Exec(CmdGroup: PGUID; nCmdID, nCmdexecopt: DWORD; const vaIn: OleVariant; var vaOut: OleVariant): HRESULT; stdcall;
begin
  Result := S_OK;
  if nCmdID = OLECMDID_SHOWSCRIPTERROR then Result := S_OK;
end;

function TForm1.checkIfThereIs(AWebBrowser: TWebBrowser): Boolean;
var
  src: String;
begin
  Result := False;
  try
    src := GetWebBrowserHTML(AWebBrowser);
    if PosEx('<span>Mail</span>', src) > 0 then Result := True;
  except
    on E : Exception do result := False;
  end;
end;

function TForm1.GetWebBrowserHTML(const ABrowser: TWebBrowser): String;
var
  iall: IHTMLElement;
begin
  Result := '';
  try
    if not Assigned(ABrowser.Document) then Exit;
    if (ABrowser.Document AS IHTMLDocument2).body <> nil then
      begin
        iall := (ABrowser.Document AS IHTMLDocument2).body;
        while iall.parentElement <> nil do iall := iall.parentElement;
        Result := iall.outerHTML;  // <- here I get AV after doc is loaded
      end;
  except
    on E : Exception do //
  end;
end;

procedure TForm1.WebBrowser1NavigateComplete2(ASender: TObject;
  const pDisp: IDispatch; const URL: OleVariant);
var
  WB: TWebBrowser;
begin
  WB := TWebBrowser(ASender);
  if WB.CurDispatch = nil then WB.CurDispatch := pDisp;
  WB.FDocLoaded := True;
end;

procedure TForm1.FormCreate(Sender: TObject);
begin
  WebBrowser1.Navigate('www.yahoo.com');
  MyWatcher := TWatcherThread.Create(Self);
  with MyWatcher do
    begin
      FreeOnTerminate := True;
      Resume;
    end;
end;


constructor TWatcherThread.Create(ABrowser: TForm1);
begin
  THBrowser := ABrowser;
  inherited Create(True);
end;

procedure TWatcherThread.Execute;
var
  i: Integer;
  pt: TWebBrowser;
begin
  inherited;
  repeat
    //parsing list of existing WebBrowsers -> for each do the following begin/end section
      begin
      if THBrowser.WebBrowser1.FDocLoaded = True then
        if THBrowser.checkIfThereIs(THBrowser.WebBrowser1) then // must be called this way (passing WebBrowser class to function) because in the original I have more than one WebBrowsers
          begin
            THBrowser.Label1.Caption := 'There is!';
            Break;
          end;
      end;
    Sleep(1000);
  until Terminated;
end;

end.

1 个答案:

答案 0 :(得分:3)

您违反了VCL线程规则。必须从主线程进行对VCL控件的所有访问。

您还通过从创建它们的线程调用COM Web浏览器对象的方法来破坏COM线程规则。

在我看来,这个主题在这里没有用处,你应该只是在NavigateComplete2中完成工作。

我也想知道视觉成分是否是正确的选择。您是否需要显示所有这些网页,或者您是否正在抓取?