为什么我不能在Delphi程序中获取AOL验证码图像?

时间:2010-04-27 19:11:28

标签: delphi captcha indy indy10

新的演示代码:

我正在尝试从AOL获取验证码图像,并且我一直收到错误418.

unit imageunit;
///
///  https://new.aol.com/productsweb/
///
interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls, IdIOHandler, IdIOHandlerSocket, IdIOHandlerStack, IdSSL,
  IdSSLOpenSSL, IdIntercept, IdZLibCompressorBase, IdCompressorZLib,
  IdCookieManager, IdBaseComponent, IdComponent, IdTCPConnection, IdTCPClient,
  IdHTTP,jpeg,GIFImg, ExtCtrls, PerlRegEx;

type
  TForm2 = class(TForm)
    IdHTTP1: TIdHTTP;
    IdCookieManager1: TIdCookieManager;
    IdCompressorZLib1: TIdCompressorZLib;
    IdConnectionIntercept1: TIdConnectionIntercept;
    IdSSLIOHandlerSocketOpenSSL1: TIdSSLIOHandlerSocketOpenSSL;
    Panel1: TPanel;
    Image1: TImage;
    Panel2: TPanel;
    Button1: TButton;
    PerlRegEx1: TPerlRegEx;
    Memo1: TMemo;
    procedure Button1Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;

var
  Form2: TForm2;

implementation

{$R *.dfm}

function getaimcaptchaimage(data:string):string;
var
    Regex: TPerlRegEx;
    ResultString: string;
begin
Regex := TPerlRegEx.Create(nil);
Regex.RegEx := '<img src="/productsweb/WordVerImage?(.*?)"';
Regex.Options := [preCaseless];
Regex.Subject := data;
if Regex.Match then begin
    if Regex.SubExpressionCount >= 1 then begin
        ResultString := Regex.SubExpressions[1];
    end;
  result:=Resultstring;
end;
end;


procedure TForm2.Button1Click(Sender: TObject);
var
  JPI : TJPEGImage;
  streamdata:TMemoryStream;
  SStream: Tstringstream;
  website:string;
begin
  streamdata := TMemoryStream.Create;
  SStream := tstringstream.Create ( '' );

  try
  idhttp1.Get('https://new.aol.com/productsweb/',SStream);
  memo1.Text:=UTF8ToWideString ( SStream.DataString );
  website:='https://new.aol.com/productsweb/WordVerImage'+getaimcaptchaimage( UTF8ToWideString ( SStream.DataString ));
  form2.Caption:=website;
  idhttp1.Get(website, Streamdata);
          Except
          { Handle exceptions }
          On E : Exception Do
               Begin
                MessageDlg('Exception: '+E.Message,mtError, [mbOK], 0);
               End;

          End;

  //https://new.aol.com/productsweb/WordVerImage?20890843
  //https://new.aol.com/productsweb/WordVerImage?91868359


  ///
  ///  gives error 418 unused
  ///

  streamdata.Position := 0;
  JPI := TJPEGImage.Create;
  Try
  JPI.LoadFromStream ( streamdata );
  Finally
  Image1.Picture.Assign ( JPI );
  JPI.Free;
  streamdata.Free;
  End;

end;

end.

形式:

object Form2: TForm2
  Left = 0
  Top = 0
  Caption = 'Form2'
  ClientHeight = 247
  ClientWidth = 480
  Color = clBtnFace
  Font.Charset = DEFAULT_CHARSET
  Font.Color = clWindowText
  Font.Height = -11
  Font.Name = 'Tahoma'
  Font.Style = []
  OldCreateOrder = False
  PixelsPerInch = 96
  TextHeight = 13
  object Panel1: TPanel
    Left = 0
    Top = 41
    Width = 480
    Height = 206
    Align = alClient
    TabOrder = 0
    object Image1: TImage
      Left = 1
      Top = 1
      Width = 478
      Height = 115
      Align = alClient
      ExplicitLeft = 5
      ExplicitTop = 17
      ExplicitWidth = 200
      ExplicitHeight = 70
    end
    object Memo1: TMemo
      Left = 1
      Top = 116
      Width = 478
      Height = 89
      Align = alBottom
      TabOrder = 0
      ExplicitLeft = 80
      ExplicitTop = 152
      ExplicitWidth = 185
    end
  end
  object Panel2: TPanel
    Left = 0
    Top = 0
    Width = 480
    Height = 41
    Align = alTop
    TabOrder = 1
    object Button1: TButton
      Left = 239
      Top = 6
      Width = 75
      Height = 25
      Caption = 'Button1'
      TabOrder = 0
      OnClick = Button1Click
    end
  end
  object IdHTTP1: TIdHTTP
    Intercept = IdConnectionIntercept1
    IOHandler = IdSSLIOHandlerSocketOpenSSL1
    MaxAuthRetries = 100
    AllowCookies = True
    HandleRedirects = True
    RedirectMaximum = 100
    ProxyParams.BasicAuthentication = False
    ProxyParams.ProxyPort = 0
    Request.ContentLength = -1
    Request.Accept = 
      'image/gif, image/jpeg, image/pjpeg, image/pjpeg, application/x-s' +
      'hockwave-flash, application/cade, application/xaml+xml, applicat' +
      'ion/vnd.ms-xpsdocument, application/x-ms-xbap, application/x-ms-' +
      'application, */*'
    Request.BasicAuthentication = False
    Request.Referer = 'http://www.yahoo.com'
    Request.UserAgent = 
      'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/201001' +
      '22 firefox/3.6.1'
    HTTPOptions = [hoForceEncodeParams]
    CookieManager = IdCookieManager1
    Compressor = IdCompressorZLib1
    Left = 40
    Top = 160
  end
  object IdCookieManager1: TIdCookieManager
    Left = 360
    Top = 136
  end
  object IdCompressorZLib1: TIdCompressorZLib
    Left = 408
    Top = 56
  end
  object IdConnectionIntercept1: TIdConnectionIntercept
    Left = 304
    Top = 72
  end
  object IdSSLIOHandlerSocketOpenSSL1: TIdSSLIOHandlerSocketOpenSSL
    Intercept = IdConnectionIntercept1
    MaxLineAction = maException
    Port = 0
    DefaultPort = 0
    SSLOptions.Mode = sslmUnassigned
    SSLOptions.VerifyMode = []
    SSLOptions.VerifyDepth = 0
    Left = 192
    Top = 136
  end
  object PerlRegEx1: TPerlRegEx
    Options = []
    Left = 120
    Top = 56
  end
end

如果你去https://new.aol.com/productsweb/ 你会注意到验证码图像有一个像https://new.aol.com/productsweb/WordVerImage?91868359

这样的网址

我将该网址放在编辑框中并收到错误消息。

这段代码有什么问题?

4 个答案:

答案 0 :(得分:7)

涉及到一个cookie。如果您直接访问未访问https://new.aol.com/productsweb/WordVerImage?91868359的浏览器中的验证码网址https://new.aol.com/productsweb/,则会获得(刷新后):

<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"> 
<html><head> 
<title>418 unused</title> 
</head><body> 
<h1>unused</h1> 
<p>The server encountered an internal error or
misconfiguration and was unable to complete
your request.</p> 
<p>Please contact the server administrator,
 null and inform them of the time the error occurred,
and anything you might have done that may have
caused the error.</p> 
<p>More information about this error may be available
in the server error log.</p> 
</body></html> 

但如果您先访问https://new.aol.com/productsweb/,那么您将获得一张图片。清除cookie并再次出现错误(尽管Francois指出,您先没有响应,然后在刷新时获得图像。)

Indy支持cookie,因此您需要添加对cookie的支持,然后通过首先访问productsweb获取cookie,或者从已知值模拟它。

您会注意到生成的图像不是基于单独作为参数传递的数字,而是基于cookie。有两个不同的浏览器(Chrome和Firefox),每个浏览器都有不同的cookie,然后访问相同的验证码URL,您将获得两个不同的图像。

很好奇你要用这个来完成什么。

答案 1 :(得分:4)

我认为这意味着“走开”。不知何故,通过标题或您请求中的某些内容,它确定您闻起来像机器人。也许是因为你要求的图像知道它不仅仅适合你。 是的,那可能就是这样。如果我在浏览器中访问您的URL,我也会收到418。

答案 2 :(得分:2)

这不是你的代码。尝试在浏览器中.... (你显然需要从'h t t p s'中删除空白......)

在获取验证码图像之前,显然需要调用此URL https://new.aol.com/productsweb/。否则您会收到(不正确的)错误418 Unused 有时我必须尝试使用​​图像#两次,因为我第一次遇到420 Unused错误...

你最好问他们,因为他们的API看起来并不稳定......

RE:http 418的笑话。如果您希望通过A Web Developer and His Girlfriend(s)

上的http错误代码获得乐趣

答案 3 :(得分:0)

在我的旧项目中,我从网上获取了验证码。我使用嵌入式Web浏览器组件缓存程序完成此操作。因此,如果您可以从代码中读取临时Internet文件,则可以从缓存中读取图像数据。我在下面添加了一个简单的代码,

GetCachedFileFromURL和ClearAllEntries函数在TEmbeddedWebBrowser单元中声明。我只在我的解决方案中使用我复制的代码来降低exe的大小。但您可以使用组件更新源。该组件是开源的。

uses
 WinInet;

function GetCachedFileFromURL(strUL: string; var strLocalFile: string): Boolean;
var
   lpEntryInfo: PInternetCacheEntryInfo;
   hCacheDir: LongWord;
   dwEntrySize: LongWord;
   dwLastError: LongWord;
begin
   Result := False;
   dwEntrySize := 0;
  // Begin the enumeration of the Internet cache.
   FindFirstUrlCacheEntry(nil, TInternetCacheEntryInfo(nil^), dwEntrySize);
   GetMem(lpEntryInfo, dwEntrySize);
   hCacheDir := FindFirstUrlCacheEntry(nil, lpEntryInfo^, dwEntrySize);
   if (hCacheDir <> 0) and (strUL = lpEntryInfo^.lpszSourceUrlName) then
      begin
         strLocalFile := lpEntryInfo^.lpszLocalFileName;
         Result := True;
      end;
   FreeMem(lpEntryInfo);
   if Result = False then
      repeat
         dwEntrySize := 0;
      // Retrieves the next cache group in a cache group enumeration
         FindNextUrlCacheEntry(hCacheDir, TInternetCacheEntryInfo(nil^), dwEntrySize);
         dwLastError := GetLastError();
         if (GetLastError = ERROR_INSUFFICIENT_BUFFER) then
            begin
               GetMem(lpEntryInfo, dwEntrySize);
               if (FindNextUrlCacheEntry(hCacheDir, lpEntryInfo^, dwEntrySize)) then
                  begin
                     if strUL = lpEntryInfo^.lpszSourceUrlName then
                        begin
                           strLocalFile := lpEntryInfo^.lpszLocalFileName;
                           Result := True;
                           Break;
                        end;
                  end;
               FreeMem(lpEntryInfo);
            end;
      until (dwLastError = ERROR_NO_MORE_ITEMS);
end;

procedure TForm1.ClearCache();
begin
  SearchPattern := spAll;
  ClearAllEntries;
end;

使用

procedure TForm1.Button1Click(Sender: TObject);
var
 fname:string;
 jpImg:TJPEGImage;
begin
  ClearCache;
  try
   jpImg:=TJPEGImage.Create; 
   GetCachedFileFromURL('https://ebildirge.ssk.gov.tr/WPEB/PG',fname);
   jpImg.LoadFromFile(fname);
  finally
   FreeAndNil(jpgImg);
  end;
end;