我想从网页上获取信息,我需要列出html文档中的所有列和行
我可以通过ID访问html元素,但是如何访问所有元素 html代码中的行和列,以获取每个单元格的HTML代码?
function GetElementById(const Doc: IDispatch; const Id: string;memo:tmemo): IDispatch;
var
Document: IHTMLDocument2; // IHTMLDocument2 interface of Doc
Body: IHTMLElement2; // document body element
Tags: IHTMLElementCollection; // all tags in document body
Tag: IHTMLElement; // a tag in document body
I: Integer; // loops thru tags in document body
begin
Result := nil;
// Check for valid document: require IHTMLDocument2 interface to it
if not Supports(Doc, IHTMLDocument2, Document) then
raise Exception.Create('Invalid HTML document');
// Check for valid body element: require IHTMLElement2 interface to it
if not Supports(Document.body, IHTMLElement2, Body) then
raise Exception.Create('Can''t find <body> element');
// Get all tags in body element ('*' => any tag name)
Tags := Body.getElementsByTagName('*');
// Scan through all tags in body
for I := 0 to Pred(Tags.length) do
begin
// Get reference to a tag
Tag := Tags.item(I, EmptyParam) as IHTMLElement;
// Check tag's id and return it if id matches
if AnsiSameText(Tag.id, Id) then
begin
memo.lines.add(Tag.id);
memo.Lines.Add(tag.outerHTML);
Result := Tag;
Break;
end;
end;
end;