一段时间以来,我一直试图从这个 HTML 表格中获取数据,付费和免费的组件都试过了。我也尝试过自己编写代码,同样没有结果。我有一个可以把 HTML 表格直接导入 ClientDataSet 的类,但它对这个表格不起作用。有人能提示一下如何获取这个 HTML 表格中的数据吗?或者有什么办法可以把它转换为 txt / xls / csv 或 xml?下面是我用来获取该表格的代码:
// Load the page. Navigate is asynchronous, so pump messages until the browser
// reports that the document is fully loaded before touching its DOM.
WebBrowser1.Navigate('http://site2.aesa.pb.gov.br/aesa/monitoramentoPluviometria.do?metodo=listarMesesChuvasMensais');
while WebBrowser1.ReadyState <> READYSTATE_COMPLETE do
  Application.ProcessMessages;

// Set the first two <select> elements of the page to '2013' and '7'
// (presumably year and month -- confirm against the page markup).
WebBrowser1.OleObject.Document.All.Tags('select').Item(0).Value := '2013';
WebBrowser1.OleObject.Document.All.Tags('select').Item(1).Value := '7';

// Click the second <input> element; presumably this submits the form and
// triggers a new page load, which is asynchronous as well.
WebBrowser1.OleObject.Document.All.Tags('input').Item(1).click;
repeat
  Application.ProcessMessages;
until (not WebBrowser1.Busy) and (WebBrowser1.ReadyState = READYSTATE_COMPLETE);
// NOTE(review): polling may exit early if the navigation has not started yet;
// handling TWebBrowser.OnDocumentComplete is the more robust approach.

// Dump the markup of the 11th <table> (index 10) and save it to disk.
Memo1.Text := WebBrowser1.OleObject.Document.All.Tags('table').Item(10).InnerHTML;
Memo1.Lines.SaveToFile('table.html');
答案 0 :(得分:4)
以下代码将从目标页面上的 HTML 表格中提取数据,并将其加载到 ClientDataSet 中。
代码相当冗长,这也许说明正如 David 所说,Delphi 可能不是完成这项工作的最佳工具。
在我的 Form1 上有一个 TEdit(edValue),我在其中键入 HTML 表格第一行数据中的某个值。我用它来定位 HTML 文档中的那个表格。我敢说一定有更好的办法,但我的方法至少应该比硬编码地假设表格在文档中的嵌入位置更可靠——那样的假设可能在页面作者修改页面之后就不再成立。
大体上,代码的工作方式是:先用 edValue.Text 的内容查找 HTML 表格单元格,再找到该单元格所属的表格,最后根据该表格创建 CDS 的字段并填充数据。
默认情况下,CDS 字段的大小设置为 255 个字符;网页所发布的数据也许有相应的规格说明,让您可以为部分(甚至全部)字段使用更小的值。所有字段都按 ftString 类型处理,以免代码因意外的单元格内容而出错。
顺便说一句,代码末尾有一个实用函数,用于把 HTML 页面保存到本地,这样就不必每次都单击按钮选择年份和月份。要让 WebBrowser 从保存的文件重新加载,只需把该文件名作为要加载的 URL 即可。
TForm1 = class(TForm)
[ ... ]
public
{ Public declarations }
// HTML document of the page shown in WebBrowser1. It is assigned in
// btnFindValueClick and read by FindTableByCellValue and SaveFileLocally.
Doc : IHtmlDocument2;
// Click handler: finds the HTML table that contains a cell whose text starts
// with edValue.Text and loads that table into CDS.
// Raises Exception when no document is loaded or no matching table exists.
procedure TForm1.btnFindValueClick(Sender: TObject);
var
  Table : IHTMLTable;
begin
  // "as" applied to a nil interface yields nil, so Doc is nil when the
  // browser has not loaded a document yet.
  Doc := WebBrowser1.Document as IHTMLDocument2;
  if Doc = Nil then
    raise Exception.Create('No HTML document is loaded in the browser');

  Table := FindTableByCellValue(edValue.Text);
  // Whether a table is found depends on what the user typed, so this is a
  // normal run-time outcome; it must be reported even when assertions are
  // compiled out, hence an exception rather than Assert.
  if Table = Nil then
    raise Exception.CreateFmt('No HTML table contains a cell starting with "%s"',
      [edValue.Text]);

  LoadCDSFromHTMLTable(CDS, Table);
end;
// Builds DestCDS from an HTML table.
//   DestCDS : must be inactive and have no fields yet (asserted).
//   Table   : the HTML table; its first row supplies the field names and
//             every following row becomes one record.
// An auto-increment 'ID' field is created first, followed by one string field
// of cMaxFieldSize characters per cell of the first row.
// Raises Exception when the table has no rows at all.
procedure TForm1.LoadCDSFromHTMLTable(DestCDS : TClientDataSet; Table : IHTMLTable);
var
  I,
  J : Integer;
  RowCount,
  ColCount,
  CellCount : Integer;
  vTable,
  vHeaderCells,
  vCells : OleVariant;
  FieldName,
  FieldValue : String;
  Field : TField;
const
  cMaxFieldSize = 255;
  scIDFieldName = 'ID';
begin
  // Use OleVariant instead of IHTMLTable because it's less fiddly for doing what follows
  vTable := Table;
  Assert(not DestCDS.Active and (DestCDS.FieldCount = 0));

  // Every access through the OleVariant is a late-bound call, so fetch the
  // loop-invariant values once instead of on every iteration.
  RowCount := vTable.Rows.Length;
  if RowCount = 0 then
    raise Exception.Create('The HTML table has no rows');
  vHeaderCells := vTable.Rows.Item(0).Cells;
  ColCount := vHeaderCells.Length;

  // First create an AutoInc field
  Field := TAutoIncField.Create(Self);
  Field.FieldName := scIDFieldName;
  Field.DataSet := DestCDS;

  // Next create CDS fields from the names in the cells in the first row of the table
  for I := 0 to ColCount - 1 do begin
    FieldName := vHeaderCells.Item(I).InnerText;
    Field := TStringField.Create(Self);
    // At this point, we might want to clean up the FieldName by removing embedded spaces, etc
    Field.FieldName := FieldName;
    Field.Size := cMaxFieldSize;
    Field.DataSet := DestCDS;
  end;

  DestCDS.DisableControls;
  try
    DestCDS.IndexFieldNames := scIDFieldName;
    DestCDS.CreateDataSet;

    // Next load the HTML table data into the CDS
    for I := 1 to RowCount - 1 do begin
      vCells := vTable.Rows.Item(I).Cells;
      // A row may hold fewer cells than the header row (e.g. colspan), and
      // only ColCount string fields exist, so copy just the cells that are
      // present in both.
      CellCount := vCells.Length;
      if CellCount > ColCount then
        CellCount := ColCount;

      DestCDS.Insert;
      try
        for J := 0 to CellCount - 1 do begin
          FieldValue := vCells.Item(J).InnerText;
          // the J + 1 is because Fields[0] is the autoinc one
          DestCDS.Fields[J + 1].AsString := FieldValue;
        end;
        DestCDS.Post;
      except
        // Do not leave the dataset stuck in insert state when a row fails
        DestCDS.Cancel;
        raise;
      end;
    end;
    DestCDS.First;
  finally
    DestCDS.EnableControls;
  end;
end;
// Returns the first table cell in Doc whose trimmed inner text starts with
// the trimmed AValue, or Nil when there is no such cell, when Doc is Nil, or
// when AValue is blank.
function TForm1.FindTableCellByTagValue(Doc : IHtmlDocument2; const AValue : String) : IHTMLTableCell;
var
  All : IHTMLElementCollection;
  Wanted : String;
  I : Integer;
  E : OleVariant;
  iE : IHTMLElement;
  iC : IHTMLTableCell;
begin
  Result := Nil;
  if Doc = Nil then
    Exit;
  All := Doc.All;
  if All = Nil then
    Exit;

  // Trim the search text once rather than for every element. Pos returns 0
  // for an empty substring, so a blank search text can never match.
  Wanted := Trim(AValue);
  if Wanted = '' then
    Exit;

  for I := 0 to All.Length - 1 do begin
    E := All.Item(I, varEmpty);
    iE := IDispatch(E) as IHTMLElement;
    // Only table cells are of interest; every other element is skipped
    if Supports(iE, IHTMLTableCell, iC) then begin
      // "= 1" means the cell text must start with the search text
      if Pos(Wanted, Trim(iE.Get_InnerText)) = 1 then begin
        Result := iC;
        Break;
      end;
    end;
  end;
end;
// Returns the HTML table that contains the first cell whose text starts with
// Value, searching the document held in the Doc field. Returns Nil when no
// such cell exists or the cell has no table among its ancestors.
function TForm1.FindTableByCellValue(Value : String): IHTMLTable;
var
  Node : IHtmlElement;
  iTable : IHTMLTable;
  iCell : IHTMLTableCell;
begin
  Result := Nil;
  // Search for the value passed in by the caller; the previous code ignored
  // the parameter and always read edValue.Text instead.
  iCell := FindTableCellByTagValue(Doc, Value);
  if iCell = Nil then
    Exit;
  Node := IDispatch(iCell) as IHtmlElement;
  // if we found a Node with the cell text we were looking for,
  // we can now find the HTML table to which it belongs
  while Node <> Nil do begin
    // Walk up one level; Supports returns False for Nil, so reaching the
    // top of the document simply ends the loop with Result = Nil.
    Node := Node.parentElement;
    if Supports(Node, IHTMLTable, iTable) then begin
      Result := iTable;
      Break;
    end;
  end;
end;
// Saves the document held in the Doc field to FileName through IPersistFile.
// Raises Exception when no document is available or when the save fails.
procedure TForm1.SaveFileLocally(const FileName : String);
var
  PFile : IPersistFile; // declared in ActiveX unit
  WideName : WideString;
begin
  if Doc = Nil then
    raise Exception.Create('No HTML document is available to save');
  PFile := Doc as IPersistFile;
  // StringToOleStr allocates a BSTR that the caller must free; a WideString
  // local is released automatically, so nothing leaks.
  WideName := FileName;
  // Save returns an HRESULT; surface a failure instead of ignoring it
  if Failed(PFile.Save(PWideChar(WideName), False)) then
    raise Exception.CreateFmt('Could not save the HTML document to "%s"', [FileName]);
end;
答案 1 :(得分:1)
unit Unit1;
interface
uses
Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes, Vcl.Graphics,
Vcl.Controls, Vcl.Forms, Vcl.Dialogs, Vcl.OleCtrls, SHDocVw, Vcl.StdCtrls,
Vcl.Grids, Vcl.DBGrids, Data.DB, Datasnap.DBClient;
type
// Main form: shows an HTML page in an embedded browser and copies one of its
// tables into a string grid (Button1) or a client dataset (Button2).
TForm1 = class(TForm)
// Browser control; FormCreate points it at a local HTML file
WebBrowser1: TWebBrowser;
// Presumably bound to ClientDataSet1 through DataSource1 in the DFM -- not visible here
DBGrid1: TDBGrid;
// Filled with the raw table cells by Button1Click
StringGrid1: TStringGrid;
Button1: TButton;
Button2: TButton;
// Filled with one record per table row by Button2Click
ClientDataSet1: TClientDataSet;
DataSource1: TDataSource;
// Persistent fields of ClientDataSet1; presumably these are the fields that
// Button2Click addresses by name via FieldByName -- verify against the DFM
ClientDataSet1MunicípioPosto: TStringField;
ClientDataSet1TotalMensalmm: TStringField;
ClientDataSet1ClimatologiaMensalmm: TStringField;
ClientDataSet1Desviomm: TStringField;
ClientDataSet1Desvio: TStringField;
ClientDataSet1id: TAutoIncField;
procedure FormCreate(Sender: TObject);
procedure Button1Click(Sender: TObject);
procedure Button2Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;
var
Form1: TForm1;
implementation
{$R *.dfm}
// Copies the twelfth <table> (index 11) of the loaded page into StringGrid1.
// Reports the row and column counts first, then writes each HTML cell one
// row down and one column right of its table position, leaving grid row 0
// and grid column 0 untouched.
procedure TForm1.Button1Click(Sender: TObject);
var
  r, c: Integer;
  tbl, rowCells: OleVariant;
begin
  tbl := WebBrowser1.OleObject.Document.all.tags('table').item(11);

  ShowMessage('Number of Rows: '+IntToStr(tbl.Rows.Length));
  ShowMessage('Number of Cols: '+IntToStr(tbl.Rows.Item(0).Cells.Length));

  // Size the grid from the table: one extra row and one extra column
  StringGrid1.RowCount := tbl.Rows.Length + 1;
  StringGrid1.ColCount := tbl.Rows.Item(0).Cells.Length + 1;

  for r := 0 to tbl.Rows.Length - 1 do
  begin
    rowCells := tbl.Rows.Item(r).Cells;
    for c := 0 to rowCells.Length - 1 do
      StringGrid1.Cells[c + 1, r + 1] := rowCells.Item(c).InnerText;
  end;
end;
// Loads the twelfth <table> (index 11) of the loaded page into ClientDataSet1,
// one record per table row. Row 0 is skipped (presumably the header row --
// confirm against the page). The first five cells of each row are stored in
// the five named fields; rows with fewer than five cells are skipped instead
// of failing on a missing cell.
procedure TForm1.Button2Click(Sender: TObject);
const
  cColumnCount = 5;
var
  iRow : Integer;
  ovTable : OleVariant;
  ovCells : OleVariant;
begin
  ovTable := WebBrowser1.OleObject.Document.all.tags('table').item(11);
  // Nothing to load when the table holds no rows beyond the first one
  if ovTable.Rows.Length < 2 then
    Exit;

  // Keep bound controls from repainting after every inserted record
  ClientDataSet1.DisableControls;
  try
    // Opening the dataset and selecting the index are needed only once,
    // not once per row.
    ClientDataSet1.Open;
    ClientDataSet1.IndexFieldNames := 'id';

    for iRow := 1 to (ovTable.Rows.Length - 1) do
    begin
      ovCells := ovTable.Rows.Item(iRow).Cells;
      if ovCells.Length < cColumnCount then
        Continue;

      ClientDataSet1.Insert;
      try
        // Each field is assigned exactly once; the earlier per-cell loop
        // repeated these same five assignments for every cell of the row.
        ClientDataSet1.FieldByname('Município/Posto').AsString := ovCells.Item(0).InnerText;
        ClientDataSet1.FieldByname('Total Mensal (mm)').AsString := ovCells.Item(1).InnerText;
        ClientDataSet1.FieldByname('Climatologia Mensal (mm)').AsString := ovCells.Item(2).InnerText;
        ClientDataSet1.FieldByname('Desvio (mm)').AsString := ovCells.Item(3).InnerText;
        ClientDataSet1.FieldByname('Desvio (%)').AsString := ovCells.Item(4).InnerText;
        ClientDataSet1.Post;
      except
        // Do not leave the dataset stuck in insert state when a row fails
        ClientDataSet1.Cancel;
        raise;
      end;
    end;

    ClientDataSet1.First;
  finally
    ClientDataSet1.EnableControls;
  end;
end;
// Form start-up: loads the locally saved HTML page into the browser so the
// button handlers have a document to read the table from.
procedure TForm1.FormCreate(Sender: TObject);
begin
  WebBrowser1.Navigate('C:\htmlwiththetable.html');
end;

// A unit must be terminated by "end." (with the period)
end.