我想要做的就是实现经典webbrowser的“Export to excel”选项,Delphi2007命令......当我从webbrowser使用此选项导出12000行表时,它需要的时间少于一分钟从Windows的任何Web浏览器导出表。尝试使用2D数组在Delphi中实现这一点需要10分钟...尝试使用解析技术(Stringlists,字符串,Pos(tr),pos(td)和其他一些字符串函数)实现导出需要很长时间。 ..因此,哪些是webbrowser将html表导出为excel的命令,我必须将它们转换为Delphi?我应该在Delphi中使用javascript吗?我应该使用指针吗?我应该使用HTML实体吗? xml?...有什么想法吗?提前谢谢。
2D阵列
Excel:= CreateOleObject('Excel.Application');
ovTable := WebBrowser1.OleObject.Document.all.tags('TABLE').item(0);
arrayn:=VarArrayCreate([1, ovTable.Rows.Length, 1, ovTable.Rows.Item(1).Cells.Length], varvariant);
for i:=0 to (ovTable.Rows.Length - 1) do
begin
for j := 0 to (ovTable.Rows.Item(i).Cells.Length - 1) do
Begin
arrayn[i+1, j+1]:=ovTable.Rows.Item(i).Cells.Item(j).InnerText;
Application.ProcessMessages;
end;end;
WS.range[ws.cells[1, 1], ws.cells[ovTable.Rows.Length, ovTable.Rows.Item(1).Cells.Length]].value:=arrayn;
Excel.WorkBooks[1].SaveAs(directorylistbox1.Directory+'\'+'test.xlsx');
WS := Excel.WorkBooks.close;
Excel.quit;
Excel:=unassigned;
HTML PARSING
function HTMLCleanUp(L : string) : string;
const
CSVTempSeparator = #255; //replaced by a comma
CRLF = #13#10;
var
P1,P2 : integer;
begin
P1 := Pos('<',L); //clean-up anything between <>
while (P1>0) do //WHILE1
begin
P2 := Pos('>',L);
if (P2>0)
then Begin Delete(L,P1,P2-P1+1); end;
P1 := Pos('<',L);
end; //WHILE1
L:=StringReplace(L,' ','-',[rfReplaceAll]);
L:=StringReplace(L,'-01','',[rfReplaceAll]);
L:=StringReplace(L,'-02','',[rfReplaceAll]);
L:=StringReplace(L,'-03','',[rfReplaceAll]);
Result := Trim(L);
end;
function HTMLTableToCSV(HTML,CSV : TStringList) : boolean;
const
CRLF = #13#10;
CSVTempSeparator = #9;
var
P1,P2,P3,P4, p5, P6, p11, p22 : integer;
S,TmpStr,CSVStr : string;
begin
Result := True;
S := Trim(StringReplace(HTML.Text,CRLF,'',[rfReplaceAll]));
P1 := PosEx('<TR',S, 1); //CASE SENSITIVE , TR->FIRST ROW
CSVStr := '';
while (P1>0) do //while1
begin
P2 := PosEx('</TR',S, P1);
if (P2>0) //if1
then begin
TmpStr := Copy(S,P1,P2-P1+1);
//Delete(S,P1,P2-P1+1);
CSVStr := ''; p11:=1;p22:=1;
P11 := PosEx('<TH',TmpStr,1);
while (P11>0) do //while2
begin
P22 := PosEx('</TH',TmpStr, P11);
if (P22>0) //if2
then begin
CSVStr :=
//CSVStr+Trim(Copy(TmpStr,P1+4,P2-P1-4));//+CSVTempSeparator;
CSVStr+Trim(Copy(TmpStr,P11,P22-P11))+CSVTempSeparator;
//Delete(TmpStr,P1,P2-P1+1);
end //if2
else begin
Result := False;
Exit;
end; //if2
P11 := PoseX('<TH',TmpStr, P22);
end; //while2
P11 := PosEx('<TD',TmpStr, 1);
while (P11>0) do //while2
begin
P22 := PosEx('</TD',TmpStr, P11);
if (P22>0) //if2
then begin
CSVStr :=
//CSVStr+Trim(Copy(TmpStr,P1+4,P2-P1-4));//+CSVTempSeparator;
CSVStr+Trim(Copy(TmpStr,P11,P22-P11))+CSVTempSeparator;
//Delete(TmpStr,P1,P2-P1+1);
end //if2
else begin
Result := False;
Exit;
end; //if2
P11 := PosEx('<TD',TmpStr,P22);
end; //while2
end //if1
else begin
Result:=false;
exit;
end; //if1
CSV.Add(HTMLCleanUp(CSVStr));
P1 := PosEx('<TR',S,P2); //CASE SENSITIVE
end; //while1
end;
procedure TForm11.Button1Click(Sender: TObject);
const
xlExcel7 = $00000027;
TmpFileName='c:\test\Test.txt';
VAR
Excel: Olevariant;
HTMLStrList,CSVSTRList : TStringList;
begin
HTMLStrList := TStringList.Create;
try
HTMLStrList.LoadFromFile('C:\test\TestTable1.htm');
CSVSTRList := TStringList.Create;
try
if HTMLTableToCSV(HTMLStrList,CSVSTRList)
then Begin
CSVSTRList.SaveToFile(TmpFileName);
Excel:= CreateOleObject('Excel.Application');
Excel.WorkBooks.opentext(TmpFileName);//OPEN TXT WITH EXCEL
Excel.DisplayAlerts := False;
Excel.WorkBooks[1].SaveAs('c:\test\Nisa.xls', xlExcel7);//SAVE TAB DELIMITED TEXT FILE
Excel.WorkBooks[1].close;
Excel.quit;
Excel:=unassigned;
End
else ShowMessage('Error converting HTML table to CSV');
finally
CSVSTRList.Free;
end;
finally
HTMLStrList.Free;
DeleteFile(TmpFileName);
end;
end;
procedure TForm11.FormCreate(Sender: TObject);
begin
webBrowser1.Navigate('http://samples.msdn.microsoft.com/workshop/samples/author/tables/HTML_ Table.htm');
end;
procedure TForm11.WebBrowser1DocumentComplete(ASender: TObject;
const pDisp: IDispatch; var URL: OleVariant);
var
Document: IHtmlDocument2;
CurWebrowser : IWebBrowser;
TopWebBrowser: IWebBrowser;
WindowName : string;
begin
CurWebrowser := pDisp as IWebBrowser;
TopWebBrowser := (ASender as TWebBrowser).DefaultInterface;
if CurWebrowser=TopWebBrowser then
begin
document := webbrowser1.document as IHtmlDocument2;
memo3.lines.add(trim(document.body.innerhtml)); // to get html
ShowMessage('Document is complete.')
end;
end;
端。
答案 0 :(得分:0)
我找到了解决方案...... HTML表解析不到一秒钟!
function HTMLCleanUp(L : string) : string;
var
P1,P2 : integer;
begin
P1 := Pos('<',L); //clean-up anything between <>
while (P1>0) do //WHILE1
begin
P2 := Pos('>',L);
if (P2>0)
then Begin Delete(L,P1,P2-P1+1); end;
P1 := Pos('<',L);
end; //WHILE1
L:=StringReplace(L,' ','-',[rfReplaceAll]);
Result := Trim(L);
end;
procedure TForm11.WB_SaveAs_HTML(WB : TWebBrowser; const FileName : string) ;
var
PersistStream: IPersistStreamInit;
Stream: IStream;
FileStream: TFileStream;
begin
if not Assigned(WB.Document) then
begin
ShowMessage('Document not loaded!') ;
Exit;
end;
PersistStream := WB.Document as IPersistStreamInit;
FileStream := TFileStream.Create(FileName, fmCreate) ;
try
Stream := TStreamAdapter.Create(FileStream, soReference) as IStream;
if Failed(PersistStream.Save(Stream, True)) then ShowMessage('SaveAs HTML fail!') ;
finally
FileStream.Free;
end;
end; (* WB_SaveAs_HTML *)
procedure TForm11.Button1Click(Sender: TObject);
const
xlExcel7 = $00000027;
TmpFileName='c:\test\xxxx.txt';
CRLF = #13#10;
CSVTempSeparator = #9; //#255; //replaced by a comma
ADPNEWHOTURL = 'http://samples.msdn.microsoft.com/workshop/samples/author/tables/HTML_Table.htm';
VAR
Excel, WS: Olevariant;
P1,P2,P3,P4, p5, P6, p11, p22 : integer;
i, j: Integer;
buffer,rawHTM,TmpStr,CSVStr:string;
HTMFile : TextFile;
CSVSTRList : TStringList;
begin
CSVSTRList := TStringList.Create;
WB_SaveAs_HTML(WebBrowser1,TmpFileName) ;
AssignFile(HTMFile, TmpFileName);//read the HTML file
Reset(HTMFile);
while not EOF(HTMFile) do begin
ReadLn(HTMFile, buffer);
rawHTM := Concat(rawHTM, buffer);
end;
i:=1;j:=1;
rawHTM := Trim(StringReplace(rawHTM,CRLF,'',[rfReplaceAll]));
P1 := PosEx('<TR',rawHTM, 1); //CASE SENSITIVE , TR->FIRST ROW
while (P1>0) do //while1
begin
P2 := PosEx('</TR',rawHTM, P1);
if (P2>0) //if1
then begin
TmpStr := Copy(rawHTM,P1,P2-P1+1);
CSVStr := '';p11:=1;p22:=1;
P11 := PosEx('<TH',TmpStr,1);
while (P11>0) do //while2
begin
P22 := PosEx('</TH',TmpStr, P11);
if (P22>0) //if2
then begin
CSVStr :=CSVStr+
HTMLCleanUp(Trim(Copy(TmpStr,P11,P22-P11)))+CSVTempSeparator; j:=j+1;
end //if2
else begin
Exit;
end; //if2
P11 := PoseX('<TH',TmpStr, P22);
end; //while2
P11 := PosEx('<TD',TmpStr, 1);
while (P11>0) do //while2
begin
P22 := PosEx('</TD',TmpStr, P11);
if (P22>0) //if2
then begin
CSVStr :=CSVStr+
HTMLCleanUp(Trim(Copy(TmpStr,P11,P22-P11)))+CSVTempSeparator; j:=j+1;
end //if2
else begin
Exit;
end; //if2
P11 := PosEx('<TD',TmpStr,P22);
end; //while2
end //if1
else begin
exit;
end; //if1
CSVSTRList.Add(CSVStr);
P1 := PosEx('<TR',rawHTM,P2); i:=i+1; j:=1; //CASE SENSITIVE
end; //while1
CSVSTRList.SaveToFile('c:\test\xxx2.txt');
Excel:= CreateOleObject('Excel.Application');
Excel.WorkBooks.opentext('c:\test\xxx2.txt');//OPEN TXT WITH EXCEL
Excel.visible := True;
CloseFile(HTMFile);
DeleteFile(TmpFileName);
end;