Delphi,将HTML表导出到Excel

时间:2014-01-28 20:04:36

标签: html excel delphi

我想要做的就是实现经典webbrowser的“Export to excel”选项,Delphi2007命令......当我从webbrowser使用此选项导出12000行表时,它需要的时间少于一分钟从Windows的任何Web浏览器导出表。尝试使用2D数组在Delphi中实现这一点需要10分钟...尝试使用解析技术(Stringlists,字符串,Pos(tr),pos(td)和其他一些字符串函数)实现导出需要很长时间。 ..因此,哪些是webbrowser将html表导出为ex​​cel的命令,我必须将它们转换为Delphi?我应该在Delphi中使用javascript吗?我应该使用指针吗?我应该使用HTML实体吗? xml?...有什么想法吗?提前谢谢。

2D阵列

Excel:= CreateOleObject('Excel.Application'); 
ovTable := WebBrowser1.OleObject.Document.all.tags('TABLE').item(0);
arrayn:=VarArrayCreate([1, ovTable.Rows.Length, 1, ovTable.Rows.Item(1).Cells.Length],         varvariant);
for i:=0 to (ovTable.Rows.Length - 1) do  
begin
for j := 0 to (ovTable.Rows.Item(i).Cells.Length - 1) do
Begin
arrayn[i+1, j+1]:=ovTable.Rows.Item(i).Cells.Item(j).InnerText;
Application.ProcessMessages;
end;end;
WS.range[ws.cells[1, 1], ws.cells[ovTable.Rows.Length,     ovTable.Rows.Item(1).Cells.Length]].value:=arrayn;
Excel.WorkBooks[1].SaveAs(directorylistbox1.Directory+'\'+'test.xlsx');
WS := Excel.WorkBooks.close;
Excel.quit;
Excel:=unassigned;

HTML PARSING

function HTMLCleanUp(L : string) : string;
const
CSVTempSeparator = #255; //replaced by a comma
CRLF = #13#10;
var
P1,P2 : integer;
begin    
P1 := Pos('<',L); //clean-up anything between <>
while (P1>0) do    //WHILE1
begin
P2 := Pos('>',L);
if (P2>0)
then Begin Delete(L,P1,P2-P1+1); end;
P1 := Pos('<',L);
end;               //WHILE1
L:=StringReplace(L,'&nbsp;','-',[rfReplaceAll]);
L:=StringReplace(L,'-01','',[rfReplaceAll]);
L:=StringReplace(L,'-02','',[rfReplaceAll]);
L:=StringReplace(L,'-03','',[rfReplaceAll]);
Result := Trim(L);
end;

function HTMLTableToCSV(HTML,CSV : TStringList) : boolean;
const
CRLF = #13#10;
CSVTempSeparator = #9; 
var
P1,P2,P3,P4, p5, P6, p11, p22 : integer;
S,TmpStr,CSVStr : string;
begin
Result := True;
S := Trim(StringReplace(HTML.Text,CRLF,'',[rfReplaceAll]));
P1 := PosEx('<TR',S, 1);    //CASE SENSITIVE , TR->FIRST ROW
CSVStr := '';
while (P1>0) do     //while1
begin
P2 := PosEx('</TR',S, P1);
      if (P2>0)      //if1
      then begin
      TmpStr := Copy(S,P1,P2-P1+1);
      //Delete(S,P1,P2-P1+1);
      CSVStr := ''; p11:=1;p22:=1;
      P11 := PosEx('<TH',TmpStr,1);
            while (P11>0) do   //while2
            begin
            P22 := PosEx('</TH',TmpStr, P11);
                   if (P22>0)  //if2
                   then begin
                   CSVStr :=
                   //CSVStr+Trim(Copy(TmpStr,P1+4,P2-P1-4));//+CSVTempSeparator;
                   CSVStr+Trim(Copy(TmpStr,P11,P22-P11))+CSVTempSeparator;
                   //Delete(TmpStr,P1,P2-P1+1);
                   end        //if2
                   else begin
                   Result := False;
                   Exit;
                   end;       //if2
            P11 := PoseX('<TH',TmpStr, P22);
            end;              //while2
       P11 := PosEx('<TD',TmpStr, 1);
            while (P11>0) do   //while2
            begin
            P22 := PosEx('</TD',TmpStr, P11);
                   if (P22>0)  //if2
                   then begin
                   CSVStr :=
                   //CSVStr+Trim(Copy(TmpStr,P1+4,P2-P1-4));//+CSVTempSeparator;
                   CSVStr+Trim(Copy(TmpStr,P11,P22-P11))+CSVTempSeparator;
                   //Delete(TmpStr,P1,P2-P1+1);
                   end        //if2
                   else begin
                   Result := False;
                   Exit;
                   end;       //if2
             P11 := PosEx('<TD',TmpStr,P22);
            end;              //while2
      end            //if1
      else begin
      Result:=false;
      exit;
      end;            //if1
CSV.Add(HTMLCleanUp(CSVStr));
P1 := PosEx('<TR',S,P2);    //CASE SENSITIVE
end;      //while1
end;

procedure TForm11.Button1Click(Sender: TObject);
const
xlExcel7 = $00000027;
TmpFileName='c:\test\Test.txt';
VAR
Excel: Olevariant;
HTMLStrList,CSVSTRList : TStringList;
begin
HTMLStrList := TStringList.Create;
try
HTMLStrList.LoadFromFile('C:\test\TestTable1.htm');
CSVSTRList := TStringList.Create;
try
if HTMLTableToCSV(HTMLStrList,CSVSTRList)
then Begin 
CSVSTRList.SaveToFile(TmpFileName);
Excel:= CreateOleObject('Excel.Application');
Excel.WorkBooks.opentext(TmpFileName);//OPEN TXT WITH EXCEL
Excel.DisplayAlerts := False;
Excel.WorkBooks[1].SaveAs('c:\test\Nisa.xls', xlExcel7);//SAVE TAB DELIMITED TEXT FILE
Excel.WorkBooks[1].close;
Excel.quit;
Excel:=unassigned;
End
else ShowMessage('Error converting HTML table to CSV');
finally
CSVSTRList.Free;
end;
finally
HTMLStrList.Free;
DeleteFile(TmpFileName);
end;
end;


procedure TForm11.FormCreate(Sender: TObject);
begin
webBrowser1.Navigate('http://samples.msdn.microsoft.com/workshop/samples/author/tables/HTML_    Table.htm');
end;

procedure TForm11.WebBrowser1DocumentComplete(ASender: TObject;
  const pDisp: IDispatch; var URL: OleVariant);
var
Document: IHtmlDocument2;
CurWebrowser : IWebBrowser;
TopWebBrowser: IWebBrowser;
WindowName   : string;

begin
CurWebrowser := pDisp as IWebBrowser;
TopWebBrowser := (ASender as TWebBrowser).DefaultInterface;
if CurWebrowser=TopWebBrowser then
 begin
document := webbrowser1.document as IHtmlDocument2;
memo3.lines.add(trim(document.body.innerhtml));  // to get html
ShowMessage('Document is complete.')
 end;
end;

端。

1 个答案:

答案 0 :(得分:0)

我找到了解决方案...... HTML表解析不到一秒钟!

function HTMLCleanUp(L : string) : string;
var
P1,P2 : integer;
begin
P1 := Pos('<',L); //clean-up anything between <>
while (P1>0) do    //WHILE1
begin
P2 := Pos('>',L);
if (P2>0)
then Begin Delete(L,P1,P2-P1+1); end;
P1 := Pos('<',L);
end;               //WHILE1
L:=StringReplace(L,'&nbsp;','-',[rfReplaceAll]);
Result := Trim(L);
end;

 procedure TForm11.WB_SaveAs_HTML(WB : TWebBrowser; const FileName : string) ;
 var
   PersistStream: IPersistStreamInit;
   Stream: IStream;
   FileStream: TFileStream;
 begin
   if not Assigned(WB.Document) then
   begin
     ShowMessage('Document not loaded!') ;
     Exit;
   end;

   PersistStream := WB.Document as IPersistStreamInit;
   FileStream := TFileStream.Create(FileName, fmCreate) ;
   try
     Stream := TStreamAdapter.Create(FileStream, soReference) as IStream;
     if Failed(PersistStream.Save(Stream, True)) then ShowMessage('SaveAs HTML fail!') ;
   finally
     FileStream.Free;
   end;
 end; (* WB_SaveAs_HTML *)

procedure TForm11.Button1Click(Sender: TObject);
const
xlExcel7 = $00000027;
TmpFileName='c:\test\xxxx.txt';
CRLF = #13#10;
CSVTempSeparator = #9;   //#255; //replaced by a comma
ADPNEWHOTURL = 'http://samples.msdn.microsoft.com/workshop/samples/author/tables/HTML_Table.htm';

VAR
Excel, WS: Olevariant;
P1,P2,P3,P4, p5, P6, p11, p22 : integer;
i, j: Integer;
buffer,rawHTM,TmpStr,CSVStr:string;
HTMFile : TextFile;
CSVSTRList : TStringList;

begin
CSVSTRList := TStringList.Create;

WB_SaveAs_HTML(WebBrowser1,TmpFileName) ;

AssignFile(HTMFile, TmpFileName);//read the HTML file
     Reset(HTMFile);
        while not EOF(HTMFile) do begin
        ReadLn(HTMFile, buffer);
        rawHTM := Concat(rawHTM, buffer);
      end;

i:=1;j:=1;
rawHTM := Trim(StringReplace(rawHTM,CRLF,'',[rfReplaceAll]));
P1 := PosEx('<TR',rawHTM, 1);   //CASE SENSITIVE , TR->FIRST ROW
while (P1>0) do     //while1
begin
P2 := PosEx('</TR',rawHTM, P1);
      if (P2>0)      //if1
      then begin
      TmpStr := Copy(rawHTM,P1,P2-P1+1);
      CSVStr := '';p11:=1;p22:=1;
      P11 := PosEx('<TH',TmpStr,1);
            while (P11>0) do   //while2
            begin
            P22 := PosEx('</TH',TmpStr, P11);
                   if (P22>0)  //if2
                   then begin
                   CSVStr :=CSVStr+
                   HTMLCleanUp(Trim(Copy(TmpStr,P11,P22-P11)))+CSVTempSeparator; j:=j+1;
                   end        //if2
                   else begin
                   Exit;
                   end;       //if2
            P11 := PoseX('<TH',TmpStr, P22);
            end;              //while2
       P11 := PosEx('<TD',TmpStr, 1);
            while (P11>0) do   //while2
            begin
            P22 := PosEx('</TD',TmpStr, P11);
                   if (P22>0)  //if2
                   then begin
                   CSVStr :=CSVStr+
                   HTMLCleanUp(Trim(Copy(TmpStr,P11,P22-P11)))+CSVTempSeparator; j:=j+1;
                   end        //if2
                   else begin
                   Exit;
                   end;       //if2
             P11 := PosEx('<TD',TmpStr,P22);
            end;              //while2
      end            //if1
      else begin
      exit;
      end;            //if1
      CSVSTRList.Add(CSVStr);
P1 := PosEx('<TR',rawHTM,P2); i:=i+1; j:=1;  //CASE SENSITIVE
end;      //while1

CSVSTRList.SaveToFile('c:\test\xxx2.txt');
Excel:= CreateOleObject('Excel.Application');
Excel.WorkBooks.opentext('c:\test\xxx2.txt');//OPEN TXT WITH EXCEL
Excel.visible := True;
CloseFile(HTMFile);
DeleteFile(TmpFileName);
end;