PL/SQL 中的 HTML 表格解析器

时间:2017-03-07 09:37:31

标签: html xml plsql xml-parsing html-table

我需要创建HTML表格解析器,它将以正确的顺序读取表格单元格。

我到目前为止的代码:

-- Fragment of a PL/SQL block (the declaration of html and BEGIN/END are not shown).
-- Sample HTML: one table with two rows of two cells each.
html := '<body>
           <table border="1">
             <tr>
               <td><b>A1</b></td>
               <td><i>B1</i></td>
             </tr>
             <tr>
               <td><b>A2</b></td>
               <td><i>B2</i></td>
             </tr>
           </table>
         </body>';

-- Outer loop: one iteration per <tr>. The td column is selected with './td',
-- so it carries ALL <td> elements of that row as a single XMLType value.
FOR r IN (SELECT rownum rn, td FROM xmltable('*/table/tr' passing xmltype(html)
                                                         columns td xmltype path './td'))
LOOP
  -- Inner loop: the row expression '.' produces a single row for the whole
  -- fragment, so cell is the text of every <td> of the row joined together
  -- (this is why the output is "A1B1" instead of one line per cell).
  FOR c IN (SELECT cell FROM xmltable('.' passing r.td
                                                  columns cell VARCHAR(200) path '.'))
  LOOP
    -- Prints "Row <row number>: <cell text>".
    dbms_output.put_line('Row ' || r.rn || ': ' || c.cell);

  END LOOP;
END LOOP;

现在的结果是:

Row 1: A1B1
Row 2: A2B2

我需要的是:

Row 1: A1
Row 1: B1
Row 2: A2
Row 2: B2

我怎样才能实现这一目标?感谢您的回复。

4 个答案:

答案 0 :(得分:2)

只需一条查询就足够了。

-- Returns one row per table cell: SEQNO is the 1-based position of the <tr>
-- in the document, TEXT is the text content of the <td>.
--
-- Differences from a naive './*/text()' extraction:
--   * PATH '.' takes the whole string value of the cell, so it works for
--     plain-text cells (<td>A1</td>) and for cells with several child
--     elements, not only for cells with exactly one wrapper element.
--   * '/tr/td' selects only the direct cells of the row, so cells of a
--     nested table are not attributed to the outer row.
--   * FOR ORDINALITY on both levels plus ORDER BY makes the output order
--     deterministic (row order, then cell order).
SELECT
    r.seqno AS seqno,
    c.text  AS text
FROM XMLTABLE(
         '//table/tr'
         PASSING XMLTYPE('<body>
           <table border="1">
             <tr>
               <td><b>A1</b></td>
               <td><i>B1</i></td>
             </tr>
             <tr>
               <td><b>A2</b></td>
               <td><i>B2</i></td>
             </tr>
           </table>
         </body>')
         COLUMNS
             seqno  FOR ORDINALITY,
             tr_xml XMLTYPE PATH '.'
     ) r
CROSS JOIN XMLTABLE(
         '/tr/td'
         PASSING r.tr_xml
         COLUMNS
             cell_no FOR ORDINALITY,
             text    VARCHAR2(100) PATH '.'
     ) c
ORDER BY
    r.seqno,
    c.cell_no;

答案 1 :(得分:1)

这样应该可以解决问题 :)

    -- Prints every cell of the sample HTML table as "Row <n>: <cell text>",
    -- one line per <td>, in document order.
    DECLARE
      html VARCHAR2(1000) := '<body>           
         <table border="1">             
           <tr>               
                <td><b>A1</b></td>               
                <td><i>B1</i></td>             
           </tr>             
           <tr>                 
                <td><b>A2</b></td>               
                <td><i>B2</i></td>             
           </tr>           
         </table>         
       </body>';
BEGIN
  -- One set-based query instead of a second cursor opened per row:
  --   r = one row per <tr>, numbered by its position in the document;
  --   c = one row per <td> of that <tr>, numbered by its position in the row.
  -- FOR ORDINALITY reflects XML document order, and the ORDER BY makes the
  -- output order explicit instead of relying on fetch order.
  FOR cell_rec IN (
    SELECT r.rn,
           c.cell
    FROM XMLTABLE('*/table/tr'
                  PASSING XMLTYPE(html)
                  COLUMNS rn FOR ORDINALITY,
                          tr XMLTYPE PATH '.') r
    CROSS JOIN XMLTABLE('*/td'
                  PASSING r.tr
                  COLUMNS cn   FOR ORDINALITY,
                          cell VARCHAR2(200) PATH '.') c
    ORDER BY r.rn,
             c.cn
  )
  LOOP
    dbms_output.put_line('Row ' || cell_rec.rn || ': ' || cell_rec.cell);
  END LOOP;
END;

输出:

 Row 1: A1
 Row 1: B1
 Row 2: A2
 Row 2: B2

答案 2 :(得分:1)

假设HTML列是clob,您可以按以下方式执行:

-- Walks the sample HTML table row by row and prints each cell on its own
-- line as "Row <n>: <cell text>".
DECLARE
  html CLOB := '<body>
           <table border="1">
             <tr>
               <td><b>A1</b></td>
               <td><i>B1</i></td>
             </tr>
             <tr>
               <td><b>A2</b></td>
               <td><i>B2</i></td>
             </tr>
           </table>
         </body>';

  -- Splits the <td> fragment of one table row into one record per cell.
  CURSOR row_cells (p_cells XMLTYPE)
  IS
    SELECT cell
      FROM xmltable('./td'
                    PASSING p_cells
                    COLUMNS cell VARCHAR(200) PATH '.');
BEGIN
  -- One iteration per <tr>; td holds all <td> elements of that row.
  FOR tr_rec IN (
    SELECT rownum rn,
           td
      FROM xmltable('*/table/tr'
                    PASSING xmltype(html)
                    COLUMNS td XMLTYPE PATH './td')
  )
  LOOP
    FOR td_rec IN row_cells(tr_rec.td)
    LOOP
      dbms_output.put_line('Row ' || tr_rec.rn || ': ' || td_rec.cell);
    END LOOP;
  END LOOP;
END;

答案 3 :(得分:0)

enter image description here enter image description here
您还可以创建美观的、基于 jQuery 的响应式表格(注意:该报告是使用 Oracle 11g XE 中 HR 模式 EMPLOYEE 表的示例数据生成的)

        -- Code generator: writes, via DBMS_OUTPUT, the source of a second PL/SQL
        -- anonymous block. Running that generated block prints an HTML page that
        -- shows the rows of schema_nm.tbl_nm in a jQuery DataTables table with
        -- copy / Excel / CSV / PDF export buttons.
        --
        -- Inside the string literals, '' is one quote in the generated source and
        -- '''' is one quote in the final HTML/JavaScript.
        DECLARE
               tbl_nm      VARCHAR2 (100) := 'MRKT';            -- table to report on
               schema_nm   VARCHAR2 (100) := 'STDDATA_STAGE';   -- owner of that table

               -- Column names of the table in column order.
               -- ALL_TAB_COLUMNS is used (not ALL_TAB_COLS) because the latter also
               -- lists hidden columns, which "select *" in the generated loop does
               -- not return, so the generated i.<column> references would not compile.
               CURSOR C1
               IS
                    SELECT column_name STR
                      FROM all_tab_columns
                     WHERE table_name = tbl_nm AND OWNER = schema_nm
                  ORDER BY COLUMN_ID;

            BEGIN
               DBMS_OUTPUT.put_line ('begin');

               -- Static page head, scripts, styles and the opening of the header row.
               DBMS_OUTPUT.put_line (
                  'DBMS_OUTPUT.put_line(''<!DOCTYPE html>
            <html lang="en" >
            
            <head>
            
            <script src="https://cpwebassets.codepen.io/assets/common/stopExecutionOnTimeout-157cd5b220a5c80d4ff8e0e70ac069bffd87a61252088146915e8726e5d9f147.js"></script> 
            <script src=''''https://cdnjs.cloudflare.com/ajax/libs/jquery/3.1.1/jquery.min.js''''> </script> 
            <script src=''''https://cdn.datatables.net/1.10.13/js/jquery.dataTables.min.js''''> </script> 
            <script src=''''https://cdn.datatables.net/buttons/1.2.4/js/buttons.html5.min.js''''> </script> 
            <script src=''''https://cdn.rawgit.com/bpampuch/pdfmake/0.1.18/build/vfs_fonts.js''''> </script> 
            <script src=''''https://cdnjs.cloudflare.com/ajax/libs/jszip/2.5.0/jszip.min.js''''></script>
            <script src=''''https://cdn.rawgit.com/bpampuch/pdfmake/0.1.18/build/pdfmake.min.js''''> </script>  
            <script src=''''https://cdn.datatables.net/buttons/1.2.4/js/dataTables.buttons.min.js''''> </script> 
            
            <meta charset="UTF-8">
            <link rel="apple-touch-icon" type="image/png" href="https://cpwebassets.codepen.io/assets/favicon/apple-touch-icon-5ae1a0698dcc2402e9712f7d01ed509a57814f994c660df9f7a952f3060705ee.png" />
            <meta name="apple-mobile-web-app-title" content="CodePen">
            
            <link rel="shortcut icon" type="image/x-icon" href="https://cpwebassets.codepen.io/assets/favicon/favicon-aec34940fbc1a6e787974dcd360f2c6b63348d4b1f4e06c77743096d55480f33.ico" />
            
            <link rel="mask-icon" type="" href="https://cpwebassets.codepen.io/assets/favicon/logo-pin-8f3771b1072e3c38bd662872f6b673a722f4b3ca2421637d5596661b4e2132cc.svg" color="#111" />
            
            
              <title>CodePen - HTML Table to Excel, CSV and PDF</title>
              
              
              <link rel=''''stylesheet'''' href=''''https://cdn.datatables.net/1.10.13/css/jquery.dataTables.min.css''''>
            <link rel=''''stylesheet'''' href=''''https://cdn.datatables.net/buttons/1.2.4/css/buttons.dataTables.min.css''''>
              
            <style>
            //https://datatables.net/extensions/buttons/examples/html5/simple.html
            </style>
            
              <script>
              window.console = window.console || function(t) {};
            </script>
            
              
              
              <script>
              if (document.location.search.match(/type=embed/gi)) {
                window.parent.postMessage("resize", "*");
              }
            </script>
            
            
            </head>
            
            <body translate="no" >
              <table id="example" class="display" cellspacing="0" width="100%">
                    <thead>          
                      <tr>'');');


               -- One generated statement per column: prints <th>COLUMN_NAME</th>.
               FOR i IN C1
               LOOP
                  DBMS_OUTPUT.put_line (
                        'DBMS_OUTPUT.put_line(''<th>''||'
                     || ''''
                     || I.STR
                     || ''''
                     || '||''</th>'');');
               END LOOP;

               DBMS_OUTPUT.put_line ('DBMS_OUTPUT.put_line(''  </tr>
                    </thead><tbody>'');');



               -- Generated data loop. The table is schema-qualified so the generated
               -- block reads the same table whose columns were looked up above.
               DBMS_OUTPUT.put_line ('for i in (select *from ' || schema_nm || '.' || tbl_nm || ')loop
               ');
               -- Each data row must be OPENED with <tr> (it is closed after the cells).
               DBMS_OUTPUT.put_line ('DBMS_OUTPUT.put_line(''<tr>
               '');');

               -- One generated statement per column: prints <td>value</td> for the
               -- current row of the generated loop.
               FOR i IN C1
               LOOP
                  DBMS_OUTPUT.put_line (
                        'DBMS_OUTPUT.put_line(''<td>''||'
                     || 'i.'
                     || I.STR
                     || '||''</td>'');');
               END LOOP;

               DBMS_OUTPUT.put_line ('DBMS_OUTPUT.put_line(''</tr>'');');
               DBMS_OUTPUT.put_line ('end loop;');

               -- Static page tail: closes the table and initialises DataTables.
               DBMS_OUTPUT.put_line (
                  'DBMS_OUTPUT.put_line(''  </tbody>
                </table>
              
                  <script id="rendered-js" >
            $(document).ready(function () {
              $(''''#example'''').DataTable({
                dom: ''''Bfrtip'''',
                buttons: [
                ''''copyHtml5'''',
                {
                  extend: ''''excelHtml5'''',
                  title: ''''Project Report - '''' + new Date().toJSON().slice(0, 10).replace(/-/g, ''''-'''') },
            
                ''''csvHtml5'''',
                ''''pdfHtml5''''] });
            
            
            });
            //# sourceURL=pen.js
                </script>
            
              
            
            </body>
            
            </html>'');');
               DBMS_OUTPUT.put_line ('end;');
            END;
    
    
      [1]: https://i.stack.imgur.com/Zt84f.png