解析HTML文件并将值作为php变量返回

时间:2012-05-28 02:22:51

标签: php javascript html ajax

我公司的网站上有下面这段HTML代码。由于我无权访问数据库,我想直接解析这个HTML文件并取出其中的值。代码如下:

<?php
// Sample order page as delivered by the company site: five sections separated by <hr>,
// each made of a <p> header followed by one table.
// The literal is single-quoted, so attribute quotes are written as plain " characters;
// a \" here would put a real backslash into the markup.
$string = '
<p> <b>HEADER INFO</b>
<table width=100% cellspacing=0>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>View Object:</b> 6600422</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>BPO:</b> G37147359-000000</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Ack Date:</b> 2012-05-28</font></td>
  </tr>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=3><font face="verdana, arial, helvetica" size="-2"><b>Operation(s):</b> PPS_Queue, PPS_Build, PPS_BoxAll, JPN_End</font></td>
  </tr>
</table>
</p>
<hr>
<p> <b>EXTERNAL ORDER NUMBER REFERENCE</b>
<table width=100% cellspacing=0>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>SAP Sales Order Number</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Customer P.O. Number</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Legacy Order Number</b></font></td>
  </tr>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">0310363858</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">77340892008-120413</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">89FF09378001</font></td>
  </tr>
</table>
</p>
<hr>
<p> <b>PRODUCTS FOR THIS WORK OBJECT/OPERATION(S)</b>
<table width=100% cellspacing=0>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>PL</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Product #</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Qty</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Options</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Serial #</b></font></td>
  </tr>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">3C</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">AP703B</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">1</font></td>
    <td valign=top colspan=1>&nbsp </td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">2S6219000G</font></td>
  </tr>
</table>
</p>
<hr>
<p> <b>Station Info</b>
<table width=100% cellspacing=0>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Start Station:</b> JPN_End</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Location:</b> Done</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Station:</b> </font></td>
  </tr>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Birth Date/Time:</b> 2012-05-23 14:20:32 SGT</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Power Cord:</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Voltage:</b></font></td>
  </tr>
</table>
</p>
<hr>
<p> <b>MATERIAL LIST FOR THIS WORK OBJECT/OPERATION(S)</b>
<table width=100% cellspacing=0>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Part Number</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Qty</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Description</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>BB Type</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Material Location</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Serial Number</b></font></td>
  </tr>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">AP703B@@</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">1</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">OEM Generic 1U SAS Enclosure</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">BOM</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">ASSY</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">2S6219000G</font></td>
  </tr>
</table>
</p>
 ';

 // Parse the sample page, then import every returned key as a variable of the same name.
 $result = parse_data($string);
extract($result);

// Emit each extracted field followed by a line break, in the original order.
echo implode('<br />', array($headertext, $sapSON, $custPON, $legacyON, $pl, $pn)), '<br />';


/**
 * Extract the header text and the first five digit-led table cells from an HTML fragment.
 *
 * Every <td> whose trimmed text starts with a digit is collected in document order and
 * mapped positionally onto sapSON, custPON, legacyON, pl and pn. The header is the
 * trimmed text of the last <p> element found.
 *
 * @param string $string HTML markup to scan.
 * @return array Keys headertext, sapSON, custPON, legacyON, pl and pn. A key is null
 *               when the markup does not contain a matching element.
 */
function parse_data($string){
    // Remove the double-&nbsp; filler so spacer cells come out empty.
    $string = str_replace('&nbsp;&nbsp;', '', $string);

    $xml = new DOMDocument();
    // loadHTML() does not accept an empty string; an empty input simply yields no matches.
    if ($string !== '') {
        // Collect libxml's complaints about the legacy markup internally instead of
        // silencing the call with @, then restore the caller's setting.
        $previous = libxml_use_internal_errors(true);
        $xml->loadHTML($string);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
    }

    // Stays null without any <p>; with several, the last one wins.
    $header = null;
    foreach ($xml->getElementsByTagName('p') as $p) {
        $header = trim($p->nodeValue);
    }

    $cells = array();
    foreach ($xml->getElementsByTagName('td') as $td) {
        $value = trim($td->nodeValue);
        // Compare with '' rather than empty() so a cell holding "0" is kept, and use
        // $value[0]: the $value{0} offset syntax is a parse error on PHP 8.
        if ($value !== '' && is_numeric($value[0])) {
            $cells[] = $value;
        }
    }

    // Missing positions become null instead of raising undefined-offset notices.
    return array(
        'headertext' => $header,
        'sapSON'     => isset($cells[0]) ? $cells[0] : null,
        'custPON'    => isset($cells[1]) ? $cells[1] : null,
        'legacyON'   => isset($cells[2]) ? $cells[2] : null,
        'pl'         => isset($cells[3]) ? $cells[3] : null,
        'pn'         => isset($cells[4]) ? $cells[4] : null,
    );
}
?>

现在我想把标题“EXTERNAL ORDER NUMBER REFERENCE”(外部订单号参考)保存到一个变量中,以便稍后使用。

此外,第一行的第二,第三和第四列分别对应于第二行的第二,第三和第四列的值。我还想将这些值保存到变量中。所以基本上,我需要一个PHP脚本来解析这个HTML文件并返回以下内容:

// Desired result: one variable per section header and per labelled value of the sample page.
// Section 1: header info.
$header1 = "HEADER INFO";
$viewObject = "6600422";
$BPO = "G37147359-000000";
$AckDate = "2012-05-28";
$Operations = "PPS_Queue, PPS_Build, PPS_BoxAll, JPN_End";
// Section 2: external order numbers.
$header2 = "EXTERNAL ORDER NUMBER REFERENCE";
$sapSON = "0310363858";
$custPON = "77340892008-120413";
$legacyON = "89FF09378001";
// Section 3: products.
$header3 = "PRODUCTS FOR THIS WORK OBJECT/OPERATION(S)";
$pl = "3C";
$pn = "AP703B";
$qty = "1";
$options = "&nbsp;";
$serialNo = "2S6219000G";

等等……基本上,我需要把表格的全部内容保存到变量中,因为之后我要把它们存入自己的数据库,据此生成报告,并为其中的某些字段生成条形码。

感谢您的帮助!

仅供参考:我无权访问数据库,所以我所能做的就是解析这个HTML文件并将值保存到我以后可以存储到我的数据库的变量中。另外,请注意标题是常量,唯一更改的值是不同订单的数字。

1 个答案:

答案 0 :(得分:2)

请尝试下面的代码(See it in action):

<?php
// Single-section sample: only the "EXTERNAL ORDER NUMBER REFERENCE" paragraph and its
// table (one label row, one value row), used as input for parse_data() below.
$string = '<p> <b>EXTERNAL ORDER NUMBER REFERENCE</b>
    <table width=100% cellspacing=0>
      <tr align=left>
        <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
        <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>SAP Sales Order Number</b></font></td>
        <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Customer P.O. Number</b></font></td>
        <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Legacy Order Number</b></font></td>
      </tr>
      <tr align=left>
        <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
        <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">0310363858</font></td>
        <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">77340892008-120413</font></td>
        <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">89FF09378001</font></td>
  </tr>
    </table>
</p>
';

// Parse the sample, then import every returned key as a variable of the same name.
$result = parse_data($string);
extract($result);

// Emit each extracted field followed by a line break, in the original order.
echo implode('<br />', array($headertext, $sapSON, $custPON, $legacyON)), '<br />';


/**
 * Extract the header text and the first three digit-led table cells from an HTML fragment.
 *
 * Every <td> whose trimmed text starts with a digit is collected in document order and
 * mapped positionally onto sapSON, custPON and legacyON. The header is the trimmed text
 * of the last <p> element found.
 *
 * @param string $string HTML markup to scan.
 * @return array Keys headertext, sapSON, custPON and legacyON. A key is null when the
 *               markup does not contain a matching element.
 */
function parse_data($string){
    // Remove the double-&nbsp; filler so spacer cells come out empty.
    $string = str_replace('&nbsp;&nbsp;', '', $string);

    $xml = new DOMDocument();
    // loadHTML() does not accept an empty string; an empty input simply yields no matches.
    if ($string !== '') {
        // Collect libxml's complaints about the legacy markup internally instead of
        // silencing the call with @, then restore the caller's setting.
        $previous = libxml_use_internal_errors(true);
        $xml->loadHTML($string);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
    }

    // Stays null without any <p>; with several, the last one wins.
    $header = null;
    foreach ($xml->getElementsByTagName('p') as $p) {
        $header = trim($p->nodeValue);
    }

    $cells = array();
    foreach ($xml->getElementsByTagName('td') as $td) {
        $value = trim($td->nodeValue);
        // Compare with '' rather than empty() so a cell holding "0" is kept, and use
        // $value[0]: the $value{0} offset syntax is a parse error on PHP 8.
        if ($value !== '' && is_numeric($value[0])) {
            $cells[] = $value;
        }
    }

    // Missing positions become null instead of raising undefined-offset notices.
    return array(
        'headertext' => $header,
        'sapSON'     => isset($cells[0]) ? $cells[0] : null,
        'custPON'    => isset($cells[1]) ? $cells[1] : null,
        'legacyON'   => isset($cells[2]) ? $cells[2] : null,
    );
}
?>

编辑版本2(多行):

由于每次迭代处理的表格结构各不相同,代码变得相当复杂,不过我喜欢挑战。给你,希望对你有帮助……

<?php
// $string is not defined in this snippet; it is expected to already hold the page HTML.
$result = parse_data($string);

// Create variables from values: every key of every section becomes a variable of that
// name. An existing variable with the same name is overwritten, and a key shared by
// two sections (e.g. Qty) ends up holding the value of the later section.
foreach($result as $key=>$value){
    foreach($value as $key_b=>$value_b){
        $$key_b = $value_b;
    }
}
/* --New Available Variables--
    (sample values for the order page shown in the question)
    $header0 = HEADER INFO
    $ViewObject = 6600422
    $BPO = G37147359-000000
    $AckDate = 2012-05-28
    $Operations = PPS_Queue, PPS_Build, PPS_BoxAll, JPN_End
    $header1 = EXTERNAL ORDER NUMBER REFERENCE
    $SAPSalesOrderNumber = 0310363858
    $CustomerPONumber = 77340892008-120413
    $LegacyOrderNumber = 89FF09378001
    $header2 = PRODUCTS FOR THIS WORK OBJECT/OPERATION(S)
    $PL = 3C
    $Product = AP703B
    $Qty = 1
    $Options =  
    $Serial = 2S6219000G
    $header3 = Station Info
    $StartStation = JPN_End
    $Location = Done
    $Station = 
    $BirthDateTime = 2012-05-23 14
        (cut at the ':' inside the time when the label is split off with a plain
         explode(':'); the cell itself reads 2012-05-23 14:20:32 SGT)
    $PowerCord = 
    $Voltage = 
    $header4 = MATERIAL LIST FOR THIS WORK OBJECT/OPERATION(S)
    $PartNumber = AP703B@@
    $Description = OEM Generic 1U SAS Enclosure
    $BBType = BOM
    $MaterialLocation = ASSY
    $SerialNumber = 2S6219000G
*/

/**
 * Split the order page into its <hr>-separated sections and turn each one into
 * label => value pairs.
 *
 * The layout of a section is decided by its position:
 *   - sections 0 and 3 carry "Label: value" inside every cell;
 *   - sections 1, 2 and 4 are tables whose first row holds the labels and whose
 *     following rows hold the values, paired column by column (when there are
 *     several value rows, later rows overwrite earlier ones);
 *   - any further section only contributes its header.
 * A label becomes an array key after everything but ASCII letters is stripped
 * ("Product #" -> "Product", "Birth Date/Time" -> "BirthDateTime").
 *
 * @param string $string HTML of the whole page.
 * @return array One array per non-blank section, indexed from 0. Each holds
 *               'header<N>' (text of the section's last <p>, if any) followed by
 *               one entry per label.
 */
function parse_data($string){
    // Remove the double-&nbsp; filler so spacer cells come out empty.
    $string = str_replace('&nbsp;&nbsp;', '', $string);

    // Collect libxml's complaints about the legacy markup internally instead of
    // silencing every call with @; the caller's setting is restored before returning.
    $previous = libxml_use_internal_errors(true);

    $ret = array();
    $entry = 0;
    foreach (explode('<hr>', $string) as $part) {
        // A blank part (empty input, leading/trailing <hr>) cannot be parsed and
        // must not consume a section number.
        if (trim($part) === '') {
            continue;
        }

        $html = new DOMDocument();
        $html->loadHTML($part);
        libxml_clear_errors();

        $ret[$entry] = array();

        //Get Header
        foreach ($html->getElementsByTagName('p') as $p) {
            $ret[$entry]['header'.$entry] = trim($p->nodeValue);
        }

        switch ($entry) {
            case 0:
            case 3:
                // "Label: value" cells.
                foreach ($html->getElementsByTagName('td') as $td) {
                    $value = trim($td->nodeValue);
                    if ($value === '') {
                        continue;
                    }
                    // Split on the first colon only, so values that contain colons
                    // themselves (e.g. "14:20:32") stay intact.
                    $split = explode(':', $value, 2);
                    $key = preg_replace('/[^a-zA-Z]/s', '', $split[0]);
                    if ($key === '') {
                        continue;
                    }
                    // A cell without a colon is a label with an empty value.
                    $ret[$entry][$key] = isset($split[1]) ? trim($split[1]) : '';
                }
                break;
            case 1:
            case 2:
            case 4:
                // Label row followed by value rows. Pairing by column index keeps
                // the columns aligned even when a value cell is empty.
                $labels = null;
                foreach ($html->getElementsByTagName('tr') as $tr) {
                    $cells = array();
                    foreach ($tr->getElementsByTagName('td') as $td) {
                        $cells[] = trim($td->nodeValue);
                    }
                    if ($labels === null) {
                        $labels = $cells;
                        continue;
                    }
                    foreach ($cells as $column => $value) {
                        if (!isset($labels[$column])) {
                            continue;
                        }
                        $key = preg_replace('/[^a-zA-Z]/s', '', $labels[$column]);
                        // Spacer columns have no label text and are skipped.
                        if ($key === '') {
                            continue;
                        }
                        $ret[$entry][$key] = $value;
                    }
                }
                break;
        }
        $entry++;
    }

    libxml_use_internal_errors($previous);

    return $ret;
}
?>