用PHP解析结构化文本数据

时间:2012-03-14 22:37:04

标签: php parsing mt940

我正在寻找各种(更好的)方法来解析PHP中的结构化文本数据并将这些数据转换为PHP对象图。我在PHP中看到了很多不同的解析器,用于各种基于文本的文件格式,但几乎所有这些解析器似乎都是一些脆弱的正则表达式链。必须有更好的方法!

在这个特定情况下,我希望解析MT940文件(银行账户交易)。但我也遇到了与其他文件格式相同的问题。我总是最终得到一大批正则表达式,这些正则表达式变得复杂,特别是在需要支持不同格式时。 MT940也有这个问题。 MT940不是严格定义的格式,几乎所有银行都使用稍微不同的方言。

那么,你如何设计更强大和可扩展的解析器来处理不同的方言?

这是MT940的一个示例语句,取自this question

{1:F01AHHBCH110XXX0000000000}{2:I940X           N2}{3:{108:XBS/091502}}{4:
:20:XBS/091202/0001
:25:5887/507004-50
:28C:140/1
:60F:C0914CHF7789,
:61:0912021202D36,80NTRFNONREF//0887-1202-29-941
04392579-0 LUTHY + xxx, ZUR
:86:6034?60LUTHY + xxxx, ZUR vom 01.12.09 um 16:28 Karten-Nr. 2232
2579-0
:62F:C091202CHF52,2
:64:C091302CHF52,2
-}

2 个答案:

答案 0 :(得分:3)

您可以使用此免费解析器(GPL 2.0):

http://www.kingsquare.nl/php-mt940

这是另一个:

http://www.butcher.art.pl/en/2010/09/tutoriale/parser-php-mt940-format-wyciagow-bankowych/

希望这能让你免于重新发明轮子。

  

那么,您如何设计更健壮且可扩展的解析器来处理不同的方言?

不幸的是,对此没有简单的答案。您必须沉下心来,熟悉您希望支持的所有变体。引自 Kingsquare 的页面:

  

解析器会尝试根据文件的前几行来确定它来自哪家发起银行,然后加载该银行对应的解析引擎。

这将需要大量的经验和研究。幸运的是,他们的代码可以帮助你。

答案 1 :(得分:-1)

/**
 * Minimal MT940 (bank account statement) reader.
 *
 * A file is processed in three passes:
 *   1. splitTransfers() cuts the file into statements (":20:" up to the end of
 *      the closing-balance line ":62F:" / ":62M:").
 *   2. to_details() cuts every statement into operations (one ":61:" line plus
 *      everything that follows it, normally a ":86:" narrative).
 *   3. parse() turns every operation into a flat associative array.
 *
 * Tags are only recognised at the beginning of a line, so text inside a
 * narrative (for example a "12:20:05" timestamp) is never mistaken for a tag.
 * Both CRLF and LF line endings are accepted.
 *
 * NOTE(review): the debit/credit mark of ":61:" is recognised but, as before,
 * not exposed in the result rows; 'amount' is always unsigned.
 */
class Mt940{

    /** Collaborator used only to obtain the environment passed to exceptions. */
    private $controler = null;

    /** File content of the current load() call, line endings normalised to "\n". */
    private $content = null;

    /** Raw text of every statement found by splitTransfers(). */
    private $transItem = array();
    private $transCount = 0;

    /** Raw text of every operation (":61:" + following lines) found by to_details(). */
    private $operationItem = array();
    private $operationCount = 0;

    /** Result rows built by parse(). */
    private $details = array();


    /**
     * @param \app\library\_PajaxController $controler supplies getEnv() for raised exceptions
     */
    public function __construct(\app\library\_PajaxController $controler)
    {
        $this->controler = $controler;
    }

    /**
     * Reads an MT940 file and returns its operations.
     *
     * @param string $file_path   path of the file to read
     * @param string $encodingIn  source encoding; conversion only happens when both encodings are given
     * @param string $encodingOut target encoding
     *
     * @return array list of rows with the keys 'unique', 'amount', 'currencyDate',
     *               'approveDate', 'title' and 'name'
     *
     * @throws \app\library\_PajaxException when the file is missing or unreadable, the
     *                                      encoding conversion fails, a ":61:" line cannot
     *                                      be understood, or any other error occurs
     */
    public function load($file_path, $encodingIn='', $encodingOut='')
    {
        try {
            if (!file_exists($file_path)) {
                throw new \app\library\_PajaxException("File not found!", \app\library\_PajaxException::TEXT, $this->controler->getEnv());
            }

            // Start from a clean state so a second load() does not return
            // the rows of the previous one.
            $this->transItem      = array();
            $this->transCount     = 0;
            $this->operationItem  = array();
            $this->operationCount = 0;
            $this->details        = array();

            $raw = file_get_contents($file_path);
            if ($raw === false) {
                throw new \app\library\_PajaxException("File not readable!", \app\library\_PajaxException::TEXT, $this->controler->getEnv());
            }

            if ($encodingIn != '' && $encodingOut != '') {
                // iconv() reports illegal input with a notice and a false return
                // value; check the result instead of parsing `false` as empty.
                $raw = @iconv($encodingIn, $encodingOut, $raw);
                if ($raw === false) {
                    throw new \app\library\_PajaxException("Encoding conversion failed!", \app\library\_PajaxException::TEXT, $this->controler->getEnv());
                }
            }

            // One line-ending convention for all later passes.
            $this->content = str_replace(array("\r\n", "\r"), "\n", $raw);

            $this->splitTransfers();
            $this->to_details();
            $this->parse();

            return $this->details;
        } catch (\app\library\_PajaxException $ex) {
            throw $ex;
        } catch (\Throwable $thr) {
            // getEnv() lives on the controller, not on this class.
            throw new \app\library\_PajaxException($thr, \app\library\_PajaxException::THROWABLE, $this->controler->getEnv());
        }
    }

    /**
     * Returns the byte offset of the first match of $pattern at or after $offset.
     *
     * @param string $pattern PCRE pattern (callers pass line-anchored tag patterns)
     * @param string $subject text to search
     * @param int    $offset  byte offset to start searching from
     *
     * @return int|false offset of the match, or false when there is none
     */
    private function findTag($pattern, $subject, $offset)
    {
        // preg_match() fails with an error for offsets beyond the subject.
        if ($offset > strlen($subject)) {
            return false;
        }

        if (preg_match($pattern, $subject, $match, PREG_OFFSET_CAPTURE, $offset) === 1) {
            return $match[0][1];
        }

        return false;
    }

    /**
     * Removes line breaks so a field that spans several lines becomes one string.
     * Lines are joined without a separator.
     */
    private function flatten($text)
    {
        return str_replace(array("\r\n", "\r", "\n"), '', $text);
    }

    /**
     * Pass 1: collects every statement (":20:" through the end of the
     * ":62F:" / ":62M:" line) into $this->transItem.
     *
     * A trailing statement without a closing balance is ignored; scanning
     * stops there instead of looping forever.
     */
    private function splitTransfers()
    {
        $content = $this->content;
        $length  = strlen($content);
        $cursor  = 0;

        while (true) {
            $start = $this->findTag('/^:20:/m', $content, $cursor);
            if ($start === false) {
                break;
            }

            $closing = $this->findTag('/^:62[FM]:/m', $content, $start);
            if ($closing === false) {
                break; // truncated statement: nothing more can be extracted
            }

            // The statement ends with the closing-balance line, which may be
            // the very last line of the file and lack a line break.
            $lineEnd = strpos($content, "\n", $closing);
            if ($lineEnd === false) {
                $lineEnd = $length;
            }

            $this->transItem[$this->transCount] = substr($content, $start, $lineEnd - $start);
            $this->transCount++;

            $cursor = $lineEnd;
        }
    }

    /**
     * Pass 2: collects every operation of every statement into
     * $this->operationItem. An operation starts at a ":61:" tag and ends right
     * before the next ":61:" or the closing balance (trailing line breaks removed).
     */
    private function to_details()
    {
        foreach ($this->transItem as $trans) {
            $cursor = 0;

            while (true) {
                $start = $this->findTag('/^:61:/m', $trans, $cursor);
                if ($start === false) {
                    break;
                }

                $end = $this->findTag('/^:(?:61|62[FM]):/m', $trans, $start + 4);
                if ($end === false) {
                    break;
                }

                // Keep the complete operation text; only the line break in
                // front of the next tag is dropped.
                $this->operationItem[$this->operationCount] = rtrim(substr($trans, $start, $end - $start), "\r\n");
                $this->operationCount++;

                $cursor = $end;
            }
        }
    }

    /**
     * Extracts value date, entry date and amount from the content of a ":61:" field.
     *
     * Accepted layout: YYMMDD, optional MMDD, debit/credit mark (C, D, RC, RD,
     * EC, ED), optional one-letter funds code, amount with a decimal comma.
     * When that layout does not match, the historic fixed offsets (amount
     * starting at position 12) are tried.
     *
     * @param string $str61 ":61:" content without the tag and without line breaks
     *
     * @return array [currencyDate, approveDate, amount]; amount has two decimals and a dot
     *
     * @throws \app\library\_PajaxException when no amount can be determined
     */
    private function parseStatementLine($str61)
    {
        // Decimals are capped at two, i.e. extra digits are cut off, not rounded.
        if (preg_match('/^(\d{6})(\d{4})?([ER]?[CD])([A-Z])?(\d+,\d{0,2})/', $str61, $m) === 1) {
            $amount = (float) str_replace(',', '.', $m[5]);

            return array($m[1], $m[2], number_format($amount, 2, '.', ''));
        }

        // Historic layout: 6 + 4 digits, two mark characters, amount from offset 12.
        $comma = strlen($str61) > 12 ? strpos($str61, ',', 12) : false;
        if ($comma !== false) {
            $amount = str_replace(',', '.', substr($str61, 12, $comma - 12 + 3));
            if (is_numeric($amount)) {
                return array(
                    substr($str61, 0, 6),
                    substr($str61, 6, 4),
                    number_format((float) $amount, 2, '.', ''),
                );
            }
        }

        throw new \app\library\_PajaxException("Unrecognised :61: statement line: " . $str61, \app\library\_PajaxException::TEXT, $this->controler->getEnv());
    }

    /**
     * Pass 3: converts every collected operation into a result row.
     * Operations without a ":86:" narrative are skipped.
     *
     * @throws \app\library\_PajaxException
     */
    private function parse()
    {
        foreach ($this->operationItem as $operation) {
            // Every operation starts with ":61:"; look for the narrative tag
            // at a line start behind it.
            $tag86 = $this->findTag('/^:86:/m', $operation, 4);
            if ($tag86 === false) {
                continue;
            }

            $str61 = $this->flatten(substr($operation, 4, $tag86 - 4));
            $str86 = $this->flatten(substr($operation, $tag86 + 4));

            list($currencyDate, $approveDate, $amount) = $this->parseStatementLine($str61);

            // The narrative sub-field separator differs between banks: use the
            // first candidate that actually splits the text.
            $exp = array($str86);
            foreach (array('^', '>', '<', '&', '*', ':') as $delimiter) {
                $exp = explode($delimiter, $str86);
                if (count($exp) >= 2) {
                    break;
                }
            }

            // Parts 0-1 form the title, parts 2-5 the counterparty name; every
            // part is followed by a single space.
            $title = '';
            foreach (array_slice($exp, 0, 2) as $part) {
                $title .= $part . ' ';
            }

            $cl_name = '';
            foreach (array_slice($exp, 2, 4) as $part) {
                $cl_name .= $part . ' ';
            }

            // Identifier derived from the narrative text only (not a security hash).
            $unique = md5(implode('', $exp));

            $this->details[] = array(
                'unique'       => $unique,
                'amount'       => $amount,
                'currencyDate' => $currencyDate,
                'approveDate'  => $approveDate,
                'title'        => $title,
                'name'         => $cl_name,
            );
        }
    }
}