The horrible system we use in my company gives me the following output:
{
party:"bases",
number:"1",
id:"xx_3039366",
url:"systen01-ny.com",
target:"_self",
address:"Ch\u00e3o as Alminhas-Medas,Uteiros de Gatos e Fontes Longaq<br/>64320-761 ANHADOS LdA",
coordinate:{
x:90.995262145996094,
y:-1.3394836426
},
contactDetails:{
id:"366",
phone:"xxxxxx",
mobile:"",
fax:"xxxx 777 235",
c2c:!0
},
parameters:"Flex Am\u00e1vel Silva,hal,,EN_30336,S,786657,1,0,",
text:"Vila Nova de Loz C\u00f4a,os melhores vinhos, v\u00e1rias. Produtor/exportador/com\u00e9rcio",
website:null,
mail:"",
listing:"paid",
pCode:"64",
name:"xpto Am\u00e1vel Costa",
logo:{src:"http://ny.test.gif",
altname:"xpto Am\u00e1vel Costa"},
bookingUrl:"",
ipUrl:"",
ipLabel:"",
customerId:"7657",
addressId:"98760",
combined:null,
showReviews:!0
}
I would like to know if there is a way to convert this output to an array, as if it were JSON, or into some other format that lets me manipulate the data in PHP. json_decode() does not work on it.
答案 0 :(得分:4)
就像我说的那样,这里是你自己的Json Object解析器。
先提醒一句:这类事情与其说是科学,不如说是艺术,所以如果你的输入与你给出的例子不同,它可能会出问题。鉴于样本量很小(只有1个文档),我无法保证它在这个例子之外也能正常工作。
我本想解释它是如何工作的,但恐怕一般人很难看懂。
说真的,这很有趣,我偶尔也喜欢接受一次挑战。
<?php
/**
 * Tokenizes a JSON-like string and hands the token stream to parseJsonTokens().
 *
 * Every entry of $tokens is wrapped in a named capture group and the groups are
 * joined, in array order, into one case-insensitive alternation. Each match
 * becomes a token record with the keys 'content', 'type' and 'offset'.
 *
 * @param string $subject Text to tokenize.
 * @param array  $tokens  Map of token name => regex fragment (without delimiters).
 *
 * @return mixed false when the combined pattern matches nothing (or
 *               preg_match_all() fails); otherwise whatever parseJsonTokens()
 *               returns for the token stream.
 */
function parseJson($subject, $tokens)
{
    // One named group per token type, so a match can be traced back to its type.
    $groups = [];
    foreach ($tokens as $name => $regex) {
        $groups[] = "(?P<$name>$regex)";
    }
    $pattern = "/" . implode('|', $groups) . "/i";

    if (!preg_match_all($pattern, $subject, $matches, PREG_OFFSET_CAPTURE)) {
        return false;
    }

    $types = array_keys($tokens);
    $lexer_stream = [];
    foreach ($matches[0] as $index => $whole) {
        // Find the named group that actually took part in this match:
        // groups that did not participate report an offset of -1.
        $match = [];
        foreach ($types as $type) {
            $match = $matches[$type][$index];
            if (is_array($match) && $match[1] != -1) {
                break;
            }
        }

        $lexer_stream[] = [
            'content' => $match[0],
            'type' => $type,
            'offset' => $match[1]
        ];
    }

    return parseJsonTokens($lexer_stream);
}
/**
 * Builds a PHP array from the token stream of one `{ ... }` object.
 *
 * The array's internal pointer is the read cursor. On entry the pointer is
 * expected to sit on the opening brace of the object; that token is skipped
 * unconditionally. Nested objects are handled by recursing on the same
 * (by-reference) stream, so the cursor is shared between recursion levels.
 *
 * Each token is an array with at least the keys 'content' and 'type'.
 * Recognized types: T_WHITESPACE, T_STRING, T_COLON, T_ENCAP_STRING, T_NULL,
 * T_COMMA, T_OPEN_BRACE, T_CLOSE_BRACE. Any other type triggers an
 * E_USER_ERROR.
 *
 * All scalar values are returned as strings (numbers and tokens such as `!0`
 * are not converted); `null` becomes PHP null; nested objects become arrays.
 *
 * @param array $lexer_stream Token stream; its internal pointer is advanced.
 *
 * @return array|null The parsed key => value map, or null when the stream
 *                    ends before the object's closing brace is seen.
 */
function parseJsonTokens( array &$lexer_stream ){
    $result = [];
    next($lexer_stream); // advance one: skip the opening brace of this object
    $mode = 'key'; // items start in key mode ( key : value )
    $key = '';
    $value = '';
    while($current = current($lexer_stream)){
        $content = $current['content'];
        $type = $current['type'];
        switch($type){
            case 'T_WHITESPACE': // ignore whitespace
                next($lexer_stream);
                break;
            case 'T_STRING':
                // keys are always bare strings, but bare strings are not always keys
                if( $mode == 'key')
                    $key .= $content;
                else
                    $value .= $content;
                next($lexer_stream); // consume a token
                break;
            case 'T_COLON':
                $mode = 'value'; // everything up to the next comma/brace is the value
                next($lexer_stream); // consume a token
                break;
            case 'T_ENCAP_STRING':
                // Quoted strings are always content. Remove exactly one
                // delimiting quote from each end, and do it BEFORE decoding:
                // trimming every edge quote after decoding would also eat an
                // escaped quote (\") or a decoded \u0022 that belongs to the value.
                $inner = $content;
                if (strlen($inner) >= 2 && $inner[0] === '"' && substr($inner, -1) === '"') {
                    $inner = (string) substr($inner, 1, -1);
                }
                $value .= unicode_decode($inner);
                next($lexer_stream); // consume a token
                break;
            case 'T_NULL':
                $value = null; // literal null replaces whatever was collected so far
                next($lexer_stream); // consume a token
                break;
            case 'T_COMMA': // comma ends an item
                // Store only when an item was actually started, so a stray or
                // trailing comma does not create a bogus '' => '' entry.
                if ($key !== '' || $mode == 'value') {
                    $result[$key] = $value;
                }
                // reset for the next item
                $mode = 'key';
                $key = '';
                $value = '';
                next($lexer_stream); // consume a token
                break;
            case 'T_OPEN_BRACE': // start of a sub-block
                // The recursive call skips the brace itself and leaves the
                // cursor just past the matching close brace.
                $value = parseJsonTokens($lexer_stream);
                break;
            case 'T_CLOSE_BRACE': // end of this block
                // Store the pending item, if any; an empty object `{}` (or one
                // ending in a trailing comma) has nothing pending.
                if ($key !== '' || $mode == 'value') {
                    $result[$key] = $value;
                }
                next($lexer_stream); // consume a token
                return $result;
            default:
                print_r($current);
                trigger_error("Unknown token $type value $content", E_USER_ERROR);
                // If a user error handler swallowed the error, skip the token;
                // otherwise this loop would spin on it forever.
                next($lexer_stream);
                break;
        }
    }
    // The stream ran out before this object's closing brace was found.
    return null;
}
//@see https://stackoverflow.com/questions/2934563/how-to-decode-unicode-escape-sequences-like-u00ed-to-proper-utf-8-encoded-cha
/**
 * preg_replace_callback() handler that turns one \uXXXX escape into UTF-8.
 *
 * @param array $match Regex match; index 1 holds the hex digits of the escape.
 *
 * @return string The hex digits packed to bytes and converted from UCS-2BE to UTF-8.
 */
function replace_unicode_escape_sequence($match) {
    $hexDigits = $match[1];
    // 'H*' packs the hex string high nibble first, i.e. as big-endian bytes.
    $rawBytes = pack('H*', $hexDigits);

    return mb_convert_encoding($rawBytes, 'UTF-8', 'UCS-2BE');
}
/**
 * Replaces \uXXXX escape sequences in a string with their UTF-8 characters.
 *
 * A high surrogate immediately followed by a low surrogate
 * (e.g. \ud83d\ude00) is decoded as ONE UTF-16 unit, so characters outside
 * the Basic Multilingual Plane come out intact instead of as two broken
 * halves. Every other escape is decoded on its own by
 * replace_unicode_escape_sequence(), exactly as before.
 *
 * @param string $str Text that may contain \uXXXX escapes (case-insensitive hex).
 *
 * @return string|null The decoded text, or null if the regex engine reports an error.
 */
function unicode_decode($str) {
    return preg_replace_callback(
        // Alternative 1: surrogate pair (groups 1 and 2). Alternative 2: single escape (group 3).
        '/\\\\u(d[89ab][0-9a-f]{2})\\\\u(d[c-f][0-9a-f]{2})|\\\\u([0-9a-f]{4})/i',
        function ($match) {
            if ($match[1] !== '') {
                // Both halves together form one big-endian UTF-16 sequence.
                return mb_convert_encoding(pack('H*', $match[1] . $match[2]), 'UTF-8', 'UTF-16BE');
            }
            // Hand the helper the match layout it expects: hex digits at index 1.
            return replace_unicode_escape_sequence([$match[0], $match[3]]);
        },
        $str
    );
}
// Sample input: the JSON-like output from the question. It is not valid JSON
// (keys are unquoted, and `!0` appears as a value), which is why json_decode()
// cannot read it. Single quotes keep the \uXXXX sequences as literal text so
// that the parser, not PHP, decodes them.
$str = '{
party:"bases",
number:"1",
id:"xx_3039366",
url:"systen01-ny.com",
target:"_self",
address:"Ch\u00e3o as Alminhas-Medas,Uteiros de Gatos e Fontes Longaq<br/>64320-761 ANHADOS LdA",
coordinate:{
x:90.995262145996094,
y:-1.3394836426
},
contactDetails:{
id:"366",
phone:"xxxxxx",
mobile:"",
fax:"xxxx 777 235",
c2c:!0
},
parameters:"Flex Am\u00e1vel Silva,hal,,EN_30336,S,786657,1,0,",
text:"Vila Nova de Loz C\u00f4a,os melhores vinhos, v\u00e1rias. Produtor/exportador/com\u00e9rcio",
website:null,
mail:"",
listing:"paid",
pCode:"64",
name:"xpto Am\u00e1vel Costa",
logo:{src:"http://ny.test.gif",
altname:"xpto Am\u00e1vel Costa"},
bookingUrl:"",
ipUrl:"",
ipLabel:"",
customerId:"7657",
addressId:"98760",
combined:null,
showReviews:!0
}';
// Token table: token name => regex fragment. parseJson() joins the fragments
// into one alternation in this order, so earlier entries win when several
// could match at the same position (e.g. T_NULL is listed before T_STRING so
// that `null` is not tokenized as a bare string).
//  - T_ENCAP_STRING: a double-quoted string; the lookbehind keeps a quote
//    preceded by a backslash from ending the match.
//  - T_STRING: bare keys and unquoted scalars (numbers, `!0`).
//  - T_UNKNOWN: catch-all for anything else; parseJsonTokens() has no case
//    for it, so it reaches the default branch and raises an error.
$tokens = [
'T_OPEN_BRACE' => '\{',
'T_CLOSE_BRACE' => '\}',
'T_NULL' => '\bnull\b',
'T_ENCAP_STRING' => '\".*?(?<!\\\\)\"',
'T_COLON' => ':',
'T_COMMA' => ',',
'T_STRING' => '[-a-z0-9_.!]+',
'T_WHITESPACE' => '[\r\n\s\t]+',
'T_UNKNOWN' => '.+?'
];
// Parse the sample and dump the resulting array as PHP source.
var_export( parseJson($str, $tokens) );
输出(这是每个人都想要的)
array (
'party' => 'bases',
'number' => '1',
'id' => 'xx_3039366',
'url' => 'systen01-ny.com',
'target' => '_self',
'address' => 'Chão as Alminhas-Medas,Uteiros de Gatos e Fontes Longaq<br/>64320-761 ANHADOS LdA',
'coordinate' =>
array (
'x' => '90.995262145996094',
'y' => '-1.3394836426',
),
'contactDetails' =>
array (
'id' => '366',
'phone' => 'xxxxxx',
'mobile' => '',
'fax' => 'xxxx 777 235',
'c2c' => '!0',
),
'parameters' => 'Flex Amável Silva,hal,,EN_30336,S,786657,1,0,',
'text' => 'Vila Nova de Loz Côa,os melhores vinhos, várias. Produtor/exportador/comércio',
'website' => NULL,
'mail' => '',
'listing' => 'paid',
'pCode' => '64',
'name' => 'xpto Amável Costa',
'logo' =>
array (
'src' => 'http://ny.test.gif',
'altname' => 'xpto Amável Costa',
),
'bookingUrl' => '',
'ipUrl' => '',
'ipLabel' => '',
'customerId' => '7657',
'addressId' => '98760',
'combined' => NULL,
'showReviews' => '!0',
)
你甚至可以在这里测试它(因为我是一个好人)
http://sandbox.onlinephpfunctions.com/code/3c1dcafb59abbf19f7f3209724dbdd4a46546c57
我借助这个SO帖子(见代码中的 @see 链接)修复了 \u00e… 之类的编码问题,所以要向他们致谢,因为我讨厌字符编码。
天哪,我就是喜欢一段漂亮的代码,真是太棒了。
干杯!