首先,我确信这个问题我几个小时前就应该发现了,但我就是看不出问题出在哪里。
情况是这样的:我正在尝试编写一个可重用的(非递归)函数,把一段 HTML 按标题元素拆分,解析成多维数组。基本上,最终结果不应超过 7 层(H1 到 H6,再加上 H6 的子元素)。此外还有一个特殊处理:出现在第一个 H1 之前的元素会被放入一个标题为“Top”的“特殊”部分。
<?php
/**
 * Group a flat list of DOM elements into sections delimited by one heading level.
 *
 * Every `h<level>` element opens a new section of the form
 * array('title' => <heading text>, 'children' => array(<heading>, ...following elements)).
 * Elements seen while no section is open are stored as bare entries, one per index.
 * For level 1 the result starts with a pre-opened section titled 'Top' at index 0,
 * which collects everything that precedes the first h1.
 *
 * @param mixed $section List of elements exposing tagName/textContent; any
 *                       non-array value is returned unchanged.
 * @param int   $level   Heading level (1-6) whose tag splits the list.
 * @return mixed Integer-keyed array of sections and bare elements, or $section
 *               itself when it is not an array.
 */
function sortEntrySections($section, $level = 1) {
    // Nothing to group unless we were handed a list.
    if (!is_array($section)) {
        return $section;
    }

    $headingTag = 'h' . $level;

    // Only the top level gets the implicit leading "Top" bucket.
    $grouped = array();
    if ($level === 1) {
        $grouped[] = array('title' => 'Top', 'children' => array());
    }

    // Index of the entry currently being filled; it is advanced *before* each
    // new entry is written, so levels without a "Top" bucket start at key 1.
    $cursor = 0;

    foreach ($section as $node) {
        $isHeading = ($node->tagName == $headingTag);
        $hasOpenSection = isset($grouped[$cursor]) && is_array($grouped[$cursor]);

        // Ordinary element following a heading (or "Top"): append to that section.
        if (!$isHeading && $hasOpenSection) {
            $grouped[$cursor]['children'][] = $node;
            continue;
        }

        // Otherwise start a new entry: a section for a heading, or the bare
        // element itself when no section is open yet.
        $cursor++;
        $grouped[$cursor] = $isHeading
            ? array('title' => $node->textContent, 'children' => array($node))
            : $node;
    }

    return $grouped;
}
/**
 * Parse the current library entry's HTML body into a tree of heading sections.
 *
 * Reads the markup from $GLOBALS['libraryEntry']['body'], collects the element
 * children of the parsed <body>, groups them by h1 via sortEntrySections(), and
 * then nests every h1 section down through h6.
 *
 * Changes from the previous implementation:
 *  - The hand-unrolled loops tested isset($list['children']) on the integer-keyed
 *    list returned by sortEntrySections(), which is never set, so nothing below
 *    h2 was ever grouped. Nesting is now done per section entry.
 *  - Parser warnings are collected through libxml instead of being hidden with
 *    the @ operator, which also hid unrelated errors.
 *  - Non-ASCII characters are protected with mb_encode_numericentity() rather
 *    than the 'HTML-ENTITIES' pseudo-encoding, which is deprecated in PHP 8.2.
 *  - A missing or empty body no longer reaches DOMDocument::loadHTML(), which
 *    rejects empty input on PHP 8.
 *
 * @return array Index 0 is the flat 'Top' section; each following entry is an
 *               h1 section whose 'children' hold the nested h2..h6 structure.
 */
function breakupEntry() {
    $html = isset($GLOBALS['libraryEntry']['body']) ? (string) $GLOBALS['libraryEntry']['body'] : '';

    // Turn every non-ASCII code point into a numeric entity so libxml's HTML
    // parser cannot misread the UTF-8 bytes as ISO-8859-1.
    $markup = mb_encode_numericentity(
        html_entity_decode($html),
        array(0x80, 0x10FFFF, 0, 0x1FFFFF),
        'UTF-8'
    );

    /* Collect the top-level elements of the body */
    $elements = array();
    if (trim($markup) !== '') {
        $body = new DOMDocument();

        // Silence only libxml's own diagnostics, then restore the caller's setting.
        $previous = libxml_use_internal_errors(true);
        $body->loadHTML($markup);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        $formattedBody = new DOMDocument();
        foreach ($body->getElementsByTagName('*') as $child) {
            if ($child->tagName !== 'html' && $child->tagName !== 'body' && $child->parentNode->tagName === 'body') {
                $elements[] = $formattedBody->importNode($child, true);
            }
        }
    }

    /* Build Multidimensional Array of Sections */
    $sections = sortEntrySections($elements, 1);

    // Index 0 is the "Top" bucket (content before the first h1); it stays flat.
    $total = count($sections);
    for ($i = 1; $i < $total; $i++) {
        $sections[$i]['children'] = nestEntrySections($sections[$i]['children'], 2);
    }

    return $sections;
}

/**
 * Group $children by the heading at $level, then nest each resulting section
 * by the next heading level, stopping after h6.
 *
 * @param array $children Flat list of elements belonging to one parent section.
 * @param int   $level    Heading level (2-6) to split on at this depth.
 * @return array Output of sortEntrySections() with every section's 'children'
 *               replaced by its own nested grouping.
 */
function nestEntrySections($children, $level) {
    $grouped = sortEntrySections($children, $level);

    // h6 is the deepest heading, so its sections keep a flat child list.
    if ($level >= 6 || !is_array($grouped)) {
        return $grouped;
    }

    foreach ($grouped as $key => $entry) {
        // Bare elements (content before the first sub-heading) are left as-is.
        if (is_array($entry) && isset($entry['children'])) {
            $grouped[$key]['children'] = nestEntrySections($entry['children'], $level + 1);
        }
    }

    return $grouped;
}
// Sample entry body used to exercise breakupEntry(): three paragraphs before any
// heading, followed by headings nested from h1 down to h6, each heading being
// followed by three paragraphs of its own.
$body = <<<EOD
<p>Pre Header Section Content 1</p>
<p>Pre Header Section Content 2</p>
<p>Pre Header Section Content 3</p>
<h1>Header 1</h1>
<p>Header 1 Section Content 1</p>
<p>Header 1 Section Content 2</p>
<p>Header 1 Section Content 3</p>
<h2>Header 1.1</h2>
<p>Header 1 Subheader 1 Section Content 1</p>
<p>Header 1 Subheader 1 Section Content 2</p>
<p>Header 1 Subheader 1 Section Content 3</p>
<h3>Header 1.1.1</h3>
<p>Header 1 Subheader 1 Subheader 1 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Section Content 3</p>
<h4>Header 1.1.1.1</h4>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Section Content 3</p>
<h5>Header 1.1.1.1.1</h5>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Section Content 3</p>
<h6>Header 1.1.1.1.1.1</h6>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Section Content 3</p>
<h6>Header 1.1.1.1.1.2</h6>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 2 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 2 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 2 Section Content 3</p>
<h6>Header 1.1.1.1.1.3</h6>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 3 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 3 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 3 Section Content 3</p>
<h5>Header 1.1.1.1.2</h5>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 2 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 2 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 2 Section Content 3</p>
<h5>Header 1.1.1.1.3</h5>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 3 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 3 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 3 Section Content 3</p>
<h4>Header 1.1.1.2</h4>
<p>Header 1 Subheader 1 Subheader 1 Subheader 2 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 2 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 2 Section Content 3</p>
<h4>Header 1.1.1.3</h4>
<p>Header 1 Subheader 1 Subheader 1 Subheader 3 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 3 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 3 Section Content 3</p>
<h3>Header 1.1.2</h3>
<p>Header 1 Subheader 1 Subheader 2 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 2 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 2 Section Content 3</p>
<h3>Header 1.1.3</h3>
<p>Header 1 Subheader 1 Subheader 3 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 3 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 3 Section Content 3</p>
<h2>Header 1.2</h2>
<p>Header 1 Subheader 2 Section Content 1</p>
<p>Header 1 Subheader 2 Section Content 2</p>
<p>Header 1 Subheader 2 Section Content 3</p>
<h2>Header 1.3</h2>
<p>Header 1 Subheader 3 Section Content 1</p>
<p>Header 1 Subheader 3 Section Content 2</p>
<p>Header 1 Subheader 3 Section Content 3</p>
<h1>Header 2</h1>
<p>Header 2 Section Content 1</p>
<p>Header 2 Section Content 2</p>
<p>Header 2 Section Content 3</p>
<h1>Header 3</h1>
<p>Header 3 Section Content 1</p>
<p>Header 3 Section Content 2</p>
<p>Header 3 Section Content 3</p>
EOD;
// breakupEntry() takes no arguments; it reads the markup from
// $GLOBALS['libraryEntry']['body'], so the fixture is published as a global here.
$libraryEntry = array('body' => $body);
$results = breakupEntry();
// Dump the resulting section structure inside a <textarea> for inspection.
echo '<textarea>'; var_dump($results); echo '</textarea>';
?>
答案 0 :(得分:1)
我把这段代码重写了六次,每次都会冒出一个不同的问题,但我还是坚持了下来。最后,我把它重写成一个有限递归的函数,通过限制 $level 变量的取值范围,确保递归不会超出预期的深度。
<?php
/**
 * Recursively group a flat list of DOM elements into heading-delimited sections.
 *
 * At the given level every `h<level>` element opens a section of the form
 * array('title' => <heading text>, 'children' => array(<heading>, ...following elements)).
 * Elements seen while no section is open are stored as bare entries, one per index.
 * For level 1 the result starts with a pre-opened section titled 'Top' at index 0.
 * After grouping, each section's 'children' list is itself grouped by the next
 * heading level; the level is clamped to 1..6, which bounds the recursion depth.
 *
 * @param mixed $section List of elements exposing tagName/textContent; any
 *                       non-array value is returned unchanged.
 * @param int   $level   Heading level to split on; values outside 1..6 are clamped.
 * @return mixed Integer-keyed array of nested sections and bare elements, or
 *               $section itself when it is not an array.
 */
function sortEntrySections($section, $level = 1) {
    // Nothing to group unless we were handed a list.
    if (!is_array($section)) {
        return $section;
    }

    // Force the level into the range of real heading tags (h1..h6).
    $level = max(1, min(6, intval($level)));
    $headingTag = 'h' . $level;

    // Only the top level gets the implicit leading "Top" bucket.
    $grouped = array();
    if ($level === 1) {
        $grouped[] = array('title' => 'Top', 'children' => array());
    }

    // Index of the entry currently being filled; it is advanced *before* each
    // new entry is written, so levels without a "Top" bucket start at key 1.
    $cursor = 0;

    foreach ($section as $node) {
        $isHeading = ($node->tagName == $headingTag);
        $hasOpenSection = isset($grouped[$cursor]) && is_array($grouped[$cursor]);

        // Ordinary element following a heading (or "Top"): append to that section.
        if (!$isHeading && $hasOpenSection) {
            $grouped[$cursor]['children'][] = $node;
            continue;
        }

        // Otherwise start a new entry: a section for a heading, or the bare
        // element itself when no section is open yet.
        $cursor++;
        $grouped[$cursor] = $isHeading
            ? array('title' => $node->textContent, 'children' => array($node))
            : $node;
    }

    // Descend one heading level inside every section; h6 sections stay flat.
    if ($level < 6) {
        foreach (array_keys($grouped) as $key) {
            if (is_array($grouped[$key]) && isset($grouped[$key]['children'])) {
                $grouped[$key]['children'] = sortEntrySections($grouped[$key]['children'], $level + 1);
            }
        }
    }

    return $grouped;
}
/**
 * Parse the current library entry's HTML body into a tree of heading sections.
 *
 * Reads the markup from $GLOBALS['libraryEntry']['body'], collects the element
 * children of the parsed <body>, and hands them to sortEntrySections(), which
 * performs the grouping.
 *
 * Changes from the previous implementation:
 *  - Parser warnings are collected through libxml instead of being hidden with
 *    the @ operator, which also hid unrelated errors.
 *  - Non-ASCII characters are protected with mb_encode_numericentity() rather
 *    than the 'HTML-ENTITIES' pseudo-encoding, which is deprecated in PHP 8.2.
 *  - A missing or empty body no longer reaches DOMDocument::loadHTML(), which
 *    rejects empty input on PHP 8; the element list is simply empty instead.
 *  - The unused local counter was removed.
 *
 * @return array Whatever sortEntrySections() returns for the collected elements.
 */
function breakupEntry() {
    $html = isset($GLOBALS['libraryEntry']['body']) ? (string) $GLOBALS['libraryEntry']['body'] : '';

    // Turn every non-ASCII code point into a numeric entity so libxml's HTML
    // parser cannot misread the UTF-8 bytes as ISO-8859-1.
    $markup = mb_encode_numericentity(
        html_entity_decode($html),
        array(0x80, 0x10FFFF, 0, 0x1FFFFF),
        'UTF-8'
    );

    /* Collect the top-level elements of the body */
    $elements = array();
    if (trim($markup) !== '') {
        $body = new DOMDocument();

        // Silence only libxml's own diagnostics, then restore the caller's setting.
        $previous = libxml_use_internal_errors(true);
        $body->loadHTML($markup);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        $formattedBody = new DOMDocument();
        foreach ($body->getElementsByTagName('*') as $child) {
            if ($child->tagName !== 'html' && $child->tagName !== 'body' && $child->parentNode->tagName === 'body') {
                $elements[] = $formattedBody->importNode($child, true);
            }
        }
    }

    /* Build Multidimensional Array of Sections */
    return sortEntrySections($elements);
}
// Sample entry body used to exercise breakupEntry(): three paragraphs before any
// heading, followed by headings nested from h1 down to h6, each heading being
// followed by three paragraphs of its own.
$body = <<<EOD
<p>Pre Header Section Content 1</p>
<p>Pre Header Section Content 2</p>
<p>Pre Header Section Content 3</p>
<h1>Header 1</h1>
<p>Header 1 Section Content 1</p>
<p>Header 1 Section Content 2</p>
<p>Header 1 Section Content 3</p>
<h2>Header 1.1</h2>
<p>Header 1 Subheader 1 Section Content 1</p>
<p>Header 1 Subheader 1 Section Content 2</p>
<p>Header 1 Subheader 1 Section Content 3</p>
<h3>Header 1.1.1</h3>
<p>Header 1 Subheader 1 Subheader 1 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Section Content 3</p>
<h4>Header 1.1.1.1</h4>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Section Content 3</p>
<h5>Header 1.1.1.1.1</h5>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Section Content 3</p>
<h6>Header 1.1.1.1.1.1</h6>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Section Content 3</p>
<h6>Header 1.1.1.1.1.2</h6>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 2 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 2 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 2 Section Content 3</p>
<h6>Header 1.1.1.1.1.3</h6>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 3 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 3 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 1 Subheader 3 Section Content 3</p>
<h5>Header 1.1.1.1.2</h5>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 2 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 2 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 2 Section Content 3</p>
<h5>Header 1.1.1.1.3</h5>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 3 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 3 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 1 Subheader 3 Section Content 3</p>
<h4>Header 1.1.1.2</h4>
<p>Header 1 Subheader 1 Subheader 1 Subheader 2 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 2 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 2 Section Content 3</p>
<h4>Header 1.1.1.3</h4>
<p>Header 1 Subheader 1 Subheader 1 Subheader 3 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 3 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 1 Subheader 3 Section Content 3</p>
<h3>Header 1.1.2</h3>
<p>Header 1 Subheader 1 Subheader 2 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 2 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 2 Section Content 3</p>
<h3>Header 1.1.3</h3>
<p>Header 1 Subheader 1 Subheader 3 Section Content 1</p>
<p>Header 1 Subheader 1 Subheader 3 Section Content 2</p>
<p>Header 1 Subheader 1 Subheader 3 Section Content 3</p>
<h2>Header 1.2</h2>
<p>Header 1 Subheader 2 Section Content 1</p>
<p>Header 1 Subheader 2 Section Content 2</p>
<p>Header 1 Subheader 2 Section Content 3</p>
<h2>Header 1.3</h2>
<p>Header 1 Subheader 3 Section Content 1</p>
<p>Header 1 Subheader 3 Section Content 2</p>
<p>Header 1 Subheader 3 Section Content 3</p>
<h1>Header 2</h1>
<p>Header 2 Section Content 1</p>
<p>Header 2 Section Content 2</p>
<p>Header 2 Section Content 3</p>
<h1>Header 3</h1>
<p>Header 3 Section Content 1</p>
<p>Header 3 Section Content 2</p>
<p>Header 3 Section Content 3</p>
EOD;
// breakupEntry() takes no arguments; it reads the markup from
// $GLOBALS['libraryEntry']['body'], so the fixture is published as a global here.
$libraryEntry = array('body' => $body);
$results = breakupEntry();
// Dump the resulting section structure inside a <textarea> for inspection.
echo '<textarea>'; var_dump($results); echo '</textarea>';
?>