我正在尝试将旧的HTML网站转换为新的CMS。为了获得正确的菜单层次(具有不同的深度),我想用PHP读取所有文件并将菜单(嵌套的无序列表)提取/解析为关联数组
root.html
<ul id="menu">
<li class="active">Start</li>
<ul>
<li><a href="file1.html">Sub1</a></li>
<li><a href="file2.html">Sub2</a></li>
</ul>
</ul>
file1.html
<ul id="menu">
<li><a href="root.html">Start</a></li>
<ul>
<li class="active">Sub1</li>
<ul>
<li><a href="file3.html">SubSub1</a></li>
<li><a href="file4.html">SubSub2</a></li>
<li><a href="file5.html">SubSub3</a></li>
<li><a href="file6.html">SubSub4</a></li>
</ul>
</ul>
</ul>
file3.html
<ul id="menu">
<li><a href="root.html">Start</a></li>
<ul>
<li><a href="file1.html">Sub1</a></li>
<ul>
<li class="active">SubSub1</li>
<ul>
<li><a href="file7.html">SubSubSub1</a></li>
<li><a href="file8.html">SubSubSub2</a></li>
<li><a href="file9.html">SubSubSub3</a></li>
</ul>
</ul>
</ul>
</ul>
file4.html
<ul id="menu">
<li><a href="root.html">Start</a></li>
<ul>
<li><a href="file1.html">Sub1</a></li>
<ul>
<li><a href="file3.html">SubSub1</a></li>
<li class="active">SubSub2</li>
<li><a href="file5.html">SubSub3</a></li>
<li><a href="file6.html">SubSub4</a></li>
</ul>
</ul>
</ul>
我想遍历所有文件,提取其中 id="menu" 的菜单,并在保留层次结构和文件信息的同时创建一个这样(或类似)的数组
Array
[file] => root.html
[child] => Array
[Sub1] => Array
[file] => file1.html
[child] => Array
[SubSub1] => Array
[file] => file3.html
[child] => Array
[SubSubSub1] => Array
[file] => file7.html
[SubSubSub2] => Array
[file] => file8.html
[SubSubSub3] => Array
[file] => file9.html
[SubSub2] => Array
[file] => file4.html
[SubSub3] => Array
[file] => file5.html
[SubSub4] => Array
[file] => file6.html
[Sub2] => Array
[file] => file2.html
在PHP Simple HTML DOM Parser library的帮助下,我成功读取了文件并提取了菜单
// Load one page through PHP Simple HTML DOM Parser's file_get_html().
$html = file_get_html($file);
// Visit every <ul id="menu"> element found in the page.
foreach ($html->find("ul[id=menu]") as $ul) {
// (placeholder in the original post: per-menu processing goes here)
..
}
为了只解析菜单的活动部分(忽略指向上一级或更高层级的链接),我使用
// Index -1 selects the last <ul> nested inside the outer $ul.
$ul->find("ul",-1)
找到外部ul中的最后一个ul。这适用于单个文件。
但是我无法遍历所有文件/菜单并保留父/子信息,因为每个菜单都有不同的深度。
感谢所有建议,提示和帮助!
答案 0 :(得分:0)
编辑:好的,毕竟不是那么容易:)
顺便说一句,这个库真的是一个很好的工具。感谢编写它的人。
以下是一种可能的解决方案:
/**
 * Crawls a set of static HTML pages and merges the <ul id="menu"> list of
 * each page into a single nested menu tree.
 *
 * Starting from a root file, every <a href> found inside a menu is queued as
 * another page to parse, so the whole site is visited once per file.
 *
 * Relies on file_get_html() from PHP Simple HTML DOM Parser being loaded.
 *
 * Result shape: array keyed by menu item name; each value is a stdClass with
 *   - file   : href of the page the item links to (set once a link is seen)
 *   - childs : array of sub-items with the same shape (set once a nested
 *              <ul> is seen)
 * Items with identical names on the same level overwrite each other.
 */
class menu_parse {
    /** Files that do not exist or could not be loaded, in discovery order (filled by start()). */
    public static $missing = array();

    /** Work queue, file name => 1; the keys double as the "already queued" set. */
    private static $files = array();

    /**
     * Parse the menu of $file and of every page reachable through menu links.
     *
     * @param string $file path of the root page
     * @return array merged menu tree (see class comment for the shape)
     */
    public static function start ($file)
    {
        // Reset the static state so that repeated calls do not mix results.
        self::$files   = array($file => 1);
        self::$missing = array();
        $res = array();

        // menu() appends newly discovered files to self::$files while we are
        // walking it, hence the internal-pointer loop instead of foreach
        // (foreach would iterate over a snapshot).
        for (reset(self::$files); key(self::$files) !== null; next(self::$files))
        {
            // Numeric-looking names come back from key() as integers.
            $file = (string) key(self::$files);

            if (!file_exists($file))
            {
                self::$missing[] = $file;
                continue;
            }

            // file_get_html() yields a falsy value when the page cannot be
            // loaded; report the file instead of dereferencing that value.
            $html = file_get_html($file);
            if (!$html)
            {
                self::$missing[] = $file;
                continue;
            }

            // get menu root (if any)
            $root = $html->find("ul[id=menu]", 0);
            if ($root) self::menu($root, $res);

            // Only plain strings were copied out of the DOM, so it can be
            // released right away; the parser's nodes reference each other
            // and are not reclaimed reliably without an explicit clear().
            $html->clear();
            unset($html);
        }

        return $res;
    }

    /**
     * Merge one <ul> level into $res and recurse into nested lists.
     *
     * The menus use the layout "<li>name</li><ul>children</ul>": a nested
     * <ul> belongs to the <li> sibling that precedes it.
     *
     * @param object $menu <ul> DOM node
     * @param array  $res  items of the current level, updated in place
     */
    private static function menu ($menu, &$res)
    {
        // Name of the most recent <li>; null until the first one is seen.
        $name = null;

        foreach ($menu->children as $elem)
        {
            switch ($elem->tag)
            {
            case "li" : // name and possibly source file of a menu item
                // Trimmed so the same item is not stored twice because of
                // whitespace differences between pages.
                $name = trim($elem->plaintext);

                // see if we can find a link to the menu file
                $link = $elem->children(0);
                if ($link && $link->tag === 'a')
                {
                    $href = $link->href;
                    // Ignore anchors without a usable href.
                    if (is_string($href) && $href !== '')
                    {
                        self::item($res, $name)->file = $href;
                        // add the source file to the processing list
                        self::$files[$href] = 1;
                    }
                }
                break;

            case "ul" : // go down one level to grab items of the current menu
                // A sub-list with no preceding <li> has no owner; skip it.
                if ($name === null) break;

                $owner = self::item($res, $name);
                if (!isset($owner->childs)) $owner->childs = array();
                self::menu($elem, $owner->childs);
                break;
            }
        }
    }

    /**
     * Return the entry for $name, creating it on first use.
     *
     * Explicit creation is required: assigning a property on a missing entry
     * throws an Error in PHP 8.
     */
    private static function item (&$res, $name)
    {
        if (!isset($res[$name])) $res[$name] = new stdClass;
        return $res[$name];
    }
}
用法:
// start() hands back an array of menu items indexed by item name.
//
// Each item is an object with up to 2 members
// - file -> source file of the menu item
// - childs -> array of sub-menu items
//
$res = menu_parse::start ("root.html");
// menu_parse::$missing now holds the names of all files that were not found
$tree = print_r ($res, true);
$notFound = print_r (menu_parse::$missing, true);
// Emit both dumps in one go, each wrapped in <pre> for readability.
echo "Result : <pre>", $tree, "</pre><br>missing files:<pre>", $notFound, "</pre>";
测试用例的输出:
Array
(
[Start] => stdClass Object
(
[childs] => Array
(
[Sub1] => stdClass Object
(
[file] => file1.html
[childs] => Array
(
[SubSub1] => stdClass Object
(
[file] => file3.html
[childs] => Array
(
[SubSubSub1] => stdClass Object
(
[file] => file7.html
)
[SubSubSub2] => stdClass Object
(
[file] => file8.html
)
[SubSubSub3] => stdClass Object
(
[file] => file9.html
)
)
)
[SubSub2] => stdClass Object
(
[file] => file3.html
)
[SubSub3] => stdClass Object
(
[file] => file5.html
)
[SubSub4] => stdClass Object
(
[file] => file6.html
)
)
)
[Sub2] => stdClass Object
(
[file] => file2.html
)
)
[file] => root.html
)
)
missing files: Array
(
[0] => file2.html
[1] => file5.html
[2] => file6.html
[3] => file7.html
[4] => file8.html
[5] => file9.html
)
您可以修改代码以将(子)菜单作为具有数字索引和名称作为属性的数组(以便具有相同名称的两个项不会相互覆盖),但这会使结果的结构复杂化
如果发生这样的名称重复,最好的解决方案是重命名其中一个项目,恕我直言。
也可以修改它来处理同一文件中的多个菜单,但恕我直言这没有多大意义(这意味着根菜单ID会重复,而这很可能会给试图处理它的JavaScript带来麻烦)。
答案 1 :(得分:0)
这更像是一个带有向上链接的目录树。第1级的file1指向第2级的file3,而file3又指回第1级的file1,这就导致了“不同的深度”。可以考虑设计一个既能向上也能向下指向的菜单对象,并维护这些对象的列表,而不是字符串数组的数组。在PHP中,这样一个层次结构的起点可以是下面这样的类:
class menuItem {
// Neighbouring items; null while no neighbour has been attached.
protected $leftSibling = null;
protected $rightSibling = null;
// Related items one level up / one level down (presumably; only $childs is
// written by the code visible here, via addChild()).
protected $parents = array();
protected $childs = array();
// Free-form name => value attributes, accessed through setProp()/getProp().
protected $properties = array();
// set property like menu name or file name
/**
 * Store an attribute of this item, replacing any previous value.
 *
 * @param mixed $name attribute key (e.g. menu name or file name)
 * @param mixed $val  value to remember under that key
 */
function setProp($name, $val)
{
    $this->properties[$name] = $val;
}
// get a property if set, false otherwise
/**
 * Look up an attribute of this item.
 *
 * @param mixed $name attribute key
 * @return mixed the stored value, or false when the key is not set
 *               (isset() also treats a stored null as "not set")
 */
function getProp($name)
{
    return isset($this->properties[$name]) ? $this->properties[$name] : false;
}
/**
 * Collect every item to the left of this one.
 *
 * Follows getLeftSibling() from item to item until it yields null.
 *
 * @return array left siblings, nearest one first; empty when there is none
 */
function getLeftSiblingsAsArray()
{
    $collected = array();
    for ($node = $this->getLeftSibling(); $node != null; $node = $node->getLeftSibling()) {
        $collected[] = $node;
    }
    return $collected;
}
/**
 * Append $item to the end of this item's list of children.
 *
 * @param mixed $item the child to add
 */
function addChild($item)
{
    $this->childs[] = $item;
}
/**
 * Attach $item at the far left end of this item's chain of left siblings.
 *
 * @param mixed $item the item to attach
 */
function addLeftSibling($item)
{
    // No left sibling yet: $item becomes the direct left neighbour.
    // Without this case the item would be silently dropped.
    if ($this->leftSibling === null) {
        $this->leftSibling = $item;
        return;
    }

    // Otherwise walk to the leftmost item of the chain and attach there.
    // hasLeft() and getLeftSibling() are defined outside the visible part of
    // the class; hasLeft() is assumed to report whether a left sibling exists.
    $sibling = $this->leftSibling;
    while ($sibling->hasLeft()) {
        $sibling = $sibling->getLeftSibling();
    }
    $sibling->addFinalLeft($item);
}
/**
 * Set $item as the direct left sibling of this item.
 *
 * addLeftSibling() calls this on the leftmost item of a chain.
 *
 * @param mixed $item the new left neighbour
 */
function addFinalLeft($item)
{
    // The target is the item itself; there is no $sibling in this scope.
    $this->leftSibling = $item;
}
....