// Fetch the raw HTML of the United Domains footer page.
$content = file_get_contents('http://www.uniteddomains.com/index/footer/');
// Strip every run of whitespace so the markup becomes one contiguous string;
// the pattern below relies on there being no spaces between the tags.
$content = preg_replace('/\s+/', '', $content);
// Pattern meant to capture the TLD name between `target="_parent">.` and
// `</a></li><li>`. The `.` right after `>` is unescaped, so it matches any
// single character rather than only a literal dot.
$regex = '#target="_parent">.(.*?)</a></li><li>#';
// NOTE(review): `$source` is never assigned in this snippet (the fetched
// markup lives in `$content`), and preg_match() stops after the first match.
preg_match($regex, $source, $matches);
// Dump whatever was captured.
print_r($matches);
我想要匹配所有TLD:
每个 TLD 的前面都是 `target="_parent">.`,后面紧跟 `</a></li><li>`。
我想最终得到类似 array('africa', 'amsterdam', 'bcn', ...) 这样的数组(依此类推)。
我在这里做错了什么?
注意:删除所有空格的第二步只是为了简化操作。
答案 0 :(得分:3)
这是一个正则表达式,可以为该页面执行此操作。
\.\w+(?=</a></li>)
**PHP**
// Download the footer markup. file_get_contents() returns false on failure,
// so stop here instead of silently running the regex against an empty value.
$content = file_get_contents('http://www.uniteddomains.com/index/footer/');
if ($content === false) {
    throw new \RuntimeException('Could not fetch the United Domains footer page.');
}
// Match every ".tld" token that is immediately followed by "</a></li>".
// The lookahead keeps the closing tags out of the match itself, so each
// result is just the dot plus the word characters after it.
preg_match_all('/\.\w+(?=<\/a><\/li>)/m', $content, $matches);
// $matches[0] holds the full matches (the pattern has no capture groups).
print_r($matches);
以下是结果:
.africa, .amsterdam, .bcn, .berlin, .boston, .brussels, .budapest, .gent, .hamburg, .koeln, .london, .madrid, .melbourne, .moscow, .miami, .nagoya, .nyc, .okinawa, .osaka, .paris, .quebec, .roma, .ryukyu, .stockholm, .sydney, .tokyo, .vegas, .wien, .yokohama, .africa, .arab, .bayern, .bzh, .cymru, .kiwi, .lat, .scot, .vlaanderen, .wales, .app, .blog, .chat, .cloud, .digital, .email, .mobile, .online, .site, .mls, .secure, .web, .wiki, .associates, .business, .car, .careers, .contractors, .clothing, .design, .equipment, .estate, .gallery, .graphics, .hotel, .immo, .investments, .law, .management, .media, .money, .solutions, .sucks, .taxi, .trade, .archi, .adult, .bio, .center, .city, .club, .cool, .date, .earth, .energy, .family, .free, .green, .live, .lol, .love, .med, .ngo, .news, .phone, .pictures, .radio, .reviews, .rip, .team, .technology, .today, .voting, .buy, .deal, .luxe, .sale, .shop, .shopping, .store, .eus, .gay, .eco, .hiv, .irish, .one, .pics, .porn, .sex, .singles, .vin, .vip, .bar, .pizza, .wine, .bike, .book, .holiday, .horse, .film, .music, .party, .email, .pets, .play, .rocks, .rugby, .ski, .sport, .surf, .tour, .video
答案 1 :(得分:0)
使用 DOM 会更简洁:
// Parse the footer page into a DOM tree. Scraped HTML is often not strictly
// valid, so let libxml collect its parser complaints internally instead of
// muting every diagnostic with `@`, and restore the previous mode afterwards.
$doc = new DOMDocument();
$previousErrorMode = libxml_use_internal_errors(true);
$loaded = $doc->loadHTMLFile('http://www.uniteddomains.com/index/footer/');
libxml_clear_errors();
libxml_use_internal_errors($previousErrorMode);
if ($loaded === false) {
    // Without this check a failed load would silently produce an empty list.
    throw new \RuntimeException('Could not load the United Domains footer page.');
}
$xpath = new DOMXPath($doc);
// Select the text of every <a target="_parent"> that sits in an <li> without
// a class attribute inside the nested list.
$items = $xpath->query('/html/body/div/ul/li/ul/li[not(@class)]/a[@target="_parent"]/text()');
// Glue all link texts together; they are expected to look like ".africa",
// so the combined string reads ".africa.amsterdam...".
$result = '';
foreach ($items as $item) {
    $result .= $item->nodeValue;
}
// Split on the dots to get the bare TLD names, then drop the empty first
// element produced by the leading dot.
$result = explode('.', $result);
array_shift($result);
print_r($result);