我有以下代码
import subprocess
# Download the Adblock Plus "downloads per day" statistics CSVs, one file per
# one-year window, walking backwards in time from `today`.
interestedYears = 5  # number of consecutive one-year windows to fetch
today = "2017-10-23"
tokenDataToday = today.split("-")  # ["YYYY", "MM", "DD"]
# Dates are placed in the URL as YYYYMMDD with no separators.
dateEnd = "".join(tokenDataToday)
url = "https://addons.mozilla.org/en-US/firefox/addon/adblock-plus/statistics/downloads-day-"

for year in range(1, interestedYears + 1):
    # Same month and day as `today`, `year` years earlier.
    # NOTE(review): a `today` of Feb 29 would yield a non-existent start date.
    yearStart = str(int(tokenDataToday[0]) - year)
    dateStart = yearStart + tokenDataToday[1] + tokenDataToday[2]
    tmpUrl = url + dateStart + "-" + dateEnd + ".csv"

    # Pass an argument list (no shell); -O makes curl save the response under
    # the remote file name in the current directory.
    # NOTE(review): without curl's --fail option an HTTP error page is still
    # saved and curl exits 0 -- consider adding it if that matters.
    args = ["curl", "-O", tmpUrl]
    cmd = " ".join(args)
    print(cmd)

    process = subprocess.Popen(args, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = process.communicate()
    # Surface failures instead of silently dropping curl's diagnostics.
    if process.returncode != 0:
        print("curl failed with exit code %d for %s" % (process.returncode, tmpUrl))
        print(stderr.decode("utf-8", "replace"))

    # The next (older) window ends where this one started.
    dateEnd = dateStart
    print("-----------------------------")
使用 curl 后我得到了这个字符串。然后我想获取所有 class 为 'class1 class2' 的 a 标签的链接(href),该怎么做?我尝试了一些方法,但都不起作用 :(
答案 0 :(得分:1)
使用DOMDocument
// Print the href of every <a> element whose class attribute is exactly
// 'class1 class2'.
$wantedClass = 'class1 class2';
$dom = new DOMDocument;
// Parse the HTML markup held in $input.
$dom->loadHTML($input);
$anchors = $dom->getElementsByTagName('a');
foreach ($anchors as $anchor) {
    // Skip anchors whose class attribute is not an exact match.
    if ($anchor->getAttribute('class') != $wantedClass) {
        continue;
    }
    // Emit the link target followed by an HTML line break.
    echo $anchor->getAttribute('href')."<br/>";
}
Output:
link-to-web1.html
abc/link-to-web1.html
abc/xyz/link-to-web1.html