查询从网页中提取版本信息

时间:2020-04-26 08:19:35

标签: php regex web-scraping

我尝试使用php从网页中提取版本信息。

/**
 * Downloads the resource at the given URL with cURL and returns its body.
 *
 * @param string $url Address of the page to download.
 *
 * @return string|false The response body, or false when the handle could not
 *                      be created or the transfer failed.
 */
function catche(string $url)  {

    $curl = curl_init($url);
    if ($curl === false) {
        return false;
    }

    // Without CURLOPT_RETURNTRANSFER curl_exec() prints the response to the
    // output stream and returns true, so the caller never sees the page body.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);

    $value1 = curl_exec($curl);
    curl_close($curl);

    return $value1;
}

/**
 * Builds a full "version.build" string for VMware Tools by scraping two pages.
 *
 * Step 1 reads the latest version from the release-notes link on the index
 * page at $url. Step 2 derives the release-notes URL for that version,
 * downloads it, and reads the build number from the first table inside the
 * content container.
 *
 * @param string $url Address of the VMware Tools documentation index page.
 *
 * @return string Version and build number joined by a dot, e.g. "11.0.6.15940789".
 *
 * @throws RuntimeException When a page cannot be downloaded or parsed, or when
 *                          the expected nodes or numbers are not found.
 */
function getVersioninfofromwebpage($url) {
    // Downloads a page and returns an XPath handle over its parsed HTML.
    $loadXPath = static function ($pageUrl) {
        $content = catche($pageUrl);
        if (!is_string($content) || $content === '') {
            throw new RuntimeException("Unable to download {$pageUrl}");
        }

        // Collect libxml parse warnings internally instead of silencing every
        // error with "@", then restore the caller's previous setting.
        $previous = libxml_use_internal_errors(true);
        $dom = new DOMDocument();
        $loaded = $dom->loadHTML($content);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if ($loaded === false) {
            throw new RuntimeException("Unable to parse HTML from {$pageUrl}");
        }

        return new DOMXPath($dom);
    };

    // --- Step 1: latest version from the index page ---
    $xpath = $loadXPath($url);
    $result = $xpath->query("//span[contains(text(),'VMware Tools')]//following-sibling::ul//span[contains(text(),'Release Notes')]//following-sibling::ul//li[1]//a");
    if ($result === false || $result->length === 0) {
        throw new RuntimeException("Release notes link not found on {$url}");
    }

    // Expected version info is e.g. 11.0.6. Pulling the dotted number out of
    // the link text tolerates surrounding whitespace or extra words.
    $linkText = trim($result->item(0)->textContent);
    if (preg_match('/\d+(?:\.\d+)+/', $linkText, $versionMatch) !== 1) {
        throw new RuntimeException("No version number found in link text '{$linkText}'");
    }
    $version = $versionMatch[0];

    // "11.0.6" -> "11.0": drop the last component for the directory part.
    $modversion = explode(".", $version);
    array_pop($modversion);
    $modversion = implode(".", $modversion);

    // "11.0.6" -> "1106": the file name uses the version without dots.
    $version1 = str_replace('.', '', $version);

    // Expected url "https://docs.vmware.com/en/VMware-Tools/11.0/rn/VMware-Tools-1106-Release-Notes.html"
    $buildUrl = "https://docs.vmware.com/en/VMware-Tools/" . $modversion . "/rn/VMware-Tools-" . $version1 . "-Release-Notes.html";

    // --- Step 2: build number from the release-notes page ---
    // The release-notes page must be loaded here, not the index page again.
    $xpath = $loadXPath($buildUrl);
    $result = $xpath->query("//div[contains(@id, 'content')]/table");
    if ($result === false || $result->length === 0) {
        throw new RuntimeException("Header table not found on {$buildUrl}");
    }

    $header = $result->item(0)->textContent;

    // NOTE(review): presumably the header reads like "... | Build 15940789";
    // confirm against the live page. A number following "Build" is preferred
    // so that dates or version digits earlier in the text are not picked up.
    // If there is no such label, fall back to the first run of digits.
    if (preg_match('/build\s*:?\s*(\d+)/i', $header, $matches) !== 1
        && preg_match("((\d+))", $header, $matches) !== 1
    ) {
        throw new RuntimeException("No build number found on {$buildUrl}");
    }

    $build = $matches[1];

    return "{$version}.{$build}";

}
// Driver: resolve the version string from the VMware Tools documentation
// index page and write it to the output.
$url = 'https://docs.vmware.com/en/VMware-Tools/index.html';
$val = getVersioninfofromwebpage($url);
print $val;

似乎我的查询有问题:我没有得到任何结果,反而收到了错误。谁能帮我获取版本信息?预期结果是:11.0.6.15940789

0 个答案:

没有答案