我正在抓取一个网站。大部分内容都能正常获取,只剩最后一个问题:我遍历雇主(公司)名称及其所在地,并把它们推送到名为 $places 的数组中。当我打印该数组时,它看起来像这样:
Array
(
[0] => Confidential
)
Array
(
[0] => Palm Bay, FL
)
Array
(
[0] =>
[1] => Robert Half Technology
[2] =>
)
Array
(
[0] => Santa Monica, CA
)
Array
(
[0] =>
[1] => Apex Systems
[2] =>
)

直接打印每个 $a->nodeValue 时可以输出相应的值,但当我把它推送到数组 $places 时,得到的却是带有多个索引的子数组。我不知道为什么。你能测试一下我的代码,看看是怎么回事吗:
// Downloads one CareerBuilder search-results page and walks every
// <div class="job-row"> three levels deep, sorting the nodes found there into
// per-field arrays according to their class attribute.
set_time_limit(500);
$link = "https://www.careerbuilder.com/jobs-web-developer-in-usa?keywords=software+developer&location=usa&page_number=1";

$ch = curl_init();
// NOTE(review): certificate verification is switched off here; confirm this is
// really wanted before using the script outside of a local experiment.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_URL, $link);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.A.B.C Safari/525.13");
$data = curl_exec($ch);

// curl_exec() returns false when the transfer fails; loadHTML() cannot parse
// that, so stop early with a readable message instead.
if ($data === false || $data === '') {
    $reason = curl_error($ch);
    curl_close($ch);
    exit("Could not download the page: " . $reason);
}

$title = array();
$when = array();
$timing = array();
$company = array();
$location = array();
$paths = array();
$salary = array();
$summary = array();

// Collect HTML parse warnings internally instead of printing them; this makes
// the "@" operator on the DOM calls unnecessary.
libxml_use_internal_errors(true);
$document = new DOMDocument();
$document->loadHTML($data);

$elements = $document->getElementsByTagName("div");
foreach ($elements as $element) {
    if ($element->getAttribute("class") == "job-row") {
        foreach ($element->childNodes as $sublements) {
            if ($sublements->hasChildNodes()) {
                foreach ($sublements->childNodes as $nodes) {
                    if ($nodes->hasChildNodes()) {
                        foreach ($nodes->childNodes as $node) {
                            // Only nodes that have children are inspected, which
                            // skips plain text nodes at this level.
                            if ($node->hasChildNodes()) {
                                switch ($node->getAttribute("class")) {
                                    case 'job-title show-for-medium-up':
                                        array_push($title, $node->nodeValue);
                                        break;

                                    case 'show-for-medium-up':
                                        array_push($when, $node->nodeValue);
                                        break;

                                    case 'job-text employment-info':
                                        array_push($timing, $node->nodeValue);
                                        break;

                                    case 'job-description show-for-medium-up':
                                        array_push($summary, $node->nodeValue);
                                        break;

                                    case 'job-text':
                                        // $places is re-created for every
                                        // "job-text" node, so each node prints
                                        // its own array. childNodes also yields
                                        // text nodes, so every child of the node
                                        // gets an entry of its own.
                                        $places = array();
                                        foreach ($node->childNodes as $a) {
                                            array_push($places, $a->nodeValue);
                                        }
                                        echo "<pre>";
                                        print_r($places);
                                        echo "</pre>";
                                        break;
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
curl_close($ch);
&#13;
答案 0 :(得分:0)
我找到了解决方案:问题在于我之前访问的是子节点(childNodes),而不是节点本身的值。
<?php
/**
 * Scrapes one CareerBuilder search-results page and prints, as parallel lists,
 * the job titles, detail links, posting ages, employment types, summaries,
 * locations and companies found in its "job-row" blocks.
 *
 * To search for something else, change the keywords, location and page_number
 * query parameters of $link.
 */

/**
 * Downloads a page over HTTP(S).
 *
 * @param string $url Address to fetch.
 * @return string|null The response body, or null when the transfer failed or
 *                     returned nothing.
 */
function fetchPage($url)
{
    $ch = curl_init();
    // NOTE(review): certificate verification is switched off, as in the
    // original snippet; confirm this is really wanted outside of experiments.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Without explicit timeouts a stalled server could block the script.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 15);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.A.B.C Safari/525.13");
    $body = curl_exec($ch);
    curl_close($ch);

    // curl_exec() yields false on failure; normalise that (and an empty body)
    // to null so callers have a single value to test for.
    return (is_string($body) && $body !== '') ? $body : null;
}

/**
 * Extracts the job fields from a results page.
 *
 * Inside every <div class="job-row"> the elements three levels down are
 * classified by their exact class attribute. All text values are trimmed.
 * Nodes with class "job-text" are assumed to alternate company, location,
 * company, location, ... across the page (TODO confirm against the live
 * markup); they are split into the two lists on that basis.
 *
 * @param string $html Raw HTML of the results page.
 * @return array Map with the keys 'title', 'paths', 'when', 'timing',
 *               'summary', 'company' and 'location', each holding a list of
 *               strings. 'paths' always has exactly one entry per title (an
 *               empty string when the title contains no link).
 */
function extractJobs($html)
{
    $jobs = [
        'title'    => [],
        'paths'    => [],
        'when'     => [],
        'timing'   => [],
        'summary'  => [],
        'company'  => [],
        'location' => [],
    ];

    if (!is_string($html) || $html === '') {
        return $jobs;
    }

    // Real-world HTML is rarely valid; collect parser complaints internally
    // instead of emitting warnings, then restore the previous setting.
    $previous = libxml_use_internal_errors(true);
    $document = new DOMDocument();
    $loaded = $document->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);
    if (!$loaded) {
        return $jobs;
    }

    $xpath = new DOMXPath($document);
    $places = [];

    foreach ($xpath->query('//div[@class="job-row"]') as $row) {
        // Elements three levels below the row that have at least one child
        // node of their own.
        foreach ($xpath->query('./*/*/*[node()]', $row) as $field) {
            $text = trim($field->nodeValue);

            switch ($field->getAttribute('class')) {
                case 'job-title show-for-medium-up':
                    $jobs['title'][] = $text;
                    // One link per title keeps 'paths' aligned with 'title'.
                    $jobs['paths'][] = $xpath->evaluate('string(.//a/@href)', $field);
                    break;

                case 'show-for-medium-up':
                    $jobs['when'][] = $text;
                    break;

                case 'job-text employment-info':
                    $jobs['timing'][] = $text;
                    break;

                case 'job-description show-for-medium-up':
                    $jobs['summary'][] = $text;
                    break;

                case 'job-text':
                    $places[] = $text;
                    break;
            }
        }
    }

    // Even positions are treated as companies, odd positions as locations.
    foreach ($places as $index => $place) {
        if ($index % 2 === 0) {
            $jobs['company'][] = $place;
        } else {
            $jobs['location'][] = $place;
        }
    }

    return $jobs;
}

/**
 * Prints a list with print_r(), escaped for HTML output because the values
 * come from a remote page.
 *
 * @param array $values List to print.
 * @return void
 */
function printList($values)
{
    echo htmlspecialchars(print_r($values, true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

set_time_limit(500);
$link = "https://www.careerbuilder.com/jobs-web-developer-in-usa?keywords=software+developer&location=usa&page_number=1";

$title = [];
$paths = [];
$when = [];
$timing = [];
$summary = [];
$location = [];
$company = [];

$data = fetchPage($link);
if ($data === null) {
    echo "Could not download " . htmlspecialchars($link, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
} else {
    $jobs = extractJobs($data);
    $title = $jobs['title'];
    $paths = $jobs['paths'];
    $when = $jobs['when'];
    $timing = $jobs['timing'];
    $summary = $jobs['summary'];
    $location = $jobs['location'];
    $company = $jobs['company'];

    // Same order as before: titles, links, posting age, employment type,
    // summary, location, company, separated by two line breaks.
    $sections = [$title, $paths, $when, $timing, $summary, $location, $company];
    foreach ($sections as $position => $values) {
        if ($position > 0) {
            echo "<br>";
            echo "<br>";
        }
        printList($values);
    }
}
?>
&#13;