我正在用 PHP 编写一个针对特定内容的网络爬虫(web crawler),但在第 68 行一直出现“未定义的偏移量”(undefined offset)错误。我不确定问题出在哪里:可能是某个数组没有被赋值,也可能是我对 explode() 的用法有误,但我不知道该如何解决。任何帮助都将不胜感激!
<?php

/*
 * Content-specific crawler.
 *
 * Starting at $base_url, the script loads up to $max_urls_to_check pages,
 * records for each one whether an element named $content_tag contains the
 * text $content, queues every absolute same-site link it has not visited
 * yet, and finally writes the URLs of the matching pages to $output_file
 * (one URL per line).
 */

// Loaded as in the original script; the crawl loop below only relies on DOMDocument.
include("C:\Program Files\PHPCrawl_082\libs\PHPCrawler.class.php");

// Crawling over the network can easily exceed the default execution time limit.
set_time_limit(0);

// Site root: the crawl starts here and only follows absolute links under this prefix.
$base_url = "http://www.carrollcountyohio.us/";

// URL of the page currently being examined.
$domain = $base_url;

// Text to look for inside the elements selected by $content_tag.
$content = "Election Results";

// Must be an HTML element name. The previous value ("Election Results") is not
// an element name and therefore could never match anything. "body" searches the
// whole page text -- NOTE(review): narrow this (e.g. "title", "h1") if only a
// specific element is of interest.
$content_tag = "body";

// File that receives the URLs of the pages containing $content.
$output_file = "ElectionsURL.txt";

// Upper bound on the number of pages loaded in one run.
$max_urls_to_check = 10;

// Number of pages loaded so far.
$rounds = 0;

// Queue of URLs that still have to be visited.
$domain_stack = array();

// Upper bound on the number of queued URLs.
$max_size_domain_stack = 1000;

// Map of visited URL => bool (true when $content was found on that page).
$checked_domains = array();

while ($domain !== null && $domain !== "" && $rounds < $max_urls_to_check) {
    $doc = new DOMDocument();

    // "@" silences the warnings libxml raises for malformed real-world HTML.
    // When loading fails the document stays empty and both loops below
    // simply iterate over nothing.
    @$doc->loadHTMLFile($domain);

    $found = false;
    foreach ($doc->getElementsByTagName($content_tag) as $tag) {
        // strpos() returns 0 for a match at the very start of the text, so the
        // result has to be compared against false explicitly.
        if (strpos((string) $tag->nodeValue, $content) !== false) {
            $found = true;
            break;
        }
    }

    $checked_domains[$domain] = $found;

    // Hyperlinks are <a> elements; their target is the "href" attribute.
    foreach ($doc->getElementsByTagName('a') as $link) {
        $href = $link->getAttribute('href');

        // Only absolute links that start with the site root are followed;
        // relative links and links to other sites are skipped.
        if (strpos($href, $base_url) !== 0) {
            continue;
        }

        // isset() avoids the "undefined index" notice that reading a missing
        // key would raise, and is true for both stored values (true/false).
        if (isset($checked_domains[$href])) {
            continue;
        }

        if (count($domain_stack) >= $max_size_domain_stack) {
            break;
        }

        $domain_stack[] = $href;
    }

    // Drop duplicate URLs, then take the next one. array_shift() reindexes the
    // queue and returns null when it is empty, which ends the loop instead of
    // raising the "undefined offset" notice that $domain_stack[0] produced.
    $domain_stack = array_unique($domain_stack);
    $domain = array_shift($domain_stack);
    $rounds++;
}

// Collect every visited URL on which the content was found.
$found_domains = "";
foreach ($checked_domains as $url => $matched) {
    if ($matched) {
        $found_domains .= $url . "\n";
    }
}

file_put_contents($output_file, $found_domains);