而php运算符与mysql Truncate无法正常工作

时间:2014-09-19 14:48:40

标签: php mysql xampp localhost

我有一堆网站存储为3个mysql表中的字符串。我的脚本将它们放入数组中,解析它,提取所有链接并将它们分类为2个表。它在3个完全分类的相同模块中被打破。

整个过程每30秒执行一次操作。

出于某种原因,只是第一次按预期工作,以后没有任何事情发生。

在我开始使用代码之前,我为折旧的mysql道歉,这个脚本只会在本地机器上使用,我会在适当的时候更新它。

这是我的代码:

$i=1;

$domain1 = 'example1.com';
$domain2 = 'example2.com';
$domain3 = 'example3.com';

$robots1 = array("url1",
"url2",
"url3");


$robots2 = array("url1",
"url2",
"url3");


$robots3 = array("url1",
"url2",
"url3");

require_once 'Normalizer.php';

$conn = mysql_connect('localhost:3306','user', 'pass', true );
mysql_select_db( 't1000', $conn );

while ($i<=50000) {


$query = 'SELECT * FROM dump1';
$result1=mysql_query( $query, $conn );
$strings1=array();
while ($row = mysql_fetch_assoc($result1)) {
array_push($strings1, $row["link"]);
}
$query = 'TRUNCATE TABLE dump1';
$delete=mysql_query( $query, $conn );




$query = 'SELECT * FROM dump2';
$result1=mysql_query( $query, $conn );
$strings2=array();
while ($row = mysql_fetch_assoc($result1)) {
array_push($strings2, $row["link"]);
}
$query = 'TRUNCATE TABLE dump2';
$delete=mysql_query( $query, $conn );




$query = 'SELECT * FROM dump3';
$result1=mysql_query( $query, $conn );
$strings3=array();
while ($row = mysql_fetch_assoc($result1)) {
array_push($strings3, $row["link"]);
}
$query = 'TRUNCATE TABLE dump3';
$delete=mysql_query( $query, $conn );


// Module 1 start

$ii=0;
 $links = array();
 $edofollow = array();
 $enofollow = array();
 $internal = array();

foreach ($strings1 as $value)
{
$input=$strings1[$ii];
$htm=stripcslashes($input);

$doc = new DOMDocument();
@$doc->loadHTML($htm);

  $arr = $doc->getElementsByTagName("a"); // DOMNodeList Object
  foreach($arr as $item) { // DOMElement Object
    $href =  $item->getAttribute("href");
    $rel =  $item->getAttribute("rel");
    $text = trim(preg_replace("/[\r\n]+/", " ", $item->nodeValue));
    $links[] = array(
      'href' => $href,
      'rel' => $rel,
      'text' => $text
    );  
if (strpos($href, '://')!==false AND strpos($href, $domain1)==false AND  $rel!=='nofollow') 
    {
    $un = new URL\Normalizer();
    $un->setUrl( $href );
    $href= parse_url($un->normalize(), PHP_URL_HOST);   
    array_push($edofollow, $href);
}

else if (strpos($href, '://')!==false AND strpos($href, $domain1)==false AND $rel=='nofollow')

    {
    $un1 = new URL\Normalizer();
    $un1->setUrl( $href );  
    array_push($enofollow, $un1->normalize());  

}

else if (strpos($href,'://')==false or strpos($href,$domain1)!==false)

    { 
    $un2 = new URL\Normalizer();
    $un2->setUrl( $href );
        $href1=$un2->normalize();
    if (strpos($href1, 'TRANSCRIPTS')==false AND strpos($href1, '(')==false AND strpos($href1, ')')==false AND strpos($href1, '#')==false AND strpos($href1, 'javascript')==false AND  strpos($href1, '?')==false AND  strpos($href1, 'void')==false)       
    {
    if($href1=='' or $href1=='/')
    {}
    else{   

        if (strpos($href1, '://')==false)
        {$href1='http://'.$domain1.$href1;}


if (in_array($href1, $robots1)) { }
else {
array_push($internal, $href1);
}       
 }
  } 
 }
}  

$uedofollow = array_values(array_unique($edofollow));
foreach ($uedofollow as $value) {
$query=mysql_query("select * from dofollow where link='".$value."' ");
$duplicate=0;
if($query){
$duplicate=mysql_num_rows($query);
}
   if($duplicate==0)
    {
    $sql='INSERT INTO dofollow (link) VALUES ("'.$value.'")';
    mysql_query( $sql, $conn );
    }
}

$uinternal = array_values(array_unique($internal));
foreach ($uinternal as $value2) {
    $query=mysql_query("select * from joblist1 where link='".$value2."' ");
if ($query) {
$duplicate=0;
$duplicate=mysql_num_rows($query);
   if($duplicate==0)
    {
    $sql='INSERT INTO joblist1 (link) VALUES ("'.$value2.'")';
    mysql_query( $sql, $conn );
    }
}
}

$ii=$ii+1;
}

// Module 1 ends


// Module 2 start

 $links = array();
 $edofollow = array();
 $enofollow = array();
 $internal = array();

$ii=0;

foreach ($strings2 as $value)
{
$input=$strings2[$ii];
$htm=stripcslashes($input);

$doc = new DOMDocument();
@$doc->loadHTML($htm);

  $arr = $doc->getElementsByTagName("a"); // DOMNodeList Object
  foreach($arr as $item) { // DOMElement Object
    $href =  $item->getAttribute("href");
    $rel =  $item->getAttribute("rel");
    $text = trim(preg_replace("/[\r\n]+/", " ", $item->nodeValue));
    $links[] = array(
      'href' => $href,
      'rel' => $rel,
      'text' => $text
    );  
if (strpos($href, '://')!==false AND strpos($href, $domain2)==false AND  $rel!=='nofollow') 
    {
    $un = new URL\Normalizer();
    $un->setUrl( $href );
    $href= parse_url($un->normalize(), PHP_URL_HOST);   
    array_push($edofollow, $href);
}

else if (strpos($href, '://')!==false AND strpos($href, $domain2)==false AND $rel=='nofollow')

    {
    $un1 = new URL\Normalizer();
    $un1->setUrl( $href );  
    array_push($enofollow, $un1->normalize());  

}

else if (strpos($href,'://')==false or strpos($href,$domain2)!==false)

    { 
    $un2 = new URL\Normalizer();
    $un2->setUrl( $href );
        $href1=$un2->normalize();
    if (strpos($href1, 'TRANSCRIPTS')==false AND strpos($href1, '(')==false AND strpos($href1, ')')==false AND strpos($href1, '#')==false AND strpos($href1, 'javascript')==false AND  strpos($href1, '?')==false AND  strpos($href1, 'void')==false)       
    {
    if($href1=='' or $href1=='/')
    {}
    else{   

        if (strpos($href1, '://')==false)
        {$href1='http://'.$domain2.$href1;}


if (in_array($href1, $robots2)) { }
else {
array_push($internal, $href1);
}       
 }
  } 
 }
}  

$uedofollow = array_values(array_unique($edofollow));
foreach ($uedofollow as $value) {
$query=mysql_query("select * from dofollow where link='".$value."' ");
$duplicate=0;
if($query){
$duplicate=mysql_num_rows($query);
}
   if($duplicate==0)
    {
    $sql='INSERT INTO dofollow (link) VALUES ("'.$value.'")';
    mysql_query( $sql, $conn );
    }
}

$uinternal = array_values(array_unique($internal));
foreach ($uinternal as $value2) {
    $query=mysql_query("select * from joblist2 where link='".$value2."' ");
if ($query) {
$duplicate=0;
$duplicate=mysql_num_rows($query);
   if($duplicate==0)
    {
    $sql='INSERT INTO joblist2 (link) VALUES ("'.$value2.'")';
    mysql_query( $sql, $conn );
    }
}
}

$ii=$ii+1;
}

// Module 2 Ends


// Module 3 start

 $links = array();
 $edofollow = array();
 $enofollow = array();
 $internal = array();

$ii=0;

foreach ($strings3 as $value)
{
$input=$strings3[$ii];
$htm=stripcslashes($input);

$doc = new DOMDocument();
@$doc->loadHTML($htm);

  $arr = $doc->getElementsByTagName("a"); // DOMNodeList Object
  foreach($arr as $item) { // DOMElement Object
    $href =  $item->getAttribute("href");
    $rel =  $item->getAttribute("rel");
    $text = trim(preg_replace("/[\r\n]+/", " ", $item->nodeValue));
    $links[] = array(
      'href' => $href,
      'rel' => $rel,
      'text' => $text
    );  
if (strpos($href, '://')!==false AND strpos($href, $domain3)==false AND  $rel!=='nofollow') 
    {
    $un = new URL\Normalizer();
    $un->setUrl( $href );
    $href= parse_url($un->normalize(), PHP_URL_HOST);   
    array_push($edofollow, $href);
}

else if (strpos($href, '://')!==false AND strpos($href, $domain3)==false AND $rel=='nofollow')

    {
    $un1 = new URL\Normalizer();
    $un1->setUrl( $href );  
    array_push($enofollow, $un1->normalize());  

}

else if (strpos($href,'://')==false or strpos($href,$domain3)!==false)

    { 
    $un2 = new URL\Normalizer();
    $un2->setUrl( $href );
        $href1=$un2->normalize();
    if (strpos($href1, 'TRANSCRIPTS')==false AND strpos($href1, '(')==false AND strpos($href1, ')')==false AND strpos($href1, '#')==false AND strpos($href1, 'javascript')==false AND  strpos($href1, '?')==false AND  strpos($href1, 'void')==false)       
    {
    if($href1=='' or $href1=='/')
    {}
    else{   

        if (strpos($href1, '://')==false)
        {$href1='http://'.$domain3.$href1;}


if (in_array($href1, $robots3)) { }
else {
array_push($internal, $href1);
}       
 }
  } 
 }
}  

$uedofollow = array_values(array_unique($edofollow));
foreach ($uedofollow as $value) {
$query=mysql_query("select * from dofollow where link='".$value."' ");
$duplicate=0;
if($query){
$duplicate=mysql_num_rows($query);
}
   if($duplicate==0)
    {
    $sql='INSERT INTO dofollow (link) VALUES ("'.$value.'")';
    mysql_query( $sql, $conn );
    }
}

$uinternal = array_values(array_unique($internal));
foreach ($uinternal as $value2) {
    $query=mysql_query("select * from joblist3 where link='".$value2."' ");
if ($query) {
$duplicate=0;
$duplicate=mysql_num_rows($query);
   if($duplicate==0)
    {
    $sql='INSERT INTO joblist3 (link) VALUES ("'.$value2.'")';
    mysql_query( $sql, $conn );
    }
}
}

$ii=$ii+1;
}

// Module 3 ends

sleep(30);
$i=$i++;
}

我现在试图解决它几天,混合了一些东西,但没有运气......

1 个答案:

答案 0 :(得分:1)

尝试在do {} while();

中包装它

即:

$i = 1;
do {
    echo "some crap $i<br>\n";
} while($i<=50000);