我有一个MySql数据库只有超过3000万行,但有些行(适当的300万)是重复的我使用下面的PHP脚本来复制数据库并检查重复项,但它的运行速度非常慢花了3天,甚至还没到中途,还有加速吗?我在MySql Server上运行脚本。
<?php
$dbhost = "localhost" ;
$con = mysql_connect("$dbhost","$dbuser","$dbpass");
if (!$con)
{
die('Could not connect: ' . mysql_error());
}
mysql_select_db("$dbame", $con);
$server_self = $_SERVER['PHP_SELF'];
$rows_per_page = 1000;
$dbname_var = post1;
//paging script
$query = "SELECT count(*) FROM $dbname_var"; //change!!
//paging script
//PAGING SCRIPT
if (isset($_GET['pageno'])) {
$pageno = $_GET['pageno'];
} else {
$pageno = 1;
} // if
$resulta = mysql_query($query) or trigger_error("SQL", E_USER_ERROR);
$query_data = mysql_fetch_row($resulta);
$numrows = $query_data[0];
$lastpage = ceil($numrows/$rows_per_page);
$pageno = (int)$pageno;
if ($pageno > $lastpage) {
$pageno = $lastpage;
} // if
if ($pageno < 1) {
$pageno = 1;
} // if
$limit = 'LIMIT ' .($pageno - 1) * $rows_per_page .',' .$rows_per_page;
$nextpage = $pageno+1;
$prevpage = $pageno-1;
$result = mysql_query("SELECT * FROM $dbname_var $limit");
while($row = mysql_fetch_array( $result )) {
$as = $row[1];
$ad = $row[2];
$af = $row[10];
//CHECK IF THE ROW ALREADY EXISTS IN THE TABLE
$resultdb1 = mysql_query("SELECT * FROM post_final WHERE
add1='$as' AND
add2='$ad' AND
add10='$af'");
$num_rowsdb1 = mysql_num_rows($resultdb1);
//IF IT DOSENT ADD IT!
if($num_rowsdb1 < 1) {
mysql_query("INSERT INTO post_final (add1,
add2,
add3,
add4,
add5,
add6,
add7,
add8,
add9,
add10)
VALUES ('$row[1]',
'$row[2]',
'$row[3]',
'$row[4]',
'$row[5]',
'$row[6]',
'$row[7]',
'$row[8]',
'$row[9]',
'$row[10]')");
} }
$nxt = $_GET['pageno'] + 1;
if ($_GET['pageno'] < $lastpage) {
print "<BODY>
<script type=\"text/javascript\">
<!--
window.location = \"/addnew.php?pageno=$nxt&lastpage=$lastpage\"
//-->
</script>
</BODY>
</HTML>\n"; }
?>
# Column Type Collation Attributes Null Default Extra Action
1 ID int(10) No None AUTO_INCREMENT Change Drop More
2 add1 varchar(50) latin1_german2_ci No None Change Drop More
3 add2 varchar(50) latin1_german2_ci No None Change Drop More
4 add3 varchar(50) latin1_german2_ci No None Change Drop More
5 add4 varchar(50) latin1_german2_ci No None Change Drop More
6 add5 varchar(50) latin1_german2_ci No None Change Drop More
7 add6 varchar(50) latin1_german2_ci No None Change Drop More
8 add7 varchar(50) latin1_german2_ci No None Change Drop More
9 add8 varchar(50) latin1_german2_ci No None Change Drop More
10 add9 varchar(50) latin1_german2_ci No None Change Drop More
11 add10 varchar(50) latin1_german2_ci No None Change Drop More
Action Keyname Type Unique Packed Column Cardinality Collation Null Comment
Edit Drop PRIMARY BTREE Yes No ID 10044279 A
Edit Drop add10 BTREE No No add10 590839 A
Edit Drop add1 BTREE No No add1 264323 A
Edit Drop add2 BTREE No No add2 233587 A
答案 0 :(得分:3)
仅使用SQL,不需要在SQL和PHP之间来回传递数据。假设您有一个包含表db1
的数据库post_final
,请创建数据库db2
创建空post_final
表并执行
INSERT INTO `db2`.`post_final` (add1, add2, ...., add10)
SELECT DISTINCT add1, add2, ... , add10 FROM `db1`.`post_final`