我在 MySQL 中有一个约 2000 万行的表。
id | word_eng | word_indic
我需要使用 Google Translate API 将英语单词(word_eng)翻译成印度语言(word_indic)。
我编写了 PHP 代码,它并发发出多个 curl 请求,从 API 获取数据并将结果更新到表中。但这个过程非常缓慢,每秒大约只能处理 100 到 200 个单词。
我正在使用 RollingCurl 来实现 multi-curl(并发 curl 请求)。
怎样才能让它尽可能快?
以下是我的代码,它作为一个 cron 任务运行。
<?php
include_once('db.php');
include_once('functions.php');
include_once('rolling-curl-master/RollingCurl.php');
// Table to process, taken from the first command-line argument. Identifiers
// cannot be bound as query parameters, so the name is restricted to plain
// identifier characters (optionally "db.table") before it is interpolated.
if (!isset($argv[1]) || !preg_match('/^[A-Za-z0-9_$.]+$/', $argv[1])) {
    fwrite(STDERR, 'Usage: php ' . basename(__FILE__) . " <table>\n");
    exit(1);
}
$table = $argv[1];

// Fetch a batch of rows that have no transliteration yet.
$q = "SELECT * from $table where word_indic is null limit 500000";
$result = $conn->query($q);
if ($result === false) {
    fwrite(STDERR, 'Query failed: ' . $conn->error . "\n");
    exit(1);
}

// Counter of updated rows; request_callback() increments it through `global $n`.
$n = 0;

$rc = new RollingCurl("request_callback");
// The window size determines how many simultaneous requests to allow.
$rc->window_size = 300;

// Queue one request per row while reading the result set, instead of first
// collecting every URL in a separate array.
while ($row = $result->fetch_assoc()) {
    $id = $row['id'];
    // getName() is a simple helper that trims and cleans up the string.
    // NOTE(review): the accompanying description names this column `word_eng`,
    // but the code reads `name_eng` - confirm against the real schema.
    $word = getName($row['name_eng']);

    // Encode each value exactly once. Encoding the word twice would make the
    // API receive literal "%20"-style sequences for any non-alphanumeric
    // character. The row id travels in the URL so the callback can recover it.
    $url = 'https://www.google.com/inputtools/request?text=' . rawurlencode($word)
        . '&ime=transliteration_en_te&id=' . rawurlencode($id);

    $rc->add(new RollingCurlRequest($url));
}

$rc->execute();
/**
 * RollingCurl completion callback: stores one transliteration result.
 *
 * The row id is recovered from the `id` query parameter of the request URL
 * ($info['url']). The first candidate found at [1][0][1][0] of the decoded
 * JSON response is written to the `word_indic` column of that row, in the
 * table named by the global $table, through the global $conn. Responses that
 * cannot be decoded, carry no candidate, or whose candidate still contains
 * lowercase Latin letters are ignored. The global counter $n is incremented
 * for every update that executes successfully.
 *
 * @param string $response Raw response body of the finished request.
 * @param array  $info     Transfer info for the request; only 'url' is read.
 * @return void
 */
function request_callback($response, $info)
{
    global $conn, $table, $n;

    // Prepared statements are cached per table name so that each callback
    // costs a single execute instead of a prepare plus an execute.
    static $statements = array();

    if (!isset($info['url'])) {
        return;
    }
    $queryString = parse_url($info['url'], PHP_URL_QUERY);
    if (!is_string($queryString)) {
        return;
    }
    parse_str($queryString, $query);
    if (!isset($query['id']) || !is_string($query['id']) || $query['id'] === '') {
        return;
    }
    $id = $query['id'];

    // Decode defensively: a failed or truncated response must not raise
    // warnings or write garbage into the table.
    $decoded = json_decode($response, true);
    if (!is_array($decoded)
        || !isset($decoded[1][0][1][0])
        || !is_string($decoded[1][0][1][0])
    ) {
        return;
    }
    $trans = $decoded[1][0][1][0];

    // Skip empty results and results that still contain lowercase Latin
    // letters (same acceptance rule as before).
    if ($trans === '' || preg_match('/[a-z]/', $trans)) {
        return;
    }

    if (!isset($statements[$table])) {
        // The table name cannot be bound as a parameter, so it is interpolated;
        // it is expected to be a trusted identifier. The values are bound, so
        // quotes in the API response can no longer break or inject into the SQL.
        $prepared = $conn->prepare("update $table set word_indic=? where id=?");
        if (!$prepared) {
            error_log('request_callback: could not prepare update statement: ' . $conn->error);
            return;
        }
        $statements[$table] = $prepared;
    }

    $stmt = $statements[$table];
    $stmt->bind_param('ss', $trans, $id);
    if ($stmt->execute()) {
        $n++;
    }
}
?>