我想创建一个机器人:它先列出我的wiki的所有页面,然后搜索整个wiki,当它在某个页面中找到另一个页面的名称时,就把这个名称变成指向该页面的超链接。
例如,我有一个名为"Wiki"的页面,而另一个页面中出现了"wiki"这个词,所以我想在那里创建一个指向页面"Wiki"的超链接。
这是我第一次使用此API,所以我不确定该如何继续。我已经发现可以使用"list=allpages"列出所有页面,并使用"list=search"在整个wiki中搜索字符串。但是,当我得到包含该字符串的页面名称之后,该如何编辑页面中的这些字符串呢?
目前我是用PHP来做的,那么我能否先获取页面的全部内容,修改之后再提交对该页面的编辑呢?
答案 0 :(得分:0)
如果您有兴趣,下面是我创建这个机器人的方法:
// Bot configuration: cookie file location, bot credentials and the wiki address.
// NOTE(review): $linkWiki is used as the request URL for every API call below,
// so it presumably has to point at the wiki's api.php endpoint - confirm.
$path_cookie = "______path________";
$botLogin = "Bot";
$botPass = "password";
$linkWiki = "exemple.com";

// The cookie file has to exist so the bot can stay logged in between requests.
if (file_exists($path_cookie) === false) {
    touch($path_cookie);
}

// A single cURL handle, shared by every request made further down.
$curl = curl_init();
/**
 * Send one POST request on the shared cURL handle and return the raw response.
 * This is the helper used for every request once the bot is logged in.
 *
 * @param array    $postfields  POST fields of the request.
 * @param resource $curl        cURL handle created with curl_init().
 * @param string   $linkWiki    URL the request is sent to.
 * @param string   $path_cookie Path of the cookie file read for the request.
 *
 * @return string|false Response body, or false when curl_exec() fails.
 */
function requeteCurl($postfields, $curl, $linkWiki, $path_cookie)
{
    // Options are applied one by one, in this exact order.
    $options = array(
        CURLOPT_URL => $linkWiki,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $postfields,
        CURLOPT_COOKIEFILE => realpath($path_cookie),
    );
    foreach ($options as $option => $valeur) {
        curl_setopt($curl, $option, $valeur);
    }

    return curl_exec($curl);
}
/*
 * Log the bot in. This is done in two requests: the first one sends the
 * credentials and its answer carries a login token, the second one repeats
 * the credentials together with that token.
 */
$postfields = array(
    'action' => 'login',
    'format' => 'json',
    'lgname' => $botLogin,
    'lgpassword' => $botPass
);
curl_setopt($curl, CURLOPT_URL, $linkWiki);
curl_setopt($curl, CURLOPT_COOKIESESSION, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_POST, true);
curl_setopt($curl, CURLOPT_POSTFIELDS, $postfields);
curl_setopt($curl, CURLOPT_COOKIEJAR, $path_cookie); // cookies have to be stored from the first request on
$connexion = curl_exec($curl);
if (!$connexion) {
    // The closing parenthesis after the URL was missing in the message.
    throw new Exception("Error getting data from server ($linkWiki): " . curl_error($curl));
}

// The answer may not be JSON, or may carry no token: read it defensively
// instead of indexing into a possibly-null structure.
$json_connexion = json_decode($connexion, true);
$tokenConnexion = isset($json_connexion['login']['token'])
    ? $json_connexion['login']['token']
    : null;

// Second login request, this time with the token.
$postfields = array(
    'action' => 'login',
    'format' => 'json',
    'lgtoken' => $tokenConnexion,
    'lgname' => $botLogin,
    'lgpassword' => $botPass
);
$connexionToken = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
var_dump($connexionToken); // shows the login result so it can be checked by eye
/*
 * List the pages of the wiki: their titles are the strings searched for later.
 * NOTE(review): only a single 'allpages' request is made, so presumably only
 * the first batch of titles is returned on a large wiki - confirm whether
 * continuation is needed.
 */
$postfields = array(
    'action' => 'query',
    'format' => 'json',
    'list' => 'allpages',
    'aplimit' => 'max'
);
$pagesWiki = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
$json_pagesWikis = json_decode($pagesWiki, true);

// Array holding the unmodified page titles. It is initialised under the name
// that is actually filled and read afterwards, so it is always defined even
// when the request failed or returned no page.
$tabNomsPagesWiki = array();
if (isset($json_pagesWikis["query"]["allpages"]) && is_array($json_pagesWikis["query"]["allpages"])) {
    foreach ($json_pagesWikis["query"]["allpages"] as $pages) {
        $tabNomsPagesWiki[] = $pages["title"];
    }
}
/*
 * For every page title: search the wiki for pages whose text contains the
 * title, and in each of those pages turn the plain occurrences of the title
 * into [[links]], leaving alone the occurrences that already sit inside a link.
 */
foreach ($tabNomsPagesWiki as $chaineRecherchee) { // each title is used as the search string
    $postfields = array(
        'action' => 'query',
        'format' => 'json',
        'list' => 'search',
        'srsearch' => $chaineRecherchee,
        'srwhat' => 'text',
        'srlimit' => 'max'
    );
    $pagesString = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
    $json_pagesString = json_decode($pagesString, true);

    // Titles of the pages containing the searched string. A failed or empty
    // answer simply yields no page.
    $pagesComportantLaRecherche = array();
    if (isset($json_pagesString["query"]["search"]) && is_array($json_pagesString["query"]["search"])) {
        foreach ($json_pagesString["query"]["search"] as $search) {
            $pagesComportantLaRecherche[] = $search["title"];
        }
    }

    // The title is arbitrary text: escape it before putting it in a regex,
    // otherwise titles such as "C++" or "Foo (bar)" break or change the pattern.
    $titreEchappe = preg_quote($chaineRecherchee, '/');

    $stringLien = "[[" . $chaineRecherchee . "]]"; // the link that replaces the plain title
    $stringLength = strlen($chaineRecherchee);
    // Matches the links (double or single bracket) that contain the title.
    $patternLien = "/((\\[\\[[^\\]]*)[\\s](" . $titreEchappe . ")[\\s\\,\\.][^\\]]*\\]\\])|((\\[[^\\]]*)[\\s\\'](" . $titreEchappe . ")[\\s\\,\\.\\'][^\\]]*\\])/mi";
    // Matches the title itself, together with one delimiter character on each side.
    $patternNomPage = "/[\\s\\']" . $titreEchappe . "[\\s\\,\\.\\']/im";

    foreach ($pagesComportantLaRecherche as $pageRecherche) {
        // Never link a page to itself. Strict comparison, so that
        // numeric-looking titles ("10" / "1e1") are not treated as equal.
        if ($pageRecherche === $chaineRecherchee) {
            continue;
        }

        $postfields = array(
            'action' => 'parse',
            'format' => 'json',
            'page' => $pageRecherche,
            'prop' => 'wikitext'
        );
        $pageContent = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
        $json_pagesContent = json_decode($pageContent, true);
        if (!isset($json_pagesContent["parse"]["wikitext"]["*"])) {
            continue; // the content could not be fetched: nothing to edit
        }
        $text_pagesContent = $json_pagesContent["parse"]["wikitext"]["*"]; // whole wikitext of the page

        // Both match lists carry byte offsets into the ORIGINAL text.
        preg_match_all($patternLien, $text_pagesContent, $liens, PREG_OFFSET_CAPTURE);
        preg_match_all($patternNomPage, $text_pagesContent, $nomPages, PREG_OFFSET_CAPTURE);

        // Shift between an offset in the original text and the position to
        // write at. It starts at 1 because each match begins with one
        // delimiter character that must be kept.
        $decalage = 1;
        foreach ($nomPages[0] as $page) {
            $offsetNomPagetrouvee = $page[1];

            // Is this occurrence located inside one of the existing links?
            $est_dans_lien = false;
            foreach ($liens[0] as $lien) {
                $lienOffset = $lien[1];
                $lienTaille = strlen($lien[0]);
                if ($lienOffset <= $offsetNomPagetrouvee && $offsetNomPagetrouvee <= $lienOffset + $lienTaille) {
                    $est_dans_lien = true;
                    break;
                }
            }

            if (!$est_dans_lien) {
                $text_pagesContent = substr_replace($text_pagesContent, $stringLien, $offsetNomPagetrouvee + $decalage, $stringLength);
                $decalage += 4; // the text grew by the four characters of [[ and ]]
            }
        }

        if ($decalage > 1) { // at least one link was created: save the page
            $postfields = array(
                'action' => 'query',
                'meta' => 'tokens',
                'format' => 'json'
            );
            $tokenEdit = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
            $json_tokenEdit = json_decode($tokenEdit, true);
            $text_tokenEdit = isset($json_tokenEdit['query']['tokens']['csrftoken'])
                ? $json_tokenEdit['query']['tokens']['csrftoken']
                : null;

            $postfields = array(
                'action' => 'edit',
                'format' => 'json',
                'title' => $pageRecherche,
                'text' => $text_pagesContent,
                'bot' => '',
                'token' => $text_tokenEdit
            );
            $edit = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
            echo "\n" . $edit;
        }
    }
}
// Release the cURL handle BEFORE deleting the cookie file: the cookie jar set
// with CURLOPT_COOKIEJAR is written out when the handle is destroyed, so
// deleting the file first would let it be recreated at the end of the script.
unset($curl);
unlink($path_cookie);
嗯,我确信这段代码里有很多不必要的东西,但我并不是PHP和MediaWiki的专家,而且脚本运行得还不错,所以我认为它也没那么糟糕 ^^