Api mediawiki:机器人创建超链接?

时间:2015-06-22 08:56:28

标签: php hyperlink mediawiki-api

我想创建一个机器人,它会列出我的 wiki 页面,然后搜索 wiki 的所有页面,当它在另一个页面中找到某个页面的名称时创建超链接。

我有一个名为 "Wiki" 的页面,另一页中出现了 "wiki" 这个词。所以我想创建一个超链接来重定向到页面 "Wiki"。

这是我第一次使用此 API,所以我不确定如何继续。我已经发现可以使用 "list=allpages" 列出所有页面,并使用 "list=search" 在整个 wiki 中搜索字符串。但是当我找到包含某个页面名称的页面时,如何把页面中的那些字符串改成链接呢?

现在我在PHP中这样做,所以我可以做一些事情,比如获取页面的所有内容,更改它然后编辑页面吗?

1 个答案:

答案 0 :(得分:0)

如果您有兴趣,我就是创建这个机器人的方法:

// --- Bot configuration --------------------------------------------------
$path_cookie = "______path________"; // path of the cookie jar that keeps the session alive
$botLogin = "Bot";                   // bot account name
$botPass = "password";               // bot account password
$linkWiki = "exemple.com";           // URL of the wiki's api.php endpoint

// The cookie file must exist before cURL can read/write it.
if (!file_exists($path_cookie)) {
    touch($path_cookie);
}

// One shared cURL handle is reused for every request below.
$curl = curl_init();

/**
 * Send one POST request to the wiki API, replaying the stored session
 * cookies so the bot stays authenticated. Used for every request after login.
 *
 * @param array    $postfields  API parameters (action, format, ...)
 * @param resource $curl        shared cURL handle
 * @param string   $linkWiki    URL of the wiki's api.php endpoint
 * @param string   $path_cookie path of the cookie jar created at login
 * @return string  raw response body (usually JSON)
 * @throws Exception when the HTTP request itself fails
 */
function requeteCurl($postfields, $curl, $linkWiki, $path_cookie) //the function you'll just use for each of your requests when logged in
{
    curl_setopt($curl, CURLOPT_URL, $linkWiki);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_POST, true);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $postfields);
    curl_setopt($curl, CURLOPT_COOKIEFILE, realpath($path_cookie));
    $resultat = curl_exec($curl);

    // Fail loudly instead of returning false, which would otherwise flow
    // silently into json_decode() at the call sites.
    if ($resultat === false) {
        throw new Exception("Error getting data from server ($linkWiki): " . curl_error($curl));
    }

    return $resultat;
}

    /* First you need to login with your bot */

$postfields = array(
        'action' => 'login',
        'format' => 'json',
        'lgname' => $botLogin,
        'lgpassword' => $botPass
);

// The first login request is sent "by hand" (not via requeteCurl) because it
// must also CREATE the cookie jar (CURLOPT_COOKIEJAR) that every later
// request replays with CURLOPT_COOKIEFILE.
curl_setopt($curl, CURLOPT_URL, $linkWiki);
curl_setopt($curl, CURLOPT_COOKIESESSION, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_POST, true);
curl_setopt($curl, CURLOPT_POSTFIELDS, $postfields);
curl_setopt($curl, CURLOPT_COOKIEJAR, $path_cookie); //you need to stock your cookies the first time
$connexion = curl_exec($curl);
if (!$connexion) {
    // NOTE: the original message was missing the closing parenthesis.
    throw new Exception("Error getting data from server ($linkWiki): " . curl_error($curl));
}

// MediaWiki answers the first login attempt with a token; a second login
// request carrying lgtoken completes the authentication.
$json_connexion = json_decode($connexion, true);
$tokenConnexion = $json_connexion['login']['token'];
$postfields = array(
        'action' => 'login',
        'format' => 'json',
        'lgtoken' => $tokenConnexion,
        'lgname' => $botLogin,
        'lgpassword' => $botPass
);

$connexionToken = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
var_dump($connexionToken); // debug output of the login result — remove in production

    /* You have to list all the pages in your wiki to know which strings to search for */

$postfields = array(
        'action' => 'query',
        'format' => 'json',
        'list' => 'allpages',
        'aplimit' => 'max'
);
$pagesWiki = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
$json_pagesWikis = json_decode($pagesWiki, true);

// Collect every page title: each title is a string we will search for and
// turn into a [[link]] elsewhere.
// NOTE: the original initialised $tabPagesWiki (never used) while filling
// $tabNomsPagesWiki (never initialised) — fixed here.
$tabNomsPagesWiki = array();
foreach ($json_pagesWikis["query"]["allpages"] as $pages) {
    $tabNomsPagesWiki[] = $pages["title"];
}

    /* Then you search on all the wiki to find the pages where the string you search is */

foreach ($tabNomsPagesWiki as $chaineRecherchee) { // each page title is used as a search string
    $postfields = array(
            'action' => 'query',
            'format' => 'json',
            'list' => 'search',
            'srsearch' => $chaineRecherchee,
            'srwhat' => 'text',
            'srlimit' => 'max'
    );

    $pagesString = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
    $json_pagesString = json_decode($pagesString, true);

    // Titles of all pages whose text contains the searched string.
    $pagesComportantLaRecherche = array();
    foreach ($json_pagesString["query"]["search"] as $search) {
        $pagesComportantLaRecherche[] = $search["title"];
    }

    /* now you have to find your string in the page */

    foreach ($pagesComportantLaRecherche as $pageRecherche) {
        if ($pageRecherche != $chaineRecherchee) { // don't create a link to the page inside itself!

            // Fetch the raw wikitext of the page to edit.
            $postfields = array(
                    'action' => 'parse',
                    'format' => 'json',
                    'page' => $pageRecherche,
                    'prop' => 'wikitext'
            );
            $pageContent = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
            $json_pagesContent = json_decode($pageContent, true);
            $text_pagesContent = $json_pagesContent["parse"]["wikitext"]["*"]; // full page content

            /* To replace the string with a link, first locate the existing links
               in the page so we never put a link inside a link */

            $stringLien = "[[" . $chaineRecherchee . "]]"; // replacement text
            $stringLength = strlen($chaineRecherchee);

            // Escape regex metacharacters: page titles may legally contain
            // characters like '.', '(', ')' or '?' that would otherwise
            // corrupt the patterns below (bug fix: original interpolated the
            // raw title into the regex).
            $chaineEchappee = preg_quote($chaineRecherchee, '/');

            // All existing [[...]] / [...] links that already contain the title.
            $patternLien = "/((\\[\\[[^\\]]*)[\\s](" . $chaineEchappee . ")[\\s\\,\\.][^\\]]*\\]\\])|((\\[[^\\]]*)[\\s\\'](" . $chaineEchappee . ")[\\s\\,\\.\\'][^\\]]*\\])/mi";
            preg_match_all($patternLien, $text_pagesContent, $liens, PREG_OFFSET_CAPTURE);

            // Every bare occurrence of the title, delimited by whitespace,
            // quote or punctuation.
            $patternNomPage = "/[\\s\\']" . $chaineEchappee . "[\\s\\,\\.\\']/im";
            preg_match_all($patternNomPage, $text_pagesContent, $nomPages, PREG_OFFSET_CAPTURE);

            // Each match starts one character BEFORE the title itself (the
            // leading [\s'] of the pattern), hence the initial offset of 1.
            $decalage = 1;

            foreach ($nomPages[0] as $page) {
                // Compare the offset of each bare occurrence against the
                // spans of the existing links.
                $offsetNomPagetrouvee = $page[1];
                $est_dans_lien = false;
                foreach ($liens[0] as $lien) {
                    $lienOffset = $lien[1];
                    $lienTaille = strlen($lien[0]);
                    if ($lienOffset <= $offsetNomPagetrouvee && $offsetNomPagetrouvee <= $lienOffset + $lienTaille) {
                        $est_dans_lien = true;
                        break;
                    }
                }
                if (!$est_dans_lien) {
                    // Bare occurrence outside any link: replace it with [[title]].
                    $text_pagesContent = substr_replace($text_pagesContent, $stringLien, $offsetNomPagetrouvee + $decalage, $stringLength);
                    // Every replacement inserts 4 characters ("[[" and "]]"),
                    // shifting all the following offsets.
                    $decalage += 4;
                }
            }

            if ($decalage > 1) { // at least one link was created: save the page

                // An edit requires a fresh CSRF token.
                $postfields = array(
                        'action' => 'query',
                        'meta' => 'tokens',
                        'format' => 'json'
                );
                $tokenEdit = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
                $json_tokenEdit = json_decode($tokenEdit, true);
                $text_tokenEdit = $json_tokenEdit['query']['tokens']['csrftoken'];

                $postfields = array(
                        'action' => 'edit',
                        'format' => 'json',
                        'title' => $pageRecherche,
                        'text' => $text_pagesContent,
                        'bot' => '',
                        'token' => $text_tokenEdit
                );
                $edit = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
                echo "\n" . $edit;
            }
        }
    }
}


unlink($path_cookie); // delete the session cookie file once the bot is done

嗯,我确定这段代码中有很多不必要的东西,毕竟我不是 PHP 和 MediaWiki 的专家。不过脚本运行得很好,所以我觉得还不算太糟 ^^