使用PHP,MySQL和XML进行大数据导入

时间:2012-06-22 21:04:19

标签: php mysql xml

我正在构建一个脚本,将上传的XML文件中的数据导入MySQL,但不能直接导入。函数本身只有一个查询,但它总是超时。该查询必须连接两个表才能找到一个ID,该ID将与第三个表中的XML数据一起插入。

这是执行它的功能:

public function imprtTVLApi($file) {
    global $db, $user;
    $ext = pathinfo($file, PATHINFO_EXTENSION);
    $message = '';
    $text = '';
    $streaming_websites = array(
        'megavideo.com' => '1',
        'vidxden' => '2',
        'videobb.com' => '4',
        'veevr.com' => '5',
        'vureel.com' => '6',
        '2gb-hosting.com' => '7',
        'videoweed.com' => '9',
        'divxhosting.net' => '11',
        'movshare.net' => '12',
        'novamov.com' => '13',
        'ovfile.com' => '14',
        'videozer.com' => '15',
        'putlocker.com' => '16',
        'sockshare.com' => '17',
        'videoweed.es' => '18',
        'uploadc.com' => '19',
        'wupload.com' => '20',
        'vidbux.com' => '21',
        'veoh.com' => '22',
        'mixturevideo.com' => '23',
        'youtube.com' => '24',
        'filebox.com' => '25',
        'filenuke.com' => '26',
        'hostingbulk.com' => '27',
        'zalaa.com' => '28',
        'nowvideo.eu' => '29',
        'vidhog.com' => '30',
        'bitshare.com' => '31',
        'muchshare.net' => '32',
        'allmyvideos.net' => '33',
        'gorillavid.in' => '34',
        'daclips.in' => '35',
        'movpod.in' => '36',
        'stream2k.eu' => '37'
    );
    if ( $ext == "xml") :
        $tv  = new DomDocument();
        $tv->recover = TRUE;
        $tv->load($file) or die("Could not load the file! " .$file);
        $tvSearch = $tv->getElementsByTagName( "url" );
        $count = 0;

        foreach( $tvSearch as $tv ) {   

            // name
            $names = $tv->getElementsByTagName('name');
            $name = $names->item(0)->nodeValue;

            // seasons
            $seasons = $tv->getElementsByTagName('season');
            $season = $seasons->item(0)->nodeValue;

            // episode
            $episodes = $tv->getElementsByTagName('episode');
            $episode = $episodes->item(0)->nodeValue;

            // episode
            $links = $tv->getElementsByTagName('loc');
            $link = $links->item(0)->nodeValue;

            $user = $user->uid;
            $url  = $db->escape($link);
            $stream1 = $this->getHostFromURL($url);
            $stream = $streaming_websites[$stream1];
            $added = time();
            $status = "ok";
            $sql = 'SELECT e.id AS epid FROM ' .$this->epTable. ' AS e INNER JOIN ' .$this->tvTable. ' AS t ON e.show_ID = t.id WHERE t.title = "' .$name. '" AND e.season = "' .$season. '" AND e.episode = "' .$episode. '" LIMIT 1';
            $row = $db->first($sql);

            if( isset($row['epid'])) {

                $data = array(
                    'user_ID' => $user,
                    'url' => sanitize($url),
                    'stream_ID' => $stream,
                    'e_ID' => intval($row['epid']),
                    'added' => intval($added),
                    'status' => $status
                );


                $text .= "INSERT IGNORE INTO " .$this->lkTable. " (`user_ID`, `url`, `stream_ID`, `e_ID`, `added`, `status`) VALUES ('" .$data['user_ID']. "', '" .$data['url']. "', '" .$data['stream_ID']. "', '" .$data['e_ID']. "', '" .$data['added']. "', '" .$data['status']. "')";

            } else {
                $message = "Show not found";
            }
            $count++;

        }
        $myFile =  "export_" .$user->uid. "_" .time(). ".sql";
        $fh = fopen($myFile, 'w') or die("can't open file");
        $stringData = $text;
        fwrite($fh, $stringData);
        fclose($fh);
        parse_mysql_dump($myFile);
        unlink($myFile);
        $message = "Links: " .$count. ", Your links were added and they will show up on the site in a minute!";
    else :
        $message = "Incorrect file format! Must be XML!";
    endif;
    print($message);
}

这是数据库结构:

--
-- Table structure for table `episodes2`
--

CREATE TABLE IF NOT EXISTS `episodes2` (
  `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
  `title` varchar(80) NOT NULL DEFAULT 'Untitled',
  `show_ID` int(11) unsigned NOT NULL,
  `episode` bigint(3) unsigned NOT NULL DEFAULT '1',
  `season` bigint(3) unsigned NOT NULL DEFAULT '1',
  `description` text NOT NULL,
  `thumbnail` varchar(100) NOT NULL,
  `tvragepage` varchar(80) NOT NULL,
  `airdate` int(15) unsigned DEFAULT '0',
  `added` datetime DEFAULT '0000-00-00 00:00:00',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB  DEFAULT CHARSET=utf8 AUTO_INCREMENT=2000117 ;

-- --------------------------------------------------------

--
-- Table structure for table `tvshows2`
--

CREATE TABLE IF NOT EXISTS `tvshows2` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `imdbid` varchar(10) CHARACTER SET latin1 NOT NULL,
  `tvrageid` varchar(125) CHARACTER SET latin1 NOT NULL,
  `poster` varchar(256) CHARACTER SET latin1 NOT NULL,
  `slider` varchar(225) NOT NULL,
  `title` varchar(128) NOT NULL,
  `tagline` varchar(128) CHARACTER SET latin1 NOT NULL,
  `plot` text CHARACTER SET latin1 NOT NULL,
  `year` year(4) NOT NULL,
  `released` varchar(18) CHARACTER SET latin1 NOT NULL,
  `country` varchar(256) CHARACTER SET latin1 NOT NULL,
  `channel` smallint(3) NOT NULL DEFAULT '0',
  `runtime` char(6) CHARACTER SET latin1 NOT NULL,
  `storyline` text CHARACTER SET latin1 NOT NULL,
  `genres` varchar(256) CHARACTER SET latin1 NOT NULL,
  `actors` text CHARACTER SET latin1 NOT NULL,
  `votes` varchar(10) CHARACTER SET latin1 NOT NULL,
  `timezone` varchar(10) CHARACTER SET latin1 NOT NULL,
  `oscars` tinyint(4) NOT NULL,
  `seasons` int(4) NOT NULL,
  `mpaa` enum('TV-14','TV-Y','TV-PG','TV-MA') CHARACTER SET latin1 NOT NULL DEFAULT 'TV-14',
  `trivia` text CHARACTER SET latin1 NOT NULL,
  `status` enum('never_aired','cancelled_ended','returning_series','new_series','on_hiatus','tbd','pilot_rejected','in_development','pilot_ordered') CHARACTER SET latin1 NOT NULL DEFAULT 'returning_series',
  `permalink` varchar(125) NOT NULL,
  `classification` varchar(35) CHARACTER SET latin1 NOT NULL DEFAULT 'Unavailable',
  `airtime` varchar(150) CHARACTER SET latin1 NOT NULL DEFAULT 'Unavailable',
  `airday` varchar(80) CHARACTER SET latin1 NOT NULL DEFAULT 'Monday',
  `ended` varchar(20) CHARACTER SET latin1 NOT NULL,
  UNIQUE KEY `id_2` (`id`),
  UNIQUE KEY `permalink` (`permalink`),
  KEY `id` (`id`)
) ENGINE=InnoDB  DEFAULT CHARSET=utf8 AUTO_INCREMENT=31994 ;

-- --------------------------------------------------------

--
-- Table structure for table `tv_links`
--

CREATE TABLE IF NOT EXISTS `tv_links` (
  `link_ID` int(11) unsigned NOT NULL AUTO_INCREMENT,
  `user_ID` int(11) NOT NULL DEFAULT '0',
  `url` varchar(225) CHARACTER SET latin1 NOT NULL,
  `stream_ID` int(11) NOT NULL,
  `e_ID` int(11) NOT NULL DEFAULT '1',
  `added` int(11) DEFAULT NULL,
  `status` enum('ok','broken','deleted','pending') CHARACTER SET latin1 NOT NULL DEFAULT 'ok',
  PRIMARY KEY (`link_ID`),
  UNIQUE KEY `url` (`url`),
  KEY `e_ID` (`e_ID`)
) ENGINE=InnoDB

在循环创建.sql文件后,数据将发送到MySQL。 这里的关键是找到将在链接表中导入的剧集ID。

您对如何更快或任何想法有任何建议吗?

0 个答案:

没有答案