使用google docs和php将.doc转换为.html

时间:2011-11-23 12:45:08

标签: php html google-docs-api zend-framework

我正在尝试使用 Zend Framework 和 Google API 将 Word 文档转换为 HTML,但总是出错。我可以把文件上传到我的 Google 帐户,但之后无法将其导出为 HTML 并下载回我的服务器。我使用的是 PHP,服务器是 Linux。

有没有人这样做过?

提前感谢。

修改

convert.php:

// Driver script: hard-codes the argument vector (input document at index 3,
// output file at index 4), checks that the input exists and hands both names
// to googlecsv().
$argv = array("", "", "", "achim.doc", "stocklist.html");

require_once 'ConvertDoc.php';

$sourceDocument = $argv[3];
$targetFile = $argv[4];

// Stop early when there is nothing to convert.
if (file_exists($sourceDocument) === false) {
    echo $sourceDocument . " :: file does not exists\n\n";
    exit;
}

// No separate temporary upload location is used, hence the empty third argument.
googlecsv($sourceDocument, $targetFile, '');

/**
 * Run one document through ConvertDoc and request the result as HTML.
 *
 * Echoes the three arguments first, then builds a ConvertDoc with empty
 * credentials and no cached tokens and calls convert() with the 'html'
 * format and an empty gid. Any Exception raised on the way is caught and
 * printed as a red HTML error line instead of being propagated.
 *
 * @param  string $originalFilename name of the document to convert
 * @param  string $newfile          file the converted output is saved to
 * @param  string $tempfile         temporary upload location, or '' when
 *                                  $originalFilename is the real path
 * @return void
 */
function googlecsv($originalFilename, $newfile, $tempfile)
{
    try {
        $cachedTokens = array();
        echo "$originalFilename, $newfile, $tempfile <br/>\n\n";

        // Credentials are left blank here; they must be filled in for a real login.
        $converter = new ConvertDoc('', '', $cachedTokens);

        // Export format is HTML; the empty gid means no worksheet id is sent.
        $converter->convert($originalFilename, $newfile, $tempfile, 'html', '');
    } catch (Exception $failure) {
        $reason = $failure->getMessage();
        echo "<b style='color:red;font-size:1em;'>ERROR: " . $reason . "</b><br/>\n";
    }
}

ConvertDoc.php:

<?php

// Location of the Zend Framework library on this particular server; it is
// appended to the include path so the Zend/... requires below resolve.
// NOTE(review): the path is hard-coded for one host - adjust per deployment.
$path = "/home/webqda/public_html/zend/library"; 
set_include_path(get_include_path() . PATH_SEPARATOR . $path);

// Load the Zend loader, then the Gdata classes referenced by ConvertDoc.
require_once 'Zend/Loader.php';
Zend_Loader::loadClass('Zend_Gdata');
Zend_Loader::loadClass('Zend_Gdata_AuthSub');
Zend_Loader::loadClass('Zend_Gdata_ClientLogin');
Zend_Loader::loadClass('Zend_Gdata_Spreadsheets');
Zend_Loader::loadClass('Zend_Gdata_Docs');


class ConvertDoc{
    // holds the service tokens, keyed by service name (see set_service_token / get_service_token)
    private $_serviceSessionToken = array();
    // account name and password handed to Zend_Gdata_ClientLogin when no token is cached
    private $_user = '';
    private $_pass = '';
    // when true, debug() prints trace lines and curlSrc() dumps the request headers and response
    private $_debug = false;
    // when true, convert() downloads through curlSrc(); otherwise through downloadSrc()
    private $_curl = false;

    /**
     * Remember the account credentials and pre-load any known service tokens.
     *
     * @param  string $username The username, in e-mail address format, to authenticate
     * @param  string $passwd   The password for the user specified
     * @param  array  $tokens   Map of service name => ClientLogin token; every entry
     *                          is registered through set_service_token()
     */
    public function __construct($username, $passwd, $tokens = array())
    {
        $this->_user = $username;
        $this->_pass = $passwd;

        foreach ($tokens as $serviceName => $serviceToken) {
            $this->set_service_token($serviceName, $serviceToken);
        }
    }

    /**
     * Print a timestamped trace line, but only while the debug flag is on.
     *
     * @param  string $message text to print after the timestamp
     * @return void
     */
    public function debug($message)
    {
        if (!$this->_debug) {
            return;
        }

        $stamp = date('Y-m-d H:i:s');
        echo $stamp . ' :: ' . $message . "  <br/>\n";
    }

    /**
     * Convert a file by uploading it to Google Docs, downloading it again in
     * the requested format and deleting the temporary remote copy.
     *
     * The remote copy is deleted even when the download step throws, so a
     * failed conversion does not leave stray documents in the account.
     *
     * @param  string           $filename              the file name (either direct path to file or name of file with $tempfile holding the path to actual tmp file
     * @param  string           $newfilename           save as this file name
     * @param  string           $tempfile              file location if upload (/tmp/...)
     * @param  string           $format                format of file to download
     *                                                 http://code.google.com/apis/documents/docs/3.0/developers_guide_protocol.html#DownloadingSpreadsheets
     * @param  string           $gid                   The gid parameter is an absolute identifier for worksheets
     *                                                 for spreadsheets (only sent when numeric)
     * @return void
     * @throws Exception when authentication, upload or download raises one
     */
    function convert($filename, $newfilename='', $tempfile='',$format='html',$gid=0){
        $this->debug('convert file');
        // authenticate against the documents list service
        $client = $this->getClientLoginHttpClient(Zend_Gdata_Docs::AUTH_SERVICE_NAME);

        $docs = new Zend_Gdata_Docs($client);
        $this->debug('authenticated');

        // upload the file as a temporary remote document
        $newDoc = $this->uploadDocument($docs, $filename, $tempfile);
        $this->debug('uploaded');

        try {
            // the entry's content source URL is where the export is fetched from
            $src = $newDoc->content->getSrc();
            if($this->_curl){
                $this->curlSrc($src, $format, $gid, $newfilename);
            } else {
                $this->downloadSrc($src, $format, $gid, $newfilename);
            }
            $this->debug('downloaded');
        } catch(Exception $downloadError) {
            // catch + rethrow (rather than finally) keeps this usable on old PHP 5 hosts
            try {
                $newDoc->delete();
                $this->debug('deleted');
            } catch(Exception $cleanupError) {
                // the download failure is the more useful error; only trace this one
                $this->debug('cleanup failed: '.$cleanupError->getMessage());
            }
            throw $downloadError;
        }

        // delete the temporary remote document
        $newDoc->delete();
        $this->debug('deleted');
    }

    /**
     * Cache the token for one service, stripped of surrounding whitespace.
     *
     * @param  string $service Which service the token belongs to.
     * @param  string $token   Token for the service identified
     * @return void
     */
    public function set_service_token($service, $token)
    {
        $cleanToken = trim($token);
        $this->_serviceSessionToken[$service] = $cleanToken;
    }

    /**
     * Return the cached token for a service.
     *
     * A missing or empty token is reported by exception, never by a false
     * return value, so callers must catch rather than test the result.
     *
     * @param  string $service Which service to look the token up for.
     * @return string
     * @throws Exception when no non-empty token is cached for $service
     */
    public function get_service_token($service)
    {
        if (empty($this->_serviceSessionToken[$service])) {
            throw new Exception("session token not found for service {$service}\n");
        }

        return $this->_serviceSessionToken[$service];
    }

    /**
     * Build an HTTP client authenticated for the given service.
     *
     * A cached token is reused when one exists. Otherwise a ClientLogin is
     * performed with the stored credentials and the resulting token is cached.
     * Exceptions raised by that login are deliberately not handled here; the
     * original author's notes name Zend_Gdata_App_AuthException (bad
     * credentials) and Zend_Gdata_App_CaptchaRequiredException (CAPTCHA
     * challenge, see http://code.google.com/apis/gdata/docs/auth/clientlogin.html)
     * as cases the calling application should catch.
     *
     * @param  string $service Which service to authenticate against.
     * @return Zend_Http_Client
     */
    public function getClientLoginHttpClient($service = 'writely')
    {
        try {
            $cachedToken = $this->get_service_token($service);
            $this->debug('token');
            $client = new Zend_Gdata_HttpClient();
            $client->setClientLoginToken($cachedToken);
        } catch (Exception $noUsableToken) {
            // nothing usable cached for this service: log in and remember the new token
            $this->debug('newtoken');
            $client = Zend_Gdata_ClientLogin::getHttpClient($this->_user, $this->_pass, $service);
            $freshToken = $client->getClientLoginToken();
            $this->set_service_token($service, $freshToken);
        }

        // give up on a request after 60 seconds
        $client->setConfig(array('timeout' => 60));

        return $client;
    }


    /**
     * Upload a file to the documents list feed, retrying on HTTP 408.
     *
     * The MIME type is looked up from the extension of $originalFileName and,
     * failing that, detected from the file contents via mimetype(). The upload
     * is attempted until it succeeds or ten timeouts (status 408) have
     * occurred; any other HTTP error is echoed and ends the loop immediately.
     *
     * @param  Zend_Gdata_Docs $docs                  The service object to use for communicating with
     *                                                the Google Documents server.
     * @param  string          $originalFileName      The name of the file to be uploaded; also used as
     *                                                the document title and as the source of the
     *                                                extension for the MIME lookup.
     * @param  string|false    $temporaryFileLocation (optional) Path of the file actually holding the
     *                                                data, e.g. an extension-less upload temp file.
     *                                                When empty, $originalFileName is read instead.
     * @return Zend_Gdata_Docs_DocumentListEntry
     * @throws Exception when no MIME type can be determined or the upload fails
     */
    private function uploadDocument($docs, $originalFileName, $temporaryFileLocation = false)
    {
        $sourcePath = $temporaryFileLocation ? $temporaryFileLocation : $originalFileName;

        // extension lookup first, content sniffing second
        $nameParts = explode('.', $originalFileName);
        $extension = end($nameParts);
        $mimeType = Zend_Gdata_Docs::lookupMimeType($extension);
        if (!$mimeType) {
            $mimeType = $this->mimetype($sourcePath);
        }
        if (!$mimeType) {
            throw new Exception("No Mime Type!");
        }

        $attempts = 0;
        $uploaded = false;
        while (!$uploaded && $attempts < 10) {
            try {
                $this->debug('upload');
                $newDocumentEntry = $docs->uploadFile(
                    $sourcePath,
                    $originalFileName,
                    $mimeType,
                    Zend_Gdata_Docs::DOCUMENTS_LIST_FEED_URI
                );
                $uploaded = true;
            } catch (Zend_Gdata_App_HttpException $httpError) {
                $response = $httpError->getResponse();
                if ($response->getStatus() == '408') {
                    // request timed out: count it and go round again
                    $attempts++;
                    $this->debug('try again');
                } else {
                    echo "<b style='color:red;font-size:1em;'>ERROR: ".$response->getMessage()." : ".$response->getBody()."</b><br/>\n";
                    // any other status is final: exhaust the attempt budget
                    $attempts = 10;
                }
            }
        }

        if ($attempts == 10) {
            throw new Exception("failed to upload file");
        }

        return $newDocumentEntry;
    }

    /**
     * Detect the MIME type of a local file from its contents.
     *
     * Uses the finfo class when it is available, trying the system magic
     * database at /usr/share/file/magic first and the database bundled with
     * PHP second. Without finfo it falls back to mime_content_type(), and to
     * false when that function is missing too. The detector is kept in a
     * local variable rather than on an undeclared object property.
     *
     * @param  string          $file               Path of the file to inspect
     *
     * @return string|false    finfo's FILEINFO_MIME description of the file,
     *                         'unknown' when no magic database could be loaded,
     *                         or false when detection is unavailable or fails
     */
    function mimetype($file){
        if(class_exists('finfo')){
            $detector = null;
            // null selects the magic database that ships with PHP
            foreach(array('/usr/share/file/magic', null) as $magicFile){
                if($magicFile !== null && !file_exists($magicFile)){
                    continue;
                }
                try {
                    $detector = new finfo(FILEINFO_MIME, $magicFile);
                    break;
                } catch(Exception $loadError) {
                    // this database is unusable; try the next candidate
                    $detector = null;
                }
            }
            if(!$detector){
                return 'unknown';
            }
            return $detector->file($file);
        }
        if(function_exists('mime_content_type')){
            return mime_content_type($file);
        }
        // no detection facility at all; the caller treats a falsy result as "no MIME type"
        return false;
    }

    /**
     * Download an export of a document through PHP's HTTP stream wrapper.
     *
     * Authenticates against the service matching the URL, appends the export
     * parameters to the source URL, fetches it with a GoogleLogin
     * authorization header and optionally stores the result in a file.
     * A failed request or a failed write raises an exception instead of
     * silently leaving an empty output file behind.
     *
     * @param  string          $src_url               Link to the source file to download
     * @param  string          $format                format of file to download
     *                                                http://code.google.com/apis/documents/docs/3.0/developers_guide_protocol.html#DownloadingSpreadsheets
     * @param  string          $gid                   The gid parameter is an absolute identifier for worksheets
     *                                                for spreadsheets (only sent when numeric)
     * @param  string|false    $file                  location of the file to save the data to, or false
     *                                                to only return it
     *
     * @return string          the downloaded data
     * @throws Exception       when the download or the write to $file fails
     */
    private function downloadSrc($src_url, $format='html', $gid=0, $file=false) {
        // find service based on url
        $service = $this->src_url_service($src_url);
        // authenticate to service (caches a token when none is stored yet)
        $this->getClientLoginHttpClient($service);
        // get the token from the service
        $sessionToken = $this->get_service_token($service);

        $opts = array(
            'http' => array(
                'method' => 'GET',
                'header' => "GData-Version: 3.0\r\n".
                "Authorization: GoogleLogin auth=$sessionToken\r\n"
            )
        );

        // BUILD URL: same parameters and order as before, but properly encoded
        $params = array('chrome' => 'false');
        if($format){
            $params['format'] = $format;
            $params['exportFormat'] = $format;
        }
        if(is_numeric($gid)){
            $params['gid'] = $gid;
        }
        // start a query string when the source URL does not carry one yet
        $separator = (strpos($src_url, '?') === false) ? '?' : '&';
        // explicit '&' so the arg_separator.output ini setting cannot change the URL
        $src_url = $src_url . $separator . http_build_query($params, '', '&');

        // GET DATA
        $data = file_get_contents($src_url, false, stream_context_create($opts));
        if($data === false){
            throw new Exception("failed to download document from {$src_url}");
        }

        if($file && file_put_contents($file, $data) === false){
            throw new Exception("failed to write downloaded data to {$file}");
        }
        return $data;
    }

    /**
     * Download an export of a document using the curl extension.
     *
     * Roughly equivalent to:
     * curl -o tmp1 -H "Authorization: GoogleLogin auth={authcode}" "http://spreadsheets.google.com/feeds/download/spreadsheets/Export?key={dockey}&exportFormat={format}"
     *
     * The output file handle is always closed, and a file that cannot be
     * opened or a failed transfer raises an exception.
     *
     * @param  string          $src_url               Link to the source file to download
     * @param  string          $format                format of file to download
     *                                                http://code.google.com/apis/documents/docs/3.0/developers_guide_protocol.html#DownloadingSpreadsheets
     * @param  string          $gid                   The gid parameter is an absolute identifier for worksheets
     *                                                for spreadsheets (only sent when numeric)
     * @param  string|false    $file                  location of the file to save the data to, or false
     *                                                to return the data instead
     *
     * @return string|bool     the downloaded data, or true when it was written to $file
     * @throws Exception       when $file cannot be opened or the transfer fails
     */
    private function curlSrc($src_url, $format='html', $gid=0, $file=false){
        // find service based on url
        $service = $this->src_url_service($src_url);
        // authenticate to service (caches a token when none is stored yet)
        $this->getClientLoginHttpClient($service);
        // get the token from the service
        $sessionToken = $this->get_service_token($service);

        // open file if saving to file.
        $handle = false;
        if($file){
            $handle = fopen($file,"w+");
            if($handle === false){
                throw new Exception("failed to open {$file} for writing");
            }
        }

        // BUILD URL: same parameters and order as before, but properly encoded
        $params = array('chrome' => 'false');
        if($format){
            $params['format'] = $format;
            $params['exportFormat'] = $format;
        }
        if(is_numeric($gid)){
            $params['gid'] = $gid;
        }
        // start a query string when the source URL does not carry one yet
        $separator = (strpos($src_url, '?') === false) ? '?' : '&';
        // explicit '&' so the arg_separator.output ini setting cannot change the URL
        $src_url = $src_url . $separator . http_build_query($params, '', '&');

        // INIT CURL
        $curl = curl_init($src_url);
        $header = array(
            "GData-Version: 3.0",
            "Authorization: GoogleLogin auth=$sessionToken"
        );
        curl_setopt($curl, CURLOPT_HTTPHEADER, $header);

        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);// follow redirects!!
        curl_setopt($curl, CURLOPT_POST, false);
        curl_setopt($curl, CURLINFO_HEADER_OUT,true); // TRUE to track the handle's request string.

        if($handle){
            curl_setopt($curl, CURLOPT_FILE,$handle); // file to write output to
        } else {
            curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); // output to variable
        }
        $data = curl_exec($curl); // execute the curl command
        // read the error text before the handle is closed
        $transferError = ($data === false) ? curl_error($curl) : '';

        // debug info
        if($this->_debug){
            echo curl_getinfo($curl, CURLINFO_HEADER_OUT);
            var_dump($data);
        }

        curl_close($curl); // close the connection
        if($handle){
            fclose($handle); // flush and release the output file
        }

        if($data === false){
            throw new Exception("failed to download document: {$transferError}");
        }
        return $data;
    }

    /**
     * Pick the ClientLogin service name that matches a content source URL.
     *
     * URLs containing "spreadsheet" (case-insensitive) map to the spreadsheets
     * service; everything else maps to the documents service. PDFs get no
     * special treatment, see
     * http://code.google.com/apis/documents/docs/3.0/developers_guide_protocol.html#DownloadingDocs
     *
     * @param  string $src_url content source URL of the uploaded entry
     * @return string service name constant from the matching Zend_Gdata class
     */
    private function src_url_service($src_url)
    {
        $isSpreadsheet = stripos($src_url, 'spreadsheet') !== false;

        return $isSpreadsheet
            ? Zend_Gdata_Spreadsheets::AUTH_SERVICE_NAME
            : Zend_Gdata_Docs::AUTH_SERVICE_NAME;
    }

}

这段代码本应将 achim.doc 文件转换为 stocklist.html。实际上确实生成了 stocklist.html 文件,但其内容却是 Google 文档的登录页面。

0 个答案:

没有答案