用于解析 Mobi 文件的 PHP 库

时间:2012-08-05 14:01:55

标签: php parsing mobipocket

是否有免费的PHP库可解析.mobi文件以获取:

  • 作者
  • 标题
  • 出版商

修改

对于所有认为这个问题与“Does a PHP Library Exist to Work with PRC/MOBI Files”完全重复的人:你们显然没有认真阅读这两个问题。

那位提问者想知道如何使用PHP库生成.mobi文件,而我想知道如何拆分或解析已有的.mobi文件以获取其中的某些信息。因此,那个问题的解决方案phpMobi对我并不适用,因为它是一个从HTML生成.mobi文件的脚本,而不是用来解析.mobi文件的。

3 个答案:

答案 0 :(得分:5)

这是一个非常非常粗糙的例子,但如果你实在走投无路,可以尝试这样做:

// Quick-and-dirty metadata scrape: take up to 512 bytes starting at the
// "EXTH" marker and treat the NUL-separated text fragments found there as
// metadata values. A .mobi file is binary data, so the marker is located and
// the window is sliced with byte-oriented functions rather than mb_* ones.
$data = file_get_contents("A Young Girl's Diary - Freud, Sigmund.mobi");
$exthPos = ($data === false) ? false : strpos($data, 'EXTH');

// Stays empty when the file cannot be read or holds no usable EXTH block.
$chunks = array();
if ($exthPos !== false) {
    $chunk = substr($data, $exthPos, 512);
    $chunks = explode("\x00", $chunk);
    // The first fragment begins with the "EXTH" marker itself; discard it.
    array_shift($chunks);

    // Keep fragments that look like text: at least one capital letter and
    // more than two characters.
    $chunks = array_filter($chunks, function($str){return preg_match('#([A-Z])#', $str) && mb_strlen($str) > 2;});

    // array_combine() needs exactly as many values as keys, so use only the
    // first three surviving fragments and give up if there are fewer.
    // NOTE: the author/publisher/title order is a heuristic that matched the
    // sample file; it is not guaranteed by the file format.
    $keys = array('author', 'publisher', 'title');
    $chunks = array_slice(array_values($chunks), 0, count($keys));
    $chunks = (count($chunks) === count($keys)) ? array_combine($keys, $chunks) : array();
}

print_r($chunks);

输出:

Array
(
    [author] => Freud, Sigmund
    [publisher] => Webarto
    [title] => A Young Girl's Diary
)

使用的文件:http://freekindlebooks.org/Freud/752-h.mobi(使用Calibre编辑发布者元数据)

文件解析根本不是一件容易或有趣的事情。请看一下:http://code.google.com/p/xee/source/browse/XeePhotoshopLoader.m?r=a70d7396356997114b548f4ab2cbd49badd7d285#107

你应该做的是逐字节读取文件,但由于没有详细的文档,恐怕这不是一件容易的事。

P.S. 我没有尝试过提取封面图片。

答案 1 :(得分:4)

如果还有人感兴趣,这里有一个读取mobi元数据的示例:

/**
 * Plain value holder for the fields read from the beginning of record 0
 * (the PalmDOC header). Every field is 0 until a reader fills it in.
 */
class palmDOCHeader
{
    /** @var int 2-byte big-endian value stored at bytes 0-1 of record 0. */
    public $Compression = 0;

    /** @var int 4-byte big-endian value stored at bytes 4-7 of record 0. */
    public $TextLength = 0;

    /** @var int 2-byte big-endian value stored at bytes 8-9 of record 0. */
    public $Records = 0;

    /** @var int 2-byte big-endian value stored at bytes 10-11 of record 0. */
    public $RecordSize = 0;
}

/**
 * Container for the record list of the Palm database that wraps the book.
 */
class palmHeader
{
    /** @var palmRecord[] Record entries in the order they appear in the file; empty by default. */
    public $Records = array();
}

/**
 * One 8-byte entry of the Palm database record list.
 */
class palmRecord
{
    /** @var int Position of the record's data in the file (4-byte big-endian value). */
    public $Offset = 0;

    /** @var int Attribute byte of the record (1 byte). */
    public $Attributes = 0;

    /** @var int Identifier of the record (3-byte big-endian value). */
    public $Id = 0;
}

/**
 * Plain value holder for the leading fields of the MOBI header, i.e. the
 * values stored right after the "MOBI" marker. Every field defaults to 0.
 */
class mobiHeader
{
    /** @var int Header length; added to the header's start position to find what follows it. */
    public $Length = 0;

    /** @var int 4-byte "type" field. */
    public $Type = 0;

    /** @var int 4-byte "encoding" field. */
    public $Encoding = 0;

    /** @var int 4-byte "id" field. */
    public $Id = 0;

    /** @var int File version; stays 0 unless a reader sets it. */
    public $FileVersion = 0;
}

/**
 * Holder for the EXTH block: its declared length and its metadata records.
 */
class exthHeader
{
    /** @var int Length value stored right after the "EXTH" marker. */
    public $Length = 0;

    /** @var exthRecord[] Metadata records in file order; empty by default. */
    public $Records = array();
}

/**
 * A single EXTH metadata record: a numeric type, the record's total length
 * and its payload.
 */
class exthRecord
{
    /** @var int Numeric record type (for example 100 is looked up for the author). */
    public $Type = 0;

    /** @var int Total record length in bytes; the payload is this value minus 8. */
    public $Length = 0;

    /** @var string Raw payload bytes of the record. */
    public $Data = "";
}

/**
 * Minimal metadata reader for Mobipocket (.mobi) files.
 *
 * The constructor walks the Palm database record list, the PalmDOC header,
 * the MOBI header and the EXTH block of the given file, echoing a textual
 * dump of everything it reads. The EXTH records are kept so that the
 * accessors (Title(), Author(), ...) can return their payloads afterwards.
 *
 * When the file cannot be opened, is not a "BOOKMOBI" file, or carries no
 * EXTH block, the accessors return an empty string.
 */
class mobi {
    /** @var mobiHeader|null Set once a "MOBI" marker has been found. */
    protected $mobiHeader;

    /** @var exthHeader|null Set once an "EXTH" marker has been found. */
    protected $exthHeader;

    /**
     * Opens and parses the file, echoing a dump of the parsed structures.
     *
     * If the file cannot be opened nothing is parsed (fopen() itself raises
     * the usual PHP warning) and the object stays empty.
     *
     * @param string $file Path of the .mobi file to read.
     */
    public function __construct($file){
        // "rb": the file is binary, so no newline translation may take place.
        $handle = fopen($file, "rb");
        if ($handle){
            $this->parse($handle);
            fclose($handle);
        }
    }

    /**
     * Runs the parsing stages in file order and stops at the first missing one.
     *
     * @param resource $handle Open, seekable file handle.
     */
    private function parse($handle)
    {
        fseek($handle, 60, SEEK_SET);
        if (fread($handle, 8) !== "BOOKMOBI"){
            echo "Invalid file format";
            return;
        }

        $palmHeader = $this->readPalmDatabase($handle);
        // Without a record 0 there is no header to parse.
        if (count($palmHeader->Records) === 0)
            return;

        $this->readPalmDocHeader($handle, $palmHeader->Records[0]->Offset);

        if ($this->readMobiHeader($handle))
            $this->readExthHeader($handle);
    }

    /**
     * Reads the database name and the record list of the Palm database.
     *
     * @param resource $handle Open, seekable file handle.
     * @return palmHeader Header holding every record entry that could be read.
     */
    private function readPalmDatabase($handle)
    {
        echo "\nPalm database:\n";
        $palmHeader = new palmHeader();

        fseek($handle, 0, SEEK_SET);
        $name = fread($handle, 32);
        echo "Name: ".$name."\n";

        fseek($handle, 76, SEEK_SET);
        $records = $this->readUInt($handle, 2);
        echo "Records: ".$records."\n";

        fseek($handle, 78, SEEK_SET);
        for ($i=0; $i<$records; $i++){
            // Each entry is 8 bytes: offset (4), attributes (1), id (3).
            $entry = fread($handle, 8);
            if ($entry === false || strlen($entry) < 8)
                break; // truncated record list

            $record = new palmRecord();
            $record->Offset = self::decodeUInt(substr($entry, 0, 4));
            $record->Attributes = self::decodeUInt(substr($entry, 4, 1));
            $record->Id = self::decodeUInt(substr($entry, 5, 3));

            $palmHeader->Records[] = $record;
            echo "Record ".$i." offset: ".$record->Offset." attributes: ".$record->Attributes."  id : ".$record->Id."\n";
        }

        return $palmHeader;
    }

    /**
     * Reads and dumps the 16-byte PalmDOC header located at $offset, leaving
     * the file position directly behind it.
     *
     * @param resource $handle Open, seekable file handle.
     * @param int      $offset File offset of record 0.
     */
    private function readPalmDocHeader($handle, $offset)
    {
        $palmDOCHeader = new palmDOCHeader();
        fseek($handle, $offset, SEEK_SET);
        $palmDOCHeader->Compression = $this->readUInt($handle, 2);
        fseek($handle, 2, SEEK_CUR); // 2 bytes not used by this reader
        $palmDOCHeader->TextLength = $this->readUInt($handle, 4);
        $palmDOCHeader->Records = $this->readUInt($handle, 2);
        $palmDOCHeader->RecordSize = $this->readUInt($handle, 2);
        fseek($handle, 4, SEEK_CUR); // 4 bytes not used by this reader

        echo "\nPalmDOC Header:\n";
        echo "Compression:".$palmDOCHeader->Compression."\n";
        echo "TextLength:".$palmDOCHeader->TextLength."\n";
        echo "Records:".$palmDOCHeader->Records."\n";
        echo "RecordSize:".$palmDOCHeader->RecordSize."\n";
    }

    /**
     * Reads the MOBI header at the current position and looks for the "EXTH"
     * marker that may follow it.
     *
     * @param resource $handle Open, seekable file handle.
     * @return bool True when an "EXTH" marker was found; the file position is
     *              then directly behind that marker.
     */
    private function readMobiHeader($handle)
    {
        $mobiStart = ftell($handle);
        if (fread($handle, 4) !== "MOBI")
            return false;

        $this->mobiHeader = new mobiHeader();
        echo "\nMOBI header:\n";
        $this->mobiHeader->Length = $this->readUInt($handle, 4);
        $this->mobiHeader->Type = $this->readUInt($handle, 4);
        $this->mobiHeader->Encoding = $this->readUInt($handle, 4);
        $this->mobiHeader->Id = $this->readUInt($handle, 4);

        // The file version is the next 4-byte field (per the MobileRead MOBI
        // layout); read it only when the declared header is long enough to
        // contain it.
        if ($this->mobiHeader->Length >= 24)
            $this->mobiHeader->FileVersion = $this->readUInt($handle, 4);

        echo "Header length: ".$this->mobiHeader->Length."\n";
        echo "Type: ".$this->mobiHeader->Type."\n";
        echo "Encoding: ".$this->mobiHeader->Encoding."\n";
        echo "Id: ".$this->mobiHeader->Id."\n";

        // The header length is counted from the "MOBI" marker, so this lands
        // on whatever follows the header.
        fseek($handle, $mobiStart+$this->mobiHeader->Length, SEEK_SET);
        return fread($handle, 4) === "EXTH";
    }

    /**
     * Reads the EXTH block whose marker has just been consumed and stores
     * its records.
     *
     * @param resource $handle Open, seekable file handle.
     */
    private function readExthHeader($handle)
    {
        $this->exthHeader = new exthHeader();
        echo "\nEXTH header:\n";

        $this->exthHeader->Length = $this->readUInt($handle, 4);
        $records = $this->readUInt($handle, 4);
        echo "Records: ".$records."\n";

        $stat = fstat($handle);
        $fileSize = $stat ? $stat['size'] : PHP_INT_MAX;

        for ($i=0; $i<$records; $i++){
            // Record layout: type (4), total length (4), payload (length - 8).
            $head = fread($handle, 8);
            if ($head === false || strlen($head) < 8)
                break; // truncated EXTH block

            $record = new exthRecord();
            $record->Type = self::decodeUInt(substr($head, 0, 4));
            $record->Length = self::decodeUInt(substr($head, 4, 4));
            if ($record->Length < 8)
                break; // malformed length: the next record cannot be located

            // Never ask for more bytes than the file still holds, and never
            // call fread() with a zero length (an empty payload is valid).
            $dataLength = min($record->Length - 8, max(0, $fileSize - ftell($handle)));
            if ($dataLength > 0){
                $data = fread($handle, $dataLength);
                if ($data !== false)
                    $record->Data = $data;
            }

            $this->exthHeader->Records[] = $record;
            echo "Record ".$i." type: ".$record->Type." length: ".$record->Length."\n";
            echo "  data: ".$record->Data."\n";
        }
    }

    /**
     * Reads $bytes bytes and decodes them as a big-endian unsigned integer.
     *
     * @param resource $handle Open file handle.
     * @param int      $bytes  Number of bytes to read.
     * @return int|float Decoded value, or 0 when fewer bytes were available.
     */
    private function readUInt($handle, $bytes)
    {
        $raw = fread($handle, $bytes);
        if ($raw === false || strlen($raw) < $bytes)
            return 0;
        return self::decodeUInt($raw);
    }

    /**
     * Decodes a byte string as a big-endian unsigned integer.
     *
     * @param string $bytes Raw bytes.
     * @return int|float
     */
    private static function decodeUInt($bytes)
    {
        return hexdec(bin2hex($bytes));
    }

    /**
     * Finds the first EXTH record of the given type.
     *
     * @param int $type EXTH record type.
     * @return exthRecord|null The record, or NULL when the file has no EXTH
     *                         block or no record of that type.
     */
    protected function GetRecord($type)
    {
        // No EXTH block was parsed (invalid file or block absent).
        if (!$this->exthHeader)
            return NULL;

        foreach ($this->exthHeader->Records as $record){
            if ($record->Type == $type)
                return $record;
        }
        return NULL;
    }

    /**
     * Returns the payload of the first EXTH record of the given type.
     *
     * @param int $type EXTH record type.
     * @return string Record payload, or "" when there is no such record.
     */
    protected function GetRecordData($type)
    {
        $record = $this->GetRecord($type);
        if ($record)
            return $record->Data;
        return "";
    }

    /** @return string Payload of EXTH record 503, or "" when absent. */
    public function Title()
    {
        return $this->GetRecordData(503);
    }

    /** @return string Payload of EXTH record 100, or "" when absent. */
    public function Author()
    {
        return $this->GetRecordData(100);
    }

    /** @return string Payload of EXTH record 104, or "" when absent. */
    public function Isbn()
    {
        return $this->GetRecordData(104);
    }

    /** @return string Payload of EXTH record 105, or "" when absent. */
    public function Subject()
    {
        return $this->GetRecordData(105);
    }

    /** @return string Payload of EXTH record 101, or "" when absent. */
    public function Publisher()
    {
        return $this->GetRecordData(101);
    }
}

// Parse the sample file, then print each metadata field on its own line.
// Every label is identical to the name of the accessor that supplies it.
$mobi = new mobi("test.mobi");
foreach (array('Title', 'Author', 'Isbn', 'Subject', 'Publisher') as $field) {
    echo "\n".$field.": ".$mobi->$field();
}

答案 2 :(得分:2)

我遇到了同样的问题,没有找到任何PHP解析器,只好自己编写了一个(遗憾的是我不能公开我的代码)。这里有一份关于.mobi文件结构的很好的资料:http://wiki.mobileread.com/wiki/MOBI