列表索引必须是整数或切片,而不是str的XML

时间:2018-03-03 21:17:08

标签: python epub

再次问好StackOverflow,

我的代码遇到了另一个问题。第208行if [currentpage]["headingAnchorName"][currentanchor] in data["pages"]:出现此错误list indices must be integers or slices, not str

我要做的是将锚标记添加到ePub中的XML输出。我添加了一个示例toc.ncx和metadata.json。我正在尝试将锚标签添加到代码中。带有epub文件和其他资源的原始,完整代码位于我的Github

这个肯定让我头疼,所以我非常感谢你的帮助。

CreateE-book.py

#!/usr/bin/env python
#CreateE-book.py - Combines GenMetadata.py and GenEpub.py into one easy script.

#GenMetadata.py - Generates the content.opf and toc.ncx files from the metadata.json file.

#opf = "OEBPS/content.opf"
#ncx = "OEBPS/toc.ncx"

#JSON extraction magic

import os
import time
import json
from collections import OrderedDict
import zipfile

with open("metadata.json") as json_file:
    data = json.load((json_file), object_pairs_hook=OrderedDict) #For some reason the order is randomised, this preserves the order.

#Create a compatible content.opf from scratch.
def GenOPF():

    opf = open(data["containerFolder"] + os.sep + "content.opf", "w")
    opf.write('<?xml version="1.0" encoding="UTF-8" standalone="no"?><package xmlns="http://www.idpf.org/2007/opf" unique-identifier="bookid" version="2.0">\n')

    #Metadata tags
    opf.write('\t<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n')
    opf.write('\t\t<dc:title>' + data["title"] + '</dc:title>\n')
    opf.write('\t\t<dc:creator>' + data["creator"] + '</dc:creator>\n')
    opf.write('\t\t<dc:subject>' + data["subject"] + '</dc:subject>\n')
    opf.write('\t\t<dc:publisher>' + data["publisher"] + '</dc:publisher>\n')
    opf.write('\t\t<dc:identifier id="bookid">' + data["ISBN"] + '</dc:identifier>\n')
    opf.write('\t\t<dc:date>' + (time.strftime("%Y-%m-%d")) + '</dc:date>\n') #YYYY[-MM[-DD]]
    opf.write('\t\t<dc:language>' + data["language"] + '</dc:language>\n')
    opf.write('\t\t<dc:rights>' + data["rights"] + '</dc:rights>\n')
    opf.write('\t\t<meta content="main_cover_image" name="cover"/>\n')

    #Fixed (non-reflowable) support
    if (data["textPresentation"] == "Reflowable" or data["textPresentation"] == "reflowable" or data["textPresentation"] == "reflow"):
    print('e-book type: Reflowable')

    elif (data["textPresentation"] == "Fixed layout" or data["textPresentation"] == "Fixed Layout" or data["textPresentation"] == "fixed layout" or data["textPresentation"] == "fixed"):
    opf.write('\t\t<meta name="fixed-layout" content="true"/>\n')
    print('e-book type: Fixed layout')

else:
    print('Invalid textPresentation in metadata.json.')

opf.write('\t</metadata>\n')

#Manifest tags
opf.write('\t<manifest>\n')

#Write out the CSS files
cssindex = 0

for subdir, dirs, files in os.walk(data["containerFolder"] + os.sep + data["cssFolder"]):
    for file in files:
        filepath = subdir + os.sep + file
        correctfilepath = filepath.replace(data["containerFolder"] + os.sep, "") #removes the redudant OEBPS

        if filepath.endswith(".css"):
            opf.write('\t\t<item href="' + correctfilepath + '" id="css' + str(cssindex) + '" media-type="text/css"/>\n')
            print (filepath)
            cssindex += 1

#Write out the NCX and cover image files
opf.write('\t\t<item href="toc.ncx" id="ncx" media-type="application/x-dtbncx+xml"/>\n')
#opf.write('\t\t<item href="'+ data["imagesFolder"] + '/' + data["epubCover"] +'" id="main_cover_image" media-type="image/jpeg"/>\n') #Removes duplicate output, leaving commented as I might it later for the Kindle covers.

#Write out the images

imageindex = 0

for subdir, dirs, files in os.walk(data["containerFolder"] + os.sep + data["imagesFolder"]):
    for file in files:
        filepath = subdir + os.sep + file
        correctfilepath = filepath.replace(data["containerFolder"] + os.sep, "") #removes the redudant OEBPS

        if filepath.endswith(".jpg") or filepath.endswith(".jpeg") or filepath.endswith(".jpe"):
            opf.write('\t\t<item href="' + correctfilepath + '" id="image' + str(imageindex) + '" media-type="image/jpeg"/>\n')
            print (filepath)
            imageindex += 1

        elif filepath.endswith(".png"):
            opf.write('\t\t<item href="' + correctfilepath + '" id="image' + str(imageindex) + '" media-type="image/png"/>\n')
            print (filepath)
            imageindex += 1

        elif filepath.endswith(".gif"):
            opf.write('\t\t<item href="' + correctfilepath + '" id="image' + str(imageindex) + '" media-type="image/gif"/>\n')
            print (filepath)
            imageindex += 1

        elif filepath.endswith(".svg"):
            opf.write('\t\t<item href="' + correctfilepath + '" id="image' + str(imageindex) + '" media-type="image/svg+xml"/>\n')
            print (filepath)
            imageindex += 1

#Write out all the pages in the book.
#Count all the instances within the pages block.

currentpage = 0
totalpages = len(data["pages"]) #Number of pages

while currentpage != totalpages: #Write out all the xhtml files as declared in the JSON.
    pageid = str.lower(data["pages"][currentpage]["pageName"]) #remove capital letters and spaces from the id attribute (works with Unicode)
    correctpageid = pageid.replace(" ","_")

    opf.write('\t\t<item href="' + data["pages"][currentpage]["fileName"] + '" id="' + correctpageid + '" media-type="application/xhtml+xml"/>\n')
    currentpage += 1

#Write out all the custom fonts in the book.

fontindex = 0

for subdir, dirs, files in os.walk(data["containerFolder"] + os.sep + data["fontsFolder"]):
    for file in files:
        filepath = subdir + os.sep + file
        correctfilepath = filepath.replace(data["containerFolder"] + os.sep, "") #removes the redudant OEBPS

        if filepath.endswith(".ttf"):
            opf.write('\t\t<item href="' + correctfilepath + '" id="font' + str(fontindex) + '" media-type="font/truetype"/>\n')
            print (filepath)
            imageindex += 1

        elif filepath.endswith(".otf"):
            opf.write('\t\t<item href="' + correctfilepath + '" id="font' + str(fontindex) + '" media-type="font/opentype"/>\n')
            print (filepath)
            imageindex += 1

opf.write('\t</manifest>\n')

#Spine tags
opf.write('\t<spine toc="ncx">\n')

#Write out all the filenames in order again as declared in the JSON.

currentpage = 0
totalpages = len(data["pages"]) #Number of pages

while currentpage != totalpages: #Write out all the xhtml files as declared in the JSON.
    pageid = str.lower(data["pages"][currentpage]["pageName"]) #remove capital letters and spaces from the id attribute (works with Unicode)
    correctpageid = pageid.replace(" ","_")

    opf.write('\t\t<itemref idref="' + correctpageid + '"/>\n')
    currentpage += 1

opf.write('\t</spine>\n')

#End of file
opf.write('</package>')

opf.close() #Eventually save directly to the OEBPS folder

#Create a compatible toc.ncx from scratch.
def GenNCX():

ncx = open(data["containerFolder"] + os.sep + "toc.ncx", "w")

ncx.write('<?xml version="1.0" encoding="UTF-8" ?>\n')
ncx.write('<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd"><ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">\n')

#Head tags
ncx.write('<head>\n')
ncx.write('\t<meta name="dtb:uid" content="' + data["ISBN"] + '" />\n') #Has to be the same as dc:identifier.

#Declare the maximum amount of indentation from 1 to 4.

indentations = [] #Assemble a Python list (array) with all the indentations and take the largest number.
currentpage = 0
totalpages = len(data["pages"]) #Number of pages

while currentpage != totalpages:
    indentations.append(data["pages"][currentpage]["indentation"])
    maxdepth = max(indentations)
    currentpage += 1

ncx.write('\t<meta name="dtb:depth" content="' + str(maxdepth) + '" />\n')
ncx.write('\t<meta name="dtb:totalPageCount" content="0" />\n')
ncx.write('\t<meta name="dtb:maxPageNumber" content="0" />\n')
ncx.write('</head>\n')

#Doctitle tags
ncx.write('<docTitle>\n')
ncx.write('\t<text>' + data["titleShort"] + '</text>\n')
ncx.write('</docTitle>\n')

#Write out the NavMap tags (and their children)
ncx.write('<navMap>\n')

currentpage = int(0)
index = 1
totalpages = len(data["pages"]) #Number of pages

while currentpage != totalpages: #Write out all the xhtml files as declared in the JSON, indendation currently unsupported (data["pages"][currentpage]["indentation"].

    ncx.write('\t<navPoint id="navpoint-' + str(currentpage) + '" playOrder="' + str(index) + '">\n') #id=001 class=h1 playOrder=1
    ncx.write('\t\t<navLabel>\n')
    ncx.write('\t\t\t<text>' + data["pages"][currentpage]["pageName"] + '</text>\n')
    ncx.write('\t\t</navLabel>\n')
    ncx.write('\t\t<content src="'+ data["pages"][currentpage]["fileName"] +'" />\n')

    #Write out the anchor tags.
    currentanchor = int(0)

    if [currentpage]["headingAnchorName"][currentanchor] in data["pages"]:

        totalanchors = len(data["pages"][currentpage]["headingAnchorName"]) #Number of anchor tags

        while currentanchor != totalanchors:
            ncx.write('\t\t\t<navLabel>\n')
            ncx.write('\t\t\t\t<text>' + data["pages"][currentpage]["headingAnchorName"][currentanchor] + '</text>\n')
            ncx.write('\t\t\t</navLabel>\n')
            ncx.write('\t\t\t<content src="' + data["pages"][currentpage]["fileName"] + data["pages"][currentpage]["headingAnchorLink"][currentanchor] + '" />\n')

            currentanchor += 1

    ncx.write('\t</navPoint>\n')

    currentpage += 1
    index += 1

ncx.write('</navMap>\n')

#End of file
ncx.write('</ncx>')

def GenEpub():
#GenEpub.py - Generates an .epub file from the data provided.
#Ideally with no errors or warnings from epubcheck (needs to be implemented, maybe with the Python wrapper).

#Generate the mimetype.
mime = open("mimetype", "w")

mime.write('application/epub+zip')

mime.close()

#Generate the META-INF.
try:
    os.stat('META-INF')

except:
    os.mkdir('META-INF')

metainf = open('META-INF' + os.sep + "container.xml", "w")

metainf.write('<?xml version="1.0"?>\n')
metainf.write('<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">\n')
metainf.write('\t<rootfiles>\n')
metainf.write('\t\t<rootfile full-path="' + data["containerFolder"] + '/content.opf" media-type="application/oebps-package+xml" />\n')
metainf.write('\t</rootfiles>\n')
metainf.write('</container>')

metainf.close()

#The ePub standard requires deflated compression and a compression order.
zf = zipfile.ZipFile(data["fileName"] + '.epub', mode='w', compression=zipfile.ZIP_STORED)

zf.write('mimetype')

for dirname, subdirs, files in os.walk('META-INF'):
    zf.write(dirname)
    for filename in files:
        if filename != '.DS_Store': #epubcheck hates uninvited files and macOS places these everywhere.
                zf.write(os.path.join(dirname, filename))
                print('dirname:' + dirname)
                print('filename:' + filename)

for dirname, subdirs, files in os.walk(data["containerFolder"]):
    zf.write(dirname)
    for filename in files:
        if filename != '.DS_Store': #epubcheck hates uninvited files
            zf.write(os.path.join(dirname, filename))
            print('dirname:' + dirname)
            print('filename:' + filename)

zf.close()

#zipfile has a built-in validator for debugging
with open(data["fileName"] + '.epub', 'r') as f:
    if zipfile.is_zipfile(f) is True:
        print("ZIP file is valid.")

#Extra debugging information
#print(getinfo.compress_type(zf))
#print(getinfo.compress_size(zf))
#print(getinfo.file_size(zf))

GenOPF()
GenNCX()
GenEpub()

metadata.json

{
        "comment1": "Metadata.json - Insert the e-book's metadata here, so that the content.opf and toc.ncx files can be filled. May use data for ONIX3 too. WIP",

        "comment2": "Folder metadata - This contains the names of the technical folders, if you change the default names please update them here.",
        "containerFolder": "OEBPS",
        "cssFolder": "styles",
        "imagesFolder": "images",
        "fontsFolder": "fonts",

        "comment3": "Technical metadata - This is the where the cover image is specified. Recommended to use ePub V2.0.1 over 3.0 for epubVersion and Reflowable rather than Fixed for textPresentation (unless doing a project that requires a specific layout). mobiCover and generateKindle are currently unused but added for futureproofing.",
        "epubCover": "cover.png",
        "mobiCover": "cover.png",
        "fileName": "FullSolderComic",
        "epubVersion": "2.0.1",
        "textPresentation": "Reflowable",
        "generateKindle": "no",
        "generateGoogle": "yes",

        "comment4": "Book metadata - Information about the e-book itself. Language is specified with ISO 639-1. ISBN number can be declared, as well as the Amazon ASIN. Rights can be worldwide, country specific or under a permissable license such as Creative-Commons SA",
        "title": "Soldering is Easy: Here's How to Do it (Extended Version)",
        "titleShort": "Soldering is Easy",
        "creator": "Mitch Altman, Andie Nordgren, Jeff Keyzer",
        "subject": "Academic",
        "publisher": "MightyOHM",
        "ISBN": "-",
        "ASIN": "-",
        "language": "en",
        "rights": "Creative Commons BY-SA 4.0",

        "comment5": "This is the page order that the e-book has. The first number before the colon is the page order, the second is the indentation (default is 1, allowed upto 4), third is the page name and fourth is file itself.",
        "pages": [
                {
                    "pageNumber": "0",
                    "indentation": "1",
                    "pageName": "Front Cover",
                    "fileName": "bookcover.xhtml"
                },
                {
                    "pageNumber": "1",
                    "indentation": "1",
                    "pageName": "Indicia",
                    "fileName": "indicia.xhtml"
                },
                {
                    "pageNumber": "2",
                    "indentation": "1",
                    "pageName": "License",
                    "fileName": "license.xhtml",
                    "headingAnchorName": [
                        "Content license",
                        "E-book license"
                    ],
                    "headingAnchorLink": [
                        "#content-license",
                        "#ebook-license"
                    ]
                },
                {
                    "pageNumber": "3",
                    "indentation": "1",
                    "pageName": "Page 1",
                    "fileName": "page1.xhtml"
                },
                {
                    "pageNumber": "4",
                    "indentation": "1",
                    "pageName": "Page 2",
                    "fileName": "page2.xhtml"
                },
                {
                    "pageNumber": "5",
                    "indentation": "1",
                    "pageName": "Page 3",
                    "fileName": "page3.xhtml"
                },
                {
                    "pageNumber": "6",
                    "indentation": "1",
                    "pageName": "Page 4",
                    "fileName": "page4.xhtml"
                },
                {
                    "pageNumber": "7",
                    "indentation": "1",
                    "pageName": "Page 5",
                    "fileName": "page5.xhtml"
                },
                {
                    "pageNumber": "8",
                    "indentation": "1",
                    "pageName": "Page 6",
                    "fileName": "page6.xhtml"
                },
                {
                    "pageNumber": "9",
                    "indentation": "1",
                    "pageName": "Page 7",
                    "fileName": "page7.xhtml"
                },
                {
                    "pageNumber": "10",
                    "indentation": "1",
                    "pageName": "Blank page",
                    "fileName": "toc.xhtml"
                }
            ]
}

toc.ncx

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd"><ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
<head>
   <meta name="dtb:uid" content="-" />
   <meta name="dtb:depth" content="1" />
   <meta name="dtb:totalPageCount" content="0" />
   <meta name="dtb:maxPageNumber" content="0" />
</head>
<docTitle>
   <text>Soldering is Easy</text>
</docTitle>
<navMap>
   <navPoint id="navpoint-0" playOrder="1">
       <navLabel>
            <text>Front Cover</text>
       </navLabel>
       <content src="bookcover.xhtml" />
   </navPoint>
   <navPoint id="navpoint-1" playOrder="2">
        <navLabel>
            <text>Indicia</text>
        </navLabel>
        <content src="indicia.xhtml" />
   </navPoint>
   <navPoint id="navpoint-2" playOrder="3">
     <navLabel>
        <text>License</text>
     </navLabel>
    <content src="license.xhtml" />
   </navPoint>
   <navPoint id="navpoint-3" playOrder="4">
      <navLabel>
         <text>Page 1</text>
      </navLabel>
     <content src="page1.xhtml" />
   </navPoint>
   <navPoint id="navpoint-4" playOrder="5">
      <navLabel>
          <text>Page 2</text>
      </navLabel>
      <content src="page2.xhtml" />
   </navPoint>
   <navPoint id="navpoint-5" playOrder="6">
    <navLabel>
        <text>Page 3</text>
    </navLabel>
      <content src="page3.xhtml" />
    </navPoint>
   <navPoint id="navpoint-6" playOrder="7">
    <navLabel>
        <text>Page 4</text>
    </navLabel>
      <content src="page4.xhtml" />
   </navPoint>
   <navPoint id="navpoint-7" playOrder="8">
    <navLabel>
        <text>Page 5</text>
    </navLabel>
    <content src="page5.xhtml" />
</navPoint>
<navPoint id="navpoint-8" playOrder="9">
    <navLabel>
        <text>Page 6</text>
    </navLabel>
    <content src="page6.xhtml" />
</navPoint>
<navPoint id="navpoint-9" playOrder="10">
    <navLabel>
        <text>Page 7</text>
    </navLabel>
    <content src="page7.xhtml" />
</navPoint>
<navPoint id="navpoint-10" playOrder="11">
    <navLabel>
        <text>Blank page</text>
    </navLabel>
    <content src="toc.xhtml" />
</navPoint>

0 个答案:

没有答案