解析XML,需要从并不总是存在的子节点获取元素文本

时间:2018-05-02 13:47:50

标签: xml python-3.x xml-parsing

我是Python新手。我正在解析XML元素的文本,这些数据最终会写入数据库,不过目前我只是把结果打印出来,用于检查(QA)我的脚本。下面的脚本(为缩短示例已做删减)输出的正是我想要的内容。我的问题是:如何把FreeRents节点的值也取出来,同时又不丢掉那些有BaseRents节点但没有FreeRents节点的记录?我试过用一个for循环,但这显然会把不同时具有BaseRent和FreeRent的记录排除掉:

XML:

      <Lease>
        <Id>5507160</Id>
        <LeaseType>direct</LeaseType>
        <LeaseExpirationDate>2018-12-31</LeaseExpirationDate>
        <LeaseCommencementDate>1996-12-01</LeaseCommencementDate>
        <ExecutionDate/>
        <LeaseTerm/>
        <LeaseStructure>triple net</LeaseStructure>
        <SourceID>4258_1300</SourceID>
        <Industry/>
        <RenewalStatus>unknown</RenewalStatus>
        <CreatedAt>2018-01-10T20:43:55Z</CreatedAt>
        <UpdatedAt>2018-04-10T16:31:22Z</UpdatedAt>
        <Rights/>
        <LeaseDealTerms>
          <LeaseDealTerm>
            <RentableArea>10740</RentableArea>
            <SecurityDeposit/>
            <NEROverride/>
            <MiscDescription>Miscellaneous Description</MiscDescription>
            <SourceID>1228_1300</SourceID>
            <IsMonthToMonth>false</IsMonthToMonth>
            <MoveOutDate/>
            <BaseRents>
              <BaseRent>
                <StartDate>2018-01-01</StartDate>
                <EndDate>2018-01-31</EndDate>
                <Amount>16.0</Amount>
                <Units>rsf/year</Units>
              </BaseRent>
              <BaseRent>
                <StartDate>2018-12-01</StartDate>
                <EndDate>2018-12-31</EndDate>
                <Amount>16.0</Amount>
                <Units>rsf/year</Units>
              </BaseRent>
            </BaseRents>
            <FreeRents>
              <FreeRent>
                <StartDate>2018-05-01</StartDate>
                <EndDate>2018-05-31</EndDate>
                <Amount>5.0</Amount>
                <Units>rsf/year</Units>
                <FreeRentType>net</FreeRentType>
                <Percentage>100.0</Percentage>
              </FreeRent>
              <FreeRent>
                <StartDate>2019-04-01</StartDate>
                <EndDate>2019-04-30</EndDate>
                <Amount>3.0</Amount>
                <Units>rsf/year</Units>
                <FreeRentType>net</FreeRentType>
                <Percentage>100.0</Percentage>
              </FreeRent>
            </FreeRents>
          </LeaseDealTerm>
        </LeaseDealTerms>
        <Tenant>
          <Id>993183</Id>
          <CompanyName>Sample Company LTD.</CompanyName>
        </Tenant>
      </Lease>

脚本:

import xml.etree.ElementTree as ET
import pyodbc

# Connection settings for the destination SQL Server database
# (placeholder values).
server = "servername, port"
database = "DestDB"
username = "foo"
password = "bar"


# The backslash continues the string literal onto the next line without
# inserting a newline, so the connection string is one unbroken line.
con = pyodbc.connect("DRIVER={ODBC Driver 17 for SQL Server};\
SERVER="+server+";DATABASE="+database+";UID="+username+";PWD="+password+";Trusted_Connection=yes;")
x = con.cursor()  # not used yet: this version only prints values

tree = ET.parse(r'\\server\FileName.xml')
root = tree.getroot()

# Walk every Lease, then every LeaseDealTerm inside it, then every BaseRent
# inside that deal term, printing one line per BaseRent.
for ls in root.findall("PortfolioData/Properties/Property/Spaces/Space/Lease"):
    lsid = ls.find("Id").text
    for ldt in ls.findall("LeaseDealTerms/LeaseDealTerm"):
        ldra = ldt.find("RentableArea").text
        # SourceID looks like "1228_1300"; keep the second "_"-separated
        # piece. AttributeError covers a missing or empty element,
        # IndexError covers a value without an underscore.
        try:
            unitnum = ldt.find("SourceID").text.split("_")[1]
        except (AttributeError, IndexError):
            unitnum = None
        # Empty elements such as <SecurityDeposit/> have .text == None.
        secdep = ldt.find("SecurityDeposit").text
        nero = ldt.find("NEROverride").text
        lsdesc = ldt.find("MiscDescription").text  # Jeff asked for this
        mtm = ldt.find("IsMonthToMonth").text
        moutdt = ldt.find("MoveOutDate").text
        # Only BaseRent rows drive the output; a deal term without any
        # BaseRent produces no line at all.
        for br in ldt.findall("BaseRents/BaseRent"):
            brstdt = br.find("StartDate").text
            brenddt = br.find("EndDate").text
            bramt = br.find("Amount").text
            brunit = br.find("Units").text

            # Restrict the debug output to a single known lease.
            # NOTE(review): brstdt is collected but not printed; the posted
            # sample output appears to show it in place of moutdt - confirm
            # which argument list was intended.
            if lsid == "5507160":
                print(lsid, unitnum, ldra, secdep, nero, lsdesc, mtm, moutdt, brenddt, bramt, brunit)

结果(没有FreeRents):

5507160 1300 10740 None None Miscellaneous Description false 2018-01-01 2018-01-31 16.0 rsf/year
5507160 1300 10740 None None Miscellaneous Description false 2018-12-01 2018-12-31 16.0 rsf/year

期望的结果:

5507160 1300 10740 None None Miscellaneous Description false 2018-01-01 2018-01-31 16.0 rsf/year 2018-05-01 2018-05-31 5.0 rsf/year net 100.0
5507160 1300 10740 None None Miscellaneous Description false 2018-12-01 2018-12-31 16.0 rsf/year 2019-04-01 2019-04-30 3.0 rsf/year net 100.0

1 个答案:

答案 0 :(得分:0)

我意识到自己的for循环写错了——嵌套得太深,导致结果不完整。更正后的脚本使用:

for br in ldt.findall("BaseRents"):

而不是:

for br in ldt.findall("BaseRents/BaseRent"):

对FreeRents采用类似的结构后,就得到了完整的结果集。更新后的完整脚本:

import xml.etree.ElementTree as ET
import pyodbc

# Connection settings for the destination SQL Server database
# (placeholder values).
server = "servername, port"
database = "DestDB"
username = "foo"
password = "bar"

# The backslash continues the string literal onto the next line without
# inserting a newline, so the connection string is one unbroken line.
con = pyodbc.connect("DRIVER={ODBC Driver 17 for SQL Server};\
SERVER="+server+";DATABASE="+database+";UID="+username+";PWD="+password+";Trusted_Connection=yes;")
x = con.cursor()

tree = ET.parse(r'\\server\FileName.xml')
root = tree.getroot()

# Parameterized insert; the tuple passed to execute() must follow the
# column order listed here.
INSERT_SQL = "Insert into portfoliodealterms (lsid, unitnum, ldra, secdep, nero, lsdesc, mtm, moutdt, retasgw, \
         retpct, retesale, brstdt, brenddt, bramt, brunit, frstdt, frenddt, framt, frunit, frtype, frpct, opstdt, \
         openddt, opamt, opunits, rtstdt, rtenddt, rtamt, rtunits, orstdt, orenddt, oramt, orunits, ortype) \
        values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, \
        ?, ?, ?)"

# Optional container elements of a LeaseDealTerm and, for each, the paths
# (relative to the container) of the values stored in the table. The order of
# containers and paths matches the column order in INSERT_SQL after retesale.
# Each path resolves to the first matching element only, so a container with
# several entries (e.g. two BaseRent children) contributes its first entry.
# NOTE(review): the OtherRevenues paths look for a child that is again named
# "OtherRevenues"; the singular "OtherRevenue" may have been intended -
# confirm against the feed's schema.
_CONTAINER_FIELDS = (
    ("BaseRents", (
        "BaseRent/StartDate",
        "BaseRent/EndDate",
        "BaseRent/Amount",
        "BaseRent/Units",
    )),
    ("FreeRents", (
        "FreeRent/StartDate",
        "FreeRent/EndDate",
        "FreeRent/Amount",
        "FreeRent/Units",
        "FreeRent/FreeRentType",
        "FreeRent/Percentage",
    )),
    ("OpExs", (
        "OpEx/StartDate",
        "OpEx/EndDate",
        "OpEx/Amount",
        "OpEx/Units",
    )),
    ("RealEstateTaxes", (
        "RealEstateTax/StartDate",
        "RealEstateTax/EndDate",
        "RealEstateTax/Amount",
        "RealEstateTax/Units",
    )),
    ("OtherRevenues", (
        "OtherRevenues/StartDate",
        "OtherRevenues/EndDate",
        "OtherRevenues/Amount",
        "OtherRevenues/Units",
        "OtherRevenues/Type",
    )),
)


def _text(node, path):
    """Return the text of the first element matching *path* under *node*.

    Returns None when *node* is None, when no element matches, or when the
    matched element is empty (e.g. ``<MoveOutDate/>``).
    """
    if node is None:
        return None
    found = node.find(path)
    # Compare with None explicitly: an Element without children is falsy.
    return None if found is None else found.text


def _unit_number(source_id):
    """Return the second "_"-separated piece of *source_id*, or None.

    For example "1228_1300" gives "1300". None is returned when the value is
    missing or contains no underscore.
    """
    if source_id is None:
        return None
    pieces = source_id.split("_")
    return pieces[1] if len(pieces) > 1 else None


for ls in root.findall("PortfolioData/Properties/Property/Spaces/Space/Lease"):
    lsid = _text(ls, "Id")
    if lsid is None:
        # Nothing is inserted for a lease without an id.
        continue

    for ldt in ls.findall("LeaseDealTerms/LeaseDealTerm"):
        # Every value is looked up afresh for each deal term, so an absent
        # element yields None instead of a value left over from an earlier
        # record.
        row = [
            lsid,
            _unit_number(_text(ldt, "SourceID")),      # unitnum
            _text(ldt, "RentableArea"),                # ldra
            _text(ldt, "SecurityDeposit"),             # secdep
            _text(ldt, "NEROverride"),                 # nero
            _text(ldt, "MiscDescription"),             # lsdesc - Jeff asked for this
            _text(ldt, "IsMonthToMonth"),              # mtm
            _text(ldt, "MoveOutDate"),                 # moutdt
            _text(ldt, "Retail/AnnualSalesGrowthRate"),  # retasgw
            _text(ldt, "Retail/PercentRate"),          # retpct
            _text(ldt, "Retail/UnnaturalBreakpoint"),  # retesale
        ]

        for container_path, field_paths in _CONTAINER_FIELDS:
            containers = ldt.findall(container_path)
            # If a container element is repeated, the last occurrence
            # supplies the values; if it is absent, every field is None.
            container = containers[-1] if containers else None
            row.extend(_text(container, field) for field in field_paths)

        # One row per LeaseDealTerm, committed immediately.
        x.execute(INSERT_SQL, tuple(row))
        con.commit()