从Melissa数据库中提取数据

时间:2019-01-23 00:48:26

标签: python pandas

我正在尝试从Melissa数据库中提取数据，但得到的结果始终是NaN值，我不确定原因是什么。

这是我的代码:

from xml.dom.minidom import parse,parseString
import xml.dom.minidom
import requests
import sys
import pandas as pd
import numpy as np

# Melissa credential sent as the `id` query parameter; intentionally blank here.
key = ""

# Input table; later code reads the 'Full Name' and 'Address' columns per row.
df = pd.read_csv("data.csv")

def getElementValue(p_dom, p_element):
    """Return the text of the first ``p_element`` element found under ``p_dom``.

    Args:
        p_dom: An ``xml.dom.minidom`` document or element to search.
        p_element: Tag name to look up.

    Returns:
        The ``data`` of the first matching element's first child node, or the
        string ``'NaN'`` when no element with that tag exists or the element
        has no content.
    """
    matches = p_dom.getElementsByTagName(p_element)
    if not matches:
        return 'NaN'

    first_child = matches[0].firstChild
    # An empty element (<Tag/> or <Tag></Tag>) has no child node at all, so
    # reading ``.data`` from it would raise AttributeError; report it as
    # missing instead.
    if first_child is None:
        return 'NaN'
    return first_child.data

def getData(key, full_name, address):
    """Query the Melissa Personator ContactVerify endpoint and return Gender.

    Args:
        key: Value sent as the ``id`` query parameter.
        full_name: Value sent as the ``full`` query parameter.
        address: Value sent as the ``a1`` query parameter.

    Returns:
        The text of the first ``Gender`` element in the XML response, or the
        string ``'NaN'`` when ``getElementValue`` finds no such element.
    """
    # Pass the fields through ``params`` so that requests builds and
    # percent-encodes the query string. The previous string concatenation was
    # split across two lines without a continuation, so the name and address
    # never became part of the URL, and spaces/'&'/'#' in them were not escaped.
    reply = requests.get(
        'https://personator.melissadata.net/v3/WEB/ContactVerify/doContactVerify',
        params={'id': key, 'full': full_name, 'a1': address},
    )
    dom = parseString(reply.text)

    # NOTE(review): only Gender is returned to the caller, so the other
    # demographic lookups were dropped as unused work. The service presumably
    # only includes fields such as Gender when they are explicitly requested
    # (e.g. via action/column parameters) -- confirm against the Melissa docs
    # if 'NaN' is still returned for every row.
    return getElementValue(dom, 'Gender')

def _lookup_gender(row):
    """Return the getData result for one row's 'Full Name' and 'Address'."""
    return getData(key, row['Full Name'], row['Address'])


# Call the service once per row and store the result in a 'Gender' column.
df['Gender'] = df.apply(_lookup_gender, axis=1)

NaN值是由getElementValue(p_dom, p_element)函数返回的。

0 个答案:

没有答案