使用Python访问Resume Parser API

时间:2017-01-18 06:15:43

标签: python api

我正在使用下面的代码访问简历解析 API,但收到了两个错误:"error parsing resume: 'ResumeParser' object has no attribute 'encoding'" 和 "AttributeError: 'ResumeParser' object has no attribute 'names'"。如何解决这些错误?

import os
from xml.dom import minidom
from pysimplesoap.client import SoapClient
from pysimplesoap.helpers import sort_dict


class ParserClient(SoapClient):
    '''SOAP client that keeps the raw content of the last response.

    ``send`` stores whatever the parent ``send`` returns in ``self.result``
    so callers can read the response body after a call, and ``wsdl_call``
    returns the value of ``self.call`` directly.
    '''

    def wsdl_call(self, method, *args, **kwargs):
        '''Call a WSDL operation and return the result of ``self.call``.

        Per the original author's note, this override exists so that the
        response is not unmarshalled -- NOTE(review): confirm against the
        installed pysimplesoap version.

        method -- name of the WSDL operation to invoke.
        args, kwargs -- operation parameters, forwarded to
            ``wsdl_call_get_params``.
        '''
        operation = self.get_operation(method)
        inp = operation['input']
        header = operation.get('header')
        if 'action' in operation:
            self.action = operation['action']
        if header:
            # Order the call headers according to the operation's header spec.
            self._call_headers = sort_dict(header, self._headers)
        method, params = self.wsdl_call_get_params(method, inp, *args, **kwargs)
        response = self.call(method, *params)
        return response

    def send(self, method, xml):
        '''Send the request through the parent client and remember the reply.

        The content returned by the parent ``send`` is stored in
        ``self.result`` and also returned unchanged.
        '''
        content = super(ParserClient, self).send(method, xml)
        self.result = content
        return content


class ResumeParser(object):
    '''Connect to the resume parser's XML API to get parsed data.'''

    def __init__(self, simple=True, timeout=60):
        '''Initialize the resume parser.

        simple  -- when true, ``get_xml`` calls ``Get_SimpleXML``; otherwise
                   it calls ``get_HRXML``.
        timeout -- forwarded to the SOAP client as its ``timeout`` argument.
        '''
        self.wsdl = "http://jobsite.onlineresumeparser.com/rPlusParseResume.asmx?WSDL"
        self.secret = ""  # enter key here
        self.encoding = "base64"
        self.simple = simple
        self.client = ParserClient(wsdl=self.wsdl, timeout=timeout)
        # Names collected by get_names; accumulates across calls.
        self.names = []

    def get_file_content(self, file_path):
        '''Return the content of ``file_path`` encoded with ``self.encoding``.'''
        # Function-scope import: the file's import header does not provide
        # codecs.
        import codecs

        # Read in binary mode: resumes are arbitrary documents, and text mode
        # can mangle or fail to decode their bytes.
        with open(os.path.abspath(file_path), 'rb') as file_obj:
            raw = file_obj.read()
        # codecs.encode applies the bytes-to-bytes codec named by
        # self.encoding ("base64" by default); on Python 2 this is the same
        # as raw.encode(self.encoding).
        return codecs.encode(raw, self.encoding)

    def get_names(self, path):
        '''Parse every resume file directly inside ``path`` and return the
        candidate names as a list without duplicates.

        Files that fail to parse are reported on stdout and skipped.
        '''
        opt = os.path
        resumes = [opt.join(path, r) for r in os.listdir(path)
                   if opt.isfile(opt.join(path, r))]
        # parse information for each resume
        for resume in resumes:
            try:
                xml_data = self.get_xml(resume)
                print(xml_data)
                name = self.get_name_from_xml(xml_data)

                if name:
                    self.names.append(name)
                    print(name)
            except Exception as err:
                # Best effort: one bad resume must not abort the whole batch.
                print("error parsing resume:%s" % str(err))
        return list(set(self.names))

    def get_name_from_xml(self, data):
        '''Return the title-cased full name found in the given XML data.

        Raises IndexError when there is no CANDIDATE_FULL_NAME element or
        the element is empty.
        '''
        xmldata = minidom.parseString(data)
        name = xmldata.getElementsByTagName("CANDIDATE_FULL_NAME")
        name = name[0].childNodes[0].data.title()
        print('\n name of student:  %s' % name)
        return name

    def get_xml(self, filepath):
        '''Send the given file to the API and return the cleaned XML reply.'''
        filename = os.path.basename(filepath)
        encoded = self.get_file_content(filepath)
        filedata = {
            'B64FileZippedContent': encoded,
            'FileName': filename,
            'UserID': 1,
            'secretKey': self.secret,
        }
        get = self.client.Get_SimpleXML if self.simple else self.client.get_HRXML
        # The return value is ignored; the client keeps the raw reply in
        # self.client.result, which process_raw_xml reads.
        get(**filedata)

        return self.process_raw_xml()

    def process_raw_xml(self, data=None):
        '''Extract the text of the Get_SimpleXMLResult element as UTF-8 bytes.

        data -- raw XML to process; when empty or None, ``self.client.result``
                is used instead.
        '''
        raw = data if data else self.client.result
        parsed = minidom.parseString(raw)
        # NOTE(review): only the Get_SimpleXML result tag is looked up here;
        # confirm which tag a get_HRXML reply uses before relying on
        # simple=False.
        result = parsed.getElementsByTagName("Get_SimpleXMLResult")[0]
        text_node = result.childNodes[0]
        data = text_node.data.encode("UTF-8")
        print(data)
        return data


# Parse every resume in the folder and show the de-duplicated candidate names.
obj1 = ResumeParser()
result = obj1.get_names("/home/Desktop/resumes/")

print(result)

1 个答案:

答案 0 :(得分:0)

您需要把构造函数命名为 __init__(前后各两个下划线),而不是 _init_(前后各一个下划线)。Python 不会自动调用 _init_,因此 self.encoding 和 self.names 从未被赋值,才会出现上述属性错误。