Python3 - for循环只返回一次迭代

时间:2018-05-21 22:06:45

标签: xml python-3.x for-loop lxml

我有一个 Python 脚本,用来检查我的 Qualys 扫描程序正在扫描哪些实例。API 响应是 XML 格式,我使用 lxml 库的 etree 模块遍历响应并解析出每个 <EC2_INSTANCE_ID> 标签。但是,我的循环只返回一个实例 ID。下面是我的代码、XML 和脚本输出。

XML

CODE:

import os

import boto3
import lxml
import requests
from lxml import etree

s3 = boto3.resource('s3')


def apiLogin():
    """Log in to the Qualys API and keep the session in the module-level ``s``.

    The session is stored globally because ``launchReport`` reuses it for
    its own request.
    """
    global s
    s = requests.Session()
    qualys_username = "xxxxxx"
    qualys_password = "xxxxxx"
    payload = {'action':'login', 'username':qualys_username, 'password':qualys_password}
    s.headers.update({'X-Requested-With':qualys_username})
    s.post('https://qualysapi.qualys.com/api/2.0/fo/session/', data=payload)


def launchReport():
    """Fetch the host list as XML, save it under /tmp and upload it to S3.

    Requires ``apiLogin`` to have been called first so that ``s`` exists.
    """
    payload = {'action':'list', 'use_tags':'1', 'tag_set_include':'xxxxxx', 'host_metadata':'ec2', 'host_metadata_fields':'instanceId'}
    r = s.post('https://qualysapi.qualys.com/api/2.0/fo/asset/host/', data=payload)
    os.chdir('/tmp')
    # Close the file before re-opening it for upload; otherwise buffered
    # data may not have reached disk and a truncated file gets uploaded.
    with open('qualys_instances.xml', 'w') as f:
        print(r.text, file=f)
    with open('qualys_instances.xml', 'rb') as upload:
        s3.Bucket('xxxxxx').put_object(Key='qualys_instances.xml', Body=upload)


def formatReport():
    """Extract every EC2 instance ID from the stored XML and upload the list.

    Reads the XML object from S3, writes one instance ID per line to
    /tmp/qualys_instances.txt, then uploads that file to S3.
    """
    bucket = 'xxxxxx'
    key = 'xxxxxx'
    os.chdir('/tmp')
    obj = s3.Object(bucket, key)
    body = obj.get()['Body'].read()
    doc = etree.fromstring(body)
    host_list = doc.xpath('//EC2_INSTANCE_ID')
    # Open the output file once, outside the loop: opening it with 'w' on
    # every iteration truncates it, leaving only a single instance ID.
    with open('qualys_instances.txt', 'w') as k:
        for i in host_list:
            print(i)
            print(i.text, file=k)
    # The with-block above has flushed and closed the file, so the upload
    # sees the complete contents.
    with open('qualys_instances.txt', 'rb') as upload:
        s3.Bucket('nwm-all-instances').put_object(Key='qualys_instances.txt', Body=upload)

XML:

<?xml version="1.0" encoding="UTF-8" ?> <!DOCTYPE HOST_LIST_OUTPUT SYSTEM "https://qualysapi.qualys.com/api/2.0/fo/asset/host/host_list_output.dtd"> <HOST_LIST_OUTPUT> <RESPONSE> <DATETIME>2018-05-21T21:38:06Z</DATETIME> <HOST_LIST> <HOST> <ID>xxxxxx</ID> <IP>xxxxxx</IP> <TRACKING_METHOD>EC2</TRACKING_METHOD> <EC2_INSTANCE_ID><![CDATA[i-1111111]]></EC2_INSTANCE_ID> <METADATA> <EC2> <ATTRIBUTE> <NAME><![CDATA[latest/dynamic/instance-identity/document/instanceId]]></NAME> <LAST_STATUS>Fail</LAST_STATUS> <VALUE><![CDATA[]]></VALUE> <LAST_SUCCESS_DATE></LAST_SUCCESS_DATE> <LAST_ERROR_DATE>2018-05-16T03:41:14Z</LAST_ERROR_DATE> <LAST_ERROR><![CDATA[QualysShell not available]]> . </LAST_ERROR> </ATTRIBUTE> </EC2> </METADATA> </HOST> <HOST> <ID>xxxxxx</ID> <IP>xxxxxx</IP> <TRACKING_METHOD>EC2</TRACKING_METHOD> <EC2_INSTANCE_ID><![CDATA[i-222222]]></EC2_INSTANCE_ID> <METADATA> <EC2> <ATTRIBUTE> <NAME><![CDATA[latest/dynamic/instance-identity/document/instanceId]]></NAME> <LAST_STATUS>Fail</LAST_STATUS> <VALUE><![CDATA[]]></VALUE> <LAST_SUCCESS_DATE></LAST_SUCCESS_DATE> <LAST_ERROR_DATE>2018-05-16T03:27:35Z</LAST_ERROR_DATE> <LAST_ERROR><![CDATA[QualysShell not available]]> . </LAST_ERROR> </ATTRIBUTE> </EC2> </METADATA> </HOST> </HOST_LIST> </RESPONSE> </HOST_LIST_OUTPUT>

SCRIPT OUTPUT:

1 个答案:

答案 0 :(得分:1)

**尝试:**

from lxml import etree

xml_data = """<?xml version="1.0" encoding="UTF-8" ?><!DOCTYPE HOST_LIST_OUTPUT SYSTEM "https://qualysapi.qualys.com/api/2.0/fo/asset/host/host_list_output.dtd"><HOST_LIST_OUTPUT><RESPONSE><DATETIME>2018-05-21T21:38:06Z</DATETIME><HOST_LIST><HOST><ID>xxxxxx</ID><IP>xxxxxx</IP><TRACKING_METHOD>EC2</TRACKING_METHOD><EC2_INSTANCE_ID><![CDATA[i-1111111]]></EC2_INSTANCE_ID><METADATA><EC2><ATTRIBUTE><NAME><![CDATA[latest/dynamic/instance-identity/document/instanceId]]></NAME><LAST_STATUS>Fail</LAST_STATUS><VALUE><![CDATA[]]></VALUE><LAST_SUCCESS_DATE></LAST_SUCCESS_DATE><LAST_ERROR_DATE>2018-05-16T03:41:14Z</LAST_ERROR_DATE><LAST_ERROR><![CDATA[QualysShell not available]]>.</LAST_ERROR></ATTRIBUTE></EC2></METADATA></HOST><HOST><ID>xxxxxx</ID><IP>xxxxxx</IP><TRACKING_METHOD>EC2</TRACKING_METHOD><EC2_INSTANCE_ID><![CDATA[i-222222]]></EC2_INSTANCE_ID><METADATA><EC2><ATTRIBUTE><NAME><![CDATA[latest/dynamic/instance-identity/document/instanceId]]></NAME><LAST_STATUS>Fail</LAST_STATUS><VALUE><![CDATA[]]></VALUE><LAST_SUCCESS_DATE></LAST_SUCCESS_DATE><LAST_ERROR_DATE>2018-05-16T03:27:35Z</LAST_ERROR_DATE><LAST_ERROR><![CDATA[QualysShell not available]]>.</LAST_ERROR></ATTRIBUTE></EC2></METADATA></HOST></HOST_LIST></RESPONSE></HOST_LIST_OUTPUT>"""

# strip_cdata=False keeps CDATA sections as CDATA nodes in the parsed tree.
# It is optional for this task: the text inside a CDATA section can be read
# with text() whether or not the option is set.
parser = etree.XMLParser(strip_cdata=False)

# Parse from bytes, not str: lxml raises ValueError for a unicode string
# that carries an XML encoding declaration, as xml_data does.
tree = etree.fromstring(xml_data.encode("utf-8"), parser)

# The full path is spelled out for clarity; the shorter
# '//EC2_INSTANCE_ID/text()' selects the same nodes in this document.
for ec2_instance_id in tree.xpath('//HOST_LIST_OUTPUT/RESPONSE/HOST_LIST/HOST/EC2_INSTANCE_ID/text()'):
    print(ec2_instance_id)

**输出:**

i-1111111
i-222222