我有一个 Python 脚本，用来检查我的 Qualys 扫描程序正在扫描哪些实例。API 响应是 XML 格式，我使用 lxml 库的 etree 模块来遍历响应并解析每个 <EC2_INSTANCE_ID> 标记。但是，我的循环只返回一个实例 ID。下面是我的代码、XML 和我的脚本输出。
CODE:
import os
import requests
import boto3
import lxml
from lxml import etree
# Module-level S3 resource handle shared by launchReport() and formatReport().
s3 = boto3.resource('s3')
def apiLogin():
    """Log in to the Qualys API and publish the session as the global ``s``.

    A new ``requests.Session`` is created, tagged with an
    ``X-Requested-With`` header carrying the user name, bound to the
    module-level name ``s``, and then used to POST the ``login`` action to
    the Qualys session endpoint. The login response is not inspected.
    """
    global s
    qualys_username = "xxxxxx"
    qualys_password = "xxxxxx"

    session = requests.Session()
    session.headers.update({'X-Requested-With': qualys_username})
    # Bind the global before the login request so later calls reuse this session.
    s = session

    login_form = dict(
        action='login',
        username=qualys_username,
        password=qualys_password,
    )
    session.post(
        'https://qualysapi.qualys.com/api/2.0/fo/session/',
        data=login_form,
    )
def launchReport():
    """Fetch the tagged host list from Qualys and upload it to S3.

    Uses the module-level session ``s`` to POST a ``list`` action to the
    Qualys host asset endpoint, writes the response text to
    ``qualys_instances.xml`` under ``/tmp`` and uploads that file to the
    ``xxxxxx`` bucket under the key ``qualys_instances.xml``.

    Side effects: changes the process working directory to ``/tmp``.
    """
    payload = {
        'action': 'list',
        'use_tags': '1',
        'tag_set_include': 'xxxxxx',
        'host_metadata': 'ec2',
        'host_metadata_fields': 'instanceId',
    }
    r = s.post('https://qualysapi.qualys.com/api/2.0/fo/asset/host/', data=payload)
    os.chdir('/tmp')
    # The write handle must be closed (and therefore flushed) before the file
    # is re-opened for upload; otherwise buffered data may be missing from
    # the uploaded object.
    with open('qualys_instances.xml', 'w') as f:
        print(r.text, file=f)
    with open('qualys_instances.xml', 'rb') as report:
        s3.Bucket('xxxxxx').put_object(Key='qualys_instances.xml', Body=report)
def formatReport():
    """Extract every EC2 instance ID from the stored report and upload the list.

    Reads the XML object ``xxxxxx`` from bucket ``xxxxxx``, selects all
    ``EC2_INSTANCE_ID`` elements, writes their text one per line to
    ``qualys_instances.txt`` under ``/tmp`` and uploads that file to the
    ``nwm-all-instances`` bucket under the key ``qualys_instances.txt``.

    The output file is opened once for the whole loop. Re-opening it in
    ``'w'`` mode for each element truncates it every time, leaving only the
    last instance ID in the file. If no elements match, an empty file is
    written and uploaded.

    Side effects: changes the process working directory to ``/tmp`` and
    prints each matched element to stdout.
    """
    bucket = 'xxxxxx'
    key = 'xxxxxx'
    os.chdir('/tmp')
    obj = s3.Object(bucket, key)
    body = obj.get()['Body'].read()
    doc = etree.fromstring(body)
    host_list = doc.xpath('//EC2_INSTANCE_ID')
    with open('qualys_instances.txt', 'w') as k:
        for i in host_list:
            print(i)
            # An empty element has text None; skip it rather than writing
            # the literal string "None" into the ID list.
            if i.text:
                print(i.text, file=k)
    # The file is closed (flushed) above, so the upload sees every line.
    with open('qualys_instances.txt', 'rb') as id_file:
        s3.Bucket('nwm-all-instances').put_object(Key='qualys_instances.txt', Body=id_file)
XML:
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE HOST_LIST_OUTPUT SYSTEM "https://qualysapi.qualys.com/api/2.0/fo/asset/host/host_list_output.dtd">
<HOST_LIST_OUTPUT>
<RESPONSE>
<DATETIME>2018-05-21T21:38:06Z</DATETIME>
<HOST_LIST>
<HOST>
<ID>xxxxxx</ID>
<IP>xxxxxx</IP>
<TRACKING_METHOD>EC2</TRACKING_METHOD>
<EC2_INSTANCE_ID><![CDATA[i-1111111]]></EC2_INSTANCE_ID>
<METADATA>
<EC2>
<ATTRIBUTE>
<NAME><![CDATA[latest/dynamic/instance-identity/document/instanceId]]></NAME>
<LAST_STATUS>Fail</LAST_STATUS>
<VALUE><![CDATA[]]></VALUE>
<LAST_SUCCESS_DATE></LAST_SUCCESS_DATE>
<LAST_ERROR_DATE>2018-05-16T03:41:14Z</LAST_ERROR_DATE>
<LAST_ERROR><![CDATA[QualysShell not available]]> .
</LAST_ERROR>
</ATTRIBUTE>
</EC2>
</METADATA>
</HOST>
<HOST>
<ID>xxxxxx</ID>
<IP>xxxxxx</IP>
<TRACKING_METHOD>EC2</TRACKING_METHOD>
<EC2_INSTANCE_ID><![CDATA[i-222222]]></EC2_INSTANCE_ID>
<METADATA>
<EC2>
<ATTRIBUTE>
<NAME><![CDATA[latest/dynamic/instance-identity/document/instanceId]]></NAME>
<LAST_STATUS>Fail</LAST_STATUS>
<VALUE><![CDATA[]]></VALUE>
<LAST_SUCCESS_DATE></LAST_SUCCESS_DATE>
<LAST_ERROR_DATE>2018-05-16T03:27:35Z</LAST_ERROR_DATE>
<LAST_ERROR><![CDATA[QualysShell not available]]> .
</LAST_ERROR>
</ATTRIBUTE>
</EC2>
</METADATA>
</HOST>
</HOST_LIST>
</RESPONSE>
</HOST_LIST_OUTPUT>
SCRIPT OUTPUT:
答案 0 :(得分:1)
<strong>尝试:</strong>
from lxml import etree
# Sample Qualys host-list response containing two hosts.
xml_data = """<?xml version="1.0" encoding="UTF-8" ?><!DOCTYPE HOST_LIST_OUTPUT SYSTEM "https://qualysapi.qualys.com/api/2.0/fo/asset/host/host_list_output.dtd"><HOST_LIST_OUTPUT><RESPONSE><DATETIME>2018-05-21T21:38:06Z</DATETIME><HOST_LIST><HOST><ID>xxxxxx</ID><IP>xxxxxx</IP><TRACKING_METHOD>EC2</TRACKING_METHOD><EC2_INSTANCE_ID><![CDATA[i-1111111]]></EC2_INSTANCE_ID><METADATA><EC2><ATTRIBUTE><NAME><![CDATA[latest/dynamic/instance-identity/document/instanceId]]></NAME><LAST_STATUS>Fail</LAST_STATUS><VALUE><![CDATA[]]></VALUE><LAST_SUCCESS_DATE></LAST_SUCCESS_DATE><LAST_ERROR_DATE>2018-05-16T03:41:14Z</LAST_ERROR_DATE><LAST_ERROR><![CDATA[QualysShell not available]]>.</LAST_ERROR></ATTRIBUTE></EC2></METADATA></HOST><HOST><ID>xxxxxx</ID><IP>xxxxxx</IP><TRACKING_METHOD>EC2</TRACKING_METHOD><EC2_INSTANCE_ID><![CDATA[i-222222]]></EC2_INSTANCE_ID><METADATA><EC2><ATTRIBUTE><NAME><![CDATA[latest/dynamic/instance-identity/document/instanceId]]></NAME><LAST_STATUS>Fail</LAST_STATUS><VALUE><![CDATA[]]></VALUE><LAST_SUCCESS_DATE></LAST_SUCCESS_DATE><LAST_ERROR_DATE>2018-05-16T03:27:35Z</LAST_ERROR_DATE><LAST_ERROR><![CDATA[QualysShell not available]]>.</LAST_ERROR></ATTRIBUTE></EC2></METADATA></HOST></HOST_LIST></RESPONSE></HOST_LIST_OUTPUT>"""
# strip_cdata=False keeps CDATA sections intact instead of converting them to
# plain text nodes (lxml's default); their content is still reachable through
# text() either way.
parser = etree.XMLParser(strip_cdata=False)
# lxml rejects a str that carries an XML encoding declaration (ValueError),
# so hand the parser bytes instead.
tree = etree.fromstring(xml_data.encode('utf-8'), parser)
# The full path is spelled out for clarity; the shorter
# '//EC2_INSTANCE_ID/text()' selects the same nodes in this document.
for ec2_instance_id in tree.xpath('//HOST_LIST_OUTPUT/RESPONSE/HOST_LIST/HOST/EC2_INSTANCE_ID/text()'):
    print(ec2_instance_id)
<strong>输出:</strong>
i-1111111
i-222222