我正在尝试使用正则表达式编辑一些字典(dict)的值,然后再将结果转换回dict对象。
我正在使用Shodan API查询IP数据,它返回的结果比我需要的多得多。
Shodan Ip
ipinfo {'city': None, 'region_code': None, 'os': None, 'tags': ['vpn'], 'ip': 771247238, 'isp': 'Host Universal Pty', 'area_code': None, 'dma_code': None, 'last_update': '2019-05-01T06:53:53.130508', 'country_code3': 'AUS', 'country_name': 'Australia', 'hostnames': [], 'postal_code': None, 'longitude': 143.2104, 'country_code': 'AU', 'ip_str': '45.248.76.134', 'latitude': -33.494, 'org': 'Host Universal Pty', 'data': [{'_shodan': {'id': 'bc2dc252-5b9d-4b3d-975f-0156860c8849', 'options': {}, 'ptr': True, 'module': 'https', 'crawler': '65e79faecee26516a8ed6f16c1142432f303fbdc'}, 'hash': 0, 'os': None, 'opts': {}, 'ip': 771247238, 'isp': 'Host Universal Pty', 'port': 443, 'hostnames': [], 'location': {'city': None, 'region_code': None, 'area_code': None, 'longitude': 143.2104, 'country_code3': 'AUS', 'country_name': 'Australia', 'postal_code': None, 'dma_code': None, 'country_code': 'AU', 'latitude': -33.494}, 'timestamp': '2019-05-01T06:53:53.130508', 'domains': [], 'org': 'Host Universal Pty', 'data': '', 'asn': 'AS136557', 'transport': 'tcp', 'ip_str': '45.248.76.134'}, {'_shodan': {'id': 'cdce36e7-588f-4377-8cc6-f9bedd426e6b', 'options': {}, 'ptr': True, 'module': 'https', 'crawler': '0636e1e6dd371760aeaf808ed839236e73a9e74d'}, 'hash': 0, 'os': None, 'opts': {}, 'ip': 771247238, 'isp': 'Host Universal Pty', 'port': 8443, 'hostnames': [], 'location': {'city': None, 'region_code': None, 'area_code': None, 'longitude': 143.2104, 'country_code3': 'AUS', 'country_name': 'Australia', 'postal_code': None, 'dma_code': None, 'country_code': 'AU', 'latitude': -33.494}, 'timestamp': '2019-04-26T18:31:18.138759', 'domains': [], 'org': 'Host Universal Pty', 'data': '', 'asn': 'AS136557', 'transport': 'tcp', 'ip_str': '45.248.76.134'}, {'_shodan': {'id': '27e5f5e0-662e-4621-b043-56d64d25f38d', 'options': {}, 'ptr': True, 'module': 'http', 'crawler': 'c9b639b99e5410a46f656e1508a68f1e6e5d6f99'}, 'hash': 0, 'os': None, 'opts': {}, 'ip': 771247238, 'isp': 'Host Universal Pty', 
'http': {'robots_hash': None, 'redirects': [], 'securitytxt': None, 'title': None, 'sitemap_hash': None, 'robots': None, 'server': None, 'host': '45.248.76.134', 'html': None, 'location': '/', 'html_hash': None, 'sitemap': None, 'securitytxt_hash': None}, 'port': 8080, 'hostnames': [], 'location': {'city': None, 'region_code': None, 'area_code': None, 'longitude': 143.2104, 'country_code3': 'AUS', 'country_name': 'Australia', 'postal_code': None, 'dma_code': None, 'country_code': 'AU', 'latitude': -33.494}, 'timestamp': '2019-04-21T03:00:14.986062', 'domains': [], 'org': 'Host Universal Pty', 'data': '', 'asn': 'AS136557', 'transport': 'tcp', 'ip_str': '45.248.76.134'}, {'_shodan': {'id': 'bfbc3556-d00d-4512-8cb3-32ef6cae9964', 'options': {}, 'ptr': True, 'module': 'ike', 'crawler': '8cd926590a400feb4b683f8337a77287ddf3d2c7'}, 'hash': -451677272, 'os': None, 'tags': ['vpn'], 'opts': {'raw': '61713862726c6c3764627037343033792920252800000000000000240000000800000005'}, 'ip': 771247238, 'isp': 'Host Universal Pty', 'port': 500, 'isakmp': {'initiator_spi': '61713862726c6c37', 'responder_spi': '6462703734303379', 'msg_id': '00000000', 'next_payload': 41, 'exchange_type': 37, 'length': 36, 'version': '2.0', 'flags': {'encryption': False, 'authentication': False, 'commit': False}, 'aggressive': {'initiator_spi': 'a6517b6a97dca862', 'responder_spi': '1655d8123c9f2104', 'msg_id': 'd14144c6', 'next_payload': 11, 'exchange_type': 5, 'length': 40, 'version': '1.0', 'flags': {'encryption': False, 'authentication': False, 'commit': False}, 'vendor_ids': []}, 'vendor_ids': []}, 'hostnames': [], 'location': {'city': None, 'region_code': None, 'area_code': None, 'longitude': 143.2104, 'country_code3': 'AUS', 'country_name': 'Australia', 'postal_code': None, 'dma_code': None, 'country_code': 'AU', 'latitude': -33.494}, 'timestamp': '2019-04-13T11:18:42.166709', 'domains': [], 'org': 'Host Universal Pty', 'data': 'VPN (IKE)\n\nInitiator SPI: 61713862726c6c37\nResponder SPI: 
6462703734303379\nNext Payload: RESERVED\nVersion: 2.0\nExchange Type: DOI Specific Use\nFlags:\n Encryption: False\n Commit: False\n Authentication: False\nMessage ID: 00000000\nLength: 36', 'asn': 'AS136557', 'transport': 'udp', 'ip_str': '45.248.76.134'}], 'asn': 'AS136557', 'ports': [443, 8443, 8080, 500]}
我使用正则表达式删除不需要的数据。下面这行会把从'data'字段开始直到末尾的所有内容删除。
# Stringify the dict, then replace the two characters preceding 'data': plus
# everything after it on that line with "}", i.e. drop the 'data' key onward.
osint_ip1 = re.sub("..'data':.*", "}", str(ipinfo))
问题来了……由于Shodan针对不同IP返回的字段并不一致,我需要使用DictWriter将各个值写入对应的字段。
问题是我必须先将ipinfo转换为字符串才能编辑数据,而csv.DictWriter不能接受字符串对象。
如何将字符串变回dict(字典)格式?
OSINT(STRING)
rejoin : ["{'city': None", " 'region_code': None", " 'os': None", " 'tags': ['vpn']", " 'ip': 771247238", " 'isp': 'Host Universal Pty'", " 'area_code': None", " 'dma_code': None", " 'last_update': '2019-05-01T06:53:53.130508'", " 'country_code3': 'AUS'", " 'country_name': 'Australia'", " 'hostnames': []", " 'postal_code': None", " 'longitude': 143.2104", " 'country_code': 'AU'", " 'ip_str': '45.248.76.134'", " 'latitude': -33.494", " 'org': 'Host Universal Pty'}"]
下面的完整代码
import csv
import os
import re
import time
import shodan
from shodan import Shodan
def OPSINT():
    """Append a one-row Shodan host summary to OSINT.csv for each address.

    Reads the module-level iterable ``ip`` and the module-level Shodan client
    ``api``. For every address, ``api.host`` is queried and the top-level
    summary fields listed in ``fieldnames`` are written as one CSV row.
    Addresses for which Shodan raises ``APIError`` are reported and skipped.

    The header row is written once, when the output file is still empty.
    Returns ``None``.
    """
    filename = 'C:\\ProgramData\\FDA\\output\\processed\\OSINT.csv'
    fieldnames = ['city', 'region_code', 'os', 'tags', 'ip', 'isp', 'area_code', 'dma_code', 'last_update',
                  'country_code3', 'country_name', 'hostnames', 'postal_code', 'longitude', 'country_code',
                  'ip_str', 'latitude', 'org']

    for x in ip:
        print(x)
        try:
            ipinfo = api.host(x)
        except shodan.exception.APIError as err:
            # Best-effort: a failed lookup must not abort the remaining addresses.
            print("Shodan lookup failed for", x, "-", err)
            continue
        if ipinfo is None:
            continue
        print("ipinfo", ipinfo)

        # Select the wanted keys directly from the dict instead of editing its
        # string form with a regex. .get() tolerates keys that Shodan omits for
        # some hosts; DictWriter writes None as an empty cell.
        row = {key: ipinfo.get(key) for key in fieldnames}

        # newline='' is what the csv module expects; without it Windows output
        # gets a blank line after every row.
        with open(filename, 'a', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            # Opening in append mode has already created the file, so stat is safe.
            if os.stat(filename).st_size == 0:
                writer.writeheader()
            # Always write the row, including the first one after the header.
            writer.writerow(row)
答案 0 :(得分:2)
dict是结构良好的数据类型,旨在通过键进行访问和/或操作。通过将字典转换为字符串并执行正则表达式替换来操作字典是完全不必要且容易出错的。
由于您已经将所需的确切键定义为fieldnames
,因此您可以直接使用operator.itemgetter
从字典ipinfo
中取出这些键对应的值,再与键名组合成一个新字典,
交给csv.DictWriter.writerow写入:
from operator import itemgetter
...
# itemgetter(*fieldnames)(ipinfo) looks up ipinfo[name] for each name in
# fieldnames, in order; zip pairs every name with its value and dict() turns
# the pairs into the mapping passed to writerow.
# NOTE(review): itemgetter raises KeyError if ipinfo lacks one of the names —
# confirm every field is always present in the API response.
writer.writerow(dict(zip(fieldnames, itemgetter(*fieldnames)(ipinfo))))