我有一个从stdin读取数据的脚本,例如:
# ---------------------------------------------------------------
# Load the NMON input
# ---------------------------------------------------------------
# Read every line available on stdin into a list. The list is kept
# in memory because it is iterated again for each section.
data = list(sys.stdin)
然后,脚本的一部分代码使用正则表达式查找各个部分,对数据进行转换并生成输出:
###################
# Dynamic Sections : data requires to be transposed to be exploitable within Splunk
###################
# For each section, the header line and the samples are first collected in an
# in-memory CSV (one column per device), then rewritten to the output file as
# one row per (timestamp, device) pair.
#
# NOTE: the output file is opened and its header row written unconditionally,
# so a file is produced even when `data` holds no sample for the section.
dynamic_section = ["DISKBUSY", "DISKBSIZE", "DISKREAD", "DISKWRITE"]

# Month abbreviations replaced by their two-digit number in timestamps.
# Built once instead of once per input line.
month_to_numbers = {
    'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04',
    'MAY': '05', 'JUN': '06', 'JUL': '07', 'AUG': '08',
    'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12',
}

# Patterns that do not depend on the section name.
header_columns_regex = re.compile(r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)')
# ZZZZ line carrying both time and date ("Nmon V10 and more").
timestamp_with_date_regex = re.compile(r'^ZZZZ\,(.+)\,(.+)\,(.+)\n')
# ZZZZ line carrying only the time ("Nmon V9 and less").
timestamp_time_only_regex = re.compile(r'^ZZZZ\,(.+)\,(.+)\n')

for section in dynamic_section:

    # Patterns specific to the current section.
    # The line filter was missing from this block (`find_section` was tested
    # without ever being assigned); it keeps the lines of the section plus the
    # ZZZZ timestamp lines.
    section_line_regex = re.compile(r'^' + section + r'[0-9]*' + r'|ZZZZ.+')
    # Header: the first field after the section name must not start with "T",
    # which is how sample lines (Txxxx reference) are told apart.
    header_regex = re.compile(r'(' + section + r')\,([^T].+)')
    perfdata_regex = re.compile(r'^' + section + r'\,(T\d+)\,(.+)\n')

    # Set output file (will be opened for writing after data transposition)
    currsection_output = (
        DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_'
        + hour + minute + second + '_' + section + '.nmon.csv'
    )

    # In-memory temporary CSV (named so that it does not shadow the Python 2
    # builtin `buffer`)
    membuffer = cStringIO.StringIO()

    try:
        # Number of lines written to the output file, header row included
        count = 0

        # Timestamp of the most recent ZZZZ line; reset for every section so a
        # value left over from a previous section is never reused.
        ZZZZ_timestamp = None

        for line in data:

            find_section = section_line_regex.match(line)
            if not find_section:
                continue

            # Replace some symbols (applies to header and sample lines alike)
            line = line.replace('%', '_PCT').replace(' ', '_').replace('+', '')
            line = line.replace('(', '_').replace(')', '_')

            # Extract header excluding data that always has Txxxx for timestamp reference
            fullheader_match = header_regex.search(line)
            if fullheader_match:
                # Replace "." by "_" in the header only: doing it on the whole
                # line would turn a decimal sample such as "12.5" into "12_5".
                fullheader = fullheader_match.group(2).replace('.', '_')

                header_match = header_columns_regex.match(fullheader)
                if header_match:
                    # group(1) is the first header field, dropped here;
                    # group(2) holds the remaining column names.
                    header = header_match.group(2)
                    membuffer.write('ZZZZ' + ',' + header + '\n')

            # Extract timestamp.
            # Lines without a date use the global date (AAA,date) held in DATE.
            timestamp_match = timestamp_with_date_regex.match(line)
            if timestamp_match:
                ZZZZ_TIME = timestamp_match.group(2)
                ZZZZ_DATE = timestamp_match.group(3)
            else:
                timestamp_match = timestamp_time_only_regex.match(line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)
                    ZZZZ_DATE = DATE

            if timestamp_match:
                # Convert month names to numbers
                for month_name, month_number in month_to_numbers.items():
                    ZZZZ_DATE = ZZZZ_DATE.replace(month_name, month_number)
                ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

            # Extract data. A sample seen before any ZZZZ line cannot be
            # timestamped and is skipped instead of raising NameError.
            perfdata_match = perfdata_regex.match(line)
            if perfdata_match and ZZZZ_timestamp is not None:
                perfdata = perfdata_match.group(2)
                membuffer.write(ZZZZ_timestamp + ',' + perfdata + '\n')

        # Open final for writing
        with open(currsection_output, "w") as currsection:

            # Rewind temp
            membuffer.seek(0)

            writer = csv.writer(currsection)
            writer.writerow(['type', 'serialnum', 'hostname', 'ZZZZ', 'device', 'value'])
            count += 1

            # Transpose: one output row per device column of every sample
            for d in csv.DictReader(membuffer):
                ZZZZ = d.pop('ZZZZ')
                for device, value in sorted(d.items()):
                    count += 1
                    writer.writerow([section, SN, HOSTNAME, ZZZZ, device, value])

    finally:
        # Discard memory buffer, even if reading or writing failed
        membuffer.close()

    # Show number of lines extracted
    result = section + " section: Wrote" + " " + str(count) + " lines"
    print(result)
    ref.write(result + "\n")

# End for
如果从stdin读取的内容(存储在data中)里不存在某个部分的数据,我该如何避免进入该部分的主循环?
感谢您的帮助!
答案 0 :(得分:0)
我最终找到了一种处理方法:在主循环之前先添加一个带计数器的循环,例如:
dynamic_section = ["IOADAPT","NETERROR","NET","NETPACKET","JFSFILE","JFSINODE"]

for section in dynamic_section:

    # Pattern of a sample line of this section: "<section>,T<digits>,<values>"
    sample_regex = r'^' + section + '\,(T\d+)\,(.+)\n'

    # Count how many input lines are samples of the section
    count = sum(1 for line in data if re.match(sample_regex, line))

    # Only sections with more than two samples are processed
    if count > 2:
        # ... the rest of the section processing goes here
        pass
其余的代码。
最后,完整的代码:
# Dynamic sections: each one has one column per device. The header line and
# the samples are first collected in an in-memory CSV, then rewritten to the
# output file as one row per (timestamp, device) pair. Sections with too few
# samples in `data` are skipped and produce no output file.
dynamic_section = ["IOADAPT", "NETERROR", "NET", "NETPACKET", "JFSFILE", "JFSINODE"]

# Month abbreviations replaced by their two-digit number in timestamps.
# Built once instead of once per input line.
month_to_numbers = {
    'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04',
    'MAY': '05', 'JUN': '06', 'JUL': '07', 'AUG': '08',
    'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12',
}

# Patterns that do not depend on the section name.
header_columns_regex = re.compile(r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)')
# ZZZZ line carrying both time and date ("Nmon V10 and more").
timestamp_with_date_regex = re.compile(r'^ZZZZ\,(.+)\,(.+)\,(.+)\n')
# ZZZZ line carrying only the time ("Nmon V9 and less").
timestamp_time_only_regex = re.compile(r'^ZZZZ\,(.+)\,(.+)\n')

for section in dynamic_section:

    # Patterns specific to the current section.
    # Line filter: lines of the section plus the ZZZZ timestamp lines.
    section_line_regex = re.compile(r'^' + section + r'[0-9]*' + r'|ZZZZ.+')
    # Header: the first field after the section name must not start with "T",
    # which is how sample lines (Txxxx reference) are told apart.
    header_regex = re.compile(r'(' + section + r')\,([^T].+)')
    perfdata_regex = re.compile(r'^' + section + r'\,(T\d+)\,(.+)\n')

    # Pre-scan: count the samples of this section and skip the section
    # entirely unless there are more than two of them.
    count = sum(1 for line in data if perfdata_regex.match(line))
    if count <= 2:
        continue

    # Set output file (will be opened for writing after data transposition)
    currsection_output = (
        DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_'
        + hour + minute + second + '_' + section + '.nmon.csv'
    )

    # In-memory temporary CSV (named so that it does not shadow the Python 2
    # builtin `buffer`)
    membuffer = cStringIO.StringIO()

    try:
        # From here on `count` is the number of lines written to the output
        # file, header row included
        count = 0

        # Timestamp of the most recent ZZZZ line; reset for every section so a
        # value left over from a previous section is never reused.
        ZZZZ_timestamp = None

        for line in data:

            find_section = section_line_regex.match(line)
            if not find_section:
                continue

            # Replace some symbols (applies to header and sample lines alike)
            line = line.replace('%', '_PCT').replace(' ', '_').replace('+', '')
            line = line.replace('(', '_').replace(')', '_')

            # Extract header excluding data that always has Txxxx for timestamp reference
            fullheader_match = header_regex.search(line)
            if fullheader_match:
                # Replace "." by "_" only for header, so decimal samples are
                # left untouched
                fullheader = fullheader_match.group(2).replace('.', '_')

                header_match = header_columns_regex.match(fullheader)
                if header_match:
                    # group(1) is the first header field, dropped here;
                    # group(2) holds the remaining column names.
                    header = header_match.group(2)
                    membuffer.write('ZZZZ' + ',' + header + '\n')

            # Extract timestamp.
            # Lines without a date use the global date (AAA,date) held in DATE.
            timestamp_match = timestamp_with_date_regex.match(line)
            if timestamp_match:
                ZZZZ_TIME = timestamp_match.group(2)
                ZZZZ_DATE = timestamp_match.group(3)
            else:
                timestamp_match = timestamp_time_only_regex.match(line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)
                    ZZZZ_DATE = DATE

            if timestamp_match:
                # Convert month names to numbers
                for month_name, month_number in month_to_numbers.items():
                    ZZZZ_DATE = ZZZZ_DATE.replace(month_name, month_number)
                ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

            # Extract data. A sample seen before any ZZZZ line cannot be
            # timestamped and is skipped instead of raising NameError.
            perfdata_match = perfdata_regex.match(line)
            if perfdata_match and ZZZZ_timestamp is not None:
                perfdata = perfdata_match.group(2)
                membuffer.write(ZZZZ_timestamp + ',' + perfdata + '\n')

        # Open final for writing
        with open(currsection_output, "w") as currsection:

            # Rewind temp
            membuffer.seek(0)

            writer = csv.writer(currsection)
            writer.writerow(['type', 'serialnum', 'hostname', 'ZZZZ', 'device', 'value'])
            count += 1

            # Transpose: one output row per device column of every sample
            for d in csv.DictReader(membuffer):
                ZZZZ = d.pop('ZZZZ')
                for device, value in sorted(d.items()):
                    count += 1
                    writer.writerow([section, SN, HOSTNAME, ZZZZ, device, value])

    finally:
        # Discard memory buffer, even if reading or writing failed
        membuffer.close()

    # Show number of lines extracted
    result = section + " section: Wrote" + " " + str(count) + " lines"
    print(result)
    ref.write(result + "\n")

# End for