Question

我有一个从stdin读取数据的脚本，例如：

#################################
# Retrieve NMON data from stdin #
#################################

# Read the whole nmon stream from stdin into memory as a list of lines
# (readlines() keeps the trailing newline on each line).

data = sys.stdin.readlines()

然后，脚本中的一部分代码使用正则表达式搜索这些数据，对其进行转换并生成输出：

###################
# Dynamic Sections : data requires to be transposed to be exploitable within Splunk
###################

# For every section name listed here, the loop below scans `data`, builds an
# in-memory CSV ("ZZZZ" timestamp column followed by the header fields of the
# section), and then rewrites it to a file with one row per
# (timestamp, header field, value).
dynamic_section = ["DISKBUSY","DISKBSIZE","DISKREAD","DISKWRITE"]

for section in dynamic_section:

    # Set output file (will be opened for writing after data transposition)
    currsection_output = DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '_' + section + '.nmon.csv'

    # Open StringIO for temp in memory
    buffer = cStringIO.StringIO()

    # counter (number of rows written to the output file, header included)
    count = 0

    for line in data:

        # NOTE(review): `find_section` is not assigned anywhere in this
        # excerpt; presumably it is the result of matching `line` against the
        # current section name -- confirm against the full script.
        if find_section:

            # csv header

            # Replace some symbols so the fields become plain identifiers.
            # The second " " substitution is a no-op (spaces are already gone).
            # NOTE(review): the "." substitution is applied to the whole line,
            # so it also rewrites dots inside the values extracted further
            # down from the same `line`.
            line=re.sub("%",'_PCT',line)
            line=re.sub(" ",'_',line)
            line=re.sub("\+",'',line)
            line=re.sub("\(",'_',line)              
            line=re.sub("\)",'_',line)              
            line=re.sub(" ",'_',line)
            line=re.sub("\.",'_',line)

            # Extract header excluding data that always has Txxxx for timestamp reference
            myregex = '(' + section + ')\,([^T].+)'
            fullheader_match = re.search( myregex, line)            

            if fullheader_match:
                fullheader = fullheader_match.group(2)

                # group(1) is the first comma-terminated field, which is
                # dropped; group(2) holds the remaining header fields.
                header_match = re.match( r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)', fullheader)    

                if header_match:
                    header = header_match.group(2)

                    # Write header
                    # (the trailing comma only builds a discarded 1-tuple)
                    buffer.write('ZZZZ' + ',' + header + '\n'),


            # Extract timestamp

            # Nmon V9 and prior do not have date in ZZZZ
            # If unavailable, we'll use the global date (AAA,date)
            # '-1' is the "not found on this line" marker; both values are
            # reset for every line.
            ZZZZ_DATE = '-1'
            ZZZZ_TIME = '-1'                

            # For Nmon V10 and more: ZZZZ,<id>,<time>,<date>

            timestamp_match = re.match( r'^ZZZZ\,(.+)\,(.+)\,(.+)\n', line)
            if timestamp_match:
                ZZZZ_TIME = timestamp_match.group(2)
                ZZZZ_DATE = timestamp_match.group(3)            

                # Convert month names to numbers
                month_to_numbers = {'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04', 'MAY': '05', 'JUN': '06', 'JUL': '07', 'AUG': '08', 'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12'}         

                for k, v in month_to_numbers.items():
                    ZZZZ_DATE = ZZZZ_DATE.replace(k, v)

                ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

            # For Nmon V9 and less: ZZZZ,<id>,<time> (date taken from DATE)

            if ZZZZ_DATE == '-1':
                ZZZZ_DATE = DATE
                timestamp_match = re.match( r'^ZZZZ\,(.+)\,(.+)\n', line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)                    

                    # Convert month names to numbers
                    month_to_numbers = {'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04', 'MAY': '05', 'JUN': '06', 'JUL': '07', 'AUG': '08', 'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12'}         

                    for k, v in month_to_numbers.items():
                        ZZZZ_DATE = ZZZZ_DATE.replace(k, v)

                    ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

            # Extract Data: <section>,T<digits>,<values>
            myregex = r'^' + section + '\,(T\d+)\,(.+)\n'
            perfdata_match = re.match( myregex, line)
            if perfdata_match:
                perfdata = perfdata_match.group(2)

                # Write perf data, stamped with the most recent ZZZZ timestamp.
                # NOTE(review): ZZZZ_timestamp is only assigned once a ZZZZ
                # line has matched; a data line seen before that would reuse
                # an earlier value or raise NameError.
                buffer.write(ZZZZ_timestamp + ',' + perfdata + '\n'),


    # Open final for writing
    with open(currsection_output, "w") as currsection:

        # Rewind temp
        buffer.seek(0)

        writer = csv.writer(currsection)
        writer.writerow(['type', 'serialnum', 'hostname', 'ZZZZ', 'device', 'value'])           

        # increment (accounts for the header row just written)
        count += 1

        # Transpose: every column of a buffered row becomes its own output row.
        for d in csv.DictReader(buffer):
            ZZZZ = d.pop('ZZZZ')
            for device, value in sorted(d.items()):

                # increment
                count += 1

                row = [section, SN, HOSTNAME, ZZZZ, device, value]
                writer.writerow(row)            

        # End for

    # Show number of lines extracted
    result = section + " section: Wrote" + " " + str(count) + " lines"
    print (result)
    ref.write(result + "\n")

    # Discard memory buffer 
    buffer.close()  

# End for

如果从 stdin 读取（并存储在 data 中）的内容里不存在对应的数据，我该如何避免进入循环（主要部分）？

感谢您的帮助！

Answer 1

我最终找到了一种处理方法：在主循环之前先加一个带计数器的循环，统计匹配的数据行数，例如：

# Section names to process.
dynamic_section = ["IOADAPT","NETERROR","NET","NETPACKET","JFSFILE","JFSINODE"]

for section in dynamic_section:

    # counter
    count = 0

    # Pre-scan: nothing is written here, the loop only counts matching lines.
    for line in data:

        # Count the lines shaped like "<section>,T<digits>,<values>"
        myregex = r'^' + section + '\,(T\d+)\,(.+)\n'
        find_section = re.match( myregex, line)
        if find_section:

            # increment
            count += 1

    # Only process the section when more than two such lines were found.
    if count > 2:

（其余的代码放在这里，缩进到上面的 `if count > 2:` 之下。）

最后，完整的代码：

# Sections whose columns have to be transposed into one output row per
# (timestamp, header field, value).
dynamic_section = ["IOADAPT","NETERROR","NET","NETPACKET","JFSFILE","JFSINODE"]

# Month abbreviations found in ZZZZ dates, mapped to two-digit month numbers.
# Built once instead of once per input line.
month_to_numbers = {'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04', 'MAY': '05', 'JUN': '06', 'JUL': '07', 'AUG': '08', 'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12'}

# ZZZZ lines do not depend on the section, so their patterns are compiled once.
# Nmon V10 and more: ZZZZ,<id>,<time>,<date>
zzzz_with_date_re = re.compile(r'^ZZZZ,(.+),(.+),(.+)\n')
# Nmon V9 and less:  ZZZZ,<id>,<time>   (no date on the line)
zzzz_time_only_re = re.compile(r'^ZZZZ,(.+),(.+)\n')

# Header fields: group(1) is the first comma-terminated field (dropped),
# group(2) holds the remaining header fields.
header_fields_re = re.compile(r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)')

for section in dynamic_section:

    # Escape the section name so it is always matched literally.
    section_re = re.escape(section)

    # "<section>,T<digits>,<values>": a line carrying performance data
    perfdata_re = re.compile(r'^' + section_re + r',(T\d+),(.+)\n')
    # Lines worth looking at: anything starting with the section name, or ZZZZ
    candidate_re = re.compile(r'^' + section_re + r'[0-9]*' + r'|ZZZZ.+')
    # "<section>,<header...>" where the header does not start with "T"
    # (data lines always carry a Txxxx timestamp reference there)
    header_re = re.compile(r'(' + section_re + r'),([^T].+)')

    # Pre-scan: count the data lines available for this section
    count = sum(1 for line in data if perfdata_re.match(line))

    # Skip the section entirely unless more than two data lines exist, so
    # that no output file is created for absent sections.
    if count <= 2:
        continue

    # Set output file (will be opened for writing after data transposition)
    currsection_output = DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '_' + section + '.nmon.csv'

    # Open StringIO for temp in memory
    buffer = cStringIO.StringIO()

    try:

        # counter (rows written to the output file, header included)
        count = 0

        # Most recent ZZZZ timestamp; reset per section so a value left over
        # from a previous section can never be attached to this one.
        ZZZZ_timestamp = None

        for line in data:

            if not candidate_re.match(line):
                continue

            # Replace some symbols so that fields become plain identifiers
            line = (line.replace('%', '_PCT')
                        .replace(' ', '_')
                        .replace('+', '')
                        .replace('(', '_')
                        .replace(')', '_'))

            # csv header
            fullheader_match = header_re.search(line)

            if fullheader_match:

                # Replace "." by "_" only for header (values keep their dots)
                fullheader = fullheader_match.group(2).replace('.', '_')

                header_match = header_fields_re.match(fullheader)

                if header_match:
                    header = header_match.group(2)

                    # Write header
                    buffer.write('ZZZZ' + ',' + header + '\n')

            # Extract timestamp
            # Nmon V9 and prior do not have date in ZZZZ; when it is
            # unavailable the global date (AAA,date) is used instead.
            timestamp_match = zzzz_with_date_re.match(line)

            if timestamp_match:
                ZZZZ_TIME = timestamp_match.group(2)
                ZZZZ_DATE = timestamp_match.group(3)
            else:
                timestamp_match = zzzz_time_only_re.match(line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)
                    ZZZZ_DATE = DATE

            if timestamp_match:

                # Convert month names to numbers
                for k, v in month_to_numbers.items():
                    ZZZZ_DATE = ZZZZ_DATE.replace(k, v)

                ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

            # Extract Data
            perfdata_match = perfdata_re.match(line)

            # A data line seen before any ZZZZ line has no usable timestamp;
            # it is left out instead of failing on an unbound variable.
            if perfdata_match and ZZZZ_timestamp is not None:
                perfdata = perfdata_match.group(2)

                # Write perf data
                buffer.write(ZZZZ_timestamp + ',' + perfdata + '\n')

        # Open final for writing
        with open(currsection_output, "w") as currsection:

            # Rewind temp
            buffer.seek(0)

            writer = csv.writer(currsection)
            writer.writerow(['type', 'serialnum', 'hostname', 'ZZZZ', 'device', 'value'])

            # increment (header row)
            count += 1

            # Transpose: every column of a buffered row becomes its own row
            for d in csv.DictReader(buffer):
                ZZZZ = d.pop('ZZZZ')
                for device, value in sorted(d.items()):

                    # increment
                    count += 1

                    writer.writerow([section, SN, HOSTNAME, ZZZZ, device, value])

    finally:

        # Discard memory buffer, even when writing the file failed
        buffer.close()

    # Show number of lines extracted
    result = section + " section: Wrote" + " " + str(count) + " lines"
    print (result)
    ref.write(result + "\n")

# End for

Python - 仅在存在匹配数据时才进入循环

1 个答案: