
时间:2015-09-12 13:59:15

标签: python-2.7


blahhh blaahhh blahhh
 some thing write this long 23.78, lat 45.45
      g.m.  occ/yr  r(event)   g.m. occ/yr   r(event)
      0.125  0.254   12.587    0.258 2.568   1.369
      0.785  0.365   10.258    0.897 2.987   9.365
something note write here blahh blahhh blahhh


long 23.78 lat 45.45 g.m. 0.125, 0.785 occ/yr 0.254, 0.365 r(event) 12.587,10.258 g.m 0.258, 0.897 occ/yr 2.568, 2.987 r(event) 1.369, 9.365 


import re

# Tokenise the report line by line.  Every character that is not a word
# character, '.', '/', '(' or ')' is replaced by a space before splitting, so
# "23.78," becomes "23.78" while "occ/yr" and "r(event)" stay single tokens.
#
# A single pass over the file is enough: the earlier outer `while i < len(...)`
# loop never advanced `i` and therefore never terminated.
with open('geotechnic.txt') as report:
    for line in report:
        wordList = re.sub(r"[^\w\./()]", " ", line).split()
        # Blank lines yield an empty token list; there is nothing to print.
        if wordList:
            print(" ".join(wordList))

1 个答案:

答案 0 :(得分:0)


**parsegeo.py**

import re

# Sample report: one free-text line, the coordinate line, a header row,
# two rows of values, and a trailing note.
data = '''blahhh blaahhh blahhh
 some thing write this long 23.78, lat 45.45
      g.m.  occ/yr  r(event)   g.m. occ/yr   r(event)
      0.125  0.254   12.587    0.258 2.568   1.369
      0.785  0.365   10.258    0.897 2.987   9.365
something note write here blahh blahhh blahhh'''

lines = data.split('\n')

# The coordinates are on the second line; each group captures the label
# together with its value ("long 23.78", "lat 45.45").
matchobj = re.match(r'^.*(long \d+\.\d+),\s+(lat \d+\.\d+)', lines[1])
if matchobj is None:
    raise ValueError('no "long <x>, lat <y>" pair found in: {0!r}'.format(lines[1]))
longval, latval = matchobj.groups()

# split() with no argument already ignores leading/trailing whitespace.
headers = lines[2].split()
dataline1 = lines[3].split()
dataline2 = lines[4].split()

# Pair the two rows column by column: [(row1_col0, row2_col0), ...].
# list() keeps this indexable on Python 3, where zip() returns an iterator.
zippeddata = list(zip(dataline1, dataline2))

outputlist = [longval, latval]
for header, valtuple in zip(headers, zippeddata):
    segment = '{header} {valtuple}'.format(header=header, valtuple=', '.join(valtuple))
    # Each "<header> <v1>, <v2>" segment has to be collected, otherwise only
    # the coordinates are printed.
    outputlist.append(segment)

# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(" ".join(outputlist))


(parsegeo)macbook:parsegeo user$ python parsegeo.py
long 23.78 lat 45.45 g.m. 0.125, 0.785 occ/yr 0.254, 0.365 r(event) 12.587, 10.258 g.m. 0.258, 0.897 occ/yr 2.568, 2.987 r(event) 1.369, 9.365









我在下面提供了一个解决方案，用于解析您在评论中链接的数据文件。您没有指定要解析哪个数据块（zero attenuation variability 数据还是 variability in atten 数据），所以我只显示 zero attenuation variability 数据。variability in atten 数据已经被拆分成字段并添加到 var_atten_data 列表中。如果您想显示 variability in atten 数据，则需要自己完成 zip()、join() 和字符串格式化这几步。我把它作为练习留给您。


import re

# A line such as "... site at long 46.766, lat 32.305" opens a data block.
SITE_RE = re.compile(r'^.*site at long\s+(\d+\.\d+),\s+lat\s+(\d+\.\d+)')
# A row of values starts with two decimal numbers.
DATA_ROW_RE = re.compile(r'^\d+\.\d+\s+\d+\.\d')
# This line closes a data block and triggers the summary.
BLOCK_END_RE = re.compile(r'^total yearly events')
UNPARSED_MESSAGE = 'Unable to parse current line. Skipping to next line.  Current line: {}'
# The first five columns of every row belong to the "zero attenuation
# variability" table; the remaining columns are the "variability in atten" table.
ZERO_ATTEN_COLUMNS = 5


def parse_attenuation_blocks(lines):
    """Yield one summary string per site block found in *lines*.

    A block starts at a line matching ``SITE_RE``.  That line and the one
    after it are skipped (presumably the table title -- confirm against the
    data file).  From then on every line is split on whitespace and handled
    as follows:

    * five tokens or fewer: reported on stdout as unparseable;
    * first token contains ``g.m.``: taken as the header row;
    * starts with two decimal numbers: taken as a row of values;
    * starts with ``total yearly events``: ends the block, yields
      ``long <x> lat <y> <header> <v1>, <v2>, ... <header> ...`` for the
      first five columns and resets all state;
    * anything else: reported on stdout as unparseable.

    Lines outside a block are ignored silently.

    Args:
        lines: any iterable of text lines, e.g. an open file.

    Yields:
        The summary line of each completed block, in input order.
    """
    in_attenuation_block = False
    lines_skipped = 0
    longval = latval = None
    zero_atten_headers = []
    zero_atten_data = []
    # The second table is collected as well so that it can be emitted the same
    # way; it is not part of the summary produced here.
    var_atten_headers = []
    var_atten_data = []

    for line in lines:
        matchobj = SITE_RE.match(line)
        if matchobj:
            longval, latval = matchobj.groups()
            in_attenuation_block = True
        if not in_attenuation_block:
            continue
        # Skip the site line itself and the line that follows it.
        if lines_skipped < 2:
            lines_skipped += 1
            continue

        stripped = line.strip()
        data_line = stripped.split()
        if len(data_line) <= ZERO_ATTEN_COLUMNS:
            print(UNPARSED_MESSAGE.format(line))
        elif 'g.m.' in data_line[0]:
            zero_atten_headers = data_line[:ZERO_ATTEN_COLUMNS]
            var_atten_headers = data_line[ZERO_ATTEN_COLUMNS:]
        elif DATA_ROW_RE.match(stripped):
            zero_atten_data.append(data_line[:ZERO_ATTEN_COLUMNS])
            var_atten_data.append(data_line[ZERO_ATTEN_COLUMNS:])
        elif BLOCK_END_RE.match(stripped):
            # Reached the end of the data block: transpose the rows into
            # columns and emit one "<header> v1, v2, ..." segment per column.
            outputlist = ["long", longval, "lat", latval]
            for header, column in zip(zero_atten_headers, zip(*zero_atten_data)):
                segment = '{header} {valtuple}'.format(header=header, valtuple=', '.join(column))
                outputlist.append(segment)
            yield " ".join(outputlist)
            # Reset all flags, lists and values for the next block of data.
            in_attenuation_block = False
            lines_skipped = 0
            longval = latval = None
            zero_atten_headers = []
            zero_atten_data = []
            var_atten_headers = []
            var_atten_data = []
        else:
            print(UNPARSED_MESSAGE.format(line))


if __name__ == '__main__':
    with open('geotechnic.txt', 'r') as f:
        # The generator is consumed lazily, so summaries and "Unable to
        # parse" messages appear on stdout in input order.
        for summary in parse_attenuation_blocks(f):
            print(summary)


(parsegeo)macbook:parsegeo user$ python parsegeo.py
long 46.766 lat 32.305 g.m. 0.02, 0.04, 0.06, 0.08, 0.10, 0.12, 0.14, 0.16, 0.18, 0.20, 0.22, 0.24 occ/yr 0.15773, 0.00734, 0.00084, 0.00030, 0.00011, 0.00004, 0.00002, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000 exc/yr 0.00865, 0.00132, 0.00047, 0.00017, 0.00006, 0.00002, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000 r(events) 19.2, 126.4, 352.8, 974.5, 2574.4, 8231.0, 70366.1, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9 r(yrs) 115.6, 759.7, 2120.4, 5856.8, 15472.2, 49469.3, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9
long 46.884 lat 32.306 g.m. 0.02, 0.04, 0.06, 0.08, 0.10, 0.12, 0.14, 0.16, 0.18, 0.20, 0.22, 0.24, 0.26, 0.28, 0.30 occ/yr 0.15085, 0.01156, 0.00285, 0.00070, 0.00023, 0.00010, 0.00005, 0.00002, 0.00001, 0.00001, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000 exc/yr 0.01553, 0.00397, 0.00112, 0.00042, 0.00019, 0.00009, 0.00004, 0.00002, 0.00001, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000 r(events) 10.7, 41.9, 148.2, 394.3, 879.0, 1798.1, 4235.4, 8361.3, 25064.4, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9 r(yrs) 64.4, 251.6, 890.6, 2369.5, 5283.2, 10806.6, 25455.0, 50252.4, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9
long 46.765 lat 32.405 g.m. 0.02, 0.04, 0.06, 0.08, 0.10, 0.12, 0.14, 0.16, 0.18, 0.20, 0.22, 0.24, 0.26 occ/yr 0.15628, 0.00842, 0.00111, 0.00036, 0.00012, 0.00006, 0.00002, 0.00001, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000 exc/yr 0.01010, 0.00168, 0.00057, 0.00021, 0.00009, 0.00003, 0.00001, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000 r(events) 16.5, 98.8, 292.0, 800.9, 1930.1, 5871.5, 19010.9, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9 r(yrs) 99.0, 593.8, 1755.0, 4813.5, 11599.9, 35288.4, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9
long 46.883 lat 32.406 g.m. 0.02, 0.04, 0.06, 0.08, 0.10, 0.12, 0.14, 0.16, 0.18, 0.20, 0.22, 0.24, 0.26, 0.28, 0.30, 0.32, 0.34 occ/yr 0.14909, 0.01221, 0.00351, 0.00101, 0.00032, 0.00013, 0.00006, 0.00003, 0.00002, 0.00001, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000 exc/yr 0.01730, 0.00509, 0.00158, 0.00058, 0.00026, 0.00012, 0.00006, 0.00003, 0.00001, 0.00001, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000 r(events) 9.6, 32.7, 105.0, 287.4, 646.3, 1349.7, 2697.5, 5679.3, 11947.6, 31177.0, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9 r(yrs) 57.8, 196.4, 631.2, 1727.5, 3884.1, 8111.6, 16212.1, 34133.4, 71806.2, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9
long 47.700 lat 33.300 g.m. 0.02, 0.04, 0.06, 0.08, 0.10, 0.12, 0.14, 0.16, 0.18, 0.20, 0.22 occ/yr 0.15767, 0.00717, 0.00095, 0.00046, 0.00011, 0.00003, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000 exc/yr 0.00872, 0.00155, 0.00060, 0.00015, 0.00003, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000 r(events) 19.1, 107.4, 275.1, 1143.4, 5364.2, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9 r(yrs) 114.7, 645.2, 1653.4, 6872.1, 32239.4, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9, 99999.9