我有一个包含以下内容的文件:
A B C D
1 2 3 4 5
2 2 4
3 1 3 4
请注意,第2行的4后面紧跟新行。
我想制作一个看起来像这样的字典
d['A']['1'] = 2, d['B']['1'] = 3, ..., d['D']['1'] = 5, d['B']['2'] = 2, 等等
空格不应出现在字典中。
在python中执行此操作的最佳方法是什么?
答案 0 :(得分:3)
数据都是一位数吗?并且每个值都与列标题对齐?如果是这样,您可以这样做:
# Build {column: {row_label: value}} from a fixed-width table in which every
# field is a single character sitting at an even character offset.
# NOTE(review): `datafile` is not defined in this snippet; it is presumably an
# open file or a list of lines -- confirm against the caller.
it = iter(datafile)
# Header line: skip the two-character row-label gutter, then take every second
# character as a column name.
cols = list(next(it)[2::2])
d = {}
for row in it:
    for col, val in zip(cols, row[2::2]):
        # Skip blanks of any kind (a space, or a newline/tab that happens to
        # land on a sampled offset) so int() only ever sees a digit.
        if val.strip():
            d.setdefault(col, {})[row[0]] = int(val)
根据提问者最近补充的数据和代码,上面的代码显然不够用。如果文档的格式始终是两组表格、每组6个月、每个月31行的日出/日落数据对,我们可以用多种方式来处理。下面是我写的代码。它不是最优雅的,效率可能也不高,但能完成任务。这也是通常先按行索引、再按列索引的原因之一。
def process(data):
    """Parse a sunrise/sunset almanac into ``{column: {day: value}}``.

    The input is scanned for a header line of six upper-case month names.
    The line right after it must hold six lower-case word pairs (e.g.
    ``rise set``); each month name is joined with each word of its pair to
    form twelve column names such as ``'JANrise'``.  The following 31
    non-empty lines are read as data rows, after which the scan looks for
    the next header.  Lines seen while no header is active are ignored.

    @param data: iterable of text lines (without trailing newlines)
    @return: dict mapping column name -> dict mapping the day (string, as
        written in the row) -> int value; blank cells are omitted
    @raise ValueError: if a header is not followed by a matching sub-header
        line, or a line inside a table does not look like a data row
    """
    import re

    hre = re.compile(r' +([A-Z]+)'*6)
    sre = re.compile(r' +([a-z]+) ([a-z]+)'*6)
    dre = re.compile(r'(\d{1,2}) ' + r'(.{4}) (.{4}) {,4}'*6)
    rows_per_table = 31  # every table is laid out with one row per day 1..31

    it = iter(data)
    headers = None
    count = 0
    result = {}
    for line in it:
        if not line:
            continue
        if not headers:
            # find the first header
            hmatch = hre.match(line)
            if hmatch:
                sub_line = next(it, None)
                smatch = None if sub_line is None else sre.match(sub_line)
                if smatch is None:
                    raise ValueError(
                        'header %r is not followed by a sub-header line'
                        % (line,))
                subs = iter(smatch.groups())
                # Two sub-columns per month, consumed in reading order.
                headers = [h + next(subs)
                           for h in hmatch.groups()
                           for _ in range(2)]
                count = 0
        else:
            # fill in the data
            dmatch = dre.match(line)
            if dmatch is None:
                raise ValueError('malformed data row: %r' % (line,))
            row = dmatch.group(1)
            for col, d in zip(headers, dmatch.groups()[1:]):
                if d.strip():
                    result.setdefault(col, {})[row] = int(d)
            count += 1
            if count == rows_per_table:
                # Table finished; go back to looking for a header.
                headers = None
    return result
data = """
TIMES OF SUNRISE AND SUNSET (for ideal horizon & meteorological conditions)
For the year 2012
Make corrections for daylight saving time where necessary.
------------------------------------------------------------------------------
JAN FEB MAR APR MAY JUN
rise set rise set rise set rise set rise set rise set
1 0513 1925 0541 1918 0606 1851 0628 1812 0648 1738 0708 1720
2 0514 1925 0541 1918 0606 1850 0628 1811 0649 1737 0709 1719
3 0515 1925 0542 1917 0607 1849 0629 1810 0649 1736 0709 1719
4 0515 1926 0543 1916 0608 1847 0630 1808 0650 1736 0710 1719
5 0516 1926 0544 1915 0609 1846 0630 1807 0651 1735 0710 1719
6 0517 1926 0545 1915 0609 1845 0631 1806 0651 1734 0711 1719
7 0518 1926 0546 1914 0610 1844 0632 1805 0652 1733 0711 1719
8 0519 1926 0547 1913 0611 1843 0632 1803 0653 1732 0712 1719
9 0519 1926 0548 1912 0612 1841 0633 1802 0653 1731 0712 1718
10 0520 1926 0549 1911 0612 1840 0634 1801 0654 1731 0712 1718
11 0521 1926 0550 1911 0613 1839 0634 1800 0655 1730 0713 1718
12 0522 1926 0551 1910 0614 1838 0635 1759 0655 1729 0713 1718
13 0523 1926 0551 1909 0615 1836 0636 1757 0656 1729 0714 1719
14 0524 1926 0552 1908 0615 1835 0636 1756 0657 1728 0714 1719
15 0525 1925 0553 1907 0616 1834 0637 1755 0657 1727 0714 1719
16 0526 1925 0554 1906 0617 1832 0638 1754 0658 1727 0715 1719
17 0527 1925 0555 1905 0617 1831 0638 1753 0659 1726 0715 1719
18 0527 1925 0556 1904 0618 1830 0639 1752 0659 1725 0715 1719
19 0528 1924 0557 1903 0619 1829 0640 1751 0700 1725 0716 1719
20 0529 1924 0558 1902 0619 1827 0640 1749 0701 1724 0716 1719
21 0530 1924 0558 1901 0620 1826 0641 1748 0701 1724 0716 1720
22 0531 1923 0559 1900 0621 1825 0642 1747 0702 1723 0716 1720
23 0532 1923 0600 1859 0621 1824 0642 1746 0703 1723 0716 1720
24 0533 1923 0601 1858 0622 1822 0643 1745 0703 1722 0717 1720
25 0534 1922 0602 1857 0623 1821 0644 1744 0704 1722 0717 1721
26 0535 1922 0602 1855 0624 1820 0644 1743 0705 1722 0717 1721
27 0536 1921 0603 1854 0624 1818 0645 1742 0705 1721 0717 1721
28 0537 1921 0604 1853 0625 1817 0646 1741 0706 1721 0717 1722
29 0538 1920 0605 1852 0626 1816 0646 1740 0706 1720 0717 1722
30 0539 1920 0626 1815 0647 1739 0707 1720 0717 1722
31 0540 1919 0627 1813 0707 1720
JUL AUG SEP OCT NOV DEC
rise set rise set rise set rise set rise set rise set
1 0717 1723 0705 1740 0632 1759 0553 1818 0518 1841 0503 1907
2 0717 1723 0704 1741 0631 1800 0552 1819 0517 1842 0503 1908
3 0717 1724 0703 1741 0630 1801 0551 1819 0517 1843 0503 1909
4 0717 1724 0702 1742 0629 1801 0550 1820 0516 1843 0503 1910
5 0717 1724 0701 1743 0627 1802 0548 1821 0515 1844 0503 1911
6 0717 1725 0700 1743 0626 1802 0547 1821 0514 1845 0503 1911
7 0716 1725 0700 1744 0625 1803 0546 1822 0513 1846 0503 1912
8 0716 1726 0659 1745 0624 1804 0545 1823 0513 1847 0503 1913
9 0716 1726 0658 1745 0622 1804 0543 1823 0512 1848 0503 1914
10 0716 1727 0657 1746 0621 1805 0542 1824 0511 1849 0503 1914
11 0716 1727 0656 1746 0620 1805 0541 1825 0511 1850 0503 1915
12 0715 1728 0655 1747 0618 1806 0540 1825 0510 1850 0504 1916
13 0715 1729 0654 1748 0617 1807 0538 1826 0509 1851 0504 1916
14 0715 1729 0653 1748 0616 1807 0537 1827 0509 1852 0504 1917
15 0714 1730 0652 1749 0614 1808 0536 1827 0508 1853 0505 1918
16 0714 1730 0651 1750 0613 1809 0535 1828 0508 1854 0505 1918
17 0713 1731 0650 1750 0612 1809 0534 1829 0507 1855 0505 1919
18 0713 1731 0649 1751 0610 1810 0533 1830 0507 1856 0506 1920
19 0713 1732 0648 1751 0609 1810 0531 1830 0506 1857 0506 1920
20 0712 1733 0647 1752 0608 1811 0530 1831 0506 1858 0507 1921
21 0712 1733 0645 1753 0607 1812 0529 1832 0505 1859 0507 1921
22 0711 1734 0644 1753 0605 1812 0528 1833 0505 1859 0508 1922
23 0711 1734 0643 1754 0604 1813 0527 1834 0505 1900 0508 1922
24 0710 1735 0642 1755 0603 1813 0526 1834 0504 1901 0509 1923
25 0709 1736 0641 1755 0601 1814 0525 1835 0504 1902 0509 1923
26 0709 1736 0640 1756 0600 1815 0524 1836 0504 1903 0510 1923
27 0708 1737 0638 1756 0559 1815 0523 1837 0503 1904 0510 1924
28 0707 1738 0637 1757 0557 1816 0522 1838 0503 1905 0511 1924
29 0707 1738 0636 1758 0556 1817 0521 1838 0503 1906 0512 1924
30 0706 1739 0635 1758 0555 1817 0520 1839 0503 1906 0512 1925
31 0705 1739 0634 1759 0519 1840 0513 1925
""".split('\n')
>>> d = process(data)
>>> d['DECrise']['8']
503
>>> d
{'AUGset': {'24': 1755, '25': 1755, '26': 1756, '27': 1756, '20': 1752...
答案 1 :(得分:2)
为了娱乐和兴趣,我提出了一个完全不同的答案;
import datetime
import math
import ephem # PyEphem module
class SunTimes(object):
    """Helper class for finding sun rise/set times.

    @param date: observation date, one of
        string, "yyyy[/mm[/dd[ hh[:mm[:ss]]]]]"
            (Unspecified pieces are assumed to be 0)
        datetime.date
    @param lat: latitude, one of
        string, "d[:mm[:ss]]" angle measured in degrees, minutes, seconds
            (Unspecified pieces are assumed to be 0)
        ephem.Angle
        numeric angle in degrees
    @param lon: longitude, same types as lat
    @param fromCity: string, city name
        If specified, overrides lat and lon
        If city is not recognized, raises KeyError
    """

    def __init__(self, *args, **kwargs):
        super(SunTimes, self).__init__()
        self.sun = ephem.Sun()
        self.date = ephem.Date(0)
        # The underscore attributes cache the raw values last supplied by the
        # caller, so _clean() can tell whether anything actually changed.
        self._date = 0
        self.viewer = ephem.Observer()
        self._lat = ''
        self._lon = ''
        self._city = None
        self.dirty = True   # lazy updates
        self._clean(*args, **kwargs)

    def _clean(self, date=None, lat=None, lon=None, fromCity=None):
        """Apply any changed settings and recompute the sun if needed.

        Arguments left as None keep their previous value.  fromCity is
        handled last, so it wins over lat/lon given in the same call.
        """
        if date is not None and date != self._date:
            self.date = ephem.Date(date)
            self._date = date
            self.dirty = True
        if lat is not None and lat != self._lat:
            self.viewer.lat = self.getAngle(lat)
            self._lat = lat
            self.viewer.name = None
            # Forget the cached city so that asking for the same city again
            # later is seen as a change and re-applied.
            self._city = None
            self.dirty = True
        if lon is not None and lon != self._lon:
            self.viewer.long = self.getAngle(lon)
            self._lon = lon
            self.viewer.name = None
            self._city = None
            self.dirty = True
        if fromCity is not None and fromCity != self._city:
            self.viewer = ephem.city(fromCity)
            self._city = fromCity
            self._lat = self.viewer.lat
            self._lon = self.viewer.long
            self.dirty = True
        if self.dirty:
            self.viewer.date = self.date
            self.sun.compute(self.viewer)
            self.dirty = False

    def getAngle(self, value):
        """Convert value (ephem.Angle, string or number of degrees) to an angle."""
        if isinstance(value, ephem.Angle):
            return value
        elif isinstance(value, str):
            return ephem.degrees(value)
        else:
            # Numeric input is given in degrees; it is converted to radians
            # before being handed to ephem.degrees().
            return ephem.degrees(math.radians(value))

    def sunrise(self, *args, **kwargs):
        """Return the sunrise as a datetime; accepts the same arguments as _clean."""
        self._clean(*args, **kwargs)
        # NOTE(review): PyEphem documents rise_time/set_time as deprecated in
        # favour of Observer.next_rising()/next_setting() -- confirm before
        # upgrading the library.
        return self.sun.rise_time.datetime()

    def sunset(self, *args, **kwargs):
        """Return the sunset as a datetime; accepts the same arguments as _clean."""
        self._clean(*args, **kwargs)
        return self.sun.set_time.datetime()
上面的表格与澳大利亚珀斯的当地时间非常吻合。
# Example: sunrise and sunset for Perth on 2012-01-01.  The date passed to
# sunrise() is remembered by the instance, so sunset() reuses it.
sun = SunTimes(lat='-31.9273', lon='115.87925')  # Perth
print(sun.sunrise(date='2012/1/1'))
# >>> 2012-01-01 05:15:42.835679
print(sun.sunset())
# >>> 2012-01-01 19:24:23.083130
时间并不完全相同;比较如下:
答案 2 :(得分:1)
将每一行读入一个字符串,将其解析为包含一些空元素的列表,如
(2,,2,4,)
然后将该列表转换为您的字典条目。在解析之前,您可能希望阅读字符串模块中的方法。
看起来像稀疏矩阵的作业问题。
答案 3 :(得分:1)
这是一种可能的解决方案,假设text
是输入文件的内容:
# Build table[column][row_label] = int(value) from a space-aligned table.
# NOTE(review): `text` is assumed to hold the raw contents of the input file.
lines = text.rstrip().split('\n')
# pads lines with spaces so that all of them have the same length
width = max(map(len, lines))  # computed once rather than once per line
lines = [line + ' ' * (width - len(line)) for line in lines]
# Collapse each two-space run (a blank cell plus its separator) into a single
# space so that split(' ') yields an empty string for every blank cell.
# NOTE(review): the first argument is restored to two spaces; a one-space to
# one-space replace would be a no-op -- confirm against the original answer.
rows = tuple(line.replace('  ', ' ').split(' ') for line in lines)
columns = tuple(zip(*rows))  # transpose rows matrix
table = dict()
for i, column in enumerate(columns):
    if i > 0:  # skip first column
        table[rows[0][i]] = dict()
        for j, cell in enumerate(column):
            if j > 0 and cell.isdigit():  # skip header and blanks
                table[rows[0][i]][columns[0][j]] = int(cell)
print(table)  # prints the resulting dict
答案 4 :(得分:1)
这假定所有数据项由单个空格分隔,并且“空白”项目由单个空格组成。
from collections import defaultdict
import re
# Sample table.  Items are separated by exactly one space and a blank item is
# itself a single space, so a missing value shows up as three spaces in a row
# and the header starts with a blank item for the row-label column.
testData = """
  A B C D
1 2 3 4 5
2   2 4
3 1   3 4
"""
def strToArray(s):
    """Split a space-delimited table into a list of rows of fields.

    Each field is either a run of non-whitespace characters or a single
    whitespace character (a blank cell), and is followed by one separator
    whitespace character or the end of the line.  Empty lines are dropped.

    @param s: the whole table as one string, rows separated by newlines
    @return: list of rows, each a list of field strings
    """
    item = re.compile(r'(\s|\S+)(?:\s|$)')
    return [item.findall(ln) for ln in s.split('\n') if ln]
def arrayToDict(array):
    """Turn a parsed table into ``{column_id: {row_id: int_value}}``.

    The first row supplies the column ids (its first field is the corner
    cell and is ignored); the first field of every other row is the row id.
    Fields that are empty or whitespace-only are skipped.  The input is
    left unmodified.

    @param array: list of rows, each a list of field strings
    @return: defaultdict(dict) keyed by column id, then by row id
    @raise ValueError: if a non-blank field is not an integer
    """
    res = defaultdict(dict)
    if not array:
        return res
    xIds = array[0][1:]
    for row in array[1:]:
        if not row:
            continue
        yId = row[0]
        for xId, item in zip(xIds, row[1:]):
            if item.strip():
                res[xId][yId] = int(item)
    return res
def main():
    """Parse the sample table and print the resulting nested dict."""
    data = arrayToDict(strToArray(testData))
    print(data)


if __name__=="__main__":
    main()
导致
{'A': {'1': 2, '3': 1}, 'C': {'1': 4, '3': 3, '2': 4}, 'B': {'1': 3, '2': 2}, 'D': {'1': 5, '3': 4}}
答案 5 :(得分:1)
以下是使用您以后的数据的解决方案:
data = """
TIMES OF SUNRISE AND SUNSET (for ideal horizon & meteorological conditions)
For the year 2012
Make corrections for daylight saving time where necessary.
------------------------------------------------------------------------------
JAN FEB MAR APR MAY JUN
rise set rise set rise set rise set rise set rise set
1 0513 1925 0541 1918 0606 1851 0628 1812 0648 1738 0708 1720
2 0514 1925 0541 1918 0606 1850 0628 1811 0649 1737 0709 1719
3 0515 1925 0542 1917 0607 1849 0629 1810 0649 1736 0709 1719
4 0515 1926 0543 1916 0608 1847 0630 1808 0650 1736 0710 1719
5 0516 1926 0544 1915 0609 1846 0630 1807 0651 1735 0710 1719
6 0517 1926 0545 1915 0609 1845 0631 1806 0651 1734 0711 1719
7 0518 1926 0546 1914 0610 1844 0632 1805 0652 1733 0711 1719
8 0519 1926 0547 1913 0611 1843 0632 1803 0653 1732 0712 1719
9 0519 1926 0548 1912 0612 1841 0633 1802 0653 1731 0712 1718
10 0520 1926 0549 1911 0612 1840 0634 1801 0654 1731 0712 1718
11 0521 1926 0550 1911 0613 1839 0634 1800 0655 1730 0713 1718
12 0522 1926 0551 1910 0614 1838 0635 1759 0655 1729 0713 1718
13 0523 1926 0551 1909 0615 1836 0636 1757 0656 1729 0714 1719
14 0524 1926 0552 1908 0615 1835 0636 1756 0657 1728 0714 1719
15 0525 1925 0553 1907 0616 1834 0637 1755 0657 1727 0714 1719
16 0526 1925 0554 1906 0617 1832 0638 1754 0658 1727 0715 1719
17 0527 1925 0555 1905 0617 1831 0638 1753 0659 1726 0715 1719
18 0527 1925 0556 1904 0618 1830 0639 1752 0659 1725 0715 1719
19 0528 1924 0557 1903 0619 1829 0640 1751 0700 1725 0716 1719
20 0529 1924 0558 1902 0619 1827 0640 1749 0701 1724 0716 1719
21 0530 1924 0558 1901 0620 1826 0641 1748 0701 1724 0716 1720
22 0531 1923 0559 1900 0621 1825 0642 1747 0702 1723 0716 1720
23 0532 1923 0600 1859 0621 1824 0642 1746 0703 1723 0716 1720
24 0533 1923 0601 1858 0622 1822 0643 1745 0703 1722 0717 1720
25 0534 1922 0602 1857 0623 1821 0644 1744 0704 1722 0717 1721
26 0535 1922 0602 1855 0624 1820 0644 1743 0705 1722 0717 1721
27 0536 1921 0603 1854 0624 1818 0645 1742 0705 1721 0717 1721
28 0537 1921 0604 1853 0625 1817 0646 1741 0706 1721 0717 1722
29 0538 1920 0605 1852 0626 1816 0646 1740 0706 1720 0717 1722
30 0539 1920 0626 1815 0647 1739 0707 1720 0717 1722
31 0540 1919 0627 1813 0707 1720
JUL AUG SEP OCT NOV DEC
rise set rise set rise set rise set rise set rise set
1 0717 1723 0705 1740 0632 1759 0553 1818 0518 1841 0503 1907
2 0717 1723 0704 1741 0631 1800 0552 1819 0517 1842 0503 1908
3 0717 1724 0703 1741 0630 1801 0551 1819 0517 1843 0503 1909
4 0717 1724 0702 1742 0629 1801 0550 1820 0516 1843 0503 1910
5 0717 1724 0701 1743 0627 1802 0548 1821 0515 1844 0503 1911
6 0717 1725 0700 1743 0626 1802 0547 1821 0514 1845 0503 1911
7 0716 1725 0700 1744 0625 1803 0546 1822 0513 1846 0503 1912
8 0716 1726 0659 1745 0624 1804 0545 1823 0513 1847 0503 1913
9 0716 1726 0658 1745 0622 1804 0543 1823 0512 1848 0503 1914
10 0716 1727 0657 1746 0621 1805 0542 1824 0511 1849 0503 1914
11 0716 1727 0656 1746 0620 1805 0541 1825 0511 1850 0503 1915
12 0715 1728 0655 1747 0618 1806 0540 1825 0510 1850 0504 1916
13 0715 1729 0654 1748 0617 1807 0538 1826 0509 1851 0504 1916
14 0715 1729 0653 1748 0616 1807 0537 1827 0509 1852 0504 1917
15 0714 1730 0652 1749 0614 1808 0536 1827 0508 1853 0505 1918
16 0714 1730 0651 1750 0613 1809 0535 1828 0508 1854 0505 1918
17 0713 1731 0650 1750 0612 1809 0534 1829 0507 1855 0505 1919
18 0713 1731 0649 1751 0610 1810 0533 1830 0507 1856 0506 1920
19 0713 1732 0648 1751 0609 1810 0531 1830 0506 1857 0506 1920
20 0712 1733 0647 1752 0608 1811 0530 1831 0506 1858 0507 1921
21 0712 1733 0645 1753 0607 1812 0529 1832 0505 1859 0507 1921
22 0711 1734 0644 1753 0605 1812 0528 1833 0505 1859 0508 1922
23 0711 1734 0643 1754 0604 1813 0527 1834 0505 1900 0508 1922
24 0710 1735 0642 1755 0603 1813 0526 1834 0504 1901 0509 1923
25 0709 1736 0641 1755 0601 1814 0525 1835 0504 1902 0509 1923
26 0709 1736 0640 1756 0600 1815 0524 1836 0504 1903 0510 1923
27 0708 1737 0638 1756 0559 1815 0523 1837 0503 1904 0510 1924
28 0707 1738 0637 1757 0557 1816 0522 1838 0503 1905 0511 1924
29 0707 1738 0636 1758 0556 1817 0521 1838 0503 1906 0512 1924
30 0706 1739 0635 1758 0555 1817 0520 1839 0503 1906 0512 1925
31 0705 1739 0634 1759 0519 1840 0513 1925
"""
import re
import itertools
# One tuple per table row: the day followed by up to six rise/set pairs.
# Groups belonging to a blank cell come back as empty strings.
parsed = re.findall(r'''(?xm)         # verbose, multiline
    ^                             # start of line
    (\d+)                         # the date
    \s{2}                         # 2 spaces
    (?:(\d+)\s(\d+)\s{4}|\s{13})  # rise/set time or 13 spaces
    (?:(\d+)\s(\d+)\s{4}|\s{13})  # rise/set time or 13 spaces
    (?:(\d+)\s(\d+)\s{4}|\s{13})  # rise/set time or 13 spaces
    (?:(\d+)\s(\d+)\s{4}|\s{13})  # rise/set time or 13 spaces
    (?:(\d+)\s(\d+)\s{4}|\s{13})  # rise/set time or 13 spaces
    (?:(\d+)\s(\d+)|\s{9})?       # rise/set time or 9 spaces (optional)
    $                             # end of line
    ''',data)
# transpose, throw out date line and create an iterator
# that will walk the original table column by column.
# list() keeps the slice working where zip() returns an iterator.
parsed = list(zip(*parsed))[1:]
data_gen = itertools.chain(*parsed)
sun = {}
# Date changes fastest, followed by 6 month step, then rise/set, then first 6 months.
for m in range(1,7):
    for t in ['rise','set']:
        for s in [0,6]:
            for d in range(1,32):
                # A separate name is used so the source text in `data` is
                # not overwritten by the last cell read.
                value = next(data_gen)
                # handle blanks
                if value:
                    sun[m+s,d,t] = value
if __name__ == '__main__':
    print(sun[11,24,'rise'])
答案 6 :(得分:0)
我建议使用 csv 模块。
空白项的存在会带来一些困难,所以我创建了一个包含以下内容的文件:
A B C D
1 2 3 4 5
2 8 2 4 10
3 1 88 3 4
我编写的代码大致按照您的意愿处理这些内容:
import csv

# Demo: read a space-delimited table with csv.reader and build
# d[column][row_label] = value (values stay strings), printing each step.
# 'rb' is the mode the Python 2 csv module expects; on Python 3 the file must
# instead be opened in text mode with newline=''.
with open('gogo.txt','rb') as f:  # closed automatically, even on error
    print(f.read())
    f.seek(0,0)  # rewind so the reader sees the file from the start

    dodo = csv.reader(f, delimiter = ' ')
    # The last four fields of the first line are the column names.
    headers = next(dodo)[-4:]
    print('headers== %s' % (headers,))
    print('')

    d = {}
    for k in headers:
        d[k] = {}
    print(d)
    print('')

    for row in dodo:
        print('%s %s' % (row[0], row[1:]))
        z = list(zip(headers,row[1:]))
        print('z== %s' % (z,))
        # Pair each header with the matching one of the row's last four fields.
        for x,y in zip(headers,row[-4:]):
            print('%s %s' % (x, y))
            d[x][row[0]] = y
        print(d)
        print('-----------------------------------')

print(d)
结果
A B C D
1 2 3 4 5
2 8 2 4 10
3 1 88 3 4
headers== ['A', 'B', 'C', 'D']
{'A': {}, 'C': {}, 'B': {}, 'D': {}}
1 ['2', '3', '4', '5']
z== [('A', '2'), ('B', '3'), ('C', '4'), ('D', '5')]
A 2
B 3
C 4
D 5
{'A': {'1': '2'}, 'C': {'1': '4'}, 'B': {'1': '3'}, 'D': {'1': '5'}}
-----------------------------------
2 ['8', '2', '4', '10']
z== [('A', '8'), ('B', '2'), ('C', '4'), ('D', '10')]
A 8
B 2
C 4
D 10
{'A': {'1': '2', '2': '8'}, 'C': {'1': '4', '2': '4'}, 'B': {'1': '3', '2': '2'}, 'D': {'1': '5', '2': '10'}}
-----------------------------------
3 ['1', '88', '3', '4']
z== [('A', '1'), ('B', '88'), ('C', '3'), ('D', '4')]
A 1
B 88
C 3
D 4
{'A': {'1': '2', '3': '1', '2': '8'}, 'C': {'1': '4', '3': '3', '2': '4'}, 'B': {'1': '3', '3': '88', '2': '2'}, 'D': {'1': '5', '3': '4', '2': '10'}}
-----------------------------------
{'A': {'1': '2', '3': '1', '2': '8'}, 'C': {'1': '4', '3': '3', '2': '4'}, 'B': {'1': '3', '3': '88', '2': '2'}, 'D': {'1': '5', '3': '4', '2': '10'}}
由于有人说它是一个家庭作业,所以你必须搜索如何改进这些代码以使其能够处理包含空白的行。这将是一件好事。
答案 7 :(得分:0)
草率的代码,但我相信这符合你的要求
jcomeau@intrepid:/tmp$ cat test.dat test.py; ./test.py
A B C D
1 2 3 4 5
2 2 4
3 1 3 4
#!/usr/bin/python
import re

# Read the whole table; the context manager closes the file even on error and
# avoids shadowing the builtin input().
with open('test.dat') as infile:
    data = infile.readlines()

# One field per match: a run of non-whitespace, or at most one whitespace
# character standing for a blank cell, followed by one separator character.
pattern = r'(\S+|\s?)\s'
field_re = re.compile(pattern)
parsed = [[field.strip() for field in field_re.findall(line)] for line in data]
columns = parsed.pop(0)[1:]          # header row minus the corner cell
rows = [r.pop(0) for r in parsed]    # row labels; parsed keeps only the cells
d = {}
for ci, c in enumerate(columns):
    cells = d.setdefault(c, {})
    for ri, r in enumerate(rows):
        try:
            cells[r] = int(parsed[ri][ci])
        except (ValueError, IndexError):
            # Blank cell (int('') fails) or a row shorter than the header:
            # leave the entry out of the dictionary.
            pass
print('%s %s %s %s %s' % (d, d['A']['1'], d['B']['1'], d['D']['1'], d['B']['2']))
{'A': {'1': 2, '3': 1}, 'C': {'1': 4, '3': 3, '2': 4}, 'B': {'1': 3, '2': 2}, 'D': {'1': 5, '3': 4}} 2 3 5 2
答案 8 :(得分:0)
Hugh Bothwell和jcomeau_ictx,有趣的想法,但不够通用。不能使用真实数据,虽然我确信你可以找到一种方法来使用正则表达式来使它工作。
scoffey,谢谢。我已经用你的想法将线条填充到相同的长度。 eyquem,你一定是在做梦。我从来没有说过任何关于家庭作业的事情。以下是我现在的真实数据代码。
l = """
TIMES OF SUNRISE AND SUNSET (for ideal horizon & meteorological conditions)
For the year 2012
Make corrections for daylight saving time where necessary.
------------------------------------------------------------------------------
JAN FEB MAR APR MAY JUN
rise set rise set rise set rise set rise set rise set
1 0513 1925 0541 1918 0606 1851 0628 1812 0648 1738 0708 1720
2 0514 1925 0541 1918 0606 1850 0628 1811 0649 1737 0709 1719
3 0515 1925 0542 1917 0607 1849 0629 1810 0649 1736 0709 1719
4 0515 1926 0543 1916 0608 1847 0630 1808 0650 1736 0710 1719
5 0516 1926 0544 1915 0609 1846 0630 1807 0651 1735 0710 1719
6 0517 1926 0545 1915 0609 1845 0631 1806 0651 1734 0711 1719
7 0518 1926 0546 1914 0610 1844 0632 1805 0652 1733 0711 1719
8 0519 1926 0547 1913 0611 1843 0632 1803 0653 1732 0712 1719
9 0519 1926 0548 1912 0612 1841 0633 1802 0653 1731 0712 1718
10 0520 1926 0549 1911 0612 1840 0634 1801 0654 1731 0712 1718
11 0521 1926 0550 1911 0613 1839 0634 1800 0655 1730 0713 1718
12 0522 1926 0551 1910 0614 1838 0635 1759 0655 1729 0713 1718
13 0523 1926 0551 1909 0615 1836 0636 1757 0656 1729 0714 1719
14 0524 1926 0552 1908 0615 1835 0636 1756 0657 1728 0714 1719
15 0525 1925 0553 1907 0616 1834 0637 1755 0657 1727 0714 1719
16 0526 1925 0554 1906 0617 1832 0638 1754 0658 1727 0715 1719
17 0527 1925 0555 1905 0617 1831 0638 1753 0659 1726 0715 1719
18 0527 1925 0556 1904 0618 1830 0639 1752 0659 1725 0715 1719
19 0528 1924 0557 1903 0619 1829 0640 1751 0700 1725 0716 1719
20 0529 1924 0558 1902 0619 1827 0640 1749 0701 1724 0716 1719
21 0530 1924 0558 1901 0620 1826 0641 1748 0701 1724 0716 1720
22 0531 1923 0559 1900 0621 1825 0642 1747 0702 1723 0716 1720
23 0532 1923 0600 1859 0621 1824 0642 1746 0703 1723 0716 1720
24 0533 1923 0601 1858 0622 1822 0643 1745 0703 1722 0717 1720
25 0534 1922 0602 1857 0623 1821 0644 1744 0704 1722 0717 1721
26 0535 1922 0602 1855 0624 1820 0644 1743 0705 1722 0717 1721
27 0536 1921 0603 1854 0624 1818 0645 1742 0705 1721 0717 1721
28 0537 1921 0604 1853 0625 1817 0646 1741 0706 1721 0717 1722
29 0538 1920 0605 1852 0626 1816 0646 1740 0706 1720 0717 1722
30 0539 1920 0626 1815 0647 1739 0707 1720 0717 1722
31 0540 1919 0627 1813 0707 1720
JUL AUG SEP OCT NOV DEC
rise set rise set rise set rise set rise set rise set
1 0717 1723 0705 1740 0632 1759 0553 1818 0518 1841 0503 1907
2 0717 1723 0704 1741 0631 1800 0552 1819 0517 1842 0503 1908
3 0717 1724 0703 1741 0630 1801 0551 1819 0517 1843 0503 1909
4 0717 1724 0702 1742 0629 1801 0550 1820 0516 1843 0503 1910
5 0717 1724 0701 1743 0627 1802 0548 1821 0515 1844 0503 1911
6 0717 1725 0700 1743 0626 1802 0547 1821 0514 1845 0503 1911
7 0716 1725 0700 1744 0625 1803 0546 1822 0513 1846 0503 1912
8 0716 1726 0659 1745 0624 1804 0545 1823 0513 1847 0503 1913
9 0716 1726 0658 1745 0622 1804 0543 1823 0512 1848 0503 1914
10 0716 1727 0657 1746 0621 1805 0542 1824 0511 1849 0503 1914
11 0716 1727 0656 1746 0620 1805 0541 1825 0511 1850 0503 1915
12 0715 1728 0655 1747 0618 1806 0540 1825 0510 1850 0504 1916
13 0715 1729 0654 1748 0617 1807 0538 1826 0509 1851 0504 1916
14 0715 1729 0653 1748 0616 1807 0537 1827 0509 1852 0504 1917
15 0714 1730 0652 1749 0614 1808 0536 1827 0508 1853 0505 1918
16 0714 1730 0651 1750 0613 1809 0535 1828 0508 1854 0505 1918
17 0713 1731 0650 1750 0612 1809 0534 1829 0507 1855 0505 1919
18 0713 1731 0649 1751 0610 1810 0533 1830 0507 1856 0506 1920
19 0713 1732 0648 1751 0609 1810 0531 1830 0506 1857 0506 1920
20 0712 1733 0647 1752 0608 1811 0530 1831 0506 1858 0507 1921
21 0712 1733 0645 1753 0607 1812 0529 1832 0505 1859 0507 1921
22 0711 1734 0644 1753 0605 1812 0528 1833 0505 1859 0508 1922
23 0711 1734 0643 1754 0604 1813 0527 1834 0505 1900 0508 1922
24 0710 1735 0642 1755 0603 1813 0526 1834 0504 1901 0509 1923
25 0709 1736 0641 1755 0601 1814 0525 1835 0504 1902 0509 1923
26 0709 1736 0640 1756 0600 1815 0524 1836 0504 1903 0510 1923
27 0708 1737 0638 1756 0559 1815 0523 1837 0503 1904 0510 1924
28 0707 1738 0637 1757 0557 1816 0522 1838 0503 1905 0511 1924
29 0707 1738 0636 1758 0556 1817 0521 1838 0503 1906 0512 1924
30 0706 1739 0635 1758 0555 1817 0520 1839 0503 1906 0512 1925
31 0705 1739 0634 1759 0519 1840 0513 1925
"""
import numpy

# Build sun['2012-MM-D'] = {'rise': ..., 'set': ...} from the two fixed-width
# half-year tables held in the string `l`.
l = [line for line in l.split('\n') if line]  # drop empty lines
# Data rows of each half-year table: strip the 4-character day gutter and pad
# to 78 characters (6 months x 13 characters per rise/set pair).
# NOTE(review): the 7:38 and 40:71 offsets are hard-coded for the author's
# real file; verify them against the number of header lines in your input.
f = [line[4:].ljust(78) for line in l[7:38]]
s = [line[4:].ljust(78) for line in l[40:71]]
# Put January-June and July-December side by side: one 156-character line per day.
l = [first + second for first, second in zip(f, s)]
a = []
r = [13*i for i in range(13)]  # slice boundaries of the 12 month columns
for line in l:
    d = [line[r[i]:r[i+1]] for i in range(12)]
    a.append(d)
# Transpose from day-major to month-major order.
a = numpy.transpose(a).tolist()
sun = {}
for m in range(12):
    # Drop the blank cells of days that do not exist in this month.
    a[m] = [cell for cell in a[m] if not cell.isspace()]
    for d in range(len(a[m])):
        # NOTE(review): the day is not zero-padded (e.g. '2012-01-1') while
        # the month is; kept as written -- confirm the intended key format.
        date = "%4d-%02d-%d" % (2012, m+1, d+1)
        sun[date] = {}
        sun[date]['rise'], sun[date]['set'] = a[m][d].split()
print(sun)
答案 9 :(得分:0)
安德烈,
为了让数据处理起始位置的确定更加自动化,我会这样做:
# Locate the two data tables automatically instead of hard-coding line offsets.
# Drop empty lines; filter() returns a list on Python 2, which the indexing
# and slicing below rely on.
l = filter(None,l.splitlines())
# Index of the line following each "rise ... set" sub-header line, i.e. where
# each table's data rows are expected to begin.
starts = [ i+1 for i,line in enumerate(l) if 'rise' in line and 'set' in line]
print 'starts==',starts
# Rows of the first table.
f = []
for line in l[starts[0]:]:
    # Stop at the first line containing anything other than digits and spaces.
    if any(c not in ' 0123456789' for c in line):
        break
    else:
        # Keep what follows the first separator, i.e. drop the leading field.
        # NOTE(review): the separator literal was probably a longer run of
        # spaces before whitespace was collapsed -- confirm.
        f.append( line.partition(' ')[2] )
# Rows of the second table, collected the same way.
s = []
for line in l[starts[1]:]:
    if any(c not in ' 0123456789' for c in line):
        break
    else:
        s.append( line.partition(' ')[2] )
# Join matching rows of both tables and split the result into cells.
# NOTE(review): both string literals here presumably held several spaces
# originally; as written this splits on every single space -- confirm.
a= [ (x+' '+y).split(' ') for x,y in zip(f,s) ]
改进算法:
# Same idea as a single pass: walk both tables in lockstep.
# Drop empty lines; filter() returns a list on Python 2, which the indexing
# below relies on.
l = filter(None,l.splitlines())
# Index of the line following each "rise ... set" sub-header line.
starts = [ i+1 for i,line in enumerate(l) if 'rise' in line and 'set' in line]
print 'starts==',starts
# nb is the row offset inside both tables; a collects the combined rows.
nb, a = 0, []
while starts[1]+nb<len(l):
    # Row nb of each table with its leading field removed.
    # NOTE(review): the separator literals were probably longer runs of
    # spaces before whitespace was collapsed -- confirm.
    line0 = l[starts[0]+nb].partition(' ')[2]
    line1 = l[starts[1]+nb].partition(' ')[2]
    # Stop as soon as either row holds anything other than digits and spaces.
    if any(c not in ' 0123456789' for c in line0) or any(c not in ' 0123456789' for c in line1):
        break
    else:
        a.append((line0+' '+line1).split(' '))
    nb += 1
我无法再进一步,因为我没有安装 numpy,也不知道你最终想要得到什么样的数据;
但有一些明显的事情:
要实现这类处理,绝对有必要使用正则表达式:这样你会获得强大得多的处理能力。