"POLYGON ((12 13,22 23,16 17,22 24))",101,Something,100000
"POLYGON ((10 12,40 42,46 34,16 24,88 22,33 24,18 20 ))",102,another,200000
如何在csv文件中获得如下内容:
UID(如101,102等)表示每个多边形的唯一标识符。
UID#1,County,population,Point#1_Lat,Point#1_Long
UID#1,County,population,Point#2_Lat,Point#2_Long
UID#1,County,population,Point#3_Lat,Point#3_Long
UID#1,County,population,Point#n_Lat,Point#n_Long
UID#2,County,population,Point#1_Lat,Point#1_Long
UID#2,County,population,Point#2_Lat,Point#2_Long
UID#2,County,population,Point#3_Lat,Point#3_Long
UID#2,County,population,Point#n_Lat,Point#n_Long
答案 0 :(得分:1)
以下是使用 pyparsing 的解决方案。如果这对您不起作用,请告诉我——只用标准库(例如 re 等)写出一个方案应该不会太难,但代码肯定会更难看。
import csv
from pyparsing import Group, Literal, OneOrMore, Optional, Word
from pyparsing import delimitedList
from pyparsing import alphas, nums
# Sample input: one record per line. Each record is a double-quoted
# POLYGON point list followed by three comma-separated fields, which
# parse_lines unpacks as uid, county and population.
# The literal begins and ends with a newline, so splitting it on '\n'
# yields empty strings at both ends.
data = """
"POLYGON ((12 13,22 23,16 17,22 24))",101,Something,100000
"POLYGON ((10 12,40 42,46 34,16 24,88 22,33 24,18 20 ))",102,another,200000
"""
def parse_line(line):
    """Parse one ``"POLYGON ((a b,c d,...))",uid,county,population`` record.

    Args:
        line: A single record. The polygon part must be wrapped in double
            quotes and use ``((`` / ``))`` around the point list; the
            county must be a single run of letters.

    Returns:
        The pyparsing parse results holding four items: a group of
        two-element groups (the two numbers of every point, kept as
        strings in input order), then uid, county and population as
        strings.

    Raises:
        pyparsing.ParseException: If ``line`` does not match the grammar.
    """
    # Local import: Regex is not among the names imported at file level.
    from pyparsing import Regex

    # Accept signed and decimal coordinates, not only runs of digits.
    number = Regex(r"[+-]?\d+(?:\.\d+)?")
    latitude = number
    longitude = number
    point = Group(latitude + longitude)
    point_sequence = delimitedList(point, delim=',')

    # Literal matches the exact keyword; Word("POLYGON") would accept any
    # run of the letters P, O, L, Y, G, N (for example "GOLLY").
    name = Literal("POLYGON").suppress()
    paren_left = Literal("((").suppress()
    paren_right = Literal("))").suppress()
    quote = Literal('"').suppress()
    polygon = Group(
        quote + name + paren_left + point_sequence + paren_right + quote
    )

    uid = Word(nums)
    county = Word(alphas)
    population = Word(nums)
    sep = Literal(",").suppress()

    parser = polygon + sep + uid + sep + county + sep + population
    return parser.parseString(line)
def parse_lines(data, outfile):
    """Expand polygon records into one CSV row per polygon point.

    Every output row is ``uid,county,population,<first>,<second>`` where
    the last two columns are the two numbers of one point.

    Args:
        data: Text with one record per line, in the format accepted by
            ``parse_line``. Empty and whitespace-only lines are ignored.
        outfile: Path of the file to write; it is opened in ``'w'`` mode,
            so existing content is replaced.
    """
    with open(outfile, 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        # splitlines() also copes with "\r\n" line endings.
        for line in data.splitlines():
            # A whitespace-only line would make the parser raise, so skip
            # it just like a truly empty line.
            if not line.strip():
                continue
            points, uid, county, population = parse_line(line)
            # "lon" instead of "long": the latter is a builtin in Python 2.
            for lat, lon in points:
                writer.writerow([uid, county, population, lat, lon])
            # NOTE(review): the source lost its indentation; this assumes
            # one empty row is written after each polygon's points --
            # confirm against the intended output.
            writer.writerow('')
# Convert the sample records in `data` and write the per-point rows to disk.
parse_lines(data, r'd:\out.txt') # change the path to wherever you want the output written
结果:
101,Something,100000,12,13
101,Something,100000,22,23
101,Something,100000,16,17
101,Something,100000,22,24
102,another,200000,10,12
102,another,200000,40,42
102,another,200000,46,34
102,another,200000,16,24
102,another,200000,88,22
102,another,200000,33,24
102,another,200000,18,20
答案 1 :(得分:1)
谢谢你的解决方案。这是我第一次尝试 Python。我试了你的建议,同时也尝试寻找另一种做法,并得到了不错的结果。
几何,区类型,UID
"POLYGON(x1 y1,x2 y2,x3 y3,x4 y4)",name1,abc,100
"POLYGON(x1 y1,x2 y2,x3 y3,x4 y4,x5 y5,x6 y6)",name2,pqr,101
import csv
import re
import sys
# Split each polygon row of a CSV file into one output row per point.
# The first input column holds the polygon text; every other column is
# copied unchanged onto each generated row.
l_InputFileName = 'D:/Example1.txt'  # make changes here..
l_OutputFileName = 'D:/Example1_o.txt'  # make changes here..


def _polygon_points(wkt):
    """Return the points of a polygon string as lists of coordinate strings.

    Works for both ``POLYGON(x y,...)`` and ``POLYGON ((x y,...))``: all
    text up to the first ``(`` is dropped, then stray parentheses and
    spaces are trimmed from every comma-separated chunk. Chunks that hold
    no coordinates are skipped.
    """
    # find() returns -1 when there is no "(", so the slice then starts at
    # index 0 and keeps the whole string.
    body = wkt[wkt.find('(') + 1:]
    points = []
    for chunk in body.split(','):
        # split() without arguments drops repeated/trailing spaces, so a
        # chunk such as "18 20 " does not produce an empty column.
        coords = chunk.strip(' ()').split()
        if coords:
            points.append(coords)
    return points


# NOTE(review): 'a+' appends, so running the script twice writes the header
# and all rows twice -- confirm this is intended before switching to 'w'.
with open(l_InputFileName, 'r') as csvfile, \
        open(l_OutputFileName, 'a+') as fo:
    csvR = csv.reader(csvfile, delimiter=',', quotechar='"')
    # csv.writer quotes remaining columns that themselves contain commas.
    writer = csv.writer(fo, lineterminator='\n')

    # First row holds the column names; None means the input is empty.
    header = next(csvR, None)
    if header is not None:
        print(','.join(header))
        # The polygon column is replaced by two coordinate columns.
        writer.writerow(['longitude', 'latitude'] + header[1:])

        for row in csvR:
            # Blank input lines come back as empty lists; skip them.
            if not row:
                continue
            remaining_cols = row[1:]
            # One output row per point; the remaining columns do not change.
            for coords in _polygon_points(row[0]):
                out_row = coords + remaining_cols
                print(','.join(out_row))
                writer.writerow(out_row)