我必须将csv文件转换为geo json格式。我尝试了以下两种方法,一种是首先将csv转换为json,然后再次从json转换为geo json,为此,我将数据用单引号引起来。 第二种方法是我将csv转换为geojson,但是我得到列表索引超出范围错误
有人可以帮助解决此问题吗?
import csv
import json
vPath= "/ab/c/"
filepath="/test/TEST_GJSON/b/Location.csv"
with open("/dbfs"+filepath,'r',encoding="utf8", errors='ignore') as f:
reader = csv.DictReader(f)
print("reader: ",reader)
rows = list(reader)
print("rows: ",rows)
with open("/dbfs"+"/test/TEST_GJSON/b/LocationTest1.json", 'w') as f:
json.dump(rows, f)
print("json file created")
#json to geo json
jsonfile="/dbfs"+"/test/TEST_GJSON/b/LocationTest1.json"
data = []
with open(jsonfile) as f:
for line in f:
data.append(json.loads(line))
print(data)
#data = json.load(open(jsonfile))
for d in data:
print("first for loop data: ",data)
geojson = {
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry" : {
"type": "Point",
"coordinates": [d["LONG"], d["LAT"]],
},
"properties" : d,
} for d in data[0] ]
}
output = open(out_file, 'w')
json.dump(geojson, output)
print("geojson: ",geojson)
如果我执行此代码,我将所有json数据都放在单引号和坐标中,就像在单引号中一样。至于geojson的格式,坐标值应为int。
第二种方法:
import csv
filepath="/test/TEST_GJSON/b/Location.csv"
# Read in raw data from csv
rawData = csv.reader(open("/dbfs"+filepath, 'r',encoding="utf8", errors='ignore'))
# Skip header
next(rawData)
print ('Starting write and loop')
# the template. where data from the csv will be formatted to geojson
template = \
''' \
{
"type" : "Feature",
"properties" : {"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s",
"name":"%s","value":"%s"
},
"geometry" : {
"type" : "Point",
"coordinates" : [%s,%s]
}
},
'''
template2 = \
''' \
{
"type" : "Feature",
"properties" : {"name":"%s","value":"%s"},
"geometry" : {
"type" : "Point",
"coordinates" : [
%s,
%s
]
}
}
'''
# the head of the geojson file
output = \
''' \
{
"type" : "FeatureCollection",
"features" : [
'''
# loop through the csv
iter = 0
maxIter = 400000;
for row in rawData:
print(len(row))
print("after if =2: ",str(row))
print ('Iteration = ' + str(iter) + ' ' + str(row))
name=row[0]
print("name ",name)
print(row[1],row[2],row)
lat = row[16]
lon = row[17]
if iter < maxIter:
output += template % (row[0],lat,lon)
print("1st ouput: ",output)
else:
output += template2 % (row[0], row[2], row[3], row[4], row[5],row[6], row[7], row[8], row[9], row[10],row[11], row[12], row[13], row[14], row[15],rows[16],lat,long)
break;
iter += 1
# the tail of the geojson file
output += \
''' \
]
}
'''
# opens an geoJSON file to write the output
#outFileHandle = open(/dbfs+"/test/TEST_GJSON/b/LocationTest1.geojson", "w")
#outFileHandle.write(output)
#outFileHandle.close()
print(output)
print ('Finished!')
执行上面的编码后,我得到的值足以格式化字符串.csv中有19列。如何动态赋予以上列以及如何获取属性和值中的所有列名?