如何将csv文件动态转换为Geo json格式

时间:2019-07-15 08:29:25

标签: python-3.x geojson azure-databricks

我必须将csv文件转换为geo json格式。我尝试了以下两种方法,一种是首先将csv转换为json,然后再次从json转换为geo json,为此,我将数据用单引号引起来。 第二种方法是我将csv转换为geojson,但是我得到列表索引超出范围错误

有人可以帮助解决此问题吗?

import csv
import json
vPath= "/ab/c/"
filepath="/test/TEST_GJSON/b/Location.csv"
with open("/dbfs"+filepath,'r',encoding="utf8", errors='ignore') as f:
    reader = csv.DictReader(f)
    print("reader: ",reader)
    rows = list(reader)
    print("rows: ",rows)

with open("/dbfs"+"/test/TEST_GJSON/b/LocationTest1.json", 'w') as f:
    json.dump(rows, f)
print("json file created")
#json to geo json

jsonfile="/dbfs"+"/test/TEST_GJSON/b/LocationTest1.json"
data = []
with open(jsonfile) as f:
    for line in f:
        data.append(json.loads(line))
print(data)
#data = json.load(open(jsonfile))
for d in data:
  print("first for loop data: ",data)
geojson = {
    "type": "FeatureCollection",
    "features": [
    {
        "type": "Feature",
        "geometry" : {
            "type": "Point",
            "coordinates": [d["LONG"], d["LAT"]],
            },
        "properties" : d,
     } for d in data[0] ]
}


output = open(out_file, 'w')
json.dump(geojson, output)

print("geojson: ",geojson)

如果我执行此代码,我将所有json数据都放在单引号和坐标中,就像在单引号中一样。至于geojson的格式,坐标值应为int。

第二种方法:

import csv
    filepath="/test/TEST_GJSON/b/Location.csv"
    # Read in raw data from csv
    rawData = csv.reader(open("/dbfs"+filepath, 'r',encoding="utf8", errors='ignore'))

    # Skip header
    next(rawData)


    print ('Starting write and loop')
    # the template. where data from the csv will be formatted to geojson
    template = \
       ''' \
       { 
            "type" : "Feature",
            "properties" : {"name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s",
                            "name":"%s","value":"%s"
                            },
            "geometry" : {
               "type" : "Point",
               "coordinates" : [%s,%s]
             }

           },
       '''

    template2 = \
       ''' \
       { 
            "type" : "Feature",
            "properties" : {"name":"%s","value":"%s"},
            "geometry" : {
               "type" : "Point",
               "coordinates" : [
               %s, 
               %s
               ]
             }

           }
       '''


    # the head of the geojson file
    output = \
       ''' \




    { 
       "type" : "FeatureCollection",
       "features" : [
       '''


    # loop through the csv 
    iter = 0

    maxIter = 400000;

    for row in rawData:
      print(len(row))

      print("after if =2: ",str(row))
      print ('Iteration = ' + str(iter) + '    ' + str(row))
      name=row[0]
      print("name ",name)
      print(row[1],row[2],row)

      lat = row[16]
      lon = row[17]
      if iter < maxIter: 
        output += template % (row[0],lat,lon)
        print("1st ouput: ",output)
      else:
        output += template2 % (row[0], row[2], row[3], row[4], row[5],row[6], row[7], row[8], row[9], row[10],row[11], row[12], row[13], row[14], row[15],rows[16],lat,long)
        break;


      iter += 1



    # the tail of the geojson file
    output += \
       ''' \
       ]

    }
       '''


    # opens an geoJSON file to write the output
    #outFileHandle = open(/dbfs+"/test/TEST_GJSON/b/LocationTest1.geojson", "w")
    #outFileHandle.write(output)
    #outFileHandle.close()
    print(output)
    print ('Finished!')

执行上面的编码后,我得到的值足以格式化字符串.csv中有19列。如何动态赋予以上列以及如何获取属性和值中的所有列名?

0 个答案:

没有答案