pd.read_json:逐行读取块

时间:2019-07-14 12:03:20

标签: python json pandas chunks

我想逐行读取一个不寻常的格式化json文件。 对于代码行data = json.loads(line),我得到了错误'json.decoder.JSONDecodeError:期望值:第1行第1列(字符0)'。

with open('new.json') as f:
    for line in f:
        data = json.loads(line)

open对我有用,但是使用这种读取方法,我无法像pd.read_json这样定义块。是否可以使用pd.read_json逐行读取文件?

Json文件:

{"type": "FeatureCollection", "features": [{ "type": "Feature", "properties": {}, "geometry": {"type":"Polygon","coordinates":[[[-4.29021744815101,48.5143412874521],[-4.29013497741707,48.5143440207212],[-4.28959262578981,48.5144873561254],[-4.28874229335262,48.5147093562795],[-4.28873164693566,48.5146475842289],[-4.28884366763834,48.514379992606],[-4.2890392275783,48.5140365423362],[-4.28907026022347,48.5139952803317],[-4.28909939147002,48.5139401772964],[-4.28804596071614,48.5118383551367],[-4.28917719530158,48.5113365873849],[-4.28930059484434,48.5113042340208],[-4.28991913630321,48.5124538984363],[-4.29061436808547,48.5137619114669],[-4.29061671073197,48.5137663296708],[-4.29087504162859,48.5141661685437],[-4.29023349610421,48.5143292061531],[-4.29022583960902,48.5143410061436],[-4.29021744815101,48.5143412874521]]]}}]}
{"type": "FeatureCollection", "features": [{ "type": "Feature", "properties": {}, "geometry": {"type":"Polygon","coordinates":[[[-3.7350625243932,48.1848785869773],[-3.7347528506632,48.1848730836583],[-3.73473421351873,48.1848710697161],[-3.73391708921485,48.1848317218237],[-3.73345443802032,48.184784387771],[-3.73339223288227,48.1848012641021],[-3.73338932024451,48.1848024601119],[-3.73337889985779,48.1848054579735],[-3.73334451211194,48.184815136003],[-3.73334261623194,48.1848268218925],[-3.73332592369853,48.1849297160121],[-3.73331811715827,48.1849766168037],[-3.73325267814187,48.1851798121491],[-3.73318862541147,48.1853781147334],[-3.7331832755441,48.185385600037],[-3.73311237834389,48.1854516491189],[-3.73307132176516,48.1854895592675],[-3.73304753817576,48.1854933480186],[-3.73298546837288,48.1855028381599],[-3.73291594479475,48.1855009239457],[-3.73288917169179,48.1855000249613],[-3.73285901997393,48.1854975219536],[-3.73280489417796,48.1854926685428],[-3.73260642930474,48.185473117715],[-3.73237029519309,48.1854494069719],[-3.73197642531927,48.1854151136977],[-3.73171484997032,48.1853920428782],[-3.7317145297998,48.1853929191018],[-3.73171237455302,48.1853986684612],[-3.73166709847852,48.1854974068619],[-3.73155317697147,48.1857455633357],[-3.73154649109863,48.1857582363287],[-3.73153543285345,48.1857787540888],[-3.73146986994153,48.1859029445122],[-3.73133097547452,48.1861650443673],[-3.731218747863,48.1861318393685],[-3.73110101446164,48.1860967634318],[-3.73086249049958,48.1860144137328],[-3.73036740078679,48.1858430405827],[-3.72987817474263,48.1857080225914],[-3.72983231027187,48.1856953176834],[-3.72983713797708,48.1856818388616],[-3.72985491541579,48.1856335741382],[-3.72985574749816,48.1856323706591],[-3.72994330019466,48.1855728767933],[-3.72990657950362,48.1855590173522],[-3.7299972579383,48.1850926252626],[-3.72891337358976,48.1846911143134],[-3.72855457344473,48.1845582044569],[-3.72857012615032,48.1845363917365],[-3.7288638068384,48.1841308675118],[-3.72895166222348,48.1840804148277],[-3.72959269863475,48.1837130041969],[-3.72965210706062,48.1836983396247],[-3.72968210791325,48.18370157447],[-3.72973229135383,48.1837074039926],[-3.72997346944721,48.1837486852237],[-3.730297600631,48.1837866621361],[-3.73049523054776,48.1837977300575],[-3.73058478215378,48.1838028449738],[-3.7305853440734,48.1838028038347],[-3.73118401428959,48.1837563211525],[-3.73120459118999,48.1837567166324],[-3.73147592453808,48.1837621622482],[-3.73173389090255,48.1837790365628],[-3.73204497102696,48.1837996207504],[-3.73251671430452,48.183829949838],[-3.73270215026346,48.1838359403728],[-3.73306971278977,48.1838483355168],[-3.73327425457046,48.1838372936271],[-3.73339288768415,48.1838311532918],[-3.73382465430422,48.1837840021173],[-3.73413982385002,48.1837431811857],[-3.73416969061106,48.1837393423969],[-3.73417009818185,48.1837392558798],[-3.73424416805291,48.1837142141484],[-3.73449136560662,48.1836307875219],[-3.73459641872941,48.1836243747852],[-3.73475528663114,48.1836149031024],[-3.73508547919139,48.1836515631883],[-3.73549210512159,48.1837204267043],[-3.73555351299474,48.1837441161653],[-3.73553588388847,48.1838586601083],[-3.73541918910584,48.1845724844406],[-3.73535762993358,48.1848143013051],[-3.73531295281074,48.1848827088381],[-3.7350625243932,48.1848785869773]],[[-3.72925211502078,48.1841936408449],[-3.72925202753542,48.1841936431335],[-3.72905894861286,48.1845421744863],[-3.72925211502078,48.1841936408449]],[[-3.73502365976846,48.1840632903695],[-3.73180418115953,48.1841360370525],[-3.7350236804856,48.1840637903968],[-3.73502365976846,48.1840632903695]]]}}]}
{"type": "FeatureCollection", "features": [{ "type": "Feature", "properties": {}, "geometry": {"type":"Polygon","coordinates":[[[-3.7350625243932,48.1848785869773],[-3.7347528506632,48.1848730836583],[-3.73473421351873,48.1848710697161],[-3.73391708921485,48.1848317218237],[-3.73345443802032,48.184784387771],[-3.73339223288227,48.1848012641021],[-3.73338932024451,48.1848024601119],[-3.73337889985779,48.1848054579735],[-3.73334451211194,48.184815136003],[-3.73334261623194,48.1848268218925],[-3.73332592369853,48.1849297160121],[-3.73331811715827,48.1849766168037],[-3.73325267814187,48.1851798121491],[-3.73318862541147,48.1853781147334],[-3.7331832755441,48.185385600037],[-3.73311237834389,48.1854516491189],[-3.73307132176516,48.1854895592675],[-3.73304753817576,48.1854933480186],[-3.73298546837288,48.1855028381599],[-3.73291594479475,48.1855009239457],[-3.73288917169179,48.1855000249613],[-3.73285901997393,48.1854975219536],[-3.73280489417796,48.1854926685428],[-3.73260642930474,48.185473117715],[-3.73237029519309,48.1854494069719],[-3.73197642531927,48.1854151136977],[-3.73171484997032,48.1853920428782],[-3.7317145297998,48.1853929191018],[-3.73171237455302,48.1853986684612],[-3.73166709847852,48.1854974068619],[-3.73155317697147,48.1857455633357],[-3.73154649109863,48.1857582363287],[-3.73153543285345,48.1857787540888],[-3.73146986994153,48.1859029445122],[-3.73133097547452,48.1861650443673],[-3.731218747863,48.1861318393685],[-3.73110101446164,48.1860967634318],[-3.73086249049958,48.1860144137328],[-3.73036740078679,48.1858430405827],[-3.72987817474263,48.1857080225914],[-3.72983231027187,48.1856953176834],[-3.72983713797708,48.1856818388616],[-3.72985491541579,48.1856335741382],[-3.72985574749816,48.1856323706591],[-3.72994330019466,48.1855728767933],[-3.72990657950362,48.1855590173522],[-3.7299972579383,48.1850926252626],[-3.72891337358976,48.1846911143134],[-3.72855457344473,48.1845582044569],[-3.72857012615032,48.1845363917365],[-3.7288638068384,48.1841308675118],[-3.72895166222348,48.1840804148277],[-3.72959269863475,48.1837130041969],[-3.72965210706062,48.1836983396247],[-3.72968210791325,48.18370157447],[-3.72973229135383,48.1837074039926],[-3.72997346944721,48.1837486852237],[-3.730297600631,48.1837866621361],[-3.73049523054776,48.1837977300575],[-3.73058478215378,48.1838028449738],[-3.7305853440734,48.1838028038347],[-3.73118401428959,48.1837563211525],[-3.73120459118999,48.1837567166324],[-3.73147592453808,48.1837621622482],[-3.73173389090255,48.1837790365628],[-3.73204497102696,48.1837996207504],[-3.73251671430452,48.183829949838],[-3.73270215026346,48.1838359403728],[-3.73306971278977,48.1838483355168],[-3.73327425457046,48.1838372936271],[-3.73339288768415,48.1838311532918],[-3.73382465430422,48.1837840021173],[-3.73413982385002,48.1837431811857],[-3.73416969061106,48.1837393423969],[-3.73417009818185,48.1837392558798],[-3.73424416805291,48.1837142141484],[-3.73449136560662,48.1836307875219],[-3.73459641872941,48.1836243747852],[-3.73475528663114,48.1836149031024],[-3.73508547919139,48.1836515631883],[-3.73549210512159,48.1837204267043],[-3.73555351299474,48.1837441161653],[-3.73553588388847,48.1838586601083],[-3.73541918910584,48.1845724844406],[-3.73535762993358,48.1848143013051],[-3.73531295281074,48.1848827088381],[-3.7350625243932,48.1848785869773]],[[-3.72925211502078,48.1841936408449],[-3.72925202753542,48.1841936431335],[-3.72905894861286,48.1845421744863],[-3.72925211502078,48.1841936408449]],[[-3.73502365976846,48.1840632903695],[-3.73180418115953,48.1841360370525],[-3.7350236804856,48.1840637903968],[-3.73502365976846,48.1840632903695]]]}}]}
{"type": "FeatureCollection", "features": [{ "type": "Feature", "properties": {}, "geometry": {"type":"Polygon","coordinates":[[[-3.72782441939485,48.0821914591482],[-3.7279626098774,48.0823288022383],[-3.72775177802298,48.0824733136652],[-3.72718506959888,48.0827424484138],[-3.72693728692629,48.0828980407726],[-3.72704867025754,48.0830761149953],[-3.72741448178102,48.0835645154002],[-3.72724029195195,48.0836676781935],[-3.72697413807627,48.0838591555791],[-3.72668584331384,48.0840570289267],[-3.72643366337412,48.0842261925285],[-3.72622836904061,48.0843793426547],[-3.72606386758044,48.0844969049885],[-3.72572650097526,48.0842788531181],[-3.72514670721312,48.0839041117153],[-3.72509451921264,48.0838703756697],[-3.7250783941179,48.0838518265083],[-3.72504905237142,48.0838135412588],[-3.72503878659429,48.0837690439831],[-3.72511706955043,48.0837112807628],[-3.72525103351516,48.0836103683799],[-3.72515213496531,48.0835107745828],[-3.72421533101317,48.0827929680033],[-3.72456428925743,48.0825745513877],[-3.72492168933841,48.0823442268499],[-3.72528426705779,48.0820650702317],[-3.72560579085558,48.081777694312],[-3.72561569134927,48.0817576562406],[-3.7256197404066,48.0817494498813],[-3.72563818363263,48.0817121517943],[-3.72560450062254,48.0816924668015],[-3.7257757790852,48.0815237513447],[-3.72620174782297,48.08120631087],[-3.72642658044231,48.081074584131],[-3.7267496577915,48.0809218546231],[-3.72676710477618,48.080914714909],[-3.72734718770272,48.0813026713323],[-3.72747245180956,48.0813856811749],[-3.72746754385628,48.0813951831035],[-3.72731279947579,48.0814767956915],[-3.72721885986459,48.0815436482258],[-3.7273775548969,48.0816729094619],[-3.72740538514552,48.0817112807134],[-3.72739473614373,48.081747753786],[-3.72734478816703,48.0817997971279],[-3.72740440502293,48.0818476204988],[-3.7277332432576,48.0821239202894],[-3.72773488440252,48.0821248469141],[-3.72782441939485,48.0821914591482]]]}}]}
{"type": "FeatureCollection", "features": [{ "type": "Feature", "properties": {}, "geometry": {"type":"Polygon","coordinates":[[[-3.72782441939485,48.0821914591482],[-3.7279626098774,48.0823288022383],[-3.72775177802298,48.0824733136652],[-3.72718506959888,48.0827424484138],[-3.72693728692629,48.0828980407726],[-3.72704867025754,48.0830761149953],[-3.72741448178102,48.0835645154002],[-3.72724029195195,48.0836676781935],[-3.72697413807627,48.0838591555791],[-3.72668584331384,48.0840570289267],[-3.72643366337412,48.0842261925285],[-3.72622836904061,48.0843793426547],[-3.72606386758044,48.0844969049885],[-3.72572650097526,48.0842788531181],[-3.72514670721312,48.0839041117153],[-3.72509451921264,48.0838703756697],[-3.7250783941179,48.0838518265083],[-3.72504905237142,48.0838135412588],[-3.72503878659429,48.0837690439831],[-3.72511706955043,48.0837112807628],[-3.72525103351516,48.0836103683799],[-3.72515213496531,48.0835107745828],[-3.72421533101317,48.0827929680033],[-3.72456428925743,48.0825745513877],[-3.72492168933841,48.0823442268499],[-3.72528426705779,48.0820650702317],[-3.72560579085558,48.081777694312],[-3.72561569134927,48.0817576562406],[-3.7256197404066,48.0817494498813],[-3.72563818363263,48.0817121517943],[-3.72560450062254,48.0816924668015],[-3.7257757790852,48.0815237513447],[-3.72620174782297,48.08120631087],[-3.72642658044231,48.081074584131],[-3.7267496577915,48.0809218546231],[-3.72676710477618,48.080914714909],[-3.72734718770272,48.0813026713323],[-3.72747245180956,48.0813856811749],[-3.72746754385628,48.0813951831035],[-3.72731279947579,48.0814767956915],[-3.72721885986459,48.0815436482258],[-3.7273775548969,48.0816729094619],[-3.72740538514552,48.0817112807134],[-3.72739473614373,48.081747753786],[-3.72734478816703,48.0817997971279],[-3.72740440502293,48.0818476204988],[-3.7277332432576,48.0821239202894],[-3.72773488440252,48.0821248469141],[-3.72782441939485,48.0821914591482]]]}}]}

chunks = pd.read_json('new.json', lines=True, chunksize = 2)

    neu = []
    for i, c in enumerate(chunks):
        for line in c:
            data = json.loads(line)
            for feature in data['features']:
                poly1 = shapely.geometry.asShape(feature['geometry'])
                neu.append(poly1)
                boundary = cascaded_union(neu)
                    # print(boundary.geom_type)

    geojson_out = geojson.Feature(geometry=boundary)
    myFeat = FeatureCollection([geojson_out])
    with open('final_{}.geojson'.format(i), 'w') as outfile:
        json.dump(myFeat, outfile)
    outfile.close()

谢谢!

0 个答案:

没有答案