在 pandas 中展开(取消嵌套)JSON 列

时间:2017-10-20 17:42:25

标签: json python-2.7 pandas

我用下面的代码把 JSON 文件中的数据读入 pandas DataFrame。但是像 "attributes" 这样的一些列读进来之后仍然是嵌套的字典。我希望把它们展开成类似 "attributes.GoodForMeal.dessert" 这样的独立列,效果类似于 R 中的 flatten 函数。

有人能建议一下如何在 Python 中做到这一点吗?

代码:

df_business = pd.read_json('dataset/business.json', lines=True)
print(df_business[1:3])

数据:

              address                                         attributes  \
1       2824 Milton Rd  {u'GoodForMeal': {u'dessert': False, u'latenig...   
2  337 Danforth Avenue  {u'BusinessParking': {u'garage': False, u'stre...   

              business_id                                         categories  \
1  mLwM-h2YhXl2NCgdS84_Bw  [Food, Soul Food, Convenience Stores, Restaura...   
2  v2WhjAB3PIBA8J8VxG3wEg                               [Food, Coffee & Tea]   

        city                                              hours  is_open  \
1  Charlotte  {u'Monday': u'10:00-22:00', u'Tuesday': u'10:0...        0   
2    Toronto  {u'Monday': u'10:00-19:00', u'Tuesday': u'10:0...        0   

    latitude  longitude                                name neighborhood  \
1  35.236870 -80.741976  South Florida Style Chicken & Ribs     Eastland   
2  43.677126 -79.353285                    The Tea Emporium    Riverdale   

  postal_code  review_count  stars state  
1       28215             4    4.5    NC  
2     M4K 1N7             7    4.5    ON  

更新

from pandas.io.json import json_normalize
print json_normalize('dataset/business.json')

错误:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-12-bb0ce59acb26> in <module>()
      1 from pandas.io.json import json_normalize
----> 2 print json_normalize('dataset/business.json')

/Users/anaconda/lib/python2.7/site-packages/pandas/io/json.pyc in json_normalize(data, record_path, meta, meta_prefix, record_prefix)
    791 
    792     if record_path is None:
--> 793         if any([isinstance(x, dict) for x in compat.itervalues(data[0])]):
    794             # naive normalization, this is idempotent for flat records
    795             # and potentially will inflate the data considerably for

/Users/anaconda/lib/python2.7/site-packages/pandas/compat/__init__.pyc in itervalues(obj, **kw)
    169 
    170     def itervalues(obj, **kw):
--> 171         return obj.itervalues(**kw)
    172 
    173     next = lambda it : it.next()

AttributeError: 'str' object has no attribute 'itervalues'

UPDATE2:

Code:

import json; 
json_normalize(json.load('dataset/business.json'))

Error:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-20-4fb4bf64efc6> in <module>()
      1 import json;
----> 2 json_normalize(json.load('dataset/business.json'))

/Users/anaconda/lib/python2.7/json/__init__.pyc in load(fp, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    285 
    286     """
--> 287     return loads(fp.read(),
    288         encoding=encoding, cls=cls, object_hook=object_hook,
    289         parse_float=parse_float, parse_int=parse_int,

AttributeError: 'str' object has no attribute 'read'

UPDATE3:

Code:
with open('dataset/business.json') as f:
    df = json_normalize(json.load(f))

Error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-21-e3449614f320> in <module>()
      1 with open('dataset/business.json') as f:
----> 2     df = json_normalize(json.load(f))

/Users/anaconda/lib/python2.7/json/__init__.pyc in load(fp, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    289         parse_float=parse_float, parse_int=parse_int,
    290         parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
--> 291         **kw)
    292 
    293 

/Users/anaconda/lib/python2.7/json/__init__.pyc in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    337             parse_int is None and parse_float is None and
    338             parse_constant is None and object_pairs_hook is None and not kw):
--> 339         return _default_decoder.decode(s)
    340     if cls is None:
    341         cls = JSONDecoder

/Users/anaconda/lib/python2.7/json/decoder.pyc in decode(self, s, _w)
    365         end = _w(s, end).end()
    366         if end != len(s):
--> 367             raise ValueError(errmsg("Extra data", s, end, len(s)))
    368         return obj
    369 

ValueError: Extra data: line 2 column 1 - line 156640 column 1 (char 731 - 132272455)

UPDATE4:

Code:

with open('dataset/business.json') as f:
    reviews = f.read().strip().split("\n")
reviews = [json.loads(review) for review in reviews]

reviews[1:5]

Sample Data:

[{u'address': u'2824 Milton Rd',
  u'attributes': {u'Ambience': {u'casual': False,
    u'classy': False,
    u'divey': False,
    u'hipster': False,
    u'intimate': False,
    u'romantic': False,
    u'touristy': False,
    u'trendy': False,
    u'upscale': False},
   u'BusinessAcceptsCreditCards': False,
   u'GoodForKids': True,
   u'GoodForMeal': {u'breakfast': False,
    u'brunch': False,
    u'dessert': False,
    u'dinner': False,
    u'latenight': False,
    u'lunch': False},
   u'HasTV': False,
   u'NoiseLevel': u'average',
   u'OutdoorSeating': False,
   u'RestaurantsAttire': u'casual',
   u'RestaurantsDelivery': True,
   u'RestaurantsGoodForGroups': True,
   u'RestaurantsPriceRange2': 2,
   u'RestaurantsReservations': False,
   u'RestaurantsTakeOut': True},
  u'business_id': u'mLwM-h2YhXl2NCgdS84_Bw',
  u'categories': [u'Food',
   u'Soul Food',
   u'Convenience Stores',
   u'Restaurants'],
  u'city': u'Charlotte',
  u'hours': {u'Friday': u'10:00-22:00',
   u'Monday': u'10:00-22:00',
   u'Saturday': u'10:00-22:00',
   u'Sunday': u'10:00-22:00',
   u'Thursday': u'10:00-22:00',
   u'Tuesday': u'10:00-22:00',
   u'Wednesday': u'10:00-22:00'},
  u'is_open': 0,
  u'latitude': 35.23687,
  u'longitude': -80.7419759,
  u'name': u'South Florida Style Chicken & Ribs',
  u'neighborhood': u'Eastland',
  u'postal_code': u'28215',
  u'review_count': 4,
  u'stars': 4.5,
  u'state': u'NC'},
 {u'address': u'337 Danforth Avenue',
  u'attributes': {u'BikeParking': True,
   u'BusinessAcceptsCreditCards': True,
   u'BusinessParking': {u'garage': False,
    u'lot': False,
    u'street': True,
    u'valet': False,
    u'validated': False},
   u'OutdoorSeating': False,
   u'RestaurantsPriceRange2': 2,
   u'WheelchairAccessible': True,
   u'WiFi': u'no'},
  u'business_id': u'v2WhjAB3PIBA8J8VxG3wEg',
  u'categories': [u'Food', u'Coffee & Tea'],
  u'city': u'Toronto',
  u'hours': {u'Friday': u'10:00-19:00',
   u'Monday': u'10:00-19:00',
   u'Saturday': u'10:00-18:00',
   u'Sunday': u'12:00-17:00',
   u'Thursday': u'10:00-19:00',
   u'Tuesday': u'10:00-19:00',
   u'Wednesday': u'10:00-19:00'},
  u'is_open': 0,
  u'latitude': 43.6771258,
  u'longitude': -79.3532848,
  u'name': u'The Tea Emporium',
  u'neighborhood': u'Riverdale',
  u'postal_code': u'M4K 1N7',
  u'review_count': 7,
  u'stars': 4.5,
  u'state': u'ON'},
 {u'address': u'7702 E Doubletree Ranch Rd, Ste 300',
  u'attributes': {},
  u'business_id': u'CVtCbSB1zUcUWg-9TNGTuQ',
  u'categories': [u'Professional Services', u'Matchmakers'],
  u'city': u'Scottsdale',
  u'hours': {u'Friday': u'9:00-17:00',
   u'Monday': u'9:00-17:00',
   u'Thursday': u'9:00-17:00',
   u'Tuesday': u'9:00-17:00',
   u'Wednesday': u'9:00-17:00'},
  u'is_open': 1,
  u'latitude': 33.5650816,
  u'longitude': -111.9164003,
  u'name': u'TRUmatch',
  u'neighborhood': u'',
  u'postal_code': u'85258',
  u'review_count': 3,
  u'stars': 3.0,
  u'state': u'AZ'},
 {u'address': u'4719 N 20Th St',
  u'attributes': {u'Alcohol': u'none',
   u'Ambience': {u'casual': False,
    u'classy': False,
    u'divey': False,
    u'hipster': False,
    u'intimate': False,
    u'romantic': False,
    u'touristy': False,
    u'trendy': False,
    u'upscale': False},
   u'BikeParking': True,
   u'BusinessAcceptsCreditCards': True,
   u'BusinessParking': {u'garage': False,
    u'lot': False,
    u'street': False,
    u'valet': False,
    u'validated': False},
   u'Caters': True,
   u'GoodForKids': True,
   u'GoodForMeal': {u'breakfast': False,
    u'brunch': False,
    u'dessert': False,
    u'dinner': False,
    u'latenight': False,
    u'lunch': False},
   u'HasTV': False,
   u'NoiseLevel': u'quiet',
   u'OutdoorSeating': False,
   u'RestaurantsAttire': u'casual',
   u'RestaurantsDelivery': False,
   u'RestaurantsGoodForGroups': True,
   u'RestaurantsPriceRange2': 1,
   u'RestaurantsReservations': False,
   u'RestaurantsTableService': False,
   u'RestaurantsTakeOut': True,
   u'WiFi': u'no'},
  u'business_id': u'duHFBe87uNSXImQmvBh87Q',
  u'categories': [u'Sandwiches', u'Restaurants'],
  u'city': u'Phoenix',
  u'hours': {},
  u'is_open': 0,
  u'latitude': 33.5059283,
  u'longitude': -112.0388474,
  u'name': u'Blimpie',
  u'neighborhood': u'',
  u'postal_code': u'85016',
  u'review_count': 10,
  u'stars': 4.5,
  u'state': u'AZ'}]

0 个答案:

没有答案