我有下面的代码,它将数据从json文件读取到pandas数据帧。像“属性”这样的一些列仍然随之而来。我希望它们像“attributes.GoodForMeal.Dessert”这样的列,类似于r的flatten函数。
有人可以建议在python中这样做吗?
代码:
df_business = pd.read_json('dataset/business.json', lines=True)
print(df_business[1:3])
数据:
address attributes \
1 2824 Milton Rd {u'GoodForMeal': {u'dessert': False, u'latenig...
2 337 Danforth Avenue {u'BusinessParking': {u'garage': False, u'stre...
business_id categories \
1 mLwM-h2YhXl2NCgdS84_Bw [Food, Soul Food, Convenience Stores, Restaura...
2 v2WhjAB3PIBA8J8VxG3wEg [Food, Coffee & Tea]
city hours is_open \
1 Charlotte {u'Monday': u'10:00-22:00', u'Tuesday': u'10:0... 0
2 Toronto {u'Monday': u'10:00-19:00', u'Tuesday': u'10:0... 0
latitude longitude name neighborhood \
1 35.236870 -80.741976 South Florida Style Chicken & Ribs Eastland
2 43.677126 -79.353285 The Tea Emporium Riverdale
postal_code review_count stars state
1 28215 4 4.5 NC
2 M4K 1N7 7 4.5 ON
更新
from pandas.io.json import json_normalize
print json_normalize('dataset/business.json')
错误:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-12-bb0ce59acb26> in <module>()
1 from pandas.io.json import json_normalize
----> 2 print json_normalize('dataset/business.json')
/Users/anaconda/lib/python2.7/site-packages/pandas/io/json.pyc in json_normalize(data, record_path, meta, meta_prefix, record_prefix)
791
792 if record_path is None:
--> 793 if any([isinstance(x, dict) for x in compat.itervalues(data[0])]):
794 # naive normalization, this is idempotent for flat records
795 # and potentially will inflate the data considerably for
/Users/anaconda/lib/python2.7/site-packages/pandas/compat/__init__.pyc in itervalues(obj, **kw)
169
170 def itervalues(obj, **kw):
--> 171 return obj.itervalues(**kw)
172
173 next = lambda it : it.next()
AttributeError: 'str' object has no attribute 'itervalues'
UPDATE2:
Code:
import json;
json_normalize(json.load('dataset/business.json'))
Error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-20-4fb4bf64efc6> in <module>()
1 import json;
----> 2 json_normalize(json.load('dataset/business.json'))
/Users/anaconda/lib/python2.7/json/__init__.pyc in load(fp, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
285
286 """
--> 287 return loads(fp.read(),
288 encoding=encoding, cls=cls, object_hook=object_hook,
289 parse_float=parse_float, parse_int=parse_int,
AttributeError: 'str' object has no attribute 'read'
UPDATE3:
Code:
with open('dataset/business.json') as f:
df = json_normalize(json.load(f))
Error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-21-e3449614f320> in <module>()
1 with open('dataset/business.json') as f:
----> 2 df = json_normalize(json.load(f))
/Users/anaconda/lib/python2.7/json/__init__.pyc in load(fp, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
289 parse_float=parse_float, parse_int=parse_int,
290 parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
--> 291 **kw)
292
293
/Users/anaconda/lib/python2.7/json/__init__.pyc in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
337 parse_int is None and parse_float is None and
338 parse_constant is None and object_pairs_hook is None and not kw):
--> 339 return _default_decoder.decode(s)
340 if cls is None:
341 cls = JSONDecoder
/Users/anaconda/lib/python2.7/json/decoder.pyc in decode(self, s, _w)
365 end = _w(s, end).end()
366 if end != len(s):
--> 367 raise ValueError(errmsg("Extra data", s, end, len(s)))
368 return obj
369
ValueError: Extra data: line 2 column 1 - line 156640 column 1 (char 731 - 132272455)
UPDATE4:
Code:
with open('dataset/business.json') as f:
reviews = f.read().strip().split("\n")
reviews = [json.loads(review) for review in reviews]
reviews[1:5]
Sample Data:
[{u'address': u'2824 Milton Rd',
u'attributes': {u'Ambience': {u'casual': False,
u'classy': False,
u'divey': False,
u'hipster': False,
u'intimate': False,
u'romantic': False,
u'touristy': False,
u'trendy': False,
u'upscale': False},
u'BusinessAcceptsCreditCards': False,
u'GoodForKids': True,
u'GoodForMeal': {u'breakfast': False,
u'brunch': False,
u'dessert': False,
u'dinner': False,
u'latenight': False,
u'lunch': False},
u'HasTV': False,
u'NoiseLevel': u'average',
u'OutdoorSeating': False,
u'RestaurantsAttire': u'casual',
u'RestaurantsDelivery': True,
u'RestaurantsGoodForGroups': True,
u'RestaurantsPriceRange2': 2,
u'RestaurantsReservations': False,
u'RestaurantsTakeOut': True},
u'business_id': u'mLwM-h2YhXl2NCgdS84_Bw',
u'categories': [u'Food',
u'Soul Food',
u'Convenience Stores',
u'Restaurants'],
u'city': u'Charlotte',
u'hours': {u'Friday': u'10:00-22:00',
u'Monday': u'10:00-22:00',
u'Saturday': u'10:00-22:00',
u'Sunday': u'10:00-22:00',
u'Thursday': u'10:00-22:00',
u'Tuesday': u'10:00-22:00',
u'Wednesday': u'10:00-22:00'},
u'is_open': 0,
u'latitude': 35.23687,
u'longitude': -80.7419759,
u'name': u'South Florida Style Chicken & Ribs',
u'neighborhood': u'Eastland',
u'postal_code': u'28215',
u'review_count': 4,
u'stars': 4.5,
u'state': u'NC'},
{u'address': u'337 Danforth Avenue',
u'attributes': {u'BikeParking': True,
u'BusinessAcceptsCreditCards': True,
u'BusinessParking': {u'garage': False,
u'lot': False,
u'street': True,
u'valet': False,
u'validated': False},
u'OutdoorSeating': False,
u'RestaurantsPriceRange2': 2,
u'WheelchairAccessible': True,
u'WiFi': u'no'},
u'business_id': u'v2WhjAB3PIBA8J8VxG3wEg',
u'categories': [u'Food', u'Coffee & Tea'],
u'city': u'Toronto',
u'hours': {u'Friday': u'10:00-19:00',
u'Monday': u'10:00-19:00',
u'Saturday': u'10:00-18:00',
u'Sunday': u'12:00-17:00',
u'Thursday': u'10:00-19:00',
u'Tuesday': u'10:00-19:00',
u'Wednesday': u'10:00-19:00'},
u'is_open': 0,
u'latitude': 43.6771258,
u'longitude': -79.3532848,
u'name': u'The Tea Emporium',
u'neighborhood': u'Riverdale',
u'postal_code': u'M4K 1N7',
u'review_count': 7,
u'stars': 4.5,
u'state': u'ON'},
{u'address': u'7702 E Doubletree Ranch Rd, Ste 300',
u'attributes': {},
u'business_id': u'CVtCbSB1zUcUWg-9TNGTuQ',
u'categories': [u'Professional Services', u'Matchmakers'],
u'city': u'Scottsdale',
u'hours': {u'Friday': u'9:00-17:00',
u'Monday': u'9:00-17:00',
u'Thursday': u'9:00-17:00',
u'Tuesday': u'9:00-17:00',
u'Wednesday': u'9:00-17:00'},
u'is_open': 1,
u'latitude': 33.5650816,
u'longitude': -111.9164003,
u'name': u'TRUmatch',
u'neighborhood': u'',
u'postal_code': u'85258',
u'review_count': 3,
u'stars': 3.0,
u'state': u'AZ'},
{u'address': u'4719 N 20Th St',
u'attributes': {u'Alcohol': u'none',
u'Ambience': {u'casual': False,
u'classy': False,
u'divey': False,
u'hipster': False,
u'intimate': False,
u'romantic': False,
u'touristy': False,
u'trendy': False,
u'upscale': False},
u'BikeParking': True,
u'BusinessAcceptsCreditCards': True,
u'BusinessParking': {u'garage': False,
u'lot': False,
u'street': False,
u'valet': False,
u'validated': False},
u'Caters': True,
u'GoodForKids': True,
u'GoodForMeal': {u'breakfast': False,
u'brunch': False,
u'dessert': False,
u'dinner': False,
u'latenight': False,
u'lunch': False},
u'HasTV': False,
u'NoiseLevel': u'quiet',
u'OutdoorSeating': False,
u'RestaurantsAttire': u'casual',
u'RestaurantsDelivery': False,
u'RestaurantsGoodForGroups': True,
u'RestaurantsPriceRange2': 1,
u'RestaurantsReservations': False,
u'RestaurantsTableService': False,
u'RestaurantsTakeOut': True,
u'WiFi': u'no'},
u'business_id': u'duHFBe87uNSXImQmvBh87Q',
u'categories': [u'Sandwiches', u'Restaurants'],
u'city': u'Phoenix',
u'hours': {},
u'is_open': 0,
u'latitude': 33.5059283,
u'longitude': -112.0388474,
u'name': u'Blimpie',
u'neighborhood': u'',
u'postal_code': u'85016',
u'review_count': 10,
u'stars': 4.5,
u'state': u'AZ'}]