我正在尝试解析通过 musicbrainzngs 模块从 MusicBrainz 获取到的数据,它是一个类似 JSON 的大型 Python 字典。下面是这种字典经过格式化输出(pretty-print)后的一个样本。
{ 'artist-credit': [ { 'artist': { 'id': '0039c7ae-e1a7-4a7d-9b49-0cbc716821a6',
'name': 'Death Cab for Cutie',
'sort-name': 'Death Cab for Cutie'}}],
'artist-credit-phrase': 'Death Cab for Cutie',
'asin': 'B0000D1FDI',
'barcode': '655173103227',
'country': 'US',
'cover-art-archive': { 'artwork': 'true',
'back': 'false',
'count': '1',
'front': 'true'},
'date': '2003-10-07',
'id': 'e602a3ae-fe8f-4abd-8638-f055517bacb2',
'label-info-count': 1,
'label-info-list': [ { 'catalog-number': 'bark32',
'label': { 'id': 'a4f904e0-f048-4c13-88ec-f9f31f3e6109',
'name': 'Barsuk Records',
'sort-name': 'Barsuk Records'}}],
'medium-count': 1,
'medium-list': [ { 'disc-count': 4,
'disc-list': [ { 'id': '5G2zzIza.oA1Y3XpMHxuohbzazQ-',
'offset-count': 11,
'offset-list': [ 150,
18780,
34698,
51295,
70286,
80376,
100160,
135944,
152723,
166122,
191235],
'sectors': '207432'},
{ 'id': '8XGkh_GqZPv6rL8W1c6_t9fQKhw-',
'offset-count': 11,
'offset-list': [ 150,
18933,
35005,
51755,
70899,
81142,
101079,
137017,
153949,
167501,
192767],
'sectors': '208967'},
{ 'id': 'AptsPDTKO.nMoE_GRmqGZSWjT7g-',
'offset-count': 11,
'offset-list': [ 150,
18635,
34405,
50855,
69697,
79638,
99273,
134905,
151535,
164791,
189759],
'sectors': '205963'},
{ 'id': 'scc32yarsl41ysxMw43_1Pk8n3M-',
'offset-count': 11,
'offset-list': [ 150,
18628,
34394,
50839,
69678,
79616,
99248,
134880,
151507,
164754,
189715],
'sectors': '205910'}],
'format': 'CD',
'position': '1',
'track-count': 11,
'track-list': [ { 'id': 'd65135d9-d917-3c04-9a3e-1a9f3f75dbdf',
'length': '246400',
'number': '1',
'position': '1',
'recording': { 'id': '2aefb5c8-f137-4289-b9f9-e78d23695468',
'length': '246400',
'title': 'The New '
'Year'},
'track_or_recording_length': '246400'},
{ 'id': 'bdff5634-4743-3957-9a2c-285af885fd56',
'length': '210240',
'number': '2',
'position': '2',
'recording': { 'id': '60d3363b-7cc4-4675-a85b-692683054ff2',
'length': '210213',
'title': 'Lightness'},
'track_or_recording_length': '210240'},
{ 'id': '41aec2a4-4ecb-30c0-9052-099a504c1623',
'length': '219280',
'number': '3',
'position': '3',
'recording': { 'id': '6c077d47-09ae-4059-b025-d48f48710f92',
'length': '219000',
'title': 'Title '
'and '
'Registration'},
'track_or_recording_length': '219280'},
{ 'id': 'b28ef069-7bca-3a43-a3a2-dcd123652d2e',
'length': '251200',
'number': '4',
'position': '4',
'recording': { 'id': '143b3d2e-82d6-4fbc-afd4-5ac8b8e2ffe6',
'length': '251187',
'title': 'Expo ’86'},
'track_or_recording_length': '251200'},
{ 'id': 'c329e654-8866-3a78-939d-3cb8368f1de6',
'length': '132520',
'number': '5',
'position': '5',
'recording': { 'id': '59403f9d-722a-48c3-aedb-6c1bb6102668',
'length': '132520',
'title': 'The '
'Sound '
'of '
'Settling'},
'track_or_recording_length': '132520'},
{ 'id': '469bbe0a-8ac9-38f9-8998-17ab0bdd8cb4',
'length': '261773',
'number': '6',
'position': '6',
'recording': { 'id': 'a594b2c7-5e4f-4e81-9cda-91bab05da25e',
'length': '261773',
'title': 'Tiny '
'Vessels'},
'track_or_recording_length': '261773'},
{ 'id': '9ac716d8-e9a2-3b7f-9588-ef5b91b00925',
'length': '475120',
'number': '7',
'position': '7',
'recording': { 'id': '9472186f-ec6d-48d1-9a47-4bc6e922cffe',
'length': '475093',
'title': 'Transatlanticism'},
'track_or_recording_length': '475120'},
{ 'id': 'a37ec190-9dde-3c23-8d2d-a561afe56a3a',
'length': '221706',
'number': '8',
'position': '8',
'recording': { 'id': '6dfcacb8-f767-43af-9645-9c9dd39eeb44',
'length': '221706',
'title': 'Passenger '
'Seat'},
'track_or_recording_length': '221706'},
{ 'id': '46eecfc9-e7a3-3d73-acb1-02a5b13d3831',
'length': '176640',
'number': '9',
'position': '9',
'recording': { 'id': 'e2a42ced-3f08-4012-ad7c-8c215da8a2a1',
'length': '176640',
'title': 'Death '
'of an '
'Interior '
'Decorator'},
'track_or_recording_length': '176640'},
{ 'id': 'b3e21529-72fb-30bd-b4b8-22e7c7c9a411',
'length': '332826',
'number': '10',
'position': '10',
'recording': { 'id': '70cf7264-1053-4afc-ace8-81cd24cc6391',
'length': '332826',
'title': 'We '
'Looked '
'Like '
'Giants'},
'track_or_recording_length': '332826'},
{ 'id': '8b3e26c5-978b-35f2-8b2b-7e2574ae37bf',
'length': '215294',
'number': '11',
'position': '11',
'recording': { 'id': 'a9f6ea4c-06a9-46a4-9a67-667617b0fe6a',
'length': '216000',
'title': 'A Lack '
'of Color'},
'track_or_recording_length': '215294'}]}],
'packaging': 'Jewel Case',
'quality': 'normal',
'release-event-count': 1,
'release-event-list': [ { 'area': { 'id': '489ce91b-6658-3307-9877-795b68554c98',
'iso-3166-1-code-list': ['US'],
'name': 'United States',
'sort-name': 'United States'},
'date': '2003-10-07'}],
'status': 'Official',
'text-representation': {'language': 'eng', 'script': 'Latn'},
'title': 'Transatlanticism'}
我想从这个字典中提取特定的信息。例如,我有下面这个字典:它的键是对数据的一般描述,值则是通过一长串列表和字典索引取到的目标数据。在这个示例中,假设 release_dict 就是上面的数据。
# Maps a descriptive tag name to a value read by indexing directly into
# release_dict (assumed to have the shape of the sample release data).
# Every lookup is a plain subscript, so a missing key or list index anywhere
# along a path raises and album_info is never bound.
# disc_number is not defined in this snippet; it is used as the index into
# release_dict['medium-list'].
album_info = {
"album_title": release_dict['title'],
"album_artist": release_dict['artist-credit'][0]['artist']['name'],
"artist": release_dict['artist-credit'][0]['artist']['name'],
"release_date": release_dict['date'],
"track_total": release_dict['medium-list'][disc_number]['track-count'],
"release_label": release_dict['label-info-list'][0]['label']['name'],
}
问题是,musicbrainzngs 返回的数据结构并不总是相同的。例如,通向 “release_label” 的索引路径可能根本不存在。我能想到的唯一合理的处理方法,是为每个标签单独写一个 try 语句,如下所示:
# Per-tag pattern: attempt the lookup on its own, report a missing key,
# and store the value only when the lookup succeeded.
album_dict = {}
try:
    album_title = release_dict['title']
except KeyError:
    print("Value for 'album_title' not found.")
else:
    album_dict["album_title"] = album_title
然后对其他所有标签执行相同的操作。但我更希望能以某种方式遍历每个标签,让代码更符合 DRY(Don't Repeat Yourself,不要重复自己)原则,也更安全。
我能想到的唯一解决方案,是编写一个接收索引字符串(例如 "['artist-credit'][0]['artist']['name']")的函数,由它解析该字符串并逐级检查每一部分是否存在,但我觉得这对这个问题来说可能有点小题大做了。
总之,我只是想写一段代码,从原始元数据中获取尽可能多的信息。如果你有任何想法,我愿意接受任何解决方案。感谢您的帮助。
答案 0 :(得分:2)
你可以创建一个接受可变数量键的函数:它逐级向下查找,一旦某一级不存在就停止并返回默认值(类似于嵌套的 dict.get)。
def rec_get(d, *items, default=None):
    """Follow a path of keys/indexes through nested containers.

    Each element of ``items`` is applied in turn as a subscript
    (``d = d[item]``), so the path may mix dict keys and list indexes,
    much like a chained ``dict.get`` that also works through lists.

    Args:
        d: The outermost container to start from.
        *items: Keys and/or indexes to follow, outermost first.
        default: Keyword-only value returned when the path cannot be
            followed to the end.

    Returns:
        The value at the end of the path, ``d`` itself when no items are
        given, or ``default`` as soon as one step fails.
    """
    try:
        for item in items:
            d = d[item]
    # KeyError: missing dict key. IndexError: list index out of range
    # (e.g. an empty list indexed with 0). TypeError: the current value is
    # not subscriptable, or the subscript type does not fit the container.
    except (KeyError, IndexError, TypeError):
        return default
    return d
# Both artist tags are filled from the same path, so look it up once.
artist_name = rec_get(release_dict, 'artist-credit', 0, 'artist', 'name')

# Same tags as before, but each value falls back to None when its path is
# absent instead of raising on a missing key.
album_info = {
    "album_title": release_dict.get('title'),
    "album_artist": artist_name,
    "artist": artist_name,
    "release_date": release_dict.get('date'),
    "track_total": rec_get(
        release_dict, 'medium-list', disc_number, 'track-count'
    ),
    "release_label": rec_get(
        release_dict, 'label-info-list', 0, 'label', 'name'
    ),
}