我正在使用 IBM Watson 和 Python 3 将音频文件转换为文本。它以 JSON 格式返回结果,生成的 JSON 如下所示:
{
"results": [
{
"alternatives": [
{
"transcript": "Marcus white is the managing director of quartz power group an energy company ",
"confidence": 0.85,
"word_confidence": [
[
"Marcus",
0.678
],
[
"white",
0.99
],
[
"is",
0.988
],
[
"the",
1.0
],
[
"managing",
1.0
],
[
"director",
1.0
],
[
"of",
1.0
],
[
"quartz",
0.394
],
[
"power",
0.737
],
[
"group",
0.968
],
[
"an",
0.783
],
[
"energy",
0.989
],
[
"company",
0.844
]
],
"timestamps": [
[
"Marcus",
12.28,
12.78
],
[
"white",
12.78,
13.17
],
[
"is",
13.17,
13.33
],
[
"the",
13.33,
13.42
],
[
"managing",
13.42,
13.83
],
[
"director",
13.83,
14.39
],
[
"of",
14.39,
14.52
],
[
"quartz",
14.52,
15.0
],
[
"power",
15.0,
15.36
],
[
"group",
15.36,
15.79
],
[
"an",
15.93,
16.08
],
[
"energy",
16.08,
16.45
],
[
"company",
16.45,
16.95
]
]
}
],
"final": true
},
{
"alternatives": [
{
"transcript": "every month the departmental manages meet to discuss high level issues in the company ",
"confidence": 0.925,
"word_confidence": [
[
"every",
1.0
],
[
"month",
0.993
],
[
"the",
0.728
],
[
"departmental",
1.0
],
[
"manages",
0.7
],
[
"meet",
0.77
],
[
"to",
1.0
],
[
"discuss",
1.0
],
[
"high",
0.835
],
[
"level",
0.984
],
[
"issues",
1.0
],
[
"in",
0.67
],
[
"the",
0.927
],
[
"company",
0.994
]
],
"timestamps": [
[
"every",
18.1,
18.39
],
[
"month",
18.39,
18.93
],
[
"the",
18.96,
19.07
],
[
"departmental",
19.07,
19.73
],
[
"manages",
19.73,
20.29
],
[
"meet",
20.29,
20.56
],
[
"to",
20.56,
20.66
],
[
"discuss",
20.66,
21.12
],
[
"high",
21.12,
21.33
],
[
"level",
21.33,
21.64
],
[
"issues",
21.64,
22.08
],
[
"in",
22.08,
22.18
],
[
"the",
22.18,
22.27
],
[
"company",
22.27,
22.75
]
]
}
],
"final": true
},
每个转录片段都会重复这种格式。我想提取所有转录文本,也就是 transcript 键的值。我尝试过:
index = 0
for [index]["transcript"] in ["results"][0]["alternatives"]:
print (["results"][0]["alternatives"][index]["transcript"])
index += 1
但是这失败了,因为每个转录对象都被嵌入到列表中。第一个转录值的完整路径为:
d["results"][0]["alternatives"][0]["transcript"]
如何遍历此列表以提取所有转录值并将其添加到字符串变量?
干杯:)
答案 0 :(得分:4)
# Gather every transcript from the parsed Watson response `d` into one string.
# d['results'] is a list; each result holds a list under 'alternatives', and
# each alternative carries its text under 'transcript'.
transcripts = []
for result in d['results']:
    for alternative in result['alternatives']:
        transcripts.append(alternative['transcript'])
        # print() is a function in Python 3; the bare `print x` form is a SyntaxError.
        print(alternative['transcript'])
# Join once at the end. The sample transcripts each end with a trailing space,
# so an empty separator keeps the words apart.
all_text = ''.join(transcripts)
通常,Python 允许您直接遍历列表中的对象,而不必使用索引;使用索引进行遍历在 C++ 等语言中更为常见。