将包含非结构化数据的 PDF 转换为 JSON 对象的最佳方法是什么?您会使用什么工具来解析这些数据?
我当然也可以先将 PDF 转换为 CSV 格式,如果这样更简单,我愿意这样做。我只打算处理 3 个不同的文档,并且不介意手动完成部分工作。我更倾向于使用 Java,但我也了解一些 JavaScript 和少量 Python。
我的最终目标是使用这些 JSON 对象填充 MongoDB 数据库,甚至可能填充 Elasticsearch 索引。
如能提供任何建议,我将不胜感激。
我要分析的文档是:
http://akccompanioneventresults.com/?moid=351
{
"dogName" : "My Dolly Two Spots",
"armbandNumber" : 110,
"handler" : "Nancy Muller",
"breed" : "Papillon",
"round" : "Top 20",
"event" : "2019 AKC National Obedience Championship",
"date" : "2019-03-17",
"rings" :
[
{
"ringNumber" : "1",
"exercises" :
[
{
"exerciseName" : "DJ",
"judge" : "J Stephens",
"score" : 0.5
},
{
"exerciseName" : "DJ",
"judge" : "L Hause",
"score" : 1.0
},
{
"exerciseName" : "DR#3",
"judge" : "J Stephens",
"score" : 0.5
},
{
"exerciseName" : "DR#3",
"judge" : "L Hause",
"score" : 1.0
},
{
"exerciseName" : "Misc",
"judge" : "J Stephens",
"score" : 0.0
},
{
"exerciseName" : "Misc",
"judge" : "L Hause",
"score" : 1.0
}
]
},
{
"ringNumber" : "2",
"exercises" :
[
{
"exerciseName" : "CD",
"judge" : "R Withers",
"score" : 1.0
},
{
"exerciseName" : "CD",
"judge" : "V Kinion",
"score" : 0.0
},
{
"exerciseName" : "ROF",
"judge" : "R Withers",
"score" : 0.0
},
{
"exerciseName" : "ROF",
"judge" : "V Kinion",
"score" : 0.0
},
{
"exerciseName" : "Misc",
"judge" : "R Withers",
"score" : 0.0
},
{
"exerciseName" : "Misc",
"judge" : "V Kinion",
"score" : 0.0
}
]
},
{
"ringNumber" : "7",
"exercises" :
[
{
"exerciseName" : "RHJ",
"judge" : "C Wray",
"score" : 0.5
},
{
"exerciseName" : "RHJ",
"judge" : "J Nocilly",
"score" : 0.5
},
{
"exerciseName" : "HF-8",
"judge" : "C Wray",
"score" : 2.0
},
{
"exerciseName" : "HF-8",
"judge" : "J Nocilly",
"score" : 1.5
},
{
"exerciseName" : "Misc",
"judge" : "C Wray",
"score" : 0.0
},
{
"exerciseName" : "Misc",
"judge" : "J Nocilly",
"score" : 0.0
}
]
},
{
"ringNumber" : "8",
"exercises" :
[
{
"exerciseName" : "DR",
"judge" : "B Lee",
"score" : 0.0
},
{
"exerciseName" : "DR",
"judge" : "J Caputa",
"score" : 0.0
},
{
"exerciseName" : "SE",
"judge" : "B Lee",
"score" : 2.0
},
{
"exerciseName" : "SE",
"judge" : "J Caputa",
"score" : 2.0
},
{
"exerciseName" : "Misc",
"judge" : "B Lee",
"score" : 0.0
},
{
"exerciseName" : "Misc",
"judge" : "J Caputa",
"score" : 0.0
}
]
}
]
} ...