下面是需要被转换的 JSON:
# Open Bing, run a search, switch to the Images tab, open the first result
# and try to locate the full-size image element.
bing_url = 'https://www.bing.com/'
driver = webdriver.Chrome()
driver.get(bing_url)
# fixed 4-second pause before interacting with the page
time.sleep(4)
# select the search box and enter a search condition
search_box = driver.find_element_by_xpath('//*[@id="sb_form_q"]')
search_box.send_keys('coffee cups', Keys.ENTER)
# click on the images tab
# (the XPath has to be one string literal; a quoted string cannot span two lines)
images_tab = driver.find_element_by_xpath('//*[@id="b_header"]/nav/ul/li[2]/a')
images_tab.click()
select_first_image = driver.find_element_by_xpath('//*[@id="mmComponent_images_1"]/ul[1]/li[1]/div/div/a/div/img')
select_first_image.click()
# gives error that no such element exists
# NOTE(review): the lookup runs immediately after the click; the target may not
# be rendered yet or may live in a different frame — confirm before relying on it.
image_url = driver.find_element_by_xpath('//*[@id="mainImageWindow"]/div[1]/div/div/div/img')
需要将其转换为{
"name": "Jon",
"tags":[
{
"1": "San Jose",
"2": "California",
"3": 1987
},
{
"1": "University Ave",
"2": "Princeton",
"3": 1990
}
]
}
,就像这样
DataFrame
谁能帮我解决这个难题。谢谢!!
答案 0 :(得分:1)
正如我在评论中所说,Spark 原生支持 JSON 解析(supports JSON parsing out of the box)——您不需要任何外部库。
Spark 会自动推断文件的 schema,但不会自动展平数据……
您需要在程序中自行执行该操作。
这是一个简单的示例,说明如何使用类型化的 Dataset(typed Dataset)来实现。
(注意:JSON文件应按文档中所述每行包含一个文档)。
(此外,请注意,我将字段重命名为 a、b 和 c,因此它们是有效的 Scala 标识符)。
import org.apache.spark.sql.{SparkSession, Encoder}
/** One input JSON document: a `name` together with its list of nested `tags` objects. */
final case class Nested(name: String, tags: List[Tag])
/** One element of the `tags` array.
  *
  * The fields are called `a`, `b` and `c` (rather than the sample's "1", "2", "3" keys)
  * so that they are valid Scala identifiers; the JSON being read must use the same names.
  */
final case class Tag(a: String, b: String, c: Int)
/** One flattened output row: the parent document's `name` paired with the fields of a single tag. */
final case class Flattened(name: String, a: String, b: String, c: Int)
// Local Spark session using every available core.
val spark = SparkSession.builder().master("local[*]").getOrCreate()
import spark.implicits._

// Derive the encoder for the nested shape once, and reuse its schema for the read
// so the JSON is parsed against the case-class layout instead of an inferred one.
val nestedEncoder: Encoder[Nested] = implicitly[Encoder[Nested]]
val nestedSchema = nestedEncoder.schema

// Typed view of the JSON input ("path" is the location of the JSON file).
val nestedDS = {
  val reader = spark.read.option("charset", "UTF-8").schema(nestedSchema)
  reader.json("path").as(nestedEncoder)
}
// Emit one Flattened row per (document, tag) pair.
val flattenedDS = nestedDS.flatMap { nested =>
  nested.tags.map(tag => Flattened(nested.name, tag.a, tag.b, tag.c))
}
// flattenedDS: Dataset[Flattened] = [name: string, a: string, b: string, c: integer]
// Print the schema of the flattened dataset; the output for the sample input follows.
flattenedDS.printSchema()
// root
// |-- name: string (nullable = true)
// |-- a: string (nullable = true)
// |-- b: string (nullable = true)
// |-- c: integer (nullable = false)
// Print the flattened rows; the output for the sample input follows.
flattenedDS.show()
// +----+--------------+----------+----+
// |name| a| b| c|
// +----+--------------+----------+----+
// | Jon| San Jose|California|1987|
// | Jon|University Ave| Princeton|1990|
// +----+--------------+----------+----+