pyspark新手,想将csv文件读取到数据帧,但似乎无法读取。有什么帮助吗?
from pyspark.sql import SQLContext
import pyspark
from pyspark.sql import Row
import csv

# Wrap the already-running SparkContext `sc` (provided by the pyspark
# shell / notebook) in a SQLContext for DataFrame operations.
sql_c = SQLContext(sc)

# Parse each line with the csv module instead of a bare split(","),
# so quoted fields that themselves contain commas are split correctly.
rdd = sc.textFile('data.csv').map(lambda line: next(csv.reader([line])))
rdd.count()
Py4JJavaError Traceback (most recent call last) in &lt;module&gt;() ----> 1 rdd.count()
答案 0 :(得分:1)
如果您使用Spark 2,首选方式是
// App display name read from the bundle (CFBundleDisplayName); nil until set in viewDidLoad.
var displayName: String?
// Marketing version string (CFBundleShortVersionString); nil until set in viewDidLoad.
var version: String?
// Build number string (CFBundleVersion); nil until set in viewDidLoad.
var build: String?
/// Reads the app's display name, marketing version and build number
/// from the main bundle's Info.plist into the stored properties.
override func viewDidLoad() {
    super.viewDidLoad()

    // CFBundleDisplayName normally comes from the localized info dictionary,
    // but `localizedInfoDictionary` is nil when the app ships no
    // InfoPlist.strings — fall back to the main info dictionary so the
    // display name is still picked up in that case.
    if let displayName = Bundle.main.localizedInfoDictionary?["CFBundleDisplayName"] as? String
        ?? Bundle.main.infoDictionary?["CFBundleDisplayName"] as? String {
        self.displayName = displayName
    }
    // Marketing version, e.g. "1.2.3".
    if let version = Bundle.main.infoDictionary?["CFBundleShortVersionString"] as? String {
        self.version = version
    }
    // Build number, e.g. "42".
    if let build = Bundle.main.infoDictionary?["CFBundleVersion"] as? String {
        self.build = build
    }
}
答案 1 :(得分:0)
读取独立于spark版本的csv:
# Pick the CSV data source by Spark version: Spark 2+ ships a built-in
# "csv" format, while Spark 1.x needs the external spark-csv package
# (com.databricks.spark.csv must be supplied via --packages).
# Indentation restored: the if/else bodies must be indented to be valid Python.
if sc.version.startswith("2"):
    csv_plugin = "csv"
else:
    csv_plugin = "com.databricks.spark.csv"

# header='true' treats the first line as column names;
# inferSchema='true' samples the data to choose column types.
dataframe = (sql_c.read
             .format(csv_plugin)
             .options(header='true', inferSchema='true')
             .load('data.csv'))
如果您没有标题行,请删除 header='true'。