我有一个以下列方式构建的DataSet:
Encoder<MyDomain> encoder= Encoders.bean(MyDomain.class);
Dataset<MyDomain> stdDS = sc.createDataset(filteredRecords.rdd(), encoder);
Dataset<Row> rowDataset = stdDS.withColumn("idHash", stdDS.col("id").substr(0, 5));
然后我尝试输出数据集:
rowDataset.write().partitionBy("keep", "idHash").save("test.parquet");
当我只按“keep”分区时,一切正常;但当我同时按“keep”和“idHash”进行分区时,会出现以下错误:
File already exists: file:/C:/dev/test.parquet/_temporary/0/_temporary/attempt_201701191219_0001_m_000000_0/keep=true/idHash=0a/part-r-00000-2c2e0494-f6a7-47d7-88e2-f49dffb608d1.snappy.parquet
如何在使用多个分区列的情况下让我的DataSet正确输出?输出文件夹在运行之前是空的。此外,该错误是在我的本地计算机上运行时出现的;在生产环境中,这些数据将输出到S3,因此任何解决方案都需要同时适用于本地文件系统和AWS S3。
谢谢, 森
答案 0 :(得分:0)
尝试
import UIKit
/// View controller that shows, in `firstLabel`, the string passed to
/// `messageData(data:)`, and registers itself as the delegate of the screen
/// reached through the "showSendingVc" segue.
class ViewController: UIViewController, SenderViewControllerDelegate {

    // MARK: - Properties

    /// Label whose text is replaced whenever `messageData(data:)` is called.
    @IBOutlet var firstLabel: UILabel!

    // MARK: - Lifecycle

    override func viewDidLoad() {
        super.viewDidLoad()
        // Do any additional setup after loading the view, typically from a nib.
    }

    override func didReceiveMemoryWarning() {
        super.didReceiveMemoryWarning()
        // Dispose of any resources that can be recreated.
    }

    // MARK: - Actions

    /// Displays the received string in `firstLabel`.
    ///
    /// NOTE(review): presumably the requirement of `SenderViewControllerDelegate`;
    /// the protocol is declared outside this file — confirm.
    ///
    /// - Parameter data: Text to show in the label.
    func messageData(data: String) {
        firstLabel.text = data
    }

    // MARK: - Navigation

    /// Wires this controller up as the delegate of the destination screen
    /// before the "showSendingVc" segue is performed.
    ///
    /// Segues with any other identifier, or whose destination is not a
    /// `NewViewController`, are left untouched.
    override func prepare(for segue: UIStoryboardSegue, sender: Any?) {
        // Conditional cast instead of `as!`: an unexpected destination type
        // should not crash the app.
        guard segue.identifier == "showSendingVc",
              let destination = segue.destination as? NewViewController else {
            return
        }
        // The delegate must be set on the destination *instance*, not on the
        // `ViewController` type.
        // NOTE(review): assumes `NewViewController` exposes a settable
        // `delegate` property accepting `SenderViewControllerDelegate`, ideally
        // declared `weak` to avoid a retain cycle — confirm.
        destination.delegate = self
    }
}