我有一个 Dataset<Row>,其中有四列,其中两列是非原始数据类型:List<Long> 和 List<String>。
+------+---------------+---------------------------------------------+---------------+
| Id| value | time |aggregateType |
+------+---------------+---------------------------------------------+---------------+
|0001 | [1.5,3.4,4.5]| [1551502200000,1551502200000,1551502200000] | Sum |
+------+---------------+---------------------------------------------+---------------+
我有一个接受三个参数并返回 Double 值的 UDF3:UDF3<String,List<Long>,List<String>,Double>。
但是当我调用该 UDF 时,它会抛出如下异常:
caused by java.lang.classcastexception scala.collection.mutable.wrappedarray$ofref cannot be cast to java.lang.List
但是,如果我把参数类型都改为 String,即 UDF3<String,String,String,Double>,就不会报错。
引发异常的代码
// UDF declared with java.util.List parameters for the two array columns.
// Per the exception quoted above, calling it fails because the array values
// arrive as scala.collection.mutable.WrappedArray, which cannot be cast to List.
UDF3<String,List<Long>,List<String>,Double> getAggregate = new UDF3<String,List<Long>,List<String>,Double>() {
public Double call(String t1,List<Long> t2,List<String> t3) throws Exception {
// Placeholder for the real aggregation logic; the return below is pseudo-code.
return double;
}
// Register the UDF under the name "getAggregate_UDF" with a Double return type,
// then apply it to (aggregateType, time, value) and store the result in "value_new".
sparkSession.udf().register("getAggregate_UDF",getAggregate, DataTypes.DoubleType);
inputDS = inputDs.withColumn("value_new",callUDF("getAggregate_UDF",col("aggregateType"),col("time"),col("value")));
将所有类型更改为字符串后的代码
// Workaround variant: every parameter is a String, so the array columns must be
// cast to String by the caller and parsed back into lists inside the UDF.
UDF3<String,String,String,Double> getAggregate = new UDF3<String,String,String,Double>() {
public Double call(String t1,String t2,String t3) throws Exception {
// Convert t2 and t3 back to List<Long> and List<String> respectively.
// Placeholder for the real aggregation logic; the return below is pseudo-code.
return double;
}
// Register the UDF, then call it with "time" and "value" cast to String so the
// argument types match the String-only signature.
sparkSession.udf().register("getAggregate_UDF",getAggregate, DataTypes.DoubleType);
inputDS = inputDs.withColumn("value_new",callUDF("getAggregate_UDF",col("aggregateType"),col("time").cast("String"),col("value").cast("String")));
上面的代码可以工作,但需要手动把 String 转换为 List。
需要帮助
I)如何在数据集中处理非原始数据类型 List<Long> 和 List<String>,以避免 caused by java.lang.classcastexception scala.collection.mutable.wrappedarray$ofref cannot be cast to java.lang.List 这个异常?
II)如果有其他解决方法,也请给出建议。
谢谢。
答案 0 :(得分:3)
您的UDF将始终接收WrappedArray实例而不是List,因为这是引擎存储它们的方式。
您需要编写如下内容:
/// Fetches the documents in the "order" collection whose `account_id` equals the
/// signed-in user's uid and converts them to `Order` values.
///
/// - Parameter completion: Invoked with the parsed orders on success, or with `nil`
///   when there is no signed-in user, the query reports an error, or no snapshot
///   is delivered. Documents that `extractOrder` cannot parse are omitted.
public func getTransactionData(completion: @escaping (([Order]?) -> ())) {
    guard let userId = Auth.auth().currentUser?.uid else { completion(nil); return }
    let db = Firestore.firestore()
    let query = db.collection("order").whereField("account_id",
                                                  isEqualTo: userId)
    query.getDocuments() { (querySnapshot, err) in
        if let err = err {
            print("Error getting documents: \(err)")
            // Report the failure; previously the completion was never called on
            // this path, leaving the caller waiting indefinitely.
            completion(nil)
            return
        }
        guard let querySnapshot = querySnapshot else {
            completion(nil); return }
        // compactMap drops documents for which extractOrder returns nil.
        let orders = querySnapshot.documents.compactMap { self.extractOrder($0) }
        completion(orders)
    }
}
/// Builds an `Order` from a Firestore document.
///
/// - Parameter document: The snapshot whose fields are read.
/// - Returns: The populated order, or `nil` when any required field is missing
///   or does not have the expected type.
private func extractOrder(_ document: QueryDocumentSnapshot) -> Order? {
    // Read the field dictionary once and reuse it for every lookup.
    let fields = document.data()
    print("document.data() is \(fields)")

    // Line items are parsed before validation, exactly as the caller order requires;
    // a missing "line_items" value falls back to a single empty dictionary.
    let rawLineItems = fields["line_items"] as? [[String:Any]] ?? [[:]]
    let lineItems = extractLineItems(rawLineItems)
    let orderId = document.documentID

    guard
        let balanceId = fields["balance_id"] as? String,
        let accountId = fields["account_id"] as? String,
        let subtotal = fields["subtotal"] as? Int,
        let date = fields["date"] as? Int,
        let totalAmount = fields["total_amount"] as? Int,
        let notes = fields["notes"] as? String,
        let rewardAmount = fields["reward_amount"] as? Int,
        let status = fields["status"] as? String,
        let tax = fields["tax_amount"] as? Int,
        let tip = fields["tip_amount"] as? Int,
        let balanceAmount = fields["balance_amount"] as? Int,
        let discountAmount = fields["discount_amount"] as? Int,
        let locationId = fields["location_id"] as? String
    else {
        return nil
    }

    let order = Order(
        totalAmount: totalAmount,
        subtotal: subtotal,
        discountAmount: discountAmount,
        tipAmount: tip,
        taxAmount: tax,
        balanceAmount: balanceAmount,
        rewardAmount: rewardAmount,
        balanceId: balanceId,
        accountId: accountId,
        locationId: locationId,
        date: date,
        status: status,
        orderType: "PICK UP",
        lineItems: lineItems,
        notes: notes,
        orderId: orderId
    )
    print("order is \(order)")
    return order
}
// Scratch state written by extractLineItems for each line item it accepts:
// the item's "modifiers" value when it casts to [Modifier], otherwise nil.
var modifiers: [Modifier]?
// Scratch state written by extractLineItems for each line item it accepts:
// the item's "toppings" value when it casts to [String], otherwise an empty array.
var toppings: [String]?
/// Converts raw line-item dictionaries into `MenuItem` values.
///
/// Entries lacking any required key, or holding a value of an unexpected type,
/// are skipped. For every accepted entry `self.modifiers` and `self.toppings`
/// are overwritten with that entry's values before the item is built.
///
/// - Parameter dictionaryArray: The raw line-item dictionaries to parse.
/// - Returns: The successfully parsed items, in input order.
private func extractLineItems(_ dictionaryArray: [[String:Any]]) -> [MenuItem] {
    var parsedItems = [MenuItem]()
    for entry in dictionaryArray {
        guard
            let itemId = entry["item_id"] as? String,
            let category = entry["category"] as? String,
            let name = entry["name"] as? String,
            let description = entry["description"] as? String,
            let photoUrl = entry["photoUrl"] as? String,
            let basePrice = entry["base_item_price"] as? Int,
            let unitPrice = entry["unit_price"] as? Int,
            let totalPrice = entry["total_price"] as? Int,
            let quantity = entry["quantity"] as? Int,
            let size = entry["size"] as? String,
            let modifierKeys = entry["modifierKeys"] as? [String],
            let sizeAddOnPrice = entry["sizeAddOnPrice"] as? Int,
            let toppingsAddOnPrice = entry["toppingsAddOnPrice"] as? Int
        else {
            continue
        }

        // Optional fields: modifiers become nil when absent or not [Modifier];
        // toppings fall back to an empty array.
        // NOTE(review): whether a raw dictionary value can ever cast to [Modifier]
        // depends on how Modifier is declared — confirm.
        self.modifiers = entry["modifiers"] as? [Modifier]
        self.toppings = entry["toppings"] as? [String] ?? [String]()

        let totalModPrice = sizeAddOnPrice + toppingsAddOnPrice
        let lineItem = MenuItem(
            itemId: itemId,
            name: name,
            modifiers: self.modifiers,
            photoUrl: photoUrl,
            quantity: quantity,
            basePrice: basePrice,
            unitPrice: unitPrice,
            totalPrice: totalPrice,
            totalModPrice: totalModPrice,
            sizeAddOnPrice: sizeAddOnPrice,
            toppingsAddOnPrice: toppingsAddOnPrice,
            description: description,
            size: size,
            toppings: self.toppings,
            category: category,
            modifierKeys: modifierKeys
        )
        parsedItems.append(lineItem)
    }
    return parsedItems
}
答案 1 :(得分:1)
这是我的示例:您必须使用 WrappedArray 来接收数组,然后再把它转换为 List。
/*
+------+---------------+---------------------------------------------+---------------+
| Id| value | time |aggregateType |
+------+---------------+---------------------------------------------+---------------+
|0001 | [1.5,3.4,4.5]| [1551502200000,1551502200000,1551502200000] | Sum |
+------+---------------+---------------------------------------------+---------------+
**/
// Schema matching the table above: string id, array<double> values,
// array<long> timestamps and a string aggregate type.
StructField idField = createStructField("Id", DataTypes.StringType, true);
StructField valueField = createStructField("value", DataTypes.createArrayType(DataTypes.DoubleType, false), false);
StructField timeField = createStructField("time", DataTypes.createArrayType(DataTypes.LongType, false), false);
StructField aggregateTypeField = createStructField("aggregateType", DataTypes.StringType, true);
StructType dataSchema = new StructType(new StructField[] {idField, valueField, timeField, aggregateTypeField});

// Single sample row reproducing the data from the question.
List<Row> data = new ArrayList<>();
data.add(RowFactory.create(
        "0001",
        Arrays.asList(1.5, 3.4, 4.5),
        Arrays.asList(1551502200000L, 1551502200000L, 1551502200000L),
        "sum"));

Dataset<Row> example = spark.createDataFrame(data, dataSchema);
example.show(false);

// The array columns are declared as WrappedArray parameters and converted to
// java.util.List inside the UDF body.
UDF3<String, WrappedArray<Long>, WrappedArray<Double>, Double> myUDF = (aggregateType, times, values) -> {
    // Converted for illustration only; the sample aggregation below does not read it.
    List<Long> timeList = JavaConversions.seqAsJavaList(times);
    List<Double> valueList = JavaConversions.seqAsJavaList(values);

    // Example: only the "sum" aggregate is implemented; anything else yields 0.
    if (!"sum".equals(aggregateType)) {
        return 0.0;
    }
    return valueList.stream()
            .mapToDouble(Double::doubleValue)
            .sum();
};

spark.udf().register("myUDF", myUDF, DataTypes.DoubleType);

// Argument order must match the UDF signature: aggregateType, time, value.
example = example.withColumn("new", callUDF("myUDF", col("aggregateType"), col("time"), col("value")));
example.show(false);
您可以从 GitHub 获取它。