// Word count: split each line on commas, pair every word with 1, sum the
// counts per word, then collect the (word, count) pairs into a local Array.
// NOTE(review): `rdd` is defined outside this snippet — presumably an RDD[String]; confirm.
var word_count = rdd.flatMap(lines=>lines.split(",")).map(words=>(words,1)).reduceByKey(_+_).collect
word_count:Array[(String,Int)] = Array((Ankita,1),(shelly,1),(A,2),(B,1))
// The query below returns an empty array, but the goal is to extract (A,2) and (B,1).
// NOTE(review): the predicate compares each whole (String, Int) element with a String.
var filtered = word_count.filter(values=>(values=="A") || (values=="B")).collect
filtered:Array[(String,Int)] = Array()
答案 0 :(得分:1)
应该是:
// Keep only the pairs whose first element (the word) is "A" or "B".
word_count.filter { case (word, _) => word == "A" || word == "B" }
或
// Keep only the pairs whose first element (the word) is one of the wanted keys.
word_count.filter { case (word, _) => Seq("A", "B").contains(word) }
您的代码将 Tuple2 与 String 进行比较,这样的表达式结果恒为 false:
scala> ("A", 1) == "A"
<console>:24: warning: comparing values of types (String, Int) and String using `==' will always yield false
("A", 1) == "A"
^
res0: Boolean = false