Let me state my scenario: 10,000 servers are sending df (disk usage) data, which amounts to about 10,000 inputs every 5 seconds.
If the df Use% for any server exceeds 70%, the ROM size should be increased by 20%; if the df Use% for any server is below 30%, the ROM size should be decreased by 25%.
The code below fetches messages from Kafka, keeps those containing "%", and applies toUpperCase(). It is only meant to show my Kafka setup.
Can anyone help me with this?
package rnd

import kafka.serializer.StringDecoder
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Minutes, Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}

object WordFind {
  def main(args: Array[String]) {
    val conf = new SparkConf().setMaster("local[*]").setAppName("KafkaReceiver")
    val batchIntervalSeconds = 2
    val ssc = new StreamingContext(conf, Seconds(10))

    // Receiver-based Kafka stream: (key, message) pairs from topic "wordcounttopic"
    val kafkaStream: ReceiverInputDStream[(String, String)] =
      KafkaUtils.createStream(ssc, "localhost:2181", "spark-streaming-consumer-group", Map("wordcounttopic" -> 5))

    // Keep only the messages that contain "%" and upper-case them
    val filteredStream: DStream[(String, String)] = kafkaStream.filter(record =>
      record._2.contains("%")) // TODO: pattern matching here
    val outputDStream: DStream[String] = filteredStream.map(record => record._2.toUpperCase())
    outputDStream.print()

    ssc.start()
    ssc.awaitTerminationOrTimeout(batchIntervalSeconds * 5 * 1000)
  }
}
Please help me complete the code for this scenario.
Sample input:
Filesystem     1K-blocks  Used     Available  Use%  Mounted on
/dev/sda1      132239776  6210884  119311504  5%    /
tmpfs          4021876    0        4021876    0%    /dev/shm
Sample output: if Use% > 70 for any case -> Message: "Increase ROM size by 20%"; if Use% < 30 for any case -> Message: "Decrease ROM size by 25%"
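To make the rule concrete, here is a minimal standalone sketch of the decision logic; the function name romAction and the exact message strings are my own choices, not part of any existing code:

// Minimal sketch of the threshold rule from the scenario above (hypothetical helper).
def romAction(usePercent: Int): String = usePercent match {
  case p if p > 70 => "Increase ROM size by 20%" // more than 70% used -> grow
  case p if p < 30 => "Decrease ROM size by 25%" // less than 30% used -> shrink
  case _           => "No action"                // 30-70% is not covered by the scenario
}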
Even when I try to write the result to Elasticsearch, it throws an error:
package rnd

import kafka.serializer.StringDecoder
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Minutes, Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}

object WordFind {
  def main(args: Array[String]) {
    val conf = new SparkConf().setMaster("local[*]").setAppName("KafkaReceiver")
    val sc = new SparkContext(conf)
    val checkpointDir = "/usr/local/kafka/kafka_2.11-0.11.0.2/checkpoint/"
    val batchIntervalSeconds = 2
    val ssc = new StreamingContext(sc, Seconds(batchIntervalSeconds))

    // Receiver-based Kafka stream: (key, message) pairs from topic "wordcounttopic"
    val kafkaStream: ReceiverInputDStream[(String, String)] = KafkaUtils.createStream(ssc, "localhost:2181",
      "spark-streaming-consumer-group", Map("wordcounttopic" -> 5))

    val filteredStream: DStream[Array[String]] = kafkaStream
      .filter(!_._2.contains("Filesystem")) // eliminate header
      .map(_._2.split("\\s+"))              // split on whitespace

    val outputDStream: DStream[String] = filteredStream.map { row =>
      val useIdx = row.length - 2
      // if Use% > 70 for any case -> Message: Increase ROM size by 20%
      // if Use% < 30 for any case -> Message: Decrease ROM size by 25%
      val usePercent = row(useIdx).replace("%", "").toInt
      usePercent match {
        case x if x > 70 => "Increase ROM size by 20%"
        case x if x < 30 => "Decrease ROM size by 25%"
        case _           => "Undefined"
      }
    }

    outputDStream.print()

    import org.elasticsearch.spark.sql._
    outputDStream.saveToEs("dfvalueoperations_v1/kwc") // this is the line that fails to compile (error below)

    // To make sure data is not deleted by the time we query it interactively
    ssc.remember(Minutes(1))
    ssc.checkpoint(checkpointDir)

    // This starts the streaming context in the background.
    ssc.start()
    // Wait for 5 * batchIntervalSeconds before the foreground call times out.
    ssc.awaitTerminationOrTimeout(batchIntervalSeconds * 5 * 1000)
  }
}
Error: Error:(51, 21) value saveToEs is not a member of org.apache.spark.streaming.dstream.DStream[String]
outputDStream.saveToEs("kafkamessage_v1/KWC")
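As an aside (my observation, not part of the original question): saveToEs on a DStream comes from the streaming integration of elasticsearch-hadoop, not from org.elasticsearch.spark.sql._ (which covers DataFrames/Datasets). A minimal sketch, assuming a recent elasticsearch-spark (ES-Hadoop) dependency is on the classpath and the index name stays the same:

// Hedged sketch: the streaming import provides the implicit that adds saveToEs to DStream[T].
// Elasticsearch connection settings are usually placed on the SparkConf before the contexts
// are created, e.g.:
//   conf.set("es.nodes", "localhost")
//   conf.set("es.port", "9200")
//   conf.set("es.index.auto.create", "true")
import org.elasticsearch.spark.streaming._

outputDStream.saveToEs("dfvalueoperations_v1/kwc")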
Answer 0 (score: 0)
A few assumptions were made to get the required output:
1.) The header may appear in between the data, so a filter is used to remove it:
Filesystem 1K-blocks Used Available Use% Mounted on
2.) Since the Filesystem column may contain spaces in its value, the second-to-last field is used to extract Use%. (If this does not work, try a grouping regex to achieve the same thing; see the sketch after this list.)
3.) A Use% between 30 and 70 is not defined, so the output message is "Undefined" for those cases.
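To illustrate the grouping-regex alternative mentioned in 2.), here is a sketch; the pattern and helper name below are assumptions on my part and may need tuning for other df layouts:

// Hypothetical regex alternative to splitting on whitespace: capture the use-percentage
// that immediately precedes the mount point at the end of the line, so spaces inside the
// Filesystem column do not shift the field positions.
val usePattern = """(\d+)%\s+\S+\s*$""".r

def extractUsePercent(line: String): Option[Int] =
  usePattern.findFirstMatchIn(line).map(_.group(1).toInt)

// extractUsePercent("/dev/sda1 132239776 6210884 119311504 5% /") == Some(5)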
Sample input and output (using an Array[String]):
scala> val input =
| """|Filesystem 512-blocks Used Available Capacity iused ifree %iused Mounted on
| |/dev/disk1 234618880 154868528 79238352 67% 1784543 4293182736 0% /
| |devfs 364 364 0 100% 630 0 100% /dev
| |map -hosts 0 0 0 100% 0 0 100% /net
| |map auto_home 0 0 0 100% 0 0 100% /home""".stripMargin
scala> val inputStr: Array[Array[String]] = input.split("\n").filter(!_.contains("Filesystem")).map(_.split("\\s+"))
scala> val outputMessage = inputStr.map {
| row =>
| // Assuming the position is always second from last
| val elementPosition = row.length - 2
| // if Use%>70 for any case> Message: Increase ROM size by 20%
| // if Use%<30% for any case> Message: Decrease ROM size by 25%
| val usePercent = row(elementPosition).replace("%", "").toInt
| usePercent match {
| case x if x > 70 => (usePercent, "Increase ROM size by 20%")
| case x if x < 30 => (usePercent, "Decrease ROM size by 25%")
| case _ => (usePercent, "Undefined")
| }
| }
scala> outputMessage.foreach(println)
(0,Decrease ROM size by 25%)
(100,Increase ROM size by 20%)
(100,Increase ROM size by 20%)
(100,Increase ROM size by 20%)
This code works on an Array[String]; please test it with a ReceiverInputDStream[(String, String)]. The code should look something like:
val filteredStream: DStream[Array[String]] = kafkaStream
.filter(!_._2.contains("Filesystem")) // eliminate header
.map(_._2.split("\\s+")) // split with space
val outputDStream: DStream[String] = filteredStream.map {
row =>
val useIdx = row.length - 2
// if Use%>70 for any case> Message: Increase ROM size by 20%
// if Use%<30% for any case> Message: Decrease ROM size by 25%
val usePercent = row(useIdx).replace("%", "").toInt
usePercent match {
case x if x > 70 => "Increase ROM size by 20%"
case x if x < 30 => "Decrease ROM size by 25%"
case _ => "Undefined"
}
}
Hope this helps.