我基于此 Spark 示例(Spark example)开发了一个使用 Kafka Streaming 和 Spark SQL 的 Spark 应用程序。
有 3 个线程针对同一个 SQLContext 运行不同的 SQL 查询:
/**
 * Spark Streaming driver: consumes JSON messages from Kafka through a direct stream and, for
 * every non-empty micro-batch, schedules one {@link ActionThread} per configured SQL query on a
 * shared thread pool.
 *
 * <p>The class is {@link Serializable} because the anonymous Spark functions created inside
 * {@link #run(String, String, String)} hold a reference to the enclosing instance.
 */
public final class TestSpark implements Serializable {
    private static final Logger logger = LoggerFactory.getLogger("TestSpark");
    private static final ThreadFactory threadFactory = Executors.defaultThreadFactory();
    // 4 core / 8 max threads, 120 s keep-alive, at most 10000 queued tasks; overflow is handed
    // to RejectedExecutionHandlerImpl.
    private static final ExecutorService executorServices = new ThreadPoolExecutor(
            4, 8, 120, TimeUnit.SECONDS,
            new LinkedBlockingQueue<Runnable>(10000),
            threadFactory,
            new RejectedExecutionHandlerImpl());

    public TestSpark() {
    }

    /**
     * Builds the streaming pipeline, starts it and blocks until the streaming context terminates.
     *
     * @param brokers comma-separated Kafka broker list, passed as {@code metadata.broker.list}
     * @param topics  comma-separated list of Kafka topics to consume
     * @param master  Spark master URL; when {@code null} or blank, {@code local[*]} is used
     */
    public void run(String brokers, String topics, String master) {
        // Declared final so the anonymous foreachRDD function below can capture it on any
        // Java version.
        final List<String> queryStrings = new ArrayList<String>();
        queryStrings.add("select id, type, key, side, shares, price, time, rank, flag, flag2, comment, source, status, price1, price2 from EventData where key in ('6267359')");
        queryStrings.add("select id, type, key, side, shares, price, time, rank, flag, flag2, comment, source, status, price1, price2 from EventData where key in ('6558484')");
        queryStrings.add("select id, type, key, side, shares, price, time, rank, flag, flag2, comment, source, status, price1, price2 from EventData where key in ('BP3R8Z3')");

        // Honour the caller-supplied master instead of always running locally.
        String effectiveMaster = (master == null || master.trim().isEmpty())
                ? "local[*]"
                : master.trim();

        // Create context with 1 second batch interval
        SparkConf sparkConf = new SparkConf().setAppName("TestSpark").setMaster(effectiveMaster);
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

        HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(topics.split(",")));
        HashMap<String, String> kafkaParams = new HashMap<String, String>();
        kafkaParams.put("metadata.broker.list", brokers);

        // Create direct kafka stream with brokers and topics
        JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(
                jssc,
                String.class,
                String.class,
                StringDecoder.class,
                StringDecoder.class,
                kafkaParams,
                topicsSet
        );

        JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
            @Override
            public String call(Tuple2<String, String> tuple2) {
                return tuple2._2(); // We do not care about the key here
            }
        });

        lines.foreachRDD(new Function2<JavaRDD<String>, Time, Void>() {
            @Override
            public Void call(JavaRDD<String> rdd, Time time) throws Exception {
                if (!rdd.isEmpty()) {
                    SQLContext sqlContext = JavaSQLContextSingleton.getInstance(rdd.context());
                    // One task per query. The tasks run asynchronously on the pool; this
                    // callback returns without waiting for them to finish.
                    for (String queryString : queryStrings) {
                        executorServices.execute(new ActionThread(sqlContext, rdd, queryString));
                    }
                }
                return null;
            }
        });

        // Start the computation
        jssc.start();
        jssc.awaitTermination();
    }

    /**
     * Command-line entry point.
     *
     * @param args {@code <brokers> <topics> <master>}; the process exits with status 1 when
     *             fewer than three arguments are supplied
     */
    public static void main(String[] args) {
        if (args.length < 3) {
            System.err.println("Usage: TestSpark <brokers> <topics> <master>\n" +
                    " <brokers> is a list of one or more Kafka brokers\n" +
                    " <topics> is a list of one or more kafka topics to consume from\n" +
                    " <master> is spark cluster\n\n");
            System.exit(1);
        }
        TestSpark testSpark = new TestSpark();
        testSpark.run(args[0], args[1], args[2]);
    }
}
/**
 * Lazily instantiated singleton holder for the {@link SQLContext}.
 *
 * <p>The first call to {@link #getInstance(SparkContext)} creates the context; every later call
 * returns that same instance regardless of the argument passed.
 */
class JavaSQLContextSingleton {
    private static SQLContext instance = null;

    /**
     * Returns the shared {@link SQLContext}, creating it on first use.
     *
     * <p>Synchronized so that concurrent first calls cannot each construct their own context.
     *
     * @param sparkContext the Spark context used to build the SQLContext on the first call
     * @return the single shared SQLContext
     */
    public static synchronized SQLContext getInstance(SparkContext sparkContext) {
        if (instance == null) {
            instance = new SQLContext(sparkContext);
        }
        return instance;
    }
}
/**
 * Runs one SQL query against one batch of JSON strings and prints every resulting row to
 * standard output.
 *
 * <p>The query text refers to the temporary table {@code EventData}. Each task registers its own
 * batch under that name on the {@link SQLContext} it was given, so when several tasks use the
 * same context, registering the table and binding the query to it must happen as one atomic
 * step; otherwise a query can be resolved against a table registered by a different task.
 */
class ActionThread implements Serializable, Runnable {
    private static final long serialVersionUID = 1L;
    private static final Logger logger = LoggerFactory.getLogger("ActionThread");

    final SQLContext sqlContext;
    final JavaRDD<String> rdd;
    final String query;

    /**
     * @param sqlContext SQL context used to parse the JSON and run the query
     * @param rdd        batch of JSON strings to query
     * @param query      SQL text that selects from the {@code EventData} temp table
     */
    ActionThread(SQLContext sqlContext, JavaRDD<String> rdd, String query) {
        this.sqlContext = sqlContext;
        this.rdd = rdd;
        this.query = query;
    }

    /** Converts the batch to a DataFrame, runs the query on it and prints the matching rows. */
    @Override
    public void run() {
        // Built outside the lock so the critical section below stays short.
        DataFrame batch = sqlContext.read().json(rdd);

        final DataFrame filteredEvent;
        // Register + resolve under one lock on the context so that no other task can
        // re-register "EventData" between the two calls.
        // NOTE(review): relies on sql() resolving the table name when it is called (eager
        // analysis, believed to be the Spark 1.x default) - confirm for the deployed version.
        synchronized (sqlContext) {
            batch.registerTempTable("EventData");
            filteredEvent = sqlContext.sql(query);
        }

        List<String> eventList = filteredEvent.toJavaRDD().map(new RowFormatter()).collect();
        for (String event : eventList) {
            System.out.println(event);
        }
    }

    /**
     * Formats a row as {@code "ThreadID:<id> <row>"}, where the id is that of the thread that
     * executes the map function. Static so the serialized function does not carry a reference
     * to the enclosing task (and through it the SQLContext and the RDD).
     */
    private static final class RowFormatter implements Function<Row, String> {
        private static final long serialVersionUID = 1L;

        @Override
        public String call(Row row) {
            return "ThreadID:" + Thread.currentThread().getId() + " " + row.toString();
        }
    }
}
/**
 * Rejection policy that reports the rejected task on standard output and otherwise discards it.
 */
class RejectedExecutionHandlerImpl implements RejectedExecutionHandler {

    /**
     * Prints {@code "<task> is rejected"} for the task that could not be accepted.
     *
     * @param r        the task that was rejected
     * @param executor the executor that rejected it (not used)
     */
    @Override
    public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) {
        final String description = r.toString();
        System.out.println(description + " is rejected");
    }
}
然而,结果出乎我的意料——我预期每个 Row 中的第一个数字(id)是唯一的,但输出中却出现了重复!我通过监听同一个 Kafka 主题确认过,这些 id 本身确实是唯一的。
ThreadID:109 [1,ORDER,6558484,SELL,1000000.0,7.26,2016-01-29T16:36:44.232+03:00,HIGH,Y,_NA_,LMT: Order in Hand,BOAML_AP,null,7.18,7.19]
ThreadID:107 [0,ORDER,6267359,SELL,1000000.0,7.26,2016-01-29T16:36:44.232+03:00,HIGH,Y,_NA_,LMT: Order in Hand,BOAML_AP,null,7.18,7.19]
ThreadID:75 [0,ORDER,6267359,SELL,1000000.0,7.26,2016-01-29T16:36:44.232+03:00,HIGH,Y,_NA_,LMT: Order in Hand,BOAML_AP,null,7.18,7.19]
ThreadID:75 [3,ORDER,6267359,SELL,1000000.0,7.26,2016-01-29T16:36:44.232+03:00,HIGH,Y,_NA_,LMT: Order in Hand,BOAML_AP,null,7.18,7.19]
ThreadID:107 [2,ORDER,BP3R8Z3,SELL,1000000.0,7.26,2016-01-29T16:36:44.232+03:00,HIGH,Y,_NA_,LMT: Order in Hand,BOAML_AP,null,7.18,7.19]
ThreadID:75 [3,ORDER,6267359,SELL,1000000.0,7.26,2016-01-29T16:36:44.232+03:00,HIGH,Y,_NA_,LMT: Order in Hand,BOAML_AP,null,7.18,7.19]
有没有人知道造成这种情况的原因,或者我应该怎么做?
如果我在 'if (!rdd.isEmpty())' 下面添加一行打印语句,会得到以下输出:
Rdd:MapPartitionsRDD[7] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[9] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[11] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[21] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[23] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[25] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[27] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[29] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[31] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[33] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[35] at map at TestSpark.java:111
ThreadID:75 [0,ORDER,6267359,SELL,1000000.0,7.26,2016-01-29T16:36:44.232+03:00,HIGH,Y,_NA_,LMT: Order in Hand,BOAML_AP,null,7.18,7.19]
ThreadID:92 [0,ORDER,6267359,SELL,1000000.0,7.26,2016-01-29T16:36:44.232+03:00,HIGH,Y,_NA_,LMT: Order in Hand,BOAML_AP,null,7.18,7.19]
Rdd:MapPartitionsRDD[37] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[59] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[61] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[63] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[97] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[99] at map at TestSpark.java:111
ThreadID:75 [1,ORDER,6558484,SELL,1000000.0,7.26,2016-01-29T16:36:44.232+03:00,HIGH,Y,_NA_,LMT: Order in Hand,BOAML_AP,null,7.18,7.19]
Rdd:MapPartitionsRDD[124] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[163] at map at TestSpark.java:111
Rdd:MapPartitionsRDD[187] at map at TestSpark.java:111
ThreadID:93 [3,ORDER,6267359,SELL,1000000.0,7.26,2016-01-29T16:36:44.232+03:00,HIGH,Y,_NA_,LMT: Order in Hand,BOAML_AP,null,7.18,7.19]
Rdd:MapPartitionsRDD[220] at map at TestSpark.java:111
把打印语句换成 System.out.println("Rdd.collect:" + rdd.collect()); 之后,我得到了以下输出,所以看起来在进入线程之前数据都是正常的。也许我应该按顺序执行这些查询,让集群去并行执行过滤。
Rdd.collect:[{"id":0,"type":"ORDER","key":"6267359","key2":"BP3R8Z3","key3":null,"side":"SELL","shares":1000000.0,"price":7.26,"time":"2016-01-29T16:36:44.232+03:00","rank":"HIGH","flag":"Y","flag2":"_NA_","comment":"LMT: Order in Hand","others":"","others2":"","source":"BOAML_AP","status":null,"price1":7.18,"price2":7.19}, {"id":1,"type":"ORDER","key":"6558484","key2":"BP3R8Z3","key3":null,"side":"SELL","shares":1000000.0,"price":7.26,"time":"2016-01-29T16:36:44.232+03:00","rank":"HIGH","flag":"Y","flag2":"_NA_","comment":"LMT: Order in Hand","others":"","others2":"","source":"BOAML_AP","status":null,"price1":7.18,"price2":7.19}]
Rdd.collect:[{"id":2,"type":"ORDER","key":"BP3R8Z3","key2":"BP3R8Z3","key3":null,"side":"SELL","shares":1000000.0,"price":7.26,"time":"2016-01-29T16:36:44.232+03:00","rank":"HIGH","flag":"Y","flag2":"_NA_","comment":"LMT: Order in Hand","others":"","others2":"","source":"BOAML_AP","status":null,"price1":7.18,"price2":7.19}]
Rdd.collect:[{"id":3,"type":"ORDER","key":"6267359","key2":"BP3R8Z3","key3":null,"side":"SELL","shares":1000000.0,"price":7.26,"time":"2016-01-29T16:36:44.232+03:00","rank":"HIGH","flag":"Y","flag2":"_NA_","comment":"LMT: Order in Hand","others":"","others2":"","source":"BOAML_AP","status":null,"price1":7.18,"price2":7.19}]
Rdd.collect:[{"id":4,"type":"ORDER","key":"6558484","key2":"BP3R8Z3","key3":null,"side":"SELL","shares":1000000.0,"price":7.26,"time":"2016-01-29T16:36:44.232+03:00","rank":"HIGH","flag":"Y","flag2":"_NA_","comment":"LMT: Order in Hand","others":"","others2":"","source":"BOAML_AP","status":null,"price1":7.18,"price2":7.19}]
Rdd.collect:[{"id":5,"type":"ORDER","key":"BP3R8Z3","key2":"BP3R8Z3","key3":null,"side":"SELL","shares":1000000.0,"price":7.26,"time":"2016-01-29T16:36:44.232+03:00","rank":"HIGH","flag":"Y","flag2":"_NA_","comment":"LMT: Order in Hand","others":"","others2":"","source":"BOAML_AP","status":null,"price1":7.18,""price2":7.19}]
Rdd.collect:[{"id":6,"type":"ORDER","key":"6267359","key2":"BP3R8Z3","key3":null,"side":"SELL","shares":1000000.0,"price":7.26,"time":"2016-01-29T16:36:44.232+03:00","rank":"HIGH","flag":"Y","flag2":"_NA_","comment":"LMT: Order in Hand","others":"","others2":"","source":"BOAML_AP","status":null,"price1":7.18,"price2":7.19}]
Rdd.collect:[{"id":7,"type":"ORDER","key":"6558484","key2":"BP3R8Z3","key3":null,"side":"SELL","shares":1000000.0,"price":7.26,"time":"2016-01-29T16:36:44.232+03:00","rank":"HIGH","flag":"Y","flag2":"_NA_","comment":"LMT: Order in Hand","others":"","others2":"","source":"BOAML_AP","status":null,"price1":7.18,"price2":7.19}]
Rdd.collect:[{"id":8,"type":"ORDER","key":"BP3R8Z3","key2":"BP3R8Z3","key3":null,"side":"SELL","shares":1000000.0,"price":7.26,"time":"2016-01-29T16:36:44.232+03:00","rank":"HIGH","flag":"Y","flag2":"_NA_","comment":"LMT: Order in Hand","others":"","others2":"","source":"BOAML_AP","status":null,"price1":7.18,"price2":7.19}]
Rdd.collect:[{"id":9,"type":"ORDER","key":"6267359","key2":"BP3R8Z3","key3":null,"side":"SELL","shares":1000000.0,"price":7.26,"time":"2016-01-29T16:36:44.232+03:00","rank":"HIGH","flag":"Y","flag2":"_NA_","comment":"LMT: Order in Hand","others":"","others2":"","source":"BOAML_AP","status":null,"price1":7.18,""price2":7.19}]
Rdd.collect:[{"id":10,"type":"ORDER","key":"6558484","key2":"BP3R8Z3","key3":null,"side":"SELL","shares":1000000.0,"price":7.26,"time":"2016-01-29T16:36:44.232+03:00","rank":"HIGH","flag":"Y","flag2":"_NA_","comment":"LMT: Order in Hand","others":"","others2":"","source":"BOAML_AP","status":null,"price1":7.18,"price2":7.19}]
ThreadID:111 [1,ORDER,6558484,SELL,1000000.0,7.26,2016-01-29T16:36:44.232+03:00,HIGH,Y,_NA_,LMT: Order in Hand,BOAML_AP,null,7.18,7.19]
ThreadID:75 [0,ORDER,6267359,SELL,1000000.0,7.26,2016-01-29T16:36:44.232+03:00,HIGH,Y,_NA_,LMT: Order in Hand,BOAML_AP,null,7.18,7.19]
ThreadID:112 [0,ORDER,6267359,SELL,1000000.0,7.26,2016-01-29T16:36:44.232+03:00,HIGH,Y,_NA_,LMT: Order in Hand,BOAML_AP,null,7.18,7.19]
Rdd.collect:[{"id":11,"type":"ORDER","key":"BP3R8Z3","key2":"BP3R8Z3","key3":null,"side":"SELL","shares":1000000.0,"price":7.26,"time":"2016-01-29T16:36:44.232+03:00","rank":"HIGH","flag":"Y","flag2":"_NA_","comment":"LMT: Order in Hand","others":"","others2":"","source":"BOAML_AP","status":null,"price1":7.18,"price2":7.19}}]
Rdd.collect:[{"id":12,"type":"ORDER","key":"6267359","key2":"BP3R8Z3","key3":null,"side":"SELL","shares":1000000.0,"price":7.26,"time":"2016-01-29T16:36:44.232+03:00","rank":"HIGH","flag":"Y","flag2":"_NA_","comment":"LMT: Order in Hand","others":"","others2":"","source":"BOAML_AP","status":null,"price1":7.18,"price2":7.19}}]
Rdd.collect:[{"id":13,"type":"ORDER","key":"6558484","key2":"BP3R8Z3","key3":null,"side":"SELL","shares":1000000.0,"price":7.26,"time":"2016-01-29T16:36:44.232+03:00","rank":"HIGH","flag":"Y","flag2":"_NA_","comment":"LMT: Order in Hand","others":"","others2":"","source":"BOAML_AP","status":null,"price1":7.18,"price2":7.19}}]
Rdd.collect:[{"id":14,"type":"ORDER","key":"BP3R8Z3","key2":"BP3R8Z3","key3":null,"side":"SELL","shares":1000000.0,"price":7.26,"time":"2016-01-29T16:36:44.232+03:00","rank":"HIGH","flag":"Y","flag2":"_NA_","comment":"LMT: Order in Hand","others":"","others2":"","source":"BOAML_AP","status":null,"price1":7.18,"price2":7.19}}]
ThreadID:111 [3,ORDER,6267359,SELL,1000000.0,7.26,2016-01-29T16:36:44.232+03:00,HIGH,Y,_NA_,LMT: Order in Hand,BOAML_AP,null,7.18,7.19]
ThreadID:112 [2,ORDER,BP3R8Z3,SELL,1000000.0,7.26,2016-01-29T16:36:44.232+03:00,HIGH,Y,_NA_,LMT: Order in Hand,BOAML_AP,null,7.18,7.19]
Rdd.collect:[{"id":15,"type":"ORDER","key":"6267359","key2":"BP3R8Z3","key3":null,"side":"SELL","shares":1000000.0,"price":7.26,"time":"2016-01-29T16:36:44.232+03:00","rank":"HIGH","flag":"Y","flag2":"_NA_","comment":"LMT: Order in Hand","others":"","others2":"","source":"BOAML_AP","status":null,"price1":7.18,"price2":7.19}}]