我曾经成功获取到推文,但只有一条。大多数情况下,程序既不报错,也收不到任何推文。身份验证没有问题:不使用 Spark 时,我可以正常获取推文。
Hadoop 2.6.0-cdh5.11.0
Java 1.8
import java.io.IOException;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.twitter.TwitterUtils;
import twitter4j.Status;
/**
 * Spark Streaming driver that reads the Twitter stream through
 * {@code TwitterUtils.createStream} and prints every status that carries a
 * geolocation.
 *
 * <p>OAuth credentials are passed to twitter4j through the
 * {@code twitter4j.oauth.*} system properties set in {@link #setCredentials()}.
 */
public class TweetStream {

    /** Batch interval of the streaming context, in milliseconds. */
    private static final long BATCH_INTERVAL_MS = 10000L;

    /**
     * Minimum number of local worker threads. A receiver-based input stream
     * permanently occupies one thread, so at least one more is required to
     * process the received batches.
     */
    private static final int MIN_LOCAL_THREADS = 2;

    /**
     * Publishes the Twitter OAuth credentials as system properties.
     * The values are intentionally blank here and must be filled in before running.
     */
    private static void setCredentials() {
        System.setProperty("twitter4j.oauth.consumerKey", "");
        System.setProperty("twitter4j.oauth.consumerSecret", "");
        System.setProperty("twitter4j.oauth.accessToken", "");
        System.setProperty("twitter4j.oauth.accessTokenSecret", "");
    }

    /**
     * Starts the streaming job and blocks until it terminates.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // "local[*]" resolves to the number of available cores. On a single-core
        // machine that is one thread, which the Twitter receiver takes for itself,
        // leaving nothing to run the batches: no errors, but no output either.
        // Guarantee at least MIN_LOCAL_THREADS while still using every core.
        int localThreads = Math.max(MIN_LOCAL_THREADS, Runtime.getRuntime().availableProcessors());
        SparkConf sparkConf = new SparkConf()
                .setAppName("TWS")
                .setMaster("local[" + localThreads + "]");
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(BATCH_INTERVAL_MS));

        setCredentials();

        // Silence everything below ERROR once the context is up.
        Logger rootLogger = Logger.getRootLogger();
        rootLogger.setLevel(Level.ERROR);

        JavaReceiverInputDStream<Status> stream = TwitterUtils.createStream(jssc);

        // Only statuses with a geolocation pass the filter.
        // NOTE(review): geotagged tweets are presumably a small share of the
        // stream, so sparse output is expected even when everything works.
        stream.filter(status -> status.getGeoLocation() != null)
                .foreachRDD(rdd -> rdd.foreach(TweetStream::save));

        jssc.start();
        try {
            jssc.awaitTermination();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the caller/JVM can observe the interruption.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Handles one accepted status; currently just prints it to standard output.
     *
     * @param status the tweet to output
     * @throws IOException declared for future persistent sinks; not thrown by the current body
     */
    private static void save(Status status) throws IOException {
        System.out.println(status);
    }
}
控制台:
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
17/06/24 10:26:59 INFO SparkContext: Running Spark version 2.1.0
17/06/24 10:27:00 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
17/06/24 10:27:00 WARN Utils: Your hostname, quickstart.cloudera resolves to a loopback address: 127.0.0.1; using 10.0.2.15 instead (on interface eth1)
17/06/24 10:27:00 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
17/06/24 10:27:00 INFO SecurityManager: Changing view acls to: cloudera
17/06/24 10:27:00 INFO SecurityManager: Changing modify acls to: cloudera
17/06/24 10:27:00 INFO SecurityManager: Changing view acls groups to:
17/06/24 10:27:00 INFO SecurityManager: Changing modify acls groups to:
17/06/24 10:27:00 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(cloudera); groups with view permissions: Set(); users with modify permissions: Set(cloudera); groups with modify permissions: Set()
17/06/24 10:27:00 INFO Utils: Successfully started service 'sparkDriver' on port 39180.
17/06/24 10:27:00 INFO SparkEnv: Registering MapOutputTracker
17/06/24 10:27:00 INFO SparkEnv: Registering BlockManagerMaster
17/06/24 10:27:00 INFO BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information
17/06/24 10:27:00 INFO BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up
17/06/24 10:27:00 INFO DiskBlockManager: Created local directory at /tmp/blockmgr-b42e754c-3bfb-4997-aed1-22db5d7296f6
17/06/24 10:27:00 INFO MemoryStore: MemoryStore started with capacity 669.1 MB
17/06/24 10:27:00 INFO SparkEnv: Registering OutputCommitCoordinator
17/06/24 10:27:01 INFO Utils: Successfully started service 'SparkUI' on port 4040.
17/06/24 10:27:01 INFO SparkUI: Bound SparkUI to 0.0.0.0, and started at http://10.0.2.15:4040
17/06/24 10:27:01 INFO Executor: Starting executor ID driver on host localhost
17/06/24 10:27:01 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 36428.
17/06/24 10:27:01 INFO NettyBlockTransferService: Server created on 10.0.2.15:36428
17/06/24 10:27:01 INFO BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
17/06/24 10:27:01 INFO BlockManagerMaster: Registering BlockManager BlockManagerId(driver, 10.0.2.15, 36428, None)
17/06/24 10:27:01 INFO BlockManagerMasterEndpoint: Registering block manager 10.0.2.15:36428 with 669.1 MB RAM, BlockManagerId(driver, 10.0.2.15, 36428, None)
17/06/24 10:27:01 INFO BlockManagerMaster: Registered BlockManager BlockManagerId(driver, 10.0.2.15, 36428, None)
17/06/24 10:27:01 INFO BlockManager: Initialized BlockManager: BlockManagerId(driver, 10.0.2.15, 36428, None)
pom.xml:
<!-- Maven dependencies of the TweetStream job. The Spark artifacts all target Scala 2.11 and version 2.1.0. -->
<dependencies>
<!-- Spark core runtime (SparkConf etc.). -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.1.0</version>
</dependency>
<!-- Spark Streaming API (JavaStreamingContext, DStreams). -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>2.1.0</version>
</dependency>
<!-- Apache Bahir Twitter connector providing TwitterUtils. -->
<dependency>
<groupId>org.apache.bahir</groupId>
<artifactId>spark-streaming-twitter_2.11</artifactId>
<version>2.1.0</version>
</dependency>
<!-- NOTE(review): the Bahir connector presumably brings in its own twitter4j artifacts transitively;
     pinning twitter4j-core to 4.0.6 here may mix twitter4j versions on the classpath.
     TODO confirm with `mvn dependency:tree`. -->
<dependency>
<groupId>org.twitter4j</groupId>
<artifactId>twitter4j-core</artifactId>
<version>4.0.6</version>
</dependency>
</dependencies>