我有一个文本文件,其中包含每条推文的 ID、内容、标签和时间,但我不知道如何把这些信息存储到推文列表中。我按如下方式创建了一个 Tweet 类:
/**
 * Mutable holder for the fields of a single tweet record.
 *
 * <p>All values are stored exactly as passed in; no validation or parsing is
 * performed by this class.
 */
public class Tweet {
    private String type;
    private String origin;
    private String tweetText;
    private String url;
    private String tweetID;
    private String tweetDate;
    private int retCount;
    private String favourit;
    private String mEntities;
    private String hashtags;

    /**
     * Creates a tweet with only its ID and origin set; every other String
     * field stays {@code null} and the retweet count stays {@code 0}.
     *
     * @param tweetID the tweet identifier
     * @param origin  the origin value of the tweet
     */
    public Tweet(String tweetID, String origin) {
        this.tweetID = tweetID;
        this.origin = origin;
    }

    /**
     * Creates a fully populated tweet.
     *
     * @param type      the record type
     * @param origin    the origin value
     * @param tweetText the tweet text
     * @param url       the URL value
     * @param tweetID   the tweet identifier
     * @param tweetDate the tweet date, kept as an unparsed string
     * @param retCount  the retweet count
     * @param favourit  the favourite flag, kept as a string
     * @param mEntities the mentioned entities
     * @param hashtags  the hashtags
     */
    public Tweet(String type, String origin, String tweetText, String url, String tweetID,
            String tweetDate, int retCount, String favourit, String mEntities, String hashtags) {
        this.type = type;
        this.origin = origin;
        this.tweetText = tweetText;
        this.url = url;
        this.tweetID = tweetID;
        this.tweetDate = tweetDate;
        this.retCount = retCount;
        this.favourit = favourit;
        this.mEntities = mEntities;
        this.hashtags = hashtags;
    }

    /** @return the record type */
    public String getType() {
        return type;
    }

    /** @param type the record type to store */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the origin value */
    public String getOrigin() {
        return origin;
    }

    /** @param origin the origin value to store */
    public void setOrigin(String origin) {
        this.origin = origin;
    }

    /** @return the tweet text */
    public String getTweetText() {
        return tweetText;
    }

    /** @param tweetText the tweet text to store */
    public void setTweetText(String tweetText) {
        this.tweetText = tweetText;
    }

    /** @return the URL value */
    public String getUrl() {
        return url;
    }

    /** @param url the URL value to store */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the tweet identifier */
    public String getTweetID() {
        return tweetID;
    }

    /** @param tweetID the tweet identifier to store */
    public void setTweetID(String tweetID) {
        this.tweetID = tweetID;
    }

    /** @return the tweet date as an unparsed string */
    public String getTweetDate() {
        return tweetDate;
    }

    /** @param tweetDate the tweet date string to store */
    public void setTweetDate(String tweetDate) {
        this.tweetDate = tweetDate;
    }

    /** @return the retweet count */
    public int getRetCount() {
        return retCount;
    }

    /** @param retCount the retweet count to store */
    public void setRetCount(int retCount) {
        this.retCount = retCount;
    }

    /** @return the favourite flag as a string */
    public String getFavourit() {
        return favourit;
    }

    /** @param favourit the favourite flag to store */
    public void setFavourit(String favourit) {
        this.favourit = favourit;
    }

    /** @return the mentioned entities */
    public String getmEntities() {
        return mEntities;
    }

    /** @param mEntities the mentioned entities to store */
    public void setmEntities(String mEntities) {
        this.mEntities = mEntities;
    }

    /** @return the hashtags */
    public String getHashtags() {
        return hashtags;
    }

    /** @param hashtags the hashtags to store */
    public void setHashtags(String hashtags) {
        this.hashtags = hashtags;
    }
}
我的数据文件格式如下:
***
***
Type:status
Origin: Here's link to listen live to our discussion of #debtceiling #politics :
Text: Here's link to listen live to our discussion of :
URL:
ID: 96944336150867968
Time: Fri Jul 29 09:05:05 CDT 2011
RetCount: 0
Favorite: false
MentionedEntities:
Hashtags: debtceiling politics
***
***
Type:status
Origin: Now we're talking #debtceiling w/ Dick Polman @NewsWorksWHYY @PhillyInquirer & Bill Galston @BrookingsInst @NoLabelsOrg
Text: Now we're talking w/ Dick Polman & Bill Galston
URL:
ID: 96943803600089088
Time: Fri Jul 29 09:02:58 CDT 2011
RetCount: 1
Favorite: false
MentionedEntities: 136337303 151106990 14495726 15161791
Hashtags: debtceiling
***
***
我想读取此文件并将信息存储到列表中。我从下面这段代码开始,但不知道如何解决这个问题:
/**
 * Reads {@code file} line by line and collects the second comma-separated
 * field of every line that contains at least one comma.
 *
 * <p>Lines without a comma are skipped. The reader is closed before the
 * method returns, whether it completes normally or throws.
 *
 * @param file the text file to read
 * @return the second field of each qualifying line, in file order; never {@code null}
 * @throws IOException if the file cannot be opened or read
 */
public static List<String> readTweets(File file) throws IOException {
    List<String> tweets = new ArrayList<String>();
    // try-with-resources releases the file handle even when readLine() throws.
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
        String line;
        while ((line = reader.readLine()) != null) {
            String[] fields = line.split(",");
            if (fields.length > 1) {
                tweets.add(fields[1]);
            }
        }
    }
    return tweets;
}
答案 0 :(得分:1)
从您目前尝试的代码来看,我会这样写:
/**
 * Loads every line of {@code file} and collects the second comma-separated
 * field of each line that contains at least one comma.
 *
 * <p>Lines without a comma are skipped.
 *
 * @param file the text file to read
 * @return the second field of each qualifying line, in file order; never {@code null}
 * @throws IOException if the file cannot be read
 */
public static List<String> readTweets(File file) throws IOException {
    List<String> tweets = new ArrayList<String>();
    for (String line : Files.readAllLines(file.toPath())) {
        String[] part = line.split(",");
        // Index 1 only exists when the line held at least one comma.
        if (part.length > 1) {
            tweets.add(part[1]);
        }
    }
    return tweets;
}
但是,如果让我编写一个纯粹用于把推文内容打印到控制台的应用程序,我会这样做:
TweetReader.java
package Testers;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;
/**
 * Reads tweet records from plain-text files and appends their fields to a log file.
 *
 * <p>Input layout handled by {@link #readTweets(File)}: records are delimited by
 * lines consisting solely of {@code ***}; every other non-blank line has the form
 * {@code Header:value}, where {@code Header} is one of {@code Type}, {@code Origin},
 * {@code Text}, {@code URL}, {@code ID}, {@code Time}, {@code RetCount},
 * {@code Favorite}, {@code MentionedEntities} or {@code Hashtags}.
 */
public class TweetReader {

    /** Line that delimits records in the input file. */
    private static final String SEPARATOR = "***";

    /**
     * Parses every tweet record contained in {@code file}.
     *
     * <p>A record is emitted when a separator line follows at least one field line,
     * so any number of consecutive separators (including leading ones) never
     * produces an empty tweet. Field values start fresh for each record, and a
     * final record that is not followed by a separator is still returned.
     * Blank lines are ignored.
     *
     * @param file the text file to parse
     * @return the parsed tweets in file order; never {@code null}
     * @throws IOException if the file cannot be read, a line has no {@code :},
     *         a header is not recognised, or {@code RetCount} is not an integer
     */
    public static List<Tweet> readTweets(File file) throws IOException {
        List<Tweet> tweets = new ArrayList<Tweet>();
        TweetFields current = new TweetFields();
        for (String rawLine : Files.readAllLines(file.toPath())) {
            String line = rawLine.trim();
            if (line.isEmpty()) {
                continue;
            }
            if (line.equals(SEPARATOR)) {
                // Only close a record that actually collected fields; this makes
                // doubled and leading separators harmless.
                if (current.populated) {
                    tweets.add(current.toTweet());
                    current = new TweetFields();
                }
                continue;
            }
            int colon = line.indexOf(':');
            if (colon < 0) {
                throw new IOException("Line cannot be processed:" + line);
            }
            // Everything after the first colon is the value, so no per-header
            // character offsets are needed.
            current.set(line.substring(0, colon), line.substring(colon + 1).trim(), line);
        }
        if (current.populated) {
            tweets.add(current.toTweet());
        }
        return tweets;
    }

    /**
     * Reads every regular file in the working directory (except names containing
     * {@code log.txt} or {@code .jar}) as a tweet file and writes all parsed
     * fields to {@code log.txt}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File log = new File("log.txt");
        List<Tweet> tweets = new ArrayList<Tweet>();
        try {
            File workingDir = new File(".").getAbsoluteFile();
            File[] candidates = workingDir.listFiles();
            if (candidates == null) {
                // listFiles() returns null when the directory cannot be listed.
                throw new IOException("Cannot list files in " + workingDir);
            }
            for (File tweet : candidates) {
                String name = tweet.getName();
                if (tweet.isFile() && !name.contains("log.txt") && !name.contains(".jar")) {
                    log("Reading file: " + tweet.getAbsolutePath(), log);
                    tweets.addAll(readTweets(tweet));
                }
            }
            System.out.println("Reading tweets now");
            for (Tweet t : tweets) {
                log("Type = " + t.getType(), log);
                log("Origin = " + t.getOrigin(), log);
                log("Text = " + t.getTweetText(), log);
                log("URL = " + t.getURL(), log);
                log("ID = " + t.getTweetID(), log);
                log("Date = " + t.getTweetDate(), log);
                log("Ret count = " + t.getRetCount(), log);
                log("Favourite = " + t.getFavourite(), log);
                log("Mentioned entities = " + t.getMentionedEntities(), log);
                log("Hashtags = " + t.getHashtags(), log);
                log("Tweet finished", log);
            }
        } catch (IOException e) {
            log(e, log);
        }
        log("Finished reading tweets.", log);
    }

    /**
     * Appends the message and stack trace of {@code e} to the log file.
     *
     * @param e   the exception to record
     * @param log the log file to append to
     */
    private static void log(IOException e, File log) {
        log(e.getMessage(), log);
        for (StackTraceElement element : e.getStackTrace()) {
            log(" " + element, log);
        }
    }

    /**
     * Appends one line of text to the log file.
     *
     * @param string the text to write
     * @param log    the log file to append to
     */
    private static void log(String string, File log) {
        // try-with-resources closes the writer even when write() fails.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(log, true))) {
            writer.write(string);
            writer.newLine();
        } catch (IOException e) {
            // The log file itself is unusable here, so stderr is the only channel left.
            e.printStackTrace();
        }
    }

    /** Accumulates the field values of the record currently being parsed. */
    private static final class TweetFields {
        private String type = "";
        private String origin = "";
        private String tweetText = "";
        private String url = "";
        private String tweetID = "";
        private String tweetDate = "";
        private int retCount = 0;
        private String favourite = "";
        private String mEntities = "";
        private String hashTags = "";
        /** True once at least one field line has been stored. */
        private boolean populated = false;

        /** Stores {@code value} under {@code header}; {@code line} is used for error messages. */
        void set(String header, String value, String line) throws IOException {
            switch (header) {
                case "Type":
                    type = value;
                    break;
                case "Origin":
                    origin = value;
                    break;
                case "Text":
                    tweetText = value;
                    break;
                case "URL":
                    url = value;
                    break;
                case "ID":
                    tweetID = value;
                    break;
                case "Time":
                    tweetDate = value;
                    break;
                case "RetCount":
                    retCount = parseRetCount(value, line);
                    break;
                case "Favorite":
                    favourite = value;
                    break;
                case "MentionedEntities":
                    mEntities = value;
                    break;
                case "Hashtags":
                    hashTags = value;
                    break;
                default:
                    throw new IOException("Line cannot be identified as part of a tweet:" + line);
            }
            populated = true;
        }

        /** Parses the retweet count; an empty value counts as zero. */
        private static int parseRetCount(String value, String line) throws IOException {
            if (value.isEmpty()) {
                return 0;
            }
            try {
                return Integer.parseInt(value);
            } catch (NumberFormatException e) {
                throw new IOException("RetCount is not a number:" + line, e);
            }
        }

        /** Builds a {@code Tweet} from the values collected so far. */
        Tweet toTweet() {
            return new Tweet(type, origin, tweetText, url, tweetID, tweetDate,
                    retCount, favourite, mEntities, hashTags);
        }
    }
}
Tweet.java
package Testers;
/**
 * Read-only holder for the fields of one tweet record.
 *
 * <p>Values are stored exactly as supplied to the constructor and exposed
 * through getters; the class performs no validation.
 */
public class Tweet {
    private final String type;
    private final String origin;
    private final String tweetText;
    private final String url;
    private final String tweetID;
    private final String tweetDate;
    private final int retCount;
    private final String favourit;
    private final String mEntities;
    private final String hashtags;

    /**
     * Creates a tweet carrying only an ID and an origin. All remaining String
     * fields are {@code null} and the retweet count is {@code 0}.
     *
     * @param tweetID the tweet identifier
     * @param origin  the origin value
     */
    public Tweet(String tweetID, String origin) {
        this(null, origin, null, null, tweetID, null, 0, null, null, null);
    }

    /**
     * Creates a tweet with every field supplied.
     *
     * @param type      the record type
     * @param origin    the origin value
     * @param tweetText the tweet text
     * @param url       the URL value
     * @param tweetID   the tweet identifier
     * @param tweetDate the tweet date, kept as a string
     * @param retCount  the retweet count
     * @param favourit  the favourite flag, kept as a string
     * @param mEntities the mentioned entities
     * @param hashtags  the hashtags
     */
    public Tweet(String type, String origin, String tweetText, String url, String tweetID,
            String tweetDate, int retCount, String favourit, String mEntities, String hashtags) {
        this.type = type;
        this.origin = origin;
        this.tweetText = tweetText;
        this.url = url;
        this.tweetID = tweetID;
        this.tweetDate = tweetDate;
        this.retCount = retCount;
        this.favourit = favourit;
        this.mEntities = mEntities;
        this.hashtags = hashtags;
    }

    /** @return the record type */
    public String getType() {
        return this.type;
    }

    /** @return the origin value */
    public String getOrigin() {
        return this.origin;
    }

    /** @return the tweet text */
    public String getTweetText() {
        return this.tweetText;
    }

    /** @return the URL value */
    public String getURL() {
        return this.url;
    }

    /** @return the tweet identifier */
    public String getTweetID() {
        return this.tweetID;
    }

    /** @return the tweet date string */
    public String getTweetDate() {
        return this.tweetDate;
    }

    /** @return the retweet count */
    public int getRetCount() {
        return this.retCount;
    }

    /** @return the favourite flag string */
    public String getFavourite() {
        return this.favourit;
    }

    /** @return the mentioned entities */
    public String getMentionedEntities() {
        return this.mEntities;
    }

    /** @return the hashtags */
    public String getHashtags() {
        return this.hashtags;
    }
}
答案 1 :(得分:0)
//global attribute
List<Tweet> tweetList = new ArrayList<>();
String line = "";
String[] fields;
// Each record is expected to be two "***" separator lines followed by ten
// field lines. Reading stops at end of input instead of building a tweet
// from null lines.
while ((line = reader.readLine()) != null) {
    // `line` now holds the first "***"; consume the second one.
    line = reader.readLine();
    if (line == null) {
        break;
    }
    // Start every record from an empty buffer so earlier tweets' lines are
    // not passed along again.
    // NOTE(review): assumes `tweets` is a mutable List<String> declared
    // elsewhere and that createTweetFromList does not keep a reference to it.
    tweets.clear();
    boolean complete = true;
    for (int i = 0; i < 10; i++) {
        // these are for the other data
        line = reader.readLine();
        if (line == null) {
            // Input ended in the middle of a record (or right after the
            // trailing separators): drop the partial record.
            complete = false;
            break;
        }
        tweets.add(line);
    }
    if (!complete) {
        break;
    }
    Tweet tweet = createTweetFromList(tweets);
    tweetList.add(tweet);
}