从文件中获取数据 - BufferedReader

时间:2016-05-16 20:33:38

标签: java twitter bufferedreader filereader

我有一个文本文件,其中保存了每条推文的 id、内容、标签和时间,但我不知道如何把这些信息存储到一个推文列表中。我按如下方式创建了一个推文类:

            /**
             * Plain data holder for one tweet record.
             *
             * <p>Values are stored exactly as supplied; this class performs no validation or parsing.
             */
            public class Tweet {
                private String type;
                private String origin;
                private String tweetText;
                private String url;
                private String tweetID;
                private String tweetDate;
                private int retCount;
                private String favourit;
                private String mEntities;
                private String hashtags;

                /**
                 * Creates a tweet with only its ID and origin set; every other
                 * field keeps its default ({@code null}, or {@code 0} for the count).
                 *
                 * @param tweetID the tweet identifier
                 * @param origin  the origin value of the tweet
                 */
                public Tweet(String tweetID, String origin) {
                    this.tweetID = tweetID;
                    this.origin = origin;
                }

                /**
                 * Creates a fully populated tweet.
                 *
                 * @param type      the record type
                 * @param origin    the origin value of the tweet
                 * @param tweetText the tweet text
                 * @param url       the URL attached to the tweet
                 * @param tweetID   the tweet identifier
                 * @param tweetDate the tweet date, kept as text
                 * @param retCount  the count stored for the tweet
                 * @param favourit  the favourite flag, kept as text
                 * @param mEntities the mentioned entities, kept as text
                 * @param hashtags  the hashtags, kept as text
                 */
                public Tweet(String type, String origin, String tweetText, String url, String tweetID, String tweetDate, int retCount, String favourit, String mEntities, String hashtags) {
                    this.type = type;
                    this.origin = origin;
                    this.tweetText = tweetText;
                    this.url = url;
                    this.tweetID = tweetID;
                    this.tweetDate = tweetDate;
                    this.retCount = retCount;
                    this.favourit = favourit;
                    this.mEntities = mEntities;
                    this.hashtags = hashtags;
                }

                /** @return the stored record type */
                public String getType() {
                    return type;
                }

                /** @param type the record type to store */
                public void setType(String type) {
                    this.type = type;
                }

                /** @return the stored origin value */
                public String getOrigin() {
                    return origin;
                }

                /** @param origin the origin value to store */
                public void setOrigin(String origin) {
                    this.origin = origin;
                }

                /** @return the stored tweet text */
                public String getTweetText() {
                    return tweetText;
                }

                /** @param tweetText the tweet text to store */
                public void setTweetText(String tweetText) {
                    this.tweetText = tweetText;
                }

                /** @return the stored URL */
                public String getUrl() {
                    return url;
                }

                /** @param url the URL to store */
                public void setUrl(String url) {
                    this.url = url;
                }

                /** @return the stored tweet identifier */
                public String getTweetID() {
                    return tweetID;
                }

                /** @param tweetID the tweet identifier to store */
                public void setTweetID(String tweetID) {
                    this.tweetID = tweetID;
                }

                /** @return the stored tweet date text */
                public String getTweetDate() {
                    return tweetDate;
                }

                /** @param tweetDate the tweet date text to store */
                public void setTweetDate(String tweetDate) {
                    this.tweetDate = tweetDate;
                }

                /** @return the stored count */
                public int getRetCount() {
                    return retCount;
                }

                /** @param retCount the count to store */
                public void setRetCount(int retCount) {
                    this.retCount = retCount;
                }

                /** @return the stored favourite flag text */
                public String getFavourit() {
                    return favourit;
                }

                /** @param favourit the favourite flag text to store */
                public void setFavourit(String favourit) {
                    this.favourit = favourit;
                }

                /** @return the stored mentioned-entities text */
                public String getmEntities() {
                    return mEntities;
                }

                /** @param mEntities the mentioned-entities text to store */
                public void setmEntities(String mEntities) {
                    this.mEntities = mEntities;
                }

                /** @return the stored hashtags text */
                public String getHashtags() {
                    return hashtags;
                }

                /** @param hashtags the hashtags text to store */
                public void setHashtags(String hashtags) {
                    this.hashtags = hashtags;
                }
            }

我的数据文件格式如下:

             ***
             ***
             Type:status
             Origin: Here's link to listen live to our discussion of  #debtceiling #politics : 
             Text: Here's link to listen live to our discussion of   : 
             URL: 
             ID: 96944336150867968
             Time: Fri Jul 29 09:05:05 CDT 2011
             RetCount: 0
             Favorite: false
             MentionedEntities: 
             Hashtags:  debtceiling politics
             ***
             ***
             Type:status
             Origin: Now we're talking #debtceiling w/ Dick Polman @NewsWorksWHYY @PhillyInquirer & Bill Galston @BrookingsInst @NoLabelsOrg 
            Text: Now we're talking  w/ Dick Polman   & Bill Galston   
            URL: 
            ID: 96943803600089088
            Time: Fri Jul 29 09:02:58 CDT 2011
            RetCount: 1
            Favorite: false
            MentionedEntities: 136337303 151106990 14495726 15161791 
            Hashtags:  debtceiling
            ***
            ***

我想读取这个文件并把其中的信息存储到一个列表中。我从下面这段代码开始,但不知道该如何解决这个问题:

            /**
             * Reads a text file line by line and collects the second comma-separated
             * field of every line that has more than one field.
             *
             * @param file the file to read
             * @return the collected second fields, in file order
             * @throws IOException if the file cannot be opened or read
             */
            public static List<String> readTweets(File file) throws IOException {
                List<String> tweets = new ArrayList<String>();
                // try-with-resources closes the reader even if readLine() throws,
                // so the file handle is never leaked.
                try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        String[] fields = line.split(",");
                        if (fields.length > 1) {
                            tweets.add(fields[1]);
                        }
                    }
                }
                return tweets;
            }

2 个答案:

答案 0 :(得分:1)

根据您正在尝试的代码的外观,这就是我要做的事情:

/**
 * Reads all lines of a text file and collects the second comma-separated
 * field of every line that has more than one field.
 *
 * @param file the file to read
 * @return the collected second fields, in file order
 * @throws IOException if the file cannot be read
 */
public static List<String> readTweets(File file) throws IOException {
    List<String> tweets = new ArrayList<String>();
    for (String line : Files.readAllLines(file.toPath())) {
        String[] part = line.split(",");
        // Only a line with at least two fields has a second field to collect.
        if (part.length > 1) {
            tweets.add(part[1]);
        }
    }
    return tweets;
}

但是,如果我编写了一个纯粹用于将推文内容打印到控制台的应用程序,那么我就是这样做的:

TweetReader.java

package Testers;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;

/**
 * Reads tweet records from plain-text files and appends their fields to a log file.
 *
 * <p>A record is a run of {@code Header: value} lines; records are delimited by
 * {@code ***} separator lines.
 */
public class TweetReader {

    /** Line that delimits tweet records in the data file. */
    private static final String RECORD_SEPARATOR = "***";

    /**
     * Parses every tweet record found in the given file.
     *
     * <p>Each line is trimmed first. A separator line closes the record in
     * progress only if at least one field line was read since the previous
     * separator, so doubled or leading separators never produce empty tweets.
     * Field lines that are not followed by a separator are not emitted.
     *
     * @param file the data file to parse
     * @return the parsed tweets, in file order
     * @throws IOException if the file cannot be read, a line has no colon, a
     *         header is not recognised, or the RetCount value is not an integer
     */
    public static List<Tweet> readTweets(File file) throws IOException {
        String type = "";
        String origin = "";
        String tweetText = "";
        String url = "";
        String tweetID = "";
        String tweetDate = "";
        int retCount = 0;
        String favourite = "";
        String mEntities = "";
        String hashTags = "";
        // True once at least one field line has been read for the current record.
        boolean recordOpen = false;
        List<Tweet> tweets = new ArrayList<Tweet>();

        for (String rawLine : Files.readAllLines(file.toPath())) {
            String line = rawLine.trim();

            if (line.equals(RECORD_SEPARATOR)) {
                if (recordOpen) {
                    tweets.add(new Tweet(type, origin, tweetText, url, tweetID, tweetDate, retCount, favourite, mEntities, hashTags));
                    // Reset so a field missing from the next record does not
                    // inherit this record's value.
                    type = "";
                    origin = "";
                    tweetText = "";
                    url = "";
                    tweetID = "";
                    tweetDate = "";
                    retCount = 0;
                    favourite = "";
                    mEntities = "";
                    hashTags = "";
                    recordOpen = false;
                }
                continue;
            }

            int colon = line.indexOf(':');
            if (colon < 0) {
                throw new IOException("Line cannot be processed:" + line);
            }
            String header = line.substring(0, colon);
            String value = valueAfter(line, colon);
            switch (header) {
                case "Type":
                    type = value;
                    break;
                case "Origin":
                    origin = value;
                    break;
                case "Text":
                    tweetText = value;
                    break;
                case "URL":
                    url = value;
                    break;
                case "ID":
                    tweetID = value;
                    break;
                case "Time":
                    tweetDate = value;
                    break;
                case "RetCount":
                    retCount = parseCount(value, line);
                    break;
                case "Favorite":
                    favourite = value;
                    break;
                case "MentionedEntities":
                    mEntities = value;
                    break;
                case "Hashtags":
                    hashTags = value;
                    break;
                default:
                    throw new IOException("Line cannot be identified as part of a tweet:" + line);
            }
            recordOpen = true;
        }
        return tweets;
    }

    /**
     * Returns the text after the header's colon, skipping one following space
     * if present. Deriving the offset from the colon position avoids a
     * hand-counted prefix length per header.
     */
    private static String valueAfter(String line, int colon) {
        int start = colon + 1;
        if (start < line.length() && line.charAt(start) == ' ') {
            start++;
        }
        return line.substring(start);
    }

    /**
     * Parses the RetCount value, reporting a malformed number as an
     * IOException so callers handle it like any other malformed line.
     */
    private static int parseCount(String value, String line) throws IOException {
        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException e) {
            throw new IOException("RetCount is not a number:" + line, e);
        }
    }

    /**
     * Parses every regular file in the working directory whose name contains
     * neither "log.txt" nor ".jar", then writes each tweet's fields to log.txt.
     *
     * @param args unused
     */
    public static void main(String[] args){
        File log = new File("log.txt");
        List<Tweet> tweets = new ArrayList<Tweet>();
        try {
            File f = new File(".").getAbsoluteFile();
            File[] array = f.listFiles();
            if (array == null) {
                // listFiles() returns null when the directory cannot be listed.
                throw new IOException("Cannot list directory: " + f);
            }
            for (File tweet : array) {
                if(tweet.isFile() && !tweet.getName().contains("log.txt") && !tweet.getName().contains(".jar")){
                    log("Reading file: " + tweet.getAbsolutePath(), log);
                    tweets.addAll(readTweets(tweet));
                }
            }
            System.out.println("Reading tweets now");
            for (Tweet t : tweets) {
                log("Type = " + t.getType(), log);
                log("Origin = " + t.getOrigin(), log);
                log("Text = " + t.getTweetText(), log);
                log("URL = " + t.getURL(), log);
                log("ID = " + t.getTweetID(), log);
                log("Date = " + t.getTweetDate(), log);
                log("Ret count = " + t.getRetCount(), log);
                log("Favourite = " + t.getFavourite(), log);
                log("Mentioned entities = " + t.getMentionedEntities(), log);
                log("Hashtags = " + t.getHashtags(), log);
                log("Tweet finished", log);
            }
        } catch (IOException e) {
            log(e, log);
        }
        log("Finished reading tweets.", log);
    }

    /**
     * Writes an exception's message followed by its stack trace, one indented
     * frame per line, to the log file.
     */
    private static void log(IOException e, File log) {
        log(e.getMessage(), log);
        for (StackTraceElement frame : e.getStackTrace()) {
            log("        " + frame, log);
        }
    }

    /**
     * Appends one line to the log file. The file is opened in append mode and
     * closed again on every call.
     */
    private static void log(String string, File log) {
        // try-with-resources closes the writer even if write() throws.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(log, true))) {
            writer.write(string);
            writer.newLine();
        } catch (IOException e) {
            // The log file itself is unavailable, so stderr is the only place left to report to.
            e.printStackTrace();
        }
    }

}

Tweet.java

package Testers;

/**
 * Read-only data holder for one tweet record.
 *
 * <p>Values are stored exactly as passed to the constructor and exposed through getters only.
 */
public class Tweet {

    private String type;
    private String origin;
    private String tweetText;
    private String url;
    private String tweetID;
    private String tweetDate;
    private int retCount;
    private String favourit;
    private String mEntities;
    private String hashtags;

    /**
     * Creates a tweet with only its ID and origin set; all other text fields
     * are {@code null} and the count is {@code 0}.
     *
     * @param tweetID the tweet identifier
     * @param origin  the origin value of the tweet
     */
    public Tweet(String tweetID, String origin) {
        this(null, origin, null, null, tweetID, null, 0, null, null, null);
    }

    /**
     * Creates a fully populated tweet.
     *
     * @param type      the record type
     * @param origin    the origin value of the tweet
     * @param tweetText the tweet text
     * @param url       the URL attached to the tweet
     * @param tweetID   the tweet identifier
     * @param tweetDate the tweet date, kept as text
     * @param retCount  the count stored for the tweet
     * @param favourit  the favourite flag, kept as text
     * @param mEntities the mentioned entities, kept as text
     * @param hashtags  the hashtags, kept as text
     */
    public Tweet(String type, String origin, String tweetText, String url, String tweetID,
                 String tweetDate, int retCount, String favourit, String mEntities, String hashtags) {
        this.type = type;
        this.origin = origin;
        this.tweetText = tweetText;
        this.url = url;
        this.tweetID = tweetID;
        this.tweetDate = tweetDate;
        this.retCount = retCount;
        this.favourit = favourit;
        this.mEntities = mEntities;
        this.hashtags = hashtags;
    }

    /** @return the stored record type */
    public String getType() {
        return this.type;
    }

    /** @return the stored origin value */
    public String getOrigin() {
        return this.origin;
    }

    /** @return the stored tweet text */
    public String getTweetText() {
        return this.tweetText;
    }

    /** @return the stored URL */
    public String getURL() {
        return this.url;
    }

    /** @return the stored tweet identifier */
    public String getTweetID() {
        return this.tweetID;
    }

    /** @return the stored tweet date text */
    public String getTweetDate() {
        return this.tweetDate;
    }

    /** @return the stored count */
    public int getRetCount() {
        return this.retCount;
    }

    /** @return the stored favourite flag text */
    public String getFavourite() {
        return this.favourit;
    }

    /** @return the stored mentioned-entities text */
    public String getMentionedEntities() {
        return this.mEntities;
    }

    /** @return the stored hashtags text */
    public String getHashtags() {
        return this.hashtags;
    }

}

答案 1 :(得分:0)

//global attribute
List<Tweet> tweetList = new ArrayList<>();

// NOTE(review): `reader`, `tweets` and `createTweetFromList` are defined outside
// this fragment; `tweets` is assumed to be a mutable List<String> whose contents
// createTweetFromList copies rather than keeps - confirm.
String line;
// Every record is preceded by two "***" separator lines. Reaching end-of-file on
// either one means there is no further record, so the loop stops instead of
// building a tweet out of null lines.
while (reader.readLine() != null && reader.readLine() != null) {
   // Start from an empty buffer so lines of earlier tweets are not mixed in.
   tweets.clear();
   boolean complete = true;
   // The ten data lines of the record.
   for (int i = 0; i < 10; i++) {
      line = reader.readLine();
      if (line == null) {
         // File ended in the middle of a record (or right after the trailing separators).
         complete = false;
         break;
      }
      tweets.add(line);
   }
   if (!complete) {
      break;
   }
   Tweet tweet = createTweetFromList(tweets);
   tweetList.add(tweet);
}