无法从txt文件中读取单词并计算单词数

时间:2015-10-16 12:25:15

标签: java string twitter

我有一个小型项目来编写Twitter抓取工具,我在分析收集的推文时遇到了一些问题。

收集到的推文保存在一个txt文件中。我想要实现的是统计该txt文件中的单词总数、包含单词“engineering”的单词数量,以及主题标签(hashtag)的数量。以下是我到目前为止所尝试的内容:

import java.io.*;
import java.util.StringTokenizer;

/**
 * Analyzes a text file of collected tweets and reports three totals:
 * the number of tokens, the number of tokens containing the keyword,
 * and the number of tokens containing a '#'.
 */
public class TwitterAnalyzer {

    /**
     * Reads the keyword-specific data file line by line, echoes each line,
     * and prints the word, keyword and hashtag totals (one per line, in that order).
     * Exits with status -1 if the file cannot be found or read.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        String keyword = "Engineering";
        String path = "C:\\Users\\Alan\\Documents\\NetBeansProjects\\TwitterCrawler\\" + keyword + "-data.txt";

        int numberOfKeywords = 0;
        int numberOfWords = 0;
        int numberOfHashtags = 0;

        // try-with-resources closes the reader (and the underlying stream) on every path.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path)))) {
            String strLine;

            // readLine() must be called exactly once per iteration. Calling it again inside
            // the body would discard every other line and could hand null to the tokenizer
            // at the end of the file.
            while ((strLine = br.readLine()) != null) {
                System.out.println(strLine);

                StringTokenizer st = new StringTokenizer(strLine, " \t\n\r\f.,;:!?\"");
                while (st.hasMoreTokens()) {
                    String word = st.nextToken();
                    numberOfWords++;
                    if (word.contains(keyword)) {
                        numberOfKeywords++;
                    }
                    if (word.contains("#")) {
                        numberOfHashtags++;
                    }
                }
            }

            System.out.println(numberOfWords);
            System.out.println(numberOfKeywords);
            System.out.println(numberOfHashtags);

        } catch (FileNotFoundException fe) {
            fe.printStackTrace();
            System.out.println("Unable to locate file");
            System.exit(-1);
        } catch (IOException ie) {
            ie.printStackTrace();
            System.out.println("Unable to read file");
            System.exit(-1);
        }
    }
}

以下是txt文件的链接

在此非常感谢!

2 个答案:

答案 0 :(得分:1)

尝试这种方式会有所帮助

import java.io.BufferedReader;
import java.io.FileReader;

/**
 * Counts the whitespace-separated words in a text file and prints the total.
 */
public class CountWords {

    /**
     * Prints the heading "Engineering", then the number of words found in the input file.
     *
     * @param args unused
     * @throws Exception if the file cannot be opened or read
     */
    public static void main (String args[]) throws Exception {

        System.out.println ("Engineering");
        int count = 0;

        // try-with-resources guarantees the reader is closed even if reading fails.
        try (BufferedReader br = new BufferedReader (new FileReader ("c:\\Customer1.txt"))) {
            String line = br.readLine();
            while (line != null) {
                // Trim first and split on runs of whitespace so that blank lines,
                // leading spaces, tabs and repeated spaces do not produce phantom words.
                String trimmed = line.trim();
                if (!trimmed.isEmpty()) {
                    count += trimmed.split("\\s+").length;
                }
                line = br.readLine();
            }
        }

        System.out.println(count);
    }
}

答案 1 :(得分:1)

以下代码返回:202,14,22

/**
 * Scans the tweet file line by line and prints three totals, one per line:
 * the number of whitespace-delimited tokens, the number of occurrences of the
 * keyword in the lower-cased text, and the number of hashtag matches.
 * Exits with status -1 if the file cannot be found or read.
 */
public static void main(String args[]){
    final String keyword = "engineering";
    final Pattern keywordRegex = Pattern.compile(keyword);
    // A '#' immediately followed by a letter, digit or underscore counts as one hashtag.
    final Pattern hashTagRegex = Pattern.compile("#[a-zA-Z0-9_]");

    int wordTotal = 0;
    int keywordTotal = 0;
    int hashtagTotal = 0;

    try {
        FileInputStream source = new FileInputStream("E:\\t.txt");
        BufferedReader reader = new BufferedReader(new InputStreamReader(source));

        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            // Keyword matching is done on the lower-cased line; the pattern itself is lower case.
            Matcher keywordHits = keywordRegex.matcher(line.toLowerCase());
            while (keywordHits.find()) {
                keywordTotal++;
            }

            String[] tokens = line.split("\\s");
            wordTotal += tokens.length;

            Matcher hashtagHits = hashTagRegex.matcher(line);
            while (hashtagHits.find()) {
                hashtagTotal++;
            }
        }

        System.out.println(wordTotal);
        System.out.println(keywordTotal);
        System.out.println(hashtagTotal);
        reader.close();

    } catch (FileNotFoundException missing) {
        missing.printStackTrace();
        System.out.println("Unable to locate file");
        System.exit(-1);
    } catch (IOException failure) {
        failure.printStackTrace();
        System.out.println("Unable to read file");
        System.exit(-1);
    }
}