在Java中读取和写入文件

时间:2016-11-29 22:07:20

标签: java csv

我想将书(URL)中的每个单词与字典(Data.csv)中的单词进行比较:如果某个单词不在字典中,就将其添加到字典中。

1 个答案:

答案 0 :(得分:0)

您可以读取csv文件和文本文件,并将数据存储在一个集合(Set)中。Set的优点是您不必关心重复项。然后,您只需将文本文件中的数据添加到字典的数据集中即可。这样您就可以避免自己进行比较。示例:

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;

/**
 * Merges the words of an online book into an existing word list.
 *
 * <p>Reads the comma-separated dictionary {@code Data.csv}, downloads the book text,
 * adds every letters-only word of the book to the dictionary (case-insensitively,
 * duplicates collapse because a {@link Set} is used) and writes the sorted result,
 * one word per line, to {@code newData.csv}.
 */
public class Main{

    /** Existing dictionary; fields are separated by commas. */
    private static final String DICTIONARY_FILE = "Data.csv";

    /** Output file; point this at {@link #DICTIONARY_FILE} to overwrite the dictionary instead. */
    private static final String OUTPUT_FILE = "newData.csv";

    /** Location of the book text (the original literal carried a stray trailing ')'). */
    private static final String BOOK_URL =
            "http://manybooks.net/send/1:text:.txt:text/topeliusz2724927249-8/topeliusz2724927249-8.txt";

    /**
     * Charset used for both the dictionary and the output file, fixed explicitly so the
     * result does not depend on the platform default.
     * NOTE(review): assumes Data.csv is UTF-8 encoded - confirm against the real file.
     */
    private static final Charset FILE_CHARSET = StandardCharsets.UTF_8;

    /** First and last line (1-based, inclusive) of the book that are scanned; the rest is copyright text. */
    private static final int FIRST_TEXT_LINE = 54;
    private static final int LAST_TEXT_LINE = 1311;

    /** Splits a line at runs of whitespace and/or ASCII punctuation. */
    private static final Pattern WORD_SEPARATOR = Pattern.compile("[\\s\\p{Punct}]+");

    /** Matches tokens made of letters only, so numbers never reach the dictionary. */
    private static final Pattern LETTERS_ONLY = Pattern.compile("\\p{L}+");

    /**
     * Runs the merge: dictionary + book words -> sorted output file.
     *
     * @param args ignored
     * @throws FileNotFoundException if the dictionary cannot be opened or the output cannot be created
     * @throws MalformedURLException if the book URL is not a valid URL
     * @throws IOException           on any other read, download or write failure
     */
    public static void main(String[] args) throws FileNotFoundException, MalformedURLException, IOException {
        Set<String> words = readDictionary(DICTIONARY_FILE);
        words.addAll(readBookWords(new URL(BOOK_URL)));
        // The output is opened only after all input was read, so a failed read
        // never leaves a truncated output file behind.
        writeWords(OUTPUT_FILE, new TreeSet<>(words));
    }

    /** Reads every non-empty comma-separated field of the file at {@code path}, trimmed and lower-cased. */
    private static Set<String> readDictionary(String path) throws IOException {
        Set<String> words = new HashSet<>();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), FILE_CHARSET))) {
            String line;
            while ((line = reader.readLine()) != null) {
                for (String field : line.split(",")) {
                    // Locale.ROOT keeps the casing rules identical on every machine.
                    String word = field.trim().toLowerCase(Locale.ROOT);
                    if (!word.isEmpty()) {   // blank lines / empty fields are not words
                        words.add(word);
                    }
                }
            }
        }
        return words;
    }

    /** Downloads {@code book} and collects the lower-cased letters-only words of its text lines. */
    private static Set<String> readBookWords(URL book) throws IOException {
        Set<String> words = new HashSet<>();
        // ISO-8859-1 so that characters such as å and ä are decoded correctly.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(book.openStream(), StandardCharsets.ISO_8859_1))) {
            String line;
            int lineNumber = 0;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (lineNumber > LAST_TEXT_LINE) {
                    break;      // nothing after this line is used
                }
                if (lineNumber < FIRST_TEXT_LINE) {
                    continue;   // skip the copyright header
                }
                System.out.println(line);
                for (String token : WORD_SEPARATOR.split(line)) {
                    if (LETTERS_ONLY.matcher(token).matches()) {
                        // Lower-case so that "Apple" and "apple" count as the same word.
                        words.add(token.toLowerCase(Locale.ROOT));
                    }
                }
            }
        }
        return words;
    }

    /** Writes {@code words} in iteration order to {@code path}, one word per line. */
    private static void writeWords(String path, Set<String> words) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(path), FILE_CHARSET))) {
            for (String word : words) {
                writer.write(word);
                writer.write("\n"); // one word per line; adapt if the csv needs another layout
            }
        }   // closing the writer also flushes it
    }
}