我借助 Java 的哈希表,为一个非常大的文本制作了词汇索引(concordance)。在了解了哈希表的工作原理之后,我发现为了防止数据损坏,需要一个足够好的哈希函数。Java 中哈希表的实现是否足够好呢?这些数据包含数百万个单词。
修改
// Build a word -> list-of-indices map from a file whose lines have the form
// "<word> <index>" (fields separated by a single space). Each distinct word
// gets its OWN list, so entries in the map never alias each other.
ArrayList<Integer> indexArray = new ArrayList<Integer>();
BufferedReader bf = new BufferedReader(new FileReader(filename));
HashMap<String, ArrayList<Integer>> hmap = new HashMap<String, ArrayList<Integer>>();
String line;
String word = null;
String lastWord = null; // word seen on the previous line; null before the first line
int index = 0;
// Time the execution of storing all data in the hashmap
double time = (double) System.currentTimeMillis();
try {
    while ((line = bf.readLine()) != null) {
        // Split line into word and index
        input = line.split(" ");
        word = input[0];
        index = Integer.parseInt(input[1]);
        // When the word changes (including on the very first line), switch to
        // that word's list, creating and registering it if it does not exist
        // yet. Looking the list up instead of blindly replacing it keeps
        // earlier indices if the same word shows up again later in the file.
        if (!word.equals(lastWord)) {
            indexArray = hmap.get(word);
            if (indexArray == null) {
                indexArray = new ArrayList<Integer>();
                hmap.put(word, indexArray);
            }
            lastWord = word;
        }
        // The list is already stored in the map, so appending here is enough;
        // nothing is lost for the first or the last word of the file.
        indexArray.add(index);
        // NOTE: do not print hmap.hashCode() here. It iterates over every
        // entry of the map, which makes the loop quadratic and distorts the
        // timing measured below.
    }
} finally {
    // Release the file handle even if a line fails to parse
    bf.close();
}
System.out.println((System.currentTimeMillis() - time)/1000);