我正在尝试使用归并排序和插入排序对文件中的字符串进行排序。排序部分已经完成,但我卡在了必须删除所有重复单词的那一步。简而言之,我需要在删除重复单词的前提下,分别用归并排序和插入排序对文件中的字符串排序,并打印两种排序各自的执行时间。
FRANKENSTEIN
MARY
WOLLSTONECRAFT
SHELLEY
LETTER
1
TO
txt 文件的内容大致如上,但实际包含超过 15,000 个单词。我不知道该如何实现先删除重复单词、再对其进行排序的部分。
提前致谢
ergeSort pb = new ergeSort();
try {
BufferedReader br = new BufferedReader(new FileReader("test.txt"));
List<String> l = new ArrayList<String>();
String line;
while ((line = br.readLine()) != null) {
l.add(line);
}
br.close();
String[] arre = l.toArray(new String[]{});
startTime = System.currentTimeMillis();
(new ergeSort()).MergeSort(arre);
removeDuplicate(arre);
for(String h : arre){
System.out.println(h);
}
endTime = System.currentTimeMillis();
long executionTime1 = endTime - startTime;
startTime = System.currentTimeMillis();
inSort(arre);
removeDuplicate(arre);
endTime = System.currentTimeMillis();
long executionTime2 = endTime - startTime;
System.out.println("The execution time of Merge sort after duplicated words are removed is: " + executionTime1 + "ms");
System.out.println("The execution time of Insertion sort after duplicated words are removed is: " + executionTime2 + " ms");
System.out.println("The number of words remaining after removal of duplicated: " + arre.length);
} catch (Exception e) {
e.printStackTrace();
System.out.println("Incorrect File");
}
}
/**
 * Top-down merge sort for string arrays, ordered case-insensitively.
 *
 * <p>Ordering follows {@link String#compareToIgnoreCase(String)}. The sort is
 * stable: elements that compare equal (for example {@code "Mary"} and
 * {@code "MARY"}) keep their original relative order.
 */
public static class ergeSort {

    /**
     * Sorts {@code arr} in place in ascending, case-insensitive order.
     *
     * <p>Arrays of length 0 or 1 are left untouched. Each recursion level copies
     * the two halves into fresh arrays and merges them back into {@code arr}.
     *
     * @param arr the array to sort; must not be {@code null}, and null elements
     *            are not supported
     */
    public static void MergeSort(String[] arr) {
        if (arr.length <= 1) {
            return;
        }
        int mid = arr.length / 2;

        String[] firstHalf = new String[mid];
        System.arraycopy(arr, 0, firstHalf, 0, mid);
        MergeSort(firstHalf);

        String[] secondHalf = new String[arr.length - mid];
        System.arraycopy(arr, mid, secondHalf, 0, arr.length - mid);
        MergeSort(secondHalf);

        merge(firstHalf, secondHalf, arr);
    }

    /**
     * Merges two sorted arrays into {@code temp}.
     *
     * @param arr1 sorted left run
     * @param arr2 sorted right run
     * @param temp destination; expected to have length
     *             {@code arr1.length + arr2.length}
     */
    public static void merge(String[] arr1, String[] arr2, String[] temp) {
        int a = 0;
        int b = 0;
        for (int i = 0; i < temp.length; i++) {
            // "<= 0" (not "< 0"): on a tie take the left element first so that
            // equal-ignoring-case words keep their input order (stable merge).
            boolean takeLeft = b >= arr2.length
                    || (a < arr1.length && arr1[a].compareToIgnoreCase(arr2[b]) <= 0);
            if (takeLeft) {
                temp[i] = arr1[a];
                a++;
            } else {
                temp[i] = arr2[b];
                b++;
            }
        }
    }
}
/**
 * Sorts {@code arr} in place with insertion sort, using the elements' natural
 * ordering ({@code compareTo}), and returns the same array viewed as
 * {@code String[]}.
 *
 * <p>For strings the natural ordering is case-sensitive, so {@code "A"} sorts
 * before {@code "a"}.
 *
 * @param arr the array to sort; must not be {@code null} and must not contain
 *            null elements when it has more than one element
 * @return the same array instance as {@code arr}, now sorted
 * @throws ClassCastException if the runtime type of {@code arr} is not
 *         {@code String[]}
 */
@SuppressWarnings({"rawtypes", "unchecked"}) // raw Comparable is part of the existing signature
public static String[] inSort(Comparable[] arr) {
    for (int i = 1; i < arr.length; i++) {
        Comparable current = arr[i];
        int j = i;
        // arr[0..i-1] is already sorted, so stop at the first element that is
        // not greater than current instead of scanning all the way to index 0.
        while (j > 0 && current.compareTo(arr[j - 1]) < 0) {
            arr[j] = arr[j - 1];
            j--;
        }
        arr[j] = current;
    }
    return (String[]) arr;
}
/**
 * Returns the words of {@code words} with duplicates removed.
 *
 * <p>Two words are duplicates when they are equal ignoring case (the same
 * ordering as {@link String#compareToIgnoreCase(String)}). The first
 * occurrence of each word is kept and the relative order of the kept words is
 * preserved. {@code null} entries are skipped. The input array itself is not
 * modified: a Java array cannot shrink, so callers must use the returned
 * array, e.g. {@code arre = removeDuplicate(arre);}.
 *
 * @param words the words to de-duplicate; may be {@code null} or empty
 * @return a new array holding the unique words; empty if {@code words} is
 *         {@code null} or contains no non-null entries
 */
public static String[] removeDuplicate(String[] words) {
    if (words == null) {
        return new String[0];
    }
    // Fully qualified so that no additional import is required by this method.
    java.util.Set<String> seen = new java.util.TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
    List<String> unique = new ArrayList<String>();
    for (String word : words) {
        // add() returns false when an equal-ignoring-case word was already seen.
        if (word != null && seen.add(word)) {
            unique.add(word);
        }
    }
    return unique.toArray(new String[0]);
}
}
答案 0 :(得分:0)
将 List 替换为 Set,然后即可得到不含重复字符串的 String 数组。Set 是一个不包含重复元素的集合。
ergeSort pb = new ergeSort();
try {
BufferedReader br = new BufferedReader(new FileReader("test.txt"));
Set<String> s = new HashSet<String>();
String line;
while ((line = br.readLine()) != null) {
s.add(line);
}
br.close();
String[] arre = s.toArray(new String[]{});
//...
}
// ...