借助自然平衡的合并排序功能即时进行数据合并

时间:2018-06-21 18:03:52

标签: java sorting

我已经实现了(外部)自然平衡双路归并排序,用于对文本文件进行排序(文本文件包含用换行符分隔的字符串)。该算法可以按预期工作,但是我想改进其中的一部分。该算法分为两部分:第一部分分配输入数据,第二部分对其进行合并。第一部分工作良好,但第二部分的实现方式并不理想。问题在于:合并之前,需要借助getNextStringRunLength函数,预先从两个临时文件中读取下一个游程(run)的长度。

例如:

int n = getNextStringRunLength(temp_file_1)
int m = getNextStringRunLength(temp_file_2)

该算法随后合并数据,直到把这(n + m)个值合并成一个长度为(n + m)的新游程。我的想法是即时(on the fly)合并数据,而不必事先读取游程长度。主要问题是:如何即时执行这个合并过程?

input.txt的示例
汽车
电机
自行车

约翰
卡尔

output.txt的样本
自行车

汽车
卡尔
约翰
电机

请参见下面的代码,感谢您的回答!

/**
* Sorts an input text file of strings using a natural merge procedure with the help of auxiliary files.
* In the first part it distributes the input data over the auxiliary input files, in the second part
* it merges them. Input data is distributed using <i>writeNextStringRun</i>, which writes the next string
* run to an auxiliary file. While merging it uses <i>getNextStringRunLength</i>, which scans ahead through
* a second reader on the same auxiliary file and returns the length of the next string run, so that a run
* of length n and a run of length m can be merged into a single run of length (m+n).
* <p>
* Merging repeats, swapping the roles of the input and output files after every pass, for as long as the
* length of the main file is greater than the length of the first output file measured right after the pass.
* <p>
* NOTE(review): the visible code never deletes auxiliary files and never writes to <i>sorted_file</i>;
* the sorted data is left in one of the auxiliary files. Confirm whether a final rename/cleanup step
* exists elsewhere.
* <p>
* NOTE(review): only auxiliary files with index 0 and 1 are ever written or merged, whatever the value
* of <i>temp_files</i>; a value below 2 would fail on the hard-coded indices.
*
* @param temp_files         number of auxiliary files used for data distribution and merging.
* @param working_dir        path to local directory where all the sorting takes place.
* @param main_string_file   local text file which contains all input data separated by a newline character.
* @throws IOException       if an input or output exception occurred during file operations.
*/

public static void naturalBalancedTwoWayMergeSortStrings(int temp_files, String working_dir, File main_string_file) throws IOException
{
    // NOTE(review): this local is never modified inside this method, yet the distribution loop below
    // waits for it to grow. writeNextStringRun updates a name `data_read` that cannot be this local -
    // presumably both were meant to be one shared static field; confirm.
    long data_read = 0;
    // NOTE(review): a String[] is assigned to a String variable, which does not compile as posted.
    // Presumably meant to initialise the `last_string_runs` array used by getNextStringRunLength.
    String last_string_runs = new String[temp_files];

    int i,j;                    // 1-based counters of lines consumed from the current run of file 0 / file 1
    int n,m;                    // lengths of the current runs in file 0 / file 1
    int rw_switch = 0;          // number of completed merge passes; its parity selects the swap branch
    int run_counter = 0;        // number of runs distributed so far; its parity selects the target file
    String line_1 = null;       // current unconsumed line of input file 0
    String line_2 = null;       // current unconsumed line of input file 1
    int file_write_pointer;     // number of merged run pairs in this pass; its parity selects the output file
    long output_file_length = 0;
    String file_extension = ".txt";

    File input_files[] = new File[temp_files];
    File output_files[] = new File[temp_files];
    String input_file_name = "input_string_file_";
    String output_file_name = "output_string_file_";
    // NOTE(review): sorted_file is not used anywhere else in the visible code.
    File sorted_file = new File(working_dir + "/main_string_sorted" + file_extension);

    BufferedWriter input_file_writers[] = new BufferedWriter[temp_files];
    BufferedReader input_file_readers[] = new BufferedReader[temp_files];
    BufferedReader run_length_readers[] = new BufferedReader[temp_files];
    BufferedWriter output_file_writers[] = new BufferedWriter[temp_files];
    BufferedReader main_file_reader = new BufferedReader(new FileReader(main_string_file));

    for(int p=0; p<temp_files; p++)
    {
        // NOTE(review): unlike sorted_file above, no "/" is inserted between working_dir and the file
        // name, so working_dir must already end with a separator for these files to land inside it.
        input_files[p] = new File(working_dir + input_file_name + (p+1) + file_extension);
        output_files[p] = new File(working_dir + output_file_name + (p+1) + file_extension);
        // Opened in append mode: content already present in the file is kept.
        input_file_writers[p] = new BufferedWriter(new FileWriter(input_files[p],true));
        // NOTE(review): these readers are replaced at the start of the first merge pass without
        // being closed first.
        run_length_readers[p] = new BufferedReader(new FileReader(input_files[p]));
    }

    /* START - initial run distribution */
    long main_file_length = main_string_file.length();

    while(data_read < main_file_length)
    {
        // Alternate between auxiliary input files 0 and 1, one run per call.
        writeNextStringRun(main_file_reader,input_file_writers[run_counter%2]);
        run_counter++;
    }

    main_file_reader.close();
    // Only the two writers that were actually used are closed here.
    input_file_writers[0].close();
    input_file_writers[1].close();
    /* END - initial run distribution. */

    /* START - merge all runs. */
    do
    {
        // Each input file gets two independent readers: one delivers the lines to merge,
        // the other is used only to scan ahead for run lengths.
        for(int r=0; r<temp_files; r++)
        {
            input_file_readers[r] = new BufferedReader(new FileReader(input_files[r]));
            run_length_readers[r] = new BufferedReader(new FileReader(input_files[r]));
            output_file_writers[r] = new BufferedWriter(new FileWriter(output_files[r],true));
        }

        /* >>> This is not the right approach: every run is read twice, once to measure it and once to merge it <<< */
        n = getNextStringRunLength(run_length_readers[0],0);
        m = getNextStringRunLength(run_length_readers[1],1);

        // NOTE(review): read failures are silently ignored here and below; the line variable
        // then simply keeps its previous value.
        try
        {
            line_1 = input_file_readers[0].readLine();
        }
        catch(Exception e){}

        try
        {
            line_2 = input_file_readers[1].readLine();
        }
        catch(Exception e){}

        file_write_pointer = 0;

        // One iteration merges one run from file 0 with one run from file 1.
        while(n > 0 || m > 0)
        {
            i = 1;
            j = 1;

            // Both runs still have lines: emit the smaller head (ties are taken from file 1).
            while((line_1 != null && i <= n) && (line_2 != null && j <= m))
            {
                if(line_1.compareTo(line_2) < 0)
                {
                    output_file_writers[file_write_pointer%2].write(line_1 + "\n");

                    try
                    {
                        line_1 = input_file_readers[0].readLine();
                    }
                    catch(Exception e){}

                    i++;
                }

                else
                {
                    output_file_writers[file_write_pointer%2].write(line_2 + "\n");

                    try
                    {
                        line_2 = input_file_readers[1].readLine();
                    }
                    catch(Exception e){}

                    j++;
                }
            }

            // Copy whatever is left of the run from file 0.
            while(line_1 != null && i <= n)
            {
                output_file_writers[file_write_pointer%2].write(line_1 + "\n");

                try
                {
                    line_1 = input_file_readers[0].readLine();
                }
                catch(Exception e){}

                i++;
            }

            // Copy whatever is left of the run from file 1.
            while(line_2 != null && j <= m)
            {
                output_file_writers[file_write_pointer%2].write(line_2 + "\n");

                try
                {
                    line_2 = input_file_readers[1].readLine();
                }
                catch(Exception e){}

                j++;
            }

            // The next merged run goes to the other output file.
            file_write_pointer++;

            /* >>> This is not the right approach: run lengths are again read ahead of the merge <<< */
            n = getNextStringRunLength(run_length_readers[0],0);
            m = getNextStringRunLength(run_length_readers[1],1);
        }
        /* END - single merge pass. */

        for(int k=0; k<temp_files; k++)
        {
            input_file_readers[k].close();
            run_length_readers[k].close();
            output_file_writers[k].close();
        }

        /* START - swap io files. */
        File temp_store[] = new File[temp_files];

        // In both branches the length of the first output file is recorded before the swap, the
        // files just consumed are reopened without the append flag, and the two arrays trade places.
        switch(rw_switch % 2)
        {
            case 0:
            output_file_length = output_files[0].length();
            for(int k=0; k<temp_files; k++)
            {
                // NOTE(review): this writer is created only for its truncating effect and is not
                // closed in the visible code.
                input_file_writers[k] = new BufferedWriter(new FileWriter(input_files[k]));
                temp_store[k] = input_files[k];
                input_files[k] = output_files[k];
                output_files[k] = temp_store[k];
            }
            break;

            case 1:
            output_file_length = output_files[0].length();
            for(int k=0; k<temp_files; k++)
            {
                temp_store[k] = output_files[k];
                output_files[k] = input_files[k];
                input_files[k] = temp_store[k];
                // NOTE(review): this writer is overwritten at the start of the next pass without
                // being closed in the visible code.
                output_file_writers[k] = new BufferedWriter(new FileWriter(output_files[k]));
            }
            break;
        }
        /* END - swap io files. */

        rw_switch++;
    }
    // Keep merging while the first output file of the last pass is shorter than the main file.
    while(main_file_length > output_file_length);
    /* END - merge all runs. */
}

/**
* Writes the next string run to an auxiliary file.
* <p>
* A run is a sequence of consecutive lines in which no line compares lower than the one before it.
* The line that ended the previous run is kept in <i>last_string_run</i>; it is written first and
* becomes the lower bound of the new run. Every line written - including that carried-over line -
* adds its length plus one (for the line terminator) to <i>data_read</i>, so that the counter
* covers every line of the input exactly once.
* <p>
* NOTE(review): <i>data_read</i> counts characters, while the caller compares it with a file length
* in bytes; the two only agree for single-byte encodings, "\n" line endings and a terminated last line.
*
* @param  input_file_reader a reader which reads strings from input text file.
* @param  input_writer      a writer which writes next string run to an auxiliary file.
* @throws IOException       if an input or output exception occurred during file operations.
*/
private static void writeNextStringRun(BufferedReader input_file_reader, BufferedWriter input_writer) throws IOException
{
    String min_value = "";

    // The line that terminated the previous run opens this one.
    if(last_string_run != null)
    {
        input_writer.write(last_string_run + "\n");
        // Count the carried-over line as well, otherwise data_read never reaches the file length.
        data_read += last_string_run.length() + 1;
        // The run must not fall below its own first line.
        min_value = last_string_run;
        last_string_run = null;
    }

    String current_line;

    while((current_line = input_file_reader.readLine()) != null)
    {
        if(current_line.compareTo(min_value) < 0)
        {
            // This line belongs to the next run: remember it and stop.
            last_string_run = current_line;
            return;
        }

        input_writer.write(current_line + "\n");
        data_read += current_line.length() + 1;
        min_value = current_line;
    }
    // I/O failures are no longer swallowed; they reach the caller as the declared IOException.
}

/**
* Returns the length of the next string run in an auxiliary string file.
* <p>
* Lines are read until one compares lower than its predecessor or the reader is exhausted. The line
* that ends a run has already been consumed from the reader, so it is stored in
* <i>last_string_runs[input_file_index]</i> and counted as the first line of the run on the next call.
* When the reader is exhausted the stored entry is reset to <i>null</i>.
*
* @param  input_file_reader a reader which reads strings from auxiliary input file.
* @param  input_file_index  an index of auxiliary file from which it reads next string run length.
*         This parameter is used in <i>last_string_runs</i> array which contains last runs read from
*         a file with this parameter (index).
* @return next string run length found in auxiliary string file, 0 if there is no further run.
* @throws IOException if an input or output exception occurred during file operations.
*/

private static int getNextStringRunLength(BufferedReader input_file_reader, int input_file_index) throws IOException
{
    int run_length = 0;
    String min_value = "";

    // A line carried over from the previous call is the first line of this run.
    String carried_line = last_string_runs[input_file_index];

    if(carried_line != null)
    {
        run_length = 1;
        min_value = carried_line;
    }

    String current_line;

    while((current_line = input_file_reader.readLine()) != null)
    {
        if(current_line.compareTo(min_value) < 0)
        {
            // Run boundary: keep the consumed line for the next call.
            last_string_runs[input_file_index] = current_line;
            return run_length;
        }

        run_length++;
        min_value = current_line;
    }

    // End of file: nothing is carried over.
    last_string_runs[input_file_index] = null;

    // I/O failures are no longer swallowed; a truncated count would silently corrupt the merge.
    return run_length;
}

0 个答案:

没有答案