用于搜索二进制文件的Java程序

时间:2016-03-26 09:44:05

标签: java file search binary adt

我想编写一个在文件中进行二进制搜索的程序。

我有一个具有字符串int的TreeSet ADT,我想以这种方式在文件中搜索每个字符串:首先,我想读取文件的中间页面并进行搜索。如果找到该字符串,则返回该字符串的位置;如果没有,那么我想从文件的左半部分或右半部分读取页面,具体取决于字符串的字母顺序。

我的二进制搜索类的代码是:

public class PageBinarySearch {
    final int NOT_FOUND = -1;
    final int BOUND_LIMIT = -1;
    final int EVRTHG_OK = 0;
    final int ON = 1;
    final int OFF = 0;
    private DataInputStream inFile;
    private String[] readBuffer; //This buffer is used to read a specified page from
                              //the disk.
    private String[] auxBuffer;  //Auxiliary buffer is used for searching in the la-
                              //st page. This buffer has less than PAGE_SIZE ele-
                              //ments.
    private int lastPage, leftElemNum, lastPageNo;
    private int diskAccessMeter; //This variable is used to count disk accesses.
    private int firstNum;


/********************************************************************************/
    //Constructor of the class
    public PageBinarySearch(String filename, String key, int PageSize, int numPage, int numOfWords) throws IOException{
        System.out.println();
        System.out.println("Binary Search on disk pages");
        System.out.println("*************************************************");
        System.out.println();
        //Initializing the elements of the class.
        readBuffer = new String[PageSize];
        lastPage = numPage*PageSize;
        leftElemNum = numOfWords-(lastPage);
        auxBuffer = new String[leftElemNum];
        lastPageNo = numPage;
        diskAccessMeter = 0;
        this.setFirstNum(filename);
        basicPageBinarySearchMethod(filename, 0, lastPageNo, key,PageSize,numPage,numOfWords);
        System.out.println("Disk accesses:"+this.getDiskAccessMeter());
    }


/********************************************************************************/
    //Recursive binary search on disk pages.
    public void basicPageBinarySearchMethod(String filename, int start, int end,
                                            String key, int PageSize, int numPage, int numOfWords) throws IOException{
        inFile = new DataInputStream(new FileInputStream(filename));

        int bound = boundHandler(start, key);
        if (bound != EVRTHG_OK){return;}

        int midPage = start + (end-start)/2;
        int midPageIndex = ((midPage) * (PageSize));
        int midPageEnd = midPageIndex + PageSize;

        //System.out.println("----------------------------------------");
        System.out.println("Page:"+(midPage+1));
        System.out.println(" Index:"+midPageIndex+" End:"+midPageEnd);

        //Accessing midPage's index.
        accessPageIndex(midPageIndex - 1);

        fillBasicBuffer(PageSize);
        System.out.println();

        if (key.compareTo(readBuffer[0])<0){
            //Case that the key is in the left part.
            end = midPage - 1;
            inFile.close(); //We close the stream because in the next recursion
                            //we want to access new midPage.
            basicPageBinarySearchMethod(filename, start, end, key, PageSize, numPage, numOfWords);
        }
        else if (key.compareTo(readBuffer[255])>0){
            //Case that the key is in the left part.
            start = midPage+1;
            inFile.close();
            basicPageBinarySearchMethod(filename, start, end, key, PageSize, numPage, numOfWords);
        }
        else{
            //Case that:
            //a) key is bigger than the integer, which is in the midPage-
            //Index position of the file, and
            //b) key is less than the integer, which is in the midPageEnd
            //position of the file.
            LookingOnPage(midPage, key);
        }


    }

/********************************************************************************/  
    public int boundHandler(int start, String key) throws IOException{
        if (start == this.getLastPageNo()){
            //In this case the program has to start searching in the last page, 
            //which has less than PAGE_SIZE elements. So we call the alternative
            //function "LookingOnLastPage".
            System.out.println("Start == End");
            accessPageIndex(this.getLastPage());
            LookingOnLastPage(key);
            return BOUND_LIMIT;
        }
        //if (key < this.getFirstNum()){

            //System.out.println("Key does not exist.");
            //return BOUND_LIMIT;
        //}

        return EVRTHG_OK;
    }

/********************************************************************************/
    //This function is running a binary search in the specified disk page, which
    //is saved in the readBuffer.
    public void LookingOnPage(int pageNo, String key) throws IOException{
        int i, result;
        System.out.println("Looking for key:"+key+" on page:"+(pageNo+1));
        result = myBinarySearch(key, readBuffer);
        if (result != -1){
            System.out.println("Key found on page:"+(pageNo+1));
            inFile.close();
            return;
        }
        else{
            System.out.println("Key is not found");
            inFile.close();
            return;
        }
    }

/********************************************************************************/
    //This function is running a binary search in the last disk page, which
    //is saved in the auxBuffer.
    public void LookingOnLastPage(String key) throws IOException{
        int i, result;
        this.setDiskAccessMeter(this.getDiskAccessMeter()+1);
        System.out.println("Looking for key:"+key+" on last page:"
                                                    +(this.getLastPageNo()+1));
        for (i=0; i<this.getLeftElemNum(); i++){
            auxBuffer[i] = inFile.readUTF();
        }
        result = myBinarySearch(key, auxBuffer);
        if (result != -1){
            System.out.println("Key found on last page");
            inFile.close();
            return;
        }
        else{
            System.out.println("Key is not found");
            inFile.close();
            return;
        }
    }

/********************************************************************************/  
    public void accessPageIndex(int intNum) throws IOException
    {
        //This function is skipping intNum integers in the file, to access page's
        //index.
        int i;
        System.out.println(" Accessing page's index...");
        inFile.skipBytes(intNum*4);
        //inFile.readInt();
    }

/********************************************************************************/  
    public void fillBasicBuffer(int PageSize) throws IOException{
        //Loading readBuffer.
        int i;
        this.setDiskAccessMeter(this.getDiskAccessMeter()+1);
        for (i=0; i<PageSize; i++){
            readBuffer[i] = inFile.readUTF();
        }
    }

/********************************************************************************/  
    //This function implements binary search on a given buffer. 
    public int myBinarySearch(String key, String[] auxBuffer) {
        int lo = 0;
        int hi = auxBuffer.length - 1;
        while (lo <= hi) {
            // Key is in a[lo..hi] or not present.
            int mid = lo + (hi - lo) / 2;
            if      (key.compareTo(auxBuffer[mid])<0) hi = mid - 1;
            else if (key.compareTo(auxBuffer[mid])>0) lo = mid + 1;
            else return mid;
        }
        return NOT_FOUND;
    }

/********************************************************************************/
    public int getLastPage() {
        return lastPage;
    }
    public void setLastPage(int lastPage) {
        this.lastPage = lastPage;
    }

    public int getLeftElemNum() {
        return leftElemNum;
    }
    public void setLeftElemNum(int leftElemNum) {
        this.leftElemNum = leftElemNum;
    }

    public int getLastPageNo() {
        return lastPageNo;
    }
    public void setLastPageNo(int lastPageNo) {
        this.lastPageNo = lastPageNo;
    }

    public int getDiskAccessMeter() {
        return diskAccessMeter;
    }
    public void setDiskAccessMeter(int diskAccessMeter) {
        this.diskAccessMeter = diskAccessMeter;
    }


    public int getFirstNum() {
        return firstNum;
    }


    public void setFirstNum(String filename) throws IOException {
        inFile = new DataInputStream(new FileInputStream(filename));
        this.firstNum = inFile.readInt();
        inFile.close();
    }

}

我的主要是:

public class MyMain {

    private static final int DataPageSize = 128; // Default Data Page size

    public static void main(String[] args) throws IOException {
        TreeSet<DictPage> listOfWords = new TreeSet<DictPage>(new MyDictPageComp());
        LinkedList<Page> Eurethrio = new LinkedList<Page>();

        File file = new File("C:\\Kennedy.txt");
        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
        //This will reference one line at a time...
        String line = null;
        int line_count=0; //Metavliti gia na metrame grammes ..
        int byte_count; //Metavliti gia na metrame bytes...
        int total_byte_count=0; //Metavliti gia synoliko arithmo bytes ...
        int fromIndex;

        int middleP;
        int kat = 0;
        while( (line = br.readLine())!= null ){
            line_count++;
            fromIndex=0;
            String [] tokens = line.split(",\\s+|\\s*\\\"\\s*|\\s+|\\.\\s*|\\s*\\:\\s*");
            String line_rest=line;
            for (int i=1; i <= tokens.length; i++) {
                byte_count = line_rest.indexOf(tokens[i-1]);
                fromIndex = fromIndex + byte_count + 1 + tokens[i-1].length();
                if (fromIndex < line.length())
                    line_rest = line.substring(fromIndex);
                listOfWords.add(new DictPage(tokens[i-1],kat));
                kat++;
                Eurethrio.add(new Page("Kennedy",fromIndex));
                }
                total_byte_count += fromIndex;
                Eurethrio.add(new Page("Kennedy", total_byte_count));
        }

        //for(DictPage p : listOfWords){
            //System.out.println(p.getWord() + " " + p.getPage());
        //}

        //for (int i = 0;i<Eurethrio.size();i++){
            //System.out.println(""+Eurethrio.get(i).getFile()+" "+Eurethrio.get(i).getBytes());
        //}

        ByteArrayOutputStream bos = new ByteArrayOutputStream(128) ;
        DataOutputStream out = new DataOutputStream(bos);
        String s ;
        int integ;
        byte[] buf ;
        int nPag=1; //Aritmos selidwn
        int numOfWords=0;
        for (DictPage p : listOfWords){
            s = p.getWord();
            integ = p.getPage();
            numOfWords++;

            byte dst[] = new byte[20];
            byte[] src = s.getBytes(); //metatrepei se bytes to string
            System.arraycopy(src, 0, dst, 1, src.length); //to antigrafei ston buffer dst
            out.write(dst, 0, 20);  // to grafei sto arxeio

            out.writeInt(integ);  // grafei ton akeraio sto file

            out.close();

            buf = bos.toByteArray(); // Creates a newly allocated byte array.
            System.out.println("\nbuf size: " + buf.length + " bytes");
             //dhmiourgia selidas (h opoia einai enas byte array)
            if(buf.length> nPag*DataPageSize){
                nPag++;
            }
            byte[] DataPage = new byte[nPag*DataPageSize];
            System.arraycopy( buf, 0, DataPage, 0, buf.length); // antigrafw buf sth selida
            System.out.println("TARRARA"+DataPage.length);
            bos.close();//kleinw buf

            // write to the file
            RandomAccessFile MyFile = new RandomAccessFile ("newbabis", "rw");
            MyFile.seek(0);
            MyFile.write(DataPage);
        }
        System.out.println("Number of Pages :"+nPag);
       if (nPag%2 == 0){
           middleP = nPag/2;
       }
       else{
           middleP = (nPag+1)/2;
       }
       System.out.println("Middle page is no:"+middleP);

       PageBinarySearch BinarySearch;
    String key;
    for(DictPage p : listOfWords){
        key = p.getWord();
        BinarySearch = new PageBinarySearch("C:\\Kennedy.txt", key , DataPageSize, nPag, numOfWords);
    }    
    }
}

我的程序运行不正常,我找不到我做错了什么。

1 个答案:

答案 0 :(得分:1)

使用缓冲的输入流而不是数据输入流 并在源代码中调整它的用法

因为数据输入流的readUTF方法将达到 如果没有数据写入文件,则立即结束文件 数据输出流方法writeUTF或类似。

注意:这不会修复你的程序。它 因为你的递归会导致堆栈溢出 二分搜索方法不会返回

private BufferedInputStream inFile;

public void basicPageBinarySearchMethod(String filename, int start, int end,
                                        String key, int PageSize, int numPage, int numOfWords) throws IOException{
    inFile = new BufferedInputStream(new FileInputStream(filename));
    ...
}

public void accessPageIndex(int intNum) throws IOException
{
    //This function is skipping intNum integers in the file, to access page's
    //index.
    int i;
    System.out.println(" Accessing page's index...");
    inFile.skip(intNum*4);
    //inFile.readInt();
}
 public void LookingOnLastPage(String key) throws IOException{
    int i, result;
    this.setDiskAccessMeter(this.getDiskAccessMeter()+1);
    System.out.println("Looking for key:"+key+" on last page:"
                                                +(this.getLastPageNo()+1));
    for (i=0; i<this.getLeftElemNum(); i++){
        auxBuffer[i] = String.valueOf((char)inFile.read());
    }
    ...
}
public void fillBasicBuffer(int PageSize) throws IOException{
    //Loading readBuffer.
    int i;
    this.setDiskAccessMeter(this.getDiskAccessMeter()+1);
    for (i=0; i < PageSize; i++){
        char c = (char)inFile.read();
        readBuffer[i] = String.valueOf(c);
    }
}

 public void setFirstNum(String filename) throws IOException {
    inFile = new BufferedInputStream(new FileInputStream(filename));
    this.firstNum = inFile.read();
    inFile.close();
}