如何使用 Apache Lucene 和 Apache PDFBox 在 PDF 中搜索确切短语

时间:2013-01-31 11:33:18

标签: java swing pdf

  

可能重复:
  How to get the pdfpage content

我正在使用 PDF Renderer 显示 PDF,并使用 PDFBox 将其转换为文本以实现搜索功能。我可以搜索单个单词,但是当搜索词是 Hello User 这样的短语时,就无法搜索到。我正在使用的代码如下。

import java.awt.BorderLayout;
import java.awt.Dimension;
import java.awt.FlowLayout;
import java.awt.Label;
import java.awt.Toolkit;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

import javax.swing.JButton;
import javax.swing.JDialog;
import javax.swing.JFrame;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextField;
import javax.swing.SwingUtilities;
import javax.swing.event.DocumentEvent;
import javax.swing.event.DocumentListener;

import com.google.common.base.CharMatcher;
import com.sun.pdfview.PDFFile;
import com.sun.pdfview.PDFPage;
import com.sun.pdfview.PagePanel;
// NOTE(review): PDDocument and PDFTextStripper (Apache PDFBox) are used by main()
// but are not imported here; add the imports that match the PDFBox version on the
// classpath (the package of PDFTextStripper differs between PDFBox releases).

/**
 * Swing panel that shows one PDF page inside a {@link PagePanel}, with a toolbar of
 * navigation buttons and a "search" button.
 *
 * <p>The search button opens a small dialog; pressing Enter in its text field counts
 * how often the typed phrase occurs in {@link #parsedText}, the plain text that
 * {@link #main(String[])} extracts from the PDF. The navigation buttons are created
 * and laid out here but have no listeners attached in this class.
 */
public class PdfViewer extends JPanel {
    private static enum Navigation {
        GO_FIRST_PAGE, FORWARD, BACKWARD, GO_LAST_PAGE, GO_N_PAGE
    }

    private static final CharMatcher POSITIVE_DIGITAL = CharMatcher.anyOf("0123456789");
    private static final String GO_PAGE_TEMPLATE = "%s of %s";
    private static final int FIRST_PAGE = 1;
    private int currentPage = FIRST_PAGE;
    private JButton btnFirstPage;
    private JButton btnPreviousPage;
    private JTextField txtGoPage;
    private JButton btnNextPage;
    private JButton btnLastPage;
    private JButton print;
    private JButton search;
    private PagePanel pagePanel;
    // The document handed to this viewer through setPDFFile().
    private PDFFile pdfFile;

    // Number of matches found by the most recent call to search().
    static int count;
    // The phrase most recently submitted from the search dialog.
    static String text;
    // Plain text of the PDF; null until main() has finished extracting it.
    static String parsedText;
    static JTextField jTextField;

    public PdfViewer() {
        initial();
    }

    /** Builds the toolbar (north) and the scrollable page view (center). */
    private void initial() {
        setLayout(new BorderLayout(0, 0));
        JPanel topPanel = new JPanel(new FlowLayout(FlowLayout.CENTER));
        add(topPanel, BorderLayout.NORTH);
        btnFirstPage = createButton("|<<");
        topPanel.add(btnFirstPage);
        btnPreviousPage = createButton("<<");
        topPanel.add(btnPreviousPage);
        txtGoPage = new JTextField(10);
        txtGoPage.setHorizontalAlignment(JTextField.CENTER);
        topPanel.add(txtGoPage);
        btnNextPage = createButton(">>");
        topPanel.add(btnNextPage);
        btnLastPage = createButton(">>|");
        topPanel.add(btnLastPage);

        search = new JButton("search");
        topPanel.add(search);
        JScrollPane scrollPane = new JScrollPane();
        add(scrollPane, BorderLayout.CENTER);
        JPanel viewPanel = new JPanel(new BorderLayout(0, 0));
        scrollPane.setViewportView(viewPanel);

        pagePanel = new PagePanel();
        Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();
        pagePanel.setPreferredSize(screenSize);
        viewPanel.add(pagePanel, BorderLayout.CENTER);

        search.addActionListener(new Action1());
    }

    private JButton createButton(String string) {
        return new JButton(string);
    }

    /**
     * Stores the document this viewer belongs to.
     *
     * @param pdfFile the PDF document being displayed
     */
    public void setPDFFile(PDFFile pdfFile) {
        this.pdfFile = pdfFile;
    }

    /**
     * Returns the panel that renders the page, so callers can invoke
     * {@code showPage} on it.
     *
     * @return the page panel created by this viewer
     */
    public PagePanel getPagePanel() {
        return pagePanel;
    }

    /** Opens the search dialog when the "search" button is pressed. */
    class Action1 implements ActionListener {
        @Override
        public void actionPerformed(ActionEvent e) {
            JDialog jDialog = new JDialog(SwingUtilities.getWindowAncestor(search));
            // A new dialog is created on every click; dispose it on close so the
            // closed ones do not pile up.
            jDialog.setDefaultCloseOperation(JDialog.DISPOSE_ON_CLOSE);
            Label label = new Label("Search: ");
            // Named differently from the static jTextField field to avoid shadowing it.
            final JTextField searchField = new JTextField(10);
            searchField.addActionListener(new ActionListener() {

                @Override
                public void actionPerformed(ActionEvent e) {
                    text = searchField.getText();
                    // Search the text extracted from the PDF. The original passed the
                    // PDFFile object here, which does not match search(String, String).
                    search(parsedText, text);
                    System.out.println("Search for text " + searchField.getText() + " requested");
                }
            });
            // If you want to react to every change of text in the textfield, you can
            // use a DocumentListener and invoke the search method for all events.
            searchField.getDocument().addDocumentListener(new DocumentListener() {

                @Override
                public void removeUpdate(DocumentEvent e) {
                }

                @Override
                public void insertUpdate(DocumentEvent e) {
                }

                @Override
                public void changedUpdate(DocumentEvent e) {
                }
            });
            JPanel panel = new JPanel();
            jDialog.add(panel);
            panel.add(label);
            panel.add(searchField);
            jDialog.pack();
            jDialog.setLocationRelativeTo(search);
            jDialog.setVisible(true);
        }
    }

    /**
     * Counts the non-overlapping, case-sensitive occurrences of {@code text} in
     * {@code parsedText} and stores the result in {@link #count}, printing the running
     * count for each match.
     *
     * <p>Before matching, every run of whitespace (including no-break spaces) in both
     * arguments is collapsed to a single space, so a phrase such as "Hello User" is
     * found even when the two words are separated by a line break or several blanks
     * in the extracted text.
     *
     * @param parsedText the text to search in; {@code null} yields a count of 0
     * @param text the phrase to look for; {@code null} or empty yields a count of 0
     */
    public static void search(String parsedText, String text) {
        count = 0;
        // An empty needle matches at every index and would never terminate the loop.
        if (parsedText == null || text == null || text.isEmpty()) {
            return;
        }

        String haystack = collapseWhitespace(parsedText);
        String needle = collapseWhitespace(text);

        int hit = haystack.indexOf(needle);
        while (hit != -1) {
            count++;
            System.out.println("Count value is :::::" + count);
            hit = haystack.indexOf(needle, hit + needle.length());
        }
    }

    /** Replaces each run of whitespace or space characters in {@code s} with one space. */
    private static String collapseWhitespace(String s) {
        StringBuilder out = new StringBuilder(s.length());
        boolean inGap = false;
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            // isWhitespace covers tabs and line breaks; isSpaceChar adds no-break spaces.
            if (Character.isWhitespace(c) || Character.isSpaceChar(c)) {
                if (!inGap) {
                    out.append(' ');
                    inGap = true;
                }
            } else {
                out.append(c);
                inGap = false;
            }
        }
        return out.toString();
    }

    /**
     * Demo entry point: maps {@code file.pdf} into memory, shows its first page in a
     * frame, then extracts the document's text into {@link #parsedText} for searching.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            long heapSize = Runtime.getRuntime().totalMemory();
            System.out.println("Heap Size = " + heapSize);

            JFrame frame = new JFrame("PDF Test");
            frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

            // load a pdf from a byte buffer
            File file = new File("file.pdf");
            ByteBuffer buf;
            RandomAccessFile raf = new RandomAccessFile(file, "r");
            try {
                FileChannel channel = raf.getChannel();
                buf = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
            } finally {
                // The mapping stays valid after the channel is closed.
                raf.close();
            }
            final PDFFile pdffile = new PDFFile(buf);
            PdfViewer pdfViewer = new PdfViewer();
            pdfViewer.setPDFFile(pdffile);
            frame.add(pdfViewer);
            frame.pack();
            frame.setVisible(true);

            PDFPage page = pdffile.getPage(0);
            pdfViewer.getPagePanel().showPage(page);

            PDDocument doc = PDDocument.load(file);
            try {
                PDFTextStripper stripper = new PDFTextStripper();
                parsedText = stripper.getText(doc);
            } finally {
                doc.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

所以我用谷歌搜索后,发现可以使用 Apache Lucene 来做到这一点。我已经看过下面这个链接:

Exact Phrase search using Lucene?

但我不明白如何按照我的要求实现这个目标。如果有人有想法,你可以分享给我。谢谢你。

0 个答案:

没有答案