如何获取 PdfPage 的内容

时间:2013-01-30 06:52:38

标签: java swing pdf

我正在使用 pdfviewer 查看 PDF,并在窗口中添加了一个搜索按钮。点击搜索按钮时,它会要求输入要搜索的词。输入搜索词后,程序应将搜索词与 PdfPage 的内容进行比较,如果匹配,则显示匹配结果。我已经写了一个搜索方法,在该方法中将搜索词与 PdfPage 的内容进行比较,但我卡在了这里:如何获取 PdfPage 的内容,并将其与搜索词进行比较?我编写的代码如下。

import java.awt.BorderLayout;
import java.awt.Dimension;
import java.awt.FlowLayout;
import java.awt.Label;
import java.awt.Toolkit;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;

import javax.swing.JButton;
import javax.swing.JDialog;
import javax.swing.JFrame;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextField;
import javax.swing.SwingUtilities;
import javax.swing.event.DocumentEvent;
import javax.swing.event.DocumentListener;

import com.google.common.base.CharMatcher;
import com.sun.pdfview.PDFFile;
import com.sun.pdfview.PagePanel;

/**
 * Swing panel that lays out a toolbar (page-navigation buttons, a page-number
 * field and a "search" button) above a scrollable {@link PagePanel}.
 *
 * <p>Pressing "search" opens a small dialog with a text field; pressing Enter
 * in that field counts the occurrences of the entered term in
 * {@link #parsedText} via {@link #search(String, String)}.
 *
 * <p>{@link #parsedText} is not populated by this class: the caller is expected
 * to assign the extracted document text to it before a search is requested.
 * The navigation buttons are created here but no listeners are attached to
 * them in this class.
 */
public class PdfViewer extends JPanel {
    private static enum Navigation {
        GO_FIRST_PAGE, FORWARD, BACKWARD, GO_LAST_PAGE, GO_N_PAGE
    }

    private static final CharMatcher POSITIVE_DIGITAL = CharMatcher.anyOf("0123456789");
    private static final String GO_PAGE_TEMPLATE = "%s of %s";
    private static final int FIRST_PAGE = 1;
    private int currentPage = FIRST_PAGE;
    private JButton btnFirstPage;
    private JButton btnPreviousPage;
    private JTextField txtGoPage;
    private JButton btnNextPage;
    private JButton btnLastPage;
    private JButton print;
    private JButton search;
    private PagePanel pagePanel;
    private static PDFFile pdfFile;

    /** Number of matches found by the most recent {@link #search(String, String)} call. */
    static int count;
    /** Search term most recently entered in the search dialog. */
    static String text;
    /** Plain text of the document to search in; assigned from outside this class. */
    static String parsedText;
    static JTextField jTextField;

    /** Creates the panel and builds its child components. */
    public PdfViewer() {
        initial();
    }

    /** Builds the toolbar and the scrollable page area and wires the search button. */
    private void initial() {
        setLayout(new BorderLayout(0, 0));

        JPanel topPanel = new JPanel(new FlowLayout(FlowLayout.CENTER));
        add(topPanel, BorderLayout.NORTH);

        btnFirstPage = createButton("|<<");
        topPanel.add(btnFirstPage);
        btnPreviousPage = createButton("<<");
        topPanel.add(btnPreviousPage);
        txtGoPage = new JTextField(10);
        txtGoPage.setHorizontalAlignment(JTextField.CENTER);
        topPanel.add(txtGoPage);
        btnNextPage = createButton(">>");
        topPanel.add(btnNextPage);
        btnLastPage = createButton(">>|");
        topPanel.add(btnLastPage);

        search = new JButton("search");
        topPanel.add(search);

        JScrollPane scrollPane = new JScrollPane();
        add(scrollPane, BorderLayout.CENTER);
        JPanel viewPanel = new JPanel(new BorderLayout(0, 0));
        scrollPane.setViewportView(viewPanel);

        // The page panel is given the full screen size as its preferred size,
        // so the surrounding scroll pane provides scrolling in smaller windows.
        pagePanel = new PagePanel();
        Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();
        pagePanel.setPreferredSize(screenSize);
        viewPanel.add(pagePanel, BorderLayout.CENTER);

        search.addActionListener(new Action1());
    }

    /**
     * Creates a toolbar button.
     *
     * @param string the button caption
     * @return a new button with the given caption
     */
    private JButton createButton(String string) {
        return new JButton(string);
    }

    /** Opens the search dialog when the "search" button is pressed. */
    class Action1 implements ActionListener {
        @Override
        public void actionPerformed(ActionEvent e) {
            JDialog jDialog = new JDialog(SwingUtilities.getWindowAncestor(search));
            // A new dialog is created on every click; dispose it on close so
            // closed dialogs do not accumulate as hidden windows.
            jDialog.setDefaultCloseOperation(JDialog.DISPOSE_ON_CLOSE);

            Label label = new Label("Search: ");
            // Named differently from the static jTextField field to avoid shadowing it.
            final JTextField searchField = new JTextField(10);
            searchField.addActionListener(new ActionListener() {

                @Override
                public void actionPerformed(ActionEvent e) {
                    // Search the extracted document text (a String), not the
                    // PDFFile handle: search() compares strings.
                    text = searchField.getText();
                    search(parsedText, text);
                    System.out.println("Search for text " + searchField.getText() + " requested");
                }
            });
            // If you want to react to every change of text in the text field,
            // invoke the search method from these callbacks. They are
            // intentionally empty for now, so searching happens only on Enter.
            searchField.getDocument().addDocumentListener(new DocumentListener() {

                @Override
                public void removeUpdate(DocumentEvent e) {
                }

                @Override
                public void insertUpdate(DocumentEvent e) {
                }

                @Override
                public void changedUpdate(DocumentEvent e) {
                }
            });

            JPanel panel = new JPanel();
            jDialog.add(panel);
            panel.add(label);
            panel.add(searchField);
            jDialog.pack();
            jDialog.setLocationRelativeTo(search);
            jDialog.setVisible(true);
        }
    }

    /**
     * Counts the non-overlapping occurrences of {@code text} in
     * {@code parsedText}, stores the result in {@link #count} and prints the
     * running count to standard output after each match.
     *
     * <p>If either argument is {@code null}, or {@code text} is empty, the
     * count is reset to 0 and nothing is printed.
     *
     * @param parsedText the text to search in; may be {@code null}
     * @param text the term to look for; may be {@code null} or empty
     */
    public static void search(String parsedText, String text) {
        count = 0;

        // An empty term matches at every index without advancing the cursor,
        // which would loop forever; a null argument would throw.
        if (parsedText == null || text == null || text.isEmpty()) {
            return;
        }

        int lastIndex = parsedText.indexOf(text);
        while (lastIndex != -1) {
            count++;
            System.out.println("Count value is :::::" + count);
            // Resume after the current match so matches never overlap.
            lastIndex = parsedText.indexOf(text, lastIndex + text.length());
        }
    }
}

在 main 方法中,我添加了以下几行代码,用于将文档转换为文本。

PDDocument doc = PDDocument.load(file);
try {
    // Extract the text of the document and keep it in parsedText.
    // NOTE(review): to limit extraction to one page, PDFTextStripper's
    // setStartPage/setEndPage can be set before getText — confirm against
    // the PDFBox version in use.
    PDFTextStripper stripper = new PDFTextStripper();
    parsedText = stripper.getText(doc);
} finally {
    // Close the document once the text has been extracted so it is not
    // left open for the lifetime of the application.
    doc.close();
}

我所参考的 pdfviewer 链接是

http://seanshou.blogspot.in/2012/10/java-swing-pdf-viewer.html

我能够获取 PdfPage,但问题是如何获取该特定页面中的内容。非常感谢任何帮助。

谢谢。

1 个答案:

答案 0 :(得分:1)

要从页面中提取文本,请使用 PDFBox。请参阅此链接:

http://pdfbox.apache.org/userguide/text_extraction.html