我正在努力将下面的爬虫类中的StringBuffer值传递给主类:
import java.util.Scanner;
import java.util.ArrayList;
import java.util.*;
import java.io.*;
import java.net.URL;
public class Crawler
{
    /** Worker-thread count (GUI statistic, not yet used by this class). */
    public int threads = 0;
    /** Number of pages successfully downloaded (incremented per fetched page). */
    public int results = 0;
    /**
     * Concatenated HTML source of every page crawled so far. Read by the caller
     * after {@link #crawler(String)} returns. Initialized to "" so callers never
     * see null even when no page could be fetched.
     */
    public String output = "";

    /**
     * Breadth-first crawl starting from {@code startingURL}, visiting at most
     * 100 distinct URLs and accumulating each page's HTML source into {@link #output}.
     *
     * @param startingURL the first URL to fetch
     */
    public void crawler(String startingURL)
    {
        List<String> listOfPendingURLs = new ArrayList<String>();
        List<String> listOfTraversedURLs = new ArrayList<String>();
        StringBuilder allPages = new StringBuilder();
        listOfPendingURLs.add(startingURL); // seed the frontier with the starting URL
        while (!listOfPendingURLs.isEmpty() &&
               listOfTraversedURLs.size() <= 100) // stop once 100 URLs have been visited
        {
            String urlString = listOfPendingURLs.remove(0);
            if (!listOfTraversedURLs.contains(urlString)) // skip URLs already crawled
            {
                listOfTraversedURLs.add(urlString);
                System.out.println("Craw " + urlString); // TODO: display on the GUI panel instead
                // try-with-resources: the original leaked the reader when readLine() threw.
                try (BufferedReader in = new BufferedReader(
                        new InputStreamReader(new URL(urlString).openStream())))
                {
                    String line;
                    while ((line = in.readLine()) != null)
                        allPages.append(line);
                    results++; // GUI statistics variable - for future use
                }
                catch (Exception e)
                {
                    e.printStackTrace();
                }
                // BUG FIX: the original assigned output = strbuf.toString() per page,
                // overwriting it each iteration, so the caller only ever saw the LAST
                // page (or null when every fetch failed). Publish the accumulated
                // source instead.
                output = allPages.toString();
                // Queue every URL found on this page that has not been visited yet.
                for (String s : getSubURLs(urlString)) {
                    if (!listOfTraversedURLs.contains(s))
                        listOfPendingURLs.add(s);
                }
            }
        }
    }

    /**
     * Extracts every {@code http:}-prefixed, double-quote-terminated URL from the
     * page at {@code urlString}.
     *
     * @param urlString the page to scan
     * @return the URLs found; empty when the page cannot be read
     */
    public static ArrayList<String> getSubURLs(String urlString) {
        ArrayList<String> list = new ArrayList<String>();
        // try-with-resources: the original never closed the Scanner (stream leak).
        try (Scanner input = new Scanner(new URL(urlString).openStream())) {
            while (input.hasNext())
            {
                String line = input.nextLine();
                // BUG FIX: the original carried `current` across lines, so each new
                // line was searched from the previous line's offset — silently
                // skipping URLs. Restart the search at 0 for every line, and use
                // >= 0 so a URL at column 0 is not missed.
                int current = line.indexOf("http:");
                while (current >= 0)
                {
                    int endIndex = line.indexOf("\"", current);
                    if (endIndex > 0) { // Ensure that a correct URL is found
                        list.add(line.substring(current, endIndex));
                        current = line.indexOf("http:", endIndex);
                    }
                    else
                        current = -1; // no closing quote on this line; give up on it
                }
            }
        }
        catch (Exception ex)
        {
            System.out.println("Error: " + ex.getMessage());
        }
        return list;
    }
}
我尝试把缓冲区内容传入的 main 方法如下所示:
public class mainApp
{
    /**
     * Entry point: builds the GUI frame, then polls it for search requests and
     * runs the crawler + text analyser whenever one is raised.
     */
    public static void main( String args[] )
    {
        int width = 800;
        int height = 600;
        String title = "Kangaroo";
        int threads = 1;
        int spiders = 20;
        int results = 10325;
        String status = "completed";
        MyFrame frame = new MyFrame(width, height, title, threads, spiders, results, status);
        Crawler crawler = new Crawler();
        TextAnalyser textAnalyser = new TextAnalyser();
        while(width > 1) // poll loop; width never changes, so this runs until interrupted
        {
            if(frame.startSearch)//start crawling
            {
                crawler.crawler(frame.url);//start getting html source code
                textAnalyser.analyse(crawler.output);//send the source code to text analyser
                // BUG FIX: without resetting the flag, the same search re-ran forever.
                // NOTE(review): assumes startSearch is a writable public field — confirm in MyFrame.
                frame.startSearch = false;
            }
            // BUG FIX: the original loop spun with no sleep, pegging one CPU core at
            // 100% while waiting for the user. Sleep briefly between polls.
            try {
                Thread.sleep(100);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt(); // restore interrupt status and exit
                break;
            }
        }
    }//end main method
}//end class
由于某种原因,缓冲区被读出来是 null,无法显示源代码……有什么想法吗?我尝试了很多改动,但似乎都不起作用。
答案 0(得分:0):
调整您的crawler方法以在调用时返回一个字符串,如:
/**
 * Breadth-first crawl starting from {@code startingURL}, visiting at most 100
 * distinct URLs, returning the concatenated HTML source of every page fetched.
 *
 * @param startingURL the first URL to visit
 * @return the accumulated page source; empty when nothing could be fetched
 */
public String crawler(String startingURL) {
    // BUG FIX: the original snippet assigned these lists without declaring them,
    // which does not compile unless identically-named fields already exist in
    // the enclosing class. Declare them as locals, as in the asker's version.
    List<String> listOfPendingURLs = new ArrayList<String>();
    List<String> listOfTraversedURLs = new ArrayList<String>();
    // BUG FIX: `result = result + strbuf.toString()` rebuilt the whole string on
    // every page (O(n^2)); accumulate in a single StringBuilder instead.
    StringBuilder result = new StringBuilder();
    listOfPendingURLs.add(startingURL); // seed the frontier
    while (!listOfPendingURLs.isEmpty()
            && listOfTraversedURLs.size() <= 100) { // cap the crawl at 100 URLs
        String urlString = listOfPendingURLs.remove(0);
        if (!listOfTraversedURLs.contains(urlString)) {
            listOfTraversedURLs.add(urlString);
            // TODO display on panel instead
            System.out.println("Craw " + urlString);
            // try-with-resources: the original leaked the reader on exception.
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(new URL(urlString).openStream()))) {
                String line;
                while ((line = in.readLine()) != null) {
                    result.append(line);
                }
                results++; // GUI statistics field of the enclosing class - for future use
            }
            catch (Exception e) {
                e.printStackTrace();
            }
            // Queue every URL found on this page that has not been visited yet.
            for (String s : getSubURLs(urlString)) {
                if (!listOfTraversedURLs.contains(s))
                    listOfPendingURLs.add(s);
            }
        }
    }
    return result.toString();
}