How do I extract all the links from a Facebook page? Can I use jsoup to extract them, and then pass the "like" links as parameters to fetch information about all the users who liked that particular page?
private static String readAll(Reader rd) throws IOException {
    // Read the stream character by character into a single string
    StringBuilder sb = new StringBuilder();
    int cp;
    while ((cp = rd.read()) != -1) {
        sb.append((char) cp);
    }
    return sb.toString();
}

public static JSONObject readurl(String url) throws IOException, JSONException {
    // Fetch the URL and parse the response body as JSON
    InputStream is = new URL(url).openStream();
    try {
        BufferedReader rd = new BufferedReader(
                new InputStreamReader(is, Charset.forName("UTF-8")));
        String jsonText = readAll(rd);
        return new JSONObject(jsonText);
    } finally {
        is.close();
    }
}

public static void main(String[] args) throws IOException, JSONException, FacebookException {
    System.out.println("\nEnter the search string:");
    @SuppressWarnings("resource")
    Scanner sc = new Scanner(System.in);
    String s = sc.nextLine();
    // Query the Graph API for the given object and print the raw JSON
    JSONObject json = readurl("https://graph.facebook.com/" + s);
    System.out.println(json);
}
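To then pull the users who liked something, the same readurl helper could in principle be pointed at the Graph API's likes edge. A minimal, untested sketch, assuming a valid access token (objectId and ACCESS_TOKEN below are placeholders, and whether the edge returns user data depends on the Graph API version and the permissions granted):

String objectId = "1234567890";          // placeholder: ID of a post or other likeable object
String ACCESS_TOKEN = "YOUR_TOKEN_HERE"; // placeholder: token obtained from a Facebook app
// Query the likes edge; readurl parses the JSON response
JSONObject likes = readurl("https://graph.facebook.com/" + objectId
        + "/likes?access_token=" + ACCESS_TOKEN);
System.out.println(likes);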
Can I modify and integrate the two pieces of code? The following code extracts all the links from a particular page. I tried combining it with the code above, but it doesn't work.
public static void main(String[] args) throws IOException {
    String url = "http://www.firstpost.com/tag/crime-in-india";
    // Fetch the page and collect every anchor tag
    Document doc = Jsoup.connect(url).get();
    Elements links = doc.getElementsByTag("a");
    System.out.println(links.size());
    for (Element link : links) {
        // Print the absolute URL followed by at most 35 characters of link text
        System.out.println(link.absUrl("href") + " " + trim(link.text(), 35));
    }
}

public static String trim(String s, int width) {
    if (s.length() > width)
        return s.substring(0, width - 1) + ".";
    else
        return s;
}
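One way to wire the two snippets together is to let jsoup collect the anchors and then hand any Facebook links to the readurl helper from the first snippet. A rough sketch under those assumptions (the host substitution below is illustrative; not every facebook.com path maps to a readable Graph API object):

// Sketch: collect links with jsoup, then try the Graph API for Facebook ones
Document doc = Jsoup.connect("https://www.facebook.com/SomePage").get(); // placeholder page
for (Element link : doc.getElementsByTag("a")) {
    String href = link.absUrl("href");
    if (href.startsWith("https://www.facebook.com/")) {
        String graphUrl = href.replace("https://www.facebook.com/",
                                       "https://graph.facebook.com/");
        try {
            System.out.println(readurl(graphUrl));
        } catch (IOException | JSONException e) {
            System.out.println("Could not read " + graphUrl); // not a Graph object
        }
    }
}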
Answer 0 (score: 1)
You could also try an alternative approach:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedHashSet;
import java.util.Set;

import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTML.Tag;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

public class URLExtractor {

    // Parser callback that records the href of every <a> tag it encounters
    private static class HTMLParserCallback extends HTMLEditorKit.ParserCallback {

        private final Set<String> urls;

        public HTMLParserCallback() {
            urls = new LinkedHashSet<String>(); // keeps first-seen order, no duplicates
        }

        public Set<String> getUrls() {
            return urls;
        }

        @Override
        public void handleSimpleTag(Tag t, MutableAttributeSet a, int pos) {
            handleTag(t, a, pos);
        }

        @Override
        public void handleStartTag(Tag t, MutableAttributeSet a, int pos) {
            handleTag(t, a, pos);
        }

        private void handleTag(Tag t, MutableAttributeSet a, int pos) {
            if (t == Tag.A) {
                Object href = a.getAttribute(HTML.Attribute.HREF);
                if (href != null) {
                    urls.add(href.toString()); // the Set already ignores duplicates
                }
            }
        }
    }

    public static void main(String[] args) throws IOException {
        InputStream is = null;
        try {
            String u = "https://www.facebook.com/";
            URL url = new URL(u);
            is = url.openStream(); // throws an IOException
            HTMLParserCallback cb = new HTMLParserCallback();
            new ParserDelegator().parse(new BufferedReader(new InputStreamReader(is)), cb, true);
            for (String aUrl : cb.getUrls()) {
                System.out.println("Found URL: " + aUrl);
            }
        } catch (MalformedURLException mue) {
            mue.printStackTrace();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } finally {
            if (is != null) { // guard against the stream never having been opened
                try {
                    is.close();
                } catch (IOException ioe) {
                    // nothing to see here
                }
            }
        }
    }
}
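The callback stores href values exactly as they appear in the markup, so many of them will be relative. If absolute links are needed, each one can be resolved against the page URL with java.net.URL; a small illustrative variant of the print loop above:

// Resolve each collected href against the page it came from
URL base = new URL("https://www.facebook.com/");
for (String aUrl : cb.getUrls()) {
    try {
        System.out.println("Found URL: " + new URL(base, aUrl));
    } catch (MalformedURLException e) {
        // e.g. "javascript:" pseudo-links cannot be resolved
        System.out.println("Skipping unresolvable href: " + aUrl);
    }
}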
Answer 1 (score: 1)
This kind of works, but I'm not sure you can do this with jsoup; I would rather look at casperjs or phantomjs.
import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

public class GetFacebookLinks {

    // Fetch the page, collect every element with the given tag,
    // then keep only those matching the CSS selector
    public static Elements getElementsByTag_then_FilterBySelector(String tag, String httplink, String selector) {
        Document doc;
        try {
            doc = Jsoup.connect(httplink).get();
        } catch (IOException e) {
            e.printStackTrace();
            return new Elements(); // avoid an NPE when the fetch fails
        }
        Elements links = doc.getElementsByTag(tag);
        return links.select(selector);
    }

    // Test functionality
    public static void main(String[] args) {
        // The class name for the like links on Facebook is UFILikeLink
        Elements likeLinks = getElementsByTag_then_FilterBySelector("a", "http://www.facebook.com", ".UFILikeLink");
        System.out.println(likeLinks);
    }
}
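Note that facebook.com renders most of its interface client-side with JavaScript, so the static HTML that jsoup downloads will usually contain few or no .UFILikeLink elements. That is why a headless browser such as casperjs or phantomjs (as suggested above), or the Graph API approach from the question, tends to be the more reliable route.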