使用正则表达式在循环中只提取链接

时间:2016-05-11 16:51:35

标签: java regex

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

/**
 * Queries the Google Custom Search JSON API and prints every response line
 * that contains the text "jpg".
 *
 * <p>Matching lines are printed whole (JSON key, quotes and trailing comma
 * included). The URL pattern compiled in {@code main} is not applied to them.
 */
public class google {

    /**
     * Sends the search request, collects the response lines that contain
     * "jpg" and prints them to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the request cannot be sent or the response cannot be read
     */
    public static void main(String[] args) throws Exception {
        StringBuilder a = new StringBuilder();
        // Pattern intended to pull the bare link out of each line; it is
        // compiled here but not used for matching yet.
        String regex = "(https?):\\/\\/(www\\.)?[a-z0-9\\.:].*?(?=\\s)";
        Pattern r = Pattern.compile(regex);
        String key = "myapikey";
        String qry = "tree";

        URL url = new URL("https://www.googleapis.com/customsearch/v1?key="+key+ "&cx=**************&q="+ qry +"&alt=json");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            conn.setRequestMethod("GET");
            conn.setRequestProperty("Accept", "application/json");

            // try-with-resources closes the reader (and the underlying stream)
            // even when reading fails; the charset is fixed so decoding does
            // not depend on the platform default.
            try (BufferedReader br = new BufferedReader(new InputStreamReader(
                    conn.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {

                // NOTE(review): this echoes the full request URL, API key included.
                System.out.println(url);
                System.out.println("Output from Server .... \n");

                String output;
                while ((output = br.readLine()) != null) {
                    if (output.contains("jpg")) {
                        a.append(output).append('\n');
                    }
                }
            }

            System.out.println(a);       // Prints the collected lines that mention "jpg"
        } finally {
            // Release the connection on both the success and the failure path.
            conn.disconnect();
        }
    }

}

该程序输出以下内容:

"url": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault1.jpg"

"og:image": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault2.jpg",

"twitter:image": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault3.jpg",

"thumbnailurl": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault4.jpg",

"src": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault5.jpg"

"url": "https://i.ytimg.com/vi/Iv9E9xLFUso/maxresdefault6.jpg",

"og:image": "https://i.ytimg.com/vi/Iv9E9xLFUso/maxresdefault7.jpg",

"thumbnailurl": "https://i.ytimg.com/vi/Iv9E9xLFUso/maxresdefault8.jpg",

"src": "https://i.ytimg.com/vi/Iv9E9xLFUso/maxresdefault9.jpg"

但我需要它只返回链接本身:

  

https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault1.jpg   https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault2.jpg   https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault3.jpg

     

等...

只能匹配链接的正则表达式是:

  

String regex = "(https?):\\/\\/(www\\.)?[a-z0-9\\.:].*?(?=\\s)";

但我不知道如何在这个程序中把它用起来。有什么想法吗?

感谢您的时间

1 个答案:

答案 0 :(得分:1)

最终找到了解决方案:改用了另一个正则表达式。谢谢大家的建议!

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Pattern;
import java.util.regex.Matcher;


/**
 * Queries the Google Custom Search JSON API and prints the image links
 * (jpg, jpeg, png or gif) found in the response, one link per line.
 */
public class google {

    /**
     * Matches an http(s) URL that ends in an image file extension.
     *
     * <p>Compiled once instead of once per response line. The literal dot
     * before the extension keeps paths such as {@code /gift} from being
     * reported as {@code /gif}.
     */
    private static final Pattern IMAGE_URL =
            Pattern.compile("https?://[a-zA-Z0-9/._-]+\\.(?:jpe?g|png|gif)");

    /**
     * Sends the search request and prints every image link contained in the
     * response body.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the request cannot be sent or the response cannot be read
     */
    public static void main(String[] args) throws Exception {
        StringBuilder results = new StringBuilder();

        String key = "myprivatekey";
        String qry = "tree";

        // Encode the query so search terms with spaces or reserved characters
        // still produce a valid URL.
        URL url = new URL(
                "https://www.googleapis.com/customsearch/v1?key=" + key + "&cx=myprivatekey&q="
                        + java.net.URLEncoder.encode(qry, "UTF-8") + "&alt=json");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            conn.setRequestMethod("GET");
            conn.setRequestProperty("Accept", "application/json");

            // try-with-resources closes the reader even when reading fails; the
            // charset is fixed so decoding does not depend on the platform default.
            try (BufferedReader br = new BufferedReader(new InputStreamReader(
                    conn.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {

                // NOTE(review): this echoes the full request URL, API key included.
                System.out.println(url);
                System.out.println("Output from Server .... \n");

                String output;
                while ((output = br.readLine()) != null) {
                    appendImageUrls(output, results);
                }
            }

            System.out.println(results);
        } finally {
            // Release the connection on both the success and the failure path.
            conn.disconnect();
        }
    }

    /** Appends every image link found in {@code line} to {@code out}, one per line. */
    private static void appendImageUrls(String line, StringBuilder out) {
        Matcher matcher = IMAGE_URL.matcher(line);
        // Loop rather than test once: a single line may hold several links.
        while (matcher.find()) {
            out.append(matcher.group()).append('\n');
        }
    }

}