从谷歌图片获取网址

时间:2017-10-01 22:49:51

标签: c# google-image-search

我正在使用以下代码来获取网页上所有图片网址的列表。目标是从谷歌图像搜索获取所有图像网址。我的问题是我只得到20个网址,网址的照片是小照片而不是实际尺寸。

这是代码:

public List<string> FetchImages(string Url)
{
    List<string> imageList = new List<string>();

    //Append http:// if necessary
    if (!Url.StartsWith("http://") && !Url.StartsWith("https://"))
        Url = "http://" + Url;

    string responseUrl = string.Empty;
    string htmlData = ASCIIEncoding.ASCII.GetString(DownloadData(Url, out responseUrl));

    if (responseUrl != string.Empty)
        Url = responseUrl;

    if (htmlData != string.Empty)
    {
        string imageHtmlCode = "<img";
        string imageSrcCode = @"src=""";

        int index = htmlData.IndexOf(imageHtmlCode);
        while (index != -1)
        {
            //Remove previous data
            htmlData = htmlData.Substring(index);

            //Find the location of the two quotes that mark the image's location
            int brackedEnd = htmlData.IndexOf('>'); //make sure data will be inside img tag
            int start = htmlData.IndexOf(imageSrcCode) + imageSrcCode.Length;
            int end = htmlData.IndexOf('"', start + 1);

            //Extract the line
            if (end > start && start < brackedEnd)
            {
                string loc = htmlData.Substring(start, end - start);

                //Store line
                imageList.Add(loc);
            }

            //Move index to next image location
            if (imageHtmlCode.Length < htmlData.Length)
                index = htmlData.IndexOf(imageHtmlCode, imageHtmlCode.Length);
            else
                index = -1;
        }

        //Format the image URLs
        for (int i = 0; i < imageList.Count; i++)
        {
            string img = imageList[i];

            string baseUrl = GetBaseURL(Url);

            if ((!img.StartsWith("http://") && !img.StartsWith("https://"))
                && baseUrl != string.Empty)
                img = baseUrl + "/" + img.TrimStart('/');

            imageList[i] = img;
        }
    }

    return imageList;
}
private string GetBaseURL(string Url)
{
    int inx = Url.IndexOf("://") + "://".Length;
    int end = Url.IndexOf('/', inx);

    string baseUrl = string.Empty;
    if (end != -1)
        return Url.Substring(0, end);
    else
        return string.Empty;
}
private byte[] DownloadData(string Url, out string responseUrl)
{
    byte[] downloadedData = new byte[0];
    try
    {
        //Get a data stream from the url
        WebRequest req = WebRequest.Create(Url);
        WebResponse response = req.GetResponse();
        Stream stream = response.GetResponseStream();

        responseUrl = response.ResponseUri.ToString();

        //Download in chuncks
        byte[] buffer = new byte[1024];

        //Get Total Size
        int dataLength = (int)response.ContentLength;

        //Download to memory
        //Note: adjust the streams here to download directly to the hard drive
        MemoryStream memStream = new MemoryStream();
        while (true)
        {
            //Try to read the data
            int bytesRead = stream.Read(buffer, 0, buffer.Length);

            if (bytesRead == 0)
            {
                break;
            }
            else
            {
                //Write the downloaded data
                memStream.Write(buffer, 0, bytesRead);
            }
        }

        //Convert the downloaded stream to a byte array
        downloadedData = memStream.ToArray();

        //Clean up
        stream.Close();
        memStream.Close();
    }
    catch (Exception)
    {
        responseUrl = string.Empty;
        return new byte[0];
    }

    return downloadedData;
    }

1 个答案:

答案 0 :(得分:0)

Google似乎一次只能加载一定数量的图片。当您在计算机上滚动时,它们会加载更多。难道如果它到达的图像没有被填充,它会认为图像的结束了吗?