在C#中解析Google SERP

时间:2019-07-04 12:02:30

标签: c# regex parsing

我正在用 C# 解析 Google 搜索结果页(SERP),我认为问题出在正则表达式上。你能帮帮我吗?它总是返回位置 0(position 0)。

        /// <summary>
        /// Downloads a Google results page for <paramref name="searchTerm"/> and
        /// hands the HTML to <c>FindPosition</c> together with <paramref name="url"/>.
        /// </summary>
        /// <param name="url">The address whose position in the results is wanted.</param>
        /// <param name="searchTerm">The query text; it is URL-encoded before being sent.</param>
        /// <returns>Whatever <c>FindPosition</c> reports for the downloaded page.</returns>
        public static int GetPosition(Uri url, string searchTerm)
        {
            string encodedTerm = HttpUtility.UrlEncode(searchTerm);
            string searchAddress = string.Format("http://www.google.com/search?num=1000&q={0}&btnG=Search", encodedTerm);

            // Echo the request address so the caller can see what is being fetched.
            Console.WriteLine(searchAddress);

            string page;
            HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(searchAddress);
            using (HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse())
            using (StreamReader pageReader = new StreamReader(webResponse.GetResponseStream(), Encoding.ASCII))
            {
                // The body is decoded as ASCII, exactly as requested from the reader.
                page = pageReader.ReadToEnd();
            }

            return FindPosition(page, url);
        }
        // Intended to locate `url` within the Google results markup in `html`.
        // NOTE(review): the remainder of the body is elided in this listing, so how
        // `lookup` is applied to `html` and `url` cannot be seen from here.
        private static int FindPosition(string html, Uri url)
        {
            // Regex pattern with two capture groups:
            //   group 1 - the literal prefix  <h3 class="r"><a href="/url?q=
            //   group 2 - one or more word characters followed by any run of
            //             letters, digits, '.', '-', '?', '=', '/' or ':'
            // Group 2 stops at the first character outside that set (for example
            // '_', '%' or '&'), and group 1 requires the <h3 class="r"> wrapper to
            // sit immediately before the anchor.
            string lookup = "(<h3 class=\"r\"><a href=\"/url\\?q=)(\\w+[a-zA-Z0-9.\\-?=/:]*)";
            [...]
        }
    }
}

1 个答案:

答案 0 :(得分:0)

 /// <summary>
 /// Fetches a Google results page for <paramref name="searchTerm"/> and returns
 /// the position that <c>FindPosition</c> reports for <paramref name="url"/>.
 /// </summary>
 /// <param name="url">The address to look for among the result links.</param>
 /// <param name="searchTerm">The query text; it is URL-encoded before being sent.</param>
 /// <returns>The value returned by <c>FindPosition</c> for the downloaded HTML.</returns>
 public static int GetPosition(Uri url, string searchTerm)
        {
            var encodedTerm = HttpUtility.UrlEncode(searchTerm);
            var searchAddress = string.Format("http://www.google.com/search?num=1000&q={0}&btnG=Search", encodedTerm);

            // Echo the request address so the caller can see what is being fetched.
            Console.WriteLine(searchAddress);

            string page;
            var webRequest = (HttpWebRequest)WebRequest.Create(searchAddress);
            using (var webResponse = (HttpWebResponse)webRequest.GetResponse())
            using (var pageReader = new StreamReader(webResponse.GetResponseStream(), Encoding.ASCII))
            {
                // The body is decoded as ASCII, exactly as requested from the reader.
                page = pageReader.ReadToEnd();
            }

            return FindPosition(page, url);
        }
        // Matches each result anchor of the form <a href="/url?q=TARGET&...">.
        // Group 1 captures TARGET up to the first '&' or '"', so characters such as
        // '_', '%' or '~' no longer cut the captured address short.
        // Built once and reused, since the pattern never changes.
        private static readonly Regex ResultLinkRegex = new Regex("<a href=\"/url\\?q=([^\"&]+)");

        /// <summary>
        /// Finds the 1-based position of the first result link in
        /// <paramref name="html"/> whose target contains <paramref name="url"/>.
        /// </summary>
        /// <param name="html">The markup of the results page to scan.</param>
        /// <param name="url">The address to look for; compared via <see cref="Uri.ToString"/>.</param>
        /// <returns>
        /// The 1-based index of the first matching result link, or 0 when no link matches.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="url"/> is null (or <paramref name="html"/> is null, raised by the regex engine).
        /// </exception>
        private static int FindPosition(string html, Uri url)
        {
            if (url == null)
            {
                throw new ArgumentNullException("url");
            }

            // Hoisted: the target text is the same for every candidate link.
            string target = url.ToString();

            int index = 1;
            foreach (Match match in ResultLinkRegex.Matches(html))
            {
                string candidate = match.Groups[1].Value;

                // The q= value may be percent-encoded (e.g. %3F for '?'), whereas
                // Uri.ToString() yields the unescaped form, so the decoded text is
                // tried as well as the raw capture.
                if (candidate.Contains(target) || Uri.UnescapeDataString(candidate).Contains(target))
                {
                    return index;
                }

                index++;
            }

            return 0;
        }