使用 C# 解析 Google SERP——我认为问题出在正则表达式上。你能帮我吗?它总是返回 position 0。
/// <summary>
/// Downloads the Google result page for <paramref name="searchTerm"/> and reports where
/// <paramref name="url"/> appears in it.
/// </summary>
/// <param name="url">Address to look for; handed unchanged to <c>FindPosition</c>.</param>
/// <param name="searchTerm">Search phrase; URL-encoded before being placed in the query string.</param>
/// <returns>The value <c>FindPosition</c> computes for the downloaded HTML.</returns>
public static int GetPosition(Uri url, string searchTerm)
{
    string text = string.Format("http://www.google.com/search?num=1000&q={0}&btnG=Search", HttpUtility.UrlEncode(searchTerm));
    Console.WriteLine(text);
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(text);
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        // Decode with the charset the server declares. Forcing ASCII would replace every
        // non-ASCII byte with '?', corrupting any result URL or text outside that range.
        Encoding encoding = Encoding.UTF8;
        string charset = response.CharacterSet;
        if (!string.IsNullOrEmpty(charset))
        {
            try
            {
                encoding = Encoding.GetEncoding(charset);
            }
            catch (ArgumentException)
            {
                // Unknown or unsupported charset name: keep the UTF-8 fallback.
            }
        }

        using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
        {
            string html = reader.ReadToEnd();
            return FindPosition(html, url);
        }
    }
}
private static int FindPosition(string html, Uri url)
{
string lookup = "(<h3 class=\"r\"><a href=\"/url\\?q=)(\\w+[a-zA-Z0-9.\\-?=/:]*)";
[...]
}
}
}
答案 0 :(得分:0)
/// <summary>
/// Runs a Google search for <paramref name="searchTerm"/>, reads the returned HTML and
/// delegates to <c>FindPosition</c> to locate <paramref name="url"/> in it.
/// </summary>
/// <param name="url">Address to look for; handed unchanged to <c>FindPosition</c>.</param>
/// <param name="searchTerm">Search phrase; URL-encoded before being placed in the query string.</param>
/// <returns>The value <c>FindPosition</c> computes for the downloaded HTML.</returns>
public static int GetPosition(Uri url, string searchTerm)
{
    string text = string.Format("http://www.google.com/search?num=1000&q={0}&btnG=Search", HttpUtility.UrlEncode(searchTerm));
    Console.WriteLine(text);
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(text);
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        // Decode with the charset the server declares. Forcing ASCII would replace every
        // non-ASCII byte with '?', corrupting any result URL or text outside that range.
        Encoding encoding = Encoding.UTF8;
        string charset = response.CharacterSet;
        if (!string.IsNullOrEmpty(charset))
        {
            try
            {
                encoding = Encoding.GetEncoding(charset);
            }
            catch (ArgumentException)
            {
                // Unknown or unsupported charset name: keep the UTF-8 fallback.
            }
        }

        using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
        {
            string html = reader.ReadToEnd();
            return FindPosition(html, url);
        }
    }
}
/// <summary>
/// Scans <paramref name="html"/> for result anchors of the form <c>&lt;a href="/url?q=..."</c>
/// and returns the 1-based ordinal of the first one whose matched text contains
/// <paramref name="url"/>.
/// </summary>
/// <param name="html">Markup of the search result page.</param>
/// <param name="url">Address to look for; compared via its <c>ToString()</c> form.</param>
/// <returns>The 1-based ordinal of the first matching anchor, or 0 when none matches.</returns>
private static int FindPosition(string html, Uri url)
{
    Regex linkPattern = new Regex("<a href=\"/url\\?q=\\w+[a-zA-Z0-9.\\-?=/:]*");
    int rank = 0;

    // Walk the matches one at a time; stop at the first anchor that mentions the URL.
    for (Match hit = linkPattern.Match(html); hit.Success; hit = hit.NextMatch())
    {
        rank++;
        if (hit.Value.Contains(url.ToString()))
        {
            return rank;
        }
    }

    return 0;
}