我正在尝试使用 HttpWebRequest 和 HttpWebResponse 获取此 URL 的 HTML 源代码:“http://www.nguoiduatin.vn/con-sot-gia-ve-may-bay-cuc-hang-khong-noi-chua-cham-tran-a341336.html”,但收到如下错误:远程服务器返回错误:(500) 内部服务器错误。问题是,我可以从许多其他网站正常获取 HTML 源代码,唯独这个网站不行;而用浏览器访问该网页也一切正常。下面是我的代码片段:
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its body as text.
/// </summary>
/// <param name="url">Absolute URL to request.</param>
/// <returns>
/// The response body decoded with the charset announced by the server (or the
/// <see cref="StreamReader"/> default when none is announced); <c>null</c> when the
/// status code is not 200 OK or when any exception occurs. Failures are reported
/// through <c>WriteLog</c> rather than thrown.
/// </returns>
private string getHTMLFromURL(string url)
{
    try
    {
        WriteLog("getting html source from :" + url);
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        // NOTE(review): no User-Agent header is set on this request; some servers
        // reject such requests. Set request.UserAgent if the target requires one.

        // 'using' guarantees the response (and its connection) is released on every
        // path, including the non-OK branch and any exception while reading.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            if (response.StatusCode != HttpStatusCode.OK)
            {
                WriteLog("cannot get html source from "+url+" response status code = "+ response.StatusCode);
                return null;
            }

            WriteLog("response from "+url + "is OK, start get html source");

            // CharacterSet may be null or empty when the server sends no charset;
            // Encoding.GetEncoding("") would throw, so fall back to the reader default.
            string charset = response.CharacterSet;
            using (Stream receiveStream = response.GetResponseStream())
            using (StreamReader readStream = string.IsNullOrEmpty(charset)
                ? new StreamReader(receiveStream)
                : new StreamReader(receiveStream, Encoding.GetEncoding(charset)))
            {
                return readStream.ReadToEnd();
            }
        }
    }
    catch (WebException ex)
    {
        // GetResponse throws for protocol errors (e.g. 500); the error response
        // carried by the exception still holds a connection and must be closed.
        if (ex.Response != null)
        {
            ex.Response.Close();
        }
        WriteLog("Exception in getHTMLFromURL message = " + ex.Message);
        return null;
    }
    catch (Exception ex)
    {
        WriteLog("Exception in getHTMLFromURL message = " + ex.Message);
        return null;
    }
}
所以,有人能告诉我问题出在哪里吗?
答案 0 :(得分:0)
该服务器要求请求中带有 User-Agent 头,请在发送请求之前设置它:
// Create the request as before.
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
// Set a User-Agent header before calling GetResponse(); per this answer the
// target server requires one. The value "SO/1.0" is just an example string.
request.UserAgent = "SO/1.0";
这样就可以了。