有人能告诉我如何使用用户编写的网页文档在C#应用程序中显示HTML网页的标题吗？
答案 0 :(得分:-1)
你可以试试这个。首先，您需要获取整个HTML响应，然后在其中查找并提取title元素之间的文本。
/// <summary>
/// Fetches the document at a URL and extracts the text of its first
/// &lt;title&gt; element.
/// </summary>
public class TitleScraper {
    // Lazy quantifier (.*?) stops at the FIRST closing </title>; a greedy match
    // would run to the last </title> in the document (e.g. one inside inline SVG).
    // Singleline lets '.' cross line breaks so multi-line titles are captured.
    // Built once and reused instead of being re-created on every Scrape() call.
    private static readonly System.Text.RegularExpressions.Regex TitlePattern =
        new System.Text.RegularExpressions.Regex(
            @"<title\b[^>]*>(.*?)</title>",
            System.Text.RegularExpressions.RegexOptions.IgnoreCase
                | System.Text.RegularExpressions.RegexOptions.Singleline);

    private readonly string url;

    /// <summary>Creates a scraper for the given address.</summary>
    /// <param name="url">URI string handed to <c>WebRequest.Create</c> when <see cref="Scrape"/> runs.</param>
    public TitleScraper(string url) {
        this.url = url;
    }

    /// <summary>
    /// Trimmed text of the first title element found by the last <see cref="Scrape"/> call;
    /// an empty string when the document contained no title element.
    /// </summary>
    public string Title { get; set; }

    /// <summary>
    /// Requests the configured URL, reads the whole response body as text and
    /// stores the trimmed title text in <see cref="Title"/>.
    /// Exceptions thrown by the request or by reading the stream propagate to the caller.
    /// </summary>
    public void Scrape() {
        WebRequest request = WebRequest.Create(this.url);

        // Dispose response, stream and reader even if reading throws, so the
        // underlying connection/handle is released.
        using (WebResponse response = request.GetResponse())
        using (Stream data = response.GetResponseStream())
        using (StreamReader sr = new StreamReader(data)) {
            string html = sr.ReadToEnd();
            System.Text.RegularExpressions.Match match = TitlePattern.Match(html);

            // Keep the previous contract: no title element yields an empty string.
            Title = match.Success ? match.Groups[1].Value.Trim() : string.Empty;
        }
    }
}
// Note: if you are trying to read from a document file instead of a URL, you can use the following instead.
/// <summary>
/// File-based variant: reads the whole document from disk and stores the trimmed
/// text of its first &lt;title&gt; element in <c>Title</c> (empty string when
/// no title element is present).
/// </summary>
public void Scrape() {
    string filePath = @"c:\user\filename.doc"; // location of your file

    // 'using' closes the reader even when ReadToEnd throws.
    string text;
    using (StreamReader sr = new StreamReader(filePath)) {
        text = sr.ReadToEnd();
    }

    // Lazy (.*?) stops at the first </title>; a greedy match would swallow
    // everything up to the last </title> in the document. Singleline lets '.'
    // span line breaks so multi-line titles are still captured.
    string regex = @"<title\b[^>]*>(.*?)</title>";
    System.Text.RegularExpressions.Regex ex = new System.Text.RegularExpressions.Regex(
        regex,
        System.Text.RegularExpressions.RegexOptions.IgnoreCase
            | System.Text.RegularExpressions.RegexOptions.Singleline);

    System.Text.RegularExpressions.Match match = ex.Match(text);
    Title = match.Success ? match.Groups[1].Value.Trim() : string.Empty;
}