显示HTML <title> </title>内容c#

时间:2016-02-20 19:34:12

标签: c# html

有人能告诉我,如何在 C# 应用程序中显示用户编写的网页文档(HTML)的标题吗?

1 个答案:

答案 0 :(得分:-1)

你可以试试这个。首先,您需要获取整个 HTML 响应,然后从中提取(抓取)title 元素之间的文本。

/// <summary>
/// Fetches the document at a URL and extracts the text of its first
/// <c>&lt;title&gt;</c> element into <see cref="Title"/>.
/// </summary>
public class TitleScraper {

    // Matches the first <title ...>...</title> pair; group 1 is the text between the tags.
    // [^>]* keeps the opening tag from running past its own '>', and the lazy
    // [\s\S]*? stops at the NEAREST closing tag, so a later <title> element
    // (for example inside inline SVG) is not swallowed into the result.
    private static readonly System.Text.RegularExpressions.Regex TitlePattern =
        new System.Text.RegularExpressions.Regex(
            @"<title\b[^>]*>([\s\S]*?)</title\s*>",
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    private readonly string url;

    /// <summary>Creates a scraper for the given address.</summary>
    /// <param name="url">Address passed to <c>WebRequest.Create</c> when <see cref="Scrape"/> runs.</param>
    public TitleScraper(string url) {
        this.url = url;
    }

    /// <summary>
    /// Trimmed text of the first title element found by the last <see cref="Scrape"/> call;
    /// an empty string when the document had no title element.
    /// </summary>
    public string Title { get; set; }

    /// <summary>
    /// Requests the document, reads the whole response body as text and stores the
    /// trimmed title text in <see cref="Title"/>.
    /// </summary>
    public void Scrape() {
        WebRequest request = WebRequest.Create(this.url);
        string html;

        // Dispose the response, its stream and the reader even if reading throws,
        // so the underlying connection / file handle is released.
        using (WebResponse response = request.GetResponse())
        using (Stream data = response.GetResponseStream())
        using (StreamReader sr = new StreamReader(data)) {
            html = sr.ReadToEnd();
        }

        // Groups[1].Value is an empty string when the match fails.
        Title = TitlePattern.Match(html).Groups[1].Value.Trim();
    }

}

//Note: If you are trying to read from a document file instead of a URL, you can use the following instead.

/// <summary>
/// Reads a local file as text and stores the trimmed text of its first
/// <c>&lt;title&gt;</c> element in <c>Title</c> (an empty string when none is found).
/// </summary>
public void Scrape() {

    string filePath = @"c:\user\filename.doc"; //location to your file
    string text;

    // 'using' closes the reader even when ReadToEnd throws.
    using (StreamReader sr = new StreamReader(filePath)) {
        text = sr.ReadToEnd();
    }

    // Group 1 is the text between the tags. The lazy [\s\S]*? stops at the nearest
    // closing tag, so a second <title> later in the file is not pulled into the result.
    string regex = @"<title\b[^>]*>([\s\S]*?)</title\s*>";
    System.Text.RegularExpressions.Regex ex = new System.Text.RegularExpressions.Regex(regex, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
    Title = ex.Match(text).Groups[1].Value.Trim();

}