我正在编写一个HTML到文本的解析器。我需要删除所有HTML元素,并希望每次遇到 `<BR>` 时都插入一个回车符,然后删除该 `<BR>`,这样就不会留下任何HTML标记。然后我想从文本中筛选出包含组合框中所选字符串的行。提前感谢您的帮助。
/// <summary>
/// Prompts for a configuration file, shows its full content in the rich text box,
/// and lists the trimmed lines that start with the combo-box text in the text box.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
private void navigateWeb_Click(object sender, EventArgs e)
{
    openFD.Title = "Select your configuration file";
    openFD.InitialDirectory = "C:";
    openFD.FileName = "";
    openFD.Filter = "Config File (*.cfg)|*.cfg|Text File (*.txt)|*.txt|All Files (*.*)|*.*";
    openFD.ShowDialog();

    // FileName was reset to "" before the dialog was shown, so an empty value here
    // means nothing was chosen; bail out instead of opening a reader on an empty path.
    string selectedPath = openFD.FileName;
    if (string.IsNullOrEmpty(selectedPath))
    {
        return;
    }

    MyURL = selectedPath;

    // Open and read the file; the using block releases the file handle afterwards.
    using (var reader = new System.IO.StreamReader(MyURL))
    {
        richTextBox1.Text = reader.ReadToEnd();
    }

    // Keep only the (trimmed) lines that begin with the text selected in the combo box.
    var lines = File.ReadAllLines(MyURL)
        .Select(l => l.Trim())
        .Where(l => l.StartsWith(comboBox1.Text));
    textBox1.Text = String.Join(Environment.NewLine, lines);
}
**更新:** 以下是最终可行的解决方案:
/// <summary>
/// Converts an HTML fragment to plain text: non-breaking spaces become ordinary
/// spaces, every line-break tag becomes a newline, and all remaining tags are removed.
/// </summary>
/// <param name="text">The HTML fragment to clean; may be null or empty.</param>
/// <returns>The text without markup, or an empty string for null/empty input.</returns>
public static string RemoveHTML(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    // Normalize both the &nbsp; entity and the raw U+00A0 character to a plain space.
    text = text.Replace("&nbsp;", " ").Replace("\u00A0", " ");

    // Match <br>, <BR>, <br/> and <br /> alike; a case-sensitive "<br>" literal
    // misses the upper-case and self-closing forms.
    text = System.Text.RegularExpressions.Regex.Replace(
        text,
        @"<br\s*/?>",
        "\n",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    // Drop every remaining tag.
    return System.Text.RegularExpressions.Regex.Replace(text, "<[^>]+>", string.Empty);
}
/// <summary>
/// Prompts for a URL or file, shows it in the browser control, and fills the result
/// text box with the HTML-stripped segments that match the active filters.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
private void navigateWeb_Click(object sender, EventArgs e)
{
    openFD.Title = "Enter URL in the box below";
    openFD.InitialDirectory = "C:";
    openFD.FileName = "http://msnconf/configtc.aspx?IP=10.6.64.200&m=c";
    openFD.Filter = "HTTP://|*.*|Config File (*.cfg)|*.cfg|Text File (*.txt)|*.txt|All Files (*.*)|*.*";

    // Nothing to do when the user dismisses the dialog.
    if (openFD.ShowDialog() == DialogResult.Cancel)
    {
        return;
    }

    MyURL = openFD.FileName;
    webBrowser1.Visible = true;
    richTextBox1.Visible = false;
    permitACL.Enabled = true;
    webBrowser1.Navigate(MyURL);

    // Open and read the file; the using block releases the handle once the text is loaded.
    using (var reader = new System.IO.StreamReader(MyURL))
    {
        richTextBox1.Text = reader.ReadToEnd();
    }

    // The source marks line ends with "<BR> ", so split on that marker.
    string[] separators = { "<BR> " };
    string[] segments = richTextBox1.Text.Split(separators, StringSplitOptions.RemoveEmptyEntries);

    var matches = new System.Text.StringBuilder();
    foreach (string segment in segments)
    {
        // A segment is kept when it satisfies either filter; evaluating both in one
        // condition ensures a segment matching both is emitted once, not twice.
        bool permitMatch = permitACL.Checked && segment.Contains("permit");
        if (permitMatch || segment.Contains(comboBox1.Text))
        {
            matches.Append(RemoveHTML(segment)).Append("\r\n");
        }
    }

    // Print results to the text box.
    textBox1.Text = matches.ToString();
}
答案 0 :(得分:1)
我建议您使用HTML Agility Pack - 它是一个HTML解析器,您可以使用XPath语法进行查询。
答案 1 :(得分:0)
/// <summary>
/// Strips HTML markup from a fragment. Non-breaking spaces are turned into regular
/// spaces and line-break tags into newlines before the remaining tags are deleted.
/// </summary>
/// <param name="text">HTML fragment to convert; null or empty is allowed.</param>
/// <returns>Plain text with no tags; an empty string when the input is null or empty.</returns>
public static string RemoveHTML(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    // Cover both spellings of a non-breaking space: the entity and the U+00A0 character.
    text = text.Replace("&nbsp;", " ").Replace("\u00A0", " ");

    // Line breaks appear as <br>, <BR>, <br/> or <br />, so match them
    // case-insensitively with an optional self-closing slash.
    text = System.Text.RegularExpressions.Regex.Replace(
        text,
        @"<br\s*/?>",
        "\n",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    // Whatever tags are left carry no text of their own and are removed outright.
    return System.Text.RegularExpressions.Regex.Replace(text, "<[^>]+>", string.Empty);
}