我有一些这样的文字:
This is a simple line
[olist]
[#]This is line 1
[#]This is line 2
[olist]
[#]This is line 2.1
[#]This is line 2.2
[#]This is line 2.3
and it continues here
[/olist]
[#]This is line 3
[/olist]
Another line
如何在C#中将其解析为HTML,如下所示
This is a simple line
<ol>
<li>This is line 1</li>
<li>This is line 2
<ol>
<li>This is line 2.1</li>
<li>This is line 2.2</li>
<li>This is line 2.3
and it continues here</li>
</ol>
</li>
<li>This is line 3</li>
</ol>
Another line
我目前正在拆分和连接,但子列表未正确处理。
更新: - 示例代码
这就是我目前正在做的事情。
// Example call: rewrite the [olist] ... [/olist] markup found in customHtml using the "ol" tag.
var html = ReplaceList(customHtml,"olist","ol");
/// <summary>
/// Converts every <c>[key]...[/key]</c> list found in <paramref name="text"/> into
/// <c>&lt;tag&gt;&lt;li&gt;...&lt;/li&gt;&lt;/tag&gt;</c> markup, including lists nested inside list items.
/// </summary>
/// <param name="text">Input that may contain list markup; list items are introduced by <c>[#]</c>.</param>
/// <param name="key">Name used in the list markers, e.g. <c>olist</c> for <c>[olist]</c>/<c>[/olist]</c>.</param>
/// <param name="tag">Name of the HTML element emitted for each list, e.g. <c>ol</c>.</param>
/// <returns>
/// The text with every well-formed list replaced. Markers that have no matching partner are left
/// untouched, and a null or empty <paramref name="text"/> is returned as-is.
/// </returns>
private static string ReplaceList(string text, string key, string tag)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    var openTag = "[" + key + "]";
    var closeTag = "[/" + key + "]";
    var searchFrom = 0;
    int closeAt;

    // Always convert the innermost list first: the first closing marker, paired with the nearest
    // opening marker in front of it, can never contain another list. Once converted it is plain
    // HTML inside the surrounding item, so the enclosing list is handled by a later iteration.
    while ((closeAt = text.IndexOf(closeTag, searchFrom, StringComparison.Ordinal)) >= 0)
    {
        var openAt = text.LastIndexOf(openTag, closeAt, StringComparison.Ordinal);
        if (openAt < 0)
        {
            // Stray closing marker with nothing to close: keep it and look further on.
            searchFrom = closeAt + closeTag.Length;
            continue;
        }

        var bodyStart = openAt + openTag.Length;
        var entries = text.Substring(bodyStart, closeAt - bodyStart)
                          .Split(new[] { "[#]" }, StringSplitOptions.RemoveEmptyEntries);

        var buf = new StringBuilder();
        foreach (var entry in entries)
        {
            // Whitespace-only fragments (e.g. the line break right after the opening marker) are not items.
            if (!string.IsNullOrWhiteSpace(entry))
            {
                buf.AppendFormat("<li>{0}</li>", entry.Trim());
            }
        }

        var content = string.Format("<{0}>{1}</{0}>", tag, buf);
        text = text.Substring(0, openAt) + content + text.Substring(closeAt + closeTag.Length);

        // The generated markup contains no closing markers, so resume right after it.
        searchFrom = openAt + content.Length;
    }

    return text;
}
/// <summary>
/// Extracts the text enclosed by the first <c>[key]</c> marker in <paramref name="text"/> and the
/// first <c>[/key]</c> marker that follows it.
/// </summary>
/// <param name="text">Text to search.</param>
/// <param name="key">Marker name, without the surrounding brackets.</param>
/// <returns>The enclosed text without the markers, or <c>null</c> when either marker is missing.</returns>
private static string GetListEntry(string text, string key)
{
    var opener = "[" + key + "]";
    var closer = "[/" + key + "]";

    var openAt = text.IndexOf(opener, StringComparison.Ordinal);
    if (openAt < 0)
    {
        return null;
    }

    // Only a closing marker located after the opening one counts.
    var closeAt = text.IndexOf(closer, openAt, StringComparison.Ordinal);
    if (closeAt <= openAt)
    {
        return null;
    }

    var bodyStart = openAt + opener.Length;
    return text.Substring(bodyStart, closeAt - bodyStart);
}
请注意:某些列表项会跨越多行,也可能包含换行符。
答案 0 :(得分:1)
首先必须将其解析为一棵抽象树,然后再根据这棵树生成结果。例如:
/// <summary>
/// A node of the list tree built while parsing the markup.
/// </summary>
public interface IElement
{
/// <summary>Attaches <paramref name="element"/> to this node as a child.</summary>
void AddElement(IElement element);
/// <summary>The node this one was created under; the root list is created with a null parent.</summary>
IElement Parent { get; }
}
/// <summary>
/// An ordered list node; its children are the list's items.
/// </summary>
class OlElement : IElement
{
    /// <summary>The node this list was created under.</summary>
    public IElement Parent { get; set; }

    /// <summary>Items of the list, in the order they were added.</summary>
    public IList<LiElement> Elements { get; set; }

    /// <summary>Creates an empty list below <paramref name="parent"/>.</summary>
    public OlElement(IElement parent)
    {
        Elements = new List<LiElement>();
        Parent = parent;
    }

    /// <summary>Appends an item; the cast throws when <paramref name="element"/> is not a <see cref="LiElement"/>.</summary>
    public void AddElement(IElement element)
    {
        var item = (LiElement)element;
        Elements.Add(item);
    }

    /// <summary>Renders the list as an <c>&lt;ol&gt;</c> element, one line per rendered item.</summary>
    public override string ToString()
    {
        var html = new StringBuilder("<ol>");
        html.AppendLine();
        for (var index = 0; index < Elements.Count; index++)
        {
            html.AppendLine(Elements[index].ToString());
        }

        return html.AppendLine("</ol>").ToString();
    }
}
/// <summary>
/// A list item node: its own text followed by any lists nested inside it.
/// </summary>
class LiElement : IElement
{
    /// <summary>The node this item was created under.</summary>
    public IElement Parent { get; set; }

    /// <summary>Text of the item, written right after the opening tag.</summary>
    public string Text { get; set; }

    /// <summary>Lists nested inside this item, in the order they were added.</summary>
    public IList<OlElement> Elements { get; set; }

    /// <summary>Creates an item with the given text below <paramref name="parent"/>.</summary>
    public LiElement(IElement parent, string text)
    {
        Elements = new List<OlElement>();
        Text = text;
        Parent = parent;
    }

    /// <summary>Appends a nested list; the cast throws when <paramref name="element"/> is not an <see cref="OlElement"/>.</summary>
    public void AddElement(IElement element)
    {
        var nested = (OlElement)element;
        Elements.Add(nested);
    }

    /// <summary>Renders the item as an <c>&lt;li&gt;</c> element containing its text and nested lists.</summary>
    public override string ToString()
    {
        var html = new StringBuilder("<li>").Append(Text);
        for (var index = 0; index < Elements.Count; index++)
        {
            html.AppendLine(Elements[index].ToString());
        }

        return html.Append("</li>").AppendLine().ToString();
    }
}
获得结果:
// Sample markup: an ordered list whose second item contains a nested list.
const string text = @"[olist]
[#]This is line 1
[#]This is line 2
[olist]
[#]This is line 2.1
[#]This is line 2.2
[#]This is line 2.3
[/olist]
[#]This is line 3
[/olist]";
// A marker line: optional leading whitespace, "[tag]", then the remaining text of the line.
var regex = new Regex(@"^\s*\[(?<tag>[^\]]+)\](?<text>.*)$");
var root = new OlElement(null);
// The node that the next marker or text line applies to.
var currentElement = (IElement)root;
using (var reader = new StringReader(text))
{
    string line;
    while ((line = reader.ReadLine()) != null)
    {
        var match = regex.Match(line);
        // null for a line that does not start with a marker; it is then handled by the default case.
        var tag = match.Success ? match.Groups["tag"].Value : null;
        switch (tag)
        {
            case "#":
                if (currentElement is OlElement)
                {
                    // First item of the list that was just opened.
                    var child = new LiElement(currentElement, match.Groups["text"].Value);
                    currentElement.AddElement(child);
                    currentElement = child;
                }
                else if (currentElement is LiElement)
                {
                    // Another item of the same list: attach it to the current item's parent list.
                    var child = new LiElement(currentElement.Parent, match.Groups["text"].Value);
                    currentElement.Parent.AddElement(child);
                    currentElement = child;
                }
                break;
            case "olist":
                // While the root list is current (the sample's first line), the marker adds nothing.
                if (currentElement == root)
                {
                    break;
                }
                if (currentElement is LiElement)
                {
                    var child = new OlElement(currentElement);
                    currentElement.AddElement(child);
                    currentElement = child;
                }
                break;
            case "/olist":
                if (currentElement is LiElement)
                {
                    // Leave the item and its list; this lands on the item that owns the list
                    // (or on null once the root list is closed).
                    currentElement = currentElement.Parent.Parent;
                }
                else if (currentElement is OlElement)
                {
                    currentElement = currentElement.Parent;
                }
                break;
            default:
                {
                    // A line without a known marker continues the item that is currently open,
                    // so multi-line items are kept instead of being dropped. Text that follows a
                    // closed nested list is appended to the owning item's text, so it renders
                    // before that nested list.
                    var openItem = currentElement as LiElement;
                    if (openItem != null && !string.IsNullOrWhiteSpace(line))
                    {
                        openItem.Text += System.Environment.NewLine + line.Trim();
                    }
                    break;
                }
        }
    }
}
var result = root.ToString();
答案 1 :(得分:0)
考虑以下方法(注意,它在识别标签时的做法比较"快速而粗糙")。
非常简单 - 只需逐行阅读文本并对其进行转换(使用一些预测和计算深度级别的子列表)。
// Sample markup with three levels of nesting.
string src = @"[olist]
[#]This is line 1
[#]This is line 2
[olist]
[#]This is line 2.1
[olist]
[#]This is line 2.1.1
[#]This is line 2.1.2
[/olist]
[#]This is line 2.2
[#]This is line 2.3
[/olist]
[#]This is line 3
[/olist]";
var sb = new StringBuilder();
// Accept both line-ending styles: the literal's line breaks come from the source file and
// need not match Environment.NewLine on the machine running the code.
var lines = src.Split(new string[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);
int i = 0;
// Number of items currently left open because a nested list follows them.
int innerListsCount = 0;
while (i < lines.Length)
{
    string line = lines[i];
    if (line.EndsWith("[olist]", StringComparison.Ordinal))
    {
        sb.AppendLine(line.Replace("[olist]", "<ol>"));
    }
    else if (line.EndsWith("[/olist]", StringComparison.Ordinal))
    {
        sb.AppendLine(line.Replace("[/olist]", "</ol>"));
        if (innerListsCount > 0)
        {
            // The list that just ended was nested: close the item that contained it.
            for (int j = 0; j <= innerListsCount; j++)
                sb.Append(" ");
            sb.AppendLine("</li>");
            // Decrement only here so closing a top-level list cannot push the counter below
            // zero and corrupt the handling of a following list.
            innerListsCount--;
        }
    }
    else if (line.Trim().StartsWith("[#]", StringComparison.Ordinal))
    {
        sb.Append(line.Replace("[#]", "<li>"));
        // Look ahead one line (guarding the array end): an item directly followed by a
        // nested list stays open until that list is closed.
        if (i + 1 < lines.Length && lines[i + 1].EndsWith("[olist]", StringComparison.Ordinal))
        {
            innerListsCount++;
            sb.AppendLine();
        }
        else
        {
            sb.AppendLine("</li>");
        }
    }
    // NOTE(review): lines that match none of the branches (e.g. the continuation of a
    // multi-line item) are still skipped by this quick approach.
    i++;
}
Console.WriteLine(sb.ToString());
输出看起来完全符合您的要求:
<ol>
<li>This is line 1</li>
<li>This is line 2
<ol>
<li>This is line 2.1
<ol>
<li>This is line 2.1.1</li>
<li>This is line 2.1.2</li>
</ol>
</li>
<li>This is line 2.2</li>
<li>This is line 2.3</li>
</ol>
</li>
<li>This is line 3</li>
</ol>