我正在尝试解析wiki标记,例如:
*bold text*, /italics/, [[www.example.com][Title]] etc,
创建
粗体文字,斜体,Title等,
并试图找到解决这个问题的最佳方法。我最初把字符串转换为字符列表,然后逐个字符地递归解析,这对基本格式是可行的;但我现在想扩展语法以支持段落和表格,单次遍历似乎已经不够了。
我看过一些关于解析器(Parsers)和组合子(combinators)的资料,但还没有找到能说明如何得到格式化文本的资源,尤其是在标记可以嵌套的情况下(wiki 标记往往如此)。正则表达式似乎效率很低,而先逐行解析、再逐字符解析,似乎比我已经很复杂的尝试还要糟糕。我知道“我该怎么做?”这样的提问并不好,但我实在没有思路了。哪怕只是一个简单的例子,我也将不胜感激。
修改:好吧,这是我目前能做到的最好结果——我认为正则表达式从一开始就太别扭,最终写成了下面这样……
/** Converts one line of wiki markup into an HTML fragment plus the name of the
  * block-level tag that should enclose it.
  *
  * The function is a character-level state machine driven by `matching`:
  *  - `Some('$')`: start of a line; line-level constructs (indented list items
  *    and headings, rows starting with `|`) are recognised here.
  *  - `Some(':')`: inside an article link whose name is being collected in `current`.
  *  - `Some(c)` for any other `c`: inside a span opened by markup character `c`.
  *  - `None`: plain text.
  *
  * @param toGo     characters still to be consumed
  * @param past     output produced so far, stored in reverse order
  * @param current  characters collected since the last state change, in reverse order
  * @param matching the marker currently open, if any
  * @return the HTML for the line and its enclosing tag: "ul", "ol", "table", "p",
  *         or "" when the fragment (a heading) needs no wrapper
  */
private def parse(toGo: List[Char], past: List[Char], current: List[Char], matching: Option[Char]) : (String, String) = {
  // Parses a nested fragment from a clean state and keeps only its HTML.
  def inline(chars: List[Char]): String = parse(chars, Nil, Nil, None)._1

  (matching, toGo) match {
    // Whole-line constructs come first.
    // A line indented by at least three spaces may be a list item or a heading.
    case (Some('$'), line) if line.startsWith(List(' ', ' ', ' ')) =>
      line.dropWhile(_ == ' ') match {
        case '*' :: item => (s"<li>${inline(item)}</li>", "ul")
        case '#' :: item => (s"<li>${inline(item)}</li>", "ol")
        case 'h' :: level :: title if level >= '1' && level <= '6' =>
          (s"<h$level>${inline(title)}</h$level>", "")
        // Not a recognised prefix: parse the untouched line (indent included) as text.
        case _ => parse(line, Nil, Nil, None)
      }

    // A line starting with '|' is a table row; every further '|' starts a new cell.
    case (Some('$'), '|' :: row) =>
      val cells = row.foldLeft(List(List.empty[Char])) { (acc, ch) =>
        if (ch == '|') acc :+ List.empty
        else acc.init :+ (acc.last :+ ch)
      }
      (cells.map(cell => s"<td>${inline(cell)}</td>").mkString("<tr>", "", "</tr>"), "table")

    // Any other line start: continue as plain text.
    case (Some('$'), _) => parse(toGo, Nil, Nil, None)

    // Link with an explicit label: the article name is in `current`, and the
    // label is the run of wikiTitle characters following the second ':'.
    case (Some(':'), ':' :: rest) =>
      val foundArticle = Article.getArticleByName(current.reverse.mkString)
      val (title, afterTitle) = rest.span(c => wikiTitle(c))
      val label = title.mkString
      val link: String = foundArticle.id match {
        case -1 => s"""<a href="edit/${foundArticle.title}"> $label</a>"""
        case _  => s"""<a href="${foundArticle.title}"> $label</a>"""
      }
      parse(afterTitle, link.toList.reverse ::: past, Nil, None)

    // Link without a label: the first non-wikiTitle character ends the article
    // name and is emitted right after the anchor.
    case (Some(':'), c :: rest) if !wikiTitle(c) =>
      val foundArticle = Article.getArticleByName(current.reverse.mkString)
      val link: String = foundArticle.id match {
        case -1 => s"""<a href="edit/${foundArticle.title}"> ${foundArticle.title}</a>$c"""
        case _  => s"""<a href="${foundArticle.title}"> ${foundArticle.title}</a>$c"""
      }
      parse(rest, link.toList.reverse ::: past, Nil, None)

    // Closing marker found: wrap what was collected since the opening marker.
    case (Some(open), next :: rest) if open == next =>
      val inner = inline(current.reverse)
      val wrapped = open match {
        case '*' => s"""<span style="font-weight:bold;">$inner</span>"""
        case '/' => s"""<span style="font-style:italic">$inner</span>"""
        case '_' => s"""<span style="text-decoration:underline">$inner</span>"""
        case '-' => s"""<span style="text-decoration:line-through">$inner</span>"""
        // Preformatted text keeps the raw characters rather than the parsed HTML.
        case '~' => "<pre>" + current.reverse.mkString + "</pre>"
        case _   => inner
      }
      parse(rest, wrapped.toList.reverse ::: past, Nil, None)

    // Still inside an open marker: keep collecting.
    case (Some(open), next :: rest) => parse(rest, past, next :: current, Some(open))

    // Input ended with the marker still open: emit the marker literally and
    // re-parse the collected characters as ordinary text.
    case (Some(open), Nil) => parse(current.reverse, open :: past, Nil, None)

    // Plain text: a markup character opens a new span, anything else is collected.
    case (None, marker :: rest) if wikiMarkup(marker) => parse(rest, current ::: past, Nil, Some(marker))
    case (None, ch :: rest) => parse(rest, past, ch :: current, None)

    // End of input: both accumulators are reversed, so reverse their concatenation.
    case (None, Nil) => ((current ::: past).reverse.mkString, "p")
  }
}
/** Joins per-line parse results into a single HTML string, opening and closing
  * the block-level wrapper tag whenever it changes between consecutive lines.
  *
  * Each element is `(fragment, tag)`: `fragment` is the HTML for one line and
  * `tag` is the wrapper element it belongs to, with "" meaning "no wrapper".
  * Consecutive lines with the same tag share one wrapper. Lines tagged "p" are
  * followed by `<br/>`.
  *
  * Fixes over the previous version:
  *  - moving from an open wrapper to an untagged line now closes that wrapper
  *    (it used to emit `</>`, because the tracking variable was overwritten
  *    before the closing tag was built);
  *  - the wrapper still open after the last line is now closed.
  *
  * @param parsedInfo per-line `(fragment, tag)` pairs, consumed once
  * @return the concatenated HTML
  */
private def parsedStringBuilder(parsedInfo: Iterator[(String, String)]): String = {
  val html = new StringBuilder
  // Name of the wrapper element currently open; "" when none is open.
  var openTag = ""

  parsedInfo.foreach { case (fragment, tag) =>
    if (tag != openTag) {
      if (openTag.nonEmpty) html.append("</").append(openTag).append(">")
      if (tag.nonEmpty) html.append("<").append(tag).append(">")
      openTag = tag
    }
    html.append(fragment)
    if (tag == "p") html.append("<br/>")
  }

  // Close whatever wrapper is still open once the input is exhausted.
  if (openTag.nonEmpty) html.append("</").append(openTag).append(">")
  html.toString
}
/** Renders `content` as HTML by parsing it line by line and joining the results. */
def toHTML: String = {
  // Every line is parsed from the '$' state, which is where parse handles
  // whole-line constructs.
  // NOTE(review): if `content` is a String, `lines` may resolve to
  // java.lang.String#lines on JDK 11+; `linesIterator` may be needed — confirm.
  val parsedLines = content.lines.map(line => parse(line.toList, List(), List(), Some('$')))
  parsedStringBuilder(parsedLines)
}
代码也许还可以再整理一下,而且我写的时候并没有刻意追求清晰,所以可能不太容易看懂;但不管怎样——一定有更简洁的方式吧?