正则表达式:类似松弛的“ markdown”,用于精确匹配一个格式字符

时间:2018-06-27 21:24:45

标签: javascript regex formatting markdown

我试图对此进行研究,但只能弄清楚如何使我的搜索字词如此精确。

我希望用户能够用符号_*~等包围文本,以实现类似markdown的格式,基本上是Slack的方式。我的工作正常,但是有一个问题。

当用户键入_bold_时,我将渲染<strong>bold</strong>,这是理想的效果。但是,当用户键入__bold__时,我会得到<strong>_bold_</strong>,而我只希望得到__bold__

这是我的正则表达式:

const rules = [
    {regex: /_{1}(.+?)_{1}/g, replacement: '<em>$1</em>'},
    {regex: /\*{1}(.+?)\*{1}/g, replacement: '<strong>$1</strong>'},
    // etc
];

然后我正在跑步:

let formattedText = '__some text__'; // the user input
rules.forEach(rule => formattedText = formattedText.replace(rule.regex, rule.replacement));

仅匹配带有{1}的那个。但是,如何忽略多个情况?

2 个答案:

答案 0 :(得分:1)

您可以使用

{regex: /(^|[^_])_(?!_)((?:[^]*?[^_])?)_(?!_)/g, replacement: '$1<em>$2</em>'}

请参见regex demo

详细信息

  • (^|[^_])-第1组(替换模式中用$1引用,因为此文本应放回结果中):字符串开头或_以外的任何字符
  • _(?!_)-一个_之后没有_
  • ((?:[^]*?[^_])?)-第2组(在替换模式中用$2引用):尽可能少的0个字符([^]*?)(*?),然后是使用除_以外的其他字符,零次或一次(可选)
  • _(?!_)-_之后没有_

答案 1 :(得分:0)

首先,Slack does not support markdown formatting(也称为here)但具有更简单的标记语言:

松弛消息可以使用简单的标记语言进行格式化,例如 到Markdown。支持的格式包括:precode斜体粗体,甚至是〜strike〜。

无论如何,最好的选择是使用流行的markdown库,例如Showdown,或者如果您真的想保持简单,可以使用this

;(function() { "use strict";

var
	output = "",

	BLOCK = "block",
	INLINE = "inline",

	/**
	 * Used to attach MarkdownToHtml object to `window` in browser
	 * context, or as an AMD module where appropriate.
	 * @type {Object}
	 */
	exports,

	/**
	 * An array of parse rule descriptor objects. Each object has two keys;
	 * pattern (the RegExp to match), and replace (the replacement string or
	 * function to execute).
	 * @type {Array}
	 */
	parseMap = [
		{
			// <h1>
			// A line starting with 1-6 hashes.
			pattern: /(#{1,6})([^\n]+)/g,
			replace: "<h$L1>$2</h$L1>",
			type: BLOCK,
		},
		{
			// <p>
			// Any line surrounded by newlines that doesn't start with
			// an HTML tag, asterisk or numeric value with dot following.
			pattern: /\n(?!<\/?\w+>|\s?\*|\s?[0-9]+|>|\&gt;|-{5,})([^\n]+)/g,
			replace: "<p>$1</p>",
			type: BLOCK,
		},
		{
			// <blockquote>
			// A greater-than character preceding any characters.
			pattern: /\n(?:&gt;|\>)\W*(.*)/g,
			replace: "<blockquote><p>$1</p></blockquote>",
			type: BLOCK,
		},
		{
			// <ul>
			//
			pattern: /\n\s?\*\s*(.*)/g,
			replace: "<ul>\n\t<li>$1</li>\n</ul>",
			type: BLOCK,
		},
		{
			// <ol>
			//
			pattern: /\n\s?[0-9]+\.\s*(.*)/g,
			replace: "<ol>\n\t<li>$1</li>\n</ol>",
			type: BLOCK,
		},
		{
			// <strong>
			// Either two asterisks or two underscores, followed by any
			// characters, followed by the same two starting characters.
			pattern: /(\*\*|__)(.*?)\1/g,
			replace: "<strong>$2</strong>",
			type: INLINE,
		},
		{
			// <em>
			// Either one asterisk or one underscore, followed by any
			// characters, followed by the starting character.
			pattern: /(\*|_)(.*?)\1/g,
			replace: "<em>$2</em>",
			type: INLINE,
		},
		{
			// <a>
			// Not starting with an exclamation mark, square brackets
			// surrounding any characters, followed by parenthesis surrounding
			// any characters.
			pattern: /([^!])\[([^\[]+)\]\(([^\)]+)\)/g,
			replace: "$1<a href=\"$3\">$2</a>",
			type: INLINE,
		},
		{
			// <img>
			// Starting with an exclamation mark, then followed by square
			// brackets surrounding any characters, followed by parenthesis
			// surrounding any characters.
			pattern: /!\[([^\[]+)\]\(([^\)]+)\)/g,
			replace: "<img src=\"$2\" alt=\"$1\" />",
			type: INLINE,
		},
		{
			// <del>
			// Double tilde characters surrounding any characters.
			pattern: /\~\~(.*?)\~\~/g,
			replace: "<del>$1</del>",
			type: INLINE,
		},
		{
			// <code>
			//
			pattern: /`(.*?)`/g,
			replace: "<code>$1</code>",
			type: INLINE,
		},
		{
			// <hr>
			//
			pattern: /\n-{5,}\n/g,
			replace: "<hr />",
			type: BLOCK,
		},
	],
$$;

/**
 * Self-executing function to handle exporting the parse function for
 * external use.
 */
(function go() {
	// Export AMD module if possible.
	if(typeof module !== "undefined"
	&& typeof module.exports !== "undefined") {
		exports = module.exports;
	}
	// Otherwise check for browser context.
	else if(typeof window !== "undefined") {
		window.MarkdownToHtml = {};
		exports = window.MarkdownToHtml;
	}

	exports.parse = parse;
})();

/**
 * Parses a provided Markdown string into valid HTML.
 *
 * @param  {string} string Markdown input for transformation
 * @return {string}        Transformed HTML output
 */
function parse(string) {
	// Pad with newlines for compatibility.
	output = "\n" + string + "\n";

	parseMap.forEach(function(p) {
		// Replace all matches of provided RegExp pattern with either the
		// replacement string or callback function.
		output = output.replace(p.pattern, function() {
			// console.log(this, arguments);
			return replace.call(this, arguments, p.replace, p.type);
		});
	});

	// Perform any post-processing required.
	output = clean(output);
	// Trim for any spaces or newlines.
	output = output.trim();
	// Tidy up newlines to condense where more than 1 occurs back to back.
	output = output.replace(/[\n]{1,}/g, "\n");
	return output;
}

function replace(matchList, replacement, type) {
	var
		i,
	$$;

	for(i in matchList) {
		if(!matchList.hasOwnProperty(i)) {
			continue;
		}

		// Replace $n with the matching regexp group.
		replacement = replacement.split("$" + i).join(matchList[i]);
		// Replace $Ln with the matching regexp group's string length.
		replacement = replacement.split("$L" + i).join(matchList[i].length);
	}

	if(type === BLOCK) {
		replacement = replacement.trim() + "\n";
	}

	return replacement;
}

function clean(string) {
	var cleaningRuleArray = [
		{
			match: /<\/([uo]l)>\s*<\1>/g,
			replacement: "",
		},
		{
			match: /(<\/\w+>)<\/(blockquote)>\s*<\2>/g,
			replacement: "$1",
		},
	];

	cleaningRuleArray.forEach(function(rule) {
		string = string.replace(rule.match, rule.replacement);
	});

	return string;
}

})();

var element = document.getElementById("example-markdown-content");
console.log(MarkdownToHtml.parse(element.innerHTML));
<div id="example-markdown-content">
# Hello, Markdown!

Here is some example **formatting** in _Markdown_.
</div>

即使这太多了,您也可以轻松地将其修整,因为它是如此简单。我想这将为您节省大量时间。