我需要在 JavaScript 中编写一个 split 函数,按逗号把字符串拆分成数组……但位于引号(' 或 ")之内的逗号不应被当作分隔符。
以下是三个示例以及结果(数组)应该如何:
"peanut, butter, jelly"
-> ["peanut", "butter", "jelly"]
"peanut, 'butter, bread', 'jelly'"
-> ["peanut", "butter, bread", "jelly"]
'peanut, "butter, bread", "jelly"'
-> ["peanut", 'butter, bread', "jelly"]
我不能直接使用 JavaScript 的 split 方法,因为即使分隔符位于引号之内,它也会在该处进行拆分。
我怎样才能做到这一点,也许是用正则表达式?
关于使用场景:在扩展 jQuery 的 $.expr[':'] 时需要创建一个过滤函数,我要拆分的就是传给该函数的第三个参数中第三个元素所携带的参数。这个第三个参数通常命名为 meta,它是一个包含过滤器相关信息的数组。
无论如何,这个数组的第三个元素是一个字符串,其中包含与过滤器一起传递的参数;并且由于字符串格式的参数,我需要能够正确地拆分它们进行解析。
答案 0 :(得分:3)
您想要的本质上是一个 JavaScript CSV 解析器。用 Google 搜索“Javascript CSV Parser”会得到大量结果,其中很多都附有完整的脚本。另请参阅:Javascript code to parse CSV data
答案 1 :(得分:1)
// Sample input: comma-separated items, one of which is a double-quoted
// string that itself contains a comma.
var str = 'text, foo, "haha, dude", bar';
// Collect the tokens instead of splitting on commas. The pattern matches
// either a run of lowercase ASCII letters, or a quoted span: an opening ' or "
// (captured as group 1), the shortest possible run of characters, and then the
// same quote character again (\1). Quoted fragments keep their surrounding
// quotes. Unquoted items are limited to the letters a-z by this pattern.
var fragments = str.match(/[a-z]+|(['"]).*?\1/g);
更好的版本(支持字符串中被转义的 " 或 '):
// Sample input. The backslash is doubled in the source so that the string
// really contains \" ; a bare \" inside a single-quoted literal is just ".
var str = 'text_123 space, foo, "text, here\\", dude", bar, \'one, two\', blob';
// Tokenise instead of splitting on commas. Alternatives, tried in order:
//   1. a double-quoted span in which backslash + any character is an escape;
//   2. the same for single quotes;
//   3. an unquoted item: it starts and ends with a character that is not a
//      quote, comma or whitespace, and may contain whitespace in between
//      (single-character items are allowed).
// Quoted fragments keep their quotes and their escape backslashes.
var fragments = str.match(/"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'|[^"',\s](?:[^"',]*[^"',\s])?/g);
// Result:
0: text_123 space
1: foo
2: "text, here\", dude"
3: bar
4: 'one, two'
5: blob
答案 2 :(得分:1)
好吧,我手头正好有一个“杀鸡用牛刀”式的解决方案(一段为别的用途编写的通用代码),所以纯粹出于好玩,把它贴在这里……
/**
 * Creates a lexer with an empty rule list and no error lexeme.
 *
 * Every argument is treated as an option flag; unrecognised values are
 * ignored and, for the index option, the last one given wins:
 *   - Lexer.USE_NEW: sets `useNew` to true.
 *   - Lexer.SET_INDEX (the function itself): sets `setIndex` to
 *     Lexer.DEFAULT_INDEX.
 *   - an instance of Lexer.SET_INDEX: sets `setIndex` to that instance's
 *     `indexProp`.
 */
function Lexer () {
    var optionCount = arguments.length;
    var indexName = false;
    var wantsNew = false;
    for (var pos = 0; pos < optionCount; pos += 1) {
        var option = arguments [pos];
        if (option === Lexer.USE_NEW) {
            wantsNew = true;
            continue;
        }
        if (option === Lexer.SET_INDEX) {
            indexName = Lexer.DEFAULT_INDEX;
            continue;
        }
        if (option instanceof Lexer.SET_INDEX) {
            indexName = option.indexProp;
        }
    }
    // Assigned in this order so the instance's own-property order is unchanged.
    this.setIndex = indexName;
    this.useNew = wantsNew;
    this.rules = [];
    this.errorLexeme = null;
}
// Sentinel lexeme: entries identical to this object are filtered out of the
// array returned by Lexer.prototype.lex, so a rule mapped to it produces no
// output.
Lexer.NULL_LEXEME = {};
// Ready-made marker object that can be passed to setErrorLexeme; its toString
// returns a recognisable label.
Lexer.ERROR_LEXEME = {
toString: function () {
return "[object Lexer.ERROR_LEXEME]";
}
};
// Property name used for the index option when no explicit name is given.
Lexer.DEFAULT_INDEX = "index";
// Constructor flag: when passed to Lexer, lexeme functions are invoked with
// `new` instead of being called as plain functions.
Lexer.USE_NEW = {};
/**
 * Lexer constructor option that names the property under which the index is
 * stored on lexemes.
 *
 * Works with or without `new`: `Lexer.SET_INDEX ("pos")` and
 * `new Lexer.SET_INDEX ("pos")` both return an instance whose `indexProp` is
 * "pos".
 *
 * @param {string} [indexProp] Property name; defaults to Lexer.DEFAULT_INDEX
 *     when omitted or undefined.
 */
Lexer.SET_INDEX = function SetIndex (indexProp) {
    // A named function expression replaces arguments.callee, which is
    // forbidden in strict mode; the original `new callee.apply (...)` tried to
    // construct `apply` and threw.
    if (!(this instanceof SetIndex)) {
        return new SetIndex (indexProp);
    }
    this.indexProp = indexProp === undefined ? Lexer.DEFAULT_INDEX : indexProp;
};
// Private helpers plus the Lexer prototype; nothing in here leaks into the
// enclosing scope except the assignment to Lexer.prototype.
(function () {
    /**
     * Constructs the function bound as `this` with `new`, forwarding every
     * argument (i.e. `New.apply (Ctor, args)` behaves like `new Ctor (...args)`).
     * One forwarding function is generated per argument count and cached.
     * The generated source is built only from argument indices, never from
     * caller-supplied text.
     */
    var New = (function () {
        var fs = [];
        return function () {
            var f = fs [arguments.length];
            if (f) {
                return f.apply (this, arguments);
            }
            var argStrs = [];
            for (var i = 0; i < arguments.length; ++i) {
                argStrs.push ("a[" + i + "]");
            }
            f = new Function ("var a=arguments;return new this(" + argStrs.join () + ");");
            // Bound the cache so unusual arities cannot grow it without limit.
            if (arguments.length < 100) {
                fs [arguments.length] = f;
            }
            return f.apply (this, arguments);
        };
    }) ();

    // RegExp boolean property -> flag letter. Properties an engine does not
    // know read as undefined and are simply skipped.
    var flagMap = [
        ["global", "g"]
        , ["ignoreCase", "i"]
        , ["multiline", "m"]
        , ["sticky", "y"]
        , ["unicode", "u"]
        , ["dotAll", "s"]
        , ["hasIndices", "d"]
        , ["unicodeSets", "v"]
    ];

    /** Returns the flag string corresponding to the flags set on `regex`. */
    function getFlags (regex) {
        var flags = "";
        for (var i = 0; i < flagMap.length; ++i) {
            if (regex [flagMap [i] [0]]) {
                flags += flagMap [i] [1];
            }
        }
        return flags;
    }

    /** Returns a predicate that is true for every value not identical to `x`. */
    function not (x) {
        return function (y) {
            return x !== y;
        };
    }

    /**
     * Pairs a regex with the lexeme it produces. lex() drives matching through
     * lastIndex, so a non-global regex is rebuilt with the "g" flag added and
     * its other flags preserved.
     */
    function Rule (regex, lexeme) {
        if (!regex.global) {
            var flags = "g" + getFlags (regex);
            regex = new RegExp (regex.source, flags);
        }
        this.regex = regex;
        this.lexeme = lexeme;
    }

    Lexer.prototype = {
        constructor: Lexer

        /**
         * Registers a rule.
         * @param {RegExp} regex Pattern to look for.
         * @param {*} lexeme Value emitted on a match, or a function that
         *     builds it (see runLexeme).
         */
        , addRule: function (regex, lexeme) {
            var rule = new Rule (regex, lexeme);
            this.rules.push (rule);
        }

        /**
         * Sets the lexeme emitted where input is skipped or nothing matches.
         * A falsy value disables error reporting.
         */
        , setErrorLexeme: function (lexeme) {
            this.errorLexeme = lexeme;
        }

        /**
         * Turns a rule's lexeme into the emitted value. Non-functions are
         * returned as they are. Functions receive the match, its capture
         * groups, the match index and the input string, and are invoked with
         * `new` when the lexer was created with Lexer.USE_NEW.
         */
        , runLexeme: function (lexeme, exec) {
            if (typeof lexeme !== "function") {
                return lexeme;
            }
            var args = exec.concat (exec.index, exec.input);
            if (this.useNew) {
                return New.apply (lexeme, args);
            }
            return lexeme.apply (null, args);
        }

        /**
         * Scans `str` from left to right. At each position the rule whose
         * match starts earliest wins; ties go to the longest match, then to
         * the rule added first. Entries identical to Lexer.NULL_LEXEME are
         * removed from the result.
         *
         * @param {string} str Input to tokenise.
         * @returns {Array} The emitted lexemes, in input order.
         */
        , lex: function (str) {
            var index = 0;
            var lexemes = [];
            // Captured up front: inside the push override `this` is the array,
            // not the lexer, so `this.setIndex` would be undefined there.
            var indexProp = this.setIndex;
            if (indexProp) {
                lexemes.push = function () {
                    for (var i = 0; i < arguments.length; ++i) {
                        var item = arguments [i];
                        var kind = typeof item;
                        // Primitives cannot carry properties (assigning to one
                        // throws in strict mode), so only tag objects/functions.
                        if (item && (kind === "object" || kind === "function")) {
                            item [indexProp] = index;
                        }
                    }
                    return Array.prototype.push.apply (this, arguments);
                };
            }
            while (index < str.length) {
                var bestExec = null;
                var bestRule = null;
                for (var i = 0; i < this.rules.length; ++i) {
                    var rule = this.rules [i];
                    rule.regex.lastIndex = index;
                    var exec = rule.regex.exec (str);
                    if (exec) {
                        var doUpdate = !bestExec
                            || (exec.index < bestExec.index)
                            || (exec.index === bestExec.index && exec [0].length > bestExec [0].length)
                        ;
                        if (doUpdate) {
                            bestExec = exec;
                            bestRule = rule;
                        }
                    }
                }
                if (!bestExec) {
                    if (this.errorLexeme) {
                        lexemes.push (this.errorLexeme);
                        return lexemes.filter (not (Lexer.NULL_LEXEME));
                    }
                    // Sticky rules may still match further on, so step one
                    // character forward rather than giving up. There is no
                    // winning rule here, so its regex must not be touched.
                    ++index;
                    continue;
                }
                if (this.errorLexeme && index !== bestExec.index) {
                    // Input between the scan position and the match was skipped.
                    lexemes.push (this.errorLexeme);
                }
                // From here on the recorded index is where the match starts.
                index = bestExec.index;
                lexemes.push (this.runLexeme (bestRule.lexeme, bestExec));
                // Derived from the match itself so it stays correct even when
                // one regex object is shared between rules. A zero-length
                // match still advances one character so the loop terminates.
                var matchEnd = bestExec.index + bestExec [0].length;
                index = matchEnd > index ? matchEnd : index + 1;
            }
            return lexemes.filter (not (Lexer.NULL_LEXEME));
        }
    };
}) ();
// Fallback for engines that lack a native Array.prototype.filter; it is
// skipped entirely when the native method exists.
if (!Array.prototype.filter) {
    /**
     * Returns a new array holding the elements for which `fun` returns a
     * truthy value.
     *
     * @param {Function} fun Called as fun (value, index, array); an optional
     *     second argument to filter is used as `this` for each call.
     * @returns {Array} The elements that passed the test, in original order.
     * @throws {TypeError} If `fun` is not a function, as the native method does.
     */
    Array.prototype.filter = function (fun) {
        var len = this.length >>> 0;
        if (typeof fun !== "function") {
            throw new TypeError ("filter callback must be a function");
        }
        var res = [];
        var thisp = arguments [1];
        for (var i = 0; i < len; ++i) {
            // Skip holes in sparse arrays.
            if (i in this) {
                // Read the value before the call so the kept element is the
                // one the callback was given, even if the callback mutates it.
                var val = this [i];
                if (fun.call (thisp, val, i, this)) {
                    res.push (val);
                }
            }
        }
        return res;
    };
}
现在使用代码解决问题:
/**
 * Returns `str` with leading and trailing whitespace removed.
 * Only the first argument is used, so it can serve directly as a lexeme
 * function.
 */
function trim (str) {
    return str
        .replace (/^\s+/, "")
        .replace (/\s+$/, "");
}
// Build a lexer that splits on commas but leaves commas inside quotes alone.
var splitter = new Lexer ();
// With an error lexeme set, lex() inserts it wherever input had to be skipped
// or nothing matched.
splitter.setErrorLexeme (Lexer.ERROR_LEXEME);
// Rule order matters only for exact ties: the earliest match wins, then the
// longest, then the rule added first.
// An item containing a double-quoted section; commas inside the quotes are
// consumed by [^"]*.
splitter.addRule (/[^,"]*"[^"]*"[^,"]*/g, trim);
// The same for a single-quoted section.
splitter.addRule (/[^,']*'[^']*'[^,']*/g, trim);
// A plain item with no quotes and no commas.
splitter.addRule (/[^,"']+/g, trim);
// The separator itself: matched so it is consumed, but dropped from the result.
splitter.addRule (/,/g, Lexer.NULL_LEXEME);
// Sample inputs taken from the question.
var strs = [
"peanut, butter, jelly"
, "peanut, 'butter, bread', 'jelly'"
, 'peanut, "butter, bread", "jelly"'
];
// NOTE: Array.prototype.map is used for brevity; it is not available in all
// browsers.
// Each result is an array of whitespace-trimmed items; quoted items keep
// their quote characters because trim only strips whitespace.
var splitStrs = strs.map (function (str) {
return splitter.lex (str);
});
答案 3 :(得分:-1)
如果您可以控制输入,强制整个字符串用双引号 " 括起来、字符串中的每个元素都用单引号 ' 括起来,并且任何元素内部都不包含单引号,那么您可以直接按 , ' 进行拆分。如果您无法控制输入,那么用正则表达式对输入进行排序/过滤/拆分,就和用正则表达式去匹配 XHTML 一样“有用”,也就是并不可靠(参见:RegEx match open tags except XHTML self-contained tags)