Question

// Input: two bare words followed by one double-quoted phrase.
var str = 'single words "fixed string of words"';
// Splitting on every space also breaks the quoted phrase apart.
var astr = str.split(" "); // need fix

我希望得到如下数组：single, words, fixed string of words。

Answer 1

// Match either a run of word characters (\w+) or a double-quoted section.
// The quoted alternative includes the quote characters in the matched text.
str.match(/\w+|"[^"]+"/g)

//single, words, "fixed string of words"

Answer 2

被采纳的答案并不完全正确。它会在 . 和 - 等非空格字符处进行分割，并且在结果中保留引号。更好的做法是使用捕获组来排除引号，例如：

/**
 * Splits text on whitespace while keeping double-quoted sections together.
 *
 * @param {string} text - The string to split.
 * @returns {string[]} Tokens in order of appearance; quoted sections are
 *     returned without their surrounding quotes.
 */
function splitOnSpacesOrQuotes(text) {
    // The parentheses in the regex create a capture group holding the text
    // between the quotes. The literal lives inside the function so that every
    // call starts with a fresh lastIndex.
    const pattern = /[^\s"]+|"([^"]*)"/g;
    const tokens = [];
    let match;

    // Each call to exec returns the next match as an array, or null when done
    while ((match = pattern.exec(text)) !== null) {
        // Index 1 is the captured group and is undefined for unquoted tokens;
        // index 0 is the whole matched text. Comparing with undefined (instead
        // of testing truthiness) keeps an empty quoted section ("") as "".
        tokens.push(match[1] !== undefined ? match[1] : match[0]);
    }
    return tokens;
}

var myString = 'single words "fixed string of words"';
var myArray = splitOnSpacesOrQuotes(myString);

myArray现在将包含OP所要求的内容：

single,words,fixed string of words

Answer 3

这使用了分割和正则表达式匹配的混合。

/**
 * Splits text into tokens using a mix of split and a regular expression:
 * whitespace separates tokens, except inside double-quoted sections, which
 * become a single token without the quotes.
 *
 * @param {string} text - The string to split.
 * @returns {string[]} Tokens in their original order.
 */
function splitKeepingQuoted(text) {
    // split() with a capture group keeps the captured text in the result, so
    // the pieces alternate: even indexes hold unquoted text, odd indexes hold
    // the contents of a quoted section.
    const pieces = text.split(/"([^"]*)"/);
    const tokens = [];

    pieces.forEach(function (piece, index) {
        if (index % 2 === 1) {
            tokens.push(piece);
            return;
        }
        // Splitting on whitespace runs leaves empty strings at the edges
        piece.split(/\s+/).forEach(function (word) {
            if (word !== "") {
                tokens.push(word);
            }
        });
    });
    return tokens;
}

var str = 'single words "fixed string of words"';
var astr = splitKeepingQuoted(str);

这会返回预期的结果，尽管单个正则表达式应该能够完成所有操作。

// ["single", "words", "fixed string of words"]

更新：这是 S.Mark 所提方法的改进版本。

var str = 'single words "fixed string of words"';
// Collect bare words and quoted phrases, then drop the quote characters from
// every token.
var aStr = str.match(/\w+|"[^"]+"/g).map(function (token) {
    return token.replace(/"/g, "");
});
// ["single", "words", "fixed string of words"]

Answer 4

这可能是一个完整的解决方案： https://github.com/elgs/splitargs

Answer 5

ES6解决方案支持：

按空格分割，但引号内部的空格除外
删除引号，但反斜杠转义的引号除外
转义的引号会变成普通引号
引号可以出现在任何位置

代码：

// Tokenize str one character at a time, treating a backslash plus the
// character after it as a single token. The ^$ alternative makes an empty
// string produce [''] instead of null, so reduce can still be called.
// The accumulator holds the word list (a) and an "inside quotes" flag (quote).
str.match(/\\?.|^$/g).reduce((p, c) => {
        if(c === '"'){
            // An unescaped quote toggles the flag (undefined ^ 1 is 1 on first use)
            p.quote ^= 1;
        }else if(!p.quote && c === ' '){
            // A space outside quotes starts a new, empty word
            p.a.push('');
        }else{
            // Append to the current word, dropping the backslash of an escaped character
            p.a[p.a.length-1] += c.replace(/\\(.)/,"$1");
        }
        return  p;
    }, {a: ['']}).a

输出：

[ 'single', 'words', 'fixed string of words' ]

Answer 6

这会将其拆分为一个数组，并从任何剩余的字符串中去除周围的引号。

/**
 * Splits a string on whitespace while keeping double-quoted sections together.
 *
 * @param {string} [words=''] - The string to split.
 * @returns {string[]} Tokens in order of appearance; quoted sections are
 *     returned without their surrounding quotes (an empty section yields '').
 */
const parseWords = (words = '') =>
    (words.match(/[^\s"]+|"[^"]*"/g) || []).map((word) =>
        // A token either contains no quote at all or is a complete "..."
        // section, so a leading quote always has a matching trailing one.
        // Slicing also handles empty and multi-line quoted sections.
        (word.startsWith('"') ? word.slice(1, -1) : word));

Answer 7

此解决方案适用于双引号（"）和单引号（'）：

代码：

// Match either a run of characters that are not whitespace, " or ', or a
// double-quoted section. With the g flag, match() returns the whole matched
// text, so the double quotes stay in the result.
// NOTE(review): there is no alternative for single-quoted sections, so text
// inside '...' is still split on spaces -- confirm this is intended.
str.match(/[^\s"']+|"([^"]*)"/gmi)

// ["single", "words", "\"fixed string of words\""]

此处显示了此正则表达式的工作方式：https://regex101.com/r/qa3KxQ/2

Answer 8

直到我找到 @dallin 的答案（此线程：https://stackoverflow.com/a/18647776/1904943）之前，我一直无法通过 JavaScript 处理混合了未加引号和带引号的术语/短语的字符串。

在研究这个问题时，我进行了一些测试。

由于我发现这些信息很难找到，所以我把相关内容整理在下面，希望能帮助其他需要在 JavaScript 中处理包含带引号词语的字符串的人。

// Sample query: two bare words followed by two double-quoted phrases.
let q = 'apple banana "nova scotia" "british columbia"';

仅提取带引号的单词和短语：

// https://stackoverflow.com/questions/12367126/how-can-i-get-a-substring-located-between-2-quotes
// The character class has to exclude the double quote itself. Excluding '
// instead lets a single greedy match run from the first quote to the last one.
const r = q.match(/"([^"]+)"/g);
console.log('r:', r);
// r: Array [ "\"nova scotia\"", "\"british columbia\"" ]
console.log('r:', r.toString());
// r: "nova scotia","british columbia"

// ----------------------------------------

// [alternate regex] https://www.regextester.com/97161
// The lazy quantifier (.*?) stops at the nearest closing quote, so each
// quoted phrase is matched separately; the quotes are part of each match.
const s = q.match(/"(.*?)"/g);
console.log('s:', s)
// s: Array [ "\"nova scotia\"", "\"british columbia\"" ]
console.log('s:', s.toString())
// s: "nova scotia","british columbia"

提取所有单词和短语（带引号的和不带引号的）：

// https://stackoverflow.com/questions/2817646/javascript-split-string-on-space-or-on-quotes-to-array
// \w+ picks up bare words; "[^"]+" picks up quoted phrases, quotes included.
const t = q.match(/\w+|"[^"]+"/g);
console.log('t:', t)
// t: Array(4) [ "apple", "banana", "\"nova scotia\"", "\"british columbia\"" ]
console.log('t:', t.toString())
// t: apple,banana,"nova scotia","british columbia"

// ----------------------------------------------------------------------------

// https://stackoverflow.com/questions/2817646/javascript-split-string-on-space-or-on-quotes-to-array
// [@dallin's answer (this thread)] https://stackoverflow.com/a/18647776/1904943

// Group 1 captures the text between double quotes.
var myRegexp = /[^\s"]+|"([^"]*)"/g;
var myArray = [];
var match;

/* Each call to exec returns the next regex match as an array, or null when
 * there are no more matches. */
while ((match = myRegexp.exec(q)) !== null) {    // << "q" = my query (string)
    /* Index 1 in the array is the captured group; it is undefined for
     * unquoted tokens. Index 0 is the whole matched text. Comparing with
     * undefined (instead of testing truthiness) stores an empty quoted
     * phrase ("") as an empty string rather than as the raw quotes. */
    myArray.push(match[1] !== undefined ? match[1] : match[0]);
}

console.log('myArray:', myArray, '| type:', typeof(myArray))
// myArray: Array(4) [ "apple", "banana", "nova scotia", "british columbia" ] | type: object
console.log(myArray.toString())
// apple,banana,nova scotia,british columbia

使用集合（而不是数组）：

// https://stackoverflow.com/questions/28965112/javascript-array-to-set
// Build a Set from the array's elements.
var mySet = new Set(myArray);
console.log('mySet:', mySet, '| type:', typeof(mySet))
// mySet: Set(4) [ "apple", "banana", "nova scotia", "british columbia" ] | type: object

迭代集合元素：

// Print each value held by the Set, one per line.
for (const x of mySet) {
    console.log(x);
}
/* apple
 * banana
 * nova scotia
 * british columbia
 */

// https://stackoverflow.com/questions/16401216/iterate-over-set-elements
// Declared with const: assigning to an undeclared name throws a
// ReferenceError in strict mode and in ES modules.
const myArrayFromSet = Array.from(mySet);

// Index-based loop so the position can be printed next to each value.
for (let i = 0; i < myArrayFromSet.length; i++) {
    console.log(i + ':', myArrayFromSet[i]);
}
/*
 0: apple
 1: banana
 2: nova scotia
 3: british columbia
 */

旁白

以上 JavaScript 输出来自 Firefox 开发者工具（在网页中按 F12 打开）。我创建了一个空白的 HTML 文件，由它调用我用 Vim（作为我的 IDE）编辑的 .js 文件。参见：Simple JavaScript IDE
根据我的测试，克隆 Set 得到的似乎是深拷贝。参见：Shallow-clone an ES6 Map or Set

Answer 9

我也注意到了有字符丢失的问题。我认为可以把它们包含进来——例如，要让“+”也算作单词的一部分，可以使用类似“[\w\+]”的写法，而不仅仅是“\w”。

JavaScript：按空格或引号将字符串拆分为数组

9 个答案: