Javascript多次出现的关键字

时间:2016-05-24 21:22:46

标签: javascript

我的文字包含一些关键字,后跟句子,

// Section texts collected by transform_report(). They are module-level, so
// entries accumulate across calls.
var name = [];
var about = [];
var life = [];

/**
 * Splits a report into the sections introduced by the keywords
 * "Name", "About", "Life" and "Rest".
 *
 * Each section's text is appended to the returned summary as
 * "<Keyword> : <text>\n"; the "Name", "About" and "Life" texts are also
 * pushed onto the global arrays `name`, `about` and `life`.
 *
 * @param {*} data - Report text; converted to a string. null/undefined are
 *     treated as empty input.
 * @returns {string} "Event " followed by one line per section found.
 */
function transform_report(data) {
    var keywords = ["Name", "About", "Life", "Rest"];
    var output_data = "Event ";
    // Avoid the TypeError that data.toString() raises for null/undefined.
    var input_data = data == null ? "" : String(data);
    var keyword_index = 0;
    var pos = -1;

    // Start at the first keyword (in list order) that occurs in the text.
    for (var i = 0; i < keywords.length; i++) {
        pos = input_data.indexOf(keywords[i]);
        if (pos !== -1) {
            keyword_index = i;
            break;
        }
    }

    while (pos !== -1) {
        var index = keyword_index;
        output_data += keywords[index] + " : ";
        pos += keywords[index].length;

        // A result of -1, or of the current index, means that no further
        // keyword was found; the section then runs to the end of the text.
        var next_index = find_next_keyword(keywords, index, input_data, pos);
        var end_pos = -1;
        if (next_index !== -1 && next_index !== index) {
            // Search from `pos` so that an occurrence before the current
            // section can never be picked up as its end.
            end_pos = input_data.indexOf(keywords[next_index], pos);
        }
        var section_end = end_pos === -1 ? input_data.length : end_pos;

        // Drop only a separator colon directly after the keyword
        // ("Name: ..."); colons inside the section text are kept.
        var output_text = input_data
            .slice(pos, section_end)
            .replace(/^\s*:/, '')
            .trim();

        output_data += output_text + "\n";
        if (keywords[index] === "Name") {
            name.push(output_text);
        } else if (keywords[index] === "About") {
            about.push(output_text);
        } else if (keywords[index] === "Life") {
            life.push(output_text);
        }

        keyword_index = next_index;
        // end_pos is either -1 (stop) or >= pos, so the loop always advances.
        pos = end_pos;
    }
    return output_data;
}

/**
 * Finds the keyword that starts the next section of the text.
 *
 * Keywords are expected to appear in the text in the same order as in
 * `keywords`, so only the entries after `index` are candidates. A keyword
 * that is reused as an ordinary word in a later section therefore never
 * opens a new section.
 *
 * @param {string[]} keywords - Section keywords, in document order.
 * @param {number} index - Index of the keyword of the current section.
 * @param {string} input_data - Full text being scanned.
 * @param {number} pos - Offset in `input_data` at which to start searching.
 * @returns {number} Index of the later keyword whose first occurrence at or
 *     after `pos` is the earliest, or -1 when none occurs (this includes
 *     the case where `index` is the last keyword).
 */
function find_next_keyword(keywords, index, input_data, pos) {
    var min_pos = input_data.length;
    var min_index = -1;
    for (var i = index + 1; i < keywords.length; i++) {
        var next_keyword_pos = input_data.indexOf(keywords[i], pos);
        if (next_keyword_pos !== -1 && next_keyword_pos < min_pos) {
            min_pos = next_keyword_pos;
            min_index = i;
        }
    }
    return min_index;
}

在上面的文字中,有 4 个关键字:Name、About、Life、Rest。我想把每个关键字后面的文本拆分出来,分别填充到各自的字符串数组中。这些关键字在文本中出现的顺序始终相同。到目前为止,我尝试了以下代码:

name array contains :
The United States of America (USA), commonly referred to as the United States (U.S.) or America, is a federal republic composed of 50 states, the federal district of Washington, D.C., five major territories, and various possessions. 

about array contains: 48 contiguous states and Washington, D.C., are in central North America between Canada and Mexico. The state of Alaska is in the northwestern part of North America and the state of Hawaii is an archipelago in the mid-Pacific. The territories are scattered about the Pacific Ocean and the Caribbean Sea. At 3.8 million square miles and with over 320 million people, the country is the world's third largest by total area and the third most populous. It is one of the world's most ethnically diverse and multicultural nations, the product of large-scale immigration from many countries. 

life array contains:The geography and climate of the United States are also extremely diverse, and the country is home to about a wide variety of wildlife.

当每个关键字在数据中只出现一次时,上述代码可以正常工作。但在这种情况下,关键字 "About" 还会作为普通单词出现在应属于 "about 数组" 和 "life 数组" 的句子中。输出应为:

linear

但由于关键字也会作为普通单词出现在正文中,我无法得到所需的输出。有什么方法可以在 JavaScript 中实现这一点吗?非常感谢。

2 个答案:

答案 0 :(得分:1)

考虑你的情况:

  

"……这些关键字在文本中出现的顺序始终相同"

"主要目标"可以使用 String.split、String.replace、String.substring 和 Array.indexOf 函数,通过以下方法实现:

// data is the initial string(text)
// Splits `data` into sentences and files each sentence under the keyword
// section it belongs to; the result is printed as indented JSON.
var splitted = data.split(/\.\s/),  // sentences; the ". " separators are dropped
    keywords = ["Name", "About", "Life", "Rest"],  // section markers, in document order
    currentKeyword = "",  // key of the section being filled ("" before the first marker)
    currentIndex = -1,    // position of that section's marker in `keywords`
    keysObject = {'name' : [], 'about' : [], 'life' : [], 'rest' : []};

splitted.forEach(function(v){
    // First word of the sentence with punctuation stripped.
    var first = v.substring(0, v.indexOf(" ")).replace(/\W/g, "");
    var keywordIndex = keywords.indexOf(first);
    if (keywordIndex > currentIndex) {
        // A marker opens a section only when it comes later in `keywords`
        // than the current one, so a sentence that merely starts with an
        // earlier keyword stays in the current section.
        currentIndex = keywordIndex;
        currentKeyword = first.toLowerCase();
        keysObject[currentKeyword].push(v.substring(v.indexOf(" ") + 1));
    } else if (currentKeyword !== "") {
        keysObject[currentKeyword].push(v);
    }
    // Sentences that precede the first marker belong to no section and are
    // skipped; indexing keysObject with "" would throw a TypeError.
});

console.log(JSON.stringify(keysObject, null, 4));

输出:

{
    "name": [
        "The United States of America (USA), commonly referred to as the United States (U.S.) or America, is a federal republic composed of 50 states, the federal district of Washington, D.C., five major territories, and various possessions"
    ],
    "about": [
        "48 contiguous states and Washington, D.C., are in central North America between Canada and Mexico",
        "The state of Alaska is in the northwestern part of North America and the state of Hawaii is an archipelago in the mid-Pacific",
        "The territories are scattered **about** the Pacific Ocean and the Caribbean Sea",
        "At 3.8 million square miles and with over 320 million people, the country is the world's third largest by total area and the third most populous",
        "It is one of the world's most ethnically diverse and multicultural nations, the product of large-scale immigration from many countries"
    ],
    "life": [
        "The geography and climate of the United States are also extremely diverse, and the country is home to **about** a wide variety of wildlife"
    ],
    "rest": [
        "USA is a diversified nation and Niagara is world famous."
    ]
}

答案 1 :(得分:0)

如果我正确理解了你的问题,你想在第一个 "About" 处开始一个新的字符串,而不是在之后出现的其他 "about" 处。我使用字符串搜索(String.search)实现了这一点,因为它只返回第一个匹配项的位置。

  

http://codepen.io/jnfr/pen/gMYbPJ

    <!-- The button's onclick handler calls myFunction(). -->
    <button onclick="myFunction()">button</button>

    <!-- Output paragraphs; myFunction() looks each one up by its id. -->
    <p id="name"></p>

    <p id="about"></p>

    <p id="life"></p>

    <p id="rest"></p>

    /**
     * Splits the sample text at its "About", "Life" and "Rest" markers and
     * shows the four pieces in the elements with ids "name", "about",
     * "life" and "rest". Each piece keeps its leading marker word.
     */
    function myFunction() {
        var str = "Name The United States of America (USA), commonly referred to as the United States (U.S.) or America, is a federal republic composed of 50 states, the federal district of Washington, D.C., five major territories, and various possessions. About 48 contiguous states and Washington, D.C., are in central North America between Canada and Mexico. The state of Alaska is in the northwestern part of North America and the state of Hawaii is an archipelago in the mid-Pacific. The territories are scattered about the Pacific Ocean and the Caribbean Sea. At 3.8 million square miles and with over 320 million people, the country is the world's third largest by total area and the third most populous. It is one of the world's most ethnically diverse and multicultural nations, the product of large-scale immigration from many countries. Life The geography and climate of the United States are also extremely diverse, and the country is home to about a wide variety of wildlife. Rest USA is a diversified nation and Niagara is world famous.";

        // indexOf matches the marker literally (search() would compile it
        // to a regular expression), and each lookup starts after the
        // previous marker so the markers are matched in document order.
        var n = str.indexOf("About");
        var p = str.indexOf("Life", n + "About".length);
        var r = str.indexOf("Rest", p + "Life".length);

        var name = str.slice(0, n);
        var about = str.slice(n, p);
        var life = str.slice(p, r);
        var rest = str.slice(r);

        // The pieces are plain text, so assign textContent rather than
        // innerHTML: nothing in the text is parsed as markup.
        document.getElementById("name").textContent = name;
        document.getElementById("about").textContent = about;
        document.getElementById("life").textContent = life;
        document.getElementById("rest").textContent = rest;
    }