RiTa.js kwic() 结果中的空格和特殊字符被弄乱了

时间:2018-07-05 15:38:35

标签: p5.js

我正在使用 RiTa.js 库的 kwic() 函数,在一组文本中查找关键词并获取其上下文(keyword in context)。单词本身能够正确找到,但当 kwic 返回的句子中同时包含 2 个或更多特殊字符时,结果里的空格和特殊字符就被弄乱了。你能帮我吗?抱歉,我是 p5.js 的新手。谢谢!

// Per-file results of loadStrings(), filled in by preload():
// txt[i] holds the lines of files[i]. Declared explicitly so the sketch
// does not depend on implicit globals (a ReferenceError in strict mode).
var txt = [];

// Names of the source texts; preload() resolves them under 'data/'.
var files = [
  'Antonopoulou.txt',
  'Barriere.txt',
  'Costa.txt',
  // …
];

/**
 * Loads every text listed in `files` plus the display font.
 *
 * The result of loadStrings() for files[n] is stored at txt[n], and the
 * result of loadFont() is stored in the sketch-level `myFont`.
 */
function preload() {
  files.forEach(function (name, index) {
    txt[index] = loadStrings('data/' + name);
  });

  myFont = loadFont('data/FranklinGothicMedium.ttf');
}

/**
 * Builds the concordance and the keyword-in-context (KWIC) tables.
 *
 * Reads the sketch-level `txt` (one array of lines per file) and fills
 * these sketch-level variables:
 *   allwords - one string per file, its lines joined with '\n'
 *   titles   - reset to an empty array (not filled here)
 *   authors  - reset to an empty array (not filled here)
 *   counts   - result of RiTa.concordance() over the whole corpus
 *   total    - totalValues(counts)
 *   params   - options object passed to RiTa.kwic()
 *   words    - words with count >= 100 whose first POS tag is 'jj'
 *   keys     - the count of each entry in `words` (parallel array)
 *   search   - one { word, key, result } record per entry in `words`
 *   kwic     - flat list of { keyword, entry }, one per KWIC line
 *
 * NOTE: these variables are assigned, not declared, here; they are expected
 * to be declared at sketch level so other p5 callbacks can read them.
 */
function setup() {

  allwords = [];
  titles = [];
  authors = [];

  for (var i = 0; i < txt.length; i++) {
    // Join the lines of THIS file. Joining the outer `txt` array would
    // stringify each inner array with commas and repeat the whole corpus
    // in every slot.
    allwords[i] = txt[i].join('\n');
  }

  // The whole collection as one string; used for both counting and KWIC.
  var corpus = allwords.join('\n');

  counts = RiTa.concordance(corpus);
  total = totalValues(counts);

  params = {
    ignoreStopWords: false,
    ignoreCase: false,
    ignorePunctuation: false,
    wordCount: 20
  };

  words = [];
  keys = [];

  for (var k in counts) {
    if (counts.hasOwnProperty(k) && counts[k] >= 100) {
      var tags = RiTa.getPosTags(k);
      // 'jj' is presumably the adjective tag of RiTa's tag set.
      if (tags[0] === 'jj') {
        words.push(k);
        keys.push(counts[k]);
      }
    }
  }

  search = [];
  kwic = [];

  for (var w = 0; w < words.length; w++) {
    // Locals on purpose: a bare `key = ...` would overwrite p5's `key`
    // system variable, and the other temporaries would leak as globals.
    var word = words[w];
    var count = keys[w];

    // Search the whole corpus. `w` indexes words, not files, so
    // allwords[w] would be the wrong text (or undefined).
    var result = RiTa.kwic(corpus, word, params);

    search.push({
      word: word,
      key: count,
      result: result
    });

    for (var j = 0; j < result.length; j++) {
      kwic.push({
        keyword: word,
        entry: result[j]
      });
    }
  }
}

问题似乎在这里:

for (var i = 0; i < words.length; i++) {  
        word = words[i];
        key = keys[i];
        result = RiTa.kwic(allwords[i], word, params);  // HERE

        search.push({
          word: word,
          key: key,
          result: result,        
        });

结果有奇怪的空格。

1 个答案:

答案 0 :(得分:0)

sketch.js 中的代码存在不少问题;下面是一个清理后的版本(其中似乎不会出现您描述的问题):

// Lines of each source text, one entry per file; filled in by preload().
var txt = [];

/**
 * Loads the two sample texts from the data/ folder.
 *
 * The loadStrings() result for each file is stored in `txt` at the
 * file's position in the list.
 */
function preload() {
  ['Lee.txt', 'Orsi.txt'].forEach(function (name, index) {
    txt[index] = loadStrings('data/' + name);
  });
}

/**
 * Creates a 600x600 canvas, runs a keyword-in-context search for
 * "audio-visual" over all loaded texts, and draws one result per row.
 *
 * Reads the sketch-level `txt` (one array of lines per file).
 */
function setup() {

  createCanvas(600, 600);

  // Join lines within each file, then join the files with '\n' as well.
  // Appending files back to back would fuse the last word of one file
  // with the first word of the next.
  var alltext = txt.map(function (fileLines) {
    return fileLines.join('\n');
  }).join('\n');

  var lines = RiTa.kwic(alltext, "audio-visual", {
    ignoreStopWords: false,
    ignoreCase: false,
    ignorePunctuation: false,
    wordCount: 5
  });

  // One result per row, 20px apart, each prefixed with its index.
  for (var i = 0; i < lines.length; i++) {
    text(i + ") " + lines[i], 20, 20 + i * 20);
  }
}


输出:

sketch output