我正在使用 RiTa.js 库的 kwic() 函数,在一组文本中查找关键词并获取其上下文(keyword in context)。单词本身能正确找到,但在 kwic 返回的结果中,如果句子里同时包含两个或更多特殊字符,空格和特殊字符就会被弄乱。你能帮我吗?抱歉,我是 p5.js 的新手。谢谢!
// Loaded file contents, one slot per entry of `files`; filled in by preload().
// Declared explicitly so the sketch does not rely on implicit globals, which
// throw a ReferenceError in strict mode and in ES modules.
var txt = [];

// Names of the text files to load; preload() prefixes each with 'data/'.
var files = [
  'Antonopoulou.txt',
  'Barriere.txt',
  'Costa.txt',
  // …
];
/**
 * Loads every file named in `files` from the 'data/' folder into the matching
 * slot of `txt`, then loads the font and stores it in the global `myFont`.
 */
function preload() {
  files.forEach(function (fileName, index) {
    txt[index] = loadStrings('data/' + fileName);
  });
  myFont = loadFont('data/FranklinGothicMedium.ttf');
}
/**
 * Builds the keyword-in-context data for the loaded texts.
 *
 * Reads the global `txt` (one array of lines per file) and writes these
 * globals: `allwords`, `titles`, `authors`, `counts`, `total`, `params`,
 * `words`, `keys`, `search` and `kwic`.
 *
 * Fixes relative to the earlier version:
 *  - each file is joined on its own (`txt[i].join`), instead of joining the
 *    outer array, which comma-joined the lines of every file and stored the
 *    whole corpus in every slot;
 *  - the KWIC search runs over the full corpus rather than `allwords[i]`,
 *    where `i` was the index of the word, not of a file;
 *  - loop temporaries are local, so the global `key` is no longer overwritten.
 */
function setup() {
  // One newline-joined string per loaded file.
  allwords = txt.map(function (fileLines) {
    return fileLines.join('\n');
  });
  titles = [];
  authors = [];

  // The whole corpus as a single string; used for counting and searching.
  var corpus = allwords.join('\n');

  counts = RiTa.concordance(corpus);
  total = totalValues(counts);
  params = {
    ignoreStopWords: false,
    ignoreCase: false,
    ignorePunctuation: false,
    wordCount: 20
  };

  // Keep words counted at least 100 times whose first POS tag is 'jj'
  // (presumably "adjective" — confirm against RiTa's tag set).
  words = [];
  keys = [];
  for (var k in counts) {
    if (Object.prototype.hasOwnProperty.call(counts, k) && counts[k] >= 100) {
      var tags = RiTa.getPosTags(k);
      if (tags[0] === 'jj') {
        words.push(k);
        keys.push(counts[k]);
      }
    }
  }

  // `search` holds one record per kept word; `kwic` is the flattened list of
  // { keyword, entry } pairs across all words.
  search = [];
  kwic = [];
  for (var i = 0; i < words.length; i++) {
    var hits = RiTa.kwic(corpus, words[i], params);
    search.push({
      word: words[i],
      key: keys[i],
      result: hits
    });
    for (var j = 0; j < hits.length; j++) {
      kwic.push({
        keyword: words[i],
        entry: hits[j]
      });
    }
  }
}
问题似乎在这里:
// Excerpt of the KWIC search loop; the closing lines of the loop are not shown here.
for (var i = 0; i < words.length; i++) {
word = words[i];
key = keys[i];
// NOTE(review): i counts entries of `words`, yet it is also used to index `allwords` here.
result = RiTa.kwic(allwords[i], word, params); // HERE
// Record the word, its count and its KWIC result together.
search.push({
word: word,
key: key,
result: result,
});
结果有奇怪的空格。
答案 0 :(得分:0)
sketch.js中的代码存在很多问题,但这是一个清理后的版本(似乎没有您描述的问题):
// Loaded file contents: one array of lines per file, filled in by preload().
var txt = [];

/**
 * Loads each listed text file from the 'data/' folder into `txt`, keeping the
 * same order as the file list.
 */
function preload() {
  var files = ['Lee.txt','Orsi.txt'];
  files.forEach(function (fileName, index) {
    txt[index] = loadStrings('data/' + fileName);
  });
}
/**
 * Creates a 600x600 canvas, runs a keyword-in-context search for
 * "audio-visual" over all loaded text, and draws one numbered result per row.
 *
 * Reads the global `txt` (one array of lines per file).
 */
function setup() {
  createCanvas(600, 600);

  // Join files with '\n' as well as lines: plain concatenation would fuse the
  // last word of one file with the first word of the next.
  var alltext = txt.map(function (fileLines) {
    return fileLines.join('\n');
  }).join('\n');

  var lines = RiTa.kwic(alltext, "audio-visual", {
    ignoreStopWords: false,
    ignoreCase: false,
    ignorePunctuation: false,
    wordCount: 5
  });

  // One result per row, 20px apart, prefixed with its index.
  for (var i = 0; i < lines.length; i++) {
    text(i + ") " + lines[i], 20, 20 + i * 20);
  }
}
输出: