我有这个文件,我不知道如何解析这样的文本:
[1]
Word => 'THE'
USED => 53097401461
[2]
Word => 'OF'
USED => 30966074232
然后我需要找出使用次数最多的前 X 个单词(X 是一个参数)。
这是我的JavaScript:
// Download the word list and write the raw text into the #content element.
$.get("https://gist.githubusercontent.com/zach-karat/119d690176f324e3f99c0e312f0a6620/raw/82e14d739e966216536ae9806282a20343e0e2f8/google-books-common-words.txt", function(data, status){
// Thats works once at the time but with letters and not with numbers!
//var hasString = data.includes("HELLO");
// NOTE(review): `content` is assigned here but never read in the visible code.
var content = data;
// NOTE: replace() with a string pattern substitutes only the FIRST '\n', so only
// the first line break is turned into <br>.
$('#content').html(data.replace('\n','<br>'));
});
// NOTE(review): the two closers below have no matching opener in this snippet;
// presumably they belong to wrapper calls that were cut off when pasting — confirm.
});
}, 'html');
编辑:
文件中的单词已按使用次数排序,因此我将代码改成了下面这样:(现在……有人知道如何找出长度为 3 的单词中使用次数最多的前 10 个吗?)
// Download the word list and append its first 10 lines to #content, one <div> per line.
$.get("https://gist.githubusercontent.com/zach-karat/119d690176f324e3f99c0e312f0a6620/raw/82e14d739e966216536ae9806282a20343e0e2f8/google-books-common-words.txt", function(data, status,){
var lines = data.split("\n");
// Counts how many lines have been appended so far.
var x = 0;
$.each(lines, function(n, elem) {
// TODO: append only if length > 10 — no length check is implemented yet, so
// every line is appended unconditionally.
$('#content').append('<div>' + elem + '</div>');
x ++;
if(x == 10){//x => parameter
// Returning false from a $.each callback stops the iteration.
return false;
}
});
});
// NOTE(review): the two closers below have no matching opener in this snippet;
// presumably they belong to wrapper calls that were cut off when pasting — confirm.
});
}, 'html');
答案 0 :(得分:1)
使用正则表达式分割每一行。
正则表达式:/^([A-Z]+)\s*(\d+)$/gm
说明:
^
-行的开头(由于使用了 m 标志,^ 匹配每一行的开头,而不仅是整个字符串的开头)
([A-Z]+)
-捕获一个或多个大写字母 A-Z(第 1 个捕获组)。
\s*
-0 个或多个空白字符(空格、制表符等)
(\d+)
-捕获一个或多个数字 0-9(第 2 个捕获组)。
$
-行的结尾(配合 m 标志)
gm
-global
和multiline
标志
示例:Regex101
// Fetch the word list and render every "WORD count" line it contains into #content.
$.get("https://gist.githubusercontent.com/zach-karat/119d690176f324e3f99c0e312f0a6620/raw/82e14d739e966216536ae9806282a20343e0e2f8/google-books-common-words.txt", function (data, status) {
  // Group 1 captures the upper-case word, group 2 its usage count. The g flag
  // makes successive exec() calls resume after the previous match; the m flag
  // anchors ^ and $ to individual lines.
  const linePattern = /^([A-Z]+)\s*(\d+)$/gm;
  const pieces = [];
  let match;
  // exec() yields null once no further line matches.
  while ((match = linePattern.exec(data)) !== null) {
    pieces.push("WORD : " + match[1] + "<br>USED : " + match[2] + "<br><br>");
  }
  $('#content').html(pieces.join(""));
});
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<div id="content"></div>
答案 1 :(得分:1)
这是您需要的吗?
// Fetch the word list as plain text and show its first x rows in #content.
$.get("https://gist.githubusercontent.com/zach-karat/119d690176f324e3f99c0e312f0a6620/raw/82e14d739e966216536ae9806282a20343e0e2f8/google-books-common-words.txt", function (data, status) {
  // Split the text into rows, then each row into its tab-separated fields
  // (assumed to be [word, count] — confirm against the data file).
  var content = data.split('\n').map(function (row) {
    return row.split('\t');
  });
  var x = 10; //from input parameter
  // The rows are used in file order, so the first x rows are taken as the top x.
  var topResults = content.slice(0, x);
  // .html() is documented to take an HTML string, so join the per-row
  // fragments into one string instead of handing it an array.
  var html = topResults.map(function (result) {
    return result[0] + '\t' + result[1] + '<br>';
  }).join('');
  $('#content').html(html);
}, 'text');
实际工作不需要jQuery。
答案 2 :(得分:0)
使用Array.prototype.reduce
的简洁解决方案:
// Fetch the word list and render its first ten rows as <div> elements in #content.
$.get("https://gist.githubusercontent.com/zach-karat/119d690176f324e3f99c0e312f0a6620/raw/82e14d739e966216536ae9806282a20343e0e2f8/google-books-common-words.txt", function (data) {
  const firstTen = data.split('\n').slice(0, 10);
  // Each row is split on a tab into the word and its usage count.
  const rows = firstTen.map((line) => {
    const [word, count] = line.split('\t');
    return `<div>Word:${word}, Used: ${count}</div>`;
  });
  $('#content').html(rows.join(''));
});
答案 3 :(得分:0)
// Fetch the word list and build a { WORD: "count" } lookup object from it.
$.get('https://gist.githubusercontent.com/zach-karat/119d690176f324e3f99c0e312f0a6620/raw/82e14d739e966216536ae9806282a20343e0e2f8/google-books-common-words.txt', function (data) {
  const result = {};
  for (const line of data.split('\n')) {
    // Split on any run of whitespace so both tab- and space-separated rows
    // parse; trim() also drops a trailing '\r' from CRLF files.
    const [word, count] = line.trim().split(/\s+/);
    // Skip blank or malformed rows instead of storing an "undefined" entry.
    if (word && count !== undefined) {
      // Counts are kept as strings, exactly as they appear in the file.
      result[word] = count;
    }
  }
  // `result` is now ready to use, e.g. result['THE'].
})
因此结果将如下所示:
// Example of the resulting lookup object (20 entries shown).
// The counts are strings because they come straight out of String.prototype.split.
const result = {
  THE: "53097401461",
  OF: "30966074232",
  AND: "22632024504",
  TO: "19347398077",
  IN: "16891065263",
  A: "15310087895",
  IS: "8384246685",
  THAT: "8000768228",
  FOR: "6545282031",
  IT: "5740085369",
  AS: "5700645258",
  WAS: "5502713968",
  WITH: "5182797249",
  BE: "4818864785",
  BY: "4703106084",
  ON: "4594521081",
  NOT: "4522732626",
  HE: "4110457083",
  I: "3884828634",
  THIS: "3826060334"
};
// Look a word up by key. This only reads the value; the stored count is a
// string, so convert with Number(...) if arithmetic is needed.
console.log(result['THE']); // "53097401461"
希望这会有所帮助。