我在 React 项目中读取一个文本文件,并用正则表达式把它转换成 JSON 格式。代码基本可以工作,但结果中缺少文本文件最后的 20-30 行。转换为 JSON 的过程中似乎存在问题,但我找不出原因。
这是我的代码:
readTextFile = file => {
let rawFile = new XMLHttpRequest();
rawFile.open("GET", file, false);
rawFile.onreadystatechange = () => {
if (rawFile.readyState === 4) {
if (rawFile.status === 200 || rawFile.status === 0) {
let allText = rawFile.responseText;
// console.log(allText)
let reg = /\d\d\d\d-(0?[1-9]|1[0-2])-(0?[1-9]|[12][0-9]|3[01]) (00|[0-9]|1[0-9]|2[0-3]):([0-9]|[0-5][0-9]):([0-9]|[0-5][0-9])/g;
let arr = [];
let start = null;
let line, lastSpacePos;
let match;
while ((match = reg.exec(allText)) != null) {
if(start) {
line = allText.slice(start, match.index).trim();
lastSpacePos = line.lastIndexOf(' ');
arr.push({
date: line.slice(0, 19),
text: line.slice(20, lastSpacePos).trim(),
user_id: line.slice(lastSpacePos).trim()
});
}
start = match.index
}
console.log(arr);
this.setState({
// text: JSON.stringify(arr)
text: allText
});
}
}
};
答案 0 :(得分:0)
我不确定问题中现有代码的问题。
要用另一种方法得到问题中描述的预期结果,可以使用以下正则表达式:
/\s{2,}|\n+/g
将两个及以上连续的空白字符以及换行符替换为单个空格; /[\d-]+\s[\d:]+/g
提取日期; /.+(?=\s\w+\s$|\s\w+$)|\w+\s$|\w+$/g
匹配末尾单词之前的正文,以及位于字符串结尾(其后可带一个空白字符)的最后一个单词;最后在 .map() 中返回对象。
// Sample input: each record starts with a "YYYY-MM-DD HH:MM:SS" timestamp,
// may wrap over several lines, and ends with the user id as its last word.
let allText = `2014-06-01 23:07:58 President Resigns in Georgia’s Breakaway Region of
Abkhazia t.co/DAploRvCvV nytimes
2014-06-01 23:48:06 The NYT FlipBoard guide to understanding climate
change and its consequences t.co/uPGTuYiSmQ nytimes
2014-06-01 23:59:06 For all the struggles that young college grads
face, a four-year degree has probably never been more valuable
t.co/Gjf6wrwMsS nytimes
2014-06-01 23:35:09 It's better to be a community-college graduate than
a college dropout t.co/k3CO7ClmIG nytimes
2014-06-01 22:47:04 Share your experience with Veterans Affairs health
care t.co/PrDhLC20Bt nytimes
2014-06-01 22:03:27 Abandon Hope, Almost All Ye Who Enter the N.B.A.
Playoffs t.co/IQAJ5XNddR nytimes`;

// Strict timestamp shape, so stray digit/hyphen/colon runs inside the text
// are never mistaken for the start of a record.
const TIMESTAMP = /\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}/;

// Collapse every run of whitespace (including line breaks) to one space so
// wrapped records become single lines.
allText = allText.replace(/\s+/g, " ").trim();

// All timestamps, in order of appearance (`match` returns null when there
// are none, hence the fallback).
let dates = allText.match(new RegExp(TIMESTAMP.source, "g")) || [];

// Splitting on the same pattern yields one chunk per timestamp after the
// leading piece (anything before the first timestamp). Dropping exactly that
// leading piece keeps chunk i aligned with dates[i], even if a chunk is empty.
let res = allText
  .split(TIMESTAMP)
  .slice(1)
  .map((chunk, index) => {
    const body = chunk.trim();
    // The last space-separated token is the user id; everything before it is
    // the text. A single-token chunk is treated as a user id with empty text.
    const lastSpace = body.lastIndexOf(" ");
    return {
      date: dates[index],
      text: lastSpace === -1 ? "" : body.slice(0, lastSpace),
      user_id: body.slice(lastSpace + 1)
    };
  });
console.log(res);