如何转换此文字
// Sample input from the question: a header row followed by
// whitespace-separated data rows, stored as one multi-line string.
data=`ID ra dec V VR MJD
100 30.1 +15 7.00 -10 2450000.1234
200 30.2 +16 12.226 -5.124 2450000.2345
300 30.3 +17 13.022 12.777 2450000.3456
400 30.4 +18 14.880 13.666 2450000.6789
500 30.5 +19 12.892 -1.835 2450001
600 30.6 +20 17.587 15.340 2450002.123
700 30.7 +21 13.984 13.903 2450000.123456
800 30.8 +22 20.00 10.000 2450003.0 `
即导入的文本,其中多行多列以空格和制表符分隔,转换成下面这种格式:
ID,ra,dec,V,VR,MJD
100,30.1,+15,7.00,-10,2450000.1234
200,30.2,+16,12.226,-5.124,2450000.2345
300,30.3,+17,13.022,12.777,2450000.3456
400,30.4,+18,14.880,13.666,2450000.6789
500,30.5,+19,12.892,-1.835,2450001
600,30.6,+20,17.587,15.340,2450002.123
700,30.7,+21,13.984,13.903,2450000.123456
800,30.8,+22,20.00,10.000,2450003.0
不幸的是,
data=data.replace(/^\s+|\s+$/g,'').replace(/[\t \r]+/g,',');
仅对第一行有效,而
data.replace(/[^\S\r\n]+$/gm, "").replace(/[\t \r]+/g,',');
可以处理每一行,但只能去除行尾的空白。附加问题:如何将其转换为 JSON,并把两个数据块拆分为两个数据集,例如 [[{id:..., ra:...},{},{}],[{id:..., ra:...},{},{}]]
答案 0 :(得分:2)
// Sample input: a header row followed by two blocks of whitespace-separated
// rows. The blocks are separated by blank lines, and some rows carry
// trailing spaces. Built from an array so no fragile `\n\` line
// continuations are needed.
var data = [
  'ID ra dec V VR MJD',
  '100 30.1 +15 7.00 -10 2450000.1234',
  '200 30.2 +16 12.226 -5.124 2450000.2345',
  '300 30.3 +17 13.022 12.777 2450000.3456',
  '',
  '',
  '400 30.4 +18 14.880 13.666 2450000.6789',
  '500 30.5 +19 12.892 -1.835 2450001',
  '600 30.6 +20 17.587 15.340 2450002.123',
  '700 30.7 +21 13.984 13.903 2450000.123456 ',
  '800 30.8 +22 20.00 10.000 2450003.0 '
].join('\n');

/**
 * Shows a titled, preformatted section.
 *
 * Writes to the page with document.write when a `document` exists and
 * falls back to console.log otherwise, so the snippet also runs outside
 * a browser.
 *
 * @param {string} title - Heading text for the section.
 * @param {string} text - Preformatted body text.
 */
function writeSection(title, text) {
  if (typeof document !== 'undefined') {
    document.write('<h1>' + title + '</h1><pre><code>' + text + '</code></pre>');
  } else {
    console.log(title + '\n' + text);
  }
}

// First: the trimming part. Split on newlines, trim each line and replace
// every remaining run of whitespace with a single comma.
data = data
  .split('\n')
  .map((line) => line.trim().replace(/\s+/g, ','))
  .join('\n');
writeSection('Formatted data string', data);

// Now to turn it into objects. The first line is the list of column names.
// Only that line may be split on commas: splitting the whole text would
// yield every value in the file as a "column".
const formattedLines = data.split('\n');
const cols = formattedLines[0].split(',');
const columnCount = cols.length;
data = formattedLines.slice(1).join('\n');

// Now separate the datasets: one or more blank lines act as the separator.
// Empty chunks (e.g. from leading or trailing blank lines) are dropped.
const datasets = data.split(/\n{2,}/).filter((chunk) => chunk !== '');
writeSection('First dataset', datasets[0]);
writeSection('Second dataset', datasets[1]);

// Go through each line of each dataset and build one object per line,
// keyed by column name. The result is an array of datasets, each an
// array of row objects.
const processed = datasets.map((dataset) =>
  dataset.split('\n').map((line) => {
    const values = line.split(',');
    const obj = {};
    for (let k = 0; k < columnCount; k++) {
      obj[cols[k]] = values[k];
    }
    return obj;
  })
);

const finalJSON = JSON.stringify(processed);
writeSection('Final JSON', finalJSON);
答案 1 :(得分:1)
由于您知道每行的确切格式,可以逐行使用捕获组来提取各个字段。可以尝试如下的正则表达式:
/^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s*$/mg
请记住,\s 匹配所有空白字符(包括换行符),而 \S 匹配非空白字符。如有必要,您可以根据自己的需要调整捕获组。然后,加上多行(m)和全局(g)标志,就可以遍历所有匹配项了。
以下是代码:
// Your data, with the header removed, formatted as a string literal.
// Some rows have leading or trailing spaces, and two blank lines separate
// the two blocks.
var data = "100 30.1 +15 7.00 -10 2450000.1234\n"+
"200 30.2 +16 12.226 -5.124 2450000.2345\n"+
" 300 30.3 +17 13.022 12.777 2450000.3456\n"+
"\n"+
"\n"+
"400 30.4 +18 14.880 13.666 2450000.6789\n"+
"500 30.5 +19 12.892 -1.835 2450001\n"+
" 600 30.6 +20 17.587 15.340 2450002.123\n"+
"700 30.7 +21 13.984 13.903 2450000.123456 \n"+
"800 30.8 +22 20.00 10.000 2450003.0";

// The pattern to grab the data: six whitespace-separated fields on ONE line.
// [ \t] is used instead of \s on purpose: \s also matches newlines, so a
// row with fewer than six fields would silently borrow values from the
// following line. With [ \t] a match can never cross a line boundary.
// m: ^ and $ match at every line; g: exec() advances through the text.
const data_pattern = /^[ \t]*(\S+)[ \t]+(\S+)[ \t]+(\S+)[ \t]+(\S+)[ \t]+(\S+)[ \t]+(\S+)[ \t]*$/mg;

// Keep matching until we run out of lines that match. Blank or malformed
// lines simply produce no match and are skipped.
const results = [];
let line_match;
while ((line_match = data_pattern.exec(data)) !== null) {
  // Turn the six captured fields into an object and add it to the results.
  results.push({
    ID: line_match[1],
    ra: line_match[2],
    dec: line_match[3],
    V: line_match[4],
    VR: line_match[5],
    MJD: line_match[6]
  });
}

// Output the results as pretty-printed JSON.
console.log(JSON.stringify(results, null, 2));
这是控制台上的结果:
[
{
"ID": "100",
"ra": "30.1",
"dec": "+15",
"V": "7.00",
"VR": "-10",
"MJD": "2450000.1234"
},
{
"ID": "200",
"ra": "30.2",
"dec": "+16",
"V": "12.226",
"VR": "-5.124",
"MJD": "2450000.2345"
},
{
"ID": "300",
"ra": "30.3",
"dec": "+17",
"V": "13.022",
"VR": "12.777",
"MJD": "2450000.3456"
},
{
"ID": "400",
"ra": "30.4",
"dec": "+18",
"V": "14.880",
"VR": "13.666",
"MJD": "2450000.6789"
},
{
"ID": "500",
"ra": "30.5",
"dec": "+19",
"V": "12.892",
"VR": "-1.835",
"MJD": "2450001"
},
{
"ID": "600",
"ra": "30.6",
"dec": "+20",
"V": "17.587",
"VR": "15.340",
"MJD": "2450002.123"
},
{
"ID": "700",
"ra": "30.7",
"dec": "+21",
"V": "13.984",
"VR": "13.903",
"MJD": "2450000.123456"
},
{
"ID": "800",
"ra": "30.8",
"dec": "+22",
"V": "20.00",
"VR": "10.000",
"MJD": "2450003.0"
}
]
我希望这会有所帮助。
答案 2 :(得分:1)
使用split / join和trim进行字符串转换可能更容易:
// Convert whitespace-separated text to comma-separated text, line by line:
// break the text into lines, trim each one, split it into fields on runs
// of whitespace, and glue the fields back together with commas.
data
  .split(/\r?\n/)
  .map((line) => {
    const fields = line.trim().split(/\s+/);
    return fields.join(',');
  })
  .join('\n')
附加问题要复杂一些。 :)
// Normalise every line to a comma-separated row: trim it, then join the
// whitespace-separated fields with commas. Blank lines become ''.
const rows = data.split(/\r?\n/).map(row => row.trim().split(/\s+/).join(','));
// The first row holds the column names; shift() also removes it from rows.
const keys = rows.shift().split(',');
// One or more blank lines separate the datasets. Trim first and drop empty
// chunks so leading/trailing blank lines (e.g. a final newline) do not
// produce empty rows or empty datasets.
const chunks = rows
  .join("\n")
  .trim()
  .split(/\n{2,}/)
  .filter(chunk => chunk !== "");
// Each chunk becomes an array of objects, one per row, keyed by column name.
const output = chunks.map(chunk => chunk.split("\n").map(
  row => row.split(',').reduce((obj, v, i) => {
    obj[keys[i]] = v;
    return obj;
  }, {})
));
答案 3 :(得分:1)
你已经很接近了。第一次替换需要加上多行标志(m),但不要把 \n 也替换掉,所以不要使用 \s,而应改用 [ \t]:
// Sample table: a header row, then two blocks of rows separated by blank
// lines. Some rows carry stray leading or trailing spaces, and the text
// ends with a newline (hence the final empty entry).
var data = [
  'ID ra dec V VR MJD',
  ' 100 30.1 +15 7.00 -10 2450000.1234',
  '200 30.2 +16 12.226 -5.124 2450000.2345',
  ' 300 30.3 +17 13.022 12.777 2450000.3456',
  '',
  '',
  '400 30.4 +18 14.880 13.666 2450000.6789',
  '500 30.5 +19 12.892 -1.835 2450001',
  ' 600 30.6 +20 17.587 15.340 2450002.123',
  '700 30.7 +21 13.984 13.903 2450000.123456',
  '800 30.8 +22 20.00 10.000 2450003.0 ',
  ''
].join('\n');

// Spaces/tabs at the start or end of any line. The m flag makes ^ and $
// match per line, and [ \t] (unlike \s) never consumes a newline.
const edgeBlanks = /^[ \t]+|[ \t]+$/gm;
// Any remaining run of spaces/tabs, i.e. the gaps between fields.
const fieldGaps = /[ \t]+/g;

// Step 1: strip the per-line leading and trailing blanks.
const trimmedLines = data.replace(edgeBlanks, '');
// Step 2: turn each gap between fields into a single comma.
var result = trimmedLines.replace(fieldGaps, ',');
console.log(result);