如何将字符串中的所有罗马数字替换为对应的阿拉伯数字?

时间:2016-11-21 15:29:49

标签: javascript

我有一个包含莎士比亚全部十四行诗的文本文件,我正在编写一个用来查找每首十四行诗的功能。但是,我希望能够使用阿拉伯数字来搜索它们(例如“/sonnet 122”)。这个 .txt 文件的格式如下:

I

This is a sonnet

II

This is a second sonnet

我现在正在使用 Node.js 来尝试实现这一点,但从昨天开始一直尝试,都没有成功。我昨天的最后一次尝试是使用 replace 方法:

'use strict';
//require module roman-numerals, which converts roman to arabic
var toArabic = require('roman-numerals').toArabic;
//require file-handling module
var fs = require('fs');

// Reads sonn.txt as UTF-8 and tries to swap Roman numerals for Arabic numbers.
// NOTE(review): this snippet is truncated as posted — the closing brackets for
// the forEach callback, the replace() call, the for loop and the else branch
// are missing.
fs.readFile('sonn.txt', 'utf8', function (err,data) {
    if (err) {
        console.log(err);
    } else {
        // Two or more consecutive uppercase ASCII letters (any letters, not only I/V/X/L/C/D/M).
        var RN = /[A-Z]{2,}/g; 
        var found = data.match(RN); // array of every RN match; match() yields null when nothing matches
        var numArr = [];
        for (var i = 0; i < found.length; i++ ){
            numArr.push(toArabic(found[i])); // Arabic value for each match, in the same order as found
        }
        for (var e = 0; e < found.length; e++){
            // NOTE(review): replace() returns a new string and that result is discarded here.
            // `found` is an array, so it is coerced to a single comma-joined string pattern,
            // and forEach() returns undefined, so no usable replacement value is supplied.
            data.replace(found, found.forEach((x, i)=> {
            toArabic(x)
    }
});

然后我尝试用以下代码替换它们:

// Second attempt: replacement via callback.
// NOTE(review): `found` is an array rather than a pattern, the callback's second
// argument is the match offset (not an index into numArr), and the string that
// replace() returns is discarded.
data.replace(found, function(s, i){
    return numArr[i];
});

然后我尝试了for循环。我没有保留该代码,但它类似于:

// Third attempt: one replace() call per match.
// NOTE(review): replace() leaves `data` untouched and returns a new string, so
// every iteration starts from the original text and its result is lost.
for(var i=0;i<found.length;i++){
    data.replace(found, numArr[i]);
}

最后这段代码会替换一个数字,随后丢弃已替换的结果,再去替换下一个数字:

replace(abc, 123) -> 1bc, a2c, ab3

如何让它迭代数据中的每个匹配项并保留它?然后将其保存到新的txt应该很容易。

(另外,我的正则表达式只匹配由多个字符组成的罗马数字,以避免替换可能出现在行尾的单独的“I”。)

3 个答案:

答案 0 :(得分:1)

如果您使用 String.prototype.replace,则可以传入正则表达式和自定义的替换函数。替换函数只需返回要用作替换内容的值,而这正是 toArabic 所做的。

// Sample input in the question's format: each Roman-numeral heading sits alone on its own line.
var data = 'I\n\nThis is a sonnet\n\nII\n\nThis is a second sonnet';

//========================

var toArabic = (function () {
  /**
   * Converts a roman number to its arabic equivalent.
   *
   * Input is case-insensitive and may range from I (1) to MMMCMXCIX (3999).
   * The empty string and "nulla" convert to 0. Only the first argument is
   * read, so the function can be passed directly as a String.prototype.replace
   * callback.
   *
   * @param {String} roman
   * @return {Number}
   * @throws {TypeError} when roman is not a string
   * @throws {Error} when roman is not a valid roman number
   */
  function toArabic (roman) {
    if (('string' !== typeof roman) && (!(roman instanceof String))) throw new TypeError('toArabic expects a string');

    // Zero is/was a special case. I'll go with Dionysius Exiguus on this one as
    // seen on http://en.wikipedia.org/wiki/Roman_numerals#Zero
    if (/^nulla$/i.test(roman) || !roman.length) return 0;

    // Validates the numeral and splits it into thousands / hundreds / tens /
    // units groups in a single pass.
    var parts = roman.toUpperCase().match(/^(M{0,3})(CM|DC{0,3}|CD|C{0,3})(XC|LX{0,3}|XL|X{0,3})(IX|VI{0,3}|IV|I{0,3})$/);
    if (!parts) throw new Error('toArabic expects a valid roman number');

    var hundreds = parts[2];
    var tens = parts[3];
    var units = parts[4];

    // Thousands: one M each.
    var arabic = parts[1].length * 1000;

    // Hundreds: CM/CD are subtractive. Otherwise every letter counts 100 and a
    // leading D adds the remaining 400 of its 500.
    if (hundreds === 'CM') arabic += 900;
    else if (hundreds === 'CD') arabic += 400;
    else arabic += hundreds.length * 100 + (hundreds[0] === 'D' ? 400 : 0);

    // Tens: same scheme with XC/XL and a leading L.
    if (tens === 'XC') arabic += 90;
    else if (tens === 'XL') arabic += 40;
    else arabic += tens.length * 10 + (tens[0] === 'L' ? 40 : 0);

    // Units: same scheme with IX/IV and a leading V.
    if (units === 'IX') arabic += 9;
    else if (units === 'IV') arabic += 4;
    else arabic += units.length * 1 + (units[0] === 'V' ? 4 : 0);

    return arabic;
  }
  return toArabic;
})();

//====================

// Match a Roman numeral only when it makes up an entire line. With the `m`
// flag, ^ and $ anchor at line boundaries, so numerals of any length are
// converted (including one on the last line with no trailing newline), while
// a capital letter at the end of an ordinary verse line is left alone.
// Assumes headings carry no leading or trailing spaces, as in the sample above.
var RN = /^[IVXLCDM]+$/gm;
// toArabic reads only its first argument (the matched text), so it can serve
// directly as the replacement callback.
var newData = data.replace(RN, toArabic);
document.body.innerText = newData;

答案 1 :(得分:1)

您必须把替换后的字符串重新赋值回去,并且可以使用 replace() 的回调函数:

'use strict';

var toArabic = require('roman-numerals').toArabic;
var fs = require('fs');

// Reads sonn.txt as UTF-8 and converts every Roman-numeral heading line to its
// Arabic number.
fs.readFile('sonn.txt', 'utf8', function (err,data) {
    if (err) {
        console.log(err);
    } else {
        // replace() returns a new string, so the result is assigned back to data.
        // The pattern matches a numeral only when it is the whole line (`m` makes
        // ^ and $ line anchors): single-letter headings such as I, V and X are
        // converted, and ordinary capitalised words are never handed to toArabic.
        data = data.replace(/^[IVXLCDM]+$/gm, function(x) {
            return toArabic(x);
        });
    }
});

Here are some more regular expressions to match romans

答案 2 :(得分:1)

这类任务最好用流转换(stream transform)来处理。旧版 Node 的流转换库初始化方式有点古怪,但它能非常快地完成工作。下面是一个可运行的示例,使用了 @adeneo 在上面写的替换函数。

var stream = require('stream');
var util = require('util');
var toArabic = require('roman-numerals').toArabic;
var fs =require('fs');

// Source text to read and the file that receives the converted copy.
var rstream = fs.createReadStream('sonnets.txt');
var wstream = fs.createWriteStream('sonnets.transformed.txt');

// node v0.10+ use native Transform, else polyfill
// (the readable-stream package is only required when stream.Transform is missing)
var Transform = stream.Transform ||
  require('readable-stream').Transform;

/**
 * Transform stream that rewrites Roman-numeral heading lines as Arabic numbers.
 *
 * Chunk boundaries are arbitrary, so input is held back up to the last newline
 * byte seen. Only complete lines are decoded and converted, which keeps a
 * numeral (or a multi-byte character) from being processed while split across
 * two chunks.
 *
 * @param {Object} [options] passed through to the Transform constructor
 */
function Converter(options) {
    // allow use without new
    if (!(this instanceof Converter)) {
        return new Converter(options);
    }

    // init Transform
    Transform.call(this, options);

    // bytes received after the last newline; possibly an incomplete line
    this._pending = Buffer.alloc(0);
}

util.inherits(Converter, Transform);

/**
 * Converts every line of `text` that consists solely of Roman-numeral letters.
 * The `m` flag makes ^ and $ match at line boundaries, so capitals inside an
 * ordinary line are left untouched.
 *
 * @param {String} text one or more complete lines
 * @return {String}
 */
Converter.prototype._convertLines = function (text) {
    return text.replace(/^[IVXLCDM]+$/gm, function(x) {
        return toArabic(x);
    });
};

/**
 * Emits the converted complete lines of the data received so far and keeps
 * the trailing partial line for the next call.
 */
Converter.prototype._transform = function (chunk, enc, cb) {
    var bytes = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, enc);
    var buffered = Buffer.concat([this._pending, bytes]);

    // Cut after the last newline byte (0x0a); that byte never occurs inside a
    // multi-byte UTF-8 sequence, so both halves decode cleanly.
    var cut = buffered.lastIndexOf(0x0a) + 1;
    this._pending = buffered.subarray(cut);

    if (cut === 0) {
        // no complete line yet
        return cb();
    }

    var converted;
    try {
        converted = this._convertLines(buffered.subarray(0, cut).toString());
    } catch (err) {
        // report conversion failures through the stream instead of throwing
        return cb(err);
    }

    this.push(converted); //push the converted lines

    cb(); //callback
};

/**
 * Converts and emits whatever follows the final newline once input has ended.
 */
Converter.prototype._flush = function (cb) {
    var rest = this._pending;
    this._pending = Buffer.alloc(0);

    if (rest.length === 0) {
        return cb();
    }

    var converted;
    try {
        converted = this._convertLines(rest.toString());
    } catch (err) {
        return cb(err);
    }

    this.push(converted);

    cb();
};


// try it out
// try it out
var converter = new Converter();

// now run it on the whole file
// pipe() returns its destination, so the 'finish' listener below is attached
// to wstream.
// NOTE(review): pipe() does not forward errors and no 'error' listeners are
// registered on any of the three streams.
rstream
    .pipe(converter)
    .pipe(wstream)  // writes to sonnets.transformed.txt
    .on('finish', function () {  // finished
        console.log('done transforming');
     });

这里有很好的介绍: http://codewinds.com/blog/2013-08-20-nodejs-transform-streams.html 这里有使用through2转换库的更现代的例子 https://github.com/substack/stream-handbook