Sed没有找到/替换目录中的所有字符

时间:2017-11-06 21:30:12

标签: sed

我正在尝试查找并替换目录中许多文本文件里的某个字符。如果这是重复问题,先行致歉,但我在其他有关 sed 的帖子中没能找到答案。

我用Homebrew安装gnu-sed,我正在使用命令:

find . -name "*.txt" -exec gsed -i -e 's/ñ/–/g' '{}' \;

我有一个'test'文件,其中包含我需要在目录中替换的字符,这些字符都能被找到并正确替换。但其他文本文件中的字符却没有被替换。例如'我们去丹麦 - 所有'(ñ也没有被找到/替换)。

为什么会这样?我该如何解决?谢谢!

编辑 - 输出

$ od -c filethatworks.txt | head -2
0000000    –  **  **  \n   –  **  **  \n   “  **  **  \n   “  **  **  \n
0000020    —  **  **  \n   —  **  **  \n   -  \n   “  **  **  \n   “  **
$ od -c filethatdoesnot.txt | head -2
0000000    T   h   o   s   e       b   l   e   s   s   e   d       d   a
0000020    y   s       o   f       s   u   m   m   e   r       a   r   e

对于可以正常替换的文件,file 命令返回:

test.txt: UTF-8 Unicode text

而对于无法正常替换的文件,则返回:

ca001_mci_17071971.txt: Non-ISO extended-ASCII text, with very long lines, with CRLF line terminators

1 个答案:

答案 0 :(得分:1)

字符是人类的概念。当字符要在计算机文件中表示时,就需要对它们进行编码。编码将每个字符与一个称为代码点的整数相关联。

例如,取字符“ă”(这是一个小写字母“a”,顶部带有一个短音符(breve),在罗马尼亚语拼写中用来表示央元音 /ə/);在MS-DOS的旧时代,我们经常使用名为“code page 852”的编码,其中“ă”的代码点是199。后来Windows出现了,在Windows上我们经常使用名为“code page 1250”的编码,其中“ă”的代码点是227。再后来有了Unicode,在Unicode中“ă”的代码点是259。

由于Unicode代码点的值可能大于255,因此必须有一种方法可以使用值介于0到255之间的字节来表示它们。这些方法称为“Unicode转换格式”(UTF),其中使用最广泛的是UTF-8(在Linux中非常流行)和UTF-16(分为小端和大端两种,在Windows上非常流行)。在UTF-8中,“ă”表示为两个字节,值为196和131(根据UTF-8的规则,这两个字节一起表示代码点259);在小端UTF-16中,“ă”由两个字节表示,值为3和1(根据小端UTF-16的规则,这两个字节一起表示代码点259)。

关键在于,为了理解文本文件,您需要知道(1)使用了什么编码,以及(2)在Unicode的情况下,使用了什么转换格式。现在,在Linux和Web上,我们已经非常接近“所有文本都以UTF-8表示”的共识;尽管如此,旧文件仍然存在,偶尔也会有来自Windows的新文件,因此有一个非常好用的程序叫iconv(在Linux和Windows上都可用),它用于将文本文件从一种编码转换为另一种编码。

例如,假设您的有问题的文件是在Windows-1252中编码的(Windows文档也称为ANSI,尽管美国国家标准协会与此无关),您可以说

iconv -f WINDOWS-1252 -t UTF-8 source.txt > destination.txt

可悲的是,没有办法使用 iconv 就地转换文件;你必须先写入一个临时输出文件,在确认一切顺利之后,再将该临时输出文件重命名以覆盖源文件。