我正在尝试查找并替换某个目录下许多文本文件中的某个字符。如果这是重复提问,先说声抱歉,但我在其他有关sed的帖子中没能找到答案。
我用Homebrew安装gnu-sed,我正在使用命令:
find . -name "*.txt" -exec gsed -i -e 's/ñ/–/g' '{}' \;
我有一个“test”文件,其中包含了我需要在该目录中替换的那些字符,这个文件里的字符都能被找到并正确替换;但其他文本文件中的字符却不行。例如'我们去丹麦 ñ 所有'(其中的ñ没有被找到,也没有被替换)。
为什么会这样?我该如何解决?谢谢!
编辑 - 输出
$ od -c filethatworks.txt | head -2
0000000 – ** ** \n – ** ** \n “ ** ** \n “ ** ** \n
0000020 — ** ** \n — ** ** \n - \n “ ** ** \n “ **
$ od -c filethatdoesnot.txt | head -2
0000000 T h o s e b l e s s e d d a
0000020 y s o f s u m m e r a r e
对于有效的文件,file命令返回
test.txt: UTF-8 Unicode text
而对于无效的文件,则返回:
ca001_mci_17071971.txt: Non-ISO extended-ASCII text, with very long lines, with CRLF line terminators
答案 0 :(得分:1)
字符是人类的概念。当字符要在计算机文件中表示时,需要对它们进行编码。编码将每个字符与一个称为代码点的整数相关联。
例如,以字符“ă”为例(这是一个顶部带有短音符(breve)的小写字母“a”,在罗马尼亚语拼写中用来表示央元音 /ə/);在MS-DOS的旧时代,我们经常使用名为“代码页852”(code page 852)的编码,其中“ă”的代码点是199。后来Windows出现了,在Windows上我们经常使用名为“代码页1250”(code page 1250)的编码,其中“ă”的代码点是227。再后来有了Unicode,在Unicode中“ă”的代码点是259。
由于Unicode代码点的值可能大于255,因此必须有一种方法,用取值在0到255之间的字节来表示它们。这些方法称为“Unicode转换格式”(UTF),其中使用最广泛的是UTF-8(在Linux中非常流行)和UTF-16(分为小端和大端两种,在Windows上非常流行)。在UTF-8中,“ă”表示为两个字节,值为196和131(根据UTF-8的规则,这两个字节合起来表示代码点259);在小端UTF-16中,“ă”表示为两个字节,值为3和1(根据小端UTF-16的规则,这两个字节合起来表示代码点259)。
关键在于,为了理解一个文本文件,您需要知道(1)它使用什么编码,以及(2)在Unicode的情况下,使用什么转换格式。如今,在Linux和Web上,我们已经非常接近“所有文本都用UTF-8表示”的共识;尽管如此,旧文件仍然存在,偶尔也会有来自Windows的新文件,因此有一个非常好用的程序叫iconv(在Linux和Windows上都可用),它用于将文本文件从一种编码转换为另一种编码。
例如,假设您那些有问题的文件是用Windows-1252编码的(Windows文档中也称之为ANSI,尽管美国国家标准协会与此毫无关系),您可以运行:
d3.selectAll(".mouse-per-line")
可悲的是,没有办法使用 <head>
<script data-require="d3@3.5.3" data-semver="3.5.3" src="//cdnjs.cloudflare.com/ajax/libs/d3/3.5.3/d3.js"></script>
<style>
/* Default font for everything rendered inside the page body. */
body {
font: 10px sans-serif;
}
/* Paths and lines inside elements with class "axis": black stroke, no fill,
   crisp (non-antialiased) edges. */
.axis path,
.axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
/* Elements with class "line" (the series paths): unfilled, 1.5px wide.
   steelblue is only the default; the script sets an inline stroke per series. */
.line {
fill: none;
stroke: steelblue;
stroke-width: 1.5px;
}
</style>
</head>
<body>
<script>
// Chart geometry: the margins around the plot, then the inner drawing area
// that remains once they are subtracted from a 900x500 surface.
var margin = { top: 20, right: 80, bottom: 30, left: 50 };
var width = 900 - margin.left - margin.right;
var height = 500 - margin.top - margin.bottom;
// Horizontal scale: ordinal points, one per reporting week, spread across
// [0, width] with 0.5 steps of outer padding.
var x = d3.scale.ordinal()
  .domain([
    "20170101",
    "20170108",
    "20170115",
    "20170122",
    "20170128"
  ])
  .rangePoints([0, width], 0.5);

// Vertical scale: linear and inverted ([height, 0]) so that larger values are
// drawn nearer the top. Its domain is assigned later, from the data.
var y = d3.scale.linear().range([height, 0]);

// Categorical palette; one colour per series.
var color = d3.scale.category10();

// Axis generators bound to the two scales.
var xAxis = d3.svg.axis().scale(x).orient("bottom");
var yAxis = d3.svg.axis().scale(y).orient("left");
// Path generator for a single series: straight segments through each point,
// with the point's week mapped through x and its value mapped through y.
var line = d3.svg.line()
  .interpolate("linear")
  .x(function weekToX(point) { return x(point.week); })
  .y(function valueToY(point) { return y(point.avg_test_drives); });
// Create the <svg> at full outer size, then keep a reference to an inner <g>
// shifted by the left/top margins; everything else is drawn into that group.
var svg = d3.select("body")
  .append("svg")
  .attr("width", margin.left + width + margin.right)
  .attr("height", margin.top + height + margin.bottom)
  .append("g")
  .attr("transform", "translate(" + [margin.left, margin.top].join(",") + ")");
// Inline data set: one row per week, one column per series. All figures are
// kept as strings here; they are coerced to numbers when the series are built.
var data = [
  { week: "20170101", dealer: "68",  peers: "73",  "all dealers": "123" },
  { week: "20170108", dealer: "121", peers: "112", "all dealers": "131" },
  { week: "20170115", dealer: "104", peers: "101", "all dealers": "106" },
  { week: "20170122", dealer: "123", peers: "131", "all dealers": "122" },
  { week: "20170128", dealer: "106", peers: "107", "all dealers": "122" }
];
// Every column except "week" is a series. Registering the series names as the
// colour scale's domain pins each series to a fixed colour.
color.domain(d3.keys(data[0]).filter(function (key) {
  return key !== "week";
}));

// Reshape the row-oriented data into one object per series:
//   { name: <series name>, values: [{ week, avg_test_drives }, ...] }
// The unary + converts each string figure to a number. Weeks stay as strings
// because the x scale is ordinal, so no per-row preprocessing is needed.
var testdrives = color.domain().map(function (name) {
  return {
    name: name,
    values: data.map(function (row) {
      return {
        week: row.week,
        avg_test_drives: +row[name]
      };
    })
  };
});
// Fit the vertical scale to the smallest and largest value found across all
// series: flatten every series' points into one list and take its extent.
y.domain(d3.extent(
  d3.merge(testdrives.map(function (series) { return series.values; })),
  function (point) { return point.avg_test_drives; }
));
// Legend: one group per series, each with a 10x10 colour swatch and the series
// name, stacked 20px apart near the right edge of the plot.
var legend = svg.selectAll("g")
  .data(testdrives)
  .enter()
  .append("g")
  .attr("class", "legend");

// Colour swatch.
legend.append("rect")
  .attr("x", width - 20)
  .attr("y", function swatchTop(series, row) { return row * 20; })
  .attr("width", 10)
  .attr("height", 10)
  .style("fill", function swatchFill(series) { return color(series.name); });

// Series name, placed to the right of the swatch.
legend.append("text")
  .attr("x", width - 8)
  .attr("y", function labelBaseline(series, row) { return (row * 20) + 9; })
  .text(function labelText(series) { return series.name; });
// Bottom axis, moved down to the baseline of the plot area.
svg.append("g")
  .attr("class", "x axis")
  .attr("transform", "translate(0," + height + ")")
  .call(xAxis);

// Left axis, plus a caption rotated to read along the axis.
svg.append("g")
  .attr("class", "y axis")
  .call(yAxis)
  .append("text")
  .attr("transform", "rotate(-90)")
  .attr("y", 6)
  .attr("dy", ".71em")
  .style("text-anchor", "end")
  .text("Test Drives");
// One <g class="entity"> per series.
var entity = svg.selectAll(".entity")
  .data(testdrives)
  .enter()
  .append("g")
  .attr("class", "entity");

// The series polyline, stroked in the colour assigned to the series name.
entity.append("path")
  .attr("class", "line")
  .attr("d", function seriesPath(series) { return line(series.values); })
  .style("stroke", function seriesStroke(series) { return color(series.name); });
// End-of-line label: the series name, anchored at the series' last data point.
// The bound datum is a series object ({ name, values }); it has no `week`
// property, so the label must be derived from `name`.
entity.append("text")
  .datum(function(d) {
    return {
      name: d.name,
      value: d.values[d.values.length - 1]
    };
  })
  .attr("transform", function(d) {
    return "translate(" + x(d.value.week) + "," + y(d.value.avg_test_drives) + ")";
  })
  .attr("x", 3) // small gap between the end of the line and the text
  .attr("dy", ".35em")
  .text(function(d) {
    return d.name;
  });
// Container for everything that reacts to the pointer.
var mouseG = svg.append("g").attr("class", "mouse-over-effects");

// Black vertical guide line that follows the pointer; created fully
// transparent and given its geometry by the mousemove handler.
mouseG.append("path")
  .attr("class", "mouse-line")
  .style("stroke", "black")
  .style("stroke-width", "1px")
  .style("opacity", "0");

// The rendered elements with class "line", looked up straight from the DOM so
// their geometry can be queried while tracking the pointer.
var lines = document.getElementsByClassName("line");

// One marker group per series.
var mousePerLine = mouseG.selectAll(".mouse-per-line")
  .data(testdrives)
  .enter()
  .append("g")
  .attr("class", "mouse-per-line");
// Hollow marker circle for each series, outlined in that series' colour.
// The datum is a series object ({ name, values }), so the colour is looked up
// by `name`; looking up a missing property would pass `undefined` to the
// ordinal colour scale, giving every circle the same colour and adding a stray
// entry to the scale's domain.
mousePerLine.append("circle")
  .attr("r", 7)
  .style("stroke", function(d) {
    return color(d.name);
  })
  .style("fill", "none")
  .style("stroke-width", "1px")
  .style("opacity", "0"); // hidden until the pointer enters the plot

// Value read-out next to the circle; its text is filled in on mousemove.
mousePerLine.append("text")
  .attr("transform", "translate(10,3)");
// Transparent rectangle covering the plot area. It exists only to receive
// mouse events (a <g> element cannot catch them) and drives the vertical guide
// line and the per-series markers.
mouseG.append('svg:rect')
  .attr('width', width)
  .attr('height', height)
  .attr('fill', 'none')
  .attr('pointer-events', 'all')
  .on('mouseout', function() { // pointer left the plot: hide line, circles and text
    d3.select(".mouse-line")
      .style("opacity", "0");
    d3.selectAll(".mouse-per-line circle")
      .style("opacity", "0");
    d3.selectAll(".mouse-per-line text")
      .style("opacity", "0");
  })
  .on('mouseover', function() { // pointer entered the plot: show line, circles and text
    d3.select(".mouse-line")
      .style("opacity", "1");
    d3.selectAll(".mouse-per-line circle")
      .style("opacity", "1");
    d3.selectAll(".mouse-per-line text")
      .style("opacity", "1");
  })
  .on('mousemove', function() { // pointer moving over the plot
    var mouse = d3.mouse(this);

    // Redraw the guide as a vertical segment at the pointer's x position.
    d3.select(".mouse-line")
      .attr("d", function() {
        var d = "M" + mouse[0] + "," + height;
        d += " " + mouse[0] + "," + 0;
        return d;
      });

    d3.selectAll(".mouse-per-line")
      .attr("transform", function(d, i) {
        // Binary-search along the i-th rendered path for the point whose x
        // matches the pointer. This relies on x never decreasing along the
        // path, which holds because the points are plotted in week order.
        var beginning = 0,
            end = lines[i].getTotalLength(),
            target = null,
            pos = null; // kept local so it does not leak as an implicit global

        while (true) {
          target = Math.floor((beginning + end) / 2);
          pos = lines[i].getPointAtLength(target);
          // The interval can no longer shrink: accept the closest point found.
          if ((target === end || target === beginning) && pos.x !== mouse[0]) {
            break;
          }
          if (pos.x > mouse[0]) end = target;
          else if (pos.x < mouse[0]) beginning = target;
          else break; // exact position found
        }

        // Convert the pixel height back to a data value for the read-out.
        d3.select(this).select('text')
          .text(y.invert(pos.y).toFixed(2));

        return "translate(" + mouse[0] + "," + pos.y + ")";
      });
  });
</script>
</body>
;您必须先写入一个临时输出文件,在确认一切顺利之后,再将该临时文件重命名以覆盖源文件。