#! /usr/bin/perl -w
# Reads ./workfile.txt through a cp932 decoding layer and prints every line.
use strict;
# NOTE(review): the 'encoding' pragma is documented as deprecated in recent
# Perl releases -- confirm against the Perl version in use.
use encoding 'utf-8';
# 'workfile.txt' is supposed to be encoded in cp932
# The :encoding(cp932) layer decodes the bytes as they are read.
# NOTE(review): the return value of open is not checked here.
open my $in, "<:encoding(cp932)", "./workfile.txt";
# Read the file one line at a time until end of file.
while ( my $line = <$in> ) {
# my process comes here
print $line;
cp932 "\x81" does not map to Unicode at ./my_program.pl line 8, <$in> line 1234.
#! /usr/bin/perl -w
# Sketch of the desired program: good.txt is the default output handle, and
# lines containing malformed characters are diverted to bad.txt together
# with their input line number.
use strict;
# NOTE(review): the 'encoding' pragma is documented as deprecated in recent
# Perl releases -- confirm against the Perl version in use.
use encoding 'utf-8';
# English supplies $INPUT_LINE_NUMBER, the long name for $.
use English;
# 'workfile.txt' is supposed to be encoded in cp932
# NOTE(review): none of the three open calls below checks its return value.
open my $in, "<:encoding(cp932)", "./workfile.txt";
open my $output_good, ">:encoding(utf8)", "good.txt";
open my $output_bad, ">:encoding(utf8)", "bad.txt";
select $output_good; # in most cases workfile.txt lines are good
while ( my $line = <$in> ) {
# Placeholder: the condition below is pseudo-code, not valid Perl. It marks
# the check that the question asks how to write.
if ( $line contains malformed characters ) {
# Make bad.txt the default handle for the next print only.
select $output_bad;
print "$INPUT_LINE_NUMBER: $line";
# Restore good.txt as the default output handle.
select $output_good;
我的问题是：如何编写 "if ( $line contains malformed characters )" 这一部分，也就是如何检查输入的一行是好还是坏。
答案 0 :(得分:3)
#! /usr/bin/perl -w
# Setup for splitting workfile.txt (expected to be cp932) into lines that
# decode cleanly (good.txt) and lines that do not (bad.txt).
use strict;
use utf8; # Source encoded using UTF-8
use open ':std', ':encoding(UTF-8)'; # STD* is UTF-8;
# UTF-8 is default encoding for open.
use Encode qw( decode );
# Read raw bytes: each line is decoded by hand with decode() further down,
# so a decoding failure can be detected per line.
open my $fh_in, "<:raw", "workfile.txt"
or die "Can't open workfile.txt for reading: $!";
# No explicit layer here, so the UTF-8 default declared by 'use open' applies.
open my $fh_good, ">", "good.txt"
or die "Can't open good.txt for writing: $!";
# Raw output: lines that fail to decode are written back as their original bytes.
open my $fh_bad, ">:raw", "bad.txt"
or die "Can't open bad.txt for writing: $!";
while ( my $line = <$fh_in> ) {
my $decoded_line =
eval { decode('cp932', $line, Encode::FB_CROAK|Encode::LEAVE_SRC) };
if (defined($decoded_line)) {
print($fh_good "$. $decoded_line");
} else {
print($fh_bad "$. $line");