您好我希望在html文件中打印结果。我尝试使用(cp1252)和(UTF-8)进行编码,同时在HTML中进行打印但是我无法看到那些日文字符而是获得了一些垃圾值。例如,“【外资系転职の”打印为“ÂyŠOÂŽÂÂÂÂŒn”]。提前致谢
use strict;
use warnings;
use HTML::TreeBuilder::XPath;
use LWP::UserAgent;
use HTTP::Request;
use HTML::Entities;
use HTML::Strip;
use Encode qw( decode_utf8 encode_utf8 );
open( OUT, '>:utf8', "C:/Users/jeyakuma/Desktop/test1.html" );
my $URL = 'http://job.japantimes.com/';
my $agent = LWP::UserAgent->new( agent => "Mozilla/5.0" );
my $request = HTTP::Request->new( GET => $URL );
my $response = $agent->request($request);
# Check the outcome of the response
if ( $response->is_success ) {
my $xp = HTML::TreeBuilder::XPath->new_from_content( $response->decoded_content );
my $raw_html = $xp->findnodes_as_string( '//td[@class="text12"]');
my $hs = HTML::Strip->new();
my $clean_text = $hs->parse($raw_html);
$clean_text = decode_utf8($hs->parse(encode_utf8($raw_html)));
$hs->eof;
print OUT $clean_text;
}
elsif ( $response->is_error ) {
print "Error:$URL\n";
print $response->error_as_HTML;
}