我正在尝试使用cp1252对HTML页面进行编码,因为它有许多特殊字符,例如€
和£
磅,但是当我使用HTML::Strip
后保存这些内容时。内容显示为垃圾值。我尝试使用cp1252编码,但它无法正常工作。
请帮我解决问题。
use strict;
use warnings;
use HTML::TreeBuilder::XPath;
use LWP::UserAgent;
use HTTP::Request;
use HTML::Entities;
use HTML::Strip;
open( OUT, '>:encoding(cp1252)', "/home/local/ANT/jeyakuma/Desktop/test.html" );
my $URL = 'http://www.footlocker.eu/it/it/k/Customer-Service/Shipping.aspx';
my $agent = LWP::UserAgent->new( agent => "Mozilla/5.0" );
my $request = HTTP::Request->new( GET => $URL );
my $response = $agent->request($request);
# Check the outcome of the response
if ( $response->is_success ) {
my $xp = HTML::TreeBuilder::XPath->new_from_content( $response->decoded_content );
my $raw_html = $xp->findnodes_as_string(
'//div[@class="faq_text"]/p/strong/u[contains(.,\'spedizione Standard \')]'
);
my $hs = HTML::Strip->new();
my $clean_text = $hs->parse($raw_html);
$hs->eof;
print OUT $clean_text;
}
elsif ( $response->is_error ) {
print "Error:$URL\n";
print $response->error_as_HTML;
}
预期输出
£ 60/
当前输出
£ 60/â‚