我修改了一段代码来提取PPT中的文本,但它把每个单词或每几个单词单独输出在一行上。我想要按幻灯片分组的数据,即每张幻灯片的内容之后应该有两个换行。应该如何修改代码?请提出修改建议:
示例:
幻灯片-1
数据
(空行)
(空行)
幻灯片-2
数据
#!/usr/bin/perl
use strict;
use warnings;
use Archive::Zip qw( :ERROR_CODES );
use XML::Twig;
use FindBin qw($Bin);

# Extract the text of every slide in a .pptx file and write it, grouped slide
# by slide, to test_ppt-1.txt next to this script.
#
# Output layout: a "Slide: N" heading, one line per non-empty paragraph of
# that slide, then two blank lines before the next slide's heading.

# Output lines for the whole presentation, in the order they are written.
my @text;

my $file = "BI.pptx";
my $zip  = Archive::Zip->new();
$zip->read($file) == AZ_OK or die "Unable to open Office file\n";

# Take the slide numbers from the member names that are really in the archive
# instead of assuming they run 1..N without gaps. The pattern is single-quoted
# so that "\." stays an escaped dot, and anchored so only slideN.xml matches.
# NOTE(review): this orders slides by file number; presumably that matches the
# presentation order for this deck, but the authoritative order lives in
# ppt/presentation.xml -- confirm if slides were ever reordered.
my @slide_numbers =
    sort { $a <=> $b }
    map  { $_->fileName() =~ m{slide(\d+)\.xml\z} ? $1 : () }
    $zip->membersMatching('\Appt/slides/slide\d+\.xml\z');

warn "No slides found in $file\n" unless @slide_numbers;

for my $number (@slide_numbers) {

    # Two empty entries become the two blank lines between slide blocks.
    push @text, '', '' if @text;

    # The heading goes into the output list, not into the XML string:
    # text placed in front of the XML declaration makes the document
    # ill-formed and the parser rejects it.
    push @text, "Slide: $number";

    my $xml  = $zip->contents("ppt/slides/slide${number}.xml");
    my $twig = XML::Twig->new(

        # Fire once per paragraph (a:p) rather than once per text run (a:t),
        # so a sentence split over several runs stays on one output line.
        twig_handlers => { 'a:p' => \&topicref_processing },
    );
    $twig->parse($xml);
}

# XML::Twig handler for one paragraph element.
#
# Parameters:
#   $twig      - the XML::Twig object driving the parse (unused here).
#   $paragraph - the a:p element that has just been parsed completely.
#
# Joins the text of all a:t descendants of the paragraph into one string and
# appends it to @text; paragraphs that contain only whitespace are skipped.
# Returns 1. NOTE(review): a true value is returned because XML::Twig is
# understood to stop calling further handlers for an element when one
# returns false -- confirm against the XML::Twig documentation.
sub topicref_processing {
    my ($twig, $paragraph) = @_;

    my $line = join '', map { $_->text() } $paragraph->descendants('a:t');
    push @text, $line if $line =~ /\S/;

    return 1;
}

my $out_path = "$Bin/test_ppt-1.txt";
open my $out, ">:encoding(UTF-8)", $out_path
    or die "Cannot open $out_path for writing: $!\n";
print {$out} "$_\n" for @text;

# Buffered write errors only surface at close, so check it.
close $out or die "Cannot close $out_path: $!\n";