我是 Perl 中 Win32::OLE 模块的新手。我想在命令提示符下逐行打印 MS Word 表格中的数据,但只能打印出表格的最后一行。你能帮我解决这个问题吗?提前谢谢。
以下是我的代码:
#!/usr/bin/perl
use strict;
use warnings;
use File::Spec::Functions qw( catfile );
use Win32::OLE qw(in);
use Win32::OLE::Const 'Microsoft Word';
$Win32::OLE::Warn = 3;
# Open the document in Word, convert every table to tab-separated text and
# print it row by row on the console.
my $word = get_word();
$word->{DisplayAlerts} = wdAlertsNone;
$word->{Visible} = 1;
my $doc = $word->{Documents}->Open('C:\\PerlScripts\\myTest.docx');
# Take the tables from the document we just opened rather than from
# whatever document happens to be active in a Word instance we attached to.
my $tables = $doc->{Tables};
for my $table (in $tables)
{
    my $tableRange = $table->ConvertToText({ Separator => wdSeparateByTabs });
    my $tableText  = $tableRange->Text();
    # Word terminates each converted row with a bare carriage return.  On a
    # console a lone CR moves the cursor back to column 0, so every row
    # overwrites the previous one and only the last row stays visible.
    # Turning CR (or CRLF) into a newline prints one table row per line.
    $tableText =~ s/\r\n?/\n/g;
    print "Table: ". $tableText. "\n";
}
# 0 is passed so that the table-to-text conversion is not saved back
# (0 == wdDoNotSaveChanges in the Word object model).
$doc->Close(0);
# Return a Word.Application OLE object.
# An already running Word instance is reused when one exists; otherwise a
# new instance is started, with a callback that calls Quit on it passed as
# the second argument to Win32::OLE->new.
# Dies if looking up the running instance throws, or if Word cannot be
# started.
sub get_word
{
    my $app = eval { Win32::OLE->GetActiveObject('Word.Application') };
    die "$@\n" if $@;
    return $app if defined $app;

    my $quit_callback = sub { $_[0]->Quit };
    $app = Win32::OLE->new('Word.Application', $quit_callback)
        or die "Oops, cannot start Word: ", Win32::OLE->LastError, "\n";
    return $app;
}
答案 0 :(得分:2)
这无论如何都算不上完美的解决方案,但对解决问题算是有所进展。
我使用了字符串分隔符 "\n\n",它产生以下输出......
还需要进一步调整 :(
C:\StackOverflow>perl word.pl meTest.docx
Table: Header1
Header2
Header3
Header4
Row1-Cell1
Row1-Cell2
Row1-Cell3
Row1-Cell4
Row2-Cell1
Row2-Cell2
Row2-Cell3
Row2-Cell4
Row2-Cell5
这是代码。我在表格循环中注释掉了另外一些代码,我曾用它们来处理 $tableRange->{Text} 返回的数据。取消注释即可进一步试验。
#!/usr/bin/perl
use strict;
use warnings;
use File::Spec::Functions qw( catfile );
use Win32::OLE qw(in);
use Win32::OLE::Const 'Microsoft Word';
$Win32::OLE::Warn = 3;
# Open a Word document, convert each table to text and print it.
# The document name is taken from the first command-line argument and
# falls back to 'meTest.docx' when none is given.
my $word = get_word();
$word->{DisplayAlerts} = wdAlertsNone;
$word->{Visible} = 1;
my $file = @ARGV ? $ARGV[0] : 'meTest.docx';
# A relative name is resolved by Word, not by this script, and Word's notion
# of the current directory may differ from ours.  When the file exists
# relative to our working directory, hand Word the absolute path instead.
# (File::Spec is loaded by File::Spec::Functions.)
$file = File::Spec->rel2abs($file) if -e $file;
my $doc = $word->{Documents}->Open($file);
# Use the document we opened instead of whatever document is active.
my $tables = $doc->{Tables};
for my $table (in $tables)
{
    my $tableRange = $table->ConvertToText({ Separator => "\n\n" });
    print "Table: \n" . $tableRange->{Text}. "\n";
    # Experimental post-processing of the returned text; uncomment to try it.
    # foreach my $line (split/\n/, $tableRange->{Text}) {
    #     print $line . "\n" ;
    #     # $userinput = <STDIN>;
    # }
}
# 0 is passed so that the conversion is not saved back into the document.
$doc->Close(0);
# Hand back a Word.Application OLE object: the running instance if there is
# one, otherwise a newly started one (created with a callback that calls
# Quit on it).  Dies when the lookup throws or when Word cannot be started.
sub get_word
{
    my $running = eval { Win32::OLE->GetActiveObject('Word.Application') };
    die "$@\n" if $@;
    if (defined $running)
    {
        return $running;
    }

    my $started = Win32::OLE->new('Word.Application', sub { $_[0]->Quit })
        or die "Oops, cannot start Word: ", Win32::OLE->LastError, "\n";
    return $started;
}
抱歉,我无法提供更多帮助。
答案 1 :(得分:2)
将 Word 文档中的所有表格提取到单个 xls 文件中:
# doParseDoc - copy the cells of every 5-column table found in the Word
# document $DocFile into a single worksheet of "$DocFile.xls".
#
# Uses the file-level variables $DocFile (path of the document) and
# $objLogger (object providing LogDebugMsg / LogInfoMsg), both declared
# outside this sub.
#
# Returns the list ( $ret, $msg ): $ret is 0 when the document was processed
# and 1 otherwise; $msg is the last status message that was built.
# Dies when the workbook cannot be created, Word cannot be started or the
# document cannot be opened; OLE errors croak via the Warn option set below.
sub doParseDoc {

    my $msg = '' ;
    my $ret = 1 ; # assume failure until the whole document has been processed

    $msg = 'START --- doParseDoc' ;
    $objLogger->LogDebugMsg( $msg );
    $msg = 'using the following DocFile: "' . $DocFile . '"' ;
    $objLogger->LogInfoMsg( $msg );

    #-----------------------------------------------------------------------
    # Create a new Excel workbook next to the document and add a worksheet.
    # new() returns undef on failure, so check it before using the object.
    my $objWorkBook = Spreadsheet::WriteExcel->new("$DocFile" . '.xls')
        or die "Could not create ", $DocFile, ".xls Error:", $!;
    my $objWorkSheet = $objWorkBook->add_worksheet();

    Win32::OLE->Option(Warn => \&Carp::croak);
    # NOTE(review): this constant is not used inside this sub; it is kept
    # because `use constant` defines it for the whole package and other
    # code may rely on it -- confirm before removing.
    use constant true => 0;

    # Attach to a running Word instance or start a new one.  `||` is needed
    # here: with the low-precedence `or` the assignment would bind first and
    # the freshly created object would be thrown away.
    my $objMSWord = Win32::OLE->GetActiveObject('Word.Application')
        || Win32::OLE->new('Word.Application', 'Quit')
        or die "Could not start Word. Error:", Win32::OLE->LastError();

    # build the doc object during run-time
    my $objDoc = $objMSWord->Documents->Open($DocFile)
        or die "Could not open ", $DocFile, " Error:", Win32::OLE->LastError();

    # Set the screen to Visible, so that you can see what is going on
    $objMSWord->{'Visible'} = 1;

    # Work on the document we opened, not on whichever one is active.
    my $tables = $objDoc->Tables();
    my $tableText = '' ;
    my $xlsRow = 1 ;   # worksheet row 0 is left empty

    for my $table (in $tables){

        # Rows/Columns Count properties, see:
        # https://msdn.microsoft.com/en-us/library/aa537149(v=office.11).aspx#officewordautomatingtablesdata_populateatablewithdata
        my $RowsCount = $table->{'Rows'}->{'Count'} ;
        my $ColsCount = $table->{'Columns'}->{'Count'} ;

        # discard the tables that do not have exactly 5 columns
        next unless ( $ColsCount == 5 ) ;

        $msg = "Rows Count: $RowsCount " ;
        $msg .= "Cols Count: $ColsCount " ;
        $objLogger->LogDebugMsg ( $msg ) ;

        # Word addresses cells 1-based; index 0 always throws, which used to
        # produce an extra 'N/A' row and an 'N/A' first column.  All rows,
        # including the table's first (header) row, are copied.
        foreach my $row ( 1..$RowsCount ) {
            foreach my $col ( 1..$ColsCount ) {

                my $txt = '';
                # Some cells cannot be read (the original author reports
                # about 1% of the values); those are written as 'N/A'.
                eval {
                    $txt = $table->cell($row,$col)->range->{'Text'};
                    # replace all the ctrl chars by space
                    $txt =~ s/\r/ /g ;
                    $txt =~ s/[^\040-\176]/ /g ;
                    # Make the block's value true on success so that the
                    # `or` below fires only when something died, not when
                    # the last substitution happened to replace nothing.
                    1;
                } or $txt = 'N/A' ;

                # Worksheet columns are 0-based, Word columns 1-based.
                $objWorkSheet->write($xlsRow, $col - 1, $txt);

            } #eof foreach col

            # we just want to dump all the tables into the one sheet
            $xlsRow++ ;
        } #eof foreach row

        sleep 1 ;
    } #eof foreach table

    # close the document opened in the UI
    $objDoc->Close;

    # OBS !!! the original author notes that printing is only safe from here
    $objLogger->LogDebugMsg ( $tableText . "\n" );

    # exit the whole Word application
    $objMSWord->Quit;

    # Close the workbook explicitly so the xls file is completely written
    # before we report success.
    $objWorkBook->close();

    $ret = 0 ;
    return ( $ret , $msg ) ;
}
#eof sub doParseDoc
答案 2 :(得分:0)
使用以下代码行
my $doc = $word->Documents->Open('C:\\PerlScripts\\myTest.docx');
# Tables is a collection of the document; the Word.Application object
# itself has no Tables member, so read it from the document just opened.
my $tables = $doc->{'Tables'};
来代替下面的代码:
# Opens the document through the Documents collection, then reads the
# Tables collection of Word's currently active document.
my $doc = $word->{Documents}->Open('C:\\PerlScripts\\myTest.docx');
my $tables = $word->ActiveDocument->{'Tables'};
这样你的问题就解决了。