我正在尝试编写一个Perl脚本来解析stcmd.exe(StarTeam命令行客户端)hist命令的输出。我正在获取视图中每个文件的历史记录,输出看起来像这样:
Folder: The View Name (working dir: C:\Projects\dir)
History for: main.h
Description: Some files
Locked by:
Status: Current
----------------------------
Revision: 1 View: The View Name Branch Revision: 1.0
Author: John Smith Date: 3/22/08 11:16:16 AM CST
Main header
=============================================================================
History for: main.c
Description: Some files
Locked by:
Status: Current
----------------------------
Revision: 2 View: The View Name Branch Revision: 1.1
Author: Jane Doe Date: 3/22/08 1:55:55 PM CST
Made an update.
----------------------------
Revision: 1 View: The View Name Branch Revision: 1.0
Author: John Smith Date: 3/22/08 11:16:16 AM CST
Initial revision
=============================================================================
请注意,修订摘要可以包含换行符,并且可以为空(在这种情况下,根本不存在任何行)。
我想获取文件名,并为每个修订版本获取作者姓名(名和姓)、日期和更改摘要。我想将这些信息放在一个数据结构中,以便按日期对修订进行排序,并在日期、作者和摘要都相同时合并修订。(我想只要有人能帮我解决解析的部分,剩下的我可以自己搞定。)我不太擅长正则表达式和Perl,下面是我目前正在尝试的代码:
# $hist contains the stcmd output in the format above
# NOTE(review): neither loop condition uses /g and $hist is never modified
# inside the loops, so every evaluation matches again from the start of the
# string instead of advancing to the next file or revision.
# NOTE(review): (?<filename>.) captures exactly one character.
while($hist =~ /History for: (?<filename>.)/s)
{
# Record filename somewhere with $+{filename}
# NOTE(review): without /m the leading ^ anchors only at the very beginning
# of $hist, and without /s the final (?<summary>.*) stops at the first newline.
while($hist =~ /^Revision: (?<file_rev>\S+) View: (?<view_name>.+) Branch Revision: (?<branch_rev>\S+).\nAuthor: (?<author>.*) Date: (?<date>.*) \w+\r\n(?<summary>.*)/)
{
# Extract things with $+{author}, $+{date}, $+{summary}
}
}
然而,这不起作用。说不定我的思路完全错了。有人能给我指出正确的方向吗?
答案 0 :(得分:4)
关键是一次解析一个块,并一次性匹配所有相关的内容。请参阅 perldoc perlop 中的 qr 以及 perldoc perlvar 中的 $/。
考虑到您还希望把这些信息放进一个便于查询和处理的数据结构中,下面是这个回答的最终修订版。下面的代码利用SQLite创建内存数据库的能力。您实际上可能想把功能拆分成两个脚本:一个负责解析并存储数据,另一个负责执行您需要的各种操作。事实上,所有必要的操作都可以直接用SQL完成。
#!/usr/bin/perl
use v5.010;
use strict; use warnings;
use DBI;
# Open the in-memory SQLite database that receives the parsed records.
my $dbh = get_dbh();
# Matches the four-line header of one file section and captures the file
# name, description, lock owner (may be empty) and status. Because of /x,
# bare whitespace in the pattern is ignored, so every literal space is
# written as [ ].
# NOTE(review): description requires at least one character and status at
# least two; a section with an empty description would not match -- confirm
# against real stcmd output.
my $header_pattern = qr{
History[ ]for: [ ](?<filename>[^\n]+) \n
Description: [ ](?<description>[^\n]+) \n
Locked[ ]by: [ ]?(?<lockedby>[^\n]*) \n
Status: [ ](?<status>.[^\n]+) \n
}x;
# Matches one revision record inside a file section: the dashed divider, the
# "Revision/View/Branch Revision" line, the "Author/Date" line and the
# free-form change summary.
#
# The summary is optional and may span several lines, so it is captured
# lazily (with "." allowed to cross newlines) up to, but not including, the
# next boundary: the divider that opens the following revision, the
# 77-character "=" rule that closes the section, or the end of the text.
# The boundary sits in a lookahead so that a following divider is still
# available to the next /g match. The newline that ends the summary is
# matched outside the capture, so a one-line summary has no trailing "\n"
# and a missing summary is captured as the empty string.
#
# /m makes ^ match at the start of every line; /x ignores bare whitespace,
# which is why literal spaces are written as [ ].
my $revision_pattern = qr{
    ^-+\n
    Revision: [ ](?<revision>\d+) [ ]
    View: [ ](?<view>.+) [ ]
    Branch[ ]Revision: [ ](?<branch_revision>[^\n]+) \n
    Author: [ ](?<author>.+) [ ]
    Date: [ ](?<revdate>[^\n]+) \n
    (?<summary>(?s:.*?)) \n?
    (?= ^-+\n Revision:[ ] | ^={77}\n | \z )
}xm;
# Read the input one file section at a time: the 77-character "=" rule that
# closes each section serves as the input record separator.
local $/ = '=' x 77 . "\n";

ENTRY:
while ( my $entry = <> ) {
    # Chunks without a recognisable file header are ignored.
    next ENTRY unless $entry =~ $header_pattern;

    # Copy the named captures right away; the next successful match
    # replaces the contents of %+.
    my %file      = %+;
    my @file_cols = keys %file;
    my $file_sql  = sprintf(
        q{INSERT INTO files (%s) VALUES (%s)},
        join( ',', @file_cols ),
        join( ',', ('?') x @file_cols ),
    );
    $dbh->do( $file_sql, {}, @file{@file_cols} );

    # Every revision found in the section becomes one row that is keyed by
    # the owning file name.
    while ( $entry =~ /$revision_pattern/g ) {
        my %rev      = %+;
        my @rev_cols = ( 'filename', keys %rev );
        my $rev_sql  = sprintf(
            q{INSERT INTO revisions (%s) VALUES (%s)},
            join( ',', @rev_cols ),
            join( ',', ('?') x @rev_cols ),
        );
        $dbh->do( $rev_sql, {}, $file{filename}, values %rev );
    }
}
# Fetch every revision joined to its file record, one hash reference per row
# (Slice => {}), and dump the result for inspection.
use Data::Dumper;

my $joined_rows = $dbh->selectall_arrayref(
    q{SELECT * FROM revisions JOIN files
ON files.filename = revisions.filename},
    { Slice => {} },
);

print Dumper($joined_rows);
# Create the in-memory SQLite database used by this script and return its
# handle.
#
# The connection is opened with RaiseError and AutoCommit enabled. Foreign
# key enforcement is switched on and two tables are created: "files" (keyed
# by filename) and "revisions" (keyed by filename + revision, with a foreign
# key back to "files").
#
# Returns: the connected DBI database handle.
sub get_dbh {
    my $handle = DBI->connect(
        'dbi:SQLite:dbname=:memory:',
        undef,
        undef,
        { RaiseError => 1, AutoCommit => 1 },
    );

    # Executed in order; the SQL text is kept exactly as written.
    my @setup_statements = (
        q{PRAGMA foreign_keys = ON},
        q{CREATE TABLE files (
filename VARCHAR PRIMARY KEY,
description VARCHAR,
lockedby VARCHAR,
status VARCHAR
)},
        q{CREATE TABLE revisions (
filename VARCHAR,
revision VARCHAR,
view VARCHAR,
branch_revision VARCHAR,
author VARCHAR,
revdate VARCHAR,
summary VARCHAR,
CONSTRAINT pk_revisions PRIMARY KEY (filename, revision),
CONSTRAINT fk_revisions_files FOREIGN KEY (filename)
REFERENCES files(filename)
)},
    );

    for my $statement (@setup_statements) {
        $handle->do($statement);
    }

    return $handle;
}
输出:
C:\Temp> y.pl test.txt $VAR1 = [ { 'status' => 'Current', 'revdate' => '3/22/08 11:16:16 AM CST', 'author' => 'John Smith', 'description' => 'Some files', 'revision' => '1', 'filename' => 'main.h', 'summary' => 'Main header', 'view' => 'The View Name', 'branch_revision' => '1.0', 'lockedby' => '' }, { 'status' => 'Current', 'revdate' => '3/22/08 1:55:55 PM CST', 'author' => 'Jane Doe', 'description' => 'Some files', 'revision' => '2', 'filename' => 'main.c', 'summary' => 'Made an update.', 'view' => 'The View Name', 'branch_revision' => '1.1', 'lockedby' => '' }, { 'status' => 'Current', 'revdate' => '3/22/08 11:16:16 AM CST', 'author' => 'John Smith', 'description' => 'Some files', 'revision' => '1', 'filename' => 'main.c', 'summary' => 'Initial revision', 'view' => 'The View Name', 'branch_revision' => '1.0', 'lockedby' => '' } ];
答案 1 :(得分:1)
这是一种入门的方式。我更喜欢将您的字符串按行(\n)拆分,然后循环遍历各行:
use strict;
use warnings;
# Sample stcmd "hist" output used as the input for this demonstration.
my $hist = <<'EOF';
Folder: The View Name (working dir: C:\Projects\dir)
History for: main.h
Description: Some files
Locked by:
Status: Current
----------------------------
Revision: 1 View: The View Name Branch Revision: 1.0
Author: John Smith Date: 3/22/08 11:16:16 AM CST
Main header
=============================================================================
History for: main.c
Description: Some files
Locked by:
Status: Current
----------------------------
Revision: 2 View: The View Name Branch Revision: 1.1
Author: Jane Doe Date: 3/22/08 1:55:55 PM CST
Made an update.
----------------------------
Revision: 1 View: The View Name Branch Revision: 1.0
Author: John Smith Date: 3/22/08 11:16:16 AM CST
Initial revision
=============================================================================
EOF

# %data maps a file name to a hash with the keys rev, view and branch.
# Each "Revision:" line assigns to the same three keys, so for a file with
# several revisions only the values of the last one listed are kept.
my %data;
my $filename;

for my $line ( split /\n/, $hist ) {
    # A "History for:" line switches the file that later lines belong to.
    if ( my ($name) = $line =~ /History for: (.*)/ ) {
        $filename = $name;
    }

    # A "Revision:" line supplies the three fields for the current file.
    if ( my @fields = $line =~ /^Revision: (.+?) View: (.+?) Branch Revision: (.*)/ ) {
        @{ $data{$filename} }{qw(rev view branch)} = @fields;
    }
}

use Data::Dumper;
print Dumper( \%data );
__END__
$VAR1 = {
'main.h' => {
'view' => 'The View Name',
'rev' => '1',
'branch' => '1.0'
},
'main.c' => {
'view' => 'The View Name',
'rev' => '1',
'branch' => '1.0'
}
};
答案 2 :(得分:1)
你已经得到了一些不错的答案。这是拆分这项工作的另一种方式:
use strict;
use warnings;
use Data::Dumper qw(Dumper);
# The 77-character "=" rule that closes each file's history serves as the
# input record separator, so every read returns one complete section.
$/ = '=' x 77 . "\n";

my @data;

while ( my $section = <> ) {
    # Dashed rule lines divide a section into sub-sections: the first one
    # holds the file information, each remaining one describes a revision.
    my ( $header_text, @revision_texts ) = split /-{20,}\n/, $section;

    # This example only breaks each sub-section into lines via parse_lines;
    # the sub-sections could instead be run through regex parsing, as in
    # Sinan's answer.
    my %entry = ( file_info => [ parse_lines($header_text) ] );

    for my $revision_text (@revision_texts) {
        push @{ $entry{revs} }, [ parse_lines($revision_text) ];
    }

    push @data, \%entry;
}
# Break one sub-section of stcmd output into its meaningful lines.
#
# Parameter: the sub-section text. undef is treated as empty text, so a
#            section without a header part yields no lines and no
#            "uninitialized value" warning.
# Returns:   a list of array references, one per kept line. A line is split
#            at its first ":" (plus any following whitespace) into at most
#            two parts, so "Status: Current" becomes ['Status', 'Current']
#            and a line without a colon becomes a one-element array.
sub parse_lines {
    my ($text) = @_;
    $text = '' unless defined $text;

    my @pairs;
    for my $line ( split /\n/, $text ) {
        # Drop the carriage return left over from CRLF line endings so it
        # does not end up inside the values.
        $line =~ s/\r\z//;

        # Filter out unwanted material: blank lines and the "=" rule line.
        next if $line !~ /\S/;
        next if $line =~ /={70,}/;

        push @pairs, [ split /:\s*/, $line, 2 ];
    }

    return @pairs;
}
# Dump the collected structure (one hash per file section) for inspection.
print Dumper(\@data);
答案 3 :(得分:0)
您需要一个基于状态的解析器。与之前一样使用 __DATA__ 部分:
use v5.010;
use constant
{ READING_FOR_FILENAME => 0
, READING_FOR_AUTHOR => 1
, READING_FOR_DIVIDER => 2
};
use strict;
use warnings;
use English qw<%LAST_PAREN_MATCH>;
use Data::Dumper;
# Scan a StarTeam "hist" listing line by line with a three-state machine and
# collect the author and date of every revision, grouped by file name.
#
# Parameter: a filehandle to read the listing from.
# Returns:   a hash reference mapping each file name to an array reference
#            of { name => author, time => date } hashes, in listing order.
#
# States:
#   READING_FOR_FILENAME - waiting for the "History for:" line of a section
#   READING_FOR_DIVIDER  - waiting for a dashed rule (a revision follows) or
#                          an "=" rule (the section is finished)
#   READING_FOR_AUTHOR   - waiting for the "Author: ... Date: ..." line
#
# A plain if/elsif chain is used rather than given/when, which depends on
# the experimental switch feature and warns on perl 5.18 and later.
sub _collect_history {
    my ($fh) = @_;

    my $state = READING_FOR_FILENAME;
    my %history_for;
    my $file_name;

    while ( my $line = <$fh> ) {
        if ( $state == READING_FOR_FILENAME ) {
            # Take the rest of the line (minus trailing whitespace) so that
            # file names containing spaces are not cut at the first space.
            if ( $line =~ m/^History for: (?<file_name>.+?)\s*$/ ) {
                $file_name = $+{file_name};
                $state     = READING_FOR_DIVIDER;
            }
        }
        elsif ( $state == READING_FOR_DIVIDER ) {
            if ( $line =~ m/^-+\s*$/ ) {
                $state = READING_FOR_AUTHOR;
            }
            elsif ( $line =~ m/^=+\s*$/ ) {
                $state = READING_FOR_FILENAME;
            }
        }
        elsif ( $state == READING_FOR_AUTHOR ) {
            if ( $line =~ m/^Author: (?<author>[^:]+?) Date: (?<time>.*)/ ) {
                push @{ $history_for{$file_name} },
                    { name => $+{author}, time => $+{time} };
                $state = READING_FOR_DIVIDER;
            }
        }
    }

    return \%history_for;
}

# Parse the listing stored in the __DATA__ section and dump the result.
my %history_for = %{ _collect_history( \*DATA ) };
print Dumper( \%history_for );