我需要从文本文件中提取各条记录之间的文本行,并将其填入 Excel 文件。每条记录包含的行数不固定,但每条记录都以 Comment for the record "idno" 这样的字符串开头,后面跟着其他文本。
__DATA__ (This is what my .txt file looks like)
Comment for the record "id1"
Attempt1 made on [time] outcome [outcome]
note 1
Comment for the record "id2"
Attempt1 made on [time] outcome [outcome]
note 1
Attempt2 made on [time] outcome [outcome]
note 2
Comment for the record "id3"
Attempt1 made on [time] outcome [outcome]
note 1
Attempt2 made on [time] outcome [outcome]
note 2
Attempt3 made on [time] outcome [outcome]
note 3
Attempt4 made on [time] outcome [outcome]
note 4
id1 Attempt1 Note1 [outcome]
id2 Attempt1 Note1 [outcome]
id2 Attempt2 Note2 [outcome]
id3 Attempt1 Note1 [outcome]
id3 Attempt2 Note2 [outcome]
id3 Attempt3 Note3 [outcome]
id3 Attempt4 Note4 [outcome]
使用 GNU awk(需要它的 match() 第三个参数来获取正则表达式捕获组):
# Requires GNU awk: the three-argument match() stores the capture groups in
# the array a. data.txt is a placeholder for the input file name.
gawk '
    # Blank lines carry no data.
    /^$/ {next}

    # Record header: remember the id found between the double quotes.
    match($0, /Comment for the record "([^"]*)/, a) {id = a[1]; next}

    # Attempt line: remember the attempt name and its outcome.
    match($0, /(.+) made on .* outcome (.+)/, a) {att = a[1]; out = a[2]; next}

    # Any other line is treated as the note of the most recent attempt and
    # completes one tab-separated output row: id, attempt, note, outcome.
    {printf("%s\t%s\t%s\t%s\n", id, att, $0, out)}
' data.txt
# Perl equivalent of the awk program. -n loops over the input lines, -l strips
# the newline on input and adds it back on print. data.txt is a placeholder
# for the input file name.
perl -lne '
    # Blank lines carry no data.
    next if /^$/;

    # Record header: remember the id found between the double quotes.
    if (/Comment for the record "([^"]*)/) {$id = $1; next;}

    # Attempt line: remember the attempt name and its outcome.
    if (/(.+) made on .* outcome (.+)/) {$att = $1; $out = $2; next;}

    # Any other line is treated as the note of the most recent attempt and
    # completes one tab-separated output row: id, attempt, note, outcome.
    print join("\t", $id, $att, $_, $out);
' data.txt
use strict;
use warnings;

# Paragraph mode: read the input file a paragraph/block at a time, where
# blocks are separated by one or more blank lines.
local $/ = "";

while (my $block = <>) {
    # Convert the block to its non-blank lines.
    my @lines = grep /\S/, split("\n", $block);
    next unless @lines;

    # The first line of a block carries the record id between double quotes.
    # The regex captures are assigned directly to variables (list context).
    my $header = shift @lines;
    my ($id) = $header =~ /"(.+)"/;
    unless (defined $id) {
        warn "Skipping block without a quoted id: $header\n";
        next;
    }

    # The remaining lines are consumed in (attempt line, note line) pairs.
    while (@lines) {
        my $attempt_line = shift @lines;
        my ($attempt, $outcome) =
            $attempt_line =~ /(Attempt\d+).+outcome (\d+)/;

        # A line that is not a well-formed attempt cannot produce a row;
        # report it instead of printing undefined columns.
        unless (defined $attempt) {
            warn "Skipping unrecognised attempt line: $attempt_line\n";
            next;
        }

        # The note is the line after the attempt; it may be missing when the
        # block ends right after an attempt line.
        my $note = @lines ? shift @lines : '';

        print join("\t", $id, $attempt, $note, $outcome), "\n";
    }
}
use strict;
use warnings;

# Slurp mode: read the whole DATA section as a single string.
local $/;

# Only read DATA when the handle is actually open (the file has a __DATA__
# section); otherwise there is nothing to process.
my $input = defined fileno(DATA) ? <DATA> : undef;
$input = '' unless defined $input;

# Records are separated by one blank line; each record names its id as "idN".
for my $record (split /\n\n/, $input) {
    my ($id) = $record =~ /(id\d+)/;

    # Without this guard a failed match would return an empty list and the
    # record text itself would be passed to block() as the id.
    next unless defined $id;

    block($id, $record);
}

# block($id, $block)
#
# Prints one comma-separated row "id,attempt,note,outcome" for every attempt
# contained in $block.
#
#   $id    - record id printed in the first column
#   $block - text of one record: a header followed by attempt/note line pairs
#
# Returns nothing; output goes to the currently selected filehandle.
sub block {
    my ($id, $block) = @_;
    return unless defined $block;

    # Drop everything before the first attempt (the record header).
    $block =~ s/.*?(?=Attempt)//s;

    # Split in front of each "Attempt" so every chunk holds one attempt line
    # followed by its note.
    for my $chunk (split /(?=Attempt)/, $block) {
        my ($attempt) = $chunk =~ /(Attempt\d+)/;
        next unless defined $attempt;    # chunk holds no attempt at all

        my ($note)    = $chunk =~ /([^\n]+)$/;      # last line of the chunk
        my ($outcome) = $chunk =~ /outcome (\d+)/;

        # Keep the columns aligned even when a field could not be extracted.
        print join(',',
            $id,
            $attempt,
            defined $note    ? $note    : '',
            defined $outcome ? $outcome : '',
        ), "\n";
    }

    return;
}
每个 Id 对象将包含一个尝试(Attempt)数组,每个尝试都具有 Id、时间、结果和备注(note)。
#! /usr/bin/perl
# test.pl
#
# Reads the records from the DATA section into Id objects, each holding a
# list of Attempt objects, then prints one tab-separated row per attempt:
# record id, attempt id, note, outcome.

use strict;
use warnings;
use feature qw(say);

my @dataList;    # every Id record, in input order
my $record;      # the Id record currently being filled

while (my $line = <DATA>) {
    chomp $line;

    if ($line =~ /^Comment for the record "(.*)"/) {
        # A header line starts a new record.
        my $id = $1;
        $record = Id->new($id);
        push @dataList, $record;
    }
    elsif ($line =~ /^(\S+)\s+made on\s(\S+)\soutcome\s(.*)/) {
        my $attemptId = $1;
        my $time      = $2;
        my $outcome   = $3;

        # Next line is the note; it may be missing at end of input.
        my $note = <DATA>;
        $note = '' unless defined $note;
        chomp $note;

        # An attempt that appears before any header has no record to join.
        if (not defined $record) {
            warn "Attempt line before any record header, skipped: $line\n";
            next;
        }

        # Attempt->new unpacks its arguments as (id, time, note, outcome).
        my $attempt = Attempt->new($attemptId, $time, $note, $outcome);

        # Attach the attempt to the current record so it is printed below.
        $record->PushAttempt($attempt);
    }
}

foreach my $id (@dataList) {
    foreach my $attempt ($id->Attempt) {
        print $id->Id . "\t";
        print $attempt->Id . "\t";
        print $attempt->Note . "\t";
        print $attempt->Outcome . "\n";
    }
}
package Id;
use strict;
use warnings;
use Carp;

# Id->new($id)
# Creates a record object and stores $id as its identifier.
sub new {
    my $class = shift;
    my $id    = shift;

    my $self = {};
    bless $self, $class;

    # Store the identifier; without this the Id accessor returns undef.
    $self->Id($id);
    return $self;
}

# $record->Id([$id])
# Combined getter/setter: sets the identifier when a defined value is
# passed, and always returns the current identifier.
sub Id {
    my $self = shift;
    my $id   = shift;

    if (defined $id) {
        $self->{ID} = $id;
    }
    return $self->{ID};
}

# $record->PushAttempt($attempt)
# Appends $attempt to the record's attempt list and returns it.
# Croaks when no attempt is passed.
sub PushAttempt {
    my $self    = shift;
    my $attempt = shift;

    if (not defined $attempt) {
        croak qq(Missing Attempt in call to Id->PushAttempt);
    }
    if (not exists ${$self}{ATTEMPT}) {
        $self->{ATTEMPT} = [];
    }
    push @{$self->{ATTEMPT}}, $attempt;
    return $attempt;
}

# $record->PopAttempt
# Removes and returns the most recently pushed attempt (undef when empty).
sub PopAttempt {
    my $self = shift;
    return pop @{$self->{ATTEMPT}};
}

# $record->Attempt
# Returns the list of attempts in the order they were pushed. A record
# without attempts yields an empty list; dereferencing the missing slot
# directly would be fatal under strict refs.
sub Attempt {
    my $self = shift;
    return @{ $self->{ATTEMPT} || [] };
}
# PACKAGE Attempt
package Attempt;
use strict;
use warnings;

# Attempt->new($id, $time, $note, $outcome)
# Creates an attempt object. The positional order is id, time, note,
# outcome; each value is stored through its accessor, so undefined
# arguments simply leave the field unset.
sub new {
    my $class   = shift;
    my $id      = shift;
    my $time    = shift;
    my $note    = shift;
    my $outcome = shift;

    my $self = {};
    bless $self, $class;

    # Store the fields; without this every accessor returns undef.
    $self->Id($id);
    $self->Time($time);
    $self->Note($note);
    $self->Outcome($outcome);
    return $self;
}

# $attempt->Id([$id])
# Combined getter/setter for the attempt identifier.
sub Id {
    my $self = shift;
    my $id   = shift;

    if (defined $id) {
        $self->{ID} = $id;
    }
    return $self->{ID};
}

# $attempt->Time([$time])
# Combined getter/setter for the time of the attempt.
sub Time {
    my $self = shift;
    my $time = shift;

    if (defined $time) {
        $self->{TIME} = $time;
    }
    return $self->{TIME};
}

# $attempt->Note([$note])
# Combined getter/setter for the note attached to the attempt.
sub Note {
    my $self = shift;
    my $note = shift;

    if (defined $note) {
        $self->{NOTE} = $note;
    }
    return $self->{NOTE};
}

# $attempt->Outcome([$outcome])
# Combined getter/setter for the outcome of the attempt.
sub Outcome {
    my $self    = shift;
    my $outcome = shift;

    if (defined $outcome) {
        $self->{OUTCOME} = $outcome;
    }
    return $self->{OUTCOME};
}
# Switch back to package main so that the DATA handle opened by the marker
# below is main::DATA, which is the handle the script reads with <DATA>.
package main;

__DATA__
Comment for the record "id1"
Attempt1 made on [time] outcome [outcome11]
note 11
Comment for the record "id2"
Attempt21 made on [time] outcome [outcome21]
note 21
Attempt22 made on [time] outcome [outcome22]
note 22
Comment for the record "id3"
Attempt31 made on [time] outcome [outcome31]
note 31
Attempt32 made on [time] outcome [outcome32]
note 32
Attempt33 made on [time] outcome [outcome33]
note 33
Attempt34 made on [time] outcome [outcome34]
note 34
# GNU sed one-liner: prints one tab-separated row (id, attempt, note, outcome)
# for every attempt found in data.txt.
#   -r  use extended regular expressions
#   -n  print only what the p command emits
# Flow of the script:
#   1. A 'Comment for the record "ID"' line is reduced to ID. The t branch
#      then jumps to :go, which stores ID in the hold space (h) and loads the
#      next input line (n). Any other line branches straight to :normal.
#   2. A line starting with "Attempt<digit>" is reduced to "attempt outcome",
#      the held ID is appended (G) and the fields are reordered to
#      id, attempt, outcome. The following note line is then appended (N) and
#      moved in front of the outcome, the row is printed (p) and the pattern
#      space is deleted (d).
# NOTE(review): the reordering step splits on single spaces, so an attempt
# name or outcome that itself contains a space would presumably be split in
# the wrong place -- verify against real data.
sed -r -n 's/Comment for the record "([^"]+)"$/\1/;tgo;bnormal;:go {h;n;};:normal /^Attempt[0-9]/{s/(.+) made on .* outcome (.+)$/\1 \2/;G;s/\n/ /;s/(.+) (.+) (.+)/\3\t\1\t\2/;N;s/\t([^\t]+)\n(.+)/\t\2\t\1/;p;d;}' data.txt
注意:此命令仅适用于 GNU sed。如有需要,很容易改写成可移植的版本。
基于你的示例写的 awk 单行命令:
# awk one-liner (shown with the shell prompt "kent$"): prints id, attempt,
# note label and outcome for every attempt line in the file "input".
#   - A header line is recognised by its leading text rather than by its
#     field count, so a five-word note line is not mistaken for a header.
#     The id is the fifth field with the double quotes removed.
#   - For an "Attempt" line the note label is derived from the attempt name
#     ("AttemptN" becomes "NoteN"); the note line itself is not read. The
#     outcome is taken from the sixth field.
# NOTE(review): the field positions assume the id and the time contain no
# spaces -- verify against real data.
kent$ awk '/^Comment for the record/{gsub(/\"/,"",$5);id=$5;next;} /^Attempt/{n=$1;gsub(/Attempt/,"Note",n);print id,$1,n,$6}' input
