0, 0, 0, 0, 0, 0, 0, 0, 3.043678e-05, 3.661498e-05, 2.070347e-05,
    2.47175e-05, 1.49877e-05, 3.031176e-05, 2.12128e-05, 2.817522e-05,
    1.802658e-05, 7.192285e-06, 8.467806e-06, 2.047874e-05, 9.621194e-05,
    4.467542e-05, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.000421869,
    5.0003081213, 0.0001938675, 8.70334e-05, 0.0002973858, 0.0003385935,
    8.763598e-05, 2.743326e-05, 0, 0.0001043894, 3.409237e-05, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0;





从以上各行中提取出的值为 5.0003081213、0.000421869、0.0003385935 和 0.0002973858。











0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.193129938e-07, 0, 0, 0, 0, 0, 0,
    0, 2.505016514e-05, 4.835713883e-05, 6.128770648e-05, 1.38018881e-05, 2.303402101e-05,
    0, 0, 0, 0, 3.5838803e-05, 0.000104883779, 0, 0, 1.813278467e-05, 0.0001350646297,
    0.0007846746908, 0.001728603877, 0.001082733652, 0.001511217708, 0.0009537032505,
    0.0004436753321, 0.002182536356, 0.0005719495782, 9.055173127e-05, 1.245663419e-05,
    0.0004568318755, 0.0003056741688, 3.186642459e-05, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0.000101613512, 5.451410965e-05, 0, 0, 0, 0, 0.001172270099, 7.088900819e-05, 0,
    1.848198352e-06, 0.0006870109246, 0.00276857581, 0.002038545509, 0.001111047938,
    0.0007607533934, 0.0007915864957, 0.001105735631, 0.001456989534, 0.0007245351113,
    0.0004262289031, 0.0003041285247, 0.0001528418892, 2.332078749e-05, 9.695149464e-05,
    1.004024021e-07, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,


0, 0, 0, 0, 3.5838803e-05, 0.000104883779, 0, 0, 1.813278467e-05, 0.0001350646297,
    0.0007846746908, 0.001728603877, 0.001082733652, 0.001511217708, 0.0009537032505,
    0.0004436753321, 0.002182536356, 0.0005719495782, 9.055173127e-05, 1.245663419e-05,
    0.0004568318755, 0.0003056741688, 3.186642459e-05, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

然后,我该如何修改脚本 grep -o '[0-9e.-]*' file | sort -rg | head -1达到这个目的?


grep -o '[0-9e.-]*' file | sort -rg | head -N
  • 命令 grep -o '[0-9e.-]*'(使用问题中提供的正则表达式)提取文件中的所有数字。
  • 然后,sort -g 按通用数值(包括科学记数法)排序;加上 -r 反转结果,使最大的值排在最前面。
  • 最后,head获得前N个值。


$ grep -o '[0-9e.-]*' file | sort -rg | head -1


$ grep -o '[0-9e.-]*' file | sort -rg | head -4

awk 可以处理数字——即使是科学记数法表示的数字。您可以使用以下脚本获取最大值:

awk '{m=(m>$0)?m:$0}END{print m}' RS="[,\n;]" input.file

use strict;
use warnings;
use feature 'say';
use List::Util 'max';

# Sample input: a single comma-separated list of numbers (plain decimals and
# scientific notation) terminated by a semicolon.
my $data = qq{0, 0, 0, 0, 0, 0, 0, 0, 3.043678e-05, 3.661498e-05, 2.070347e-05, 2.47175e-05, 1.49877e-05, 3.031176e-05, 2.12128e-05, 2.817522e-05, 1.802658e-05, 7.192285e-06, 8.467806e-06, 2.047874e-05, 9.621194e-05,4.467542e-05, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.000421869,    5.0003081213, 0.0001938675, 8.70334e-05, 0.0002973858, 0.0003385935,8.763598e-05, 2.743326e-05, 0, 0.0001043894, 3.409237e-05, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0;};

# Remove the terminating semicolon only when it is really there. A blind
# chop() would eat the last digit of the final number whenever the data comes
# without the trailing ';' or with whitespace after it.
$data =~ s/;\s*\z//;

# Split into individual values on commas, tolerating whitespace around them.
my @numbers = split /\s*,\s*/, $data;

# Largest single value, courtesy of List::Util.
say 'Max: ' . max(@numbers);

# Sort numerically in descending order and print the highest four values
# (or fewer, if the list is shorter than that).
my @descending = sort { $b <=> $a } @numbers;
my $last_index = $#descending < 3 ? $#descending : 3;
say $_ for @descending[ 0 .. $last_index ];

use strict;
use warnings;

# Collect every number found in the input (files named on the command line,
# or STDIN), then report the largest value and the four values that follow it.

my @numbers;
while ( my $line = <> ) {

    # A number is: optional sign, digits with an optional decimal point,
    # and an optional exponent part (scientific notation).
    while ( $line =~ m|([-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)|g ) {
        push @numbers, $1;
    }
}

# Nothing to report; stop here rather than print undefined values.
die "no numbers found in input\n" unless @numbers;

# Descending numeric order: the largest value ends up at index 0.
@numbers = sort { $b <=> $a } @numbers;

print "largest value:\n  $numbers[0]\n";

# Clamp the slice so that short inputs do not yield undefined elements.
my $last_index = $#numbers < 4 ? $#numbers : 4;
print "next four numbers: \n  " . join( "\n  ", @numbers[ 1 .. $last_index ] ) . "\n";


像这样使用它:perl findNumbers.pl input.txt其中findNumbers.pl是上面的脚本。

perl -nle 'foreach (split(",|;")) { $_ += 0; @top_n = sort {$b <=> $a} ($_, @top_n); pop @top_n if @top_n > 4; } END { print foreach @top_n; }' input.txt



#!/usr/bin/perl -nl

# Print the four largest numbers found in comma/semicolon separated input.

# 0) The -n switch above makes perl read the input line by line, and the -l
# switch automatically strips the newline from every input line and appends
# a newline to every output line.

# 1.1) Because of -n, perl reads one line of input and places it into the $_
# variable for you. The following code (excluding the END {} block) is
# executed once for every input line.
# 1.2) split() takes this $_ string and breaks it into a series of numbers
# (technically still sub-strings), returning the series as a list.
# 1.3) Then foreach loops through this list, placing each item into $_ again.
# (NB: yes, we lose the previous value of $_, which was the input line, but we
# no longer care about it since split() has already processed it.)
foreach (split(",|;")) {

    # 1.4) Skip fragments that contain no visible characters (for example a
    # whitespace-only line); otherwise they would be counted as the number 0.
    next unless /\S/;

    # 2) Ensure the value is stored internally as a number by adding zero to
    # it. This saves a bit of conversion when sorting and also makes the final
    # output nicer. You would still get what you want if you commented the
    # following line out.
    $_ += 0;

    # 3.1) Compose a new list by adding the current value ($_) to what we
    # already have (@top_n). The new list is "($_, @top_n)". It is OK if
    # @top_n has nothing in it yet: perl creates it as an empty array the
    # first time it encounters the variable. (Note: it would be better to use
    # the -w command line switch and declare @top_n explicitly beforehand,
    # but that is omitted here for the sake of simplicity.)
    # 3.2) Then sort the new list. The "$b <=> $a" expression sorts it
    # numerically in descending order.
    @top_n = sort {$b <=> $a} ($_, @top_n);

    # 3.3) Finally, throw away the last item (pop does this) if our top-N
    # array has grown beyond the length of interest (4 in this example).
    # This keeps the array at four elements at most, instead of accumulating
    # and sorting every value of the input.
    pop @top_n if @top_n > 4;
}

# 4) This block is only executed once, after the whole input has been read
# and processed.
END {
    # 4.1) Here our good old foreach walks the @top_n array, storing the
    # current value in $_ for each iteration.
    # 4.2) Called without arguments, print() outputs the value of $_.
    # Remember, it also appends a newline to the output - we asked for that
    # with the -l switch in the very first line of the script.
    print foreach @top_n;
}

用法:perl top_n.pl input.txt,前提是top_n.pl是脚本名称。

# Slurp the whole input, delete newlines, spaces and semicolons so that only
# comma-separated values remain, then print the numerically largest one.
$/ = undef;
my $flattened = scalar(<>) =~ tr/\n ;//dr;
my ($largest) = ( sort { $b <=> $a } split /,/, $flattened )[0];
print "largest: " . $largest . "\n";


# Slurp the whole input, delete newlines, spaces and semicolons so that only
# comma-separated values remain, then print the four numerically largest
# values joined by commas.
$/ = undef;
my $squeezed = scalar(<>) =~ tr/\n ;//dr;
my @top_four = ( sort { $b <=> $a } split /,/, $squeezed )[ 0 .. 3 ];
print join( ",", @top_four ) . "\n";

将其中一行保存到文件中,比如sort.pl,然后执行 cat /path/to/input.txt | perl /path/to/sort.pl


use strict;
use warnings;
use List::Util qw ( max );

# Read the input in blocks terminated by ';' instead of line by line.
$/ = ';';

while ( my $block = <> ) {

    # $/ is ';', so chomp strips the block terminator; left in place it would
    # turn the last value into a non-numeric string such as "0;".
    chomp $block;

    # Largest value of the whole block. Whitespace (including the newlines
    # inside the block) is removed first, and empty fields are dropped, so
    # that a blank tail after the final ';' yields no values at all.
    ( my $compact = $block ) =~ s/\s+//g;
    my $block_max = max( grep { length } split /,/, $compact );
    last unless defined $block_max;
    print $block_max, "\n";

    # Largest value of each individual line of the block.
    my @top;
    foreach my $line ( split /\n/, $block ) {
        $line =~ s/\s+//g;
        my @numbers = grep { length } split /,/, $line;
        my $max_num = max(@numbers);
        push @top, $max_num if defined $max_num;
    }

    # NOTE: this ranks the per-line maxima collected above, not every
    # individual value of the block. The slice is clamped so that blocks with
    # fewer than five lines do not produce undefined elements.
    my @descending = sort { $b <=> $a } @top;
    my $last_index = $#descending < 4 ? $#descending : 4;
    print "Top 5:\n";
    print join( "\n", @descending[ 0 .. $last_index ] ), "\n";
}


  • 以 ; 作为记录分隔符,逐块迭代您的文件。
  • 按 \n 拆分,得到块中的各行。
  • 按 , 拆分,以获取各个数值。
  • 对整个块使用 max,并把结果打印出来。
  • 对每一行使用 max,将结果存入数组 @top 中。
  • 对 @top 排序,并打印前 5 个元素。


要扩展 - 根据您的原始文件,您可以在其中包含一个正则表达式来提取数字。


# Grab every run of digits, signs, dots and exponent markers from $_.
# 'e'/'E' must be part of the character class, otherwise a value written in
# scientific notation such as 3.043678e-05 is split into "3.043678" and "-05".
my @numbers = m/[\deE+.-]+/g;

由于 perl 处理正则表达式的方式,它会“匹配”所有符合这种特定“格式”的片段。(当然,如果有人在文件中写入 ee-44,那也会被匹配。)

我建议——不要一味追求单行命令(one-liner)。这是一种虚假的节省。最好写一个可以完整写出、加上注释、日后还能真正读懂的脚本,而不是一段紧凑的、12 个月后没人能说清它在做什么的文本。