awk 中的 do-while 循环

时间:2015-01-05 16:57:27

标签: awk

我正在编写一个 awk 脚本,用来读取文件并打印学生的分数,以及他们在各个科目中是通过还是未通过。

该文件看起来像http://pastebin.com/MMVC1qgD

输出应该像这个http://pastebin.com/3eD6r93U

# Question's attempt: print a pass/fail list for Maths students and a
# summary header. Kept as posted; the comments mark why the student rows
# never appear.
BEGIN {
# Counters and running totals per subject.
# NOTE(review): this initialises "mathscounts", but the rule below
# increments "mathscount" (no trailing "s").
mathscounts=0
physicscount=0
chemistrycount=0
mathstotal=0
physicstotal=0
chemistrytotal=0
printf("Results for Form 6B\n\n")
}

# Runs for every record whose second field is "Maths".
$2=="Maths"{
printf("List of Maths Students\n")
printf("Name           Mark   Pass/Fail\n")
printf("----           ----   ---------\n")
# "next" ends processing of the current record immediately, so every
# statement after it in this action is unreachable.
next
# Unreachable because of the "next" above. Nothing inside the loop reads
# another record, so $1 would not change between iterations.
do{
# Mark is the average of fields 3 and 4.
mark=(($3+$4)/2)
if (mark>49)
results="Pass"
else
    results="Fail"
printf("%s %s       %d      %s\n",$2, $1,mark,results)
    mathscount++
    mathstotal=(mathstotal+mark)    
}while($1!="SUBJECT")
}

# Prints only the summary table header; no per-subject rows are emitted.
END {
printf("\n\n")
printf("Subject      Mean   Standard Deviation\n")
printf("-------      ----   ------------------\n")


} 

我尝试使用 do-while 语句,但什么也没有打印出来。

在另一个条件(模式)块内部这样使用 do-while,究竟可行吗?

这里需要用 next 跳过当前行,因为该行不包含我需要的信息。

2 个答案:

答案 0 :(得分:1)

我觉得这个问题有点挑战性,所以写了下面这个解决方案。

顺便说一句,awk 做这类分析有些吃力;另外,如果依靠数组元素值的真假来判断学生是否选了某个科目,那么得 0 分的学生会被漏掉。我其实认为,更好的做法是先把分数处理成简单的 CSV,然后在电子表格中进行分析。

# Report for Form 6B: one pass/fail listing per subject, followed by a
# summary table with the student count, mean and (population) standard
# deviation of each subject.
#
# Input records handled:
#   FORM ...                   header line, ignored
#   SUBJECT <name>             starts a new subject section
#   <f1> <f2> <m1> <m2>        student line; mark is the average of m1 and m2
BEGIN {
    printf "Results for Form 6B\n\n"
}

$1 == "FORM" { next }

$1 == "SUBJECT" {
    subject = $2
    subjects[subject] = 1
    next
}

# Student line. The NF guard skips blank lines so that they are not
# recorded as a nameless student with a mark of 0.
NF {
    student = $2 " " $1
    students[student] = 1
    marks[subject, student] = ($3 + $4) / 2
}

END {
    for (subject in subjects) {
        printf "List of %s Students\n", subject
        printf "Name           Mark   Pass/Fail\n"
        printf "----           ----   ---------\n"
        for (student in students) {
            # Test membership rather than the value itself, so that a
            # genuine mark of 0 is still listed and counted.
            if (!((subject, student) in marks))
                continue

            mark = marks[subject, student]
            N[subject]++
            S[subject] += mark

            result = (mark > 49) ? "Pass" : "Fail"
            printf "%-15s%4d%10s\n", student, mark, result
        }
        printf "\n"
    }

    printf "\n\n"
    printf "Subject      N  Mean   Standard Deviation\n"
    printf "-------     --  ----   ------------------\n"
    for (subject in subjects) {
        # A subject with no student lines has no N entry; report zeros
        # instead of aborting on a division by zero.
        n = N[subject] + 0
        mean = n ? S[subject] / n : 0

        ssum = 0
        for (student in students) {
            if ((subject, student) in marks) {
                diff = marks[subject, student] - mean
                ssum += diff * diff
            }
        }
        # Computed once, after all squared deviations are accumulated.
        std = n ? sqrt(ssum / n) : 0

        printf "%-12s%2i%7.1f%7.1f\n", subject, n, mean, std
    }
}

答案 1 :(得分:1)

编辑:换了一种更好的做法,不再使用一大堆数组,并且加入了标准差(SD)的计算。同时改进了格式以提高可读性。旧答案仍保留在下方。

# Print a pass/fail listing per subject, then a mean / standard-deviation
# summary, for the marks file named "test".
awk '
    # The first record is the form header, e.g. "FORM 6B".
    NR == 1 { print "Results from " $1, $2 }

    # "SUBJECT <name>" opens a new section: print its table header and
    # restart the running total and count used for the mean.
    $1 == "SUBJECT" {
        print "\n\nList of " (subj = $2) " students"
        printf "\n%-20s%-20s%s\n", "Name", "Mark", "Pass/Fail"
        printf "%-20s%-20s%s\n", "----", "----", "---------"
        Total = Count = 0
        next
    }

    # Student record inside a section. The NF guard skips blank lines so
    # they are not counted as a student with a mark of 0.
    subj && NF {
        # Assign before the printf: the order in which printf arguments
        # are evaluated should not be relied upon.
        sum = ($3 + $4) / 2
        printf "%-20s%-20s%s\n", $1 " " $2, sum, (sum > 49 ? "PASS" : "FAIL")
        Total += sum
        Count++
        Mean[subj] = Total / Count
        # Space-separated list of marks, kept for the SD pass in END.
        # Membership is tested with "in" so that a first mark of 0 is
        # appended to, not overwritten by, the next mark.
        Nums[subj] = (subj in Nums) ? Nums[subj] " " sum : sum
    }

    END {
        printf "\n\n%-12s%-12s%s\n", "Subject", "Mean", "Standard Deviation"
        printf "%-12s%-12s%s\n", "----", "----", "----"
        for (i in Mean) {
            STot = 0
            n = split(Nums[i], a, " ")
            for (j = 1; j <= n; j++)
                STot += (a[j] - Mean[i]) ^ 2
            # Population standard deviation (divides by n).
            SD = sqrt(STot / n)
            printf "%-12s%-12s%.2f\n", i, Mean[i], SD
        }
    }
' test

输出

Results from FORM 6B


List of Maths students

Name                Mark                Pass/Fail
----                ----                ---------
Smith John          45                  FAIL
Evans Mike          65                  PASS


List of Physics students

Name                Mark                Pass/Fail
----                ----                ---------
Jones Tom           50                  PASS
Evans Mike          61                  PASS
Smith John          45                  FAIL


List of Chemistry students

Name                Mark                Pass/Fail
----                ----                ---------
Jones Tom           55                  PASS
Evans Mike          35                  FAIL


Subject     Mean        Standard Deviation
----        ----        ----
Physics     52          6.68
Maths       55          10.00
Chemistry   45          10.00

OLD

下面是另一种 awk 写法

我没有计算标准差,因为我忘记怎么算了 :(
我相信这很容易加上。
如果你实在搞不定,我会再回来看看。

# Older variant: collect every student record first, then print the
# per-subject listings and a table of means in END. Reads the file "test".
# Fields printed with a comma in "print" are joined by a tab (OFS).
awk -v OFS="\t" '
    # The first record is the form header; echo it as read.
    NR == 1 { print "Results from " $0 }

    # Remember the current subject; the SUBJECT line itself holds no marks.
    $1 == "SUBJECT" { x = $2; next }

    # Student record inside a section (blank lines are skipped).
    # c[] keeps the "subject;name" keys in input order, a[] maps each key
    # to the average of the two marks.
    x && NF {
        key = x ";" $1 " " $2
        c[s++] = key
        a[key] = ($3 + $4) / 2
    }

    END {
        for (i = 0; i < s; i++) {
            split(c[i], b, ";")      # b[1] = subject, b[2] = student name
            mark = a[c[i]]

            # First record seen for this subject: print its table header.
            if (!count[b[1]]++)
                print "\n\nList of " b[1] " students\n\nName\t\tMark\tPass/Fail\n----\t\t----\t----"

            # Same pass rule as the question (mark > 49), so a half mark
            # such as 49.5 counts as a pass.
            print b[2], mark, (mark > 49 ? "PASS" : "FAIL")

            total[b[1]] += mark
            mean[b[1]] = total[b[1]] / count[b[1]]
        }

        printf "\n\n%-12s%-12s%s\n", "Subject", "Mean", "Standard Deviation"
        printf "%-12s%-12s%s\n", "----", "----", "----"
        # The standard deviation column is intentionally left empty here.
        for (i in mean)
            printf "%-12s%-12s\n", i, mean[i]
    }
' test

打印

Results from FORM    6B


List of Maths students

Name            Mark    Pass/Fail
----            ----    ----
Smith John      45      FAIL
Evans Mike      65      PASS


List of Physics students

Name            Mark    Pass/Fail
----            ----    ----
Jones Tom       50      PASS
Evans Mike      61      PASS
Smith John      45      FAIL


List of Chemistry students

Name            Mark    Pass/Fail
----            ----    ----
Jones Tom       55      PASS
Evans Mike      35      FAIL


Subject     Mean        Standard Deviation
----        ----        ----
Chemistry   45
Maths       55
Physics     52

HTH:)