我正在写一个 awk 脚本,它读取一个文件,打印出学生的成绩,以及他们在各个科目中是及格还是不及格。
输入文件类似于 http://pastebin.com/MMVC1qgD
输出应该类似于 http://pastebin.com/3eD6r93U
# Grade report for Form 6B (awk program as posted in the question).
# Prints a banner, the header of the Maths table, and the header of the
# per-subject summary table.
BEGIN {
# Zero the per-subject counters and running totals.
# NOTE(review): this line initialises "mathscounts" (trailing s), while the
# Maths rule further down increments "mathscount" -- two different variables.
mathscounts=0
physicscount=0
chemistrycount=0
mathstotal=0
physicstotal=0
chemistrytotal=0
printf("Results for Form 6B\n\n")
}
# Runs for every record whose second field is exactly "Maths".
$2=="Maths"{
printf("List of Maths Students\n")
printf("Name Mark Pass/Fail\n")
printf("---- ---- ---------\n")
# "next" abandons the current record and restarts rule matching with the
# following one, so nothing below this line in the rule is ever executed.
next
# Unreachable because of the "next" above. The body averages fields 3 and 4,
# prints one result row and updates the Maths tally; it never reads another
# record, so the fields tested by the while condition would not change
# between iterations.
do{
mark=(($3+$4)/2)
# A mark above 49 counts as a pass.
if (mark>49)
results="Pass"
else
results="Fail"
# %d prints the integer part of the averaged mark.
printf("%s %s %d %s\n",$2, $1,mark,results)
mathscount++
mathstotal=(mathstotal+mark)
}while($1!="SUBJECT")
}
# After all input: prints only the header of the summary table; no
# per-subject rows are emitted here.
END {
printf("\n\n")
printf("Subject Mean Standard Deviation\n")
printf("------- ---- ------------------\n")
}
我尝试使用 do-while 语句,但它什么都没有打印出来。
这种做法到底可行吗?
这里需要用 next 跳过当前行,因为该行没有我需要的信息。
答案 0 :(得分:1)
我觉得这是个有点意思的挑战,所以写了下面这个解决方案。
需要说明的是,用 awk 做这类分析有些勉强:如果像 `if (mark)` 这样靠数值真假来判断数组里是否有某个学生的成绩,得 0 分的学生会被当作没有成绩而漏掉。我其实认为,更好的做法是先把成绩整理成简单的 CSV,然后在电子表格里做分析。
# Grade report: reads "FORM", "SUBJECT <name>" and student lines
# (<forename> <surname> <mark1> <mark2>), then prints one table per subject
# followed by a summary with count, mean and population standard deviation.
BEGIN {
    printf "Results for Form 6B\n\n"
}

# The form banner line carries no marks.
$1 == "FORM" { next }

# A SUBJECT line selects the subject that the following student lines belong to.
$1 == "SUBJECT" {
    subject = $2
    subjects[subject] = 1
    next
}

# Student line. Lines with fewer than four fields (blank lines, separators)
# are not student rows and are ignored instead of being stored as a 0 mark.
NF >= 4 {
    student = $2 " " $1
    students[student] = 1
    marks[subject, student] = ($3 + $4) / 2
}

END {
    for (subject in subjects) {
        printf "List of %s Students\n", subject
        printf "Name Mark Pass/Fail\n"
        printf "---- ---- ---------\n"
        for (student in students) {
            # Test membership with "in": it neither creates an empty entry
            # nor confuses a genuine mark of 0 with "did not take the subject".
            if (!((subject, student) in marks))
                continue
            mark = marks[subject, student]
            N[subject]++
            S[subject] += mark
            result = (mark > 49) ? "Pass" : "Fail"
            printf "%-15s%4d%10s\n", student, mark, result
        }
        printf "\n"
    }

    printf "\n\n"
    printf "Subject N Mean Standard Deviation\n"
    printf "------- -- ---- ------------------\n"
    for (subject in subjects) {
        # A subject header with no student rows has nothing to average;
        # report it explicitly rather than dividing by zero.
        if (!N[subject]) {
            printf "%-12s%2i%7s%7s\n", subject, 0, "n/a", "n/a"
            continue
        }
        mean = S[subject] / N[subject]
        ssum = 0
        for (student in students) {
            if ((subject, student) in marks) {
                dev = marks[subject, student] - mean
                ssum += dev * dev
            }
        }
        # Population standard deviation, computed once per subject.
        std = sqrt(ssum / N[subject])
        printf "%-12s%2i%7.1f%7.1f\n", subject, N[subject], mean, std
    }
}
答案 1 :(得分:1)
awk '
# Streams a per-subject marks table while reading the file "test", then
# prints mean and population standard deviation for every subject.
# Input: a banner line, "SUBJECT <name>" lines, and student lines of the
# form <name1> <name2> <mark1> <mark2>.

# Banner built from the first two fields of the first line.
NR == 1 { print "Results from " $1, $2 }

# A SUBJECT line opens a new section and prints its table header.
$1 == "SUBJECT" {
    subj = $2
    print "\n\nList of " subj " students"
    printf "\n%-20s%-20s%s\n", "Name", "Mark", "Pass/Fail"
    printf "%-20s%-20s%s\n", "----", "----", "---------"
    next
}

# Student row inside a section. Compared against "" so that a subject whose
# name happens to be 0 still counts; blank lines are skipped.
subj != "" && NF {
    # Compute the mark first so the printf below does not depend on the
    # order in which its arguments are evaluated.
    mark = ($3 + $4) / 2
    verdict = (mark > 49) ? "PASS" : "FAIL"
    printf "%-20s%-20s%s\n", $1 " " $2, mark, verdict

    # Keep every mark as a number, indexed per subject. This preserves a
    # mark of 0, keeps full precision, and stays consistent if the same
    # subject header appears more than once.
    n = ++Count[subj]
    Marks[subj, n] = mark
    Total[subj] += mark
    Mean[subj] = Total[subj] / n
}

END {
    printf "\n\n%-12s%-12s%s\n", "Subject", "Mean", "Standard Deviation"
    printf "%-12s%-12s%s\n", "----", "----", "----"
    # Mean only has entries for subjects with at least one student row,
    # so Count[s] is never zero here.
    for (s in Mean) {
        sq = 0
        for (j = 1; j <= Count[s]; j++) {
            dev = Marks[s, j] - Mean[s]
            sq += dev * dev
        }
        printf "%-12s%-12s%.2f\n", s, Mean[s], sqrt(sq / Count[s])
    }
}
' test
输出
Results from FORM 6B
List of Maths students
Name Mark Pass/Fail
---- ---- ---------
Smith John 45 FAIL
Evans Mike 65 PASS
List of Physics students
Name Mark Pass/Fail
---- ---- ---------
Jones Tom 50 PASS
Evans Mike 61 PASS
Smith John 45 FAIL
List of Chemistry students
Name Mark Pass/Fail
---- ---- ---------
Jones Tom 55 PASS
Evans Mike 35 FAIL
Subject Mean Standard Deviation
---- ---- ----
Physics 52 6.68
Maths 55 10.00
Chemistry 45 10.00
另一种 awk 写法
我没有计算标准差,因为我忘了怎么算 :(
我相信这很容易补上。
如果你实在搞不定,我可以再看一下。
awk -v OFS='\t' '
# Buffers every student row of the file "test" and prints the per-subject
# tables and the table of means once all input has been read.
# Output fields of the student rows are tab-separated (OFS).

# Banner: the whole first line of the input.
NR == 1 {
    print "Results from " $0
}

# Remember which subject the following rows belong to.
$1 == "SUBJECT" {
    subj = $2
    next
}

# Student row: store the averaged mark under "subject;name" and remember
# the order in which the keys were read.
subj {
    key = subj ";" $1 " " $2
    order[rows++] = key
    avg[key] = ($3 + $4) / 2
}

END {
    for (r = 0; r < rows; r++) {
        key = order[r]
        split(key, part, ";")
        name = part[1]

        # First row seen for this subject: emit the table header.
        if (seen[name]++ == 0)
            print "\n\nList of " name " students\n\nName\t\tMark\tPass/Fail\n----\t\t----\t----"

        verdict = "FAIL"
        if (avg[key] >= 50)
            verdict = "PASS"
        print part[2], avg[key], verdict

        # Running total and mean for the subject.
        total[name] += avg[key]
        mean[name] = total[name] / seen[name]
    }

    printf "\n\n%-12s%-12s%s\n", "Subject", "Mean", "Standard Deviation"
    printf "%-12s%-12s%s\n", "----", "----", "----"
    for (name in mean)
        printf "%-12s%-12s\n", name, mean[name]
}
' test
打印
Results from FORM 6B
List of Maths students
Name Mark Pass/Fail
---- ---- ----
Smith John 45 FAIL
Evans Mike 65 PASS
List of Physics students
Name Mark Pass/Fail
---- ---- ----
Jones Tom 50 PASS
Evans Mike 61 PASS
Smith John 45 FAIL
List of Chemistry students
Name Mark Pass/Fail
---- ---- ----
Jones Tom 55 PASS
Evans Mike 35 FAIL
Subject Mean Standard Deviation
---- ---- ----
Chemistry 45
Maths 55
Physics 52
HTH:)