使用Awk命令过滤多个表中的数据

时间:2013-08-29 08:34:28

标签: linux shell awk

我有 3 个文件,每个文件中都有一张表。文件1:

RollNo  Name
1       Student1 
2       Student2 
3       Student3 
4       Student4 

文件2:

CourseId  CourseName
CS11      Discrete Mathematics 
CS12      Business Communication 
MT15      Business Management 
BM13      Linux 

file3:

RollNo CourseID  Grade
1      CS11         AB 
1      CS12         BC 
1      BM13     AB 
1      MT15     BB 
2      CS11     AA 
2      MT15     AB 
2      BM13     AB 
2      CS12         AA 

(注意:文件中没有列名,即RollNo,Name,CourseId)

这是我从别处找到的一个 .awk 脚本文件,它的功能与我想要的非常相似:

# Asker's attempt at a per-student report. For every record of the main input
# it prints $1 as "RollNo" and $2 as "Name", then every course collected so far.
# Course rows are read from "file3.txt" and course names from "file2.txt"
# (both names are hard-coded).
# NOTE(review): presumably the main input is the student file -- confirm.
BEGIN{
# The field separator is a literal run of four spaces.
FS="    "
}
{
 # Reads ONE more line of file3.txt per main-input record (if, not while).
 # NOTE(review): getline returns -1 on error, which also counts as true here.
 if(getline tmp < "file3.txt"){
 # split() without a separator argument falls back to FS.
 split(tmp, arr)  

 # Only keep the row when its first field equals the current record's $1.
 if(arr[1]==$1){
     #print arr[2],arr[3]
     cc[i++]=arr[2]

    # Scan file2.txt from the top for the course whose ID equals arr[2].
    # NOTE(review): if file2.txt cannot be opened, getline yields -1 and
    # this loop never terminates -- compare with "> 0" to be safe.
    while(getline tmp2 < "file2.txt"){
        split(tmp2, arr2)  

            if(arr2[1]==arr[2]){
                 #print arr2[2],arr[3]
                    cName[j++]=arr2[2]
}

    }
        # Closing the file makes the next while loop start from its first line.
        close("file2.txt")
     grade[k++]=arr[3]

}
}
#print tmp
print "RollNo",$1
print "Name",$2
# NOTE(review): cc, grade and cName are never cleared between records, and
# this loop reuses i, the same variable that serves as the cc[] counter above.
for(i in cc){
print cc[i],grade[i],cName[i]
}

}



我期望的输出应该是这样的:

Roll No: 1
Name: Student1
CourseID: CS11, CourseName: Discrete Mathematics, Grade:AB
CourseID: CS12, CourseName: Business Communication, Grade:BC
.....
similarly for Roll No: 2.

3 个答案:

答案 0 :(得分:3)

下面这个单行命令可能适合你:

# One-liner: load f1 and f2 into the lookup array d, then print f3 grouped by roll number (ARGIND is a GNU awk extension).
awk 'ARGIND<3{d[$1]=$2;next}FNR>1{if($1!=x){printf "RollNo:%s\nName:%s\n", $1,d[$1];x=$1} printf "CourseID:%s, CourseName:%s, Grade:%s\n",$2,d[$2],$3}' f1 f2 f3

为了便于阅读,把它分行排版如下:

# Join three whitespace-separated tables and print a per-student report.
#   f1, f2 (ARGIND<3): store the second field in d, keyed by the first field.
#                      Roll numbers and course IDs share this one array.
#   f3 (FNR>1):        the first line is skipped; for every other line a
#                      RollNo/Name header is printed whenever $1 differs from
#                      the previously seen value x, followed by the course line.
# ARGIND is a GNU awk extension, so this needs gawk.
# NOTE(review): only $2 of each f2 line is stored, so a multi-word course name
# is cut down to its first word.
awk 'ARGIND<3{d[$1]=$2;next}
     FNR>1{
              if($1!=x){printf "RollNo:%s\nName:%s\n", $1,d[$1];x=$1} 
              printf "CourseID:%s, CourseName:%s, Grade:%s\n",$2,d[$2],$3
          }' f1 f2 f3

用你的例子测试:

kent$  head f*
==> f1 <==
RollNo  Name
1       Student1 
2       Student2 
3       Student3 
4       Student4

==> f2 <==
CourseId  CourseName
CS11      Discrete Mathematics 
CS12      Business Communication 
MT15      Business Management 
BM13      Linux

==> f3 <==
RollNo CourseID  Grade
1      CS11         AB 
1      CS12         BC 
1      BM13     AB 
1      MT15     BB 
2      CS11     AA 
2      MT15     AB 
2      BM13     AB 
2      CS12         AA

kent$  awk 'ARGIND<3{d[$1]=$2;next}FNR>1{if($1!=x){printf "RollNo:%s\nName:%s\n", $1,d[$1];x=$1} printf "CourseID:%s, CourseName:%s, Grade:%s\n",$2,d[$2],$3}' f1 f2 f3
RollNo:1
Name:Student1
CourseID:CS11, CourseName:Discrete, Grade:AB
CourseID:CS12, CourseName:Business, Grade:BC
CourseID:BM13, CourseName:Linux, Grade:AB
CourseID:MT15, CourseName:Business, Grade:BB
RollNo:2
Name:Student2
CourseID:CS11, CourseName:Discrete, Grade:AA
CourseID:MT15, CourseName:Business, Grade:AB
CourseID:BM13, CourseName:Linux, Grade:AB
CourseID:CS12, CourseName:Business, Grade:AA

答案 1 :(得分:1)

下面这个脚本应该可以完成任务,而且应该很容易理解(需要 GNU awk):

# report.awk -- print every student's courses and grades.
#
# Usage: awk -f report.awk file1 file2 file3
# Requires GNU awk 4.0 or later: arrays of arrays, asorti() with a sort
# specification and the \s regex operator are gawk extensions.
# The rules are selected by the literal file names "file1", "file2" and
# "file3", so the inputs must be passed under exactly those names.

FILENAME=="file1"{                   # When reading file1
    SID[$1] = $2                     # Student ID/Name key value pair
}
FILENAME=="file2"{                   # When reading file2
    ID = $1                          # Store the course ID
    sub(/[A-Z]+[0-9]+\s+/,"",$0)     # Remove the course ID from the line
    CID[ID] = $0                     # Coursename/ID key value pair
}
FILENAME=="file3"{                   # When reading file3
    GRADE[$1][$2] = $3               # (StudentID/CourseID)/Grade key value pair
}
END {                                # After all the files have been read
  # Sort the student IDs numerically; the default string ordering of
  # asorti() would place "10" before "2".
  n = asorti(SID,SSID,"@ind_num_asc")
  for (i=1;i<=n;i++) {               # For all the students
    s = SSID[i]                      # Store the current student ID
    print "Roll No:",s               # Print student ID
    print "Name:",SID[s]             # Print student name
    # Membership test rather than a truth test on GRADE[s][c]: it does not
    # create empty GRADE entries as a side effect, and a grade that evaluates
    # as false (e.g. "0" or a missing third column) is still reported.
    # The traversal order of "for (c in CID)" is not specified by awk.
    for (c in CID)                   # For all the courses
      if ((s in GRADE) && (c in GRADE[s]))
        printf "CourseID: %s, CourseName %s, grade:%s\n", c, CID[c], GRADE[s][c]
  }
}

在您的输入上运行:

$ awk -f report.awk file1 file2 file3
Roll No: 1
Name: Student1
CourseID: BM13, CourseName Linux, grade:AB
CourseID: MT15, CourseName Business Management, grade:BB
CourseID: CS11, CourseName Discrete Mathematics, grade:AB
CourseID: CS12, CourseName Business Communication, grade:BC
Roll No: 2
Name: Student2
CourseID: BM13, CourseName Linux, grade:AB
CourseID: MT15, CourseName Business Management, grade:AB
CourseID: CS11, CourseName Discrete Mathematics, grade:AA
CourseID: CS12, CourseName Business Communication, grade:AA
Roll No: 3
Name: Student3
Roll No: 4
Name: Student4

答案 2 :(得分:1)

下面这个脚本可以给出你想要的输出格式:

#!/usr/bin/awk -f
#
# Print, for every student, the roll number, the name and one line per graded
# course, with the course lines of each student sorted.
#
# Usage: awk -f script.awk -- file1 file2 file3
#   file1: roll number, then student name
#   file2: course ID, then course name (may contain blanks)
#   file3: roll number, course ID, grade
# All three files are loaded with getline inside BEGIN, and the first line of
# each file is read and discarded. The report itself is produced in END.

BEGIN {
    file1 = ARGV[1]
    file2 = ARGV[2]
    file3 = ARGV[3]

    # Students: remember the order of appearance and the full name.
    getline < file1                         # discard the first line
    i = 0
    while ((getline < file1) > 0) {
        r = $1
        rollno_in_order[i++] = r
        t = $0
        # Strip the first field and the blanks after it; the rest is the name.
        sub(/^[^[:blank:]]*[[:blank:]]*/, "", t)
        rollno_to_studentname[r] = t
    }

    # Courses: map the course ID to the full course name.
    getline < file2                         # discard the first line
    while ((getline < file2) > 0) {
        courseid = $1
        t = $0
        sub(/^[^[:blank:]]*[[:blank:]]*/, "", t)
        courseid_to_coursename[courseid] = t
    }

    # Grades: row i holds a course ID and a grade. Each roll number collects
    # the indices of its rows as a space-separated list.
    i = 0
    getline < file3                         # discard the first line
    while ((getline < file3) > 0) {
        i_to_courseid[i] = $2
        i_to_grade[i] = $3
        rollno_i_array[$1] = rollno_i_array[$1] i " "
        ++i
    }
}

END {
    for (i = 0; i in rollno_in_order; ++i) {
        r = rollno_in_order[i]
        printf("Roll No: %d\n", r)
        printf("Name: %s\n", rollno_to_studentname[r])
        # Split on the default " " separator: runs of blanks delimit fields and
        # the trailing blank yields no empty field, so c is exactly the number
        # of rows. A regex such as / */ can match the empty string, and how awk
        # implementations split on such a match is not consistent.
        c = split(rollno_i_array[r], a, " ")
        for (j = 1; j <= c; ++j) {
            k = a[j]
            course_id = i_to_courseid[k]
            course_name = courseid_to_coursename[course_id]
            grade = i_to_grade[k]
            printf("CourseID: %s, CourseName: %s, Grade: %s\n", course_id, course_name, grade) | "sort"
        }
        # Closing the pipe lets sort emit this student's lines before the
        # next student's header is printed.
        close("sort")
        print ""
    }
}

运行

awk -f script.awk -- file1 file2 file3

会输出:

Roll No: 1
Name: Student1
CourseID: BM13, CourseName: Linux, Grade: AB
CourseID: CS11, CourseName: Discrete Mathematics, Grade: AB
CourseID: CS12, CourseName: Business Communication, Grade: BC
CourseID: MT15, CourseName: Business Management, Grade: BB

Roll No: 2
Name: Student2
CourseID: BM13, CourseName: Linux, Grade: AB
CourseID: CS11, CourseName: Discrete Mathematics, Grade: AA
CourseID: CS12, CourseName: Business Communication, Grade: AA
CourseID: MT15, CourseName: Business Management, Grade: AB

Roll No: 3
Name: Student3

Roll No: 4
Name: Student4

它与任何标准awk兼容。