以下是三个文件 -
emp.txt
7839|BLAKE|PRESIDENT||17-NOV-81|5000||10
7698|KING|MANAGER|7839|01-MAY-81|2850||10
7782|CLARK|MANAGER|7839|09-JUN-81|2450||10
7566|JONES|MANAGER|7839|02-APR-81|2975||40
7788|SCOTT|ANALYST|7566|19-APR-87|3000||50
7902|FORD|ANALYST|7566|03-DEC-81|3000||20
dept.txt
10|ACCOUNTING|NEW YORK
20|RESEARCH|DALLAS
30|SALES|CHICAGO
40|OPERATIONS|BOSTON
citycode.txt
1123|NEW YORK
1124|DALLAS
1125|CHICAGO
1126|BOSTON
1127|WASHINGTON
预期输出:$0(来自 emp.txt,其中 $6 >= 2850)+ $2(来自 dept.txt)+ $1(来自 citycode.txt)
7839|BLAKE|PRESIDENT||17-NOV-81|5000||10|ACCOUNTING|1123
7698|KING|MANAGER|7839|01-MAY-81|2850||10|ACCOUNTING|1123
7566|JONES|MANAGER|7839|02-APR-81|2975||40|OPERATIONS|1126
7788|SCOTT|ANALYST|7566|19-APR-87|3000||50|NULL|NULL
7902|FORD|ANALYST|7566|03-DEC-81|3000||20|RESEARCH|1124
解释 - 以 emp.txt 的最后一个字段($NF)和 dept.txt 的第一个字段($1)作为公共键连接这两个文件,并且只保留 emp.txt 中 $6 >= 2850 的记录。这是以 emp.txt 为主的外连接:部门号 50 在 dept.txt 中没有对应记录,这就是它打印 NULL 的原因。然后再以 dept.txt 的最后一个字段($NF,城市名)和 citycode.txt 的城市名字段($2)作为公共列进行连接。使用公共键连接两个文件很容易,但我们如何将三个文件连接在一起?
答案 0 :(得分:4)
在处理城市和部门代码时,使用内置的FILENAME
变量构建适当的关联数组。
然后在处理员工时,输出所需的数据:
# j3.awk -- left outer join of emp.txt against dept.txt and citycode.txt.
#
# Usage: awk -f j3.awk citycode.txt dept.txt emp.txt
#
# Both lookup files must be listed before emp.txt so that the arrays are
# complete by the time the employee records are read. Files are recognised
# by name through the built-in FILENAME variable.
BEGIN { FS = OFS = "|" }

# Return s unchanged, or the literal "NULL" when s is the empty string.
# The test is an explicit comparison with "" rather than a truth test, so a
# numeric-looking value such as 0 read from the input is not reported as NULL.
function chknull(s) { return (s != "") ? s : "NULL" }

# citycode.txt (code|city): map city name -> city code.
FILENAME == "citycode.txt" { citycode[$2] = $1 }

# dept.txt (deptno|name|city): map deptno -> name and deptno -> city name.
FILENAME == "dept.txt" { depname[$1] = $2; depcity[$1] = $3 }

# emp.txt: keep records whose 6th field is >= 2850 and append the department
# name and the city code, keyed on the last field (the department number).
FILENAME == "emp.txt" && $6 >= 2850 {
    dept = $NF
    # Membership is tested with "in" before indexing so that an unknown
    # department neither creates empty array elements nor gets resolved
    # through an empty-string city key.
    name = (dept in depname) ? depname[dept] : ""
    code = ((dept in depcity) && (depcity[dept] in citycode)) ? citycode[depcity[dept]] : ""
    print $0, chknull(name), chknull(code)
}
$ awk -f j3.awk citycode.txt dept.txt emp.txt
7839|BLAKE|PRESIDENT||17-NOV-81|5000||10|ACCOUNTING|1123
7698|KING|MANAGER|7839|01-MAY-81|2850||10|ACCOUNTING|1123
7566|JONES|MANAGER|7839|02-APR-81|2975||40|OPERATIONS|1126
7788|SCOTT|ANALYST|7566|19-APR-87|3000||50|NULL|NULL
7902|FORD|ANALYST|7566|03-DEC-81|3000||20|RESEARCH|1124
(注意:我还添加了 chknull 函数,用于打印 "NULL" 而不是空字符串,以匹配问题所要求的输出;
但这对问题的核心——如何连接两个以上的文件——并不重要。)
答案 1 :(得分:3)
# Left outer join of emp.txt (records whose 6th field is >= 2850) against
# dept.txt and citycode.txt. The two lookup files must come first, in this
# order, because the program identifies them by their position.
awk 'BEGIN { FS = OFS = "|" }
# FNR restarts at 1 for every file, so f counts the files seen so far.
# This assumes that none of the input files is empty.
FNR == 1 { f++ }
# dept.txt (deptno|name|city): deptno -> name and deptno -> city name.
f == 1 { deptname[$1] = $2; city[$1] = $NF; next }
# citycode.txt (code|city): city name -> code. A separate array is used so
# that several departments located in the same city all resolve to its code.
f == 2 { code[$NF] = $1; next }
# emp.txt: the last field is the department number.
$6 >= 2850 {
  dept = $NF
  print $0, (dept in deptname ? deptname[dept] : "NULL"), ((dept in city) && (city[dept] in code) ? code[city[dept]] : "NULL")
}' dept.txt citycode.txt emp.txt
<strong>解释</strong>
awk '                       # Call awk
BEGIN {
  FS = OFS = "|"            # Use the same separator for input and output
}

FNR == 1 {                  # FNR is 1 on the first line of every input file
  f++                       # File counter: 1 = dept.txt, 2 = citycode.txt, 3 = emp.txt
}                           # (assumes that none of the files is empty)

# FILENAME == "dept.txt" could be used here instead; f == 1 is simply shorter.
f == 1 {                    # First file, dept.txt: deptno|name|city
  deptname[$1] = $2         # deptname[deptno] = department name
  city[$1] = $NF            # city[deptno]     = city name of that department
  next                      # Done with this line, read the next one
}

# Second file, citycode.txt: code|city
# Every city is stored, keyed by its name. Cities that no department refers
# to (WASHINGTON in the sample) are simply never looked up.
# The codes live in their own array instead of being folded back into the
# department array, so two departments in the same city both get the code.
f == 2 {
  code[$NF] = $1            # code[city name] = city code
  next                      # Done with this line, read the next one
}

# Third file, emp.txt: keep records whose 6th field is >= 2850.
$6 >= 2850 {
  dept = $NF                # The last field of emp.txt is the department number

  # Print the current emp.txt record followed by
  #   - the department name, or NULL if the department is not in dept.txt
  #   - the city code, or NULL if the department or its city is unknown
  # Membership is checked with "in" before indexing so that a failed lookup
  # does not create empty array elements.
  print $0, (dept in deptname ? deptname[dept] : "NULL"), ((dept in city) && (city[dept] in code) ? code[city[dept]] : "NULL")
}
' dept.txt citycode.txt emp.txt
<strong>输入</strong>
$ cat emp.txt
7839|BLAKE|PRESIDENT||17-NOV-81|5000||10
7698|KING|MANAGER|7839|01-MAY-81|2850||10
7782|CLARK|MANAGER|7839|09-JUN-81|2450||10
7566|JONES|MANAGER|7839|02-APR-81|2975||40
7788|SCOTT|ANALYST|7566|19-APR-87|3000||50
7902|FORD|ANALYST|7566|03-DEC-81|3000||20
$ cat dept.txt
10|ACCOUNTING|NEW YORK
20|RESEARCH|DALLAS
30|SALES|CHICAGO
40|OPERATIONS|BOSTON
$ cat citycode.txt
1123|NEW YORK
1124|DALLAS
1125|CHICAGO
1126|BOSTON
1127|WASHINGTON
<strong>输出</strong>
$ awk 'BEGIN{FS=OFS="|"}FNR==1{f++}f==1{d[$1]=$2;c[$NF]=$1;next}f==2 && $NF in c{c[c[$NF]]=$1;next}$6 >=2850{print $0,($NF in d?d[$NF]:"NULL"),($NF in c?c[$NF]:"NULL")}' dept.txt citycode.txt emp.txt
7839|BLAKE|PRESIDENT||17-NOV-81|5000||10|ACCOUNTING|1123
7698|KING|MANAGER|7839|01-MAY-81|2850||10|ACCOUNTING|1123
7566|JONES|MANAGER|7839|02-APR-81|2975||40|OPERATIONS|1126
7788|SCOTT|ANALYST|7566|19-APR-87|3000||50|NULL|NULL
7902|FORD|ANALYST|7566|03-DEC-81|3000||20|RESEARCH|1124
答案 2 :(得分:1)
在awk中很简单:
代码是:
# Left outer join of emp.txt against two lookup files that are loaded in the
# BEGIN block with getline.
#   dptfile      - department file, deptno|name|city
#   citycodefile - city code file, code|city
#   from         - minimum value of the 6th field for a record to be printed
awk -F'|' -v dptfile="dept.txt" -v citycodefile="citycode.txt" -v from="2850" '
BEGIN {
  OFS = FS

  # Build two arrays from the department file:
  # dpt[deptno] = department name, dptcity[deptno] = department city.
  while ((rc = (getline line < dptfile)) > 0) {
    split(line, a, FS)
    dpt[a[1]] = a[2]
    dptcity[a[1]] = a[3]
  }
  # getline returns -1 when the file cannot be read. Stop with an error
  # instead of silently printing NULL for every record.
  if (rc < 0) {
    print "error: cannot read " dptfile | "cat 1>&2"
    exit 1
  }
  close(dptfile)

  # Build the third array from the city code file: city[city name] = code.
  while ((rc = (getline line < citycodefile)) > 0) {
    split(line, a, FS)
    city[a[2]] = a[1]
  }
  if (rc < 0) {
    print "error: cannot read " citycodefile | "cat 1>&2"
    exit 1
  }
  close(citycodefile)
}

# Main input (emp.txt): field 8 is the department number. The department is
# checked with "in" before its city is looked up, so an unknown department
# always yields NULL and no empty array elements are created.
($6 >= from) {
  print $0, ($8 in dpt ? dpt[$8] : "NULL"), (($8 in dptcity) && (dptcity[$8] in city) ? city[dptcity[$8]] : "NULL")
}
' emp.txt
对于您给出的输入(并借用 @akshay-hegde 的出色展示方式):
<strong>输入</strong>
$ cat emp.txt
7839|BLAKE|PRESIDENT||17-NOV-81|5000||10
7698|KING|MANAGER|7839|01-MAY-81|2850||10
7782|CLARK|MANAGER|7839|09-JUN-81|2450||10
7566|JONES|MANAGER|7839|02-APR-81|2975||40
7788|SCOTT|ANALYST|7566|19-APR-87|3000||50
7902|FORD|ANALYST|7566|03-DEC-81|3000||20
$ cat dept.txt
10|ACCOUNTING|NEW YORK
20|RESEARCH|DALLAS
30|SALES|CHICAGO
40|OPERATIONS|BOSTON
$ cat citycode.txt
1123|NEW YORK
1124|DALLAS
1125|CHICAGO
1126|BOSTON
1127|WASHINGTON
<strong>输出</strong>
$ awk -F'|' -v dptfile="dept.txt" -v citycodefile="citycode.txt" -v from="2850" 'BEGIN { OFS=FS; rem="build 2 arrays, dpt[] associates number with department,";rem="and dptcity[] associate same number with department city"; while ((getline line<dptfile) > 0) { split(line,a,OFS);dpt[a[1]]=a[2]; dptcity[a[1]]=a[3];} ; close(dptfile) ; rem="build 3rd array, city[cityname] associates a city name to its number"; while ((getline line<citycodefile)>0) { split(line,a,OFS); city[a[2]]=a[1] ; }; close(citycodefile); } ( $6>=from ) { print $0 OFS ($8 in dpt? dpt[$8]:"NULL") OFS (dptcity[$8] in city? city[dptcity[$8]]:"NULL") ;}' emp.txt
7839|BLAKE|PRESIDENT||17-NOV-81|5000||10|ACCOUNTING|1123
7698|KING|MANAGER|7839|01-MAY-81|2850||10|ACCOUNTING|1123
7566|JONES|MANAGER|7839|02-APR-81|2975||40|OPERATIONS|1126
7788|SCOTT|ANALYST|7566|19-APR-87|3000||50|NULL|NULL
7902|FORD|ANALYST|7566|03-DEC-81|3000||20|RESEARCH|1124