脚本没有执行到 awk 中的任何验证步骤

时间:2015-02-19 16:27:48

标签: awk sh

这个脚本应该读取如下格式的 CSV 文件:

  Name,Date,ID,Number
  John Smith,09/05/2015,s,999-999-99
  Mike Smith,09/06/2015,s,989-979-99
  Fred Smith,09/03/2015,s,781-999-99

第一行是应该跳过的标题行。脚本运行时,每个 .csv 文件似乎都被移动到了 goodFile 目录,我认为这是误报(假阳性)。我故意破坏了验证条件,例如在第 3 列填入了“QE”而不是“SE”(该列必须是 S 或 E),但脚本似乎根本没有执行到下面这段验证代码,我不知道为什么……

  for(linenum = 1; linenum <nr; linenum++) {
      if (length(dataArr[linenum,3]) == 0){
      printf "Failed 3rd a validation"
      exit 1

#!/bin/sh
# Run an awk validation over every CSV file matched by test/*.csv, then move
# the file to test1/goodFile when awk exits 0 and to test1/badFile otherwise.

for file in test/*.csv ; do

awk -F',' '                       

# skip the header and blank lines
# NOTE(review): "NR = 1" is an assignment, not the comparison "NR == 1".
# Assignment binds looser than ||, so this stores (1 || NF == 0), i.e. 1,
# into NR. The pattern is then true for every record and "next" skips them
# all, so no row is ever stored and none of the checks in END can fail.
NR = 1 || NF == 0 {next}

# save the data into a 2d array called dataArr
# NOTE(review): ++nr sits inside the per-field loop, so nr advances once per
# field rather than once per record.
{ for (i=1; i <= NF; i++) dataArr[++nr,i] = $i }

# After all input is read, check the stored fields column by column.
# The first failing check prints a message and ends awk with exit status 1.
END {
    # STATUS is only set here and printed below; nothing changes it.
    STATUS = "GOOD"

    # verify column 1: must not be empty
    for( linenum=1; linenum <= nr; linenum++) {
        if (length(dataArr[linenum,1]) == 0){
        printf "Failed 1st validation"
        exit 1
        }
    }

   printf "file: %s, verify column 1, STATUS: %s\n", FILENAME, STATUS


    # verify column 2: must not be empty and must look like MM?DD?YYYY,
    # where ? is one of "-", space, "/" or "."
    # NOTE(review): this loop (and the two below) uses "<nr" while the
    # column 1 loop uses "<= nr", so index nr itself is never checked here.
    for(linenum = 1; linenum <nr; linenum++) {
        if (length(dataArr[linenum,2]) == 0){
        printf "Failed 2nd a validation"
        exit 1
        }

        if ((dataArr[linenum,2]) !~ /^(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.](19|20)[0-9][0-9]$/){
        printf "Failed 2nd b validation"
        exit 1
        }
    }

    # verify column 3: must not be empty
    for(linenum = 1; linenum <nr; linenum++) {
        if (length(dataArr[linenum,3]) == 0){
        printf "Failed 3rd a validation"
        exit 1

        }
        # has to be either S or E (exactly one upper-case character)
        if ((dataArr[linenum,3]) !~ /^[SE]$/){
        printf "Failed 3rd b validation"
        exit 1
        }
    }

    # verify column 4
    for(linenum = 1; linenum <nr; linenum++) {
        # length has to be between 9 and 11 (inclusive)
        if ((length(dataArr[linenum,4])) < 9 || (length(dataArr[linenum,4]) > 11)){
        printf "Failed 4th validation"
        exit 1
        }
    }

}' "$file"

# $? is the exit status of the awk command above.
# NOTE(review): [[ ... ]] is not part of POSIX sh, so whether this test works
# depends on which shell /bin/sh actually is.
if [[ $? -eq 0 ]]; then
       # "good" status
       # NOTE(review): ${file} is unquoted, so a path containing whitespace
       # would be split into several words.
       mv ${file} test1/goodFile
else
    # "bad" status
    mv ${file} test1/badFile
fi

done

1 个答案:

答案 0 :(得分:3)

您不需要将文件保存在数组中,您只需要:

awk -F',' '
# Validate a comma-separated file in a single pass, without buffering it.
# Each rule below counts the records that break one check. After all input
# is read, END reports the first broken check in column order and exits
# with status 1. When no counter was set, awk finishes with status 0.

# skip the header and blank lines
NR == 1 || NF == 0 {next}

# column 1: must not be empty
$1 == "" { fails1++ }

# column 2: must not be empty and must look like MM?DD?YYYY, where ? is one
# of "-", space, "/" or ".". The slash is written as \/ so that awk
# implementations which end a regex constant at the first bare slash still
# parse the bracket expression correctly.
$2 == "" { fails2a++ }
$2 !~ /^(0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])[- \/.](19|20)[0-9][0-9]$/ { fails2b++ }

# column 3: must not be empty and must be exactly S or E
$3 == "" { fails3a++ }
$3 !~ /^[SE]$/ { fails3b++ }

# column 4: length must be between 9 and 11 (inclusive)
length($4) < 9 || length($4) > 11 { fails4++ }

END {
    if (fails1)  { print "Failed 1st validation"; exit 1 }
    if (fails2a) { print "Failed 2nd a validation"; exit 1 }
    if (fails2b) { print "Failed 2nd b validation"; exit 1 }
    if (fails3a) { print "Failed 3rd a validation"; exit 1 }
    if (fails3b) { print "Failed 3rd b validation"; exit 1 }
    if (fails4)  { print "Failed 4th validation"; exit 1 }

}' "$file"

顺便说一下,如果要把失败消息打印到 stderr 而不是 stdout,可以这样写:

# Pipe the message into a "cat" whose stdout is redirected to stderr (>&2),
# so the text ends up on stderr instead of stdout.
if (fails4)  { print "Failed 4th validation" | "cat>&2"; exit 1 }

如果你不在意文件包含多个错误时先报告哪一个,那么可以用下面这个版本:

awk -F',' '
# Fail-fast validation of a comma-separated file: the first record that
# breaks a check prints the matching message and ends awk with exit status 1.
# Checks run in the order written, record by record, so with several bad
# records the error reported is the one from the earliest bad record.

# skip the header and blank lines
NR == 1 || NF == 0 {next}

# column 1: must not be empty
$1 == "" { print "Failed 1st validation"; exit 1 }

# column 2: must not be empty and must look like MM?DD?YYYY, where ? is one
# of "-", space, "/" or ".". The slash is written as \/ so that awk
# implementations which end a regex constant at the first bare slash still
# parse the bracket expression correctly.
$2 == "" { print "Failed 2nd a validation"; exit 1 }
$2 !~ /^(0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])[- \/.](19|20)[0-9][0-9]$/ { print "Failed 2nd b validation"; exit 1 }

# column 3: must not be empty and must be exactly S or E
$3 == "" { print "Failed 3rd a validation"; exit 1 }
$3 !~ /^[SE]$/ { print "Failed 3rd b validation"; exit 1 }

# column 4: length must be between 9 and 11 (inclusive)
length($4) < 9 || length($4) > 11 { print "Failed 4th validation"; exit 1 }

' "$file"