将循环合并到文件处理中

时间:2019-05-10 11:01:23

标签: loops awk sed

我有几行如下所示的代码,不过它只能处理一个文件(5cym24.pdb):

# Extract three subsets of 5cym24.pdb and write a comma-separated copy of each.
# Lines containing "TRP" go to the _d file, lines containing "CYS" to the _b file.
grep -E 'TRP' 5cym24.pdb > 5cym24_d.pdb
grep -E 'CYS' 5cym24.pdb > 5cym24_b.pdb
# -n suppresses default output, so only the five listed line ranges are printed.
sed -n '3030,3106p;3138,3205p;3238,3268p;3329,3361p;3423,3453p' 5cym24.pdb > 5cym24_a.pdb
# Assigning $1 to itself makes awk rebuild each record with OFS (a comma)
# between fields; the trailing 1 is an always-true pattern that prints it.
awk '{$1=$1}1' OFS=, 5cym24_d.pdb > 5cym24_d.csv
awk '{$1=$1}1' OFS=, 5cym24_b.pdb > 5cym24_b.csv
awk '{$1=$1}1' OFS=, 5cym24_a.pdb > 5cym24_a.csv

我的问题是:如何加入一个循环,以便处理我拥有的许多 pdb 文件,即 5cymX.pdb(例如 X 的范围是 24 到 70),从而为每个文件生成 5cymX_d.pdb、5cymX_b.pdb、5cymX_a.pdb、5cymX_d.csv、5cymX_b.csv 和 5cymX_a.csv。谢谢!

2 个答案:

答案 0 :(得分:1)

尝试一下:

#!/bin/bash

# Split one input file 5cym<N>.pdb into three subsets plus CSV copies.
#
# Arguments:
#   $1 - the suffix N that completes the input file name 5cym<N>.pdb
# Outputs (written to the current directory):
#   5cym<N>_d.pdb / .csv - lines containing "TRP"
#   5cym<N>_b.pdb / .csv - lines containing "CYS"
#   5cym<N>_a.pdb / .csv - the fixed line ranges selected by sed below
# Returns:
#   1 (with a message on stderr) when the input file cannot be read;
#   otherwise the exit status of the last awk call.
do_work() {
    local stem="5cym${1}"
    local src="${stem}.pdb"
    local tag

    # Check the input before any redirection runs, so a missing file does
    # not leave six empty output files behind.
    if [[ ! -r "$src" ]]; then
        printf 'do_work: cannot read %s\n' "$src" >&2
        return 1
    fi

    grep -E 'TRP' "$src" > "${stem}_d.pdb"
    grep -E 'CYS' "$src" > "${stem}_b.pdb"
    sed -n '3030,3106p;3138,3205p;3238,3268p;3329,3361p;3423,3453p' "$src" > "${stem}_a.pdb"

    # Assigning $1 to itself makes awk rebuild each record with OFS (a comma)
    # between fields; the trailing 1 prints the rebuilt record.
    for tag in d b a; do
        awk '{$1=$1}1' OFS=, "${stem}_${tag}.pdb" > "${stem}_${tag}.csv"
    done
}

# Run do_work once for each input file, 5cym24.pdb through 5cym70.pdb.
# Bash expands {24..70} itself, so no external seq process is spawned.
for i in {24..70}; do
    do_work "$i"
done

答案 1 :(得分:0)

您现有的代码:

# Two greps and one sed each read 5cym24.pdb and write a .pdb subset:
# lines containing "TRP" (_d), lines containing "CYS" (_b), and five
# fixed line ranges (_a; sed -n prints only the ranges marked with p).
grep -E 'TRP' 5cym24.pdb > 5cym24_d.pdb
grep -E 'CYS' 5cym24.pdb > 5cym24_b.pdb
sed -n '3030,3106p;3138,3205p;3238,3268p;3329,3361p;3423,3453p' 5cym24.pdb > 5cym24_a.pdb
# Three awk passes then turn each subset into CSV: $1=$1 forces the record
# to be rebuilt with OFS (a comma) between fields, and 1 prints it.
awk '{$1=$1}1' OFS=, 5cym24_d.pdb > 5cym24_d.csv
awk '{$1=$1}1' OFS=, 5cym24_b.pdb > 5cym24_b.csv
awk '{$1=$1}1' OFS=, 5cym24_a.pdb > 5cym24_a.csv

可以减少为仅调用 1 次 awk(当然,由于您没有提供可供测试的示例输入/输出,以下代码未经测试,请见谅):

# One awk pass per input file: writes <name>_d.csv (lines matching TRP),
# <name>_b.csv (lines matching CYS) and <name>_a.csv (fixed line ranges),
# where <name> is the input file name without its .pdb suffix.
awk '
    # True when line number n lies inside one of the five extracted ranges.
    function in_a_ranges(n) {
        return (n >= 3030 && n <= 3106) ||
               (n >= 3138 && n <= 3205) ||
               (n >= 3238 && n <= 3268) ||
               (n >= 3329 && n <= 3361) ||
               (n >= 3423 && n <= 3453)
    }

    BEGIN { OFS = "," }

    # First line of each input file: close the outputs of the previous
    # file, then derive the new output prefix from the input name.
    FNR == 1 {
        close(stem "_a.csv")
        close(stem "_b.csv")
        close(stem "_d.csv")
        stem = FILENAME
        sub(/\.pdb$/, "", stem)
    }

    {
        $1 = $1    # rebuild the record so fields are joined with OFS
        if (/TRP/)            print > (stem "_d.csv")
        if (/CYS/)            print > (stem "_b.csv")
        if (in_a_ranges(FNR)) print > (stem "_a.csv")
    }
' 5cym24.pdb

而且,您可能只需将 5cym24.pdb 更改为 *.pdb(或您要提供的输入文件的任何列表),即可立即按原样处理所有文件。