我的输入文件如下:
SL3.0ch00 maker_ITAG exon 16480 16794 . + . transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_name "Solyc00g005000.3"; gene_biotype "protein_coding";
SL3.0ch00 maker_ITAG exon 16879 17940 . + . transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_name "Solyc00g005000.3"; gene_biotype "protein_coding";
SL3.0ch00 maker_ITAG CDS 16480 16794 . + 0 transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_name "Solyc00g005000.3"; gene_biotype "protein_coding";
SL3.0ch00 maker_ITAG CDS 16879 17940 . + 0 transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_name "Solyc00g005000.3"; gene_biotype "protein_coding";
所需的输出:
SL3.0ch00 maker_ITAG exon 16480 16794 . + . transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_biotype "protein_coding";
SL3.0ch00 maker_ITAG exon 16879 17940 . + . transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_biotype "protein_coding";
SL3.0ch00 maker_ITAG CDS 16480 16794 . + 0 transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_biotype "protein_coding";
SL3.0ch00 maker_ITAG CDS 16879 17940 . + 0 transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_biotype "protein_coding";
我想从文件的所有行中删除 gene_name "某个名称"; 这一项。我使用了以下命令:
awk '{prinf$13=$14=""; print $0}' input_file
但是前几列的所有格式都被更改(空格代替制表符)。请帮助。任何其他命令或方法也可以。
答案 0 :(得分:1)
使用awk:
/// Builds the request stream that backs the invited-user queries.
///
/// Every element emitted by `dateParameterTrigger` is observed on `queue` and turned into a
/// `CustomerTarget.GetInvitedUserList` request issued through `provider`.
///
/// - Returns: An observable of the raw responses produced by those requests.
fileprivate func apiCall() -> Observable<Response> {
    // Read once, before the chain is built, so every request in this chain uses the same user ID.
    let userID = UserModel.sharedInstance.uID
    let trigger = dateParameterTrigger.observeOn(queue)
    return trigger.flatMap { dateRange -> Observable<Response> in
        // The trigger element carries the start date string in `.0` and the end date string in `.1`.
        // NOTE(review): 0 and 20 look like a paging offset and page size — confirm against CustomerTarget.
        let target = CustomerTarget.GetInvitedUserList(userID, 0, 20, dateRange.0, dateRange.1)
        let request = self.provider.rx.request(target).asObservable()
        // Shares this inner request and replays its latest element to later subscribers.
        return request.share(replay: 1, scope: .forever)
    }
}
/// Emits the summary payload (`returnValue`) decoded from each response of `apiCall()`.
///
/// Decoding problems never surface as stream errors: when the response body cannot be decoded
/// as `CustomerModel`, or when `returnValue` is `nil`, an empty `CustomerListJSON()` is emitted
/// instead.
///
/// - Returns: One `CustomerListJSON` per response.
func getSummery() -> Observable<CustomerListJSON> {
    return apiCall()
        .map { event -> CustomerListJSON in
            do {
                // Binding the decoded model with `let` inside `do` removes the need for an
                // implicitly-unwrapped `var` that is assigned later.
                let model = try JSONDecoder().decode(CustomerModel.self, from: event.data)
                return model.returnValue ?? CustomerListJSON()
            } catch {
                // Best-effort: log the decoding failure and fall back to an empty value.
                print(error)
                return CustomerListJSON()
            }
        }
}
/// Emits the customer list (`returnValue.UserList`) decoded from each response of `apiCall()`.
///
/// Decoding problems never surface as stream errors: when the response body cannot be decoded
/// as `CustomerModel`, or when `returnValue` or its `UserList` is `nil`, an empty array is
/// emitted instead.
///
/// - Returns: One `[CustomerJSON]` per response.
func getCustomerList() -> Observable<[CustomerJSON]> {
    return apiCall()
        .map { event -> [CustomerJSON] in
            do {
                // Binding the decoded model with `let` inside `do` removes the need for an
                // implicitly-unwrapped `var` that is assigned later.
                let model = try JSONDecoder().decode(CustomerModel.self, from: event.data)
                return model.returnValue?.UserList ?? []
            } catch {
                // Best-effort: log the decoding failure and fall back to an empty list.
                print(error)
                return []
            }
        }
}
使用awk(以分号作为字段分隔符,跳过第3个字段,即 gene_name 属性):
awk 'BEGIN{FS=OFS=";"} {print $1,$2,$4,$5}' file
输出:
SL3.0ch00 maker_ITAG exon 16480 16794 . + . transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_biotype "protein_coding";
SL3.0ch00 maker_ITAG exon 16879 17940 . + . transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_biotype "protein_coding";
SL3.0ch00 maker_ITAG CDS 16480 16794 . + 0 transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_biotype "protein_coding";
SL3.0ch00 maker_ITAG CDS 16879 17940 . + 0 transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_biotype "protein_coding";
答案 1 :(得分:1)
请尝试以下命令。(如果您的 Input_file 以 TAB 分隔,可以加上 -F"\t"。)
awk 'match($0,/ gene_name[^;]*/){print substr($0,1,RSTART-1) substr($0,RSTART+RLENGTH+1);next} 1' Input_file
下面再给出该解决方案的非单行(多行)形式,并附上解释。
awk '
match($0,/ gene_name[^;]*/){ ##match() looks for " gene_name" followed by every character up to (not including) the next semicolon; on success it sets RSTART and RLENGTH.
print substr($0,1,RSTART-1) substr($0,RSTART+RLENGTH+1) ##Print the text before the match, then the text that starts one character past the end of the match; the +1 skips the semicolon that closed the gene_name attribute.
next ##Skip the remaining rules for this line so it is not printed a second time by the 1 below.
}
1 ##An always-true pattern with no action prints the line unchanged; only lines without a gene_name attribute get here.
' Input_file ##Input_file is the file to process.
答案 2 :(得分:1)
您的一些字段用制表符分隔,而另一些字段则用分号(后跟可选的空格)分隔。您可以用 FS="\t|; ?" 让 awk 进行拆分,这样可以正确识别各个字段,但每个字段两侧的具体分隔符不会被保留,而稍后把记录重新拼接起来时需要用到它们。这就是 GNU awk 的 split() 函数提供第 4 个参数的原因:它可以同时保存字段和分隔符。在您的情况下,可以这样使用:
nf = split($0,flds,/\t|; ?/,seps)
来看看它对输入的第一条记录做了什么:
$ cat tst.awk
# Demonstration: split each record into fields and separators, then dump both
# arrays for the first record only.
{
# 4-argument split(): flds[] receives the fields and seps[] the separator text
# matched after each field; nf is the number of fields.
nf = split($0,flds,/\t|; ?/,seps)
}
NR == 1 {
# Show the untouched record first, then each field with the separator that follows it.
printf "$0=<%s>\n", $0
for (i=1; i<=nf; i++) {
printf " flds[%d] = <%s>\n", i, flds[i]
printf " seps[%d] = <%s>\n", i, seps[i]
}
}
。
$ awk -f tst.awk file
$0=<SL3.0ch00 maker_ITAG exon 16480 16794 . + . transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_name "Solyc00g005000.3"; gene_biotype "protein_coding";>
flds[1] = <SL3.0ch00>
seps[1] = < >
flds[2] = <maker_ITAG>
seps[2] = < >
flds[3] = <exon>
seps[3] = < >
flds[4] = <16480>
seps[4] = < >
flds[5] = <16794>
seps[5] = < >
flds[6] = <.>
seps[6] = < >
flds[7] = <+>
seps[7] = < >
flds[8] = <.>
seps[8] = < >
flds[9] = <transcript_id "mRNA:Solyc00g005000.3.1">
seps[9] = <; >
flds[10] = <gene_id "gene:Solyc00g005000.3">
seps[10] = <; >
flds[11] = <gene_name "Solyc00g005000.3">
seps[11] = <; >
flds[12] = <gene_biotype "protein_coding">
seps[12] = <;>
flds[13] = <>
seps[13] = <>
可以看到,您不仅可以通过 flds[] 数组访问每个字段,还可以通过 seps[] 数组访问每个字段后面的分隔符。因此,要删除某个字段,只需将两个数组中对应的元素都设为空字符串,然后重新组合记录:
$ cat tst.awk
# Remove the gene_name attribute from every record while keeping all other
# fields and their original separators (tabs, "; ") exactly as they were.
# Requires GNU awk: the 4-argument split() that fills seps[] is a gawk extension.
{
    # flds[] receives the fields, seps[] the separator text that followed each one.
    nf = split($0,flds,/\t|; ?/,seps)
    # Find the attribute by name instead of assuming it is always field 11, so
    # records with a different attribute order (or no gene_name) are handled correctly.
    for (i=1; i<=nf; i++)
        if (flds[i] ~ /^gene_name[ \t]/)
            flds[i] = seps[i] = ""
    $0 = join(nf,flds,seps)
    print
}

# Concatenate f[1..n], each followed by its separator s[i]; i and o are locals.
function join(n,f,s,   i,o) {for (i=1;i<=n;i++) o=o f[i] s[i]; return o}
。
$ awk -f tst.awk file
SL3.0ch00 maker_ITAG exon 16480 16794 . + . transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_biotype "protein_coding";
SL3.0ch00 maker_ITAG exon 16879 17940 . + . transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_biotype "protein_coding";
SL3.0ch00 maker_ITAG CDS 16480 16794 . + 0 transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_biotype "protein_coding";
SL3.0ch00 maker_ITAG CDS 16879 17940 . + 0 transcript_id "mRNA:Solyc00g005000.3.1"; gene_id "gene:Solyc00g005000.3"; gene_biotype "protein_coding";