我们有一个包含百万条目的文件,如下所示:
123,100F,abc
456,250F,xyz
另外,我们有一些映射如下所示:
100F > 522
150F > 523
200F > 524
250F > 525
300F > 526
如何得到如下所示的所需格式输出:第二个字段按映射替换,最后一个字段改为当前日期。
输出:
123,522,2017-01-25
456,525,2017-01-25
...
我不想使用像下面这样的硬编码方式:
sed "s/100F/522/;s/250F/525/;s/\w*/`$date`" input > output
答案 0 :(得分:4)
$ awk -v today="$(date '+%Y-%m-%d')" '
# Pass 1 (map file, default whitespace splitting): a line such as
# "100F > 522" yields $1=100F and $3=522.
FNR == NR {
    code_for[$1] = $3
    next
}
# Pass 2 (data file): fields are comma-separated because the FS=, OFS=,
# operands below are applied after the map file has been read.
{
    print $1, code_for[$2], today
}
' map FS=, OFS=, file
123,522,2017-01-23
456,525,2017-01-23
或者如果您希望在脚本中保留FS和OFS设置:
$ awk -v today="$(date '+%Y-%m-%d')" '
# First file (map): remember the replacement code for each key.
FNR == NR {
    code_for[$1] = $3
    next
}
# First record of the second file: switch to comma-separated fields and
# re-split the record, which was already split with the old separator.
FNR == 1 {
    FS = ","
    OFS = ","
    $0 = $0
}
# Every data record: emit first field, mapped code, and the date.
{
    print $1, code_for[$2], today
}
' map file
123,522,2017-01-23
456,525,2017-01-23
答案 1 :(得分:1)
您可以尝试使用hello.elf: file format elf32-littleriscv
Disassembly of section .text:
00010074 <_start>:
10074: 00005197 auipc gp,0x5
10078: fcc18193 addi gp,gp,-52 # 15040 <_gp>
1007c: 00004517 auipc a0,0x4
10080: 7d850513 addi a0,a0,2008 # 14854 <_edata>
10084: 00005617 auipc a2,0x5
10088: 83060613 addi a2,a2,-2000 # 148b4 <_end>
1008c: 40a60633 sub a2,a2,a0
10090: 00000593 li a1,0
10094: 2c0000ef jal ra,10354 <memset>
10098: 00000517 auipc a0,0x0
1009c: 1bc50513 addi a0,a0,444 # 10254 <__libc_fini_array>
100a0: 16c000ef jal ra,1020c <atexit>
100a4: 210000ef jal ra,102b4 <__libc_init_array>
100a8: 00012503 lw a0,0(sp)
100ac: 00410593 addi a1,sp,4
100b0: 00000613 li a2,0
100b4: 124000ef jal ra,101d8 <main>
100b8: 1680006f j 10220 <exit>
000100bc <_fini>:
100bc: 00008067 ret
000100c0 <deregister_tm_clones>:
100c0: 00015537 lui a0,0x15
100c4: 000157b7 lui a5,0x15
100c8: 84050713 addi a4,a0,-1984 # 14840 <__TMC_END__>
100cc: 84378793 addi a5,a5,-1981 # 14843 <__TMC_END__+0x3>
100d0: 40e787b3 sub a5,a5,a4
100d4: 00600713 li a4,6
100d8: 00f77c63 bleu a5,a4,100f0 <deregister_tm_clones+0x30>
100dc: 00000337 lui t1,0x0
100e0: 00030313 mv t1,t1
100e4: 00030663 beqz t1,100f0 <deregister_tm_clones+0x30>
100e8: 84050513 addi a0,a0,-1984
100ec: 00030067 jr t1
100f0: 00008067 ret
000100f4 <register_tm_clones>:
100f4: 00015537 lui a0,0x15
100f8: 000155b7 lui a1,0x15
100fc: 84050793 addi a5,a0,-1984 # 14840 <__TMC_END__>
10100: 84058593 addi a1,a1,-1984 # 14840 <__TMC_END__>
10104: 40f585b3 sub a1,a1,a5
10108: 4025d593 srai a1,a1,0x2
1010c: 01f5d793 srli a5,a1,0x1f
10110: 00b785b3 add a1,a5,a1
10114: 4015d593 srai a1,a1,0x1
10118: 00058c63 beqz a1,10130 <register_tm_clones+0x3c>
1011c: 00000337 lui t1,0x0
10120: 00030313 mv t1,t1
10124: 00030663 beqz t1,10130 <register_tm_clones+0x3c>
10128: 84050513 addi a0,a0,-1984
1012c: 00030067 jr t1
10130: 00008067 ret
,如果映射文件不太可能存储在字典中,则处理大文件(如果两个文件都已排序,则可以改进),
awk
或
awk -v today="$(date '+%Y-%m-%d')" '
BEGIN {
    # One separator regex serves both inputs: "," for the data file and
    # ">" surrounded by spaces for the mapping file.
    FS = ",|[ ]+[>][ ]+"
    OFS = ","
}
# While reading the first file, record key -> replacement.
NR == FNR {
    lookup[$1] = $2
    next
}
# Second file: print first field, looked-up second field, and the date.
{
    print $1, lookup[$2], today
}
' mapping file_with_million
你明白了,
awk -v stamp="2017-01-25" '
BEGIN {
    # Split on either a comma (data file) or a space-padded ">" (mapping
    # file); join output fields with a comma.
    FS = ",|[ ]+[>][ ]+"
    OFS = ","
}
# First input file: build the replacement table.
NR == FNR {
    table[$1] = $2
    next
}
# Remaining input: substitute the second field and append the fixed date.
{
    print $1, table[$2], stamp
}
' mapping file_with_million
答案 2 :(得分:0)
以下是一个纯 Bash 的解决方案,只调用了 `date` 这一个外部命令。请注意,我把日期赋值放在了循环内部,这可能会影响性能(如果您不介意使用脚本启动时的日期,可以将其移到循环外部)。
#!/bin/bash
# Rewrite comma-separated records: the second field is replaced through a
# lookup table and the last field is replaced by the current date.
#
# Inputs (paths relative to the working directory):
#   mapping - one "KEY > VALUE" pair per line; other lines are ignored
#   data    - "first,key,rest" records; lines without two commas are skipped
# Output: one "first,VALUE,YYYY-MM-DD" line per matching record on stdout.
mappings_file="mapping"
data_file="data"

# Regexes live in variables so the right-hand side of =~ can stay unquoted
# without having to escape shell metacharacters such as ">".
mapping_re='^[[:space:]]*([^[:space:]]+)[[:space:]]*>[[:space:]]*([^[:space:]]+)[[:space:]]*$'
record_re='^(.*),(.*),.*$'

# Create a mapping array
declare -A mappings
# "|| [[ -n $line ]]" keeps a final line that has no trailing newline;
# plain "read" returns non-zero for it and the loop would drop it.
while IFS= read -r line || [[ -n "$line" ]]; do
  [[ "$line" =~ $mapping_re ]] || continue
  key="${BASH_REMATCH[1]}"
  value="${BASH_REMATCH[2]}"
  mappings["$key"]="$value"
done <"$mappings_file"

# Perform replacement
while IFS= read -r line || [[ -n "$line" ]]; do
  # The regex requires that there be no extra spacing in the data file;
  # it could be adjusted to allow it, using the same technique as above
  [[ "$line" =~ $record_re ]] || continue
  first="${BASH_REMATCH[1]}"
  key="${BASH_REMATCH[2]}"
  # If you trust that the date will not change during execution,
  # you could put the next assignment outside of the loop
  date="$(date +"%Y-%m-%d")"
  value="${mappings[$key]}"
  # If you need to handle missing mappings, do it here before printing
  printf "%s\n" "$first,$value,$date"
done <"$data_file"