不知道这是否可以在这里问,因为它不是编程但我不知道还能去哪里: 我想以一致的方式组织以下数据。目前这是一团糟,只有前两列(逗号分隔)一致。其余列的编号可以是1-9,通常不同。 换句话说,我想对它进行排序,使文本匹配(一行中的所有值列,连续的所有反冲列等)。然后我可以删除文本并添加标题,它仍然有意义。
bm_wp_upg_o_t1micro, sight, value = 3, zoom = 3, recoil = 1, spread_moving = -1
bm_wp_upg_o_marksmansight_rear, sight, value = 3, zoom = 1, recoil = 1, spread = 1
bm_wp_upg_o_marksmansight_front, extra, value = 1
bm_wp_m4_upper_reciever_edge, upper_reciever, value = 3, recoil = 1
bm_wp_m4_upper_reciever_round, upper_reciever, value = 1
bm_wp_m4_uupg_b_long, barrel, value = 4, damage = 1, spread = 1, spread_moving = -2, concealment = -2
任何建议(即使是在正确的位置实际问这个的地方)都会很棒。 上下文只是从我正在尝试组织的游戏文件中删除的原始数据。
答案 0 :(得分:1)
我担心正则表达式对你的帮助不大,因为你输入的不规则性(它可以匹配它,但是它可能是一个让所有这些都安排好的熊) 。这可以通过任何编程语言轻松完成,但对于这样的东西,我总是去awk。
假设您的输入位于名为input.txt
的文件中,请将以下内容放入名为parse.awk
的程序中:
BEGIN {
FS=" *, *";
formatStr = "%32s,%8s,%8s,%8s,%10s,%16s,%8s,%18s,%10s,%10s,%16s,%16s\n";
printf( formatStr, "id", "sight", "value", "zoom", "recoil", "spread_moving", "extra", "upper_receiver", "barrel", "damage", "spread_moving", "concealment" );
}
{
split("",a);
for( i=2; i<=NF; i++ ) {
if( split( $(i), kvp, " *= *" ) == 1 ) {
a[kvp[1]] = "x";
} else {
a[kvp[1]] = gensub( /^\s*|\s*$/, "", "g", kvp[2] );
}
}
printf( formatStr, $1, a["sight"], a["value"], a["zoom"], a["recoil"],
a["spread_moving"], a["extra"], a["upper_receiver"],
a["barrel"], a["damage"], a["spread_moving"], a["concealment"] );
}
对它运行awk:
awk -f parse.awk input.txt
获得你的输出:
id, sight, value, zoom, recoil, spread_moving, extra, upper_receiver, barrel, damage, spread_moving, concealment
bm_wp_upg_o_t1micro, x, 3, 3, 1, -1, , , , , -1,
bm_wp_upg_o_marksmansight_rear, x, 3, 1, 1, , , , , , ,
bm_wp_upg_o_marksmansight_front, , 1, , , , x, , , , ,
bm_wp_m4_upper_reciever_edge, , 3, , 1, , , , , , ,
bm_wp_m4_upper_reciever_round, , 1, , , , , , , , ,
bm_wp_m4_uupg_b_long, , 4, , , -2, , , x, 1, -2, -2
请注意,我选择只使用'x'作为视线,这似乎是现在/不存在的事情。你可以在那里使用任何你想要的东西。
如果您使用的是Linux或Macintosh,则应该有awk可用。如果您使用的是Windows,则必须安装它。
答案 1 :(得分:1)
我确实制作了另一个awk版本。我认为这应该更容易阅读。 从文件中读取所有值/列,使其尽可能动态。
awk -F, '
{
ID[$1]=$2 # use column 1 as index
for (i=3;i<=NF;i++ ) # loop through all fields from #3 to end
{
gsub(/ +/,"",$i) # remove space from field
split($i,a,"=") # split field in name and value a[1] and a[2]
COLUMN[a[1]]++ # store field name as column name
DATA[$1" "a[1]]=a[2] # store data value in DATA using field #1 and column name as index
}
}
END {
printf "%49s ","info" # print info
for (i in COLUMN)
{printf "%15s",i} # print column name
print ""
for (i in ID) # loop through all ID
{
printf "%32s %16s ",i, ID[i] # print ID and info
for (j in COLUMN)
{
printf "%14s ",DATA[i" "j]+0 # print value
}
print ""
}
}' file
输出
info spread recoil zoom concealment spread_moving damage value
bm_wp_m4_upper_reciever_round upper_reciever 0 0 0 0 0 0 1
bm_wp_m4_uupg_b_long barrel 1 0 0 -2 -2 1 4
bm_wp_upg_o_marksmansight_rear sight 1 1 1 0 0 0 3
bm_wp_upg_o_marksmansight_front extra 0 0 0 0 0 0 1
bm_wp_m4_upper_reciever_edge upper_reciever 0 1 0 0 0 0 3
bm_wp_upg_o_t1micro sight 0 1 3 0 -1 0 3
答案 2 :(得分:0)
坚持Ethan的回答 - 这只是我享受自己。 (是的,这让我非常奇怪!)
awk 'BEGIN {
# f_idx[field] holds the column number c for a field=value item
# f_name[c] holds the names
# f_width[c] holds the width of the widest value (or the field name)
# f_fmt[c] holds the appropriate format
FS = " *, *"; n = 2;
f_name[0] = "id"; f_width[0] = length(f_name[0])
f_name[1] = "type"; f_width[1] = length(f_name[1])
}
{
#-#print NR ":" $0
line[NR,0] = $1
len = length($1)
if (len > f_width[0])
f_width[0] = len
line[NR,1] = $2
len = length($2)
if (len > f_width[1])
f_width[1] = len
for (i = 3; i <= NF; i++)
{
split($i, fv, " = ")
#-#print "1:" fv[1] ", 2:" fv[2]
if (!(fv[1] in f_idx))
{
f_idx[fv[1]] = n
f_width[n++] = length(fv[1])
}
c = f_idx[fv[1]]
f_name[c] = fv[1]
gsub(/ /, "", fv[2])
len = length(fv[2])
if (len > f_width[c])
f_width[c] = len
line[NR,c] = fv[2]
#-#print c ":" f_name[c] ":" f_width[c] ":" line[NR,c]
}
}
END {
for (i = 0; i < n; i++)
f_fmt[i] = "%s%" f_width[i] "s"
#-#for (i = 0; i < n; i++)
#-# printf "%d: (%d) %s %s\n", i, f_width[i], f_name[i], f_fmt[i]
#-# pad = ""
for (j = 0; j < n; j++)
{
printf f_fmt[j], pad, f_name[j]
pad = ","
}
printf "\n"
for (i = 1; i <= NR; i++)
{
pad = ""
for (j = 0; j < n; j++)
{
printf f_fmt[j], pad, line[i,j]
pad = ","
}
printf "\n"
}
}' data
此脚本适应它在文件中找到的数据。它将列标题“id”分配给输入的第1列,并将“type”分配给第2列。对于第3..N列中的每组值,它将数据拆分为键(在{{1}中) })和值(在fv[1]
中)。如果之前未看到该键,则会为其分配一个新的列号,并将该键存储为列名,并将键的宽度存储为初始列宽。然后将该值存储在该行中的相应列中。
当读取所有数据时,脚本知道列标题将是什么。然后它可以创建一组格式字符串。然后它打印标题和所有数据行。如果您不想要固定宽度输出,那么您可以大大简化脚本。可以对此脚本进行一些(主要是次要的)简化。
fv[2]
bm_wp_upg_o_t1micro, sight, value = 3, zoom = 3, recoil = 1, spread_moving = -1
bm_wp_upg_o_marksmansight_rear, sight, value = 3, zoom = 1, recoil = 1, spread = 1
bm_wp_upg_o_marksmansight_front, extra, value = 1
bm_wp_m4_upper_receiver_edge, upper_receiver, value = 3, recoil = 1
bm_wp_m4_upper_receiver_round, upper_receiver, value = 1
bm_wp_m4_uupg_b_long, barrel, value = 4, damage = 1, spread = 1, spread_moving = -2, concealment = -2