如何基于CSV中的匹配列数据将单个IP转换为范围

时间:2019-06-19 16:32:34

标签: bash awk sed ip port

我需要以某种方式对csv数据进行分组,以便将具有相同端口数据(列G)的未排序的连续IP地址合并到一个范围中。

# Sort the list by port data (field 7), then by IP address (field 6).
# File names are quoted so a $filename containing spaces or glob characters
# is still passed as a single word.
sort --field-separator=',' -k 7 -k 6 "$filename.tmp1" > "$filename.tmp2"
# Combine consecutive IPs into ranges; ??? marks the awk logic still to be written.
awk -F, 'BEGIN {SUBSEP=OFS=FS} ???' "$filename.tmp2" > "$filename.tmp3"

输入:

"A","B","C","D","E","10.184.110.34","10110 36111" 
"A","B","C","D","E","10.184.110.33","1000" 
"A","B","C","D","E","10.184.110.36","10110 36111" 
"A","B","C","D","E","10.184.110.35","10110 36111" 
"A","B","C","D","E","10.184.110.38","10110" 
"A","B","C","D","E","10.184.110.39","10110 36111" 
"A","B","C","D","E","10.184.110.37","10110" 
"A","B","C","D","E","10.184.110.40","10110 36111" 
"A","B","C","D","E","10.184.110.42","10110 36111"

预期输出:

"A","B","C","D","E","10.184.110.33","1000" 
"A","B","C","D","E","10.184.110.34-10.184.110.36","10110 36111"
"A","B","C","D","E","10.184.110.37-10.184.110.38","10110"
"A","B","C","D","E","10.184.110.39-10.184.110.40","10110 36111" 
"A","B","C","D","E","10.184.110.42","10110 36111"

2 个答案:

答案 0 :(得分:0)

awk解决方案。

# Merge consecutive IPv4 addresses that share the same port data into ranges.
#
# Input (stdin): CSV rows with exactly 7 comma-separated fields; field 6 is a
#   (quoted) IPv4 address and field 7 the (quoted) port data. Rows with a
#   different field count or a malformed address are skipped. As with any
#   plain -F, parsing, commas inside quoted fields are not supported.
# Output (stdout): one row per range, ordered numerically by the first
#   address of the range. Fields 1-5 are taken from the first row of the
#   range; field 6 is "first" or "first-last"; field 7 is the port data with
#   trailing whitespace removed.
#
# Addresses are compared numerically, so .9 and .10 are adjacent and a range
# may continue across an octet boundary (10.0.0.255 -> 10.0.1.0). A repeated
# address with the same port data is folded into the range it belongs to.
merge_ip_ranges() {
  local tab
  tab=$(printf '\t')

  # Stage 1 (decorate): emit "ports <TAB> zero-padded IP <TAB> fields 1-5".
  # Zero-padding every octet makes a plain byte-wise sort order the
  # addresses numerically.
  awk -F, '
    NF != 7 { next }
    {
      ip = $6
      gsub(/[" \t\r]/, "", ip)
      if (ip !~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) next
      split(ip, octet, ".")
      ports = $7
      # Trailing blanks after the closing quote must not split a group.
      sub(/[ \t\r]+$/, "", ports)
      printf "%s\t%03d.%03d.%03d.%03d\t%s,%s,%s,%s,%s\n",
        ports, octet[1], octet[2], octet[3], octet[4], $1, $2, $3, $4, $5
    }
  ' |
  # Stage 2: group by port data, then order addresses within each group.
  LC_ALL=C sort -t "$tab" -k1,1 -k2,2 |
  # Stage 3 (merge): collapse runs of consecutive addresses and emit
  # "padded first IP <TAB> output row" so the rows can be re-sorted by IP.
  awk -F '\t' '
    # Turn a zero-padded key back into a normal dotted quad.
    function dotted(key,   o) {
      split(key, o, ".")
      return (o[1] + 0) "." (o[2] + 0) "." (o[3] + 0) "." (o[4] + 0)
    }
    # 32-bit numeric value of a zero-padded key.
    function numeric(key,   o) {
      split(key, o, ".")
      return ((o[1] * 256 + o[2]) * 256 + o[3]) * 256 + o[4]
    }
    # Print the range collected so far; does nothing before the first row.
    function emit(   range) {
      if (!have) return
      range = dotted(first_key)
      if (last_key != first_key) range = range "-" dotted(last_key)
      printf "%s\t%s,\"%s\",%s\n", first_key, lead, range, ports
    }
    {
      cur_ports = $1 ""          # force a string comparison of port data
      n = numeric($2)
      same = (have && cur_ports == ports)
      if (same && n == last_n) next            # repeated address
      if (!(same && n == last_n + 1)) {
        emit()
        ports = cur_ports
        first_key = $2
        # Everything after the second tab: fields 1-5 of the first row.
        lead = $0
        sub(/^[^\t]*\t[^\t]*\t/, "", lead)
        have = 1
      }
      last_key = $2
      last_n = n
    }
    END { emit() }
  ' |
  # Stage 4 (undecorate): order the ranges by first address, drop the key.
  LC_ALL=C sort -t "$tab" -k1,1 |
  cut -f2-
}

merge_ip_ranges < in.txt

答案 1 :(得分:0)

这是使用python的解决方案。一旦需要处理带引号的CSV文件,我就不再使用sed / awk / bash。例如,awk无法区分引号内的逗号和作为分隔符的逗号。

该python解决方案还做了一些IP数值运算,使范围不局限于最后一个八位字节,可以跨越八位字节的边界(例如从10.0.0.255延续到10.0.1.0)。

以下是执行脚本的方法:

$ python IPs2Ranges.py <( sort --field-separator=',' -k 6,6  input ) 
"A","B","C","D","E","10.184.110.33","1000"
"A","B","C","D","E","10.184.110.34-10.184.110.36","10110 36111"
"A","B","C","D","E","10.184.110.37-10.184.110.38","10110"
"A","B","C","D","E","10.184.110.39-10.184.110.40","10110 36111"
"A","B","C","D","E","10.184.110.42","10110 36111"

这是脚本:

#!/usr/bin/python

import csv
import sys

def IPtoNum( IP ):
  """Return the integer value of a dotted address string.

  The string is split on '.', and each part is folded in as the next
  lower 8 bits of the result. int() raises ValueError for a part that is
  not a decimal number (including the empty string).
  """
  total = 0
  for part in IP.split('.'):
    # Multiplying by 256 makes room for the next octet.
    total = total * 256 + int(part)
  return total

def NumToIP( num ):
  """Return the dotted-quad string for the low 32 bits of integer num.

  Uses floor division (via divmod) so the octets stay integers on
  Python 3 as well as Python 2; true division would produce floats such
  as "34.0" and a wrong address.
  """
  octets = []
  for _ in range(4):
    # Peel off the least significant octet first.
    num, low = divmod(num, 256)
    octets.append(str(low))

  # Octets were collected low-to-high; an address is written high-to-low.
  return ".".join(reversed(octets))


def nextIP( IP, inc=1 ):
  """Return the address that lies inc positions after IP (default: the next one).

  The address is converted with IPtoNum, offset by inc, and converted
  back to a string with NumToIP.
  """
  return NumToIP( IPtoNum(IP) + inc )

def _printRange( lead, firstIP, lastIP, ports ):
  """Write one output row: the five leading columns, the IP or IP range, the ports."""
  if firstIP == lastIP:
    ipField = firstIP
  else:
    ipField = firstIP + "-" + lastIP
  # Every value is wrapped in double quotes, matching the input format.
  sys.stdout.write( ",".join( '"%s"' % value for value in lead + [ipField, ports] ) + "\n" )


# Read the CSV named by the first command-line argument and merge runs of
# rows whose ports match and whose IPs are numerically consecutive. Rows
# are processed in file order, so the input must already be sorted by IP.
with open(sys.argv[1], 'r') as f:
  csvreader = csv.reader( f )
  lead = None        # columns A-E of the range being built; None until a row is seen
  firstIP = ''
  lastIP = ''
  lastPorts = ''
  for line in csvreader:
    if len(line) < 7:
      # Blank or short rows carry no IP/ports columns.
      continue
    IP = line[5].strip()
    # A blank after the closing quote ends up inside the parsed field.
    ports = line[6].rstrip()
    # Comparing numeric values avoids converting back to a string and
    # never evaluates an address before the first row has been stored.
    if lead is not None and ports == lastPorts and IPtoNum(IP) == IPtoNum(lastIP) + 1:
      lastIP = IP
      continue

    if lead is not None:
      _printRange( lead, firstIP, lastIP, lastPorts )

    # Start a new range; its leading columns come from this row, not from
    # whichever row later terminates the range.
    lead = line[:5]
    firstIP = IP
    lastIP = IP
    lastPorts = ports

  # Flush the final range, if the file contained any usable row.
  if lead is not None:
    _printRange( lead, firstIP, lastIP, lastPorts )