如何打印文本文件的某些部分?(Python)

时间:2020-05-01 03:08:12

标签: python string text-files

# Fragment of a view: flattens the selected trails into one dict whose keys are
# 'id<N>', 'name<N>' and 'location<N>' (N = list position + 1), then renders
# trails.html with it.
# NOTE(review): the indentation of this pasted fragment looks damaged (the `for`
# is indented below a plain assignment and `return` sits at column 0) — confirm
# against the original function.
trailDict = {}
    for i in range(numTrails):
        # NOTE(review): `key` is never used; each pass of this inner loop writes
        # the same three entries for trail i, so it only repeats the work.
        for key in selectTrail[i].items():
            currentTrail = selectTrail[i]
            # Key names carry a 1-based suffix.
            didkey = 'id'+str(i+1)
            dnamekey = 'name'+str(i+1)
            dlocationkey = 'location'+str(i+1)
            # Values are read from the trail's 'id', 'name' and 'location' entries.
            didvalue = currentTrail['id']
            dnamevalue = currentTrail['name']
            dlocationvalue = currentTrail['location']
            trailDict.update({didkey:didvalue})
            trailDict.update({dnamekey:dnamevalue})
            trailDict.update({dlocationkey:dlocationvalue})
# Pass the flattened dict to the template together with the other view values.
return render_template('trails.html',response = response,form = form,city = city, lat = lat,
     long = longit, numTrails = numTrails,selectTrail = selectTrail, latlong = latlong, trailDict = trailDict)

我正在读取一个日志文件,想根据用户的输入只打印其中的一部分内容:用户给出开始日期和结束日期(这些日期都出现在文件中),我想单独打印出这两个日期之间的 ClientIP。

<!-- Two-column table with one row per entry of trailDict. -->
<table align="center" style="width: 80%">
    <tr>
        <th>KEY</th>
        <th>VALUE</th>
    </tr>

    {# The loop wraps the whole row so every <tr> opened is also closed. #}
    {% for key, value in trailDict.items() %}
    <tr>
        <td> {{ key }} </td>
        {# items() already yields the value itself; no further lookup is needed. #}
        <td> {{ value }} </td>
    </tr>
    {% endfor %}
</table>

如您所见,我可以把它们全部打印出来,因为那就是文件中的所有内容;但我只希望取出文件中位于指定日期范围内的那一部分并打印。下面是文件内容的示例以及我期望的结果:例如日期范围为2016年1月27日到2016年2月10日时,针对下面提供的示例,应当打印这4行的客户端IP:188.23.144.118 192.164.248.191 192.164.248.191 205.167.170.15

import os
import re
from collections import Counter
from collections import OrderedDict
from datetime import datetime
# Empty module-level containers; they are not referenced again in the visible
# part of the script.
fileNames, textInfo = [], []
fileDict = dict()

# Report the directory the script was started from.
currentDirectoryPath = os.getcwd()
print(currentDirectoryPath)


# Pattern for one access-log line. Named groups:
#   clientIP   - four dot-separated runs of 1-3 digits at the start of the line
#   timestamp  - a dd/Mon/yyyy date directly after a '['
#   action     - 3-4 upper-case letters directly after a double quote
#   statuscode - three digits (first digit 1-5) after a later double quote
# Every fragment is a raw string so that \d, \s and the quote reach the regex
# engine unchanged instead of depending on Python passing unknown string
# escapes through (which raises a warning on current interpreters).
regexp = re.compile(
    r'(?P<clientIP>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).+\['
    r'(?P<timestamp>\d{2}/[A-Z][a-z]{2}/\d\d\d\d).+"'
    r'(?P<action>[A-Z]{3,4}).+"'
    r'\s*(?P<statuscode>[1-5][0-9][0-9])'
)



# Move into the log directory; the relative file name below is resolved there.
os.chdir("/content/drive/log")
currentDirectoryPath = os.getcwd()
listOfFileNames = os.listdir(currentDirectoryPath)

# Counters for matching / non-matching lines and one tally per captured field.
matched = 0
failed = 0
cnt_clientIPs = Counter()
cnt_clientAction = Counter()
cnt_clientTimeStamp = Counter()
cnt_clientStatusCode = Counter()

# The context manager closes the log as soon as the scan is finished.
with open('access_1.log', 'r') as f:
    for line in f:
        m = regexp.match(line)
        if not m:
            failed += 1
            continue

        matched += 1
        clientIP, timestamp, action, statuscode = m.group(
            'clientIP', 'timestamp', 'action', 'statuscode')
        cnt_clientIPs[clientIP] += 1
        cnt_clientAction[action] += 1
        # This tally was declared but never filled before.
        cnt_clientTimeStamp[timestamp] += 1
        cnt_clientStatusCode[statuscode] += 1

        # One record per matching line. The literal opens with exactly three
        # quotes so no stray quote characters are printed before "client".
        print("""\
client .........: %s
timestamp ......: %s
action .........: %s
statuscode.........: %s
""" % (clientIP, timestamp, action, statuscode))




# Ask how many clients to list, then print the match statistics followed by
# the most frequent client IPs.
userInputIP = input("Enter how many of the top clients you want to see. ")

# Match statistics; the second line is followed by one blank line.
print('[*] %d lines matched the regular expression' % matched)
print('[*] %d lines failed to match the regular expression' % failed)
print()

# Heading framed by two rules.
print('[*] ============================================')
print('[*] '+ userInputIP +' Most Frequently Occurring Clients Queried')
print('[*] ============================================')

# The answer is converted to int only here, after the heading is printed.
topClients = cnt_clientIPs.most_common(int(userInputIP))
for entry in topClients:
    # entry is an (ip, count) pair and fills both placeholders.
    print('[*] %30s: %d' % entry)
print('[*] ============================================')



# Optional second report: the top-N client listing restricted to an inclusive
# date range that the user types in as dd/Mon/yyyy. Any answer other than
# "yes" (case-insensitive) repeats the unrestricted listing.
userInput = input("Would you like to see all clients from a certian date? (Yes or no)")
if userInput.lower() == "yes":
    startdateInput = input("What date would you like to start at? ")
    enddateInput = input("What date would you like to end at? ")
    # strptime raises ValueError when an answer does not fit the format.
    date_start = datetime.strptime(startdateInput, '%d/%b/%Y')
    date_end = datetime.strptime(enddateInput, '%d/%b/%Y')

    # Read the log again from disk: a file object that was already iterated
    # to its end yields nothing on a second pass.
    cnt_rangeIPs = Counter()
    with open('access_1.log', 'r') as logFile:
        for line in logFile:
            m = regexp.match(line)
            if not m:
                continue  # line not recognised by the pattern
            # The 'timestamp' group holds only the dd/Mon/yyyy part.
            log_date = datetime.strptime(m.group('timestamp'), '%d/%b/%Y')
            if date_start <= log_date <= date_end:
                cnt_rangeIPs[m.group('clientIP')] += 1

    print('[*] ============================================')
    print('[*] '+ userInputIP +' Most Frequently Occurring Clients Queried')
    print('[*] ============================================')
    for clientIP, count in cnt_rangeIPs.most_common(int(userInputIP)):
        print('[*] %30s: %d' % (clientIP, count))

else:
    print('[*] ============================================')
    print('[*] '+ userInputIP +' Most Frequently Occurring Clients Queried')
    print('[*] ============================================')
    for clientIP, count in cnt_clientIPs.most_common(int(userInputIP)):
        print('[*] %30s: %d' % (clientIP, count))
print('[*] ============================================')

Enter how many of the top clients you want to see. 10
[*] 49997 lines matched the regular expression
[*] 3 lines failed to match the regular expression

[*] ============================================
[*] 10 Most Frequently Occurring Clients Queried
[*] ============================================
[*]                 205.167.170.15: 15695
[*]                  79.142.95.122: 3207
[*]                  52.22.118.215: 734
[*]                  84.112.161.41: 712
[*]                   37.1.206.196: 371
[*]                   91.200.12.22: 287
[*]                178.191.155.244: 284
[*]                 198.50.160.104: 249
[*]                   84.115.10.14: 234
[*]                  93.83.250.186: 219
[*] ============================================
Would you like to see all clients from all time or a certian date? (Yes or no)yes
What date would you like to start at? 05/feb/2016
What date would you like to end at? 10/feb/2016
[*] ============================================
[*] 10 Most Frequently Occurring Clients Queried
[*] ============================================
[*] ============================================

1 个答案:

答案 0 :(得分:1)

您可以使用:

from datetime import datetime

# Inclusive date range to keep, written as dd/Mon/yyyy.
date_start = datetime.strptime("27/jan/2016", '%d/%b/%Y')
date_end = datetime.strptime("10/feb/2016", '%d/%b/%Y')

# Print every line of access.log whose date lies inside the range.
with open("access.log") as f:
    for line in f:
        try:
            # Fourth whitespace-separated field, minus a leading '[', cut at
            # the first ':' — expected to leave a dd/Mon/yyyy date.
            log_date = line.split()[3].lstrip("[").split(":")[0]
            log_date = datetime.strptime(log_date, '%d/%b/%Y')
        except (IndexError, ValueError):
            # Blank or malformed line: it has no usable date, so skip it
            # instead of aborting the whole run.
            continue
        if date_start <= log_date <= date_end:
            # Drop the line's own newline; print() adds exactly one.
            print(line.rstrip("\n"))

Demo