我在 Python 中从 CSV 文件读取特定日期的数据。现在,我希望进一步按时间段筛选,例如只获取某一天 13:30 到 14:30 之间的数据。我的 CSV 文件如下所示:
15 2017/02/07 17:30:45.983
15 2017/02/07 17:30:51.109
16 2017/02/07 17:30:56.008
16 2017/02/07 17:31:01.029
我目前的代码是这样的:
import csv
from tkinter import *
from tkinter.filedialog import askopenfilename
from tkinter.messagebox import showwarning, showinfo
import datetime
import matplotlib.pyplot as plt
#csv_file = csv.reader(open("C:\Users\Lala Rushan\Downloads\ARIF Drop Monitoring Final\ARIF Drop Monitoring Final\DataLog.csv"))
from Tools.scripts.treesync import raw_input
class App(Frame):
    """Tk frame that totals the first CSV column over a date/time range.

    The frame shows three buttons (browse, count, exit) and two entry
    fields holding the lower and upper bound of the range.  Each bound is
    typed as ``YYYY/MM/DD`` or ``YYYY/MM/DD HH:MM[:SS[.ffffff]]``.  A bare
    date used as the upper bound covers that whole day, so plain date
    ranges stay inclusive.
    """

    # Log file read when no file has been picked through the browse dialog.
    DEFAULT_LOG_PATH = r"H:\DataLog.csv"
    # Factor applied to the summed first column before it is reported.
    # NOTE(review): presumably the volume represented by one count - confirm.
    VOLUME_PER_UNIT = 0.05
    # Accepted date+time layouts, most specific first.
    _TIMESTAMP_FORMATS = (
        "%Y/%m/%d %H:%M:%S.%f",
        "%Y/%m/%d %H:%M:%S",
        "%Y/%m/%d %H:%M",
    )
    _DATE_FORMAT = "%Y/%m/%d"

    # Path chosen in askfilename(); None until a CSV file has been selected.
    in_file = None

    def __init__(self, master):
        """Create the buttons and the two range entry fields inside *master*."""
        Frame.__init__(self, master)
        button1 = Button(self, text="Browse for a file", command=self.askfilename)
        button2 = Button(self, text="Count the file", command=self.takedate)
        button3 = Button(self, text="Exit", command=master.destroy)
        button1.grid()
        button2.grid()
        button3.grid()
        self.userInputFromRaw = Entry(self)
        self.userInputFromRaw.grid()
        self.userInputToRaw = Entry(self)
        self.userInputToRaw.grid()
        self.grid()

    def askfilename(self):
        """Ask for a CSV file and remember its path in ``self.in_file``."""
        in_file = askopenfilename()
        if not in_file:
            # The dialog was cancelled; keep any previous selection.
            return
        # Compare case-insensitively so both "x.csv" and "x.CSV" are accepted.
        if not in_file.lower().endswith('.csv'):
            showwarning('Are you trying to annoy me?', 'How about giving me a CSV file, genius?')
        else:
            self.in_file = in_file

    def CsvImport(self, csv_file):
        """Sum the first field of every row and report the scaled total.

        Args:
            csv_file: iterable of rows (sequences of strings), e.g. a
                ``csv.reader`` or a list of already filtered rows.

        Returns:
            The summed first column multiplied by ``VOLUME_PER_UNIT``.
            Empty rows and non-numeric first fields contribute 0.
        """
        dist = 0.0
        for row in csv_file:
            if not row:
                # csv.reader yields [] for blank lines.
                continue
            try:
                dist += float(row[0])
            except ValueError:
                # Non-numeric cell (e.g. a header): counts as 0.
                pass
        # Report the accumulated total, not just the last row's value.
        volume = dist * self.VOLUME_PER_UNIT
        print("Urine Volume is: %.2f" % volume)
        return volume

    @classmethod
    def _parse_timestamp(cls, text):
        """Parse a 'date time' string; raise ValueError if no layout matches."""
        for fmt in cls._TIMESTAMP_FORMATS:
            try:
                return datetime.datetime.strptime(text, fmt)
            except ValueError:
                continue
        raise ValueError("unrecognised timestamp: %r" % (text,))

    @classmethod
    def _parse_bound(cls, text, upper):
        """Turn a user-entered range bound into a ``datetime``.

        A bare date becomes midnight for the lower bound and the last
        microsecond of that day when *upper* is true.
        """
        text = " ".join(text.split())
        try:
            day = datetime.datetime.strptime(text, cls._DATE_FORMAT)
        except ValueError:
            return cls._parse_timestamp(text)
        if upper:
            return datetime.datetime.combine(day.date(), datetime.time.max)
        return day

    @classmethod
    def _parse_row(cls, row):
        """Return ``(timestamp, tokens)`` for a data row, or None if unusable.

        The row is re-tokenised on whitespace so that "15 2017/02/07 17:30:45.983",
        "15, 2017/02/07 17:30:45.983" and "15,2017/02/07,17:30:45.983" all
        yield tokens ``[value, date, time, ...]``.
        """
        tokens = " ".join(row).split()
        if len(tokens) < 3:
            return None
        try:
            stamp = cls._parse_timestamp(tokens[1] + " " + tokens[2])
        except ValueError:
            return None
        return stamp, tokens

    def takedate(self):
        """Total the rows whose timestamp lies within the entered range.

        Reads both entry fields, loads the selected file (or
        ``DEFAULT_LOG_PATH`` when none was selected), keeps the rows with
        ``from <= timestamp <= to`` and passes them to ``CsvImport``.
        """
        try:
            from_dt = self._parse_bound(self.userInputFromRaw.get(), upper=False)
            to_dt = self._parse_bound(self.userInputToRaw.get(), upper=True)
        except ValueError:
            showwarning('Invalid range',
                        'Enter both bounds as YYYY/MM/DD or YYYY/MM/DD HH:MM[:SS]')
            return
        print('From date: = ' + str(from_dt))

        path = self.in_file or self.DEFAULT_LOG_PATH
        selected = []
        try:
            # The with-block guarantees the file is closed again.
            with open(path, "r", newline="") as handle:
                for row in csv.reader(handle):
                    parsed = self._parse_row(row)
                    if parsed is None:
                        continue
                    stamp, tokens = parsed
                    if from_dt <= stamp <= to_dt:
                        selected.append(tokens)
        except OSError as exc:
            showwarning('Cannot read file', str(exc))
            return
        # Sum exactly the matching rows instead of the rest of the reader.
        self.CsvImport(selected)
# Create the top-level window, size and label it, then mount the
# application frame and hand control to the Tk event loop.
root = Tk()
root.geometry("500x500")
root.title("Urine Measurement")
app = App(root)
root.mainloop()
如何获取特定时间和特定日期的数据?
答案 0 :(得分:1)
使用 pandas 及其 DataFrame 容器,因为这是处理数据和选择数据的理想格式。请参阅以下示例:
import pandas as pd

# Read the headerless CSV; columns are labelled 0, 1, 2, ... by position.
df = pd.read_csv('eg.txt', header=None)
# Combine the date column (1) and time column (2) into a DatetimeIndex.
# pd.to_datetime is used because the pd.datetime alias was removed in pandas 2.0.
df.index = pd.DatetimeIndex(
    pd.to_datetime(df[1] + ' ' + df[2], format='%Y/%m/%d %H:%M:%S.%f')
)
此时 df 的内容如下:
>>> df
0 1 2 3
2017-02-07 17:30:45.983 15 2017/02/07 17:30:45.983 3.3
2017-02-07 17:30:51.109 15 2017/02/07 17:30:51.109 4.4
2017-02-07 17:30:56.008 16 2017/02/07 17:30:56.008 5.2
2017-02-07 17:31:01.029 16 2017/02/07 17:31:01.029 NaN
您可以选择所需的时间范围:
>>> df[pd.datetime(2017, 2, 7, 17, 30, 50):pd.datetime(2017, 2, 7, 17, 30, 58)] # Slice the wanted time
0 1 2 3
2017-02-07 17:30:51.109 15 2017/02/07 17:30:51.109 4.4
2017-02-07 17:30:56.008 16 2017/02/07 17:30:56.008 5.2
用于生成上述数据的 CSV 文件是 eg.txt,内容如下:
15,2017/02/07,17:30:45.983,3.3
15,2017/02/07,17:30:51.109,4.4
16,2017/02/07,17:30:56.008,5.2
16,2017/02/07,17:31:01.029,NaN
然后,您可以根据需要删除、创建或移动列和数据。