我从一个组织得不好的 SQL 表中获取了一个数据帧。该表中每个通道各占一行。我可以把这些信息提取到 Python 数据帧中,并打算做进一步处理,但现在只想先把它转换成更有用的格式。
示例输入:
from Tkinter import *
from las import LASReader
from pprint import pprint
import tkFileDialog
import matplotlib, sys
matplotlib.use('TkAgg')
import numpy as np
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2TkAgg
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
# Create the application's top-level Tk window; the widgets and callbacks
# in this file attach to it.
root = Tk()
# Tk geometry string "WIDTHxHEIGHT+X+Y": a 900x700 pixel window whose
# top-left corner is offset 10 px from the screen's left and top edges.
root.geometry("900x700+10+10")
def read_file():
    """Ask the user for a LAS file and list its curve names in ``parent``.

    Opens a file-selection dialog, parses the chosen file with
    ``LASReader`` (substituting ``np.nan`` for null values) and appends
    every name in ``log.curves.names`` to the end of the ``parent``
    listbox.  Nothing happens if the dialog is cancelled.
    """
    filename = tkFileDialog.askopenfilename()
    if not filename:
        # The dialog returns an empty value when the user cancels;
        # passing that to open() would raise.
        return
    # The context manager closes the handle even if parsing fails.  The
    # loop stays inside the block so the file is still open while the
    # reader's attributes are accessed.
    with open(filename) as f:
        log = LASReader(f, null_subs=np.nan)
        for curve in log.curves.names:
            parent.insert(END, curve)
def add_name():
    """Append the active entry of the ``parent`` listbox to ``child``."""
    child.insert(END, parent.get(ACTIVE))
# Curve mnemonic -> LAS file that show_graph() reads that curve from.
_CURVE_SOURCES = {
    "GR": 'example1.las',
    "NPHI": 'Shamar-1.las',
    "DPHI": 'Shamar-1.las',
}


def _build_curve_figure(log, mnemonic):
    """Return a Figure plotting curve ``mnemonic`` of ``log`` against DEPT."""
    # A bare Figure (not pyplot) is used so that no figure is registered
    # with pyplot's global figure manager on every button click.
    fig = Figure(figsize=(6, 7.5))
    ax = fig.add_subplot(111)
    depth_info = log.curves.DEPT
    curve_info = getattr(log.curves, mnemonic)
    ax.plot(log.data[mnemonic], log.data['DEPT'])
    ax.set_ylabel(depth_info.descr + " (%s)" % depth_info.units)
    ax.set_xlabel(curve_info.descr + " (%s)" % curve_info.units)
    # Limits are passed as (stop, start), i.e. the reverse of start/stop,
    # so the depth axis is drawn inverted relative to the file's order.
    ax.set_ylim(log.stop, log.start)
    ax.set_title(log.well.WELL.data + ', ' + log.well.DATE.data)
    ax.grid(True)
    return fig


def show_graph():
    """Plot the currently selected curve and embed the plot in ``root``.

    Reads the current selection through the ``child`` listbox.  If the
    selected text is one of the mnemonics in ``_CURVE_SOURCES`` ("GR",
    "NPHI", "DPHI"), the matching LAS file is parsed, the curve is
    plotted against ``DEPT`` and the resulting canvas is placed in the
    main window.  Any other selection is ignored.

    Prints ``no selection`` when there is no selection to read.  Errors
    while opening or parsing the LAS file are no longer reported as
    "no selection"; they propagate to Tk's callback error handler.
    """
    # Use the existing ``child`` listbox; creating a fresh Listbox on
    # every click only leaked an unplaced widget.
    try:
        s = child.selection_get()
    except TclError:
        print('no selection')
        return

    source = _CURVE_SOURCES.get(s)
    if source is None:
        return

    print('selected: %s' % s)
    # Build the figure while the file is still open, then close it.
    with open(source) as f:
        log = LASReader(f, null_subs=np.nan)
        fig = _build_curve_figure(log, s)

    dataPlot = FigureCanvasTkAgg(fig, master=root)
    dataPlot.show()
    # ``root`` already has children managed by pack() (the Entry and the
    # OK button), and Tk does not allow grid() and pack() in the same
    # master.  Use place(), like the other widgets in this window,
    # at the position of the grey placeholder canvas.
    dataPlot.get_tk_widget().place(x=340, y=50)
def remove_name():
    """Delete the active entry from the ``child`` listbox."""
    child.delete(ACTIVE)
def btnClick():
    """Callback for the "OK" button; intentionally does nothing."""
    return None
# Text entry and "OK" button, stacked at the top of the window with pack().
# NOTE(review): nothing in the visible code reads the entry's contents, and
# the button's callback (btnClick) is a no-op.
e = Entry(root)
e.pack(padx=5)
b = Button(root, text="OK", command=btnClick)
b.pack(pady=5)
# Create the grey placeholder canvas; width and height are in pixels.
# No master is passed, so Tkinter attaches it to the default root window.
canvas = Canvas(width = 490, height = 600, bg = 'grey')
# Position the canvas at absolute window coordinates with place().
canvas.place(x=340, y=50)
# Caption placed just above the ``parent`` listbox.
etiqueta = Label(root, text='Nemonics:')
etiqueta.place(x=10, y=30)
# ``parent`` lists the curve names that read_file() loads from a LAS file.
parent = Listbox(root)
root.title("Viewer")
parent.place(x=5, y=50)
# "Graph" runs show_graph().
selec_button = Button(root, text='Graph',
command=show_graph)
selec_button.place(x=340, y=20)
# "<<delete" removes the active entry from ``child`` via remove_name().
remove_button = Button(root, text='<<delete',
command=remove_name)
remove_button.place(x=138, y=150)
# "Add>>" copies the active ``parent`` entry into ``child`` via add_name().
add_button = Button(root, text='Add>>',
command=add_name)
add_button.place(x=138, y=75)
# ``child`` holds the entries the user has added with "Add>>".
child = Listbox(root)
child.place(x=210, y=50)
# "load file" opens a file dialog and fills ``parent`` via read_file().
butt = Button(root, text="load file", command=read_file)
butt.place(x=10, y=5)
# Hand control to Tk's event loop; this call blocks until the window closes.
root.mainloop()
由以下代码生成:
# Build the sample long-format frame ``C``: one row per (timestamp, channel)
# reading, with columns date / chNum / chNam / value.
COLUMNS = ['date', 'chNum', 'chNam', 'value']
rows = [
    [datetime.datetime(2016, 8, 8, 0, 0, 1, 1000), 45, 'foo1', 1],
    [datetime.datetime(2016, 8, 8, 0, 0, 1, 1000), 46, 'foo2', 12.3],
    [datetime.datetime(2016, 8, 8, 0, 0, 2, 1000), 45, 'foo1', 10],
    [datetime.datetime(2016, 8, 8, 0, 0, 2, 1000), 46, 'foo2', 11.3],
]
# Each row becomes its own one-row frame built from a mixed-type (object)
# array, exactly as before.  The frames are then combined with a single
# pd.concat call: DataFrame.append was removed in pandas 2.0 and copied the
# whole frame on every call.
frames = [
    pd.DataFrame(np.array(row, dtype=object).reshape(1, len(COLUMNS)),
                 columns=COLUMNS)
    for row in rows
]
# Every one-row frame keeps its own index label 0, so the result has a
# repeated index (0, 0, 0, 0); pass ignore_index=True to renumber instead.
C = pd.concat(frames)
我想要
date chNum chNam value
0 2016-08-08 00:00:01.001000 45 foo1 1
0 2016-08-08 00:00:01.001000 46 foo2 12.3
0 2016-08-08 00:00:02.001000 45 foo1 10
0 2016-08-08 00:00:02.001000 46 foo2 11.3
我有一个解决方案:先生成一个唯一日期的列表,然后对每个日期遍历数据帧,取出每个通道的值,创建一个新行。这样编程有点乏味(而且容易出错!),所以我想知道是否有更好的方法,可以利用 Pandas 自带的工具来完成。