python,pandas,dataframe,rows to columns

时间:2016-09-14 13:52:20

标签: python pandas dataframe

我从一个组织得不好的 SQL 表中获取了一个数据帧。该表中每个通道各占一行。我可以将这些信息提取到 Python 数据帧中,并打算做进一步处理,但现在只想先把它转换为更有用的格式。

示例输入:

from Tkinter import *
from las import LASReader
from pprint import pprint
import tkFileDialog
import matplotlib, sys
matplotlib.use('TkAgg')
import numpy as np
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2TkAgg
from matplotlib.figure import Figure
import matplotlib.pyplot as plt

# Main application window; the geometry string requests a 900x700 window
# offset by +10+10 from the screen origin.
root = Tk()
root.geometry("900x700+10+10")

def read_file():
    """Ask the user for a LAS file and list its curve names in ``parent``.

    Opens a file-selection dialog, parses the chosen file with ``LASReader``
    and appends every name in ``log.curves.names`` to the ``parent`` listbox.
    Does nothing when the dialog is dismissed without choosing a file.
    """
    filename = tkFileDialog.askopenfilename()
    if not filename:
        # A cancelled dialog yields an empty value; there is nothing to open.
        return

    # Keep every use of ``log`` inside the ``with`` block so the handle is
    # closed afterwards without assuming when LASReader reads from it.
    with open(filename) as f:
        log = LASReader(f, null_subs=np.nan)
        for curve in log.curves.names:
            parent.insert(END, curve)

def add_name():
    """Append the active entry of the ``parent`` listbox to ``child``."""
    child.insert(END, parent.get(ACTIVE))

# Maps each plottable curve mnemonic to the LAS file its data is read from.
_CURVE_SOURCES = {
    'GR': 'example1.las',
    'NPHI': 'Shamar-1.las',
    'DPHI': 'Shamar-1.las',
}


def _plot_curve(mnemonic, las_path):
    """Plot curve ``mnemonic`` from ``las_path`` against DEPT and embed it in ``root``."""
    # All reads from ``log`` stay inside the ``with`` block so the file is
    # closed afterwards without assuming when LASReader consumes it.
    with open(las_path) as f:
        log = LASReader(f, null_subs=np.nan)
        curve = getattr(log.curves, mnemonic)
        depth = log.curves.DEPT

        fig = plt.figure(figsize=(6, 7.5))
        plt.plot(log.data[mnemonic], log.data['DEPT'])
        plt.ylabel(depth.descr + " (%s)" % depth.units)
        plt.xlabel(curve.descr + " (%s)" % curve.units)
        # Limits are given as (stop, start), i.e. in the reverse order of
        # the log's own start/stop values.
        plt.ylim(log.stop, log.start)
        plt.title(log.well.WELL.data + ', ' + log.well.DATE.data)
        plt.grid()

    dataPlot = FigureCanvasTkAgg(fig, master=root)
    dataPlot.show()
    # ``root`` already manages widgets with pack(), and Tk refuses to mix
    # grid() and pack() in one container, so the plot is positioned with
    # place() at the same coordinates as the grey placeholder canvas.
    dataPlot.get_tk_widget().place(x=340, y=50)


def show_graph():
    """Plot the currently selected curve mnemonic, if it is a known one.

    Reads the current selection, prints ``no selection`` when there is none,
    and otherwise plots the curve when its mnemonic is one of ``GR``,
    ``NPHI`` or ``DPHI``. Any other selection is ignored.

    Errors raised while reading or plotting the LAS file are no longer
    reported as ``no selection``; they propagate to the caller (Tkinter's
    callback error handler when invoked from the "Graph" button).
    """
    try:
        # selection_get() raises TclError when nothing is selected.
        s = child.selection_get()
    except TclError:
        print('no selection')
        return

    las_path = _CURVE_SOURCES.get(s)
    if las_path is None:
        return

    print('selected: %s' % s)
    _plot_curve(s, las_path)

def remove_name():
    """Delete the active entry from the ``child`` listbox."""
    active_entry = ACTIVE
    child.delete(active_entry)

def btnClick():
    """Placeholder handler for the "OK" button; intentionally does nothing."""
    return None

# Text entry and "OK" button, stacked at the top of the window with pack().
# The button is wired to btnClick.
e = Entry(root)
e.pack(padx=5)
b = Button(root, text="OK", command=btnClick)
b.pack(pady=5)

# Grey 490x600 pixel canvas; no master is passed to the constructor.
canvas = Canvas(width = 490, height = 600, bg = 'grey')
# Position the canvas at absolute coordinates (340, 50) with place().
canvas.place(x=340, y=50)

# Caption shown above the left-hand listbox.
etiqueta = Label(root, text='Nemonics:')
etiqueta.place(x=10, y=30)

# Left-hand listbox; read_file() inserts curve names into it.
parent = Listbox(root)

root.title("Viewer")

parent.place(x=5, y=50)

# "Graph" button: calls show_graph.
selec_button = Button(root, text='Graph',
                    command=show_graph)
selec_button.place(x=340, y=20)

# "<<delete" button: calls remove_name, which deletes from ``child``.
remove_button = Button(root, text='<<delete',
                    command=remove_name)
remove_button.place(x=138, y=150)

# "Add>>" button: calls add_name, which copies from ``parent`` to ``child``.
add_button = Button(root, text='Add>>',
                    command=add_name)
add_button.place(x=138, y=75)


# Right-hand listbox holding the names added via add_name().
child = Listbox(root)
child.place(x=210, y=50)

# "load file" button: calls read_file.
butt = Button(root, text="load file", command=read_file)
butt.place(x=10, y=5)



# Start the Tk event loop.
root.mainloop()

制作

# Sample long-format data: one row per (date, channel) reading.
columns = ['date', 'chNum', 'chNam', 'value']
rows = [
    [datetime.datetime(2016, 8, 8, 0, 0, 1, 1000), 45, 'foo1', 1],
    [datetime.datetime(2016, 8, 8, 0, 0, 1, 1000), 46, 'foo2', 12.3],
    [datetime.datetime(2016, 8, 8, 0, 0, 2, 1000), 45, 'foo1', 10],
    [datetime.datetime(2016, 8, 8, 0, 0, 2, 1000), 46, 'foo2', 11.3],
]

# DataFrame.append was removed in pandas 2.0, so the one-row frames are
# combined with a single pd.concat call instead. dtype=object keeps the
# values as written (1 stays 1, not 1.0), and the index is deliberately not
# reset, so every row keeps the label 0 as in the original construction.
C = pd.concat(
    [pd.DataFrame([row], columns=columns, dtype=object) for row in rows]
)

我想要

                             date chNum chNam value
0  2016-08-08 00:00:01.001000    45  foo1     1
0  2016-08-08 00:00:01.001000    46  foo2  12.3
0  2016-08-08 00:00:02.001000    45  foo1    10
0  2016-08-08 00:00:02.001000    46  foo2  11.3

我有一个解决方案:先从数据帧中生成一个唯一日期列表,然后对每个日期循环,取出各个通道并创建新的一行。这种写法有点乏味(而且容易出错!),所以我想知道是否有更好的方法,可以使用 Pandas 自带的工具来完成。

1 个答案:

答案 0 :(得分:2)

先使用 set_index,然后用 unstack 进行透视(pivot)

# Index by date, chNum and chNam, then move chNam and chNum into the columns
# so each date becomes one row of 'value' entries.
C.set_index(['date', 'chNum', 'chNam'])['value'].unstack(['chNam', 'chNum'])

enter image description here

要得到与您的要求完全一致的结果:

# One row per date, one column per chNam. ``axis`` is passed by keyword
# because rename_axis only accepts it as a keyword argument since pandas 2.0;
# passing None clears the name of the column axis.
C.set_index(['date', 'chNam'])['value'].unstack().rename_axis(None, axis=1)

enter image description here