将 PySpark 数据框(DataFrame)转换为 Pandas 数据框时出现以下错误
代码:
# Build a small two-column Spark DataFrame from an in-memory list of rows,
# then convert it to a pandas DataFrame.
# NOTE(review): `sc` is not defined here -- presumably the session's
# SparkContext; confirm it exists before running.
rows = [("A", "no"), ("B", "yes"), ("B", "yes"), ("B", "no")]
columns = ["user_id", "phone_number"]
some_df = sc.parallelize(rows).toDF(columns)
pandas_df = some_df.toPandas()
错误: Py4JJavaError:调用o104.collectToPython时发生错误。
答案 0 :(得分:0)
我在自己的系统上测试过,这段代码运行正常。当 Spark 试图把所有数据加载到驱动程序(driver)内存中时,就会出现此错误,因此很可能是您的内存不足。您可以增加驱动程序内存来解决问题,或者使用以下方法清理垃圾:
#----------
# import
#----------
import matplotlib.animation as animation
from matplotlib.figure import Figure
from matplotlib import style
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.backends.backend_tkagg import NavigationToolbar2Tk
import tkinter as tk
from tkinter import ttk
import sqlite3
import time
import random
# Plot styling plus the shared figure/axes that the animation callback
# draws on and the graph page embeds.
style.use('ggplot')
LARGE_FONT = ("Verdana", 14)
f = Figure(figsize=(10, 5), dpi=100)
ax = f.add_subplot(1, 1, 1)
writer = sqlite3.connect('./tmp.db')
cw = writer.cursor()
cw.execute("PRAGMA journal_mode=WAL;")
reader = sqlite3.connect('./tmp.db')
cr = reader.cursor()
cr.execute("PRAGMA journal_mode=WAL;")
cw.execute("CREATE TABLE IF NOT EXISTS 'tmpTable' ('time' INTERER, 'val1' REAL, 'val2' REAL)")
cw.execute("DELETE FROM 'tmpTable'")
def write_to_db(n_rows: int = 10, delay: float = 1) -> None:
    """Insert timestamped random samples into ``tmpTable``, one per step.

    Each step inserts one row ``(unix_time, val1, val2)`` through the
    module-level cursor ``cw``, commits on the module-level connection
    ``writer``, and then sleeps for ``delay`` seconds. The call therefore
    blocks for roughly ``n_rows * delay`` seconds.

    Args:
        n_rows: Number of rows to insert. Defaults to 10.
        delay: Seconds to sleep after each insert. Defaults to 1.
    """
    for _ in range(n_rows):
        unix = int(time.time())
        # Store real floats (rounded to 3 decimals) rather than formatted
        # strings, so the values are numeric regardless of column affinity.
        val1 = round(random.random(), 3)
        val2 = round(random.random(), 3)
        cw.execute(
            "INSERT INTO 'tmpTable' VALUES (?, ?, ?)", (unix, val1, val2)
        )
        writer.commit()
        time.sleep(delay)
def animate(i):
    """Redraw both value series from the current contents of ``tmpTable``.

    Reads every row through the module-level cursor ``cr``, shifts the time
    column so that it starts at zero, prints the three series, and replots
    them on the shared axes ``ax``. ``i`` is accepted to fit the animation
    callback signature and is not used.
    """
    rows = cr.execute("SELECT * FROM 'tmpTable'").fetchall()

    times = [row[0] for row in rows]
    series1 = [row[1] for row in rows]
    series2 = [row[2] for row in rows]

    # Express time relative to the first sample; nothing to shift when the
    # table is empty.
    if times:
        origin = times[0]
        times = [t - origin for t in times]

    for series in (times, series1, series2):
        print(series)

    ax.clear()
    for series in (series1, series2):
        ax.plot(times, series)
class MultiPage(tk.Tk):
    """Root window that stacks every page in one grid cell and raises one."""

    def __init__(self, *args, **kwargs):
        """Create the window, build all pages, and show the start page."""
        super().__init__(*args, **kwargs)
        self.wm_title('Multithreading Test')

        # Single container whose only grid cell stretches with the window.
        holder = tk.Frame(self)
        holder.pack(side='top', fill='both', expand=True)
        holder.grid_rowconfigure(0, weight=1)
        holder.grid_columnconfigure(0, weight=1)

        # Every page is gridded into the same cell and looked up by its class.
        self.frames = {}
        for page_cls in (StartPage, GraphPage):
            page = page_cls(holder, self)
            self.frames[page_cls] = page
            page.grid(row=0, column=0, sticky="nsew")

        self.show_frame(StartPage)

    def show_frame(self, cont):
        """Raise the page registered under the page class ``cont``."""
        self.frames[cont].tkraise()
class StartPage(tk.Frame):
    """Landing page with a title and a button that opens the graph page."""

    def __init__(self, parent, controller):
        """Build the page inside ``parent``.

        ``controller`` must provide ``show_frame``; the button calls it with
        ``GraphPage``.
        """
        super().__init__(parent)

        heading = ttk.Label(self, text="Start Page", font=LARGE_FONT)
        heading.pack(pady=10, padx=10)

        def open_graph_page():
            controller.show_frame(GraphPage)

        go_button = ttk.Button(
            self, text="Go to Graph Page", command=open_graph_page
        )
        go_button.pack()
class GraphPage(tk.Frame):
    """Page that embeds the shared matplotlib figure ``f`` with a toolbar."""

    def __init__(self, parent, controller):
        """Build the page inside ``parent``.

        ``controller`` must provide ``show_frame``; the button calls it with
        ``StartPage``.
        """
        tk.Frame.__init__(self, parent)
        label = ttk.Label(self, text="Graph Page", font=LARGE_FONT)
        label.pack(pady=10, padx=10)
        button1 = ttk.Button(self, text="Back to Home",
                             command=lambda: controller.show_frame(StartPage))
        button1.pack()

        canvas = FigureCanvasTkAgg(f, self)
        canvas.draw()
        # Use the public accessor for the Tk widget instead of reaching into
        # the private ``canvas._tkcanvas`` attribute.
        canvas_widget = canvas.get_tk_widget()
        canvas_widget.pack(side=tk.BOTTOM, fill=tk.BOTH, expand=True)

        toolbar = NavigationToolbar2Tk(canvas, self)
        toolbar.update()
        # Second pack call on the same widget, kept from the original: it
        # switches the canvas to side=TOP after the toolbar has been created.
        canvas_widget.pack(side=tk.TOP, fill=tk.BOTH, expand=True)
#----------
# Main app
#----------
if __name__ == "__main__":
    try:
        app = MultiPage()
        # Blocks until all sample rows are written; the window's event loop
        # only starts afterwards.
        write_to_db()
        # Bind the animation to a name so it stays referenced while the
        # main loop runs.
        ani = animation.FuncAnimation(f, animate, interval=1000)
        app.mainloop()
    finally:
        # Release cursors and connections even if the GUI setup or the main
        # loop raised.
        cw.close()
        cr.close()
        writer.close()
        reader.close()
如果这对您有帮助,请告诉我。