I'm trying to insert a NumPy array into PostgreSQL, and attempted this:
import psycopg2
from config import config

def write_to_db(some_arr, some_txt):
    """ insert a new array into the face_ar table """
    sql = """INSERT INTO test_db VALUES(%s,%s);"""
    conn = None
    try:
        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()
        # this line raises the error below: psycopg2 has no adapter for numpy.ndarray
        cur.execute(sql, (some_arr, some_txt))
        conn.commit()
        cur.close()
    except (Exception, psycopg2.DatabaseError) as e:
        print(e)
    finally:
        if conn is not None:
            conn.close()
Before that, I had created a table in the database:
create table test_db (encodings double precision[], link text);
In the end I get the error: "can't adapt type 'numpy.ndarray'".
I need to store a NumPy array of 125 float64 values plus a short text, such as a link, in each row. The project will have several million rows, so read speed and database size are what matter. As far as I know, a NumPy array cannot be inserted directly and has to be converted to another format first. My first thought was to convert it to binary data and save that to the DB, but I don't know how to do this, nor how to get it back out of the DB as a NumPy array.
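(Side note: for the existing double precision[] column, passing some_arr.tolist() would already work, since psycopg2 adapts Python lists to PostgreSQL arrays. For the binary route, here is a minimal sketch of the round trip, assuming a BYTEA column and a fixed float64 dtype; the table name test_db_bytea is hypothetical:)

import numpy as np
import psycopg2

# Hypothetical schema: create table test_db_bytea (encodings bytea, link text);
def np_to_bytes(arr):
    # raw little-endian float64 buffer; dtype and length must be known at read time
    return arr.astype(np.float64).tobytes()

def bytes_to_np(raw):
    # reconstruct the fixed-length float64 vector (125 items in this case)
    return np.frombuffer(raw, dtype=np.float64)

# On insert, wrap the buffer so psycopg2 adapts it to BYTEA:
# cur.execute("INSERT INTO test_db_bytea VALUES(%s,%s)",
#             (psycopg2.Binary(np_to_bytes(some_arr)), some_txt))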
Answer 0 (Score: 1)
Thanks to Vasyl Kushnir. This method turned out to work well and reads the data back quickly:
import psycopg2
from config import config
import msgpack
import msgpack_numpy as m

def write_to_db(encoding, link):
    """ insert a new array into the test1_db table """
    sql = """INSERT INTO test1_db VALUES(%s,%s);"""
    conn = None
    dumped_data = msgpack.packb(encoding, default=m.encode)
    try:
        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()
        cur.execute(sql, (dumped_data, link))
        conn.commit()
        cur.close()
    except (Exception, psycopg2.DatabaseError) as e:
        print(e)
    finally:
        if conn is not None:
            conn.close()
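Note (an assumption, not stated in the answer): msgpack.packb returns a bytes object, so the encodings column of test1_db would need to be BYTEA rather than the double precision[] used for test_db above, e.g. create table test1_db (encodings bytea, link text);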
def read_from_db():
    """ query data from the test1_db table """
    conn = None
    row = None
    try:
        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()
        cur.execute("SELECT encodings, link FROM test1_db")
        print("The number of rows: ", cur.rowcount)
        row = cur.fetchone()
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
    if row is None:  # query failed or table is empty
        return None, None
    encoding1, somelink = row
    return msgpack.unpackb(encoding1, object_hook=m.decode), somelink
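A minimal usage sketch of the round trip (the 125-value vector and the link are made up for illustration):

import numpy as np

encoding = np.random.rand(125)  # hypothetical 125-dim face encoding
write_to_db(encoding, "http://example.com/img1.jpg")
restored, link = read_from_db()
print(np.array_equal(encoding, restored), link)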
Answer 1 (Score: 0)
Try Python's pickle module for binary serialization/deserialization.
Example:
import numpy as np
from pickle import dumps, loads

data = np.array([1, 2, 4, 5, 6])
dumped_data = dumps(data)
loaded_data = loads(dumped_data)
print(dumped_data)
print(loaded_data)
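To wire this into PostgreSQL, the pickled bytes would likewise go into a BYTEA column; a hedged sketch (the table name test_db_bytea and the cursor setup are assumed, following the pattern above):

import numpy as np
import psycopg2
from pickle import dumps, loads

data = np.array([1, 2, 4, 5, 6])

# on write: psycopg2.Binary adapts the pickled bytes to BYTEA
# cur.execute("INSERT INTO test_db_bytea VALUES(%s,%s)",
#             (psycopg2.Binary(dumps(data)), "some link"))

# on read: BYTEA comes back as a memoryview; convert before unpickling
# raw, link = cur.fetchone()
# restored = loads(bytes(raw))

One trade-off worth noting: pickle is Python-only, and unpickling untrusted data can execute arbitrary code; msgpack (Answer 0) avoids that risk. Either way, the stored bytes must be deserialized by the same library that wrote them.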