我用于创建Berkeley DB文件的代码:
def create_bdb_object(filename, is_create=True):
    """Create a new Berkeley DB B-tree file and return the open handle.

    The original snippet tested a name ``is_create`` that was never defined,
    which raises ``NameError`` as soon as *filename* already exists.  It is
    now an explicit keyword argument.

    Args:
        filename: Path of the database file to create.
        is_create: When true (the default), an existing file at *filename*
            is deleted first so that the exclusive create can succeed.

    Returns:
        The opened ``bsddb3.db.DB`` handle; the caller must ``close()`` it.

    Note:
        The file is opened with ``DB_CREATE | DB_EXCL``, so when
        ``is_create`` is false and the file already exists, ``bdb.open`` is
        expected to fail instead of reusing the file.
    """
    bdb = bsddb3.db.DB()
    # Flags must be set before open(): allow duplicate keys, kept sorted.
    bdb.set_flags(bsddb3.db.DB_DUP | bsddb3.db.DB_DUPSORT)
    open_flags = bsddb3.db.DB_CREATE | bsddb3.db.DB_EXCL
    if is_create and os.path.exists(filename):
        os.remove(filename)
    bdb.open(filename, dbtype=bsddb3.db.DB_BTREE, flags=open_flags)
    return bdb
之后,我将一些经过 pickle 序列化的数据写入了该文件。文件的创建过程没有任何问题。
更新#1: 写入文件的代码:
def write_to_the_file(filename, kv_pair_rdd):
    """Write every (url, record) pair of *kv_pair_rdd* into a Berkeley DB file.

    The data is first written to ``<filename>.new`` and only moved over
    *filename* once every record has been stored, so an interrupted run
    never leaves a half-written file at *filename*.

    Args:
        filename: Final path of the database file.
        kv_pair_rdd: Object exposing ``toLocalIterator()`` that yields
            ``(url, record)`` pairs; ``url`` must be a ``str`` and
            ``record`` must be picklable.
    """
    bdb_filename = f'{filename}.new'
    bdb = create_bdb_object(bdb_filename)
    try:
        for url, record in kv_pair_rdd.toLocalIterator():
            bdb.put(url.encode(), pickle.dumps(record, protocol=2))
    finally:
        # Always release the handle, even if a put()/dumps() call fails.
        bdb.close()
    # os.replace overwrites an existing destination on every platform,
    # whereas os.rename fails on Windows if *filename* already exists.
    os.replace(bdb_filename, filename)
但是,当我尝试读取此文件时,并没有从中获取所有数据。 在文件中应该有9条记录,但是读取后我只有4条记录。
当我执行 db_dump -p filename 时,可以得到全部 9 条记录。
用于从文件读取数据的代码:
# Walk the whole database with a cursor and print every (key, value) pair.
# `filename` is expected to be defined by the surrounding code.
bdb = bsddb3.db.DB()
bdb.set_flags(bsddb3.db.DB_DUP | bsddb3.db.DB_DUPSORT)
bdb.open(filename)
try:
    bdb_cursor = bdb.cursor()
    try:
        record = bdb_cursor.first()
        # The cursor yields (key, value) tuples; the loop stops on None.
        while record is not None:
            # NOTE: only unpickle data from a trusted source.
            print(record[0], pickle.loads(record[1]))
            record = bdb_cursor.next()
    finally:
        # Close the cursor even if unpickling or printing a record fails.
        bdb_cursor.close()
finally:
    bdb.close()
请问有人可以告诉我我做错了什么吗?
答案 0 :(得分:0)
请先检查一下文件中实际包含哪些数据。我使用您的代码编写了以下脚本:
import bsddb3
import os
import pickle
def create_bdb_object(filename):
    """Return a freshly created B-tree database stored at *filename*.

    Any file already present at *filename* is deleted first, because the
    database is opened with ``DB_CREATE | DB_EXCL``.  The handle is
    configured with ``DB_DUP | DB_DUPSORT`` before it is opened.  The caller
    owns the returned handle and must ``close()`` it.
    """
    db_api = bsddb3.db
    handle = db_api.DB()
    handle.set_flags(db_api.DB_DUP | db_api.DB_DUPSORT)

    # Remove a leftover file so the exclusive create below can succeed.
    if os.path.exists(filename):
        os.remove(filename)

    handle.open(
        filename,
        dbtype=db_api.DB_BTREE,
        flags=db_api.DB_CREATE | db_api.DB_EXCL,
    )
    return handle
def write_to_the_file(filename, data):
    """Store every item of the mapping *data* in a Berkeley DB file.

    Records are written to ``<filename>.new`` and the finished file is then
    moved over *filename*, so a failed run never leaves a partial database
    at the final path.

    Args:
        filename: Final path of the database file.
        data: Mapping of ``str`` URL to a picklable record.
    """
    bdb_filename = f'{filename}.new'
    bdb = create_bdb_object(bdb_filename)
    try:
        for url, record in data.items():
            bdb.put(url.encode(), pickle.dumps(record, protocol=2))
    finally:
        # Always release the handle, even if a put()/dumps() call fails.
        bdb.close()
    # os.replace overwrites an existing destination on every platform,
    # whereas os.rename fails on Windows if *filename* already exists.
    os.replace(bdb_filename, filename)
def read_bdb(bdb_filename):
    """Print every record stored in the Berkeley DB file *bdb_filename*.

    The database is walked from the first record with a cursor.  Each record
    is printed with a running number, its raw key and its unpickled value.

    Args:
        bdb_filename: Path of an existing database file.
    """
    bdb = bsddb3.db.DB()
    bdb.set_flags(bsddb3.db.DB_DUP | bsddb3.db.DB_DUPSORT)
    bdb.open(bdb_filename)
    try:
        bdb_cursor = bdb.cursor()
        try:
            record = bdb_cursor.first()
            counter = 1
            # The cursor yields (key, value) tuples; the loop stops on None.
            while record is not None:
                # NOTE: only unpickle data from a trusted source.
                print('Record num: %s, key: %s, value: %s' % (counter, record[0], pickle.loads(record[1])))
                record = bdb_cursor.next()
                counter += 1
        finally:
            # Close the cursor even if unpickling or printing a record fails.
            bdb_cursor.close()
    finally:
        bdb.close()
def main():
    """Write nine sample records to a Berkeley DB file and read them back."""
    bdb_filename = '/tmp/bsddb.bdb'
    data = {
        'www.example1.com': 'lorem ipsum 1',
        'www.example2.com': 'lorem ipsum 2',
        'www.example3.com': 'lorem ipsum 3',
        'www.example4.com': 'lorem ipsum 4',
        'www.example5.com': 'lorem ipsum 5',
        'www.example6.com': 'lorem ipsum 6',
        'www.example7.com': 'lorem ipsum 7',
        'www.example8.com': 'lorem ipsum 8',
        'www.example9.com': 'lorem ipsum 9',
    }
    write_to_the_file(bdb_filename, data)
    read_bdb(bdb_filename)


# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
脚本运行正常,我无法复现您描述的问题,输出如下:
Record num: 1, key: b'www.example1.com', value: lorem ipsum 1
Record num: 2, key: b'www.example2.com', value: lorem ipsum 2
Record num: 3, key: b'www.example3.com', value: lorem ipsum 3
Record num: 4, key: b'www.example4.com', value: lorem ipsum 4
Record num: 5, key: b'www.example5.com', value: lorem ipsum 5
Record num: 6, key: b'www.example6.com', value: lorem ipsum 6
Record num: 7, key: b'www.example7.com', value: lorem ipsum 7
Record num: 8, key: b'www.example8.com', value: lorem ipsum 8
Record num: 9, key: b'www.example9.com', value: lorem ipsum 9
也许您还有其他代码修改了这些数据。