bsddb3无法读取整个文件

时间:2019-06-02 19:35:42

标签: python-3.x bsddb

我用于创建Berkeley DB文件的代码:

def create_bdb_object(filename):
    bdb = bsddb3.db.DB()
    bdb.set_flags(bsddb3.db.DB_DUP | bsddb3.db.DB_DUPSORT)
    open_flags = bsddb3.db.DB_CREATE | bsddb3.db.DB_EXCL
    if os.path.exists(filename) and is_create:
        os.remove(filename)
    bdb.open(filename, dbtype=bsddb3.db.DB_BTREE, flags=open_flags)
    return bdb

之后,我将一些腌制的数据写入了该文件。该文件创建没有任何问题。

更新#1: 写入文件的代码:

def write_to_the_file(filename, kv_pair_rdd):
    bdb_filename = f'{filename}.new'
    bdb = create_bdb_object(bdb_filename)

    for url, record in kv_pair_rdd.toLocalIterator():
        bdb.put(url.encode(), pickle.dumps(record, protocol=2))

    bdb.close()
    os.rename(bdb_filename, filename)

但是,当我尝试读取此文件时,并没有从中获取所有数据。 在文件中应该有9条记录,但是读取后我只有4条记录。

当我做db_dump -p filename时,我会得到9条记录

用于从文件读取数据的代码:

bdb = bsddb3.db.DB()                                                                                                                                                                            
bdb.set_flags(bsddb3.db.DB_DUP | bsddb3.db.DB_DUPSORT)                                   
bdb.open(filename)                                                                 
bdb_cursor = bdb.cursor() 

record = bdb_cursor.first()                                                              
while record:                                  
    print(record[0], pickle.loads(record[1]))                                       
    record = bdb_cursor.next()  

bdb_cursor.close()                                                                        
bdb.close()

请问有人可以告诉我我做错了什么吗?

1 个答案:

答案 0 :(得分:0)

调查文件中包含什么数据。我使用了您的代码并创建了以下脚本:

import bsddb3
import os
import pickle


def create_bdb_object(filename):
    bdb = bsddb3.db.DB()
    bdb.set_flags(bsddb3.db.DB_DUP | bsddb3.db.DB_DUPSORT)
    open_flags = bsddb3.db.DB_CREATE | bsddb3.db.DB_EXCL
    if os.path.exists(filename):
        os.remove(filename)
    bdb.open(filename, dbtype=bsddb3.db.DB_BTREE, flags=open_flags)
    return bdb


def write_to_the_file(filename, data):
    bdb_filename = f'{filename}.new'
    bdb = create_bdb_object(bdb_filename)

    for url, record in data.items():
        bdb.put(url.encode(), pickle.dumps(record, protocol=2))

    bdb.close()
    os.rename(bdb_filename, filename)


def read_bdb(bdb_filename):
    bdb = bsddb3.db.DB()
    bdb.set_flags(bsddb3.db.DB_DUP | bsddb3.db.DB_DUPSORT)
    bdb.open(bdb_filename)
    bdb_cursor = bdb.cursor()

    record = bdb_cursor.first()
    counter = 1
    while record:
        print('Record num: %s, key: %s, value: %s' % (counter, record[0], pickle.loads(record[1])))
        record = bdb_cursor.next()
        counter += 1

    bdb_cursor.close()
    bdb.close()


def main():
    bdb_filename = '/tmp/bsddb.bdb'
    data = {'www.example1.com': 'lorem ipsum 1',
            'www.example2.com': 'lorem ipsum 2',
            'www.example3.com': 'lorem ipsum 3',
            'www.example4.com': 'lorem ipsum 4',
            'www.example5.com': 'lorem ipsum 5',
            'www.example6.com': 'lorem ipsum 6',
            'www.example7.com': 'lorem ipsum 7',
            'www.example8.com': 'lorem ipsum 8',
            'www.example9.com': 'lorem ipsum 9'}
    write_to_the_file(bdb_filename, data)

    read_bdb(bdb_filename)


main()

它运行完美,无法发现问题,输出如下:

Record num: 1, key: b'www.example1.com', value: lorem ipsum 1
Record num: 2, key: b'www.example2.com', value: lorem ipsum 2
Record num: 3, key: b'www.example3.com', value: lorem ipsum 3
Record num: 4, key: b'www.example4.com', value: lorem ipsum 4
Record num: 5, key: b'www.example5.com', value: lorem ipsum 5
Record num: 6, key: b'www.example6.com', value: lorem ipsum 6
Record num: 7, key: b'www.example7.com', value: lorem ipsum 7
Record num: 8, key: b'www.example8.com', value: lorem ipsum 8
Record num: 9, key: b'www.example9.com', value: lorem ipsum 9

也许,您还有一些其他代码可以修改您的数据