How to avoid a memory error in Python when retrieving from a database

Time: 2015-03-21 11:23:02

Tags: python sqlite

When I try to run my code on a large amount of data, I get a MemoryError at all_rows = [[x[0], x[1]] for x in cur]. I have 200M rows. How can I avoid it?

# imports needed to run this snippet
import sqlite3 as lite
import time
from itertools import groupby
from operator import itemgetter

import matplotlib.pyplot as plt
import numpy as np

binwidth = 1
latitudes = []
userids = []
info = []
densities = []
with lite.connect(databasepath) as con:
    cur = con.execute('SELECT latitude, userid FROM dynamicMessage WHERE latitude>45')
    print "executed"
    all_rows = [[x[0], x[1]] for x in cur]
    all_rows = sorted(all_rows, key=itemgetter(0))
    print "sorted"
    for x in all_rows:
        latitudes.append(x[0])
        userids.append(x[1])
    min_lat = -100
    max_lat = 100
    binwidth = 1

    bin_range = np.arange(min_lat,max_lat,binwidth)

    binned_latitudes = np.digitize(latitudes,bin_range)
    all_in_bins = zip(binned_latitudes,userids)
    unique_in_bins = list(set(all_in_bins))
    all_in_bins.sort()
    unique_in_bins.sort()

    bin_count_all = []
    for bin, group in groupby(all_in_bins, lambda x: x[0]):
        bin_count_all += [(bin, len([k for k in group]))]

    bin_count_unique = []
    for bin, group in groupby(unique_in_bins, lambda x: x[0]):
        bin_count_unique += [(bin, len([k for k in group]))]

    bin_density = [(bin_range[b-1],a*1.0/u) for ((b,a),(_,u)) in zip(bin_count_all, bin_count_unique)]

    bin_density =  np.array(bin_density).transpose()

    # all_in_bins and unique_in_bins now contain the data
    # corresponding to the SQL / pseudocode in your question

    # plot as standard bar - note you can put uneven widths in as an array-like here if necessary
    plt.bar(*bin_density, width=binwidth)
    plt.savefig('latlongstats'+'t'+str(time.strftime("%H:%M:%S")), format='png')
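
One way to avoid the MemoryError is to never build the 200M-element list at all. The minimal sketch below (untested, assuming the same dynamicMessage table and databasepath as above) pulls the cursor in chunks with fetchmany and updates per-bin counters row by row, so memory grows with the number of bins and distinct users rather than with the number of rows.

import sqlite3 as lite
import numpy as np

binwidth = 1
bin_range = np.arange(-100, 100, binwidth)

bin_count_all = {}   # bin index -> number of rows that fell into the bin
users_in_bin = {}    # bin index -> set of distinct userids seen in the bin

with lite.connect(databasepath) as con:
    cur = con.execute('SELECT latitude, userid FROM dynamicMessage WHERE latitude>45')
    while True:
        chunk = cur.fetchmany(100000)   # fetch 100k rows at a time
        if not chunk:
            break
        lats = [row[0] for row in chunk]
        uids = [row[1] for row in chunk]
        for b, uid in zip(np.digitize(lats, bin_range), uids):
            bin_count_all[b] = bin_count_all.get(b, 0) + 1
            users_in_bin.setdefault(b, set()).add(uid)

# same density as in the question: rows per distinct user, per bin
bin_density = [(bin_range[b - 1], bin_count_all[b] * 1.0 / len(users_in_bin[b]))
               for b in sorted(bin_count_all)]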

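If the number of distinct users is itself very large, the aggregation can instead be pushed into SQLite entirely, so only one summary row per bin ever reaches Python. Another sketch (again an assumption, not the original code; CAST truncates toward zero, which matches 1-degree bins here because the latitude>45 filter keeps values positive):

import sqlite3 as lite

with lite.connect(databasepath) as con:
    cur = con.execute(
        'SELECT CAST(latitude AS INTEGER) AS bin, '
        '       COUNT(*) AS total, COUNT(DISTINCT userid) AS users '
        'FROM dynamicMessage WHERE latitude>45 '
        'GROUP BY bin ORDER BY bin')
    rows = cur.fetchall()   # one row per 1-degree bin, not per message

bin_density = [(b, total * 1.0 / users) for (b, total, users) in rows]
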
0 Answers:

No answers yet