Question

我的算法运行了几个小时，完成后它必须将变量保存到文件中。虽然写入文件时由于内存异常而崩溃了......虽然算法有效但变量有我需要的值。无论如何还有在这一点上检索该变量？顺便说一句，我是python的新手，我完全清楚我所问的有点奇怪。

# -*- coding: utf-8 -*-
from scipy import *
feature_filename = '4087_features.pkl'
name = 'training'
minimum_features = 3
feature_output = '4087_features_min' + str(minimum_features) + '_' + name + '.txt'

print feature_output

# load necessary files for this step

print 'loading features'
import pickle
features = pickle.load(open(feature_filename, 'rb'))
features_new = {}
t = 0
from scipy.sparse import *
for k in features:
    features_new[k] = t
    t += 1
features = features_new
print feature_filename + ' loaded'

filename_in = '../../../Dropbox/Machinaal_leren/project/project/Emotion_Data_twitter/tweets_' + name + '.mat'
print 'loading ' + filename_in + '...'

import scipy.io
from numpy  import *
try:
    data
    tweets
except NameError:
    data = scipy.io.loadmat(filename_in)
    tweets = data['tweets_' + name].squeeze()
print 'tweets_' + name + 'loaded'

execfile('functions.py')

import numpy as np
from multiprocessing import Pool


t = 0


def create_feature_vector(tweet, ground_truth):
    feature_row = np.array([0] * len(features))
    tweet = clean_tweet(tweet)
    # N-grams
    for N in range(3):
        for j in range(0, len(tweet) - (N - 1)):
            try:
                key = ''
                for m in range(N):
                    key += tweet[j + m] + ' '
                index = features[key]
                feature_row[index] += 1
            except ValueError:
                pass
            except IndexError:
                pass
            except KeyError:
                pass
    count_features = (feature_row != 0).sum(0)
    if(count_features >= minimum_features):
        feature_row = [x / (1.*sum(feature_row)) for x in feature_row]
        return(feature_row, ground_truth)
    else:
        return (9, 9)

emotions = ['emo_joy', 'emo_fear', 'emo_sadness', 'emo_thankfulness', 'emo_anger', 'emo_surprise', 'emo_love']
N_emo = len(emotions)
ground_truth_list = []

for i in range(len(tweets)):
    feature_vector, ground_truth = create_feature_vector(tweets[i][0], emotions.index(tweets[i][1]) + 1)
    print i
    if(i==0):
        feature_vector_matrix =coo_matrix(ground_truth)

    else:
        if((feature_vector != 9) and (ground_truth != 9)):
            ground_truth_list.append(ground_truth)
            feature_vector_matrix = vstack([feature_vector_matrix,coo_matrix(feature_vector)])




print 'Calculated the matrix, ground truth and saving files'


ground_truth_array = np.array(ground_truth_list)

output = open('ground_truth.pkl', 'wb')
pickle.dump(ground_truth_array, output)
output.close()

output2 = open('feature_matrix.pkl', 'wb')
pickle.dump(feature_vector_matrix, output2)
output2.close()

在此行之后崩溃了

print'计算矩阵，基础事实和保存文件'

输出

Calculated the matrix, ground truth and saving files
Traceback (most recent call last):
  File "C:\Users\Olivier.Janssens\Documents\Aptana Studio 3 Workspace\MachineLearningBNB\generate_feature_vectors.py", line 99, in <module>
    pickle.dump(feature_vector_matrix, output2)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 1370, in dump
    Pickler(file, protocol).dump(obj)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 224, in dump
    self.save(obj)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 331, in save
    self.save_reduce(obj=obj, *rv)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 419, in save_reduce
    save(state)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 562, in save_tuple
    save(element)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 600, in save_list
    self._batch_appends(iter(obj))
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 615, in _batch_appends
    save(x)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 331, in save
    self.save_reduce(obj=obj, *rv)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 419, in save_reduce
    save(state)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 649, in save_dict
    self._batch_setitems(obj.iteritems())
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 663, in _batch_setitems
    save(v)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 331, in save
    self.save_reduce(obj=obj, *rv)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 419, in save_reduce
    save(state)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 562, in save_tuple
    save(element)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 581, in save_tuple
    self.memoize(obj)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 247, in memoize
    self.memo[id(obj)] = memo_len, obj
MemoryError

第99行是：

pickle.dump(feature_vector_matrix, output2)

我有ground_truth.pkl，看起来确实完整

Answer 1

使用except MemoryError

这样，如果出现内存异常，就可以收到该值。

你应该这样使用它：

try:
   // Your code where you get the error
except MemoryError:
   //save or print your values here

但如果更频繁地发生这种崩溃，您可能希望优化代码。寻找非常大的处理步骤。有时，将流程拆分为较小的步骤或首先存储信息会有所帮助。

Answer 2

好消息，根据您的代码，您不在任何嵌套函数或类似的东西。只要你保持Python窗口打开，你应该能够像在代码中那样获得变量。换句话说，只需在python提示符下运行这段代码。

ground_truth_array = np.array(ground_truth_list)

output = open('ground_truth.pkl', 'wb')
pickle.dump(ground_truth_array, output)
output.close()

output2 = open('feature_matrix.pkl', 'wb')
pickle.dump(feature_vector_matrix, output2)
output2.close()

如果你还没有python提示，那你基本上不走运，并且必须重新运行数据。请记住，以便将来使用较小的子集保存变量或测试功能，以确保崩溃不会导致您死亡。

Answer 3

当代码尝试从numpy（我认为非常大）创建ground_truth_list数组时，会出现崩溃。我的建议是在创建阵列之前将列表保存到磁盘。通过这种方式，您将始终能够读取列表的值（如果您有Python提示符，则无关紧要。）

<强>更新

如果不能被腌制的对象是一个矩阵（顾名思义），可能的解决办法是将它分成几个切片（或者只是创建切片而不是整个矩阵）然后腌制每个切片到磁盘。之后，当您需要使用该矩阵时，您必须加载单个切片并再次连接以恢复原始矩阵。也许不是最有效的解决方案，但我认为它应该有效。

在python中崩溃后检索变量

3 个答案: