set.seed(2017)
# z is assumed to be a character vector containing NA values, e.g.
# z <- sample(c(NA, "A", "B", "C"), 7, replace = TRUE)
z <- factor(z, levels = c(NA, "A", "B", "C"), exclude = "", ordered = TRUE)
z
#[1] <NA> A C A C B B
#Levels: <NA> < A < B < C
min(z)
#[1] <NA>
#Levels: A < B < C
max(z)
#[1] C
#Levels: A < B < C
# Pseudocode below; the actual list file can be found in the link below:
data_words = [[word_11, word_12, ..., word_1n],
              [word_21, word_22, ..., word_2m],
              ...
             ]
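For reproducibility, a tiny stand-in with the same shape could be pickled first (made-up tokens, not the actual list from the link):

import pickle

# hypothetical miniature version of data_words, just to make the snippet below runnable
data_words = [["this", "is", "document", "one"],
              ["second", "document"]]

with open('data_words.pk', 'wb') as f:
    pickle.dump(data_words, f)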
import pickle
import numpy as np

with open('data_words.pk', 'rb') as f:
    data_words = pickle.load(f)

word_list = np.concatenate(data_words)
# MemoryError on Ubuntu 18.04 LTS 64-bit with 16 GB of memory (VMware virtual machine)
# No problem on macOS 10.12.6 with 8 GB of memory

# The code below works fine on both macOS and Ubuntu:
import itertools
word_list_flat = list(itertools.chain.from_iterable(data_words))
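My guess (an assumption, not verified on the real data): np.concatenate converts the nested lists into a fixed-width unicode array, so every word is padded to the length of the longest one, which can need far more memory than the lists themselves. A minimal sketch of that effect with made-up tokens:

import numpy as np

chunks = [["a", "bb"], ["a-much-longer-token"]]  # hypothetical tiny example
flat = np.concatenate(chunks)
print(flat.dtype)   # <U19: every element reserves room for 19 characters
print(flat.nbytes)  # 3 * 19 * 4 = 228 bytes, independent of each string's real length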
The data_words list is smaller than 400 MB, so could there be a problem with data_words on Ubuntu 18? I would really like to know why this behaviour happens, even though I got the desired result by using CTT's code.
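For what it is worth, a rough way to estimate the in-memory size of the nested list itself (only an approximation: sys.getsizeof counts just the list objects and the string objects, and repeated strings are counted more than once):

import sys

total = sys.getsizeof(data_words)
total += sum(sys.getsizeof(inner) for inner in data_words)
total += sum(sys.getsizeof(word) for inner in data_words for word in inner)
print(f"approximate in-memory size: {total / 1024 ** 2:.1f} MiB")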
Here is the data_words list I used to reproduce the error: