################################################################################
#...................Program to create feature vector (N-grams) ...............
################################################################################
import ast
import csv
import os
import sys
from BST import Node
maxInt = sys.maxsize
decrement = True
while decrement:
    # Decrease the maxInt value by a factor of 10
    # as long as OverflowError occurs.
    decrement = False
    try:
        csv.field_size_limit(maxInt)
    except OverflowError:
        maxInt = int(maxInt / 10)
        decrement = True
def File_Write(filename, write_list):
    filewrite = open(filename, "w")
    filewrite.write(str(write_list))
    filewrite.close()
#################################################################################
#................Function to read the files and create the FVT...................
# Change 2,3,4,5,6,7
#################################################################################
def read_file_list(bigram, class_label):
    frqlist = []
    root.reset_frequency()
    for i in range(0, len(bigram)):
        row = str(bigram[i])
        row = row.strip()
        row = row.replace('(', '')
        row = row.replace(')', '')
        row = row.replace("'", '')
        row = row.replace(",", ' ')
        node, parent = root.lookup(row)
        if node:
            root.increment_frequency(node)
    frqlist = root.print_tree()
    ## Attach the class_label............
    root.finalize_frq_lst(class_label)
    root.write_to_csv(file_write1)
##################################################################################
#.................................MAIN PROGRAM....................................
##################################################################################
feature_list = ""
root_flag = 'false'
file_path_data = "/home/xxx/Project/Dataset/Cross/N_grams/7_POStags.csv"  ## Input file containing the n-grams of the blog data
fp_data = csv.reader(open(file_path_data, "r"), delimiter=',')
file_path_feature = "/home/xxx/Project/Dataset/Cross/N_gram_Features/7_gram.txt"  ## Input file containing the sorted n-gram features
fp_feature = open(file_path_feature, "r")
list1 = fp_feature.read()
#### Convert the string into a list ...................
read_list = ast.literal_eval(list1)
read_list1 = list(set(read_list))
print read_list1
for i in range(0, len(read_list)):
    feature = str(read_list[i])
    feature = feature.strip()
    feature = feature.replace('(', '')
    feature = feature.replace(')', '')
    feature = feature.replace("'", '')
    feature = feature.replace(",", ' ')
    if root_flag == 'false':
        root = Node(feature)
        root_flag = 'true'
    else:
        root.insert(feature)
    feature_list = feature_list + "\n" + feature
feature_list1 = feature_list.strip()
line = feature_list1.split('\n')
##print "#######################################################################"
##print line
line1 = list(set(line))
print len(line1)
##print "#######################################################################"
line1.sort()
i=1
######Setting the path for input and output files .......................
output_file = "/home/xxx/Project/Dataset/Cross/N_grams_recored/7_gram.csv"  ## Output file..............
with open(output_file, 'w') as fo:
    file_write1 = csv.writer(fo, delimiter=',', quotechar='"')
    #### Write the header (sorted feature list) into the output file
    file_write1.writerow(line1)
    for data in fp_data:
        feature = ast.literal_eval(data[0])
        class_label = data[1]
        read_file_list(feature, class_label)
        print feature
        print i
        i = i + 1
This is the code I use to record 7-gram counts over my 3277 sample documents. I am trying to build a BST of roughly 76,000 7-gram features, but I get the following error:
Traceback (most recent call last):
File "N_gram_Record (2-7).py", line 79, in <module>
read_list=ast.literal_eval(list1)
File "/usr/lib/python2.7/ast.py", line 49, in literal_eval
node_or_string = parse(node_or_string, mode='eval')
File "/usr/lib/python2.7/ast.py", line 37, in parse
return compile(source, filename, mode, PyCF_ONLY_AST)
MemoryError
I think the MemoryError happens because I am trying to build the BST for the 7-grams, since there are about 76,000 of them. Any ideas on how to overcome this problem???
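Judging from the traceback, the MemoryError is actually raised inside ast.literal_eval (during the compile step), i.e. while parsing the entire 7_gram.txt contents as one huge list literal, before the BST is even built. One direction that might help is the minimal sketch below, assuming the feature file could be regenerated with one 7-gram per line (a hypothetical layout, not the current single-list format), so the features can be streamed into the BST without ever parsing the whole string at once:

#### Sketch only: assumes 7_gram.txt is rewritten with one 7-gram per line
#### (e.g. "DT NN VBZ IN DT JJ NN") instead of a single Python list literal.
from BST import Node

feature_file = "/home/xxx/Project/Dataset/Cross/N_gram_Features/7_gram.txt"  # hypothetical per-line version

root = None
seen = set()
with open(feature_file, "r") as fp:
    for raw in fp:                          # stream one feature at a time
        feature = raw.strip()
        if not feature or feature in seen:
            continue                        # skip blank lines and duplicates
        seen.add(feature)
        if root is None:
            root = Node(feature)            # first feature becomes the BST root
        else:
            root.insert(feature)            # insert the remaining features incrementally

This way only the ~76,000 feature strings and the BST nodes live in memory, instead of the raw file contents plus the AST that ast.literal_eval has to build on top of them.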