我在代码中理解这个错误时遇到了麻烦,首先让我试着解释一下我发生了什么以及我想做什么。
我的代码旨在将45个单独的文本文件加载到数组中,包括每个单词/短语的重量和单词短语本身。这必须在收到任何描述之前在开头发生。
其次,一旦收到描述,我的软件会将其解析为单词/短语,并与数组中的单词/短语进行比较。
第三,我的软件按顺序提供排名顺序(第一/第二/第三)的前三个课程,以及每个课程的分数。
我已经制作了一个django应用程序来提供这个代码,所以我有一个表单,它将提供两个参数类和描述,如下所示:
class TrademarkClassifierForm(forms.Form):
"""
TODO: This forms will cover the questions the
initial classifier program does
:returns: TODO
"""
classes = forms.CharField(max_length=10,
label="Test all trademark classes? Type 'yes' to do so or else enter the class to be tested ")
description = forms.CharField(widget=forms.Textarea)
def __init__(self, *args, **kwargs):
super(TrademarkClassifierForm, self).__init__(*args, **kwargs)
self.helper = FormHelper()
self.helper.add_input(Submit('submit', 'Submit'))
然后我想将view
中的这两个参数传递给url
,如下所示:
class TrademarkClassifierResultView(FormView):
"""
TODO: Post should redirect to it's on page with GET,
specify set values in some query parameters,
something like ?classes=yes&name=NameOfTrademarkClass
This should be visible on results page.
:param: classes
:param: description
:returns: TODO - params
"""
template_name = 'trademark.html'
form_class = TrademarkClassifierForm
def get(self, request, *args, **kwargs):
classes = str(self.request.GET.get('classes'))
description = str(self.request.GET.get('description'))
form = TrademarkClassifierForm(initial={'classes': classes, 'description': description})
context_data = self.get_context_data(classes, description, form=form)
return self.render_to_response(context_data)
def form_valid(self, form):
classes = form.cleaned_data['classes']
description = form.cleaned_data['description']
return redirect(self.get_success_url(classes, description))
def form_invalid(self, form):
messages.add_message(self.request, messages.ERROR,
"Invalid data. Please check fields.")
return self.render_to_response(
self.get_context_data(form=form)
)
def get_success_url(self, classes=None, description=None):
return reverse("classifier:trademark") + "?classes=" + str(classes) + "&description" + str(description)
def get_context_data(self, classes, description, **kwargs):
context = super(TrademarkClassifierResultView, self).get_context_data(**kwargs)
context['classes'] = classes
context['description'] = description
context['trademark'] = ClassifyMarkBased.control_program(classes, description)
return context
现在我的问题是这个错误:
Environment:
Request Method: GET
Request URL: http://127.0.0.1:8000/trademark/
Django Version: 1.11.2
Python Version: 2.7.12
Installed Applications:
['django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'django.contrib.sites',
'classifier',
'crispy_forms',
'allauth',
'allauth.account',
'allauth.socialaccount',
'widget_tweaks',
'debug_toolbar']
Installed Middleware:
['django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
'debug_toolbar.middleware.DebugToolbarMiddleware']
Traceback:
File "/home/petar/.virtualenvs/trademark/local/lib/python2.7/site-packages/django/core/handlers/exception.py" in inner
41. response = get_response(request)
File "/home/petar/.virtualenvs/trademark/local/lib/python2.7/site-packages/django/core/handlers/base.py" in _get_response
187. response = self.process_exception_by_middleware(e, request)
File "/home/petar/.virtualenvs/trademark/local/lib/python2.7/site-packages/django/core/handlers/base.py" in _get_response
185. response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/home/petar/.virtualenvs/trademark/local/lib/python2.7/site-packages/django/views/generic/base.py" in view
68. return self.dispatch(request, *args, **kwargs)
File "/home/petar/.virtualenvs/trademark/local/lib/python2.7/site-packages/django/views/generic/base.py" in dispatch
88. return handler(request, *args, **kwargs)
File "/home/petar/Documents/Synergy/Trademark/TM_base/classifier/views.py" in get
60. context_data = self.get_context_data(classes, description, form=form)
File "/home/petar/Documents/Synergy/Trademark/TM_base/classifier/views.py" in get_context_data
82. context['trademark'] = ClassifyMarkBased.control_program(classes, description)
File "/home/petar/Documents/Synergy/Trademark/TM_base/classifier/services/classify_mark_based.py" in control_program
89. N = len(word_count_array_for_all_classes[i])
Exception Type: IndexError at /trademark/
Exception Value: list index out of range
这是我的网址:
url(r'^trademark/', TrademarkClassifierResultView.as_view(), name="trademark"),
这是代码的一部分,应该在这两个参数上计算商标:
import os
import numpy as np
import re
import requests
class TrademarkService(object):
# coding: utf-8
# In[5]:
# compare input string to a class
# for words not found,look in a dictionary - add to text files for trademark words
# In[6]:
# open each trademark class file and read the words/frequency back into an array
@staticmethod
def open_file_read_words(file_name):
unique_words_and_count_not_format = []
tm_word_count_array = []
my_list = []
all_possible_entries = 1
with open(file_name) as f:
lines = [line.strip() for line in open(file_name)]
all_possible_entries = len(lines)
tm_word_count_array = [[0 for x in range(2)] for y in range(all_possible_entries)]
i = 0
while i < all_possible_entries:
tm_word_count_array[i] = lines[i].split(',', 1)
i += 1
i = 0
while i < all_possible_entries:
tm_word_count_array[i][0] = int(tm_word_count_array[i][0])
i += 1
return tm_word_count_array
# In[7]:
# this section normalizes word frequency by the number of words x 1000
@staticmethod
def normalize_array(tm_word_count_array):
list_of_freqs = []
max_entries = len(tm_word_count_array)
list_of_freqs = [0 for y in range(max_entries)]
i = 0
while i < max_entries:
list_of_freqs[i] = tm_word_count_array[i][0]
i += 1
max_value = max(list_of_freqs)
i = 0
while i < max_entries:
tm_word_count_array[i][0] = ((float(tm_word_count_array[i][0])) / max_entries) * 1000
i += 1
return tm_word_count_array
# In[8]:
# include the list of not useful words here
@staticmethod
def find_not_useful_words(word):
not_useful_words = (
"about", "are", "upon", "-", " ", "up", "other", "or", "not", "namely", "more", "made", "in", "for", "except",
"but", "being", "all", "against", "was", "were", "will", "that", "its", "on", "it", "at", "was", "our", "your",
"ours", "yours", "their", "them", "other", "out", "having", "have", "has", "in", "be", "than", "use", "uses",
"using", "", "by", "and", "an", "a", "use", "used", "using", "for", "to", "of", "-)", "-]", "with", "as", "in",
"the", "from")
for test_word in not_useful_words:
if word == test_word:
return False
return True
# In[9]:
# clean up the phrases by removing problematic characters
@staticmethod
def clean_up_phrases(data):
important_words = ''
word = data
for c in word:
if 0 <= ord(c) <= 127:
# this is an ascii character.
not_a_variable = 0
else:
if ord(c) == 201:
word = word.replace(c, "e")
elif ord(c) == 241:
word = word.replace(c, "n")
elif ord(c) == 225:
word = word.replace(c, "a")
elif ord(c) == 251:
word = word.replace(c, "u")
elif ord(c) == 8206:
word = word.replace(c, "")
else:
word = word.replace(c, "")
# continue_yes=raw_input("do you want to continue?")
word = word.lower()
word = str(filter(lambda ch: ch not in "?.!/;:,'()[]", word))
# calls the function above to remove words that were found to interfere with classification
if data.find_not_useful_words(word):
if len(word) > 1:
important_words += word
return important_words
# In[10]:
# find the important words in the string
@staticmethod
def find_important_words(data):
all_entries = len(data)
important_words = []
for word in data.split():
for c in word:
if 0 <= ord(c) <= 127:
# this is an ascii character.
not_a_variable = 0
else:
if ord(c) == 201:
word = word.replace(c, "e")
elif ord(c) == 241:
word = word.replace(c, "n")
elif ord(c) == 225:
word = word.replace(c, "a")
elif ord(c) == 251:
word = word.replace(c, "u")
elif ord(c) == 8206:
word = word.replace(c, "")
else:
word = word.replace(c, "")
word = word.lower()
word = str(filter(lambda ch: ch not in " ?.!/;:,'()[]", word))
if word.endswith("-"):
word = word[:-1]
if word.startswith("-"):
word = word[:1]
if data.find_not_useful_words(word):
if len(word) > 1:
important_words.append(word)
return important_words
# In[11]:
@staticmethod
def analyze_each_line_test_data(test_sentence, N, normalized_tm_word_count_array):
# remove problematic characters and words, plus find important words/phrases
test_important_phrases = test_sentence.clean_up_phrases(test_sentence)
i = 0
total_found = 0
total_TM_class_count = 0
total_TM_words_matched = []
# score the trademark phrases in the string
while i < N:
count_phrases = 0
if len(normalized_tm_word_count_array[i][1].split()) > 1:
if test_important_phrases.find(normalized_tm_word_count_array[i][1]) > -1:
total_TM_words_matched.append(normalized_tm_word_count_array[i][1])
total_TM_class_count += (normalized_tm_word_count_array[i][0])
total_found += 1
i += 1
# decompose the string and remove extraneous words, then score the words in the string
test_important_words = test_sentence.find_important_words(test_sentence)
i = 0
while i < N:
count_words = 0
if test_important_words.count(normalized_tm_word_count_array[i][1]) > 0:
total_TM_words_matched.append(normalized_tm_word_count_array[i][1])
count_words = test_important_words.count(normalized_tm_word_count_array[i][1])
total_TM_class_count += (normalized_tm_word_count_array[i][0] * count_words)
total_found += 1
i += 1
i = 0
normalized_tm_word_count_values = [0 for y in range(N)]
normalized_tm_word_count_words = ['a' for y in range(N)]
while i < N:
normalized_tm_word_count_values[i] = normalized_tm_word_count_array[i][0]
normalized_tm_word_count_words[i] = normalized_tm_word_count_array[i][1]
i += 1
total_words_to_match = len(test_important_words) + len(test_important_phrases)
not_found_words = list(set(test_important_words) - set(normalized_tm_word_count_words))
return total_found, total_TM_words_matched, not_found_words, total_TM_class_count
# In[12]:
@staticmethod
def open_class_file_read_words_to_array(file_name, file_name_class=None):
tm_word_count_array = []
tm_word_count_array = file_name.open_file_read_words(file_name_class)
return tm_word_count_array
# In[13]:
# create a file for the trademark results
@staticmethod
def create_results_file(file_name, results_array, description):
unique_words_and_count_not_format = []
unique_words_and_count_to_write = []
open_file_name = open(file_name, 'a')
open_file_name.write("New trademark comparison")
open_file_name.write("\n")
open_file_name.write(description)
open_file_name.write("\n")
unique_words_and_count_to_write = np.array(results_array, dtype=object)
np.savetxt(open_file_name, unique_words_and_count_to_write, fmt='%s', delimiter=',')
open_file_name.write("\n")
open_file_name.write("\n")
open_file_name.write("\n")
open_file_name.close()
# In[14]:
# this section controls the program
@staticmethod
def control_the_program(classes, description):
description = []
word_count_array_for_all_classes = []
correct_class_set = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17',
'18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33',
'34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45']
# depending on the answer, only one class worth of trademark words will be loaded up or else all will be loaded up
# test_all_classes = raw_input(
# "Test all trademark classes? Type 'yes' to do so or else enter the class to be tested ")
test_all_classes = classes
# test description of goods/services
# test_data_array = raw_input("Provide the description of goods or services ")
test_data_array = description
# file_name_data = raw_input("Provide the identifier for the results file ")
# this file has the output of the classification engine, including the top 3 results
# file_name_results = 'user_test_comparison_results_' + file_name_data + '.txt'
# call to a program to open each file of trademark words in turn and read the words back into an array
if test_all_classes == 'yes':
i = 1
number_classes_to_check = 45
word_count_array_for_all_classes = [[] for z in range(46)]
temp_array = []
while i <= 45:
# opens each file with the trademark words
file_name_class = 'counted_phrases_class' + str(i) + '.txt'
temp_array = classes.open_class_file_read_words_to_array(file_name_class)
# normalization is used because some classes have many words and some have few words
# the words/phrases are weighted according to frequency
word_count_array_for_all_classes[i] = classes.normalize_array(temp_array)
i += 1
else:
# print "you didn't enter yes"
pass
# length_test_data_array = len(test_data_array)
# open(file_name_results, 'a').close()
# start_writing_results = open(file_name_results, 'a')
# start_writing_results.write("The start of the test")
# start_writing_results.write("\n")
# start_writing_results.write("Total number of potential items to match ")
# start_writing_results.write(str(length_test_data_array))
# start_writing_results.write("\n")
# start_writing_results.close()
top_result = [0 for y in range(2)]
second_result = [0 for y in range(2)]
third_result = [0 for y in range(2)]
top_array_words_not_found = []
second_array_words_not_found = []
third_array_words_not_found = []
counter_for_9vs42 = 0
counter_for_data_errors = 0
top_result = [0 for y in range(2)]
second_result = [0 for y in range(2)]
third_result = [0 for y in range(2)]
top_array_words_not_found = []
second_array_words_not_found = []
third_array_words_not_found = []
actual_class_results = [0 for y in range(2)]
overall_array_results = [[0 for x in range(3)] for y in range(4)]
actual_class_words_not_found = []
i = 1
while i <= 45:
total_found = 0
total_TM_words_matched = 0
not_found_words = ['']
score = 0
N = len(word_count_array_for_all_classes[i])
total_found, total_TM_words_matched, not_found_words, score = classes.analyze_each_line_test_data(test_data_array, N,
word_count_array_for_all_classes[i])
if int(score) > 0:
if int(score) > top_result[0]:
third_result[0] = second_result[0]
third_result[1] = second_result[1]
third_array_words_not_found = second_array_words_not_found
second_result[0] = top_result[0]
second_result[1] = top_result[1]
second_array_words_not_found = top_array_words_not_found
top_result[0] = int(score)
top_result[1] = i
top_array_words_not_found = ['']
top_array_words_not_found = not_found_words
elif int(score) > second_result[0]:
third_result[0] = second_result[0]
third_result[1] = second_result[1]
third_array_words_not_found = second_array_words_not_found
second_result[0] = int(score)
second_result[1] = i
second_array_words_not_found = ['']
second_array_words_not_found = not_found_words
elif int(score) > third_result[0]:
third_result[0] = int(score)
third_result[1] = i
third_array_words_not_found = ['']
third_array_words_not_found = not_found_words
i += 1
overall_array_results[0][0] = top_result[0]
overall_array_results[0][1] = top_result[1]
overall_array_results[0][2] = top_array_words_not_found
overall_array_results[1][0] = second_result[0]
overall_array_results[1][1] = second_result[1]
overall_array_results[1][2] = second_array_words_not_found
overall_array_results[2][0] = third_result[0]
overall_array_results[2][1] = third_result[1]
overall_array_results[2][2] = third_array_words_not_found
# all results - including the first, second, third choices of the engine and the original description - are written to the file
# create_results_file(file_name_results, overall_array_results, test_data_array)
# start_writing_results = open(file_name_results, 'a')
# start_writing_results.write("The end of the test")
# start_writing_results.write("\n")
#
# start_writing_results.write("\n")
# start_writing_results.write("\n")
# start_writing_results.close()
# print "finished the process"
根据我提供的代码,您可以看到这些参数通过python raw_input
提供,并且在计算代码之后创建了一个文件,您可以在其中阅读结果。
我已经重写了这个,所以我可以通过django应用程序提供服务,因此参数classes
和description
应该覆盖raw_input
,结果将显示在模板中,像这样:
{{ trademark.overall_array_results.top_result }}<br>
{{ trademark.overall_array_results.second_result }}<br>
{{ trademark.overall_array_results.third_result }}
我不确定我是否在这里写作,所以我需要帮助才能更好地理解这一点,有人可以帮我解决错误。
答案 0 :(得分:1)
如果classes
不是“是”,则word_count_array_for_all_classes
仍为空列表。