我有一个名为ML Project的文件夹,其中有2个文件-getEmbeddings.ipynb和naive-bayes.ipynb(因为这些文件是在jupyter笔记本中创建的)
主文件夹的路径为C:\Users\Anushree\Desktop\ML Project
之前运行 naive-bayes 文件时,它可以正常运行并显示混淆矩阵;但一个月后再次运行时,出现了以下错误-
ModuleNotFoundError Traceback (most recent call last) <ipython-input-19-854d3e562c4f> in <module>
7 """
8
----> 9 from getEmbeddings import getEmbeddings
10 from sklearn.naive_bayes import GaussianNB
11 import numpy as np
ModuleNotFoundError: No module named 'getEmbeddings'
getEmbeddings文件具有以下内容-
"""
Fake news detection
The Doc2Vec pre-processing
"""
import numpy as np
import re
import string
import pandas as pd
from gensim.models import Doc2Vec
from gensim.models.doc2vec import LabeledSentence
from gensim import utils
from nltk.corpus import stopwords
def textClean(text):
    """
    Remove characters outside the kept set, lowercase the text, and
    drop English stopwords; return the remaining tokens joined by spaces.
    """
    # NOTE(review): inside the character class, '+-=' is a range from
    # '+' to '=' (which also covers digits and some punctuation), so more
    # characters survive than the list suggests — kept as-is on purpose.
    stripped = re.sub(r"[^A-Za-z0-9^,!.\/'+-=]", " ", text)
    tokens = stripped.lower().split()
    stop_set = set(stopwords.words("english"))
    kept = [tok for tok in tokens if tok not in stop_set]
    return " ".join(kept)
def cleanup(text):
    """Clean *text* via textClean, then strip all remaining punctuation."""
    cleaned = textClean(text)
    return cleaned.translate(str.maketrans("", "", string.punctuation))
def constructLabeledSentences(data):
    """
    Build one LabeledSentence per entry of *data*, tagged 'Text_<index>'.

    Parameters
    ----------
    data : pandas.Series of str
        Cleaned document texts, indexed by row position.

    Returns
    -------
    list of LabeledSentence
        Tokenized documents ready for Doc2Vec training.
    """
    sentences = []
    # BUG FIX: Series.iteritems() was removed in pandas 2.0; items() is
    # the equivalent and also works on older pandas versions.
    for index, row in data.items():
        sentences.append(LabeledSentence(utils.to_unicode(row).split(), ['Text' + '_%s' % str(index)]))
    return sentences
def getEmbeddings(path, vector_dimension=300):
    """
    Train Doc2Vec on the CSV at *path* and return an 80/20 split.

    Parameters
    ----------
    path : str
        CSV file expected to have 'id', 'text' and 'label' columns.
    vector_dimension : int, optional
        Size of each document vector (default 300).

    Returns
    -------
    (train_arrays, test_arrays, train_labels, test_labels)
        numpy arrays; the split is in file order (no shuffling here).
    """
    data = pd.read_csv(path)

    # NaN != NaN, so this collects indices of rows with missing 'text'.
    missing_rows = []
    for i in range(len(data)):
        if data.loc[i, 'text'] != data.loc[i, 'text']:
            missing_rows.append(i)
    data = data.drop(missing_rows).reset_index().drop(['index', 'id'], axis=1)

    for i in range(len(data)):
        data.loc[i, 'text'] = cleanup(data.loc[i, 'text'])

    x = constructLabeledSentences(data['text'])
    y = data['label'].values

    text_model = Doc2Vec(min_count=1, window=5, vector_size=vector_dimension,
                         sample=1e-4, negative=5, workers=7, epochs=10,
                         seed=1)
    text_model.build_vocab(x)
    # BUG FIX: Doc2Vec.iter was removed in gensim 4.x (the constructor
    # here already uses the gensim-4 style `epochs` keyword), so read the
    # epoch count back through the `epochs` attribute instead.
    text_model.train(x, total_examples=text_model.corpus_count,
                     epochs=text_model.epochs)

    train_size = int(0.8 * len(x))
    test_size = len(x) - train_size

    text_train_arrays = np.zeros((train_size, vector_dimension))
    text_test_arrays = np.zeros((test_size, vector_dimension))
    train_labels = np.zeros(train_size)
    test_labels = np.zeros(test_size)

    # Documents were tagged 'Text_<i>' in file order, so positional
    # indices map directly onto the trained document vectors.
    for i in range(train_size):
        text_train_arrays[i] = text_model.docvecs['Text_' + str(i)]
        train_labels[i] = y[i]

    j = 0
    for i in range(train_size, train_size + test_size):
        text_test_arrays[j] = text_model.docvecs['Text_' + str(i)]
        test_labels[j] = y[i]
        j = j + 1

    return text_train_arrays, text_test_arrays, train_labels, test_labels
def clean_data():
    """
    Load datasets/train.csv, drop rows with missing text, clean every
    text, shuffle the rows, and persist an 80/20 train/test split.

    Side effects only: writes xtr/xte/ytr/yte '_shuffled.npy' files to
    the working directory; returns nothing.
    """
    path = 'datasets/train.csv'
    vector_dimension = 300  # kept from the original; not used below

    data = pd.read_csv(path)

    # NaN != NaN, so this collects indices of rows with missing 'text'.
    missing_rows = [i for i in range(len(data))
                    if data.loc[i, 'text'] != data.loc[i, 'text']]
    data = data.drop(missing_rows).reset_index().drop(['index', 'id'], axis=1)

    for i in range(len(data)):
        data.loc[i, 'text'] = cleanup(data.loc[i, 'text'])

    # Shuffle all rows before splitting (unseeded, as in the original).
    data = data.sample(frac=1).reset_index(drop=True)

    x = data.loc[:, 'text'].values
    y = data.loc[:, 'label'].values

    train_size = int(0.8 * len(y))
    test_size = len(x) - train_size  # kept from the original; not used below

    np.save('xtr_shuffled.npy', x[:train_size])
    np.save('xte_shuffled.npy', x[train_size:])
    np.save('ytr_shuffled.npy', y[:train_size])
    np.save('yte_shuffled.npy', y[train_size:])
我已经在同一文件夹中创建了一个 __init__.py 文件,并尝试改用 from .getEmbeddings import getEmbeddings,但这没有起作用,仍然报同样的错误。
请提供其他解决方案的帮助来解决此问题。