我是 FeatureUnion 的新手。我知道 FeatureUnion 中各个转换器接收的输入必须相同。在拟合带有 FeatureUnion 的管道并调用预测之后,出现了下面的错误。有没有办法像使用普通的 scikit-learn 机器学习模型那样直接进行预测?
ValueError: blocks[0,:] has incompatible row dimensions. Got blocks[0,1].shape[0] == 1739, expected 6954.
数据构造函数如下。
完整脚本如下所示。df_Xtest 是由 X_test 构造的 DataFrame,以便将其传入 FeatureUnion。
import pandas as pd
import numpy as np
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize, RegexpTokenizer
import re
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, TfidfTransformer
import string
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import FunctionTransformer
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.svm import LinearSVC, SVC
# Custom transformer that cleans raw text before it is vectorised
class predictors(TransformerMixin):
    """Stateless transformer that applies ``clean_text`` to each document.

    The class keeps no fitted state: ``fit`` is a no-op and ``get_params``
    reports no parameters.
    """

    def transform(self, X, **transform_params):
        """Return a list with the cleaned version of every text in ``X``.

        ``X`` is iterated once; each element is passed to ``clean_text``.
        Extra keyword arguments are accepted and ignored.
        """
        return [clean_text(text) for text in X]

    def fit(self, X, y=None, **fit_params):
        """Return ``self`` unchanged; nothing is learned from ``X`` or ``y``."""
        return self

    def get_params(self, deep=True):
        """Return an empty dict, since the transformer has no parameters."""
        return {}
# basic function to clean the text
def clean_text(text):
    """Return ``text`` with surrounding whitespace stripped and lower-cased."""
    return text.strip().lower()
# create negation feature function
def negation_feature(audio_dataset):
    """Return the ``negation_feature`` column of the input as a DataFrame.

    The column is selected for every row of ``audio_dataset``. The earlier
    version sliced index labels ``0:6953``, which tied the function to one
    specific frame and returned too few (or no) rows for any frame whose
    index did not match that range.

    Parameters
    ----------
    audio_dataset : pandas.DataFrame
        Frame containing a ``negation_feature`` column.

    Returns
    -------
    pandas.DataFrame
        Single-column frame with the same index as the input.

    Raises
    ------
    KeyError
        If ``negation_feature`` is not a column of ``audio_dataset``.
    """
    # Double brackets keep the result two-dimensional (one column).
    return audio_dataset[['negation_feature']]
def X_y_feature(audio_dataset):
    """Return the training portion of a fixed 80/20 split of ``X`` and ``y``.

    ``X`` and ``y`` are read from the enclosing module scope; they are not
    defined in this function. The split uses ``test_size=0.2`` and
    ``random_state=42``.
    """
    # NOTE(review): ``audio_dataset`` is never used, so the result does not
    # depend on what the pipeline passes in. The output therefore always has
    # the row count of the training split, which is presumably why the
    # FeatureUnion reports mismatched row dimensions at predict time --
    # confirm and derive the features from ``audio_dataset`` instead.
    X_train, _X_test, _y_train, _y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    return X_train
# Wrap the two plain functions as transformers so they can be used as
# steps inside the pipeline / feature union below.
negation_feature_ft = FunctionTransformer(func=negation_feature, validate=False)
X_y_feature_ft = FunctionTransformer(func=X_y_feature, validate=False)
# Pipeline whose first stage is a FeatureUnion joining two branches:
#   * 'Pipeline'            -> X_y_feature_ft, text cleaning, then `vectorizer`
#   * 'functiontransformer' -> the negation feature column
# The joined features are handed to `clf_svc`.
# (`vectorizer` and `clf_svc` are expected to be defined elsewhere.)
pipe_svc = Pipeline(
    steps=[
        (
            'featureunion',
            FeatureUnion(
                transformer_list=[
                    (
                        'Pipeline',
                        Pipeline(
                            steps=[
                                ('functiontransformer', X_y_feature_ft),
                                ('cleaner', predictors()),
                                ('vectorizer', vectorizer),
                            ]
                        ),
                    ),
                    ('functiontransformer', negation_feature_ft),
                ]
            ),
        ),
        ('classifier', clf_svc),
    ]
)

# Predict on the held-out frame.
# NOTE(review): the pipeline built above is bound to `pipe_svc`; `pipe_union`
# is not defined in the visible code -- confirm which object is intended and
# that it has been fitted before this call.
pipe_union.predict(df_Xtest)