New to machine learning, so please bear with me.
I'm trying to train a convolutional neural network to classify Rotten Tomatoes reviews as positive or negative. The raw dataframe itself isn't very complicated. I've added a column indicating whether each row belongs to the training, validation, or test split, and cleaned the text, but beyond that it looks more or less like the following:
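Something like this, with made-up placeholder rows (the column names match what the code below expects):

import pandas as pd

review_df = pd.DataFrame({
    "freshness": ["fresh", "rotten", "fresh"],  # the label
    "review":    ["a great movie", "a terrible movie", "loved it"],
    "split":     ["train", "train", "val"],     # train / validation / test flag
})

First, the Vocabulary class I use to map tokens to indices: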
class Vocabulary(object):
    def __init__(self, token_to_idx=None, add_unk=True, unk_token="<UNK>"):
        if token_to_idx is None:
            token_to_idx = {}
        self.token_to_idx_ = token_to_idx
        self.idx_to_token_ = {
            idx: token
            for token, idx in self.token_to_idx_.items()
        }
        self.add_unk_ = add_unk
        self.unk_token_ = unk_token
        self.unk_index_ = -1  # stays -1 only when no <UNK> token is added
        if self.add_unk_:
            self.unk_index_ = self.add_token(unk_token)

    def to_serializable(self):
        return {
            "token_to_idx": self.token_to_idx_,
            "add_unk": self.add_unk_,
            "unk_token": self.unk_token_
        }

    @classmethod
    def from_serializable(cls, contents):
        return cls(**contents)

    def add_token(self, token):
        # return the existing index, or append the token with a new index
        if token in self.token_to_idx_:
            index = self.token_to_idx_[token]
        else:
            index = len(self.token_to_idx_)
            self.token_to_idx_[token] = index
            self.idx_to_token_[index] = token
        return index

    def lookup_token(self, token):
        # unknown tokens fall back to the <UNK> index when add_unk is on
        if self.add_unk_:
            return self.token_to_idx_.get(token, self.unk_index_)
        else:
            return self.token_to_idx_[token]

    def lookup_index(self, index):
        if index not in self.idx_to_token_:
            raise KeyError("index (%d) is not in the Vocabulary" % index)
        return self.idx_to_token_[index]

    def __str__(self):
        return "<Vocabulary(size=%d)>" % len(self)

    def __len__(self):
        return len(self.token_to_idx_)
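A quick round trip to show how I expect it to behave (just an illustration, not part of the pipeline):

vocab = Vocabulary()
idx = vocab.add_token("fresh")
print(vocab.lookup_token("fresh"))   # the same index add_token returned
print(vocab.lookup_token("unseen"))  # falls back to the <UNK> index (0 here)
print(vocab.lookup_index(idx))       # "fresh"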
What I'm having trouble understanding is how to determine the correct dimensions for the input fed into my classifier. The relevant code follows.
Class definitions:
import string
from collections import Counter

import numpy as np

class Vectorizer(object):
    def __init__(self, review_vocab, rating_vocab):
        self.review_vocab = review_vocab
        self.rating_vocab = rating_vocab

    def vectorize(self, review):
        # create a collapsed one-hot vector for new observations
        one_hot = np.zeros(len(self.review_vocab), dtype=np.float64)
        for token in review.split(" "):
            if token not in string.punctuation:
                one_hot[self.review_vocab.lookup_token(token)] = 1
        return one_hot

    @classmethod
    def from_dataframe(cls, review_df, cutoff=10):
        review_vocab = Vocabulary(add_unk=True)
        rating_vocab = Vocabulary(add_unk=False)
        # the ratings become the class vocabulary
        for rating in sorted(set(review_df["freshness"])):
            rating_vocab.add_token(rating)
        # only words seen more than `cutoff` times enter the review vocabulary
        word_counts = Counter()
        for review in review_df["review"]:
            for word in review.split(" "):
                if word not in string.punctuation:
                    word_counts[word] += 1
        for word, count in word_counts.items():
            if count > cutoff:
                review_vocab.add_token(word)
        return cls(review_vocab, rating_vocab)

    @classmethod
    def from_serializable(cls, contents):
        review_vocab = Vocabulary.from_serializable(contents["review_vocab"])
        rating_vocab = Vocabulary.from_serializable(contents["rating_vocab"])
        return cls(review_vocab=review_vocab, rating_vocab=rating_vocab)

    def to_serializable(self):
        return {
            "review_vocab": self.review_vocab.to_serializable(),
            "rating_vocab": self.rating_vocab.to_serializable()
        }
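And this is how I understand the vectorizer behaving (tiny made-up dataframe, with the cutoff lowered so every word survives):

import pandas as pd

df = pd.DataFrame({
    "review": ["a great movie", "a terrible movie"],
    "freshness": ["fresh", "rotten"],
})
v = Vectorizer.from_dataframe(df, cutoff=0)
one_hot = v.vectorize("a great film")
print(one_hot.shape)  # (len(v.review_vocab),) -- one slot per vocabulary word
print(one_hot.sum())  # 3.0: "a", "great", and <UNK> (standing in for "film")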
import torch
import torch.nn as nn

class Classifier(nn.Module):
    def __init__(self, initial_n_channels, n_classes, network_n_channels):
        super(Classifier, self).__init__()
        self.network = nn.Sequential(
            nn.Conv1d(in_channels=initial_n_channels,
                      out_channels=network_n_channels,
                      kernel_size=args["kernel_size"]),
            nn.ReLU(),
            nn.Conv1d(in_channels=network_n_channels,
                      out_channels=network_n_channels,
                      kernel_size=args["kernel_size"],
                      stride=args["stride"]),
            nn.ReLU(),
            nn.Conv1d(in_channels=network_n_channels,
                      out_channels=network_n_channels,
                      kernel_size=args["kernel_size"],
                      stride=args["stride"]),
            nn.ReLU(),
            nn.Conv1d(in_channels=network_n_channels,
                      out_channels=network_n_channels,
                      kernel_size=args["kernel_size"],
                      stride=args["stride"]),
            nn.ReLU()
        )
        self.fc = nn.Linear(network_n_channels, n_classes)

    def forward(self, x_in, apply_sigmoid=False):
        features = self.network(x_in)  # squeeze?
        prediction_vector = self.fc(features)
        if apply_sigmoid:
            prediction_vector = torch.sigmoid(prediction_vector)
        return prediction_vector
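Part of my confusion is the "# squeeze?" comment above: as far as I can tell, the conv stack outputs (batch, channels, remaining_length), which nn.Linear can't consume directly as (batch, features). A toy shape check with made-up sizes:

import torch
import torch.nn as nn

net = nn.Sequential(nn.Conv1d(4, 8, kernel_size=3), nn.ReLU())
out = net(torch.randn(2, 4, 10))
print(out.shape)                 # torch.Size([2, 8, 8])
print(out.squeeze(dim=2).shape)  # unchanged: squeeze(2) only drops the dim when remaining_length == 1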
import torch.optim as optim

# dataset and vectorizer
dataset = ReviewDataset.load_and_vectorize(args["review_csv"])
vectorizer = dataset.get_vectorizer()

# model
classifier = Classifier(initial_n_channels=len(vectorizer.review_vocab),
                        n_classes=len(vectorizer.rating_vocab),
                        network_n_channels=args["num_channels"])

# loss and optimizer
loss_func = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(classifier.parameters(), lr=args["learning_rate"])
Running the above code results in:
RuntimeError: Expected 3-dimensional input for 3-dimensional weight 128 7882, but got 2-dimensional input of size [128, 7882] instead
My input instances are two-dimensional tensors of shape:
(batch_size, len(review_vocab))
I thought a two-dimensional tensor of that shape would be fine, but apparently my CNN needs a third dimension in the data? What can I do to add the extra dimension, or to change what the classifier itself expects?
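For reference, a minimal reproduction of the shape requirement (made-up sizes, unrelated to my actual vocabulary):

import torch
import torch.nn as nn

conv = nn.Conv1d(in_channels=4, out_channels=8, kernel_size=3)
x2d = torch.randn(128, 4)      # (batch, features): what my collapsed one-hot gives
x3d = torch.randn(128, 4, 10)  # (batch, channels, seq_len): what Conv1d wants
# conv(x2d)                    # raises the same "Expected 3-dimensional input" error
print(conv(x3d).shape)         # torch.Size([128, 8, 8])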