我有一个class DataLoader():
def __init__(self, data, train_ratio):
    """Build the vocabulary and the train/test token streams from a corpus.

    Args:
        data: Corpus object exposing ``sents()`` (an iterable of token
            sequences) and ``words()`` (an iterable of tokens).
        train_ratio: Fraction of the sentences, taken from the front of
            the corpus, whose tokens form the training stream; the
            remaining sentences form the test stream.

    Attributes set:
        all_sents: The raw sentences as returned by ``data.sents()``.
        full_vocab: Lower-cased form of every distinct raw token (may
            contain repeats when tokens differ only by case).
        vocab_count: Lower-cased token -> number of occurrences.
        sorted_count: ``(token, count)`` pairs, most frequent first.
        refined_vocab: Tokens seen more than 50 times, followed by the
            special tokens ``<ukn>``, ``<s>`` and ``</s>``.
        sents: Lower-cased sentences wrapped in ``<s>`` / ``</s>`` with
            out-of-vocabulary tokens replaced by ``<ukn>``.
        train, test: Flat token lists built from ``sents``.
    """
    from collections import Counter

    self.all_sents = list(data.sents())
    num_train = int(train_ratio * len(self.all_sents))

    # Read the corpus words once; every vocabulary structure derives from it.
    raw_words = list(data.words())
    self.full_vocab = [word.lower() for word in set(raw_words)]
    counts = Counter(word.lower() for word in raw_words)
    self.vocab_count = dict(counts)
    # most_common() sorts by count, descending, keeping first-seen order
    # among ties, so the vocabulary order is reproducible between runs.
    self.sorted_count = counts.most_common()
    self.refined_vocab = [word for word, count in self.sorted_count if count > 50]
    self.refined_vocab += ['<ukn>', '<s>', '</s>']

    # Set membership keeps the per-token vocabulary check O(1).
    known = set(self.refined_vocab)

    def process_sent(sent):
        """Lower-case, add sentence boundaries and mask unknown tokens."""
        tokens = ['<s>', *(word.lower() for word in sent), '</s>']
        return [word if word in known else '<ukn>' for word in tokens]

    self.sents = [process_sent(sent) for sent in self.all_sents]

    # Sentences are already normalised, so the splits are plain flattenings.
    self.train = [word for sent in self.sents[:num_train] for word in sent]
    self.test = [word for sent in self.sents[num_train:] for word in sent]
class Model(DataLoader):
    """Unigram/bigram language model over the training stream of DataLoader.

    Args:
        data: Corpus object forwarded to ``DataLoader``.
        train_ratio: Train/test split ratio forwarded to ``DataLoader``.
        n: Order of the model; stored on the instance, not used by the
            methods defined here.
        lambda_: Weight used for the unigram term added to every bigram
            estimate in ``compute_bigram``.
    """

    def __init__(self, data, train_ratio, n, lambda_):
        self.n = n
        self.lambda_ = lambda_
        super().__init__(data, train_ratio)
        # Zero-count template with one entry per vocabulary word.
        self.root = {word: 0 for word in self.refined_vocab}

    def compute_unigram(self):
        """Store relative token frequencies of the training stream.

        Sets ``self.unigrams`` to a ``word -> probability`` dict covering
        the whole refined vocabulary. Returns ``None``.
        """
        counts = dict.fromkeys(self.refined_vocab, 0)
        for token in self.train:
            counts[token] += 1
        total = sum(counts.values())
        # An empty training stream yields all-zero probabilities instead of
        # a ZeroDivisionError.
        self.unigrams = {
            word: (count / total if total else 0.0)
            for word, count in counts.items()
        }
        return None

    def compute_bigram(self):
        """Store smoothed bigram estimates of the training stream.

        Sets ``self.bigrams[w1][w2]`` to the relative frequency of ``w2``
        after ``w1`` plus ``lambda_ * vocabulary_size * unigram(w2)``.
        Also refreshes ``self.unigrams``. Returns ``None``.
        """
        # compute_unigram() stores its result on the instance and returns
        # None, so its return value must not be assigned to self.unigrams.
        self.compute_unigram()

        # Values are plain ints, so a shallow copy of the template suffices.
        pair_counts = {word: dict(self.root) for word in self.refined_vocab}
        for first, second in zip(self.train, self.train[1:]):
            pair_counts[first][second] += 1

        # NOTE(review): rows are not renormalised after the unigram term is
        # added - confirm this is the intended smoothing scheme.
        total_mass = self.lambda_ * len(self.root)
        self.bigrams = {}
        for first, followers in pair_counts.items():
            row_total = sum(followers.values())
            self.bigrams[first] = {
                # A context never seen in training contributes no
                # relative-frequency term rather than dividing by zero.
                second: (count / row_total if row_total else 0.0)
                + total_mass * self.unigrams[second]
                for second, count in followers.items()
            }
        return None
类,看起来像这样
Foo
我想在gtest框架中编写此类的测试装置。在我的装置中,我有两个struct Foo {
    // Constructors
    Foo() = default;
    Foo (const int& a) : a(a) {}
    // Methods
    // Returns the stored value; const-qualified so that it can also be
    // called on const Foo objects.
    int getA() const {return a;}
    // Data member(s)
    // Zero-initialised so that a default-constructed Foo never holds an
    // indeterminate value.
    int a = 0;
};
类型(Foo)的对象,其中一个是const
,另一个不是。像下面这样的测试夹具
不不起作用,因为在class FooTest : public ::testing::Test{
protected:
    // Called by the framework after the fixture object, and therefore
    // every member below, has already been constructed.
    void SetUp() override {
        objectFoo = Foo(1);
        // Intentionally left as in the question: this line is rejected by
        // the compiler because objectFooConst is const and has already
        // been default-constructed by the time SetUp runs.
        objectFooConst = Foo(10);
    }
    Foo objectFoo;             // mutable object used by the tests
    const Foo objectFooConst;  // const object; can only be given a value at construction
};
SetUp方法中执行objectFooConst = Foo(10)
之前,objectFooConst
是由Foo
的默认构造函数构造的。为了使其工作,我为测试夹具添加了一个构造函数,并让该构造函数负责初始化objectFooConst
。换句话说,我是这样做的
我想知道这是否是完成此任务的正确/标准方法