I have a string that I want to split into a list so that each element of the list contains N words. If the last element doesn't have enough words, leave it as is.
For example:
>>> Split_by_N_words(string="I grow a turtle. It's a very slow but cute animal.", N=4)
["I grow a turtle.", "It's a very slow", "but cute animal."]
I tried doing it like this:
def Split_by_N_words(string, N):
    word_list = [""]
    for a in string.split()[:N]:
        word_list[0] += a + " "
    .....
but I don't know how to do the rest.
Answer 0 (score: 0)
Try this:
def Split_by_N_words(string, N):
    word_list = string.split()
    out_list = []
    # Walk through the words in steps of N and join each chunk
    # back into a single space-separated string
    for el in range(0, len(word_list), N):
        out_list.append(" ".join(word_list[el:el + N]))
    return out_list

print(Split_by_N_words("I grow a turtle. It's a very slow but cute animal.", 4))
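As a side note, the same chunking can be written as a single list comprehension; this is just a more compact equivalent of the loop above:

def Split_by_N_words(string, N):
    words = string.split()
    # One chunk of up to N words per step; the last chunk may be shorter
    return [" ".join(words[i:i + N]) for i in range(0, len(words), N)]

Either version prints ['I grow a turtle.', "It's a very slow", 'but cute animal.'] for the example input.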
Answer 1 (score: 0)
import logging

import numpy as np
import torch
import torch.nn as nn
from transformers import AdamW, get_linear_schedule_with_warmup

logger = logging.getLogger(__name__)


class Trainer(object):
    def __init__(self, args, model, device, data_points, is_test=False, train_stats=None):
        self.args = args
        self.model = model
        self.device = device
        # Per-example losses; reduced manually in step()/validate()
        self.loss = nn.CrossEntropyLoss(reduction='none')
        if is_test:
            # Load the saved checkpoint, then switch to eval mode
            self.model.load_state_dict(torch.load(args.saved_model))
            self.model.eval()
            logger.info('Loaded saved model from %s' % args.saved_model)
        else:
            self.model.train()
            self.optim = AdamW(model.parameters(), lr=2e-5, eps=1e-8)
            total_steps = data_points * self.args.epochs
            self.scheduler = get_linear_schedule_with_warmup(
                self.optim, num_warmup_steps=0, num_training_steps=total_steps)

    def step(self, batch):
        batch = tuple(t.to(self.device) for t in batch)
        batch_input_ids, batch_input_masks, batch_labels = batch
        self.model.zero_grad()
        outputs = self.model(batch_input_ids,
                             attention_mask=batch_input_masks,
                             labels=batch_labels)
        # Assuming a Hugging Face model: when labels are passed, the
        # output is (loss, logits), so index 1 holds the logits
        logits = outputs[1]
        per_example_loss = self.loss(logits, batch_labels)
        loss = per_example_loss.sum()
        # Backpropagate the mean (the sum divided by the batch size)
        (loss / per_example_loss.numel()).backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
        self.optim.step()
        self.scheduler.step()
        return loss

    def validate(self, batch):
        batch = tuple(t.to(self.device) for t in batch)
        batch_input_ids, batch_input_masks, batch_labels = batch
        with torch.no_grad():
            outputs = self.model(batch_input_ids,
                                 attention_mask=batch_input_masks,
                                 labels=batch_labels)
            logits = outputs[1]
            predicted_label_ids = self._predict(logits)
            label_ids = batch_labels.to('cpu').numpy()
            loss = self.loss(logits, batch_labels).sum()
        return label_ids, predicted_label_ids, loss

    def _predict(self, logits):
        # Class with the highest logit for each example
        return np.argmax(logits.to('cpu').numpy(), axis=1)
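For completeness, here is a minimal sketch of how this Trainer might be driven. It assumes a classification model, an args object with an epochs attribute, and a DataLoader named train_loader that yields (input_ids, attention_mask, labels) tuples; none of these names come from the answer itself:

# Hypothetical driver loop; args, model, and train_loader are assumed
# to be defined elsewhere
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
trainer = Trainer(args, model.to(device), device,
                  data_points=len(train_loader))  # batches per epoch
for epoch in range(args.epochs):
    for batch in train_loader:
        loss = trainer.step(batch)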