如何智能地将一段文本分成段落?

时间:2016-05-19 10:04:41

标签: javascript jquery html css

下面的 fiddle(JSFiddle 示例)可以把文本导入 <textarea>,并动态拆分成长度相等的段落。是否可以在不从句子中间截断的情况下把文本拆分成段落?我希望每个段落的长度等于或接近 chunkSize(或用户自定义的上限),并且页面上每个段落元素的高度相同。

如果能提供更新后的 fiddle,那将非常有帮助,因为我对编程还不太熟悉。

谢谢!

Fiddle

&#13;
&#13;
// Once the DOM is ready, wire every <select> so that changing it applies the
// chosen value to the CSS property named in its data-property attribute.
$(function () {
    var selects = $('select');

    selects.on('change', function () {
        // Style the #content container rather than each <p>; the paragraphs
        // inherit their font size from it through CSS.
        var container = $('#content');
        var cssProperty = this.dataset.property;

        container.css(cssProperty, this.value);
        sameheight('#content p');
    });

    // Put every select back on its first option.
    selects.prop('selectedIndex', 0);
});
// Shared state for the paragraph splitter: the DOM nodes it works with and the
// target paragraph length.
var btn = document.getElementById('go'),
    textarea = document.getElementById('textarea1'),
    content = document.getElementById('content'),
    // Target paragraph length in characters. Declared explicitly so it is not
    // an implicit global (which would throw in strict mode or in a module).
    chunkSize = 100;

// The button rebuilds the paragraphs; typing and pasting inside the generated
// paragraphs are handled through listeners on the container.
btn.addEventListener('click', initialDistribute);
content.addEventListener('keyup', handleKey);
content.addEventListener('paste', handlePaste);

/**
 * Click handler for the "go" button: rebuilds the paragraphs from the textarea.
 *
 * Reads the optional size from the #custom input. A positive integer replaces
 * the shared chunkSize; anything else leaves chunkSize as it was. All existing
 * children of the content container are removed, then the textarea text is
 * handed to rearrange().
 */
function initialDistribute() {
    // Kept local: an undeclared `custom` would leak a global that shares its
    // name with the input's id. The radix makes the decimal parsing explicit.
    var custom = parseInt(document.getElementById('custom').value, 10);
    var text = textarea.value;

    if (custom > 0) {
        chunkSize = custom;
    }

    while (content.hasChildNodes()) {
        content.removeChild(content.lastChild);
    }
    rearrange(text);
}

/**
 * Splits the text into chunks and appends one editable paragraph per chunk to
 * the content container, then equalises the paragraph heights.
 *
 * @param {string} text - Text to distribute over new paragraphs.
 */
function rearrange(text) {
    var chunks = splitText(text, false);

    chunks.forEach(function (chunk) {
        // Declared locally so each paragraph node does not leak into (and
        // overwrite) a global `para`.
        var para = document.createElement('P');
        para.classList.add('Paragraph_CSS');
        para.setAttribute('contenteditable', true);
        para.textContent = chunk;
        content.appendChild(para);
    });

    sameheight('#content p');
}

/**
 * keyup handler for the content container.
 *
 * Events whose target is not a <p> are ignored. Any key other than Enter (13)
 * or Backspace (8) triggers redistributeAuto() on the paragraph. Enter moves
 * the paragraph's last child node plus all following siblings into freshly
 * built paragraphs. Backspace with the caret at offset 0 re-flows the text
 * starting from the previous paragraph, if there is one.
 *
 * @param {KeyboardEvent} e - The keyup event.
 */
function handleKey(e) {
    var target = e.target;
    var keyCode = e.which || e.keyCode || 0;
    var caretOffset, lastNode, tailText, previous;

    if (target.tagName !== 'P') {
        return;
    }

    if (keyCode !== 13 && keyCode !== 8) {
        redistributeAuto(target);
        return;
    }

    // Read the caret position before any node is removed.
    caretOffset = window.getSelection().getRangeAt(0).startOffset;

    if (keyCode === 13) {
        // Detach the paragraph's last child and re-flow its text together
        // with the text of every following sibling.
        lastNode = target.lastChild;
        tailText = lastNode.textContent;
        lastNode.parentNode.removeChild(lastNode);
        rearrange(tailText + removeSiblings(target, false));
        return;
    }

    // Backspace: only act at the very start of a paragraph that has a
    // predecessor.
    previous = target.previousElementSibling;
    if (previous && caretOffset === 0) {
        rearrange(removeSiblings(previous, true));
    }
}

/**
 * paste handler for the content container: re-flows the text from the pasted-
 * into paragraph onwards.
 *
 * Events whose target is not a <p> are ignored.
 *
 * @param {ClipboardEvent} e - The paste event.
 */
function handlePaste(e) {
    var para = e.target;

    if (para.tagName != 'P') {
        return;
    }

    // The paste event is dispatched before the clipboard content is inserted,
    // so wait one tick to let the browser update the paragraph first.
    setTimeout(function () {
        // The paragraph may already have been replaced by another handler.
        if (!para.parentNode) {
            return;
        }
        // removeSiblings(para, true) already returns this paragraph's own
        // text, so it must not be prepended a second time.
        rearrange(removeSiblings(para, true));
    }, 0);
}

/**
 * Re-flows the paragraphs when the given one has grown past chunkSize.
 *
 * @param {Element} para - The paragraph that was just edited.
 */
function redistributeAuto(para) {
    // Nothing to do while the paragraph still fits; calling rearrange()
    // without text here would make splitText() fail on an undefined value.
    if (para.textContent.length <= chunkSize) {
        return;
    }
    rearrange(removeSiblings(para, true));
}

/**
 * Removes nodes from elem's parent and returns the text they contained.
 *
 * - includeCurrent is truthy and elem has no previous element sibling: every
 *   child of the parent is removed and the parent's full text is returned.
 * - includeCurrent is truthy otherwise: elem and all following siblings are
 *   removed.
 * - includeCurrent is falsy: only the siblings after elem are removed.
 *
 * @param {Node} elem - Reference node.
 * @param {boolean} includeCurrent - Whether elem itself is removed too.
 * @returns {string} Concatenated textContent of the removed nodes.
 */
function removeSiblings(elem, includeCurrent) {
    var text = '',
        container,
        anchor,
        next;

    if (includeCurrent && !elem.previousElementSibling) {
        // Kept local: an undeclared `parent` would overwrite window.parent.
        container = elem.parentNode;
        text = container.textContent;
        while (container.hasChildNodes()) {
            container.removeChild(container.lastChild);
        }
        return text;
    }

    // Everything after the anchor goes; to include elem itself, anchor on
    // its predecessor.
    anchor = includeCurrent ? elem.previousElementSibling : elem;
    next = anchor.nextSibling;
    while (next) {
        text += next.textContent;
        anchor.parentNode.removeChild(next);
        next = anchor.nextSibling;
    }
    return text;
}

/**
 * Cuts text into chunks of roughly chunkSize characters without splitting a
 * word.
 *
 * In the default mode each cut is made at the first space at or after
 * chunkSize characters from the chunk start, so a chunk may run longer than
 * chunkSize. The space itself starts the next chunk, which means joining the
 * chunks gives back the original text.
 *
 * @param {string} text - Text to split.
 * @param {boolean} useRegex - When truthy, split with a word-boundary regex
 *     of at most chunkSize characters per match instead.
 * @returns {string[]} The chunks in order; empty when there is nothing to
 *     split.
 */
function splitText(text, useRegex) {
    var pieces = [];
    var total, start, cut, spaceAt, wordPattern;

    if (useRegex) {
        wordPattern = new RegExp('.{1,' + chunkSize + '}\\b', 'g');
        return text.match(wordPattern) || [];
    }

    total = text.length;
    start = 0;
    while (start < total) {
        cut = start + chunkSize;
        spaceAt = text.indexOf(' ', cut);
        if (spaceAt !== -1) {
            // Cut at the first space at or beyond the nominal boundary.
            cut = spaceAt;
        } else if (cut <= total) {
            // No space left: take the remainder and step past the end.
            cut = total + 1;
        }
        pieces.push(text.substring(start, cut));
        start = cut;
    }
    return pieces;
}
&#13;
/* Bordered box with inner padding and space below it. */
#text_land {
  border: 1px solid #ccc;
  padding: 25px;
  margin-bottom: 30px;
}

/* NOTE(review): a second `textarea` rule further down sets width: 100%. */
textarea {
  width: 95%;
}

/* Each label sits on its own line and takes half the width. */
label {
  display: block;
  width: 50%;
  clear: both;
  margin: 0 0 .5em;
}

/* A select inside a label floats to the right half of that label. */
label select {
  width: 50%;
  float: right;
}

/* Global reset: border-box sizing, no default padding or margin. */
* {
  box-sizing: border-box;
  padding: 0;
  margin: 0;
}

body {
  font-family: monospace;
  font-size: 1em;
}

h3 {
  margin: 1.2em 0;
}

div {
  margin: 1.2em;
}

/* Same selector as the earlier `textarea` rule; being later in the sheet, this width wins. */
textarea {
  width: 100%;
}

button {
  padding: .5em;
}

p {
  /* Styles for paragraphs outside #content go here. */
}

/* Paragraphs inside #content: dashed boxes with clipped overflow. */
#content p {
  font-size: inherit;
  /* Inherit so the paragraph picks up the font size set on the #content div. */
  padding: 1.2em .5em;
  margin: 1.4em 0;
  border: 1px dashed #aaa;
  overflow: hidden;
}
&#13;
<!-- jQuery 2.1.1 from the Google CDN. -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<!-- Input area: source text, optional paragraph size, and the button that starts the split. -->
<div>
  <h3>Import Text below, then press the button</h3>
  <!-- NOTE(review): the whitespace between the textarea tags becomes its initial value, which presumably keeps the placeholder from showing; confirm. -->
  <textarea id="textarea1" placeholder="Type text here, then press the button below." rows="5">
  </textarea>
  <input style="width:200px;" id="custom" placeholder="Custom Characters per box">

  <br>

  <button style="width:200px;" id="go">Divide Text into Paragraphs</button>
</div>
<!-- Output area: #content starts empty. -->
<div>
  <h3 align="right">Divided Text Will Appear Below:</h3>
  <hr>
  <div id="content"></div>
</div>
&#13;
&#13;
&#13;

1 个答案:

答案 0 :(得分:1)

您可以采用将文本拆分为句子的方法,然后在段落中添加句子,直到达到所需的长度(代码中为chunkSize)。

/**
 * Splits text into paragraphs made of whole sentences.
 *
 * Sentences are appended to the current paragraph until its length reaches
 * chunkSize; the paragraph is then closed. The last sentence always closes
 * the paragraph it lands in. A sentence is a run of characters other than
 * . ! ? followed by one or more of those marks, or by the end of the text.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} The paragraphs in order, or [text] when no sentence
 *     could be found (for example empty or punctuation-only text).
 */
function splitText (text) {
    var paragraph     = "",
        paragraphs    = [],
        sentenceRegex = /[^\.!\?]+([\.!\?]+|\s*$)/g,
        // match() returns null when nothing matches; fall back to an empty
        // list so the [text] fallback below is actually reachable.
        sentences     = text.match(sentenceRegex) || [];

    sentences.forEach(function createParagraphs (sentence, index) {
        paragraph += sentence;

        if (paragraph.length >= chunkSize || index === sentences.length - 1) {
            paragraphs.push(paragraph);
            paragraph = "";
        }
    });

    return paragraphs.length === 0 ? [text] : paragraphs;
}

https://jsfiddle.net/DirectCtrl/95kuyw4g/4/(试图将其余代码保持尽可能类似)。

这种做法没有处理容差(margin):如果某个句子恰好在边界附近结束,或者远远超出长度上限,得到的段落可能会长得多。不过这类问题在边缘情况下本来就很难避免(例如 chunkSize 为 100 个字符,而第一个句子有 40 个字符、第二个句子有 160 个字符,这时该怎么办?)。如果确实有这个需求,把代码改成支持容差应该相当简单。另外,随着每个段落的字符数增加,这个问题的影响会越来越小。