我想创建一种算法,将文本划分为3个大小均匀的组(基于文本长度)。由于这将用于换行符,因此需要保持文本的顺序。
例如这个字符串:
Just testing to see how this works.
将排序:
Just testing // 12 characters
to see how // 10 characters
this works. // 11 characters
有什么想法吗?
答案 0 :(得分:2)
“最小粗糙”动态程序,也来自维基百科关于自动换行的文章,可以根据您的需要进行调整。设置LineWidth = len(text)/n - 1
并忽略关于超出线宽的无限惩罚的注释;使用c(i, j)
的定义与P = 2
一样。
Code. I took the liberty of modifying the DP always to return exactly n lines, at the cost of increasing the running time from O(#words ** 2) to O(#words ** 2 * n).
def minragged(text, n=3):
"""
>>> minragged('Just testing to see how this works.')
['Just testing', 'to see how', 'this works.']
>>> minragged('Just testing to see how this works.', 10)
['', '', 'Just', 'testing', 'to', 'see', 'how', 'this', 'works.', '']
"""
words = text.split()
cumwordwidth = [0]
# cumwordwidth[-1] is the last element
for word in words:
cumwordwidth.append(cumwordwidth[-1] + len(word))
totalwidth = cumwordwidth[-1] + len(words) - 1 # len(words) - 1 spaces
linewidth = float(totalwidth - (n - 1)) / float(n) # n - 1 line breaks
def cost(i, j):
"""
cost of a line words[i], ..., words[j - 1] (words[i:j])
"""
actuallinewidth = max(j - i - 1, 0) + (cumwordwidth[j] - cumwordwidth[i])
return (linewidth - float(actuallinewidth)) ** 2
# best[l][k][0] is the min total cost for words 0, ..., k - 1 on l lines
# best[l][k][1] is a minimizing index for the start of the last line
best = [[(0.0, None)] + [(float('inf'), None)] * len(words)]
# xrange(upper) is the interval 0, 1, ..., upper - 1
for l in xrange(1, n + 1):
best.append([])
for j in xrange(len(words) + 1):
best[l].append(min((best[l - 1][k][0] + cost(k, j), k) for k in xrange(j + 1)))
lines = []
b = len(words)
# xrange(upper, 0, -1) is the interval upper, upper - 1, ..., 1
for l in xrange(n, 0, -1):
a = best[l][b][1]
lines.append(' '.join(words[a:b]))
b = a
lines.reverse()
return lines
if __name__ == '__main__':
import doctest
doctest.testmod()
答案 1 :(得分:1)
你可以尝试下一个简单的启发式启动器:放置在n / 3和2n / 3中的迭代器,并搜索每个启动器附近的最近空间。
答案 2 :(得分:0)
来自http://en.wikipedia.org/wiki/Word_wrap:
SpaceLeft := LineWidth
for each Word in Text
if Width(Word) > SpaceLeft
insert line break before Word in Text
SpaceLeft := LineWidth - Width(Word)
else
SpaceLeft := SpaceLeft - (Width(Word) + SpaceWidth)
许多现代文字处理程序使用此方法,例如OpenOffice.org Writer和Microsoft Word。该算法是最佳的,因为它总是将文本放在最小行数上。
答案 3 :(得分:0)
“某人”的回答很好。但是,我在将其转换为SWIFT代码时遇到了问题。这是我对所有感兴趣的人的翻译。
import Foundation
class SplitText{
typealias MinRag = (Float, Int) // meaning (cost for line (so far), word index)
// from http://stackoverflow.com/questions/6426017/word-wrap-to-x-lines-instead-of-maximum-width-least-raggedness?lq=1
class func splitText(text:String, numberOfLines:Int)-> [String]{
//preparations
var words = split(text, maxSplit:100, allowEmptySlices: false, isSeparator:{(s:Character)-> Bool in return s == " " || s == "\n"})
var cumwordwidth = [Int](); //cummulative word widths
cumwordwidth.append(0);
for word in words{
cumwordwidth.append(cumwordwidth[cumwordwidth.count - 1] + count(word));
}
var totalwidth = cumwordwidth[cumwordwidth.count - 1] + count(words) - 1;
var linewidth:Float = Float(totalwidth - (numberOfLines - 1)) / Float(numberOfLines)
// cost function for one line for words i .. j
var cost = { (i:Int,j:Int)-> Float in
var actuallinewidth = max(j - i - 1, 0) + (cumwordwidth[j] - cumwordwidth[i]);
var remainingWidth: Float = linewidth - Float(actuallinewidth)
return remainingWidth * remainingWidth
}
var best = [[MinRag]]()
var tmp = [MinRag]();
//ensure that data structure is initialised in a way that we start with adding the first word
tmp.append((0, -1));
for word in words {
tmp.append((Float.infinity , -1));
}
best.append(tmp);
//now we can start. We simply calculate the cost for all possible lines
for l in 1...numberOfLines {
tmp = [MinRag]()
for j in 0...words.count {
var min:MinRag = (best[l - 1][0].0 + cost(0, j), 0);
var k: Int
for k = 0; k < j + 1 ; ++k {
var loc:Float = best[l - 1][k].0 + cost(k, j);
if (loc < min.0 || (loc == min.0 && k < min.1)) {
min=(loc, k);
}
println("l=\(l), j=\(j), k=\(k), min=\(min)")
}
tmp.append(min);
}
best.append(tmp);
}
//now build the answer based on above calculations
var lines = [String]();
var b = words.count;
var o:Int
for o = numberOfLines; o > 0 ; --o {
var a = best[o][b].1;
lines.append(" ".join(words[a...b-1]));
b = a;
}
return reverse(lines);
}
}