我正在排序由文本和数字组成的字符串。 我希望排序将数字部分排序为数字,而不是字母数字。
例如我想:abc1def,...,abc9def,abc10def
而不是:abc10def,abc1def,...,abc9def
有没有人知道这种算法(特别是在c ++中)
由于
答案 0 :(得分:17)
我问this exact question (although in Java)并指出http://www.davekoelle.com/alphanum.html,其中有许多语言的算法和实现。
答案 1 :(得分:6)
这被称为自然分类。有一个看起来很有希望的算法here。
请注意非ASCII字符的问题(请参阅主题上的Jeff's blog entry)。
答案 2 :(得分:5)
可以使用C ++的几种自然排序实现。简要回顾:
natural_sort<>
- 基于Boost.Regex。
alnum.hpp
,基于Dave Koelle的alphanum algorithm
natsort
- 用C语言编写,但很容易从C ++中使用。
答案 3 :(得分:1)
部分重新发布my another answer:
bool compareNat(const std::string& a, const std::string& b){
if (a.empty())
return true;
if (b.empty())
return false;
if (std::isdigit(a[0]) && !std::isdigit(b[0]))
return true;
if (!std::isdigit(a[0]) && std::isdigit(b[0]))
return false;
if (!std::isdigit(a[0]) && !std::isdigit(b[0]))
{
if (a[0] == b[0])
return compareNat(a.substr(1), b.substr(1));
return (toUpper(a) < toUpper(b));
//toUpper() is a function to convert a std::string to uppercase.
}
// Both strings begin with digit --> parse both numbers
std::istringstream issa(a);
std::istringstream issb(b);
int ia, ib;
issa >> ia;
issb >> ib;
if (ia != ib)
return ia < ib;
// Numbers are the same --> remove numbers and recurse
std::string anew, bnew;
std::getline(issa, anew);
std::getline(issb, bnew);
return (compareNat(anew, bnew));
}
toUpper()
功能:
std::string toUpper(std::string s){
for(int i=0;i<(int)s.length();i++){s[i]=toupper(s[i]);}
return s;
}
用法:
std::vector<std::string> str;
str.push_back("abc1def");
str.push_back("abc10def");
...
std::sort(str.begin(), str.end(), compareNat);
答案 4 :(得分:0)
要解决本质上是解析问题的问题,可以使用状态机(又名finite state automaton)。不满意上面的解决方案,我写了一个简单的一次通过早期纾困算法,该算法在性能方面优于上面提出的C / C ++变体,不会受到数值数据类型溢出错误的影响,并且很容易修改以在需要时添加不区分大小写
可以找到来源here
答案 5 :(得分:0)
对于那些到达这里并已经在其项目中使用Qt的人,可以使用QCollator
类。有关详细信息,请参见this question。
答案 6 :(得分:0)
Avalanchesort是自然排序的递归变体,合并运行,同时探索排序数据的堆栈。即使算法在运行/排序时,即使将数据添加到排序堆中,算法也会稳定排序。
搜索原理很简单。 仅具有相同等级的合并运行。
找到前两个Naturell游程(等级0)后,avalanchesort将其合并为等级1的游程。然后调用avalanchesort,生成等级1的第二个游程,并将两个游程合并为等级2的游程。然后,它调用avalancheSort在未排序的数据上生成等级为2的运行...。
我的实现porthd/avalanchesort使用接口注入将排序与数据处理分开。您可以将algorithm用于数组,关联数组或列表之类的数据结构。
/**
* @param DataListAvalancheSortInterface $dataList
* @param DataRangeInterface $beginRange
* @param int $avalancheIndex
* @return bool
*/
public function startAvalancheSort(DataListAvalancheSortInterface $dataList)
{
$avalancheIndex = 0;
$rangeResult = $this->avalancheSort($dataList, $dataList->getFirstIdent(), $avalancheIndex);
if (!$dataList->isLastIdent($rangeResult->getStop())) {
do {
$avalancheIndex++;
$lastIdent = $rangeResult->getStop();
if ($dataList->isLastIdent($lastIdent)) {
$rangeResult = new $this->rangeClass();
$rangeResult->setStart($dataList->getFirstIdent());
$rangeResult->setStop($dataList->getLastIdent());
break;
}
$nextIdent = $dataList->getNextIdent($lastIdent);
$rangeFollow = $this->avalancheSort($dataList, $nextIdent, $avalancheIndex);
$rangeResult = $this->mergeAvalanche($dataList, $rangeResult, $rangeFollow);
} while (true);
}
return $rangeResult;
}
/**
* @param DataListAvalancheSortInterface $dataList
* @param DataRangeInterface $range
* @return DataRangeInterface
*/
protected function findRun(DataListAvalancheSortInterface $dataList,
$startIdent)
{
$result = new $this->rangeClass();
$result->setStart($startIdent);
$result->setStop($startIdent);
do {
if ($dataList->isLastIdent($result->getStop())) {
break;
}
$nextIdent = $dataList->getNextIdent($result->getStop());
if ($dataList->oddLowerEqualThanEven(
$dataList->getDataItem($result->getStop()),
$dataList->getDataItem($nextIdent)
)) {
$result->setStop($nextIdent);
} else {
break;
}
} while (true);
return $result;
}
/**
* @param DataListAvalancheSortInterface $dataList
* @param $beginIdent
* @param int $avalancheIndex
* @return DataRangeInterface|mixed
*/
protected function avalancheSort(DataListAvalancheSortInterface $dataList,
$beginIdent,
int $avalancheIndex = 0)
{
if ($avalancheIndex === 0) {
$rangeFirst = $this->findRun($dataList, $beginIdent);
if ($dataList->isLastIdent($rangeFirst->getStop())) {
// it is the last run
$rangeResult = $rangeFirst;
} else {
$nextIdent = $dataList->getNextIdent($rangeFirst->getStop());
$rangeSecond = $this->findRun($dataList, $nextIdent);
$rangeResult = $this->mergeAvalanche($dataList, $rangeFirst, $rangeSecond);
}
} else {
$rangeFirst = $this->avalancheSort($dataList,
$beginIdent,
($avalancheIndex - 1)
);
if ($dataList->isLastIdent($rangeFirst->getStop())) {
$rangeResult = $rangeFirst;
} else {
$nextIdent = $dataList->getNextIdent($rangeFirst->getStop());
$rangeSecond = $this->avalancheSort($dataList,
$nextIdent,
($avalancheIndex - 1)
);
$rangeResult = $this->mergeAvalanche($dataList, $rangeFirst, $rangeSecond);
}
}
return $rangeResult;
}
protected function mergeAvalanche(DataListAvalancheSortInterface $dataList, $oddListRange, $evenListRange)
{
$resultRange = new $this->rangeClass();
$oddNextIdent = $oddListRange->getStart();
$oddStopIdent = $oddListRange->getStop();
$evenNextIdent = $evenListRange->getStart();
$evenStopIdent = $evenListRange->getStop();
$dataList->initNewListPart($oddListRange, $evenListRange);
do {
if ($dataList->oddLowerEqualThanEven(
$dataList->getDataItem($oddNextIdent),
$dataList->getDataItem($evenNextIdent)
)) {
$dataList->addListPart($oddNextIdent);
if ($oddNextIdent === $oddStopIdent) {
$restTail = $evenNextIdent;
$stopTail = $evenStopIdent;
break;
}
$oddNextIdent = $dataList->getNextIdent($oddNextIdent);
} else {
$dataList->addListPart($evenNextIdent);
if ($evenNextIdent === $evenStopIdent) {
$restTail = $oddNextIdent;
$stopTail = $oddStopIdent;
break;
}
$evenNextIdent = $dataList->getNextIdent($evenNextIdent);
}
} while (true);
while ($stopTail !== $restTail) {
$dataList->addListPart($restTail);
$restTail = $dataList->getNextIdent($restTail);
}
$dataList->addListPart($restTail);
$dataList->cascadeDataListChange($resultRange);
return $resultRange;
}
}
答案 7 :(得分:0)
我的算法与java版本的测试代码。如果你想在你的项目中使用它,你可以自己定义一个比较器。
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.function.Consumer;
public class FileNameSortTest {
private static List<String> names = Arrays.asList(
"A__01__02",
"A__2__02",
"A__1__23",
"A__11__23",
"A__3++++",
"B__1__02",
"B__22_13",
"1_22_2222",
"12_222_222",
"2222222222",
"1.sadasdsadsa",
"11.asdasdasdasdasd",
"2.sadsadasdsad",
"22.sadasdasdsadsa",
"3.asdasdsadsadsa",
"adsadsadsasd1",
"adsadsadsasd10",
"adsadsadsasd3",
"adsadsadsasd02"
);
public static void main(String...args) {
List<File> files = new ArrayList<>();
names.forEach(s -> {
File f = new File(s);
try {
if (!f.exists()) {
f.createNewFile();
}
files.add(f);
} catch (IOException e) {
e.printStackTrace();
}
});
files.sort(Comparator.comparing(File::getName));
files.forEach(f -> System.out.print(f.getName() + " "));
System.out.println();
files.sort(new Comparator<File>() {
boolean caseSensitive = false;
int SPAN_OF_CASES = 'a' - 'A';
@Override
public int compare(File left, File right) {
char[] csLeft = left.getName().toCharArray(), csRight = right.getName().toCharArray();
boolean isNumberRegion = false;
int diff=0, i=0, j=0, lenLeft=csLeft.length, lenRight=csRight.length;
char cLeft = 0, cRight = 0;
for (; i<lenLeft && j<lenRight; i++, j++) {
cLeft = getCharByCaseSensitive(csLeft[i]);
cRight = getCharByCaseSensitive(csRight[j]);
boolean isNumericLeft = isNumeric(cLeft), isNumericRight = isNumeric(cRight);
if (isNumericLeft && isNumericRight) {
// Number start!
if (!isNumberRegion) {
isNumberRegion = true;
// Remove prefix '0'
while (i < lenLeft && cLeft == '0') i++;
while (j < lenRight && cRight == '0') j++;
if (i == lenLeft || j == lenRight) break;
}
// Diff start: calculate the diff value.
if (cLeft != cRight && diff == 0)
diff = cLeft - cRight;
} else {
if (isNumericLeft != isNumericRight) {
// One numeric and one char.
if (isNumberRegion)
return isNumericLeft ? 1 : -1;
return cLeft - cRight;
} else {
// Two chars: if (number) diff don't equal 0 return it.
if (diff != 0)
return diff;
// Calculate chars diff.
diff = cLeft - cRight;
if (diff != 0)
return diff;
// Reset!
isNumberRegion = false;
diff = 0;
}
}
}
// The longer one will be put backwards.
return (i == lenLeft && j == lenRight) ? cLeft - cRight : (i == lenLeft ? -1 : 1) ;
}
private boolean isNumeric(char c) {
return c >= '0' && c <= '9';
}
private char getCharByCaseSensitive(char c) {
return caseSensitive ? c : (c >= 'A' && c <= 'Z' ? (char) (c + SPAN_OF_CASES) : c);
}
});
files.forEach(f -> System.out.print(f.getName() + " "));
}
}
输出是,
1.sadasdsadsa 11.asdasdasdasdasd 12_222_222 1_22_2222 2.sadsadasdsad 22.sadasdasdsadsa 2222222222 3.asdasdsadsadsa A__01__02 A__11__23 A__1__23 A__2__02 A__3++++ B__1__02 B__22_13 adsadsadsasd02 adsadsadsasd1 adsadsadsasd10 adsadsadsasd3
1.sadasdsadsa 1_22_2222 2.sadsadasdsad 3.asdasdsadsadsa 11.asdasdasdasdasd 12_222_222 22.sadasdasdsadsa 2222222222 A__01__02 A__1__23 A__2__02 A__3++++ A__11__23 adsadsadsasd02 adsadsadsasd1 adsadsadsasd3 adsadsadsasd10 B__1__02 B__22_13
Process finished with exit code 0
答案 8 :(得分:-1)
// -1: s0 < s1; 0: s0 == s1; 1: s0 > s1
static int numericCompare(const string &s0, const string &s1) {
size_t i = 0, j = 0;
for (; i < s0.size() && j < s1.size();) {
string t0(1, s0[i++]);
while (i < s0.size() && !(isdigit(t0[0]) ^ isdigit(s0[i]))) {
t0.push_back(s0[i++]);
}
string t1(1, s1[j++]);
while (j < s1.size() && !(isdigit(t1[0]) ^ isdigit(s1[j]))) {
t1.push_back(s1[j++]);
}
if (isdigit(t0[0]) && isdigit(t1[0])) {
size_t p0 = t0.find_first_not_of('0');
size_t p1 = t1.find_first_not_of('0');
t0 = p0 == string::npos ? "" : t0.substr(p0);
t1 = p1 == string::npos ? "" : t1.substr(p1);
if (t0.size() != t1.size()) {
return t0.size() < t1.size() ? -1 : 1;
}
}
if (t0 != t1) {
return t0 < t1 ? -1 : 1;
}
}
return i == s0.size() && j == s1.size() ? 0 : i != s0.size() ? 1 : -1;
}
我不太确定你是不是想要,不管怎样,你可以尝试一下: - )