将多余的空白字符替换为单个空格的最快方法是什么？例如：
从
foo      bar
到
foo bar
答案 0 :(得分:46)
最快的方式？遍历字符串，并用 StringBuilder 逐字符构建第二个副本，每组连续空格只复制一个空格。
更容易书写的 Replace 变体会产生大量额外的字符串（或者浪费时间构建正则表达式 DFA）。
使用比较结果进行编辑:
使用 http://ideone.com/h6pw3，n = 50（在 ideone 上不得不减小 n，因为运行时间太长，进程被终止了），我得到：
正则表达:7771ms。
Stringbuilder:894ms。
确实如预期的那样,Regex
对于这么简单的事情是非常低效的。
答案 1 :(得分:39)
您可以使用正则表达式:
// Matches runs of two or more whitespace characters; a lone whitespace character is not matched.
static readonly Regex trimmer = new Regex(@"\s\s+");
// Replace every such run in s with one space.
s = trimmer.Replace(s, " ");
为了提高性能,请传递RegexOptions.Compiled
。
答案 2 :(得分:23)
有点晚了,但我做了一些基准测试,以获得删除额外空格的最快方法。如果有更快的答案,我很乐意添加它们。
**结果：**
**代码：**
/// <summary>
/// Four interchangeable ways to collapse every run of whitespace into a single space,
/// kept side by side so they can be benchmarked against each other.
/// </summary>
public class RemoveExtraWhitespaces
{
    /// <summary>Collapses whitespace runs using the static <c>Regex.Replace</c> with the pattern <c>\s+</c>.</summary>
    /// <param name="text">Text to normalize.</param>
    /// <returns><paramref name="text"/> with each whitespace run replaced by one space.</returns>
    public static string WithRegex(string text)
    {
        return Regex.Replace(text, @"\s+", " ");
    }

    /// <summary>Collapses whitespace runs using a caller-supplied regex instance.</summary>
    /// <param name="compiledRegex">Regex whose matches are replaced by one space.</param>
    /// <param name="text">Text to normalize.</param>
    /// <returns><paramref name="text"/> with each match replaced by one space.</returns>
    public static string WithRegexCompiled(Regex compiledRegex, string text)
    {
        return compiledRegex.Replace(text, " ");
    }

    /// <summary>
    /// Collapses whitespace runs with a single pass over the characters.
    /// Leading whitespace is dropped; a trailing run is kept as one space.
    /// </summary>
    /// <param name="input">Text to normalize; null or empty yields an empty string.</param>
    /// <returns>The normalized text.</returns>
    public static string NormalizeWhiteSpace(string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return string.Empty;
        }

        int current = 0;
        char[] output = new char[input.Length];
        bool skipped = false;

        // Enumerate the string directly; copying it to an array first is not needed.
        foreach (char c in input)
        {
            if (char.IsWhiteSpace(c))
            {
                if (!skipped)
                {
                    // current > 0 keeps a leading run from producing a space.
                    if (current > 0)
                    {
                        output[current++] = ' ';
                    }

                    skipped = true;
                }
            }
            else
            {
                skipped = false;
                output[current++] = c;
            }
        }

        return new string(output, 0, current);
    }

    /// <summary>
    /// Collapses whitespace runs in place inside a character copy of the input, using a
    /// <c>switch</c> over explicit whitespace code points instead of <c>char.IsWhiteSpace</c>.
    /// A leading and a trailing run are each kept as one space.
    /// </summary>
    /// <param name="input">Text to normalize; null or empty yields an empty string.</param>
    /// <returns>The normalized text, with every whitespace run written as a single U+0020.</returns>
    public static string NormalizeWhiteSpaceForLoop(string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return string.Empty;
        }

        int len = input.Length;
        int index = 0;
        var src = input.ToCharArray();
        bool skip = false;

        for (int i = 0; i < len; i++)
        {
            char ch = src[i];
            switch (ch)
            {
                case '\u0020':
                case '\u00A0':
                case '\u1680':
                case '\u2000':
                case '\u2001':
                case '\u2002':
                case '\u2003':
                case '\u2004':
                case '\u2005':
                case '\u2006':
                case '\u2007':
                case '\u2008':
                case '\u2009':
                case '\u200A':
                case '\u202F':
                case '\u205F':
                case '\u3000':
                case '\u2028':
                case '\u2029':
                case '\u0009':
                case '\u000A':
                case '\u000B':
                case '\u000C':
                case '\u000D':
                case '\u0085':
                    if (skip)
                    {
                        continue;
                    }

                    // Write a plain space, not the character that was read, so tabs and
                    // newlines are normalized the same way the regex variants do it.
                    src[index++] = ' ';
                    skip = true;
                    continue;
                default:
                    skip = false;
                    src[index++] = ch;
                    continue;
            }
        }

        return new string(src, 0, index);
    }
}
**测试：**
[TestFixture]
public class RemoveExtraWhitespacesTest
{
private const string _text = "foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo ";
private const string _expected = "foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo ";
private const int _iterations = 10000;
// Times RemoveExtraWhitespaces.WithRegex on _text and checks the last result against _expected.
[Test]
public void Regex()
{
var result = TimeAction("Regex", () => RemoveExtraWhitespaces.WithRegex(_text));
Assert.AreEqual(_expected, result);
}
// Times RemoveExtraWhitespaces.WithRegexCompiled on _text and checks the last result against _expected.
[Test]
public void RegexCompiled()
{
// The regex is built once, outside the timed delegate, so construction cost is not measured.
var compiledRegex = new Regex(@"\s+", RegexOptions.Compiled);
var result = TimeAction("RegexCompiled", () => RemoveExtraWhitespaces.WithRegexCompiled(compiledRegex, _text));
Assert.AreEqual(_expected, result);
}
// Times RemoveExtraWhitespaces.NormalizeWhiteSpace on _text and checks the last result against _expected.
[Test]
public void NormalizeWhiteSpace()
{
var result = TimeAction("NormalizeWhiteSpace", () => RemoveExtraWhitespaces.NormalizeWhiteSpace(_text));
Assert.AreEqual(_expected, result);
}
// Times RemoveExtraWhitespaces.NormalizeWhiteSpaceForLoop on _text and checks the last result against _expected.
[Test]
public void NormalizeWhiteSpaceForLoop()
{
var result = TimeAction("NormalizeWhiteSpaceForLoop", () => RemoveExtraWhitespaces.NormalizeWhiteSpaceForLoop(_text));
Assert.AreEqual(_expected, result);
}
/// <summary>
/// Runs <paramref name="func"/> <c>_iterations</c> times, prints the total elapsed
/// milliseconds prefixed with <paramref name="name"/>, and returns the value produced by the last run.
/// </summary>
/// <param name="name">Label printed in front of the timing.</param>
/// <param name="func">Delegate being measured.</param>
/// <returns>The result of the final invocation, or an empty string if no invocation ran.</returns>
public string TimeAction(string name, Func<string> func)
{
    string lastResult = string.Empty;
    var stopwatch = Stopwatch.StartNew();

    int remaining = _iterations;
    while (remaining-- > 0)
    {
        lastResult = func();
    }

    stopwatch.Stop();
    Console.WriteLine("{0}: {1} ms", name, stopwatch.ElapsedMilliseconds);
    return lastResult;
}
}
答案 3 :(得分:12)
我使用以下方法 - 它们处理所有空白字符不仅空格,修剪前导和尾随空格,删除额外空格,所有空格都替换为空格 char(所以我们有统一的空格分隔符)。这些方法快。
/// <summary>
/// Returns a copy of <paramref name="s"/> processed by the <c>StringBuilder</c> overload of
/// <c>CompactWhitespaces</c>.
/// </summary>
/// <param name="s">Text to compact.</param>
/// <returns>The compacted text.</returns>
public static String CompactWhitespaces( String s )
{
    var buffer = new StringBuilder( s );
    CompactWhitespaces( buffer );
    String compacted = buffer.ToString();
    return compacted;
}
/// <summary>
/// Compacts <paramref name="sb"/> in place: leading and trailing whitespace is removed and every
/// interior whitespace run becomes a single space character.
/// </summary>
/// <param name="sb">Builder to rewrite; its <c>Length</c> is shortened to the compacted size.</param>
public static void CompactWhitespaces( StringBuilder sb )
{
    int length = sb.Length;
    if( length == 0 )
        return;

    // Index of the first non-whitespace character, or length when there is none.
    int first = 0;
    while( first < length && Char.IsWhiteSpace( sb[ first ] ) )
        first++;

    // Nothing but whitespace: the result is empty.
    if( first == length )
    {
        sb.Length = 0;
        return;
    }

    // Index of the last non-whitespace character.
    int last = length - 1;
    while( last >= 0 && Char.IsWhiteSpace( sb[ last ] ) )
        last--;

    // Copy [first, last] down to the front, emitting one space per whitespace run.
    int write = 0;
    bool inRun = false;
    for( int read = first; read <= last; read++ )
    {
        char ch = sb[ read ];
        bool isWhitespace = Char.IsWhiteSpace( ch );
        if( isWhitespace && inRun )
            continue;

        sb[ write++ ] = isWhitespace ? ' ' : ch;
        inRun = isWhitespace;
    }

    sb.Length = write;
}
答案 4 :(得分:8)
// Sample input.
string q = " Hello how are you doing?";
// Split on the space separator, drop the empty entries that adjacent separators produce,
// then join the remaining pieces with one space. Only the space character is handled.
string a = String.Join(" ", q.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries));
答案 5 :(得分:7)
string text = "foo bar";
// \s+ matches one or more whitespace characters; each match is replaced with a single space.
text = Regex.Replace(text, @"\s+", " ");
// text is now "foo bar"
此解决方案适用于空格、制表符和换行符。如果只想处理空格，请将 `\s` 替换为 ` `（单个空格字符）。
答案 6 :(得分:7)
我需要其中一个用于更大的字符串,并提出了下面的例程。
任何连续的空格(包括制表符,换行符)都会被normalizeTo
中的任何内容替换。
前导/尾随空格被删除。
使用我的5k-> 5mil char字符串比RegEx快8倍。
/// <summary>
/// Replaces every run of whitespace (spaces, tabs, line breaks, ...) with a single
/// <paramref name="normalizeTo"/> character and removes leading and trailing whitespace.
/// </summary>
/// <param name="input">Text to normalize; null or empty yields an empty string.</param>
/// <param name="normalizeTo">Character written in place of each interior whitespace run.</param>
/// <returns>The normalized text; an empty string when the input contains only whitespace.</returns>
internal static string NormalizeWhiteSpace(string input, char normalizeTo = ' ')
{
    if (string.IsNullOrEmpty(input))
    {
        return string.Empty;
    }

    char[] output = new char[input.Length];
    int current = 0;
    bool pendingSeparator = false;

    foreach (char c in input)
    {
        if (char.IsWhiteSpace(c))
        {
            // Remember the run instead of writing it right away. A leading run (nothing
            // written yet) is ignored, and a trailing run is never flushed, so no length
            // correction is needed at the end and all-whitespace input cannot go negative.
            pendingSeparator = current > 0;
            continue;
        }

        if (pendingSeparator)
        {
            output[current++] = normalizeTo;
            pendingSeparator = false;
        }

        output[current++] = c;
    }

    return new string(output, 0, current);
}
答案 7 :(得分:6)
string yourWord = "beep boop baap beep boop baap beep";
// Three literal replacements: tag every space with a trailing '|', delete each "| " pair
// (a tag followed by another space), then delete the remaining tags.
// The last step removes every '|', including any that were already in the input.
yourWord = yourWord .Replace(" ", " |").Replace("| ", "").Replace("|", "");
答案 8 :(得分:5)
我尝试过使用StringBuilder:
这是我找到的性能与可读性的最佳平衡（使用 100,000 次迭代的计时运行）。有时它比可读性较差的版本更快，最多慢 5%。在我的小测试字符串上，正则表达式需要 4.24 倍的时间。
// NOTE(review): this fragment is JavaScript, not C#, and does not use StringBuilder — confirm it belongs to this answer.
obj != null // Filter out undefined and null
&& prop in Object(obj) // Convert to object and check property
答案 9 :(得分:3)
它并不快,但如果简单有帮助,这可行:
// Keep replacing two-space pairs with one space until no pair is left.
// The search literal must be TWO spaces: with a single space the loop would never end
// for any text that contains a space.
while (text.Contains("  "))
{
    text = text.Replace("  ", " ");
}
答案 10 :(得分:2)
试试这个:
// Returns input with every run of one or more whitespace characters replaced by a single space.
System.Text.RegularExpressions.Regex.Replace(input, @"\s+", " ");
答案 11 :(得分:2)
这段代码效果很好。我没有衡量表现。
string text = " hello - world, here we go !!! a bc ";
// Split() with no arguments splits on whitespace; the Where filter drops the empty pieces
// and the rest are joined with one space. The joined string is the value of this expression.
string.Join(" ", text.Split().Where(x => x != ""));
// Output
// "hello - world, here we go !!! a bc"
答案 12 :(得分:2)
在这个值得一些思考的问题中,有些要求并不清楚。
这是一个非常有效的版本,它用一个空格替换所有空格,并在for循环之前删除任何前导和尾随空格。
/// <summary>
/// Trims <paramref name="input"/> and replaces every remaining run of whitespace with one space.
/// </summary>
/// <param name="input">Text to normalize; null or empty yields an empty string.</param>
/// <returns>The trimmed text with single spaces between words.</returns>
public static string WhiteSpaceToSingleSpaces(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return string.Empty;
    }

    // Trim first so the loop never sees leading or trailing whitespace. The result may be
    // empty, which the loop handles without indexing into it.
    string trimmed = input.Trim();
    StringBuilder sb = new StringBuilder(trimmed.Length);
    bool lastCharWhiteSpace = false;

    // Start at the first character: it belongs in the output like every other one.
    foreach (char c in trimmed)
    {
        bool whiteSpace = char.IsWhiteSpace(c);
        if (!whiteSpace)
        {
            sb.Append(c);
        }
        else if (!lastCharWhiteSpace)
        {
            // First whitespace of a run: emit a single space; the rest of the run is skipped.
            sb.Append(' ');
        }

        lastCharWhiteSpace = whiteSpace;
    }

    return sb.ToString();
}
答案 13 :(得分:1)
/// <summary>
/// Splits <paramref name="IncorrectString"/> on the space character, discards the empty pieces,
/// joins the rest with single spaces and trims the result.
/// </summary>
/// <param name="IncorrectString">Text whose repeated spaces should be collapsed.</param>
/// <returns>The text with single spaces between the pieces and no surrounding whitespace.</returns>
public string GetCorrectString(string IncorrectString)
{
    string[] words = IncorrectString.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    string joined = string.Join(" ", words);
    return joined.Trim();
}
答案 14 :(得分:1)
这很有趣,但在我的电脑上,下面的方法和Sergey Povalyaev的StringBulder方法一样快 - (对于1000次重复,大约282ms,10k src字符串)。不确定内存使用情况。
/// <summary>
/// Splits <paramref name="src"/> on any of <paramref name="wsChars"/>, drops empty pieces and
/// joins what is left with single spaces.
/// </summary>
/// <param name="src">Text to process.</param>
/// <param name="wsChars">Characters treated as separators.</param>
/// <returns>The pieces of <paramref name="src"/> separated by exactly one space.</returns>
string RemoveExtraWhiteSpace(string src, char[] wsChars)
{
    string[] pieces = src.Split(wsChars, StringSplitOptions.RemoveEmptyEntries);
    return string.Join(" ", pieces);
}
显然它对任何字符都有效 - 不仅仅是空格。
虽然这不是OP要求的 - 但如果您真正需要的是用一个实例替换字符串中的特定连续字符,您可以使用这种相对有效的方法:
/// <summary>
/// Collapses consecutive repeats of any character listed in <paramref name="dupes"/> down to a
/// single occurrence; characters not listed are copied unchanged.
/// </summary>
/// <param name="src">Text to process.</param>
/// <param name="dupes">Characters whose consecutive repeats are collapsed; the array itself is not modified.</param>
/// <returns>The processed text.</returns>
string RemoveDuplicateChars(string src, char[] dupes)
{
    // Sort a private copy so membership can be tested with a binary search.
    char[] sortedDupes = (char[])dupes.Clone();
    Array.Sort(sortedDupes);

    var result = new StringBuilder(src.Length);
    char? previous = null;
    foreach (char current in src)
    {
        bool repeatsPrevious = previous.HasValue && previous.Value == current;
        bool collapse = repeatsPrevious && Array.BinarySearch(sortedDupes, current) >= 0;
        if (!collapse)
        {
            result.Append(current);
        }

        previous = current;
    }

    return result.ToString();
}
答案 15 :(得分:1)
我只是掀起了这个,但还没有测试过。但我觉得这很优雅,避免了正则表达式:
/// <summary>
/// Shortens every run of white-space characters to its first character.
/// </summary>
/// <param name="s">
/// The string to process.
/// </param>
/// <returns>
/// The string with each white-space run reduced to one character; that character is the
/// first one of the run, whatever kind of white space it is.
/// </returns>
public static string RemoveExtraWhiteSpace(this string s)
{
    if (s.Length == 0)
    {
        return string.Empty;
    }

    var result = new StringBuilder();
    bool previousWasWhiteSpace = false;
    foreach (var ch in s)
    {
        bool isWhiteSpace = char.IsWhiteSpace(ch);

        // Only the second and later characters of a run are dropped.
        if (!(isWhiteSpace && previousWasWhiteSpace))
        {
            result.Append(ch);
        }

        previousWasWhiteSpace = isWhiteSpace;
    }

    return result.ToString();
}
答案 16 :(得分:1)
我在这里遗漏了什么吗?我想出了这个:
/// <summary>
/// Collapses repeated space characters so the words of <paramref name="inputStr"/> are
/// separated by exactly one space, with no leading or trailing space.
/// For example "HELLO   BEAUTIFUL   WORLD!" becomes "HELLO BEAUTIFUL WORLD!".
/// </summary>
/// <param name="inputStr">Text to normalize; null or empty yields an empty string.</param>
/// <returns>The words of the input joined by single spaces.</returns>
private string NormalizeWhitespace(string inputStr)
{
    if (string.IsNullOrEmpty(inputStr))
    {
        return string.Empty;
    }

    // Split on the space character and let the framework discard the empty entries that
    // adjacent spaces produce; only the words remain.
    string[] splitParts = inputStr.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    // string.Join puts a space between the parts and none after the last one.
    return string.Join(" ", splitParts);
}
我知道我在这里创建第二个字符串以返回它以及创建splitParts数组。刚认为这很简单。也许我没有考虑到一些潜在的情况。
答案 17 :(得分:1)
我知道这很老了，但是压缩空格的最简单方法（用单个“空格”字符替换任何重复的空白字符）如下：
/// <summary>
/// Trims <paramref name="astring"/> and replaces each remaining run of white-space characters
/// with a single space.
/// </summary>
/// <param name="astring">Text to compact; null or empty yields an empty string.</param>
/// <returns>The compacted text.</returns>
public static string CompactWhitespace(string astring)
{
    if (string.IsNullOrEmpty(astring))
    {
        return string.Empty;
    }

    var result = new StringBuilder();
    bool inWhiteSpaceRun = false;
    foreach (char ch in astring.Trim())
    {
        bool isWhiteSpace = char.IsWhiteSpace(ch);
        if (!isWhiteSpace)
        {
            result.Append(ch);
        }
        else if (!inWhiteSpaceRun)
        {
            // First character of a run: stand in one space for the whole run.
            result.Append(' ');
        }

        inWhiteSpaceRun = isWhiteSpace;
    }

    return result.ToString();
}
答案 18 :(得分:1)
/// <summary>
/// Trims <paramref name="input"/> and collapses every run of space characters (U+0020 only)
/// into one space. Other white-space characters inside the text are copied unchanged.
/// </summary>
/// <param name="input">Text to process; null or empty yields an empty string.</param>
/// <returns>The processed text.</returns>
public static string RemoveExtraSpaces(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return string.Empty;
    }

    input = input.Trim();

    // Build the result in a StringBuilder: appending to a string in the loop would copy the
    // whole partial result on every character.
    var output = new System.Text.StringBuilder(input.Length);
    bool wasLastCharSpace = false;
    foreach (char c in input)
    {
        bool isSpace = c == ' ';
        if (isSpace && wasLastCharSpace)
        {
            continue;
        }

        wasLastCharSpace = isSpace;
        output.Append(c);
    }

    return output.ToString();
}
答案 19 :(得分:1)
您可以使用indexOf首先抓取空格序列开始的位置,然后使用replace方法将空格更改为“”。从那里,您可以使用您抓取的索引并在该位置放置一个空白字符。
答案 20 :(得分:1)
对于那些只想复制并继续下去的人:
/// <summary>
/// Collapses every run of space characters (U+0020 only) in <paramref name="value"/> into one
/// space. Nothing is trimmed and other white-space characters are copied unchanged.
/// </summary>
/// <param name="value">Text to process; may be null.</param>
/// <returns>The processed text, or null when <paramref name="value"/> is null.</returns>
private string RemoveExcessiveWhitespace(string value)
{
    if (value == null) { return null; }

    var result = new StringBuilder();
    bool previousWasSpace = false;
    for (int i = 0; i < value.Length; i++)
    {
        char current = value[i];
        bool isSpace = current == ' ';

        // A space directly after another space is the only thing that gets dropped.
        if (isSpace && previousWasSpace)
        {
            continue;
        }

        result.Append(current);
        previousWasSpace = isSpace;
    }

    return result.ToString();
}
答案 21 :(得分:1)
我对C#不太熟悉,因此我的代码不是精巧/最有效的代码。我来这里是为了找到适合我的用例的答案,但是我找不到一个答案(或者我找不到一个答案)。
对于我的用例,我需要在以下条件下对所有空白(WS:{space
,tab
,cr lf
})进行规范化:
tab
(例如,使用制表符分隔的文件,在这种情况下,还需要保留重复的制表符)。但是在大多数情况下,它们必须转换为空格。这是示例输入和预期输出(免责声明:我的代码仅在此示例中经过测试)
Every night in my dreams I see you, I feel you
That's how I know you go on
Far across the distance and places between us
You have come to show you go on
要转换为
Every night in my dreams I see you, I feel you
That's how I know you go on
Far across the distance and places between us
You have come to show you go on
这是我的代码
using System;
using System.Text.RegularExpressions;
/// <summary>
/// Demo program that cleans up the white space of a block of text and prints the result.
/// </summary>
public class Program
{
    /// <summary>
    /// Normalizes <paramref name="text"/> (tabs are not preserved) and writes it to the console
    /// under a "Clean New Lines" heading.
    /// </summary>
    /// <param name="text">Text to clean.</param>
    // NOTE(review): a single string parameter is not one of the entry-point signatures C#
    // recognises (none or string[]) — confirm how this method is meant to be invoked.
    public static void Main(string text)
    {
        string cleaned = Normalize(text, false);
        Console.Write("\nClean New Lines\n======\n" + cleaned);
    }

    /// <summary>Applies the four clean-up steps in order and returns the result.</summary>
    private static string Normalize(string text, bool preserveTabs)
    {
        // Step 1: drop white space around the whole text.
        string result = text.Trim();

        // Step 2: reduce repeated spaces to a single space.
        result = Regex.Replace(result, @" +", " ");

        // Step 3: tabs. When they are preserved (e.g. tab-separated files) only the spaces
        // around each tab are removed; otherwise every space/tab run becomes one space.
        result = preserveTabs
            ? Regex.Replace(result, @" *\t *", "\t")
            : Regex.Replace(result, @"[ \t]+", " ");

        // Step 4: reduce repeated new lines, together with the spaces and tabs around them,
        // to a single new line.
        return Regex.Replace(result, @"([\t ]*(\n)+[\t ]*)+", "\n");
    }
}
在此处查看此代码的实际操作:https://dotnetfiddle.net/eupjIU
答案 22 :(得分:1)
我不知道这是否是最快的方法,但是我使用了它,这对我有用:
/// <summary>
/// Trims the specified string and replaces every run of white space (spaces, tabs, ...)
/// between words with a single space.
/// </summary>
/// <param name="str">The specified string.</param>
/// <returns>The words of <paramref name="str"/> separated by single spaces.</returns>
public static string RemoveExtraSpaces(string str)
{
    var result = new StringBuilder();
    bool pendingSpace = false;

    foreach (char ch in str.Trim())
    {
        // char.IsWhiteSpace already covers the tab character (code 9).
        if (char.IsWhiteSpace(ch))
        {
            pendingSpace = true;
            continue;
        }

        // A space is written only once the next word starts.
        if (pendingSpace)
        {
            result.Append(' ');
            pendingSpace = false;
        }

        result.Append(ch);
    }

    return result.ToString();
}
答案 23 :(得分:0)
我尝试了一个没有if
的数组。
PS C:\dev\Spaces> dotnet run -c release
// .NETCoreApp,Version=v3.0
Seed=7, n=20, s.Length=2828670
Regex by SLaks 1407ms, len=996757
StringBuilder by Blindy 154ms, len=996757
Array 130ms, len=996757
NoIf 91ms, len=996757
All match!
/// <summary>
/// Collapses every run of space characters (U+0020 only) into one space without branching
/// inside the loop. Nothing is trimmed.
/// </summary>
/// <param name="s">Text to process.</param>
/// <returns>The text with consecutive spaces reduced to one.</returns>
private static string WithNoIf(string s)
{
    var dst = new char[s.Length];
    uint end = 0;
    char prev = char.MinValue;
    for (int k = 0; k < s.Length; ++k)
    {
        var c = s[k];

        // Always store the character; whether it stays depends on whether 'end' advances.
        dst[end] = c;

        // x is zero only when BOTH the current and the previous character are ' '.
        // The two differences are combined with OR: adding them lets a positive and a
        // negative difference cancel out (e.g. prev = '\0', c = '@') and drops a real character.
        uint x = (uint)(c ^ ' ') | (uint)(prev ^ ' ');

        // (x | -x) has its top bit set exactly when x is non-zero, so this adds 0 or 1.
        // unchecked: the negation wraps on purpose.
        end += unchecked((x | (~x + 1)) >> 31) & 1; // https://stackoverflow.com/questions/3912112/check-if-a-number-is-non-zero-using-bitwise-operators-in-c by ruslik
        prev = c;
    }

    return new string(dst, 0, (int)end);
}
/// <summary>
/// Collapses every run of space characters (U+0020 only) into one space by copying the
/// kept characters into a char array. Nothing is trimmed.
/// </summary>
/// <param name="s">Text to process.</param>
/// <returns>The text with consecutive spaces reduced to one.</returns>
private static string WithArray(string s)
{
    char[] buffer = new char[s.Length];
    int written = 0;
    bool previousWasSpace = false;

    foreach (char current in s)
    {
        bool isSpace = current == ' ';

        // Skip a space only when the character before it was a space too.
        if (!(isSpace && previousWasSpace))
        {
            buffer[written++] = current;
        }

        previousWasSpace = isSpace;
    }

    return new string(buffer, 0, written);
}
/// <summary>
/// Benchmark driver: builds one test string, runs each candidate implementation n times on it,
/// prints the elapsed time and result length per candidate, then reports whether all
/// candidates produced the same output.
/// </summary>
public static void Main()
{
    const int n = 20;
    const int seed = 7;
    string s = GetTestString(seed);

    var candidates = new (string Name, Func<string, string> Func)[]
    {
        ("Regex by SLaks", WithRegex),
        ("StringBuilder by Blindy", WithSb),
        ("Array", WithArray),
        ("NoIf", WithNoIf),
    };

    Console.WriteLine($"Seed={seed}, n={n}, s.Length={s.Length}");

    var resultsByName = new Dictionary<string, string>(); // method, result
    var stopwatch = new Stopwatch();
    foreach (var (name, func) in candidates)
    {
        stopwatch.Restart();
        var last = "";
        int runs = 0;
        while (runs < n)
        {
            last = func(s);
            runs++;
        }
        stopwatch.Stop();

        resultsByName[name] = last;
        Console.WriteLine($"{name,-25} {stopwatch.ElapsedMilliseconds,4}ms, len={last.Length}");
    }

    // Every result is compared with the first one stored.
    bool allMatch = resultsByName.Values.All(v => v == resultsByName.Values.First());
    Console.WriteLine(allMatch ? "All match!" : "Not all match! BAD");
}
/// <summary>
/// Builds a pseudo-random test string from <paramref name="seed"/>: 1024 * 1024 steps, each
/// appending either a run of 0-19 spaces (one time in five) or one character in the range 32-127.
/// </summary>
/// <param name="seed">Seed for the random number generator; the same seed gives the same string.</param>
/// <returns>The generated string.</returns>
private static string GetTestString(int seed)
{
    // by blindy from https://stackoverflow.com/questions/6442421/c-sharp-fastest-way-to-remove-extra-white-spaces
    const int steps = 1 * 1024 * 1024;
    var random = new Random(seed);

    // random 1mb+ string (it's slow enough...)
    var builder = new StringBuilder(steps);
    int remaining = steps;
    while (remaining-- > 0)
    {
        if (random.Next(5) == 0)
        {
            builder.Append(' ', random.Next(20));
        }
        else
        {
            builder.Append((char)(random.Next(128 - 32) + 32));
        }
    }

    return builder.ToString();
}
答案 24 :(得分:0)
如果您调整famos算法-在这种情况下比较“相似的”字符串怎么办-不区分大小写并且不关心多个空格,并且也可以容纳NULL。 不信任基准测试-将基准测试置于数据比较密集型任务aprox中。整个动作中1 / 4GB数据和加速约为100%(注释部分与该算法5 / 10min)。这里的these中有些差异较小,约为30%。可以告诉构建最佳算法,需要进行反汇编,并检查编译器在发布或调试构建中将做什么。这里的fulltrim as answer to similar (C question)也要简单一半,但还要区分大小写。
/// <summary>
/// Tells whether two strings differ when case is ignored, white space around them is ignored,
/// and every interior run of white space counts as one separator. Null is treated like an
/// empty string. No intermediate strings are allocated.
/// </summary>
/// <param name="srcA">First string; may be null.</param>
/// <param name="srcB">Second string; may be null.</param>
/// <returns>true when the strings differ under those rules, false when they are equivalent.</returns>
public static bool Differs(string srcA, string srcB)
{
    // Reference behaviour (allocating): upper-case both strings, split them on white space
    // with RemoveEmptyEntries, join the pieces with " " and compare the results.
    if (srcA == null && srcB == null) return false;
    if (srcA == null) srcA = String.Empty;
    if (srcB == null) srcB = String.Empty;

    int dstIdxA = srcA.Length, dstIdxB = srcB.Length; // both strings are read from the end towards the front
    int planSpaceA = 0, planSpaceB = 0;               // 0 = nothing compared yet, 1 = a non-WS pair was just compared, 2 = WS seen since that pair
    bool validA, validB;                              // true when chA / chB hold a freshly read non-WS character
    char chA = '\0', chB = '\0';

spaceLoopA:
    if (validA = (dstIdxA > 0))
    {
        chA = srcA[--dstIdxA];
        switch (chA)
        {
            case '!': case '"': case '#': case '$': case '%': case '&': case '\'': case '(': case ')': case '*': case '+': case ',': case '-':
            case '.': case '/': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case ':':
            case ';': case '<': case '=': case '>': case '?': case '@': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
            case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T':
            case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '[': case '\\': case ']': case '^': case '_': case '`':
            case '{': case '|': case '}': case '~':
                break; // ASCII that needs no case folding
            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i':
            case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
            case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
                chA = (Char)(chA & ~0x20); // ASCII lower case: clearing bit 0x20 gives the upper-case letter
                break;
            case '\u0020': case '\u00A0': case '\u1680': case '\u2000': case '\u2001':
            case '\u2002': case '\u2003': case '\u2004': case '\u2005': case '\u2006':
            case '\u2007': case '\u2008': case '\u2009': case '\u200A': case '\u202F':
            case '\u205F': case '\u3000': case '\u2028': case '\u2029': case '\u0009':
            case '\u000A': case '\u000B': case '\u000C': case '\u000D': case '\u0085':
                // White space: record it only after a non-WS pair was compared (so a run at
                // the end of the string is ignored), then keep reading.
                if (planSpaceA == 1) planSpaceA = 2;
                goto spaceLoopA;
            default:
                chA = Char.ToUpper(chA);
                break;
        }
    }

spaceLoopB:
    if (validB = (dstIdxB > 0))
    {
        // Second string, same logic.
        chB = srcB[--dstIdxB];
        switch (chB)
        {
            case '!': case '"': case '#': case '$': case '%': case '&': case '\'': case '(': case ')': case '*': case '+': case ',': case '-':
            case '.': case '/': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case ':':
            case ';': case '<': case '=': case '>': case '?': case '@': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
            case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T':
            case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '[': case '\\': case ']': case '^': case '_': case '`':
            case '{': case '|': case '}': case '~':
                break; // ASCII that needs no case folding
            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i':
            case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
            case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
                chB = (Char)(chB & ~0x20);
                break;
            case '\u0020': case '\u00A0': case '\u1680': case '\u2000': case '\u2001':
            case '\u2002': case '\u2003': case '\u2004': case '\u2005': case '\u2006':
            case '\u2007': case '\u2008': case '\u2009': case '\u200A': case '\u202F':
            case '\u205F': case '\u3000': case '\u2028': case '\u2029': case '\u0009':
            case '\u000A': case '\u000B': case '\u000C': case '\u000D': case '\u0085':
                if (planSpaceB == 1) planSpaceB = 2;
                goto spaceLoopB;
            default:
                chB = Char.ToUpper(chB);
                break;
        }
    }

    // A separator mismatch only matters while both strings still have a character to
    // compare. Once a string is exhausted, the white space that was skipped sits at its
    // very front and must be ignored just like white space at its end.
    if (validA && validB && planSpaceA != planSpaceB) return true;

    if (validA)
    {
        if (!validB) return true;      // A has another character, B has none
        if (chA != chB) return true;   // both have a character and they differ
    }
    else
    {
        // A is exhausted: equal only when B is exhausted too.
        return validB;
    }

    // The pair matched; remember that so following white space is recorded, and go on.
    planSpaceA = 1; planSpaceB = 1;
    goto spaceLoopA;
}
}
答案 25 :(得分:-1)
不需要复杂的代码!这是一个简单的代码,将删除任何重复:
/// <summary>
/// Applies the single-character overload of <c>RemoveCharOccurence</c> once for every character
/// in <paramref name="remove"/>, in array order, feeding each result into the next call.
/// </summary>
/// <param name="s">Text to process.</param>
/// <param name="remove">Characters whose consecutive repeats are collapsed.</param>
/// <returns>The processed text.</returns>
public static String RemoveCharOccurence(String s, char[] remove)
{
    String result = s;
    for (int i = 0; i < remove.Length; i++)
    {
        result = RemoveCharOccurence(result, remove[i]);
    }

    return result;
}
/// <summary>
/// Collapses every run of consecutive <paramref name="remove"/> characters in
/// <paramref name="s"/> into a single occurrence.
/// </summary>
/// <param name="s">Text to process.</param>
/// <param name="remove">Character whose consecutive repeats are collapsed.</param>
/// <returns>The processed text.</returns>
public static String RemoveCharOccurence(String s, char remove)
{
    var result = new StringBuilder(s.Length);
    bool previousMatched = false;

    foreach (char current in s)
    {
        bool matches = current == remove;

        // Only a match that directly follows another match is dropped.
        if (!(matches && previousMatched))
        {
            result.Append(current);
        }

        previousMatched = matches;
    }

    return result.ToString();
}
答案 26 :(得分:-1)
这很简单,只需使用.Replace()
方法:
// string.Replace treats its first argument as literal text, so passing "\\s+" would look for
// the characters \s+ and change nothing here. A pattern needs Regex.Replace.
string words = "Hello     world!";
words = System.Text.RegularExpressions.Regex.Replace(words, @"\s+", " ");
输出 >>> "Hello world!"