在 C++ 中,编写一个转换字符串的函数,将连续的多个下划线合并为单个下划线。例如:‘_hello___world__’ => ‘_hello_world_’。
相关问题:在 C++ 中将连续重复的多个字符合并为一个字符,例如 abbbcc -> abc
答案 0 :(得分:19)
使用erase/unique
和C ++ 11 lambda。
#include <algorithm>
#include <iostream>
#include <string>
// Demo: collapse every run of consecutive underscores in a sample string
// with the erase/unique idiom and print the result.
int main()
{
    // Predicate for std::unique: two neighbours are treated as duplicates
    // only when both of them are underscores.
    const auto both_underscores = [](char lhs, char rhs) {
        return lhs == '_' && rhs == '_';
    };

    std::string text("_hello___world__");

    // std::unique compacts the kept characters to the front and returns the
    // new logical end; erase then drops the leftover tail.
    const std::string::iterator new_end =
        std::unique(text.begin(), text.end(), both_underscores);
    text.erase(new_end, text.end());

    std::cout << text << '\n';
    return 0;
}
如果你不想使用lambda,你可以定义一个类似的函子:
// Binary predicate: true only when both characters equal the character
// chosen at construction time.
class both_equal_to
{
public:
    both_equal_to(char ch) : value(ch) {}

    bool operator()(char first, char second) const
    {
        if (first != value)
            return false;
        return second == first;
    }

private:
    char value;  // the character whose adjacent repeats are matched
};
然后用both_equal_to('_')
替换lambda。
如果您只使用char*
并且不想支付构建std::string
的费用,则以下代码更接近RolandXu的代码。
// Collapses every run of consecutive underscores in the NUL-terminated
// buffer `str` into a single underscore, in place. A null pointer is
// passed through untouched. Returns `str`.
char *convert_underscores(char *str)
{
    if (!str)
        return str;

    char *const last = str + strlen(str);
    // std::unique compacts the kept characters to the front and returns the
    // new logical end; writing the terminator there truncates the string.
    *std::unique(str, last, both_equal_to('_')) = '\0';
    return str;
}
答案 1 :(得分:3)
不使用任何库:
#include <stdio.h>
/* Collapses each run of consecutive underscores in the NUL-terminated
 * string `src` to one underscore, in place.
 * Returns `src`, or NULL when `src` is NULL. */
char *RemoveUnderScore(char *src)
{
    char *out;
    const char *in;

    if (src == NULL) return NULL;

    out = src;
    for (in = src; *in != '\0'; )
    {
        /* Every character is kept once ... */
        const char current = *in++;
        *out++ = current;
        /* ... and the rest of an underscore run is skipped. */
        if (current == '_')
        {
            while (*in == '_')
                ++in;
        }
    }
    *out = '\0';
    return src;
}
/* Demo driver: collapses the underscore runs of a sample string and
 * prints the result. The command-line arguments are not used. */
int main(int argc,char** argv){
    char str[] = "_hello___worl__d___";
    /* Print through an explicit "%s" format: passing the data itself as
     * the format string would misbehave if it ever contained a '%'. */
    printf("%s", RemoveUnderScore(str));
    return 0;
}
答案 2 :(得分:3)
这篇文章比较了此页面上提交的各种方法的速度。我在一个 40 个字符的字符串上把每个函数运行了一百万次,并统计了每种算法所花的时间。
所需时间|使用的算法
0.2秒 | RolandXu的版本,使用char *,直接操作指向字符的指针。
0.4秒 | Blastfurnace的版本第二部分,仿函数,没有字符串。
2.7秒 | Blastfurnace的版本第一部分是仿函数和字符串。
8.7秒 | Eric L's版本循环遍历字符串,发现__替换为_。
11.0秒 | Eric L's版本循环遍历每个char并组装一个字符串。
11.8秒 | Jerry Coffin的版本与remove_copy_if。
用于证明上述基准和时间的C ++代码:
#include <iostream>
#include <cstdio>
#include <ctime>
#include <cstring>
#include <algorithm>
using namespace std;
std::string convert_underscores_by_EricL_using_string_replace(std::string str){
    //Returns str with every run of consecutive underscores collapsed to a
    //single underscore. str is taken by value, so the caller's string is
    //not modified.
    //Cons:
    //Each replace() shifts the rest of the string left by one character,
    //so a long run of underscores still costs quadratic time.
    //Pros:
    //Very concise.
    std::string::size_type pos = str.find("__");
    while (pos != std::string::npos){
        //Shrink the pair to one underscore in place; the string is searched
        //once per iteration and is not assigned to itself.
        str.replace(pos, 2, "_");
        //Resume at the same position: the surviving underscore may start
        //another pair (e.g. "___").
        pos = str.find("__", pos);
    }
    return str;
}
std::string convert_underscores_EricL_loop_over_a_string_and_remove_repeats(std::string str){
    //Returns a copy of str in which every run of consecutive underscores is
    //collapsed to a single underscore.
    //CONS:
    //Builds a second string instead of compacting the input in place.
    //The x / x-1 indexing takes a moment to read.
    //PROS:
    //Single pass over the input, no pointer arithmetic.
    const std::string::size_type len = str.length();  //size_type: no narrowing to int
    if (len < 2) return str;  //nothing to collapse
    std::string result;
    result.reserve(len);  //the output is never longer than the input
    result += str[0];
    for (std::string::size_type x = 1; x < len; x++){
        //Drop a character only when it and its predecessor are both '_'.
        if (str[x] != str[x-1] || str[x] != '_')
            result += str[x];
    }
    return result;
}
//Stateful unary predicate: returns true for a character equal to val when
//the character passed to the previous call was also equal to val.
//NOTE(review): the result depends on one predicate object being called
//once per element, in order - confirm the algorithm it is handed to does so.
class repeated_by_jerry_coffin {
    char prev;  //character seen by the previous call (0 before the first call)
    char val;   //character whose repeats are reported
public:
    //Initializers are listed in declaration order, which is the order in
    //which the members are actually initialized.
    explicit repeated_by_jerry_coffin(char ch) : prev(0), val(ch) {}
    bool operator()(char ch) {
        const bool ret = prev == val && ch == val;
        prev = ch;
        return ret;
    }
};
string convert_underscores_jerry_coffins_with_remove_copy_if(string str){
//CONS:
//Algorithm is the 2nd slowest algorithm.
//PROS:
//Concise, intuitive, needing only 4 lines.
//Offloads the heavy lifting to std builtin methods: std::remove_copy_if and std::back_inserter
string result = "";
std::remove_copy_if(str.begin(), str.end(),
std::back_inserter(result),
repeated_by_jerry_coffin('_'));
return result;
}
char* convert_underscores_by_RolandXu_using_char_stars_and_pointers(char *src){
    //Collapses each run of consecutive underscores in the NUL-terminated
    //string src to one underscore, in place. Returns src (NULL for NULL).
    //CONS:
    //Works on raw pointers.
    //PROS:
    //One pass, no allocation.
    if (src == NULL) return NULL;

    char* writeCursor = src;
    bool previousWasUnderscore = false;
    for (char* readCursor = src; *readCursor != '\0'; ++readCursor){
        const bool isUnderscore = (*readCursor == '_');
        //Keep everything except an underscore that follows an underscore.
        if (!(isUnderscore && previousWasUnderscore))
            *writeCursor++ = *readCursor;
        previousWasUnderscore = isUnderscore;
    }
    *writeCursor = '\0';
    return src;
}
//Binary predicate: true when both arguments equal the character given to
//the constructor.
class both_equal_to__blastfurnace_version1{
public:
    both_equal_to__blastfurnace_version1(char ch) : value(ch) {}
    bool operator()(char first, char second) const{
        return value == first && value == second;
    }
private:
    char value;  //character to match on both sides
};
string convert_underscores_blastfurnace_version1_with_functor(string str){
//CONS:
//You have to create an entirely new class that overloads an operator.
//The speed is harmed by the usage of string.
//PROS:
//Don't need to roll your own loops with pointers.
str.erase(
std::unique(
str.begin(),
str.end(),
both_equal_to__blastfurnace_version1('_')
),
str.end()
);
return str;
}
//Binary predicate used with std::unique: two characters match only when
//both are the character stored at construction.
class both_equal_to_blastfurnace_version2{
    char value;  //character to match on both sides
public:
    both_equal_to_blastfurnace_version2(char ch) : value(ch) {}
    bool operator()(char first, char second) const{
        const bool first_is_target = (first == value);
        return first_is_target ? (second == first) : false;
    }
};
char *convert_underscores_blastfurnace_version2_std_unique_and_no_string(char *str){
    //Collapses each run of consecutive underscores in the NUL-terminated
    //buffer str to one underscore, in place. A null str is returned as is.
    //CONS:
    //Needs the separate predicate class.
    //PROS:
    //Short, and the compaction loop is std::unique's.
    if (!str) return str;

    //unique returns the new logical end of the text; terminate it there.
    char *const terminator =
        std::unique(str, str + strlen(str), both_equal_to_blastfurnace_version2('_'));
    *terminator = '\0';
    return str;
}
//Compares two C strings and reports on std::cout: "passed" when they are
//equal, otherwise a "Failed" line showing msg, a (printed as the expected
//text) and b (printed as the returned text).
void assertCharStarEquals(char* a, char* b, std::string msg){
    const bool same = (std::strcmp(a, b) == 0);
    if (same){
        std::cout << "passed" << std::endl;
        return;
    }
    std::cout << "Failed" << msg
              << " should be: '" << a
              << "' it returned: '" << b << "'" << std::endl;
}
//Compares two strings and reports on std::cout: "passed" when they are
//equal, otherwise a "Failed" line showing msg, a (printed as the expected
//text) and b (printed as the returned text).
void assertStringEquals(std::string a, std::string b, std::string msg){
    if (a != b){
        std::cout << "Failed" << msg
                  << " should be: '" << a
                  << "' it returned: '" << b << "'" << std::endl;
        return;
    }
    std::cout << "passed" << std::endl;
}
//Benchmark: runs RolandXu's in-place char* converter numtests times on the
//text of str and prints the elapsed clock() time in seconds.
void test01_convert_underscores_by_RolandXu_using_char_stars_and_pointers(int numtests, std::string str){
    //A std::string working buffer has room for the text plus its
    //terminating '\0'; a char array of str.length() elements would be one
    //element too short for strcpy.
    std::string work(str);
    const std::clock_t start = std::clock();
    int x = 0;
    while(x < numtests){
        //The converter edits its argument in place, so restore the input
        //first; otherwise only the first pass would see uncollapsed text.
        //This copy is part of the measured time.
        std::strcpy(&work[0], str.c_str());
        convert_underscores_by_RolandXu_using_char_stars_and_pointers(&work[0]);
        x++;
    }
    double diff = (std::clock() - start) / static_cast<double>(CLOCKS_PER_SEC);
    std::cout << diff << " RolandXu's version using char*. " << '\n';
}
//Benchmark: runs Blastfurnace's in-place std::unique converter numtests
//times on the text of str and prints the elapsed clock() time in seconds.
void test02_convert_underscores_blastfurnace_version2_std_unique_and_no_string(int numtests, std::string str){
    //A std::string working buffer has room for the text plus its
    //terminating '\0'; a char array of str.length() elements would be one
    //element too short for strcpy.
    std::string work(str);
    const std::clock_t start = std::clock();
    int x = 0;
    while(x < numtests){
        //The converter edits its argument in place, so restore the input
        //first; otherwise only the first pass would see uncollapsed text.
        //This copy is part of the measured time.
        std::strcpy(&work[0], str.c_str());
        convert_underscores_blastfurnace_version2_std_unique_and_no_string(&work[0]);
        x++;
    }
    double diff = (std::clock() - start) / static_cast<double>(CLOCKS_PER_SEC);
    std::cout << diff << " Blastfurnace's version part two, functor, without string. " << std::endl;
}
//Benchmark: calls Blastfurnace's std::string converter numtests times on
//str and prints the elapsed clock() time in seconds.
void test03_convert_underscores_blastfurnace_version1_with_functor(int numtests, std::string str){
    const std::clock_t begin = std::clock();
    for (int run = 0; run < numtests; run++){
        std::string s = convert_underscores_blastfurnace_version1_with_functor(str);
    }
    const double seconds = (std::clock() - begin) / (double)CLOCKS_PER_SEC;
    std::cout << seconds << " Blastfurnace's version part one with the functor and string. " << std::endl;
}
//Benchmark: calls Eric L's find/replace converter numtests times on str
//and prints the elapsed clock() time in seconds.
void test04_convert_underscores_by_EricL_using_string_replace(int numtests, std::string str){
    //The converter takes its argument by value, so str is handed over
    //directly; no intermediate char buffer is needed.
    const std::clock_t start = std::clock();
    int x = 0;
    while(x < numtests){
        std::string s = convert_underscores_by_EricL_using_string_replace(str);
        x++;
    }
    double diff = (std::clock() - start) / static_cast<double>(CLOCKS_PER_SEC);
    std::cout << diff << " Eric L's version looping over the string doing find double underscore replace with single underscore. " << std::endl;
}
//Benchmark: calls Eric L's char-by-char converter numtests times on str
//and prints the elapsed clock() time in seconds.
void test05_convert_underscores_EricL_loop_over_a_string_and_remove_repeats(int numtests, std::string str){
    //The converter takes its argument by value, so str is handed over
    //directly; no intermediate char buffer is needed.
    const std::clock_t start = std::clock();
    int x = 0;
    while(x < numtests){
        std::string s = convert_underscores_EricL_loop_over_a_string_and_remove_repeats(str);
        x++;
    }
    double diff = (std::clock() - start) / static_cast<double>(CLOCKS_PER_SEC);
    std::cout << diff << " Eric L's version looping over each char and assembling a string. " << std::endl;
}
//Benchmark: calls Jerry Coffin's remove_copy_if converter numtests times
//on str and prints the elapsed clock() time in seconds.
void test06_convert_underscores_jerry_coffins_with_remove_copy_if(int numtests, std::string str){
    const std::clock_t begin = std::clock();
    for (int run = 0; run < numtests; run++){
        std::string s = convert_underscores_jerry_coffins_with_remove_copy_if(str);
    }
    const double seconds = (std::clock() - begin) / (double)CLOCKS_PER_SEC;
    std::cout << seconds << " Jerry Coffin's version with remove_copy_if. " << std::endl;
}
//Entry point: runs the six timing benchmarks, then checks every converter
//against one shared table of input/expected pairs. Each check prints
//"passed" or a "Failed..." line; "OK" is printed between converter groups.
int main(){
    std::cout << "GO!\n";
    const int numtests = 1000000;
    const std::string test_string = "__aa_a_aaa_--__&___aa_234______3)_!___<_";
    std::cout << "Time | Algorithm Used.\n";
    test01_convert_underscores_by_RolandXu_using_char_stars_and_pointers(numtests, test_string);
    test02_convert_underscores_blastfurnace_version2_std_unique_and_no_string(numtests, test_string);
    test03_convert_underscores_blastfurnace_version1_with_functor(numtests, test_string);
    test04_convert_underscores_by_EricL_using_string_replace(numtests, test_string);
    test05_convert_underscores_EricL_loop_over_a_string_and_remove_repeats(numtests, test_string);
    test06_convert_underscores_jerry_coffins_with_remove_copy_if(numtests, test_string);

    //Extra assertion testing to make sure everyone's algorithm is correct.
    //One table drives all converters so that each sees identical cases.
    struct Case {
        const char* input;     //text handed to the converter
        const char* expected;  //text the converter must produce
        const char* label;     //case number shown in a failure message
    };
    static const Case cases[] = {
        {"a__", "a_", "01"},
        {"a_", "a_", "02"},
        {"_______", "_", "03"},
        {"__a", "_a", "04"},
        {"_hello___world__", "_hello_world_", "05"},
        {"", "", "06"},
        {" __ ", " _ ", "07"},
        {"U+221E", "U+221E", "08"},
        {"__\u00b2__", "_\u00b2_", "09"},
    };
    const std::size_t case_count = sizeof(cases) / sizeof(cases[0]);

    //Converters that rewrite a char buffer in place.
    typedef char* (*CStringConverter)(char*);
    const CStringConverter in_place_converters[] = {
        convert_underscores_by_RolandXu_using_char_stars_and_pointers,
        convert_underscores_blastfurnace_version2_std_unique_and_no_string,
    };
    const std::size_t in_place_count =
        sizeof(in_place_converters) / sizeof(in_place_converters[0]);

    //Converters that take and return std::string.
    typedef std::string (*StringConverter)(std::string);
    const StringConverter string_converters[] = {
        convert_underscores_blastfurnace_version1_with_functor,
        convert_underscores_by_EricL_using_string_replace,
        convert_underscores_EricL_loop_over_a_string_and_remove_repeats,
        convert_underscores_jerry_coffins_with_remove_copy_if,
    };
    const std::size_t string_count =
        sizeof(string_converters) / sizeof(string_converters[0]);

    //Writable scratch buffers; every table entry fits with room to spare.
    char in[30];
    char out[30];
    for (std::size_t f = 0; f < in_place_count; ++f){
        for (std::size_t c = 0; c < case_count; ++c){
            std::strcpy(in, cases[c].input);
            std::strcpy(out, cases[c].expected);
            assertCharStarEquals(out, in_place_converters[f](in), cases[c].label);
        }
        std::cout << "OK\n";
    }

    for (std::size_t f = 0; f < string_count; ++f){
        for (std::size_t c = 0; c < case_count; ++c){
            assertStringEquals(cases[c].expected,
                               string_converters[f](cases[c].input),
                               cases[c].label);
        }
        //"OK" separates the groups; nothing is printed after the last one.
        if (f + 1 < string_count) std::cout << "OK\n";
    }
    return 0;
}
我们学到了什么?
使用C++字符串时,作者认为 'str.length()' 的开销很大,理由是它会逐字节扫描字符串的内存、一边计数一边寻找字符串的结尾;因此建议不要使用字符串,也不要使用 str.length()。(注:C++11 起标准要求 std::string::length() 为常数复杂度,这一条结论并不准确。)
作者认为使用 str.end() 同样会带来 O(n) 的性能损失,原因与第1项相同,因此建议不要使用 str.end()。(注:标准容器的 end() 也是常数复杂度。)
在char数组上使用std :: unique进行了优化并且闪电般快速。比带有字符串连接的for循环快一个数量级。
在C ++中,做一个mystring [x]导致在内存中查找该槽的内存,这需要很长时间,并且比使用指针并向指针添加1要慢得多。不要将mystring [x]放在迭代x的循环中。
如果必须使用字符串,请不要输入mystring + = anotherstring [x];每次运行此行时,string必须逐个遍历整个字符串。
不要连接字符串,抓取一块内存,定义指针,并在指针中放置字符,然后递增指针。循环和串联连接会调用O(n)复杂性。
这一天发生了很多学习,会唱出很棒的歌曲。
答案 3 :(得分:2)
我之前在 C++ 中给出的做法效率非常低:
// Returns a copy of str in which every run of consecutive underscores is
// collapsed to a single underscore.
// Compared with the in-place approaches this one builds a second string
// and returns it by value.
std::string convert_underscores(std::string str){
    // size_type avoids narrowing the length to int.
    const std::string::size_type len = str.length();
    if (len < 2) return str;  // nothing to collapse

    std::string result;
    result.reserve(len);  // the output is never longer than the input
    result += str[0];
    for (std::string::size_type x = 1; x < len; x++){
        // Drop a character only when it and its predecessor are both '_'.
        if (str[x] != str[x-1] || str[x] != '_'){
            result += str[x];
        }
    }
    return result;
}
使用str.replace 甚至效率更低。
// Returns str with every run of consecutive underscores collapsed to a
// single underscore, by repeatedly shrinking "__" to "_".
std::string convert_underscores(std::string str){
    std::string::size_type pos = str.find("__");
    while (pos != std::string::npos){
        // Shrink the pair in place; the string is searched once per
        // iteration and is not assigned to itself.
        str.replace(pos, 2, "_");
        // Resume at the same position: the surviving underscore may start
        // another pair (e.g. "___").
        pos = str.find("__", pos);
    }
    return str;
}
// Each replace() shifts the rest of the string left by one character, so a
// run of k underscores moves the tail of the string k-1 times.
答案 4 :(得分:2)
我想我会使用std::remove_copy_if
,如下所示:
// Seeded with '\0': the lambda reads prev on its very first call, so it
// must not be left uninitialized.
char prev = '\0';
std::remove_copy_if(input.begin(), input.end(),
    std::back_inserter(result),
    [&prev] (char ch) ->bool {
        // Drop ch only when it and the previously seen character are both '_'.
        const bool ret = prev == '_' && ch == '_';
        prev = ch;
        return ret;
    });
或者,如果你坚持使用C ++ 03,你可以使用显式函子而不是lambda来做同样的事情:
// Stateful unary predicate: returns true for a character equal to val when
// the character passed to the previous call was also equal to val.
// NOTE(review): the result depends on one predicate object being called
// once per element, in order - confirm the algorithm it is handed to does so.
class repeated {
    char prev;  // character seen by the previous call (0 before the first call)
    char val;   // character whose repeats are reported
public:
    // Initializers are listed in declaration order, which is the order in
    // which the members are actually initialized.
    explicit repeated(char ch) : prev(0), val(ch) {}
    bool operator()(char ch) {
        const bool ret = prev == val && ch == val;
        prev = ch;
        return ret;
    }
};
std::remove_copy_if(input.begin(), input.end(),
std::back_inserter(result),
repeated('_'));
无论哪种方式,它都只会把每个字符从输入复制到输出(最多)一次;而使用std::string::replace
时,每个字符被复制的次数等于其左侧重复下划线的个数。
编辑:虽然看了@ blastfurnace的答案,但我不确定我是否真的会使用它 - std::unique
可能更适合这份工作。