查找一些文本并将其替换为C字符串中的新文本可能比预期的要复杂一些。 我正在寻找一种快速且时间复杂度较低的算法。
我应该使用什么?
答案 0 :(得分:5)
我在C中找不到我喜欢的搜索/替换实现,所以我在这里展示自己的。它不使用strstr(),snprintf(),任意长度的临时缓冲区等。它只需要干草堆缓冲区足够大,以便在替换后保存结果字符串。
// str_replace(haystack, haystacksize, oldneedle, newneedle) --
// Search haystack and replace all occurences of oldneedle with newneedle.
// Resulting haystack contains no more than haystacksize characters (including the '\0').
// If haystacksize is too small to make the replacements, do not modify haystack at all.
//
// RETURN VALUES
// str_replace() returns haystack on success and NULL on failure.
// Failure means there was not enough room to replace all occurences of oldneedle.
// Success is returned otherwise, even if no replacement is made.
char *str_replace(char *haystack, size_t haystacksize,
const char *oldneedle, const char *newneedle);
// ------------------------------------------------------------------
// Implementation of function
// ------------------------------------------------------------------
#define SUCCESS (char *)haystack
#define FAILURE (void *)NULL
static bool
locate_forward(char **needle_ptr, char *read_ptr,
const char *needle, const char *needle_last);
static bool
locate_backward(char **needle_ptr, char *read_ptr,
const char *needle, const char *needle_last);
char *str_replace(char *haystack, size_t haystacksize,
const char *oldneedle, const char *newneedle)
{
size_t oldneedle_len = strlen(oldneedle);
size_t newneedle_len = strlen(newneedle);
char *oldneedle_ptr; // locates occurences of oldneedle
char *read_ptr; // where to read in the haystack
char *write_ptr; // where to write in the haystack
const char *oldneedle_last = // the last character in oldneedle
oldneedle +
oldneedle_len - 1;
// Case 0: oldneedle is empty
if (oldneedle_len == 0)
return SUCCESS; // nothing to do; define as success
// Case 1: newneedle is not longer than oldneedle
if (newneedle_len <= oldneedle_len) {
// Pass 1: Perform copy/replace using read_ptr and write_ptr
for (oldneedle_ptr = (char *)oldneedle,
read_ptr = haystack, write_ptr = haystack;
*read_ptr != '\0';
read_ptr++, write_ptr++)
{
*write_ptr = *read_ptr;
bool found = locate_forward(&oldneedle_ptr, read_ptr,
oldneedle, oldneedle_last);
if (found) {
// then perform update
write_ptr -= oldneedle_len;
memcpy(write_ptr+1, newneedle, newneedle_len);
write_ptr += newneedle_len;
}
}
*write_ptr = '\0';
return SUCCESS;
}
// Case 2: newneedle is longer than oldneedle
else {
size_t diff_len = // the amount of extra space needed
newneedle_len - // to replace oldneedle with newneedle
oldneedle_len; // in the expanded haystack
// Pass 1: Perform forward scan, updating write_ptr along the way
for (oldneedle_ptr = (char *)oldneedle,
read_ptr = haystack, write_ptr = haystack;
*read_ptr != '\0';
read_ptr++, write_ptr++)
{
bool found = locate_forward(&oldneedle_ptr, read_ptr,
oldneedle, oldneedle_last);
if (found) {
// then advance write_ptr
write_ptr += diff_len;
}
if (write_ptr >= haystack+haystacksize)
return FAILURE; // no more room in haystack
}
// Pass 2: Walk backwards through haystack, performing copy/replace
for (oldneedle_ptr = (char *)oldneedle_last;
write_ptr >= haystack;
write_ptr--, read_ptr--)
{
*write_ptr = *read_ptr;
bool found = locate_backward(&oldneedle_ptr, read_ptr,
oldneedle, oldneedle_last);
if (found) {
// then perform replacement
write_ptr -= diff_len;
memcpy(write_ptr, newneedle, newneedle_len);
}
}
return SUCCESS;
}
}
// locate_forward: compare needle_ptr and read_ptr to see if a match occured
// needle_ptr is updated as appropriate for the next call
// return true if match occured, false otherwise
static inline bool
locate_forward(char **needle_ptr, char *read_ptr,
const char *needle, const char *needle_last)
{
if (**needle_ptr == *read_ptr) {
(*needle_ptr)++;
if (*needle_ptr > needle_last) {
*needle_ptr = (char *)needle;
return true;
}
}
else
*needle_ptr = (char *)needle;
return false;
}
// locate_backward: compare needle_ptr and read_ptr to see if a match occured
// needle_ptr is updated as appropriate for the next call
// return true if match occured, false otherwise
static inline bool
locate_backward(char **needle_ptr, char *read_ptr,
const char *needle, const char *needle_last)
{
if (**needle_ptr == *read_ptr) {
(*needle_ptr)--;
if (*needle_ptr < needle) {
*needle_ptr = (char *)needle_last;
return true;
}
}
else
*needle_ptr = (char *)needle_last;
return false;
}
#define BUF 30
char *retval1, *retval2;
char message[BUF] = "Your name is $USERNAME.";
char username[] = "admin";
char username_toolong[] = "System Administrator";
int main() {
retval1 = str_replace(message, BUF, "$USERNAME", username_toolong);
retval2 = str_replace(message, BUF, "$USERNAME", username);
if (!retval1)
printf("Not enough room to replace $USERNAME with `%s'\n", username_toolong);
if (!retval2)
printf("Not enough room to replace $USERNAME with `%s'\n", username);
printf("%s\n", message);
return 0;
}
没有足够的空间用“系统管理员”替换$ USERNAME 你的名字是管理员。
干杯。
答案 1 :(得分:2)
Knuth-Morris-Pratt(经典)或Boyer-Moore(有时更快)?
尝试使用Google搜索“字符串搜索算法”。
答案 2 :(得分:1)
使用std::string
(来自<string>
),您只需使用find
和replace
。
编辑:Touché。这仅适用于C ++。
答案 3 :(得分:1)
我不禁想知道strstr()实现了什么算法。鉴于这些是相当标准的算法,strstr()的良好实现完全有可能使用其中一个。
然而,无法保证strstr()实现优化算法,或者从一个平台到另一个平台使用相同的算法。
答案 4 :(得分:0)
这是一个很好的代码
#include <stdio.h>
#include <string.h>
char *replace_str(char *str, char *orig, char *rep)
{
static char buffer[4096];
char *p;
if(!(p = strstr(str, orig))) // Is 'orig' even in 'str'?
return str;
strncpy(buffer, str, p-str); // Copy characters from 'str' start to 'orig' st$
buffer[p-str] = '\0';
sprintf(buffer+(p-str), "%s%s", rep, p+strlen(orig));
return buffer;
}
int main(void)
{
puts(replace_str("Hello, world!", "world", "Miami"));
return 0;
}
答案 5 :(得分:0)
我的解决方案,基于其他人,但我相信更安全一点:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAX_SOURCE_SIZE (0x100000)
char * searchReplace(char * string, char *toReplace[], char *replacements[], int numReplacements){
int i = 0;
char *locOfToRep;
char *toRep;
char *rep;
int lenToRep,lenStr,lenAfterLocRep;
static char buffer[MAX_SOURCE_SIZE];
for(i = 0; i < numReplacements; ++i){
toRep = toReplace[i];
rep = replacements[i];
//if str not in the string, exit.
if (!(locOfToRep = strstr(string,toRep))){
exit(EXIT_FAILURE);
}
lenToRep = strlen(toRep);
lenStr = strlen(string);
lenAfterLocRep = strlen(locOfToRep);
//Print the string upto the pointer, then the val, and then the rest of the string.
sprintf(buffer, "%.*s%s%s", lenStr-lenAfterLocRep, string,rep,locOfToRep+lenToRep);
string = buffer;
}
return buffer;
}
int main(){
char * string = "Hello, world!";
int numVals;
char *names[2] = {"Hello", "world"};
char *vals[2] = {"Goodbye", "you"};
numVals = 2;
string = searchReplace(string, names, vals, numVals);
printf("%s\n",string);
}