我通常会越来越努力地解决自己在代码中发现的所有错误,但这对我来说完全没有逻辑。不管使用什么字符串和char分隔符,它都可以正常工作,但是只能在函数printf
中使用无用的while
,否则它将打印
-> Lorem
然后
-> ▼
并向后坠毁。预先感谢任何可以告诉我发生了什么事的人。
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
char **strsep_(char *str, char ch) {
// Sub-string length
uint8_t len = 0;
// The number of sub-strings found means the same as the position where it will be stored in the main pointer
// Obviously, the number tends to increase over time, and at the end of the algorithm, it means the main pointer length too
uint8_t pos = 0;
// Storage for any found sub-strings and one more byte as the pointer is null-terminated
char **arr = (char**)malloc(sizeof(char **) + 1);
while (*str) {
printf("Erase me and it will not work! :)\n");
if (*str == ch) {
// The allocated memory should be one step ahead of the current usage
arr = realloc(arr, sizeof(char **) * pos + 1);
// Allocates enough memory in the current main pointer position and the '\0' byte
arr[pos] = malloc(sizeof(char *) * len + 1);
// Copies the sub-string size (based in the length number) into the previously allocated space
memcpy(arr[pos], (str - len), len);
// `-_("")_-k
arr[pos][len] = '\0';
len = 0;
pos++;
} else {
len++;
}
*str++;
}
// Is not needed to reallocate additional memory if no separator character was found
if (pos > 0) arr = realloc(arr, sizeof(char **) * pos + 1);
// The last chunk of characters after the last separator character is properly allocated
arr[pos] = malloc(sizeof(char *) * len + 1);
memcpy(arr[pos], (str - len), len);
// To prevent undefined behavior while iterating over the pointer
arr[++pos] = NULL;
return arr;
}
void strsep_free_(char **arr) {
char **aux = arr;
while (*arr) {
free(*arr);
*arr = NULL;
arr++;
}
// One more time to fully deallocate the null-terminated pointer
free(*arr);
*arr = NULL;
arr++;
// Clearing The pointer itself
free(aux);
aux = NULL;
}
int main(void) {
char **s = strsep_("Lorem ipsum four words", ' ');
char **i = s;
while (*i != NULL) {
printf("-> %s\n", *i);
i++;
}
strsep_free_(s);
}
答案 0 :(得分:2)
崩溃的可能原因很可能是:realloc(arr, sizeof(char **) * pos + 1)
。
这与realloc(arr, (sizeof(char **) * pos) + 1)
相同,后者没有为您的“数组”分配足够的空间。您需要执行realloc(arr, sizeof(char **) * (pos + 1))
。
与arr[pos]
的分配相同,您也需要在其中正确使用括号。
答案 1 :(得分:2)
您的程序具有未定义的行为,这意味着它可能会以意想不到的方式运行,但偶然地可能会按预期运行。添加多余的printf
可以以似乎可以纠正错误的方式更改行为,但这只是偶然的。在不同的计算机上,或者甚至在同一时间在同一台计算机上,其行为可能会再次更改。
程序中存在多个导致未定义行为的错误:
您没有以适当的大小分配数组:它应该有fpr pos + 1
指针,因此还有sizeof(char **) * (pos + 1)
。错误的语句是:char **arr = (char**)malloc(sizeof(char **) + 1);
和arr = realloc(arr, sizeof(char **) * pos + 1);
。
此外,为每个子字符串分配的空间也不正确:arr[pos] = malloc(sizeof(char *) * len + 1);
应该读为arr[pos] = malloc(sizeof(char) * len + 1);
,其定义为arr[pos] = malloc(len + 1);
。这不会导致未定义的行为,您只需分配过多的内存即可。如果您的系统支持,则可以在一次对strndup(str - len, len)
的调用中组合分配和复制。
您永远不会检查内存分配失败,在内存分配失败的情况下会导致不确定的行为。
为uint8_t
和len
使用pos
是有风险的:如果子字符串的数量超过255,该怎么办? pos
和len
会默默地卷回到0
,从而产生意外结果和内存泄漏。使用这么小的类型没有优势,而是使用int
或size_t
。
这是更正的版本:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char **strsep_(const char *str, char ch) {
// Sub-string length
int len = 0;
// The number of sub-strings found, index where to store the NULL at the end of the array.
int pos = 0;
// return value: array of pointers to substrings with an extra slot for a NULL terminator.
char **arr = (char**)malloc(sizeof(*arr) * (pos + 1));
if (arr == NULL)
return NULL;
for (;;) {
if (*str == ch || *str == '\0') {
// alocate the substring and reallocate the array
char *p = malloc(len + 1);
char **new_arr = realloc(arr, sizeof(*arr) * (pos + 2));
if (new_arr == NULL || p == NULL) {
// allocation failure: free the memory allocated so far
free(p);
if (new_arr)
arr = new_arr;
while (pos-- > 0)
free(arr[pos]);
free(arr);
return NULL;
}
arr = new_arr;
memcpy(p, str - len, len);
p[len] = '\0';
arr[pos] = p;
pos++;
len = 0;
if (*str == '\0')
break;
} else {
len++;
}
str++;
}
arr[pos] = NULL;
return arr;
}
void strsep_free_(char **arr) {
int i;
// Free the array elements
for (i = 0; arr[i] != NULL; i++) {
free(arr[i]);
arr[i] = NULL; // extra safety, not really needed
}
// Free The array itself
free(arr);
}
int main(void) {
char **s = strsep_("Lorem ipsum four words", ' ');
int i;
for (i = 0; s[i] != NULL; i++) {
printf("-> %s\n", s[i]);
}
strsep_free_(s);
return 0;
}
输出:
-> Lorem
-> ipsum
-> four
-> words
答案 2 :(得分:0)
@chqrlie的好答案。从我的角度来看,我认为最好在复制之前对所有内容进行计数,这应该有助于避免重新分配。
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
int count_chars(const char *str, const char ch)
{
int i;
int count;
i = 0;
count = 0;
if (*str == ch)
str++;
while (str[i] != ch && str[i] != '\0')
{
count++;
i++;
}
return (count);
}
int count_delimeter(const char *str, const char ch)
{
int i = 0;
int count = 0;
while (str[i])
{
if (str[i] == ch && str[i + 1] != ch)
count++;
i++;
}
return count;
}
char** strsep_(const char *str, const char ch)
{
char **arr;
int index = 0;
int size = 0;
int i = 0;
size = count_delimeter(str, ch) + 1;
if ((arr = malloc(sizeof(char *) * (size + 1))) == NULL)
return (NULL);
arr[size] = NULL;
while (i < size)
{
if (str[index] == ch)
index++;
if (str[index] && str[index] == ch && str[index + 1] == ch)
{
while (str[index] && str[index] == ch && str[index + 1] == ch)
index++;
index++;
}
int len = count_chars(&str[index], ch);
if ((arr[i] = malloc(sizeof(char) * (len + 1))) == NULL)
return NULL;
memcpy(arr[i], &str[index], len);
index += len;
arr[i++][len] = '\0';
}
return arr;
}
int main(void)
{
char *str = "Lorem ipsum ipsum Lorem lipsum gorem insum";
char **s = strsep_(str, ' ');
/* char *str = "Lorem + Ipsum"; */
/* char **s = strsep_(str, '+'); */
/* char *str = "lorem, torem, horem, lorem"; */
/* char **s = strsep_(str, ','); */
while (*s != NULL) {
printf("-> [%s]\n", *s);
s++;
}
/* dont forget to free */
return 0;
}