我试图用C把一个包含多行、以空白分隔的整数的文件解析成"动态int数组的动态数组"。文件中的每一行对应外层数组中的一个数组。行数以及每行的元素个数都不是固定的。
到目前为止,我所做的是使用fgets将每一行作为字符串抓取。
但是,我不能弄清楚如何解析一串空格分隔的整数。我以为我可以使用sscanf(因为fscanf可用于解析整个空格分隔的整数文件)。但是,似乎sscanf具有不同的功能。 sscanf只解析字符串中的第一个数字。我的猜测是,因为该行是一个字符串不是一个流。
我已经四处查找过从字符串创建流的方法,但看起来C中并没有这样的功能(我不能使用非标准库)。
// Declarations as posted in the question; the definition of BUFFER_SIZE and the
// code that opens filePtr are elided (see the "..." line below).
char* line;
char lineBuffer[BUFFER_SIZE];
FILE *filePtr;
int value;
...
// Read the file one line at a time; the loop ends when fgets() returns NULL.
while((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
printf("%s\n", lineBuffer);
// NOTE: sscanf() always starts scanning at the pointer it is given. lineBuffer is
// never advanced here, so every call re-reads the same first integer and, for a
// line that starts with a number, this inner loop never terminates. This is the
// "only parses the first number" behavior the question asks about.
while(sscanf(lineBuffer, "%d ", &value) > 0) {
printf("%d\n", value);
}
}
我可以用什么来解析这个字符串?如果没有合适的办法,是否有可以替代整个方案的做法?我不想使用正则表达式(REGEX)。
答案 0 :(得分:4)
使用strtol(),如果有匹配则给出指向匹配结束的指针,并使用char指针存储当前位置:
// For every line read from filePtr: echo the line, then print each integer on it.
while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
    printf("%s\n", lineBuffer);

    // One past the last byte of the buffer; the cursor never has to go beyond it.
    const char *bufferEnd = lineBuffer + BUFFER_SIZE;

    // strtol() reports where it stopped through `stop`, which becomes the next
    // starting position.
    for (char *cursor = lineBuffer, *stop = NULL; cursor < bufferEnd; cursor = stop) {
        long int number = strtol(cursor, &stop, 10);

        // stop == cursor means no digits were consumed: nothing left to parse.
        // (The docs also suggest checking errno for out-of-range values.)
        if (stop == cursor && number == 0L) {
            break;
        }
        printf("%ld\n", number);
    }
}
答案 1 :(得分:2)
通过fgets()读取一行是很好的第一步。
有2种方法:strtol()(错误处理更好)和sscanf()。
// Parse every integer on each line with strtol(), rejecting values that do not
// fit in an int and reporting non-whitespace text left at the end of the line.
while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
    char *endptr;

    for (;;) {
        errno = 0;  // cleared first so a non-zero value afterwards means strtol() failed
        long parsed = strtol(line, &endptr, 10);
        if (endptr == line || errno != 0) {
            break;  // no conversion, or out of range / other error
        }
#if LONG_MIN < INT_MIN || LONG_MAX > INT_MAX
        // long is wider than int on this platform: check that the value fits
        if (parsed > INT_MAX || parsed < INT_MIN) {
            errno = ERANGE;
            break;  // out of range for int
        }
#endif
        int value = (int) parsed;
        printf("%d\n", value);
        line = endptr;  // continue right after the number just parsed
    }

    // Skip trailing whitespace; anything else before the terminator is garbage.
    while (isspace((unsigned char) *endptr)) {
        ++endptr;
    }
    if (*endptr != '\0') {
        Handle_ExtraGarbageAtEndOfLine();
    }
}
“sscanf只解析字符串中的第一个数字。”并非如此。可以使用sscanf()配合"%n"来记录扫描停止的位置。
// Parse every integer on each line with sscanf(); "%n" records how many
// characters were consumed so that `line` can be advanced past each number.
while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
    int n;
    int value;

    // n is reset to 0 before every scan, so it is still 0 when a scan fails.
    for (n = 0; sscanf(line, "%d %n", &value, &n) == 1; n = 0) {
        printf("%d\n", value);
        line += n;
    }

    // The scan stopped before the end of the string: something is left over.
    if (line[n] != '\0') {
        Handle_ExtraGarbageAtEndOfLine();
    }
}
答案 2 :(得分:1)
使用strtok()函数,以" "(空格)作为分隔符,并把它放在一个循环中逐个取得标记(token);当strtok()返回NULL时,循环终止。然后打印每个标记:
// For every line read from filePtr: echo the line, split it into tokens with
// strtok(), and print each token that parses as an int.
while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
    printf("%s\n", lineBuffer);

    // Split on every whitespace character, not only ' '. With " " alone,
    // tab-separated numbers such as "1\t2" end up in a single token and only
    // the first one is read. Note that strtok() modifies lineBuffer in place.
    const char *delimiters = " \t\r\n\v\f";

    char *token = strtok(line, delimiters);
    while (token != NULL) {
        // Tokens that do not start with an integer are skipped silently.
        if (sscanf(token, "%d", &value) == 1) {
            printf("%d\n", value);
        }
        token = strtok(NULL, delimiters);
    }
}
答案 3 :(得分:0)
只需对输入行做一次循环,并利用atol()总会在下一个空白分隔符处停止解析这一点。这个版本只适用于正整数;)但它速度很快,你不需要阅读大量strtok和sscanf的文档,而且即使整数之间散落着"噪声",它也能工作。
为了让它也适用于负数,可以把isdigit()换成!isspace(),这样就可以了。
/*
 * Demo: prints every non-negative integer found in a hard-coded string, one
 * per line. A number starts at any digit; atol() converts the digit run and
 * the loop then skips past it. Non-digit characters are ignored.
 */
void bla()
{
    const char * input = " 1 3 4 6 ";
    size_t i;
    size_t len = strlen(input);
    for (i = 0; i < len; ++i)
    {
        // <ctype.h> functions require a value representable as unsigned char.
        if (isdigit((unsigned char)input[i]))
        {
            // atol() returns long, so the conversion must be %ld (not %d).
            printf("%ld\n", atol(&input[i]));
            // Skip the remaining digits of the number just printed.
            while (i < len && isdigit((unsigned char)input[i]))
                ++i;
        }
    }
}
/*
 * Demo: prints every whitespace-separated integer (positive or negative) found
 * in a hard-coded string, one per line. A token starts at any non-whitespace
 * character; atol() converts it and the loop then skips to the next whitespace.
 */
void bla1()
{ // positive and negative ints version
    const char * input = " 10 -3 42 6 ";
    size_t i;
    size_t len = strlen(input);
    for (i = 0; i < len; ++i)
    {
        // <ctype.h> functions require a value representable as unsigned char.
        if (!isspace((unsigned char)input[i]))
        {
            // atol() returns long, so the conversion must be %ld (not %d).
            printf("%ld\n", atol(&input[i]));
            // Skip the rest of the token just printed.
            while (i < len && !isspace((unsigned char)input[i]))
                ++i;
        }
    }
    /* Output:
    10
    -3
    42
    6
    */
}
你的问题(隐含)的下一部分是:如何用动态数组来存储解析出来的int值。下面是一个基于上面代码的解决方案。chunkSize被故意设置得比输入所需的小,这样我就可以测试realloc那部分代码是否也能正常工作。
// One parsed row of integers.
typedef struct DataRow_tag
{
    int32_t *data;   // heap array holding the parsed values (NULL while unset)
    size_t length;   // number of valid entries in data
} DataRow_t;

// Parses one zero-terminated row of whitespace-separated decimal integers into
// a freshly allocated array.
//
// Parameters:
//   row    - zero-terminated string containing one row worth of data. Values
//            may be separated (and surrounded) by any amount of whitespace.
//   result - where the data is stored. Must be a "clean" structure, i.e.
//            result->data must be NULL on entry (asserted in debug builds).
//
// Returns a "bool" in C style (int is used on purpose instead of stdbool.h, to
// avoid sizeof(bool) mismatches across C/C++ library boundaries):
//   1: success. result->data / result->length describe the parsed values. An
//      empty or all-whitespace row succeeds with length 0. The caller takes
//      ownership of result->data and has to free() it when done using it.
//   0: failure (bad arguments, out of memory, a token that is not a complete
//      decimal integer, or a value that does not fit in int32_t). result->data
//      stays NULL and, when result itself is valid, result->length is 0;
//      nothing has to be freed.
int
ReadRowWithUnknownNumberOfColumnsOfInt32
    ( const char * row // Zero terminated string containing 1 row worth of data.
    , DataRow_t *result // Pointer to the place the data will be stored at.
    )
{
    size_t capacity = 10; // Initial number of slots; doubled whenever it runs out.
    size_t count = 0;
    int32_t *values = NULL;
    const char *cursor = row;

    // This function is not cleaning up the caller's old data: demand a clean result.
    assert(NULL != result && NULL == result->data);
    if (NULL == result || NULL != result->data)
        return 0;
    result->length = 0;
    if (NULL == row)
        return 0;

    values = malloc(capacity * sizeof *values);
    if (NULL == values)
        return 0;

    for (;;)
    {
        char *tokenEnd = NULL;
        long long parsed;

        // <ctype.h> functions require a value representable as unsigned char.
        while (isspace((unsigned char)*cursor))
            ++cursor;
        if ('\0' == *cursor)
            break; // Reached the terminator: every token has been consumed.

        // long long is at least 64 bits wide, so even a saturated overflow
        // result from strtoll() lands outside the int32_t range checked below.
        parsed = strtoll(cursor, &tokenEnd, 10);
        if (tokenEnd == cursor)
            goto fail; // Token does not start with a number.
        if ('\0' != *tokenEnd && !isspace((unsigned char)*tokenEnd))
            goto fail; // Trailing junk glued to the number, e.g. "12abc".
        if (parsed < INT32_MIN || parsed > INT32_MAX)
            goto fail; // Value does not fit into int32_t.

        if (count == capacity)
        {   // Buffer is full: double it. realloc() returns NULL on failure and
            // leaves the old block valid, so keep the old pointer until it succeeds.
            int32_t *grown;
            if (capacity > SIZE_MAX / (2 * sizeof *values))
                goto fail; // Doubling would overflow the byte count.
            capacity *= 2;
            grown = realloc(values, capacity * sizeof *values);
            if (NULL == grown)
                goto fail;
            values = grown;
        }
        values[count++] = (int32_t)parsed;
        cursor = tokenEnd;
    }

    result->data = values;
    result->length = count;
    return 1;

fail:
    // Single cleanup point: the buffer is freed exactly once and the result
    // structure is left in its clean state (data == NULL, length == 0).
    free(values);
    return 0;
}
/*
 * Demo driver: parses a hard-coded row with
 * ReadRowWithUnknownNumberOfColumnsOfInt32(), prints the values on one line
 * separated by spaces, then releases the row. Prints nothing if parsing fails.
 */
void Bla2()
{
    const char * input = "-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13";
    DataRow_t parsed = { 0 };
    size_t idx = 0;

    if (!ReadRowWithUnknownNumberOfColumnsOfInt32(input, &parsed))
        return;

    while (idx < parsed.length)
    {
        printf("%d ", parsed.data[idx]);
        ++idx;
    }
    printf("\n");

    // The row's buffer is owned by this function after a successful parse.
    free(parsed.data);
    parsed.data = NULL;
    parsed.length = 0;
}
答案 4 :(得分:0)
您应该使用:
// Allocate BUFFER_SIZE + 1 bytes on the heap. sizeof(BUFFER_SIZE + 1) would
// instead yield the size of that expression's type (assuming BUFFER_SIZE is an
// integer constant, typically just a few bytes), leaving the buffer far too small.
// lineBuffer must be declared as char * for this; check the result for NULL
// before use and free() it when done.
lineBuffer = malloc(BUFFER_SIZE + 1);
而不是:
// The fixed-size array declaration used in the question: BUFFER_SIZE chars.
char lineBuffer[BUFFER_SIZE];
你的栈(stack)会感谢你的!