我正在寻找一个能够正确解析 Excel 格式 CSV 文件的 C 库,就像 Python 和 Java 等其他常用语言中已有的库那样。
例如,以下文本是Excel格式的记录:
hello,world,"This is one field, really, no joking",12345,,,
以下是单个多行字段的另一个示例(在Excel中完全支持):
hello,world,"This multiline text
is considered as one field
that ends here",and,these,are,other,fields,at,the,same,row
答案 0 :(得分:4)
最后,由于在网上找不到令人满意的答案,我自己编写了一套 API,
在此分享……
用法示例:
#include <stdio.h>
#include "csvparser.h"
/*
 * Usage example: prints every header title and every field of every data
 * row of "Book1.csv", then releases the parser.
 *
 * Returns 0 on success, 1 when the header row cannot be obtained.
 */
int main(void) {
    // file, delimiter, first_line_is_header?
    CsvParser *csvparser = CsvParser_new("Book1.csv", ",", 1);

    CsvRow *header = CsvParser_getHeader(csvparser);
    if (header == NULL) {
        printf("%s\n", CsvParser_getErrorMessage(csvparser));
        // Release the parser on the error path too; otherwise it leaks.
        CsvParser_destroy(csvparser);
        return 1;
    }

    char **headerFields = CsvParser_getFields(header);
    for (int i = 0; i < CsvParser_getNumFields(header); i++) {
        printf("TITLE: %s\n", headerFields[i]);
    }

    CsvRow *row;
    while ((row = CsvParser_getRow(csvparser)) != NULL) {
        char **rowFields = CsvParser_getFields(row);
        for (int i = 0; i < CsvParser_getNumFields(row); i++) {
            printf("FIELD: %s\n", rowFields[i]);
        }
        // Data rows belong to the caller and are released one by one.
        CsvParser_destroy_row(row);
    }

    // The header row is released by CsvParser_destroy, not by the caller.
    CsvParser_destroy(csvparser);
    return 0;
}
头文件 - csvparser.h:
#ifndef CSVPARSER_H
#define CSVPARSER_H
#include <stdio.h>
// One parsed CSV row.
typedef struct CsvRow {
// Array of field strings; the parser allocates the array and each string
// on the heap.
char **fields_;
// Number of entries of fields_ that hold a field string.
int numOfFields_;
} CsvRow;
// State of one CSV parser, created by CsvParser_new and released by
// CsvParser_destroy.
typedef struct CsvParser {
// Heap copy of the path given to CsvParser_new, or NULL if none was given.
char *filePath_;
// Field delimiter character; '\0' records that the requested delimiter
// was rejected.
char delimiter_;
// Non-zero when the first row of the file is to be treated as the header.
int firstLineIsHeader_;
// Heap copy of the most recent error message, or NULL if none was set.
char *errMsg_;
// Cached header row, or NULL until it has been read.
CsvRow *header_;
// Open file stream, or NULL until the first row is requested.
FILE *fileHandler_;
} CsvParser;
// Public API

// Creates a parser for filePath (the path is copied). Only the first
// character of delimiter is used; NULL selects ','. A newline, carriage
// return, double quote or empty string is rejected, and the rejection is
// reported when the first row is requested. The file itself is opened on
// the first row read, not here.
CsvParser *CsvParser_new(const char *filePath, const char *delimiter, int firstLineIsHeader);
// Releases the parser, closes its file if it is open and destroys the
// cached header row. Passing NULL is allowed.
void CsvParser_destroy(CsvParser *csvParser);
// Releases a row returned by CsvParser_getRow. Do not call it on the row
// returned by CsvParser_getHeader: the parser keeps that row and destroys
// it in CsvParser_destroy.
void CsvParser_destroy_row(CsvRow *csvRow);
// Returns the header row, reading and caching it on first use. Returns
// NULL with an error message set when the parser was created with
// firstLineIsHeader == 0 or the row could not be read.
CsvRow *CsvParser_getHeader(CsvParser *csvParser);
// Returns the next data row, first consuming the header row if one is
// expected and has not been read yet. Returns NULL with an error message
// set when no row could be read; at end of file the message is
// "Reached EOF".
CsvRow *CsvParser_getRow(CsvParser *csvParser);
// Returns the number of fields in csvRow.
int CsvParser_getNumFields(CsvRow *csvRow);
// Returns the row's array of field strings; the array stays owned by the row.
char **CsvParser_getFields(CsvRow *csvRow);
// Returns the most recent error message, or NULL if none has been set.
// The string stays owned by the parser.
const char* CsvParser_getErrorMessage(CsvParser *csvParser);
// Same result as CsvParser_getNumFields, taking a const row.
int CsvParser_getNumOfFields(const CsvRow *csvRow);

// Private helpers (declared here but meant for csvparser.c only)

// Reads one row from the file, opening the file first if necessary.
CsvRow *_CsvParser_getRow(CsvParser *csvParser);
// Returns 1 if the first character of delimiter may be used as a field
// delimiter, 0 otherwise.
int _CsvParser_delimiterIsAccepted(const char *delimiter);
// Stores a copy of errorMessage in the parser, replacing any previous one.
void _CsvParser_setErrorMessage(CsvParser *csvParser, const char *errorMessage);
#endif
实现 - csvparser.c:
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include "csvparser.h"
/*
 * Creates a parser for the CSV file at filePath.
 *
 * filePath          path of the file; copied. NULL is stored as NULL and
 *                   reported when the first row is requested.
 * delimiter         only its first character is used. NULL selects ','.
 *                   A delimiter rejected by _CsvParser_delimiterIsAccepted
 *                   is stored as '\0' and reported when the first row is
 *                   requested.
 * firstLineIsHeader non-zero if the first row of the file is a header.
 *
 * Returns the new parser, or NULL if memory could not be allocated. The
 * file is not opened here. Release the parser with CsvParser_destroy.
 */
CsvParser *CsvParser_new(const char *filePath, const char *delimiter, int firstLineIsHeader) {
    CsvParser *csvParser = malloc(sizeof *csvParser);
    if (csvParser == NULL) {
        return NULL;
    }

    csvParser->filePath_ = NULL;
    if (filePath != NULL) {
        size_t filePathLen = strlen(filePath);
        csvParser->filePath_ = malloc(filePathLen + 1);
        if (csvParser->filePath_ == NULL) {
            free(csvParser);
            return NULL;
        }
        /* Copy the terminating NUL as well. */
        memcpy(csvParser->filePath_, filePath, filePathLen + 1);
    }

    csvParser->firstLineIsHeader_ = firstLineIsHeader;
    csvParser->errMsg_ = NULL;

    if (delimiter == NULL) {
        csvParser->delimiter_ = ',';
    } else if (_CsvParser_delimiterIsAccepted(delimiter)) {
        csvParser->delimiter_ = *delimiter;
    } else {
        /* '\0' marks an unsupported delimiter. */
        csvParser->delimiter_ = '\0';
    }

    csvParser->header_ = NULL;
    csvParser->fileHandler_ = NULL;
    return csvParser;
}
/*
 * Releases a parser: its path copy, its stored error message, its open
 * file (if any), its cached header row (if any) and the parser itself.
 * A NULL parser is ignored.
 */
void CsvParser_destroy(CsvParser *csvParser) {
    if (!csvParser) {
        return;
    }

    /* free(NULL) is a no-op, so the owned strings need no guard. */
    free(csvParser->filePath_);
    free(csvParser->errMsg_);

    FILE *stream = csvParser->fileHandler_;
    if (stream) {
        fclose(stream);
    }

    CsvRow *cachedHeader = csvParser->header_;
    if (cachedHeader) {
        CsvParser_destroy_row(cachedHeader);
    }

    free(csvParser);
}
/*
 * Releases a row: every field string, the array that holds them, and the
 * row itself. A NULL row is ignored.
 */
void CsvParser_destroy_row(CsvRow *csvRow) {
    if (csvRow == NULL) {
        return;
    }
    if (csvRow->fields_ != NULL) {
        for (int i = 0; i < csvRow->numOfFields_; i++) {
            free(csvRow->fields_[i]);
        }
        /* The pointer array is its own allocation and must be freed too. */
        free(csvRow->fields_);
    }
    free(csvRow);
}
/*
 * Returns the header row, reading it from the file and caching it in the
 * parser on first use. Returns NULL with an error message set when the
 * parser was created without a header line; also returns NULL when the
 * row could not be read.
 */
CsvRow *CsvParser_getHeader(CsvParser *csvParser) {
    if (csvParser->firstLineIsHeader_) {
        if (!csvParser->header_) {
            csvParser->header_ = _CsvParser_getRow(csvParser);
        }
        return csvParser->header_;
    }

    _CsvParser_setErrorMessage(csvParser, "Cannot supply header, as current CsvParser object does not support header");
    return NULL;
}
/*
 * Returns the next data row. If the file has a header line that has not
 * been read yet, that line is read and cached first so it is never handed
 * out as data.
 */
CsvRow *CsvParser_getRow(CsvParser *csvParser) {
    int headerPending = csvParser->firstLineIsHeader_ && csvParser->header_ == NULL;

    if (headerPending) {
        csvParser->header_ = _CsvParser_getRow(csvParser);
    }

    CsvRow *nextRow = _CsvParser_getRow(csvParser);
    return nextRow;
}
/* Returns the number of fields stored in csvRow. */
int CsvParser_getNumFields(CsvRow *csvRow) {
    const int fieldCount = csvRow->numOfFields_;
    return fieldCount;
}
/* Returns the row's array of field strings; the row keeps ownership. */
char **CsvParser_getFields(CsvRow *csvRow) {
    char **fieldStrings = csvRow->fields_;
    return fieldStrings;
}
/*
 * Frees a row that is still being built: the field strings stored so far,
 * the field array, the row itself, and the field buffer in progress.
 * Either argument may be NULL.
 */
static void discardPartialRow(CsvRow *csvRow, char *pendingField) {
    free(pendingField);
    if (csvRow == NULL) {
        return;
    }
    if (csvRow->fields_ != NULL) {
        for (int i = 0; i < csvRow->numOfFields_; i++) {
            free(csvRow->fields_[i]);
        }
        free(csvRow->fields_);
    }
    free(csvRow);
}

/*
 * Reads one row from the parser's file, opening the file on first use.
 *
 * A field that starts with a double quote is a quoted field: inside it the
 * delimiter and newlines are ordinary characters, and a doubled quote
 * stands for one literal quote. Carriage returns are always dropped. The
 * end of the file ends the current row like a newline does.
 *
 * Returns a heap-allocated row, or NULL with the parser's error message
 * set when the path is NULL, the delimiter is unsupported, the file cannot
 * be opened or read, memory runs out, or there is nothing left to read
 * ("Reached EOF").
 */
CsvRow *_CsvParser_getRow(CsvParser *csvParser) {
    int acceptedFields = 64;
    int acceptedCharsInField = 64;

    if (csvParser->filePath_ == NULL) {
        _CsvParser_setErrorMessage(csvParser, "Supplied CSV file path is NULL");
        return NULL;
    }
    if (csvParser->delimiter_ == '\0') {
        _CsvParser_setErrorMessage(csvParser, "Supplied delimiter is not supported");
        return NULL;
    }
    if (csvParser->fileHandler_ == NULL) {
        csvParser->fileHandler_ = fopen(csvParser->filePath_, "r");
        if (csvParser->fileHandler_ == NULL) {
            /* Read errno before any other call can overwrite it. */
            int errorNum = errno;
            const char *errStr = strerror(errorNum);
            /* 64 bytes cover the fixed text of the message and the NUL. */
            size_t errMsgSize = strlen(csvParser->filePath_) + strlen(errStr) + 64;
            char *errMsg = malloc(errMsgSize);
            if (errMsg == NULL) {
                _CsvParser_setErrorMessage(csvParser, "Error opening CSV file for reading");
                return NULL;
            }
            snprintf(errMsg, errMsgSize, "Error opening CSV file for reading: %s : %s",
                     csvParser->filePath_, errStr);
            _CsvParser_setErrorMessage(csvParser, errMsg);
            free(errMsg);
            return NULL;
        }
    }

    CsvRow *csvRow = malloc(sizeof *csvRow);
    if (csvRow == NULL) {
        _CsvParser_setErrorMessage(csvParser, "Out of memory while reading CSV row");
        return NULL;
    }
    csvRow->numOfFields_ = 0;
    csvRow->fields_ = malloc((size_t)acceptedFields * sizeof *csvRow->fields_);
    char *currField = malloc((size_t)acceptedCharsInField);
    if (csvRow->fields_ == NULL || currField == NULL) {
        discardPartialRow(csvRow, currField);
        _CsvParser_setErrorMessage(csvParser, "Out of memory while reading CSV row");
        return NULL;
    }

    int fieldIter = 0;             /* index of the field being read */
    int inside_complex_field = 0;  /* non-zero while inside a quoted field */
    int currFieldCharIter = 0;     /* characters stored in currField */
    int seriesOfQuotesLength = 0;  /* length of the current run of quotes */
    int lastCharIsQuote = 0;
    int isEndOfFile = 0;

    while (1) {
        /* Keep the int result so EOF cannot be confused with a data byte. */
        int readResult = fgetc(csvParser->fileHandler_);
        char currChar = (char)readResult;

        if (readResult == EOF) {
            if (ferror(csvParser->fileHandler_)) {
                discardPartialRow(csvRow, currField);
                _CsvParser_setErrorMessage(csvParser, "Error reading CSV file");
                return NULL;
            }
            if (currFieldCharIter == 0 && fieldIter == 0) {
                /* Nothing was read for this row: the file is exhausted. */
                discardPartialRow(csvRow, currField);
                _CsvParser_setErrorMessage(csvParser, "Reached EOF");
                return NULL;
            }
            /* Close the last row as if it had ended with a newline. */
            currChar = '\n';
            isEndOfFile = 1;
        }
        if (currChar == '\r') {
            continue;
        }

        if (currFieldCharIter == 0 && !lastCharIsQuote) {
            if (currChar == '\"') {
                /* Opening quote of a quoted field; it is not stored. */
                inside_complex_field = 1;
                lastCharIsQuote = 1;
                continue;
            }
        } else if (currChar == '\"') {
            seriesOfQuotesLength++;
            inside_complex_field = (seriesOfQuotesLength % 2 == 0);
            if (inside_complex_field) {
                /* Second quote of a doubled pair: store it over the first
                 * so that only one quote is kept. */
                currFieldCharIter--;
            }
        } else {
            seriesOfQuotesLength = 0;
        }

        if (isEndOfFile ||
            ((currChar == csvParser->delimiter_ || currChar == '\n') && !inside_complex_field)) {
            /* A trailing quote is the closing quote and is cut off. The
             * length check covers an opening quote followed directly by
             * EOF, where no character has been stored. */
            int fieldLen = currFieldCharIter;
            if (lastCharIsQuote && fieldLen > 0) {
                fieldLen--;
            }
            currField[fieldLen] = '\0';

            if (fieldIter >= acceptedFields) {
                int grownCapacity = acceptedFields * 2;
                char **grownFields = realloc(csvRow->fields_,
                                             (size_t)grownCapacity * sizeof *grownFields);
                if (grownFields == NULL) {
                    discardPartialRow(csvRow, currField);
                    _CsvParser_setErrorMessage(csvParser, "Out of memory while reading CSV row");
                    return NULL;
                }
                csvRow->fields_ = grownFields;
                acceptedFields = grownCapacity;
            }

            /* The row takes over the buffer; no second copy is needed. */
            csvRow->fields_[fieldIter] = currField;
            csvRow->numOfFields_++;
            currField = NULL;

            if (currChar == '\n') {
                return csvRow;
            }

            acceptedCharsInField = 64;
            currField = malloc((size_t)acceptedCharsInField);
            if (currField == NULL) {
                discardPartialRow(csvRow, NULL);
                _CsvParser_setErrorMessage(csvParser, "Out of memory while reading CSV row");
                return NULL;
            }
            currFieldCharIter = 0;
            fieldIter++;
            inside_complex_field = 0;
        } else {
            currField[currFieldCharIter] = currChar;
            currFieldCharIter++;
            /* Grow one slot early so the terminating NUL always fits. */
            if (currFieldCharIter == acceptedCharsInField - 1) {
                int grownSize = acceptedCharsInField * 2;
                char *grownField = realloc(currField, (size_t)grownSize);
                if (grownField == NULL) {
                    discardPartialRow(csvRow, currField);
                    _CsvParser_setErrorMessage(csvParser, "Out of memory while reading CSV row");
                    return NULL;
                }
                currField = grownField;
                acceptedCharsInField = grownSize;
            }
        }
        lastCharIsQuote = (currChar == '\"') ? 1 : 0;
    }
}
/* Returns the number of fields stored in csvRow (const-row variant). */
int CsvParser_getNumOfFields(const CsvRow *csvRow) {
    const int fieldCount = csvRow->numOfFields_;
    return fieldCount;
}
/*
 * Checks whether the first character of delimiter can separate fields.
 * Newline, carriage return, the NUL character (an empty string) and the
 * double quote are refused.
 *
 * Returns 1 if the character is accepted, 0 otherwise.
 */
int _CsvParser_delimiterIsAccepted(const char *delimiter) {
    switch (delimiter[0]) {
    case '\n':
    case '\r':
    case '\0':
    case '\"':
        return 0;
    default:
        return 1;
    }
}
/*
 * Stores a copy of errorMessage as the parser's current error message,
 * replacing any previous one.
 *
 * The copy is made before the previous message is released, so passing the
 * parser's own current message is safe. If memory for the copy cannot be
 * allocated, the stored message becomes NULL.
 */
void _CsvParser_setErrorMessage(CsvParser *csvParser, const char *errorMessage) {
    size_t errMsgLen = strlen(errorMessage);
    char *messageCopy = malloc(errMsgLen + 1);
    if (messageCopy != NULL) {
        /* Copy the terminating NUL as well. */
        memcpy(messageCopy, errorMessage, errMsgLen + 1);
    }
    free(csvParser->errMsg_);
    csvParser->errMsg_ = messageCopy;
}
/*
 * Returns the parser's most recent error message, or NULL if none has been
 * set. The string stays owned by the parser.
 */
const char *CsvParser_getErrorMessage(CsvParser *csvParser) {
    const char *latestMessage = csvParser->errMsg_;
    return latestMessage;
}