我需要开发一个简单的解析器,用来读取文本中的“块”,例如:
/TEST
{. text .}
/TEST_DATA
{. infs .}
并且,我需要读取标签内部的信息。保存这些信息的文件里有很多标签,它们的结构都相同。
例如:
/TEST
{. text .}
/TEST_DATA
{. infs .}
/LBL1
{. text .}
/LBL1_DATA
{. infs .}
/LBL2
{. text .}
/LBL2_DATA
{. infs .}
/LBL3
{. text .}
/LBL3_DATA
{. infs .}
我需要读取特定标签的块,例如:
parseFile("FileName.txt", LBL1)
然后由该函数把 LBL1 和 LBL1_DATA 这两个块内的文本(也就是 LBL1 和 LBL1_DATA 的内容)返回给我。
我不知道该怎么实现,需要帮助。
感谢。
答案 0 :(得分:4)
假设每个块的数据都写在同一行内,下面是一个非常简单的代码示例。
显然,你需要反复测试它:观察它的行为,修复可能存在的错误以及我遗漏的地方(这对你来说是个有益的练习),对你自己的新实现也要这样做。
/*
 * Example driver: look up the "TEST" block in the file named "file" and
 * print the text that get() returns for it, or a "not found" message.
 */
int main(void)
{
    const char *filename = "file";
    const char *key = "TEST";
    char *val = get(filename, key);

    /* get() yields NULL when there is nothing to print for this key. */
    if (val == NULL) {
        printf("'%s' was not found.\n", key);
        return 0;
    }

    printf("%s\n", val);
    free(val); /* the returned string is owned by the caller */
    return 0;
}
/*
 * get - return the text enclosed in braces for the block tagged "/<key>".
 *
 * Scans `filename` line by line for a line that starts with '/', followed by
 * `key` and a whitespace character. The very next line must, after optional
 * leading whitespace, start with '{' and contain a closing '}' on the same
 * line; the characters strictly between the two braces are returned.
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must free(),
 * or NULL when the tag is not found or no line follows it.
 *
 * The process is terminated with exit(EXIT_FAILURE) when the file cannot be
 * opened, when the line after the tag does not start with '{', when the '{'
 * is not closed on that line, or when memory cannot be allocated.
 */
char*
get(const char *filename, const char *key) {
    char *line = NULL;   /* getline() buffer, reused for every line */
    size_t cap = 0;      /* capacity of `line`, maintained by getline() */
    size_t keylen = strlen(key);
    char *result = NULL;
    int open = 0;        /* becomes 1 once the tag line has been seen */
    FILE *fp = fopen(filename, "r");

    if (fp == NULL) {
        fprintf(stderr, "Cannot read '%s' file.\n", filename);
        exit(EXIT_FAILURE);
    }

    while (getline(&line, &cap, fp) != -1) {
        const char *start;
        const char *end;
        size_t len;

        if (!open) {
            /*
             * Require whitespace right after the key so that "TEST" does
             * not match the "/TEST_DATA" tag.
             */
            if (*line == '/' && strncmp(line + 1, key, keylen) == 0 &&
                isspace((unsigned char)line[keylen + 1])) {
                open = 1;
            }
            continue;
        }

        /* This is the line right after the tag: it must hold "{ ... }". */
        start = line;
        while (isspace((unsigned char)*start))
            ++start;
        if (*start != '{') {
            fprintf(stderr, "expected '{' but '%c' was found.\n", *start);
            exit(EXIT_FAILURE);
        }
        ++start; /* skip '{' */

        end = strchr(start, '}');
        if (end == NULL) {
            fprintf(stderr, "EOF but '{' was not closed.");
            exit(EXIT_FAILURE);
        }

        /* The exact length is known, so one allocation is enough. */
        len = (size_t)(end - start);
        result = malloc(len + 1);
        if (result == NULL) {
            fprintf(stderr, "NO MEMORY!");
            exit(EXIT_FAILURE);
        }
        memcpy(result, start, len);
        result[len] = '\0';
        break;
    }

    free(line);
    fclose(fp);
    return result;
}
更新:编写了更简单的代码。
/*
 * Report "expected <w> but <f> was found" on stderr and terminate the
 * process. Wrapped in do { } while (0) so the expansion is one statement
 * and stays correct inside an unbraced if/else.
 */
#define EXPECTEDSYMBOL(w, f) \
    do { \
        fprintf(stderr, "expected '%c' but '%c' was found.\n", (w), (f)); \
        exit(EXIT_FAILURE); \
    } while (0)

/*
 * get2 - return the line that holds the "{ ... }" block tagged "/<key>".
 *
 * Scans `filename` for a line that starts with '/', followed by `key` and a
 * whitespace character. The next line must, after optional leading
 * whitespace, start with '{' and contain a '}'. The returned string is that
 * line from the '{' up to and including the end of the line (so it keeps
 * both braces and any trailing newline).
 *
 * Returns a malloc()ed copy that the caller must free(), or NULL when the
 * tag is not found or no line follows it.
 *
 * The process is terminated with exit(EXIT_FAILURE) when the file cannot be
 * opened, when either brace is missing, or when memory cannot be allocated.
 */
char* get2(const char *filename, const char *key) {
    char *line = NULL;   /* getline() buffer, reused for every line */
    size_t cap = 0;      /* capacity of `line`, maintained by getline() */
    size_t keylen = strlen(key);
    char *buf = NULL;
    int open = 0;        /* becomes 1 once the tag line has been seen */
    FILE *fp = fopen(filename, "r");

    if (fp == NULL) {
        fprintf(stderr, "Cannot read '%s' file.\n", filename);
        exit(EXIT_FAILURE);
    }

    while (getline(&line, &cap, fp) != -1) {
        char *pline;
        size_t len;

        if (!open) {
            /* Whitespace after the key keeps "TEST" from matching "/TEST_DATA". */
            if (*line == '/' && strncmp(line + 1, key, keylen) == 0 &&
                isspace((unsigned char)line[keylen + 1])) {
                open = 1;
            }
            continue;
        }

        pline = line;
        while (isspace((unsigned char)*pline))
            ++pline;
        if (*pline != '{')
            EXPECTEDSYMBOL('{', *pline);
        if (strchr(pline, '}') == NULL)
            EXPECTEDSYMBOL('}', *(pline + strlen(pline) - 1));

        /*
         * Hand back an independent copy: `pline` may point into the middle
         * of the getline() buffer, which the caller could not free().
         */
        len = strlen(pline);
        buf = malloc(len + 1);
        if (buf == NULL) {
            fprintf(stderr, "NO MEMORY!\n");
            exit(EXIT_FAILURE);
        }
        memcpy(buf, pline, len + 1);
        break;
    }

    free(line);
    fclose(fp);
    return buf;
}
我希望这对你有所帮助。
编辑 #2:我重新读了一遍你的问题,发现你还需要紧随其后的那个键(标签)的内容。
试试这个:
/*
 * get - copy the text line and the data line of the block tagged "/<key>".
 *
 * Scans `filename` for a line that starts with '/', followed by `key` and a
 * whitespace character, then:
 *   - copies the next line into buf[0];
 *   - skips the line after that without inspecting it (in the expected
 *     layout this is the "/<key>_DATA" tag);
 *   - copies the following line into buf[1].
 *
 * Copied lines keep their trailing newline and are truncated to fit the
 * 512-byte rows (always NUL-terminated). A row is written only when its
 * line exists in the file; otherwise it is left untouched, so callers
 * should zero-initialise `buf`.
 *
 * The process is terminated with exit(EXIT_FAILURE) when the file cannot be
 * opened.
 */
void
get(const char *filename, const char *key, char buf[][512]) {
    char *line = NULL;   /* getline() buffer, reused for every line */
    size_t cap = 0;      /* capacity of `line`, maintained by getline() */
    size_t keylen = strlen(key);
    int open = 0;        /* 0: searching, 1: tag seen, 2: text line copied */
    FILE *fp = fopen(filename, "r");

    if (fp == NULL) {
        fprintf(stderr, "Cannot read '%s' file.\n", filename);
        exit(EXIT_FAILURE);
    }

    while (getline(&line, &cap, fp) != -1) {
        if (open == 0) {
            /* Whitespace after the key keeps "TEST" from matching "/TEST_DATA". */
            if (*line == '/' && strncmp(line + 1, key, keylen) == 0 &&
                isspace((unsigned char)line[keylen + 1])) {
                open = 1;
            }
            continue;
        }

        if (open == 1) {
            /* snprintf bounds the copy to the row size and NUL-terminates. */
            snprintf(buf[0], sizeof buf[0], "%s", line);
            open = 2;
            continue;
        }

        /*
         * open == 2: the current line is skipped; the payload for buf[1] is
         * on the next line. Only copy it if that line was actually read.
         */
        if (getline(&line, &cap, fp) != -1)
            snprintf(buf[1], sizeof buf[1], "%s", line);
        break;
    }

    free(line);
    fclose(fp);
}
然后:
/* Usage: fetch both lines that belong to the "/TEST" tag in "config". */
const char *filename = "config";
const char *key = "TEST";          /* matches the "/TEST" tag line */
char buf[2][512] = {{0}};          /* zeroed, so unwritten rows print as empty */

get(filename, key, buf);
printf("%s\n", buf[0]);            /* e.g. {. text .} */
printf("%s\n", buf[1]);            /* e.g. {. infs .} */
答案 1 :(得分:1)
您主要有两种选择:
答案 2 :(得分:0)
我通常使用awk来处理文本。
$ awk '/^\/TEST$/, /}/{print $0}' test.dat | tail -1
{. text .}
这条命令让 awk 输出一个范围内的若干行:范围的第一行匹配正则表达式 ^\/TEST$ ,最后一行匹配正则表达式 } ;随后 tail -1 只把其中的最后一行传递给 stdout。
如果“text”实际上可能是多行文本,那么我会把输出通过管道传给另一个 awk,而不是 tail。
$ awk '/^\/TEST$/, /}/{print $0}' test.dat | awk 'NR!=1{print $0}'
{. text
text
text
.}
如果您愿意,可以将该单行重新打包为shell脚本。