我想使用C。
打印文本文件中标签之间的数据。输入语句:
<PERSON> Mark Zuckerberg </PERSON> is a entrepreneur from <LOCATION> USA </LOCATION>. He is also the CEO of <ORGANIZATION> Facebook </ORGANIZATION>.
输出:Mark Zuckerberg USA Facebook。
我的程序代码是:
/*
 * Return the num-th token (1-based) produced by tokenising `line`, or NULL
 * when the tokens run out before that.
 *
 * The first token is split on '/' or '>', every later one on '>' only.
 * Tokenising is done with strtok(), so `line` is modified in place, and a
 * NULL `line` continues from where the previous strtok() call stopped.
 * The scan also stops if a token is empty.
 */
const char* getfield(char* line, int num)
{
    const char *piece = strtok(line, "/>");

    while (piece != NULL && *piece != '\0') {
        num -= 1;
        if (num == 0) {
            return piece;
        }
        piece = strtok(NULL, ">");
    }
    return NULL;
}
/*
 * Read stdin line by line and print each token that getfield(..., 2)
 * returns, one per output line.
 */
int main()
{
    char buffer[500000];

    while (fgets(buffer, sizeof buffer, stdin) != NULL) {
        /* The first call tokenises the fresh line; later calls pass NULL so
           the strtok() scan inside getfield() continues where it stopped. */
        const char *field = getfield(buffer, 2);
        while (field != NULL) {
            printf("%s\n", field);
            field = getfield(NULL, 2);
        }
    }
}
我的输出是:
Mark Zuckerberg </PERSON
USA </LOCATION
Facebook </ORGANIZATION
我想去掉结尾的 </Tag,
只得到 Mark Zuckerberg USA Facebook 作为输出。我需要修改代码的哪个部分?
答案 0 :(得分:1)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Extract the text enclosed by the next <TAG> ... </TAG> pair in *sp.
 *
 * sp: in/out cursor into a NUL-terminated string. On success it is advanced
 *     to the character just past the closing tag, so repeated calls walk
 *     through the string. It is left unchanged when NULL is returned.
 *
 * Returns a newly malloc'ed, NUL-terminated copy of everything between the
 * opening and the closing tag (surrounding whitespace is kept); the caller
 * must free() it. Returns NULL when there is no '<', no '>' after it, no
 * matching closing tag, or when memory allocation fails.
 */
char *getfield(char **sp){
    char *lt = strchr(*sp, '<');            /* start of the opening tag */
    if (lt == NULL)
        return NULL;
    char *gt = strchr(lt, '>');             /* end of the opening tag */
    if (gt == NULL)
        return NULL;

    size_t name_len = (size_t)(gt - lt) - 1; /* 0 for the empty tag "<>" */
    size_t etag_len = name_len + 3;          /* "</" + name + ">" */

    /* Build the closing tag directly from the input; no intermediate copy
       of the tag name and no unbounded sprintf() needed. */
    char *etag = malloc(etag_len + 1);
    if (etag == NULL)
        return NULL;
    etag[0] = '<';
    etag[1] = '/';
    memcpy(etag + 2, lt + 1, name_len);
    etag[etag_len - 1] = '>';
    etag[etag_len] = '\0';

    char *body = gt + 1;                    /* first char after the opening tag */
    char *end = strstr(body, etag);         /* start of the closing tag */
    free(etag);                             /* only its length is needed from here on */
    if (end == NULL)
        return NULL;

    size_t text_len = (size_t)(end - body);
    char *text = malloc(text_len + 1);
    if (text == NULL)
        return NULL;
    memcpy(text, body, text_len);
    text[text_len] = '\0';

    *sp = end + etag_len;                   /* resume after the closing tag */
    return text;
}
int main(void){
char line[500000];
while (fgets(line, sizeof line, stdin)){
char *arg = line;
char *text;
while ((text = getfield(&arg)) != NULL){
printf("%s\n", text);
free(text);
}
}
return 0;
}