<School>
</SchoolName>latha2 //skip, but keep
</School>
<Student>
<Team>power //skip,but keep
<StudentNo>1 //skip
<Sport>
<StartDate>16122016</StartDate> //*skip(May or maynot contained)
<SportType>All
<ExpiryDate>16122020</EndDate> //*skip (May or maynot contained)
</Sport>
<Personal>
<phone>50855466 //skip,but keep
<rollno>6 //skip,but keep
</Personal>
<hobby> //skip
</Student>
注意:文件中共有 4 个 <Student> 标记。
"场景" - 一所学校下有 4 个 "Student" 标签。在每个标签中,"Team" 都会重复出现。
"有限制的问题"
要做两件事:1. 匹配文件格式;2. 生成包含特定值的新文本。
"新文本示例"
latha2 // SchoolName
power // Team
5035546 // phone - student1
6 // rollno - student1
5089973 // phone - student2
5 // rollno - student2
5402734 // phone - student3
1 // rollno - student3
8540345 // phone - student4
2 // rollno - student4
答案 0 :(得分:0)
与之前的问题不同,这个问题不是在允许某些例外的情况下比较两个文件的差异。这里有一个 format 文件,提供有效的标签及其顺序;然后是一个 data 文件,其中包含带有数据的标签。因此,不是比较差异,而是先读取第一个文件以获得预期/有效的标签,然后读取并处理第二个文件以获取所需的信息。
下面,我还会检查文件中的标签是否以正确的顺序出现。如果您不需要,可以放宽该限制。另一点逻辑是跳过少于 3 个字符的行(有效标签至少有 3 个字符,例如 <t>)。
输出的格式非常简单,您可以根据需要进行改进。我没有数据文件可供使用,因此我使用您提供的信息并通过在单独的文件中将您的文件复制3次以上来创建一个。查看代码。正如其他人所提到的,在C中解析XML虽然很好,但在实践中很少进行,因为其他工具提供了用于处理模式的现成工具。如果您有任何疑问,请告诉我。这将为您提供一种处理此类信息的方法:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXL 128
char *gettag (char *s, char *t);
/* strip trailing '\n' / '\r' characters from s in place, return new length */
static size_t strip_eol (char *s)
{
    size_t len = strlen (s);

    while (len && (s[len-1] == '\n' || s[len-1] == '\r'))
        s[--len] = 0;

    return len;
}

/* return non-zero if 'tag' begins with the characters of 'prefix' */
static int tag_has_prefix (const char *tag, const char *prefix)
{
    return strncmp (tag, prefix, strlen (prefix)) == 0;
}

/* return the index of the first of the 'n' entries in 'list' that 'tag'
 * begins with, or 'n' when no entry matches.
 */
static size_t tag_find (const char *tag, char *const *list, size_t n)
{
    size_t i;

    for (i = 0; i < n; i++)
        if (tag_has_prefix (tag, list[i]))
            break;

    return i;
}

/* usage: prog format_file data_file
 *
 * Reads the list of valid tags from format_file (one tag per line, lines
 * shorter than 3 characters are ignored), then reads data_file line by
 * line and, for each line whose tag is in that list:
 *   - retr1 tags: print the text following '>' the first time only,
 *   - retr tags : print the text following '>' every time,
 *   - skip/opt  : print nothing.
 * Lines whose tag is not in the format file produce a warning on stderr.
 *
 * Returns 0 on success, 1 on a usage, file-open or format-file error.
 */
int main (int argc, char **argv) {

    if (argc < 3 ) {
        fprintf (stderr, "error: insufficient input, usage: %s file1 file2\n",
                 argv[0]);
        return 1;
    }

    char s1[MAXL] = {0};                          /* line buffer        */
    char *tags[MAXL] = {0};                       /* file 1 tags        */
    char *retr[] = { "<Team>", "<phone>",         /* skip/print terms   */
                     "<rollno>" };
    char *retr1[] = { "</SchoolName>",            /* skip/print once    */
                      "<Team>" };
    char *skip[] = { "<StudentNo>","<hobby>" };   /* skip terms         */
    char *opt[] = { "<StartDate>",                /* optional tags      */
                    "<ExpiryDate>"};
    size_t retrsz = sizeof retr/sizeof *retr;     /* elements in retr   */
    size_t retr1sz = sizeof retr1/sizeof *retr1;  /* elements in retr1  */
    size_t skipsz = sizeof skip/sizeof *skip;     /* elements in skip   */
    size_t optsz = sizeof opt/sizeof *opt;        /* elements in opt    */
    size_t tidx = 0;                              /* number of tags     */
    size_t idx = 0;                               /* expected tag index */
    size_t i = 0;                                 /* general variable   */
    FILE *f1 = NULL, *f2 = NULL;                  /* file pointers      */
    /* one "already printed" flag per retr1 entry (fixed size, no VLA) */
    unsigned char retvd[sizeof retr1/sizeof *retr1] = {0};
    char *tmp = NULL;                             /* newly read tag     */
    int rc = 1;                                   /* exit status        */

    /* open both files or exit (f1 is released below if f2 fails) */
    f1 = fopen (argv[1], "r");
    if (f1)
        f2 = fopen (argv[2], "r");
    if (!f1 || !f2) {
        fprintf (stderr, "error: file open failure.\n");
        goto done;
    }

    /* read lines from format file1, create tags array */
    while (fgets (s1, MAXL, f1))
    {
        if (strip_eol (s1) < 3)     /* skip blank, 3 char for valid tag */
            continue;

        if (tidx == MAXL) {         /* protect tags[] from overflow */
            fprintf (stderr, "error: more than %d tags in format file.\n",
                     MAXL);
            goto done;
        }

        if ((tmp = gettag (s1, NULL)) == NULL) {
            fprintf (stderr, "error: tag not found in '%s'\n", s1);
            goto done;
        }
        tags[tidx++] = tmp;
    }
    fclose (f1);                    /* close file1 */
    f1 = NULL;

    /* without at least one tag there is nothing to validate against,
     * and tags[idx] below would be a NULL pointer */
    if (tidx == 0) {
        fprintf (stderr, "error: no tags found in format file.\n");
        goto done;
    }

    /* read each line in file2 */
    while (fgets (s1, MAXL, f2))
    {
        char tag[MAXL] = {0};

        if (strip_eol (s1) < 3)     /* skip blank or lines < 3 chars */
            continue;

        /* verify that current tag is a valid tag from format file:
         * try the expected position first, then fall back to a full
         * scan. A line without any tag leaves 'tag' empty and is
         * reported the same way. The expected index is NOT advanced
         * for rejected lines. */
        if (!gettag (s1, tag) ||
            (!tag_has_prefix (tag, tags[idx]) &&
             tag_find (tag, tags, tidx) == tidx)) {
            fprintf (stderr, "warning: invalid tag '%s', skipping.\n", tag);
            continue;               /* or handle as desired (e.g. exit) */
        }

        /* gettag succeeded, so s1 is known to contain a '>' */
        const char *data = strchr (s1, '>') + 1;

        /* retr1 is checked before retr so that a tag present in both
         * (e.g. <Team>) is only printed the first time it is seen */
        i = tag_find (tag, retr1, retr1sz);
        if (i < retr1sz) {
            if (!retvd[i]) {
                printf ("%s\n", data);
                retvd[i] = 1;       /* set flag to skip next */
            }
        }
        else if (tag_find (tag, retr, retrsz) < retrsz) {
            printf ("%s\n", data);  /* print data */
        }
        else if (tag_find (tag, skip, skipsz) < skipsz ||
                 tag_find (tag, opt, optsz) < optsz) {
            /* known skip or optional tag: nothing to print */
        }

        if (++idx == tidx)          /* advance expected tag, wrap at end */
            idx = 0;
    }
    rc = 0;

done:
    if (f1)
        fclose (f1);
    if (f2)
        fclose (f2);                /* close file2 */
    for (i = 0; i < tidx; i++)      /* free tags memory */
        free (tags[i]);

    return rc;
}
/* extract the first <tag> from s.
 *
 * The tag is the text from the first '<' that precedes the first '>'
 * in 's' up to and including that '>'. If no '<' precedes the first
 * '>', the result is the single character ">".
 *
 * if 't' is NULL, memory is allocated sufficient to hold <tag> + 1
 * characters (the caller must free it), else <tag> is copied to 't'
 * without allocation; 't' must not overlap 's' and must have room for
 * the tag plus the terminating nul-character.
 *
 * 's' is never modified. On success, the address of 't' is returned;
 * NULL is returned if 's' is NULL, if 's' contains no '>', or if the
 * allocation fails.
 */
char *gettag (char *s, char *t)
{
    if (!s) return NULL;                    /* test valid string        */

    const char *end = strchr (s, '>');      /* find first '>' in s      */
    if (!end) return NULL;                  /* if no '>', return NULL   */

    const char *start = s;
    while (start < end && *start != '<')    /* skip text before '<'     */
        start++;

    size_t n = (size_t)(end - start) + 1;   /* tag length including '>' */

    if (!t) {
        t = malloc (n + 1);                 /* allocate for tag + nul   */
        if (!t) return NULL;
    }

    memcpy (t, start, n);                   /* copy exactly the tag     */
    t[n] = 0;                               /* nul-terminate            */

    return t;
}
File1 - 格式(列表标签)
$ cat dat/student_format.txt
<School>
</SchoolName>latha2 //skip, but keep
</School>
<Student>
<Team>power //skip,but keep
<StudentNo>1 //skip
<Sport>
<StartDate>16122016</StartDate> //*skip(May or maynot contained)
<SportType>All
<ExpiryDate>16122020</EndDate> //*skip (May or maynot contained)
</Sport>
<Personal>
<phone>50855466 //skip,but keep
<rollno>6 //skip,but keep
</Personal>
<hobby> //skip
</Student>
File2 - 数据文件(与上述内容相同,重复 3 次)
$ cat dat/student_file.txt
<School>
</SchoolName>latha2
</School>
<Student>
<Team>power
<StudentNo>1
<Sport>
<StartDate>16122016</StartDate>
<SportType>All
<ExpiryDate>16122020</EndDate>
</Sport>
<Personal>
<phone>50855466
<rollno>6
</Personal>
<hobby>
</Student>
<School>
</SchoolName>latha2
</School>
<Student>
<Team>power
<StudentNo>1
<Sport>
<StartDate>16122016</StartDate>
<SportType>All
<ExpiryDate>16122020</EndDate>
</Sport>
<Personal>
<phone>50855466
<rollno>6
</Personal>
<hobby>
</Student>
<School>
</SchoolName>latha2
</School>
<Student>
<Team>power
<StudentNo>1
<Sport>
<StartDate>16122016</StartDate>
<SportType>All
<ExpiryDate>16122020</EndDate>
</Sport>
<Personal>
<phone>50855466
<rollno>6
</Personal>
<hobby>
</Student>
示例输出
$ ./bin/cmpf1f2_2 dat/student_format.txt dat/student_file.txt
latha2
power
50855466
6
50855466
6
50855466
6