首先验证文件格式,然后在C

时间:2015-07-01 07:48:45

标签: c

<School>
      </SchoolName>latha2  //skip, but keep
</School>

<Student>
   <Team>power  //skip,but keep
   <StudentNo>1 //skip
       <Sport>
            <StartDate>16122016</StartDate> //*skip(May or maynot contained)
            <SportType>All 
            <ExpiryDate>16122020</EndDate> //*skip (May or maynot contained)
       </Sport>

 <Personal> 
   <phone>50855466 //skip,but keep
   <rollno>6 //skip,but keep
 </Personal>
 <hobby>  //skip
</Student>

注意:有4个<Student>标记。

  • 假设File1已修复且File2为输入文件。 在文件1中,一所学校有4名学生。在文件2中,有许多学校,但必须根据它拥有的学校数量反复检查File-1格式。以上是File1的一个示例。

"场景" - 一所学校有4个"Student"标签,每个标签中都重复出现"Team"。

&#34;有限制的问题&#34;

  1. 在File1的"Sport"标记中定义了"StartDate"和"ExpiryDate",但File2中的每个"School"不一定都包含它们。
    • 如果已定义,如何验证它们应该在正确的行?
    • 如果在File2的某些学校中没有定义格式,如何验证格式是否合适?
  2. 比较两个文件时会跳过某些行,但即使这些行被跳过,也需要从File2中收集其中一部分写入新的txt文件:从File2中检索"SchoolName"、"Team"、"phone"和"rollno",并逐行写入txt。****重要:每所学校的"Team"只检索一次,因为它在同一所"School"的四个"Student"中重复了4次。
    • 如何在跳过的行中仅检索SchoolName,Team,Phone,RollNo?
    • 在写入新文本文件时,即使Team在同一所学校的多个学生中重复出现,如何只检索一次?
  3. 要做的两件事。 1.匹配文件格式2.具有特定值的新文本

    &#34;新文本示例&#34;

     latha2   // SchoolName    
     power    // Team
     5035546  // phone  - student1
     6        // rollno - student1
     5089973  // phone  - student2
     5        // rollno - student2
     5402734  // phone  - student3
     1        // rollno - student3
     8540345  // phone  - student4
     2        // rollno - student4
    

1 个答案:

答案 0 :(得分:0)

与之前的问题不同,这个问题并不是在允许某些例外的情况下比较两个文件的差异。这里有一个format文件,它给出有效的标签及其顺序;还有一个data文件,其中包含带数据的标签。因此,做法不是比较差异,而是先读取第一个文件以获得预期/有效的标签,然后读取并处理第二个文件以获取所需的信息。

下面,我还会检查文件中的标签是否以正确的顺序出现。如果您不需要,可以放宽该限制。另外,程序会跳过少于3个字符的行(有效标记至少有3个字符,例如<t>)。

输出的格式非常简单,您可以根据需要进行改进。我没有数据文件可供使用,因此我使用您提供的信息并通过在单独的文件中将您的文件复制3次以上来创建一个。查看代码。正如其他人所提到的,在C中解析XML虽然很好,但在实践中很少进行,因为其他工具提供了用于处理模式的现成工具。如果您有任何疑问,请告诉我。这将为您提供一种处理此类信息的方法:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXL 128

char *gettag (char *s, char *t);

/* Return the index of the first entry in list[0..n-1] that is a prefix of
 * tag, or n when no entry matches.
 */
static size_t tagindex (const char *tag, const char *const *list, size_t n)
{
    size_t i = 0;

    while (i < n && strncmp (tag, list[i], strlen (list[i])) != 0)
        i++;

    return i;
}

/* Usage: prog file1 file2
 *
 * file1 is the format file: the first <tag> of every line of 3 or more
 * characters is collected as a known tag.  file2 is the data file: each
 * line's tag is checked against the known tags, and the text following the
 * '>' is printed for the retr[] tags (every time) and the retr1[] tags
 * (first occurrence only).  Lines whose tag is unknown produce a warning on
 * stderr and are otherwise ignored.
 *
 * Returns 0 on success, 1 on a usage, file-open or format-file error.
 */
int main (int argc, char **argv) {

    if (argc < 3 ) {
        fprintf (stderr, "error: insufficient input, usage: %s file1 file2\n",
                argv[0]);
        return 1;
    }

    char s1[MAXL] = {0};                            /* line buffer      */
    char *tags[MAXL] = {0};                         /* file 1 tags      */
    const char *retr[] = { "<Team>", "<phone>",     /* skip/print terms */
                        "<rollno>" };
    const char *retr1[] = { "</SchoolName>",        /* skip/print once  */
                        "<Team>" };
    const char *skip[] = { "<StudentNo>",           /* skip terms       */
                        "<hobby>" };
    const char *opt[]  = { "<StartDate>",           /* optional tags    */
                        "<ExpiryDate>" };
    const size_t retrsz = sizeof retr/sizeof *retr;     /* elements in retr */
    const size_t retr1sz = sizeof retr1/sizeof *retr1;  /* elements in retr1*/
    const size_t skipsz = sizeof skip/sizeof *skip;     /* elements in skip */
    const size_t optsz = sizeof opt/sizeof *opt;        /* elements in opt  */
    /* one "already printed" flag per retr1 entry; a fixed-size array, so it
     * can be zero-initialized directly instead of through a VLA + loop.
     * NOTE(review): the flags are never cleared, so retr1 tags are printed
     * once per run, not once per <School> -- confirm that is intended. */
    unsigned char retvd[sizeof retr1/sizeof *retr1] = {0};
    size_t tidx = 0;                                /* number of tags   */
    size_t idx = 0;                                 /* expected tag     */
    size_t i = 0;                                   /* general variable */
    FILE *f1 = NULL, *f2 = NULL;                    /* file pointers    */
    int rc = 1;                                     /* exit status      */

    /* open both files or exit (without leaking the first on failure) */
    if (!(f1 = fopen (argv[1], "r"))) {
        fprintf (stderr, "error: file open failure.\n");
        return 1;
    }
    if (!(f2 = fopen (argv[2], "r"))) {
        fprintf (stderr, "error: file open failure.\n");
        fclose (f1);
        return 1;
    }

    /* read lines from format file1, create tags array */
    while (fgets (s1, MAXL, f1))
    {
        size_t len = strlen (s1);
        while (len && (s1[len-1] == '\n' || s1[len-1] == '\r'))
            s1[--len] = 0;      /* strip newline or carriage return */

        if (len < 3)            /* skip blank, 3 char for valid tag */
            continue;

        if (tidx == MAXL) {     /* tags[] is full, refuse to overflow */
            fprintf (stderr, "error: more than %d tags in format file.\n",
                    MAXL);
            goto cleanup;
        }

        char *tmp = gettag (s1, NULL);
        if (tmp == NULL) {
            fprintf (stderr, "error: tag not found in '%s'\n", s1);
            goto cleanup;
        }
        tags[tidx++] = tmp;
    }

    fclose (f1);    /* close file1 */
    f1 = NULL;

    if (tidx == 0) {            /* nothing to validate file2 against */
        fprintf (stderr, "error: no tags found in format file.\n");
        goto cleanup;
    }

    /* read each line in file2 */
    while (fgets (s1, MAXL, f2))
    {
        char tag[MAXL] = {0};
        size_t len = strlen (s1);

        while (len && (s1[len-1] == '\n' || s1[len-1] == '\r'))
            s1[--len] = 0;      /* strip newline or carriage return */

        if (len < 3)            /* skip blank or lines < 3 chars    */
            continue;

        /* when no tag is found, 'tag' stays empty and is rejected below */
        gettag (s1, tag);

        /* verify that current tag is a valid tag from format file: the
         * tag expected at this position, or failing that any known tag */
        if (strncmp (tag, tags[idx], strlen (tags[idx])) != 0 &&
            tagindex (tag, (const char *const *) tags, tidx) == tidx) {
            fprintf (stderr, "warning: invalid tag '%s', skipping.\n", tag);
            continue;           /* or handle as desired (e.g. exit) */
        }

        /* data is whatever follows the first '>' on the line */
        const char *data = strchr (s1, '>');
        data = data ? data + 1 : "";

        if ((i = tagindex (tag, retr1, retr1sz)) < retr1sz) {
            if (!retvd[i]) {    /* print-once tag, first occurrence */
                printf ("%s\n", data);
                retvd[i] = 1;   /* set flag to skip next            */
            }
        }
        else if (tagindex (tag, retr, retrsz) < retrsz) {
            printf ("%s\n", data);      /* print data               */
        }
        else if (tagindex (tag, skip, skipsz) < skipsz ||
                 tagindex (tag, opt, optsz) < optsz) {
            /* known skip/optional tag: nothing to print */
        }

        if (++idx == tidx)      /* advance expected tag, wrap at end */
            idx = 0;
    }

    rc = 0;

cleanup:
    if (f1)
        fclose (f1);
    if (f2)
        fclose (f2);            /* close file2      */

    for (i = 0; i < tidx; i++)  /* free tags memory */
        free (tags[i]);

    return rc;
}

/* Extract the first "<tag>" from line s: the text from the first '<' up to
 * and including the first '>' in s.
 *
 * If 't' is NULL, memory sufficient to hold the tag plus its terminator is
 * allocated and the caller must free() the result; otherwise the tag is
 * copied to 't' without allocation ('t' must have room for the tag and its
 * terminating NUL -- a buffer as large as the line always suffices).
 *
 * The line s is not modified.
 *
 * On success the address of the tag ('t', or the new allocation) is
 * returned.  NULL is returned, and 't' is left untouched, when s is NULL,
 * when s has no '>', when no '<' precedes the first '>', or when the
 * allocation fails.
 */
char *gettag (char *s, char *t)
{
    if (!s) return NULL;                /* test valid string        */

    const char *end = strchr (s, '>');  /* find first '>' in s      */
    if (!end) return NULL;              /* if no '>', return NULL   */

    const char *beg = s;
    while (beg < end && *beg != '<')    /* skip anything before '<' */
        beg++;

    if (beg == end) return NULL;        /* '>' without opening '<'  */

    size_t n = (size_t)(end - beg) + 1; /* tag length, '<' thru '>' */

    if (!t) {
        /* malloc rather than strdup: strdup is not part of ISO C11 and is
         * not declared by <string.h> under strict -std=c11 */
        t = malloc (n + 1);
        if (!t) return NULL;
    }

    memcpy (t, beg, n);                 /* copy exactly the tag     */
    t[n] = 0;                           /* and terminate it         */

    return t;
}

File1 - 格式(列表标签)

$ cat dat/student_format.txt
<School>
      </SchoolName>latha2  //skip, but keep
</School>
<Student>
   <Team>power  //skip,but keep
   <StudentNo>1 //skip
       <Sport>
            <StartDate>16122016</StartDate> //*skip(May or maynot contained)
            <SportType>All
            <ExpiryDate>16122020</EndDate> //*skip (May or maynot contained)
       </Sport>

 <Personal>
   <phone>50855466 //skip,but keep
   <rollno>6 //skip,but keep
 </Personal>
 <hobby>  //skip
</Student>

File2 - 数据文件(将上述内容重复3次)

$ cat dat/student_file.txt
<School>
      </SchoolName>latha2
</School>

<Student>
   <Team>power
   <StudentNo>1
       <Sport>
            <StartDate>16122016</StartDate>
            <SportType>All
            <ExpiryDate>16122020</EndDate>
       </Sport>

 <Personal>
   <phone>50855466
   <rollno>6
 </Personal>
 <hobby>
</Student>
<School>
      </SchoolName>latha2
</School>

<Student>
   <Team>power
   <StudentNo>1
       <Sport>
            <StartDate>16122016</StartDate>
            <SportType>All
            <ExpiryDate>16122020</EndDate>
       </Sport>

 <Personal>
   <phone>50855466
   <rollno>6
 </Personal>
 <hobby>
</Student>
<School>
      </SchoolName>latha2
</School>

<Student>
   <Team>power
   <StudentNo>1
       <Sport>
            <StartDate>16122016</StartDate>
            <SportType>All
            <ExpiryDate>16122020</EndDate>
       </Sport>

 <Personal>
   <phone>50855466
   <rollno>6
 </Personal>
 <hobby>
</Student>

示例输出

$ ./bin/cmpf1f2_2 dat/student_format.txt dat/student_file.txt
latha2
power
50855466
6
50855466
6
50855466
6