我正在用C编写一个小程序,以检查HTML文件是否有正确的打开和关闭标签? 但我有一些问题...... 我有一个文件包含所有可能的标签,名为tags.txt(这些只是第一个):
<a>
</a>
<abbr>
</abbr>
<area>
</area>
<aside>
</aside>
我有htmlfile.html,我必须检查:
<!--#echo var="date" -->
<area>
</area>
<area>
</area>
其次,我想将这样的注释替换为sysdate 比如,格式是可以的,我可以做到,但编程放入文件 这个
我的代码:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#define MAX_SIZE 512
void menu();
void check();
void datumos();
int main(int argc,char *argv[])
{
menu();
return 0;
}
void menu()
{
char menu[MAX_SIZE];
while(1 < 2)
{
printf("\npress a button:\n\n");
printf("\tFile HTML check..............:c\n");
printf("\t<!--#echo var="date" -->...........:d\n");
printf("\tExit:\tCTRL + C\n");
scanf("%s",menu);
if( strcmp(menu,"c") == 0 )
{
check();
}
else if( strcmp(menu,"d") == 0 )
{
datumos();
}
}
}
void check()
{
FILE *htmlfile;
FILE *checkfile;
htmlfile = fopen("htmlfile.html","w");
checkfile = fopen("tags.txt","r");
char line[MAX_SIZE];
char htmlline[MAX_SIZE];
char tags[189][30];
int i=0;
printf("\tcheck__1\n");
while(fgets(line,sizeof(line),checkfile) != NULL)
{
int j;
for(j=0; j<sizeof(line); ++j)
{
tags[i][j]=line[j];
}
++i;
}
printf("\tcheck__2\n");
int k=0; char htmlfiletags[MAX_SIZE][30];
while(fgets(htmlline,sizeof(htmlline),htmlfile) != NULL)
{
char currentline[sizeof(htmlline)];
int j=0;
if( currentline[j]=="<" )
{
while(currentline[j]!=">")
{
htmlfiletags[k][j]=currentline[j];
++j;
}
strcat(htmlfiletags[k][j+1],">");
++k;
}
}
printf("\tcheck__3\n");
int n;
for(n=0; n<sizeof(htmlfiletags); ++n)
{
int j; int howmanytimesnot=0;
for(j=0; j<sizeof(tags); ++j)
{
printf("\tcheck__3/1\n");
if(strcmp(htmlfiletags[n],tags[j])==0)
{
printf("\t%d\n", howmanytimesnot);
++howmanytimesnot;
}
}
printf("\tcheck__3/3\n");
if(!(howmanytimesnot<sizeof(tags)))
{
printf("\tcheck__3/4\n");
printf("the file is not wellformed");
exit (1);
}
}
printf("\tcheck__4\n");
}
void copy_file(const char *from,const char *to)
{
FILE *fr;
FILE *t;
fr = fopen(from,"r");
t = fopen(to,"w");
char line[MAX_SIZE];
char row[MAX_SIZE];
while(fgets(line,sizeof(line),fr) != NULL)
{
sscanf(line,"%s",row);
fprintf(t,"%s\n",row);
}
fclose(fr);
fclose(t);
remove("tempfile.html");
}
void datumos()
{
time_t now = time(NULL);
struct tm *t = localtime(&now);
char date_time[30];
strftime( date_time, sizeof(date_time), "%x_%X", t );
FILE *htmlfile;
FILE *tempfile;
htmlfile = fopen("htmlfile.html","r");
tempfile = fopen("tempfile.html","w");
char line[MAX_SIZE];
//char datecomment[]="<!--#echo var=date -->";
while(fgets(line,sizeof(line),htmlfile) != NULL)
{
if( strcmp(line,"<!--#echo var="date" -->") == 0 )
{
char row[40];
strcpy(row,"<!--");
strcat(row, date_time);
strcat(row,"-->");
printf("%s",row);
fputs(row,tempfile);
}
else
{
fputs(line,tempfile);
}
}
fclose(htmlfile);
fclose(tempfile);
copy_file("tempfile.html","htmlfile.html");
}
它死在这里,在内部for循环中,if在第200次检查......我不知道为什么......
int n;
for(n=0; n<sizeof(htmlfiletags); ++n)
{
int j; int howmanytimesnot=0;
for(j=0; j<sizeof(tags); ++j)
{
printf("\tcheck__3/1\n");
if(strcmp(htmlfiletags[n],tags[j])==0)
{
printf("\t%d\n", howmanytimesnot);
++howmanytimesnot;
}
}
printf("\tcheck__3/3\n");
if(!(howmanytimesnot<sizeof(tags)))
{
printf("\tcheck__3/4\n");
printf("the file is not wellformed");
exit (1);
}
}
感谢所有回复!! ģ
答案 0 :(得分:1)
您的代码非常复杂,它有几个问题。
这是一个:
for(j=0; j<sizeof(tags); ++j)
这不会做我认为你期望的事情; sizeof(tags)
不是tags
的数组长度(声明为char tags[189][30];
),而是变量的总大小。因此,这个循环将从0到189 * 30 - 1,即5669,因此索引输出超出数组的末尾。
此外,在这里以任何方式使用sizeof
的想法是错误的,因为tags
的内容来自文件,因此编译器无法知道。请记住,对于像这样的表达式,在编译时评估sizeof
。
您需要为从标记文件解析的每一行增加一个变量(例如size_t num_tags
),并且稍后用于迭代tags
。
答案 1 :(得分:0)
Do not use regex或某种字符串解析,用于解析HTML。而是在网络or this site上搜索c库来解析html。然后检查解析的HTML文件中的标签。这样可以简化开发过程,因为您不必自己解析文件。
答案 2 :(得分:0)
代码现在是:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#define MAX_SIZE 512
void menu();
void check();
void datumos();
int main(int argc,char *argv[])
{
menu();
return 0;
}
void menu()
{
char menu[MAX_SIZE];
while(1 < 2)
{
printf("\npress a button:\n\n");
printf("\tFile HTML check..............:c\n");
printf("\t<!--#echo var="date" -->...........:d\n");
printf("\tExit:\tCTRL + C\n");
scanf("%s",menu);
if( strcmp(menu,"c") == 0 )
{
check();
}
else if( strcmp(menu,"d") == 0 )
{
datumos();
}
}
}
void check()
{
FILE *htmlfile;
FILE *checkfile;
htmlfile = fopen("htmlfile.html","r");
checkfile = fopen("tags.txt","r");
char line[MAX_SIZE];
char htmlline[MAX_SIZE];
int i2=0;
printf("\tcheck__1\n");
while(fgets(line,sizeof(line),checkfile) != NULL)
{
++i2;
}
char tags[i2][20];
int i=0;
printf("\tcheck__11\n");
while(fgets(line,sizeof(line),checkfile) != NULL)
{
int j;
for(j=0; j<sizeof(line); ++j)
{
tags[i][j]=line[j];
}
++i;
}
printf("\tcheck__2\n");
int k=0; char htmlfiletags[MAX_SIZE][30];
while(fgets(htmlline,sizeof(htmlline),htmlfile) != NULL)
{
char currentline[sizeof(htmlline)];
int j=0;
if( currentline[j]=="<" )
{
while(currentline[j]!=">")
{
htmlfiletags[k][j]=currentline[j];
++j;
}
strcat(htmlfiletags[k][j+1],">");
++k;
}
}
printf("\tcheck__3\n");
int n;
for(n=0; n<sizeof(htmlfiletags); ++n)
{
int j; int howmanytimesnot=0;
for(j=0; j<sizeof(tags); ++j)
{
//printf("\tcheck__3/1\n");
if(strcmp(htmlfiletags[n],tags[j])==0)
{
// printf("\t%d\n", howmanytimesnot);
++howmanytimesnot;
}
}
printf("\tcheck__3/3\n");
if(!(howmanytimesnot<sizeof(tags)))
{
printf("\tcheck__3/4\n");
printf("the file is not wellformed");
exit (1);
}
}
printf("\tcheck__4\n");
}
void copy_file(const char *from,const char *to)
{
FILE *fr;
FILE *t;
fr = fopen(from,"r");
t = fopen(to,"w");
char line[MAX_SIZE];
char row[MAX_SIZE];
while(fgets(line,sizeof(line),fr) != NULL)
{
sscanf(line,"%s",row);
fprintf(t,"%s\n",row);
}
fclose(fr);
fclose(t);
remove("tempfile.html");
}
void datumos()
{
time_t now = time(NULL);
struct tm *t = localtime(&now);
char date_time[30];
strftime( date_time, sizeof(date_time), "%x_%X", t );
FILE *htmlfile;
FILE *tempfile;
htmlfile = fopen("htmlfile.html","r");
tempfile = fopen("tempfile.html","w");
char line[MAX_SIZE];
char* datecomment="<!--#echo var=\"date\" -->";
while(fgets(line,sizeof(line),htmlfile) != NULL)
{
int i3; int db=0;
for(i3=0; i3<strlen(datecomment); ++i3)
{
if(line[i3]==datecomment[i3])
{
++db;
}
}
if(db==strlen(datecomment))
{
char row[30];
strcpy(row,"<!--");
strcat(row, date_time);
strcat(row,"-->\n");
fputs(row,tempfile);
}
else
{
fputs(line,tempfile);
}
}
fclose(htmlfile);
fclose(tempfile);
copy_file("tempfile.html","htmlfile.html");
}
答案 3 :(得分:0)
当前行没有必要,我也修了比较
while(fgets(htmlline,sizeof(htmlline),htmlfile) != NULL)
{
int j=0;
if( htmlline[j]=='<' )
{
while(htmlline[j]!='>')
{
htmlfiletags[k][j]=htmlline[j];
++j;
}
strcat(htmlfiletags[k][j+1],">");
++k;
}
}
- 此外,另一个问题是只更换合适的评论,并且不会伤害不同的评论仍然无法正常工作
“所以它取代了
<!--#echo var="date" --> to the sysdate, it's ok, but when there are different comments like
<!--#include something -->, it wont be copied back well, in the htmlfile will be only <!--#include"
想法?