#include<stdio.h>
#include<string.h>
#include<stdlib.h>
char* get_page(char *s)
{
FILE *fp = fopen(s,"r");
if(fp==NULL){
printf("file not found\n");
}else{
char c;
char *a;
int y,i;
a=(char *) malloc(sizeof(char)*100000);
for(i=0;(c=fgetc(fp))!=EOF;i++){
a[i]=c;
}
a[i+1]='\0';
fclose(fp);
return a;
}
}
char** get_all_links(char *b)
{
//printf("%s",b);
int u,i=0,j=0,t,z;
char **f;
f=(char **) malloc(sizeof(char *)*100);
u=strlen(b);
for(t=0;t<u;t++){
if(b[t]=='<' && (b[t+1]=='A' || b[t+1]=='a')){
f[j]=(char *) malloc(sizeof(char)*10000);
for(z=t+9;b[z]!='"';z++){
f[j][i++]=b[z];
}
i=0;
j++;
}
}
printf("%d\n",j);
if(j==0){
f[0]=NULL;
}else{
f[j+1]=NULL;
}
return f;
}
int main(){
char f[100],*p,*l,*w,**k,**links;
scanf("%s",f);
int loop_count=0,ii,prev_links=0,check=0,get_link=1,o,u;
k=NULL;
while(f!=NULL){
//printf("in while 1\n");
printf("%s\n",f);
p=get_page(f);
//printf("%s",p);
if(k!=NULL)
free(k);
k=get_all_links(p);
//printf("\n");
int count_for_links=0,x=0;
while(k[x]!=NULL){
//printf("in while\n");
count_for_links++;
x++;
}
printf("%d\n",count_for_links);
prev_links=prev_links+count_for_links;
if(loop_count==0){
//printf("in loop count ==0\n");
links=(char **) malloc(sizeof(char *)*(count_for_links+1));
links[0]=(char *) malloc(sizeof(char)*strlen(f));
strcpy(links[0],f);
//printf("%s\n",links[0]);
for(ii=0;ii<count_for_links;ii++){
//printf("in for ii<count_for_links\n");
links[ii+1]=(char *) malloc(sizeof(char)*strlen(k[ii]));
strcpy(links[ii+1],k[ii]);
printf("%s\n",links[ii+1]);
}
}else if(loop_count>0){
//printf("%s\n",k[0] );
for(o=0;(k[o]!=NULL && o<count_for_links);o++){
printf("%s",k[o]);
//printf("check extra %d\n",loop_count);
for(u=0;u<=ii;u++){
if(strcmp(k[o],links[u])==1){
check=1;
printf("the war");
}
}
if(check==0){
// printf("in link not found\n");
links =(char **) realloc(links,(sizeof(char *)));
links[ii+1]=(char *) malloc(sizeof(char)*strlen(k[o]));
ii++;
strcpy(links[ii],k[o]);
}
}
}
printf("OUT");
strcpy(f,links[get_link]);
get_link++;
loop_count++;
}
int q;
printf("helloooooo");
for(q=0;links[q]!=NULL;q++){
printf("%s",links[q]);
}
return 0;
}
/ *在这个代码中,k数组表示我从html获得的链接,并且第一次存储在链接数组中,并且从下次检查其他页面时,如果链接相同,则省略相同的链接并采取其他链接到数组* /
答案 0 :(得分:-1)
如果您的文件大于100.000,您将覆盖:
a = (char *)malloc(sizeof(char)* 100000); <--
for (i = 0; (c = fgetc(fp)) != EOF; i++){
a[i] = c;
}
你应该使用文件大小