/*********************************
***                            ***
***      HTMLgobble v2.3b      ***
***      ========== ====       ***
***                            ***
*** (c) 18.2.94 by Andreas Ley ***
***                            ***
*** (u) 30.5.94		       ***
***                            ***
*********************************/

/*
** Known bugs:
**	Doesn't like scheme:relative/path URLs
**	Chokes on servers like plexus where it can't get the header with the
**		first read()
*/

/*
** Thanks to joe@babel.ho.att.com (Joseph M. Orost) for patches!
** Thanks to dieroc@athena.CSELT.STET.IT (Diego Roccato) for bug reports!
*/

/* Program banner; main prints it when the -h option is given. */
char header[]="HTMLgobble v2.3b\n(c) 1994 by Andreas Ley\n";
/* Header lines that link2mem appends to every HTTP GET request it builds. */
char identify[]="User-agent: HTMLgobble/2.3b\nFrom: ley@rz.uni-karlsruhe.de (Andreas Ley)\n";

#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include <sys/param.h>
#include <sys/stat.h>
#include "html.h"

/*
** Replacement for strerror() built on the sys_nerr/sys_errlist tables,
** presumably for C libraries that do not provide the function.
** NEEDSTRERROR is defined unconditionally right here, so the macro is
** always active in this file.
** NOTE(review): only the upper bound is checked, a negative argument
** indexes before the table.  The macro parameter also shares its name
** with errno - confirm the shim is still wanted on the target platform.
*/
#define NEEDSTRERROR
#ifdef NEEDSTRERROR
extern int	sys_nerr;
extern char	*sys_errlist[];
#define	strerror(errno)	((errno)>=sys_nerr?"Undefined error":sys_errlist[errno])
#endif /* NEEDSTRERROR */

/*
** Result codes shared by link2mem() and process_link().
** EXISTS:   link2mem found the local file and -N (retrieve) is not set.
** ONSTACK:  process_link found the link in one of the three fifos.
** ORIG:     process_link refused the link (script or restriction
**           violated); the caller rewrites it to the absolute URL.
** The remaining values are HTTP status codes.  NOTFOUND is also what
** link2mem returns in check mode; FORBIDDEN is not referenced in this
** file.
*/
#define	EXISTS		000
#define	ONSTACK		001
#define	ORIG		002
#define	OK		200
#define	FOUND		302
#define	FORBIDDEN	403
#define	NOTFOUND	404

/*
** mode: how far links are followed, chosen in main by -r/-u/-p/-s/-P.
** process_link compares it with '<', so the enumerator order matters.
*/
enum	{ NONE, DOWN, UP, PORT, HOST, PROTOCOL } mode=NONE;
/* nl: line-break state kept by element2file and parse2file while writing. */
enum	{ DONE, DONT, LIST, TEXT, OPEN, CLOSE } nl;
/* last: type of the tag element2file wrote most recently. */
int	last;

/*
** One node of a singly linked fifo: a private copy of the URL plus,
** optionally, the document body (data/len).
*/
typedef struct entry {
	struct entry	*next;
	url		*link;
	char		*data;
	int		len;
	} entry;

/* Element and attribute type numbers, looked up once at the top of main. */
int	LINK,A,HREF,IMG,SRC,PRE,P,LI,DD,DT,H6;
/*
** Option state.  check, resume, retrieve, sleeptime and verbose are set
** from -C, -R, -N, -S and -v in main.  nosleep suppresses the next
** sleep() in link2mem.
*/
int	check=FALSE,resume=FALSE,retrieve=FALSE,verbose=0,sleeptime=60,nosleep=TRUE;
/* root: the start URL; gateway: proxy taken from WWW_http_GATEWAY in main. */
url	root,gateway;
/*
** Three fifos, each with a head pointer and a tail pointer (the *_in
** name).  fifo_*: HTML documents still to be processed; failed: links
** that could not be retrieved; processed: finished documents.  The head
** pointers have no initializer and rely on static zero initialization.
*/
entry	*fifo_in=NULL,*fifo_out,*failed,*failed_in=NULL,*processed,*processed_in=NULL;


/*
** Gives the memory used up by a fifo
*/
size_t size(fifo)
entry	*fifo;
{
	size_t	memory;

	memory=0;
	while (fifo) {
		memory+=sizeof(entry);
		memory+=sizeof(url);
		if (fifo->link->scheme);
			memory+=strlen(fifo->link->scheme);
		if (fifo->link->host)
			memory+=strlen(fifo->link->host);
		if (fifo->link->path)
			memory+=strlen(fifo->link->path);
		if (fifo->link->anchor)
			memory+=strlen(fifo->link->anchor);
		memory+=fifo->len;
		fifo=fifo->next;
	}
	return(memory);
}



/*
** Prints out a fifo
*/
void show(fifo)
entry	*fifo;
{
	char	tmp[MAXPATHLEN];

	while (fifo) {
		url2txt(tmp,fifo->link);
		if (fifo->len)
			fprintf(stderr,"FIFO: %s (%d bytes starting at 0x%08x)\n",tmp,fifo->len,fifo->data);
		else
			fprintf(stderr,"FIFO: %s\n",tmp);
		fifo=fifo->next;
	}
}



/*
** Removes the given fifo entry and returns the next.
*/
entry *next(fifo)
entry	*fifo;
{
	entry	*ptr;

	ptr=fifo->next;
	free(fifo->link->scheme);	/* from add */
	free(fifo->link->host);		/* from add */
	free(fifo->link->path);		/* from add */
	free(fifo->link->anchor);	/* from add */
	free(fifo->link);		/* from add */
	free(fifo->data);		/* from link2mem */
	free(fifo);			/* from add */
	return(ptr);
}



/*
** Duplicates a string for add(); NULL stays NULL.  Running out of memory
** ends the program, since a half-filled url would crash later in infifo.
*/
static char *add_dup(text)
char	*text;
{
	char	*copy;

	if (!text)
		return(NULL);
	copy=strdup(text);
	if (!copy) {
		(void)fprintf(stderr,"Out of memory\n");
		exit(1);
	}
	return(copy);
}

/*
** Adds a link to a fifo.
**
** in		address of the fifo's tail pointer
** out		address of the fifo's head pointer; set when the fifo was empty
** link		URL to store; it is copied, the caller keeps its own
** data		document body to attach, or NULL; NOT copied, the entry
**		takes it over (released by next or move)
** len		length of data in bytes
**
** The program exits with a message when memory runs out.
*/
void add(in,out,link,data,len)
entry	**in;
entry	**out;
url	*link;
char	*data;
int	len;
{
	entry	*ptr;

	/* Strings of a url may be NULL; never hand NULL to %s */
	if (verbose>4)
		(void)fprintf(stderr,"Add to fifo: %s://%s:%d%s#%s (%d bytes starting at %p)\n",
			link->scheme?link->scheme:"",link->host?link->host:"",link->port,
			link->path?link->path:"",link->anchor?link->anchor:"",len,(void *)data);
	ptr=(entry *)malloc(sizeof(entry));	/* for free see next */
	if (!ptr) {
		(void)fprintf(stderr,"Out of memory\n");
		exit(1);
	}
	ptr->next=NULL;
	ptr->link=(url *)malloc(sizeof(url));	/* for free see next */
	if (!ptr->link) {
		(void)fprintf(stderr,"Out of memory\n");
		exit(1);
	}
	ptr->link->scheme=add_dup(link->scheme);	/* for free see next */
	ptr->link->host=add_dup(link->host);		/* for free see next */
	ptr->link->port=link->port;
	ptr->link->path=add_dup(link->path);		/* for free see next */
	ptr->link->anchor=add_dup(link->anchor);	/* Never really used */
	ptr->data=data;
	ptr->len=len;
	/* Append behind the current tail, or start the list */
	if (*in)
		(*in)->next=ptr;
	else
		*out=ptr;
	*in=ptr;
}



/*
** Detaches the given entry from the fifo it heads, drops its document
** body and appends it to the queue described by in (tail) and out (head).
** Returns the entry that used to follow src.
*/
entry *move(src,in,out)
entry	*src;
entry	**in;
entry	**out;
{
	entry	*rest;

	rest=src->next;
	src->next=NULL;

	/* Only the URL travels on; the body from link2mem is released */
	free(src->data);
	src->data=NULL;
	src->len=0;

	if (!*in)
		*out=src;		/* destination was empty */
	else
		(*in)->next=src;	/* hook behind the old tail */
	*in=src;

	return(rest);
}



/*
** Looks a URL up in a fifo.  Scheme, host, port and path have to match;
** the anchor is not compared.  Returns the matching entry, or NULL when
** there is none.
*/
entry *infifo(fifo,link)
entry	*fifo;
url	*link;
{
	url	*have;

	while (fifo) {
		have=fifo->link;
		if (!strcmp(have->scheme,link->scheme)
		    &&!strcmp(have->host,link->host)
		    &&have->port==link->port
		    &&!strcmp(have->path,link->path))
			return(fifo);
		fifo=fifo->next;
	}
	return(NULL);
}



/*
** Retrieves the document a link points to into freshly allocated memory.
**
** link		document to fetch; overwritten with the new location when
**		the server answers with a Location: header
** type		receives the Content-type; the caller has to provide at
**		least BUFSIZ bytes (as process_link does)
** data		receives the malloc()ed body when the transfer succeeded;
**		the caller owns it then.  NULL on every other return.
** clen		receives the number of body bytes stored in *data
**
** Returns EXISTS when the local file is already there and -N was not
** given, NOTFOUND in check mode, -1 on a local or network error, and
** otherwise the HTTP status (always OK for gopher).  Links that fail
** with a status other than OK or FOUND are remembered in the failed fifo.
**
** Known limitation: the complete HTTP header has to arrive with the
** first read().
*/
int link2mem(link,type,data,clen)
url	*link;
char	*type;
char	**data;
int	*clen;
{
	char	port[16],tmp[MAXPATHLEN],buffer[BUFSIZ*8],*ptr,*nptr,*dptr,*retchr,*grown;
	int	sock,len,retval,bufsiz,used,ishttp;
	url	srv,tmpurl;

	*data=NULL;
	url2fname(tmp,link);
	if (verbose>3)
		(void)fprintf(stderr,"Fname %s\n",tmp);
	if (!retrieve&&!access(tmp,F_OK))
		return(EXISTS);
	if (check)
		return(NOTFOUND);

	/* Be nice to the network, except right after a cached answer */
	if (sleeptime&&!nosleep&&mode>NONE)
		sleep(sleeptime);
	nosleep=FALSE;

	if (verbose) {
		url2txt(tmp,link,FALSE);
		(void)fprintf(stderr,"%s",tmp);
	}

	/* Prepare request */
	ishttp=!strcmp(link->scheme,"http");
	if (ishttp) {
		(void)sprintf(buffer,"GET %s%s%s HTTP/1.0\r\n%s",gateway.host?"/":"",gateway.host?link->host:"",link->path,identify);
	}
	else if (!strcmp(link->scheme,"gopher")) {
		/* Skip the slash and the type character, but never the terminator */
		ptr=link->path;
		if (*ptr=='/') {
			ptr++;
			if (*ptr)
				ptr++;
		}
		(void)sprintf(buffer,"%s\r\n",ptr);
	} else {
		(void)fprintf(stderr,"Unimplemented protocol: %s\n",link->scheme);
		return(-1);
	}
	if (verbose>3)
		(void)fprintf(stderr,"\nRequest: %s",buffer);
	if (ishttp)
		strcat(buffer,"\r\n");
	len=strlen(buffer);

	/* Open connection */
	srv.host=link->host;
	srv.port=link->port;
	if (gateway.host&&ishttp) {
		srv.host=gateway.host;
		srv.port=gateway.port;
	}
	(void)sprintf(port,"%d",srv.port);
	if ((sock=get_tcp_conn(srv.host,port))<0) {
		(void)fprintf(stderr,"Cannot open connection to server %s on port %d\n",srv.host,srv.port);
		return(-1);
	}
	if (verbose)
		(void)fprintf(stderr,".");

	/* Send request */
	if (write(sock,(void*)buffer,(size_t)len)!=len) {
		(void)fprintf(stderr,"Write error on server socket: %s\n", strerror(errno));
		(void)NETCLOSE(sock);
		return(-1);
	}
	if (verbose)
		(void)fprintf(stderr,".");

	/* Read answer */
	*clen=-1;
	dptr=buffer;
	if (ishttp) {
		/* Subject to failure: If the first read doesn't get all the header
			 lines, we'll fail with no body in document. Should have read all
			 to memory and parsed then. */
		type[0]='\0';	/* stays empty without a Content-type header */
		len=read(sock,buffer,sizeof(buffer)-1);
		if (len<0) {
			(void)fprintf(stderr,"Read error on server socket\n");
			(void)NETCLOSE(sock);
			return(-1);
		}
		if (len==0) {	/* connection closed without any answer */
			(void)fprintf(stderr,"No body in document\n");
			(void)NETCLOSE(sock);
			return(-1);
		}
		if (verbose)
			(void)fprintf(stderr,".");
		buffer[len]='\0';

		/* Split header from body; len becomes the body bytes already read */
		if ((ptr=strstr(buffer,"\n\r\n"))) {
			dptr=ptr+3;
			len-=ptr-buffer+3;
			ptr[2]='\0';
			if (verbose>4)
				(void)fprintf(stderr,"Server returns CRLFs, header is %d bytes\n",(int)(ptr-buffer+3));
		}
		else if ((ptr=strstr(buffer,"\n\n"))) {
			dptr=ptr+2;
			len-=ptr-buffer+2;
			ptr[1]='\0';
			if (verbose>4)
				(void)fprintf(stderr,"Server returns LFs, header is %d bytes\n",(int)(ptr-buffer+2));
		}
		else {
			(void)fprintf(stderr,"No body in document\n");
			(void)NETCLOSE(sock);
			return(-1);
		}

		/* Status line: "HTTP/1.0 <code> <reason>" */
		ptr=strchr(buffer,' ');
		if (!ptr) {
			(void)fprintf(stderr,"Malformed status line\n");
			(void)NETCLOSE(sock);
			return(-1);
		}
		retval=atoi(ptr+1);
		ptr=strchr(ptr+1,' ');
		retchr=ptr?ptr+1:"";
		if (verbose>3)
			(void)fprintf(stderr,"%s",buffer);

		/* Walk the header line by line */
		ptr=buffer;
		while ((nptr=strchr(ptr,'\n'))) {
			*nptr++='\0';
			if (nptr-ptr>=2&&nptr[-2]=='\r')
				nptr[-2]='\0';
			/* Date: */
			/* Server: */
			/* MIME-version: */
			if (!strncasecmp(ptr,"Location: ",10)) {
				txt2url(&tmpurl,link,ptr+10);
				freeurl(link);
				link->scheme=tmpurl.scheme;
				link->host=tmpurl.host;
				link->port=tmpurl.port;
				link->path=tmpurl.path;
				link->anchor=tmpurl.anchor;
			}
			if (!strncasecmp(ptr,"Content-type: ",14)) {
				/* Bounded copy, the caller's buffer holds BUFSIZ bytes */
				type[0]='\0';
				(void)strncat(type,ptr+14,(size_t)(BUFSIZ-1));
			}
			/* Last-modified: */
			if (!strncasecmp(ptr,"Content-length: ",16))
				*clen=atoi(ptr+16);
			ptr=nptr;
		}

		if (retval!=OK) {
			(void)NETCLOSE(sock);
			if (retval==FOUND) {
				if (verbose) {
					url2txt(tmp,link,FALSE);
					(void)fprintf(stderr,"moved to %s\n",tmp);
				}
			}
			else {
				/* Remember the failure regardless of verbosity, so the
				   link is not fetched again */
				add(&failed_in,&failed,link,NULL,0);	/* Never be freed, we can live with that */
				if (verbose)
					(void)fprintf(stderr," %s\n",retchr);
			}
			return(retval);
		}

		/* The buffer must hold what is already here and must not be
		   empty, or doubling it below would never terminate */
		bufsiz=*clen;
		if (bufsiz<len)
			bufsiz=len;
		if (bufsiz<=0)
			bufsiz=BUFSIZ;
		if (verbose>2&&*clen<0)
			(void)fprintf(stderr,"No Content-length header, allocating %d bytes\n",bufsiz);
	}

	else {			/* gopher or ftp, no parsing of first packet needed */
		len=0;
		bufsiz=BUFSIZ;
		retval=OK;
		strcpy(type,"text/plain");
	}

	*data=malloc((size_t)bufsiz);	/* for free see process_mem, next and move */
	if (!*data) {
		(void)fprintf(stderr,"Out of memory\n");
		(void)NETCLOSE(sock);
		return(-1);
	}
	(void)memcpy((void*)*data,(void*)dptr,(size_t)len);
	used=len;
	while ((len=read(sock,buffer,sizeof(buffer)))>0) {
		if (verbose)
			(void)fprintf(stderr,".");
		if (used+len>bufsiz) { /* buffer size exceeded */
			while (used+len>bufsiz)
				bufsiz*=2;		/* grow buffer to double size */
			if (verbose>2)
				(void)fprintf(stderr,"Buffer exceeded, growing to %d bytes\n",bufsiz);
			grown=realloc(*data,(size_t)bufsiz);
			if (!grown) {
				(void)fprintf(stderr,"Out of memory\n");
				free(*data);
				*data=NULL;
				(void)NETCLOSE(sock);
				return(-1);
			}
			*data=grown;
		}
		(void)memcpy((void*)(*data+used),(void*)buffer,(size_t)len);
		used+=len;
	}
	if (len<0) {
		(void)fprintf(stderr,"Read error on server socket\n");
		free(*data);
		*data=NULL;
		(void)NETCLOSE(sock);
		return(-1);
	}

	/* Close connection */
	(void)NETCLOSE(sock);
	if (verbose)
		(void)fprintf(stderr," Ok\n");
	if (verbose>1&&*clen>=0&&*clen!=used)
		(void)fprintf(stderr,"Content-length was %d, but %d bytes were transmitted!\n",*clen,used);
	*clen=used;
	return(retval);
}



/*
** Creates every missing directory on the way to a file.
**
** fname	path of the file; each prefix that ends before a '/' is
**		created as a directory when it does not exist yet.  The
**		string is modified while working but is always restored
**		before returning, also on failure.
**
** Returns 0 on success (also when there is nothing to create), -1 after
** printing a message when a directory can neither be found nor created.
*/
int mkdirs(fname)
char	*fname;
{
	char	*ptr;
	int	rc;

	for(ptr=strchr(fname,'/');ptr;ptr=strchr(ptr+1,'/')) {
		/* The leading '/' of an absolute path has no directory before it */
		if (ptr==fname)
			continue;
		*ptr='\0';
		rc=0;
		if (access(fname,F_OK)<0) {
			if (errno!=ENOENT) {
				(void)fprintf(stderr,"Can't stat %s: %s\n",fname,strerror(errno));
				rc=-1;
			}
			/* EEXIST: somebody else created it in the meantime */
			else if (mkdir(fname,0777)&&errno!=EEXIST) {
				(void)fprintf(stderr,"Can't create %s: %s\n",fname,strerror(errno));
				rc=-1;
			}
		}
		*ptr='/';
		if (rc)
			return(-1);
	}
	return(0);
}



/*
** Writes a document from memory to the local file that belongs to a link.
**
** link		URL of the document; url2fname turns it into the file name
** data		document body
** len		number of bytes in data
**
** Missing directories are created first.  Returns 0 on success, the
** result of mkdirs when that fails, and -1 after printing a message when
** the file cannot be created or written completely.
*/
int mem2file(link,data,len)
url	*link;
char	*data;
int	len;
{
	char	tmp[MAXPATHLEN];
	int	file,rc,done,put;

	url2fname(tmp,link);
	if ((rc=mkdirs(tmp)))
		return(rc);
	if ((file=open(tmp,O_WRONLY|O_CREAT|O_TRUNC,0666))<0) {
		(void)fprintf(stderr,"Can't create %s: %s\n",tmp,strerror(errno));
		return(-1);
	}
	/* write() may take less than asked for; go on until all is out */
	for (done=0;done<len;done+=put) {
		put=write(file,(void*)(data+done),(size_t)(len-done));
		if (put<=0) {
			(void)fprintf(stderr,"Can't write to %s: %s\n",tmp,strerror(errno));
			(void)close(file);
			return(-1);
		}
	}
	/* Some write errors only show up when the file is closed */
	if (close(file)) {
		(void)fprintf(stderr,"Can't write to %s: %s\n",tmp,strerror(errno));
		return(-1);
	}
	return(0);
}



/*
** Reads the local file that belongs to a link into allocated memory.
**
** link		URL of the document; url2fname turns it into the file name
** data		receives the malloc()ed file contents; the caller owns it
** len		receives the number of bytes read
**
** Returns 0 on success and -1 after printing a message otherwise.  On
** failure *data and *len are left untouched.
*/
int file2mem(link,data,len)
url	*link;
char	**data;
int	*len;
{
	char	tmp[MAXPATHLEN],*mem;
	int	file,stat,total,got;
	off_t	fsize;

	url2fname(tmp,link);
	if ((file=open(tmp,O_RDONLY))<0) {
		(void)fprintf(stderr,"Can't open %s: %s\n",tmp,strerror(errno));
		return(-1);
	}
	fsize=lseek(file,0,SEEK_END);
	if (fsize<0||lseek(file,0,SEEK_SET)<0) {
		(void)fprintf(stderr,"Can't read from %s: %s\n",tmp,strerror(errno));
		(void)close(file);
		return(-1);
	}
	/* Lengths are handled as int throughout; refuse what does not fit */
	total=(int)fsize;
	if (total<0||(off_t)total!=fsize) {
		(void)fprintf(stderr,"Can't read from %s: %s\n",tmp,"File too large");
		(void)close(file);
		return(-1);
	}
	/* Never ask for zero bytes, malloc(0) may legally return NULL */
	mem=malloc(total>0?(size_t)total:(size_t)1);
	if (!mem) {
		(void)fprintf(stderr,"Can't read from %s: %s\n",tmp,"Out of memory");
		(void)close(file);
		return(-1);
	}
	/* read() may deliver less than asked for; collect until complete */
	for (got=0;got<total;got+=stat) {
		stat=read(file,(void*)(mem+got),(size_t)(total-got));
		if (stat<0) {
			(void)fprintf(stderr,"Can't read from %s: %s\n",tmp,strerror(errno));
			free(mem);
			(void)close(file);
			return(-1);
		}
		if (stat==0)	/* file got shorter meanwhile */
			break;
	}
	if (verbose>4)
		(void)fprintf(stderr,"Read %d bytes from %s\n",got,tmp);
	(void)close(file);
	*data=mem;
	*len=got;
	return(0);
}



/*
** Strips a trailing cache remark from a parsed document: when the last
** element of the list is an H6 whose content is a text element starting
** with "From Cache", it is unhooked and freed.  nosleep is set as well,
** so the next request goes out without the usual pause.  A list with a
** single element is never touched.
*/
void uncache(ptr)
element	*ptr;
{
	element	*before;

	if (!ptr)
		return;

	/* Find the last element and the one in front of it */
	before=NULL;
	while (ptr->next) {
		before=ptr;
		ptr=ptr->next;
	}
	if (!before)
		return;

	if (ptr->type!=H6||!ptr->content||ptr->content->type)
		return;
	if (strncmp(ptr->content->content,"From Cache",10))
		return;

	before->next=NULL;
	freeparse(ptr);	/* from mem2parse */
	nosleep=TRUE;	/* request was served from cache, just try another */
	if (verbose>2)
		(void)fprintf(stderr,"Removed cache comment\n");
}



/*
** Writes a list of parsed elements, and recursively their contents, to a
** file as HTML text.
**
** dest		stream to write to
** ptr		first element of the list; may be NULL
** pre		non-zero while inside content that must be written verbatim,
**		i.e. without inserted or removed line breaks
**
** The globals nl (line-break state) and last (type of the tag written
** most recently) carry the formatting state from one element to the
** next and across the recursion; parse2file resets both before the
** first call.
*/
void element2file(dest,ptr,pre)
FILE	*dest;
element	*ptr;
int	pre;
{
	int	cnt;
	char	*tmp,*fptr,*bptr;

	/* A type of zero marks a text element, everything else is a tag */
	for(;ptr;ptr=ptr->next)
		if (ptr->type) {
			/* Opening tag */
			if (!pre&&nl>LIST&&ptr->type!=P)
				(void)fprintf(dest,"\n");
			(void)fprintf(dest,"<%s",elements[ptr->type].name);
			last=ptr->type;
			if (ptr->attrib) {
				/* attribs holds the size of the name table in bytes */
				for (cnt=0;cnt<elements[ptr->type].attribs/sizeof(char *);cnt++)
					if (ptr->attrib[cnt]) {
						(void)fprintf(dest," %s",elements[ptr->type].attrib[cnt]);
						/* NOVALUE marks an attribute given without "=value" */
						if (ptr->attrib[cnt]!=NOVALUE)
							(void)fprintf(dest,"=\"%s\"",ptr->attrib[cnt]);
					}
			}
			(void)fprintf(dest,">");
			if (!pre&&ptr->type==P) {
				(void)fprintf(dest,"\n");
				nl=DONE;
			}
			else
				if (ptr->type==LI||ptr->type==DD||ptr->type==DT)
					nl=LIST;
				else
					nl=OPEN;
				
			/* NOTE(review): ">=PRE" treats every element type numbered
			   PRE or higher as verbatim - depends on the order of the
			   elements table, confirm against html.h */
			if (ptr->type&&ptr->content)
				(void)element2file(dest,ptr->content,pre||ptr->type>=PRE);

			/* Closing tag */
			if (elements[ptr->type].bracket!=EMPTY) {
				if (!pre&&nl>=CLOSE)
					(void)fprintf(dest,"\n");
				(void)fprintf(dest,"</%s>",elements[ptr->type].name);
				last=ptr->type;
				nl=CLOSE;
			}
		}
		else {
			/* Text element */
			/* Work on a copy; content of a text element is its string */
			tmp=strdup((char *)ptr->content);
			bptr=strchr(tmp,'\0');
			/* Outside verbatim text one trailing blank is cut off */
			if (!pre&&bptr-1>=tmp&&bptr[-1]==' ')
				bptr[-1]='\0';
			/* No <PRE>, want line break and text allows it */
			fptr=tmp;
			if (!pre&&nl>OPEN&&*fptr==' ') {
				(void)fprintf(dest,"\n");
				fptr++;
				nl=DONE;
			}
			else	/* No line break, but previous tag implies line break */
				if (!pre&&elements[last].brk&&*fptr==' ') {
					fptr++;
					nl=DONE;
				}
			if (*fptr) {
				(void)fprintf(dest,"%s",fptr);
				nl=TEXT;
				/* Last character still there: no trailing blank was
				   cut, so no line break may follow */
				if (bptr[-1]!='\0')
					nl=DONT;
			}
			free(tmp);
		}
}



/*
** Writes a parsed document to the local file that belongs to a link.
** Missing directories are created, a trailing cache remark is removed
** (uncache), and the output is finished with a newline unless one was
** just written.  Returns the result of mkdirs when that fails, -1 when
** the file cannot be created, and the result of fclose otherwise.
*/
int parse2file(link,ptr)
url	*link;
element	*ptr;
{
	char	fname[MAXPATHLEN];
	int	rc;
	FILE	*out;

	url2fname(fname,link);
	rc=mkdirs(fname);
	if (rc)
		return(rc);

	out=fopen(fname,"w");
	if (!out) {
		(void)fprintf(stderr,"Can't create %s: %s\n",fname,strerror(errno));
		return(-1);
	}

	/* Fresh formatting state for element2file */
	last=0;
	nl=DONE;

	uncache(ptr);
	element2file(out,ptr,FALSE);
	if (nl>DONE)
		(void)fputc('\n',out);

	return(fclose(out));
}



void process();


/*
** Processes a newly retrieved document. If it's an HTML file, processing
** is delayed and the document is added to the fifo. If it's any other
** kind of file, it'll be directly written to disk.
**
** link		URL of the document; copied where needed, never freed here
** type		Content-type of the document
** data		document body from link2mem or file2mem; taken over: it is
**		either handed to the fifo or freed before returning
** len		number of bytes in data
**
** The type is matched without regard to case, and parameters after the
** media type ("text/html; charset=...") do not keep a document from
** being recognized as HTML.
*/
void process_mem(link,type,data,len)
url	*link;
char	*type;
char	*data;
int	len;
{
	char	tmp[MAXPATHLEN];

	if (!strncasecmp(type,"text/html",9)
	    &&(type[9]=='\0'||type[9]==';'||type[9]==' '||type[9]=='\t')) {
		add(&fifo_in,&fifo_out,link,data,len);	/* released by move within main loop */
		return;
	}

	add(&processed_in,&processed,link,NULL,0);	/* Never be freed, we can live with that */
	url2txt(tmp,link,FALSE);
	if (!strcasecmp(type,"text/plain")) {
		if (verbose>2)
			(void)fprintf(stderr,"%s is plain text\n",tmp);
	}
	else if (!strncasecmp(type,"image/",6)) {
		if (verbose>2)
			(void)fprintf(stderr,"%s is an image file\n",tmp);
	}
	else if (!strncasecmp(type,"video/",6)) {
		if (verbose>2)
			(void)fprintf(stderr,"%s is a video file\n",tmp);
	}
	else if (!strncasecmp(type,"audio/",6)) {
		if (verbose>2)
			(void)fprintf(stderr,"%s is an audio file\n",tmp);
	}
	else if (!strncasecmp(type,"application/",12)) {
		if (verbose>2)
			(void)fprintf(stderr,"%s is an application file\n",tmp);
	}
	else
		(void)fprintf(stderr,"Unknown Content-type for %s: \"%s\"\n",tmp,type);

	/* Whatever it is, it is stored unchanged */
	(void)mem2file(link,data,len);
	free((void*)data);	/* from link2mem */
}



/*
** Tells whether a local file name ends in ".html" or ".htm" (any case).
** Names shorter than the suffix simply do not match.
*/
static int is_html_fname(name)
char	*name;
{
	size_t	n;

	n=strlen(name);
	if (n>=5&&!strcasecmp(name+n-5,".html"))
		return(1);
	return(n>=4&&!strcasecmp(name+n-4,".htm"));
}

/*
** Takes a textual link from a given document, expands it and checks for
** permission to get it. In case of any restriction violation, it returns
** ORIG so the caller can put the expanded link into the referencing
** document. If the link is valid to get, it tries to do so, evaluating
** the result of the get attempt. If the attempt was successful, it calls
** a processing routine.
**
** link		URL of the referencing document, base for relative links
** ptr		the link as written in the document
** new		receives the expanded URL (to be released with freeurl by
**		the caller); may be the same object as link
**
** Returns ORIG for links that must not be followed, ONSTACK for links
** already known, and otherwise the result of link2mem, where a
** successfully followed redirection is reported as FOUND.
**
** NOTE(review): redirections are followed by recursion without a depth
** limit; a server redirecting in a circle would never let this return.
*/
int process_link(link,ptr,new)
url	*link;
char	*ptr;
url	*new;
{
	char	tmp[MAXPATHLEN],type[BUFSIZ],*data;
	int	stat,len;

	/* Evaluate the link */
	txt2url(new,link,ptr);	/* for free see process_links */
	url2txt(tmp,new,TRUE);
	if (verbose>2)
		(void)fprintf(stderr,"Link %s -> %s\n",ptr,tmp);

	/* Check access */

	if (!strncmp(new->path,"/cgi-bin",8)||!strncmp(new->path,"/htbin",6)) {
		if (verbose>1)
			(void)fprintf(stderr,"Link %s is a script\n",tmp);
		return(ORIG);
	}
	if (strcmp(new->scheme,root.scheme)&&mode<PROTOCOL) {
		if (verbose>1)
			(void)fprintf(stderr,"Link %s violates scheme\n",tmp);
		return(ORIG);
	}
	if (strcmp(new->host,root.host)&&mode<HOST) {
		if (verbose>1)
			(void)fprintf(stderr,"Link %s violates host\n",tmp);
		return(ORIG);
	}
	if (new->port!=root.port&&mode<PORT) {
		if (verbose>1)
			(void)fprintf(stderr,"Link %s violates port\n",tmp);
		return(ORIG);
	}
	if (strncmp(new->path,root.path,strlen(root.path))&&mode<UP) {
		if (verbose>1)
			(void)fprintf(stderr,"Link %s violates directory\n",tmp);
		return(ORIG);
	}

	/* Check for stack */

	if (infifo(fifo_out,new)||infifo(failed,new)||infifo(processed,new))
		return(ONSTACK);

	/* Get new document */

	stat=link2mem(new,type,&data,&len);	/* for free see process_mem */
	switch (stat) {
	case EXISTS:	/* No data has been allocated */
		if (!resume) {
			if (verbose>2)
				(void)fprintf(stderr,"Document exists\n");
			break;
		}
		url2fname(tmp,new);
		if (!is_html_fname(tmp))
			add(&processed_in,&processed,new,NULL,0);	/* Never be freed, we can live with that */
		else if (!file2mem(new,&data,&len)) {	/* for free see process_mem */
			if (verbose) {
				url2txt(tmp,new,FALSE);
				(void)fprintf(stderr,"%s...Resumed\n",tmp);
			}
			process_mem(new,"text/html",data,len);
		}
		return(OK);
	case OK:	/* Data freed in process_mem */
		process_mem(new,type,data,len);
		break;
	case FOUND:	/* No data has been allocated */
		/* link2mem has put the new location into new; try that one */
		url2txt(tmp,new,TRUE);
		stat=process_link(new,tmp,new);
		return(stat==OK?FOUND:stat);
	default:
		break;
	}
	return(stat);
}



/*
** Takes a parsed html elements tree and extracts all links to other documents.
** It then executes these links and replaces relocated links or links which
** viloate any restrictions with their new or original location.
*/
void process_links(link,ptr)
url	*link;
element	*ptr;
{
	char	tmp[MAXPATHLEN],**attr;
	url	new;

	attr=NULL;
	for(;ptr;ptr=ptr->next) {
		if (ptr->type==LINK|ptr->type==A)
			attr=&ptr->attrib[HREF];
		else if (ptr->type==IMG)
			attr=&ptr->attrib[SRC];
		if (attr && *attr) {
			if (mode==NONE) 
				txt2url(&new,link,*attr);	/* freed after next if */
			if (mode==NONE||process_link(link,*attr,&new)==ORIG) { /* freed after if */
				url2txt(tmp,&new,TRUE);
				if (strcmp(*attr,tmp)) {
					if (verbose>1)
						(void)fprintf(stderr,"Rewriting link: %s becomes %s\n",*attr,tmp);
					free(*attr);
					*attr=strdup(tmp);
				}
			}
			else {
				url2rel(tmp,link,&new,TRUE);
				/* Local files must use the local notion of DirectoryIndex */
				if (tmp[strlen(tmp)-1]=='/')
					strcat(tmp,"index.html");
				if (strcmp(*attr,tmp)) {
					if (verbose>1)
						(void)fprintf(stderr,"Rewriting link: %s becomes %s\n",*attr,tmp);
					free(*attr);
					*attr=strdup(tmp);
				}
			}
			freeurl(&new);	/* from process_link */
			attr=NULL;
		}
		if (ptr->type&&ptr->content)
			process_links(link,ptr->content);
	}
}



/*
** Handles one text/html document taken from the fifo: the text is
** parsed, the links in it are followed and rewritten, and the result is
** written to the local file belonging to the link.
*/
void process(link,data,len)
url	*link;
char	*data;
int	len;
{
	char	name[MAXPATHLEN];
	element	*tree;

	if (verbose) {
		url2txt(name,link,FALSE);
		(void)fprintf(stderr,"Processing %s\n",name);
	}

	tree=mem2parse(data,len);	/* released below */
	process_links(link,tree);
	(void)parse2file(link,tree);
	freeparse(tree);		/* from mem2parse */
}



/*
** Usage information.
**
** image	name the program was started with (argv[0])
**
** Prints the synopsis and the list of options on stderr and ends the
** program with exit status 1; it does not return.
*/
void usage(image)
char *image;
{
	(void)fprintf(stderr,"Usage: %s [-h] [-v] [-C] [-R|-N] [-r|-u|-p|-s] [-S sleeptime] URL\n",image);
	(void)fprintf(stderr,"-R Process documents already gobbled\n");
	(void)fprintf(stderr,"-N Replace documents already gobbled\n");
	(void)fprintf(stderr,"-C Only check documents already gobbled, don't retrieve any new\n");
	(void)fprintf(stderr,"-r Follow links within this directory\n");
	(void)fprintf(stderr,"-u Follow links to upward directories\n");
	(void)fprintf(stderr,"-p Follow links to other ports on the same server\n");
	(void)fprintf(stderr,"-s Follow links to other servers\n");
	/*
	(void)fprintf(stderr,"-P Follow links to other protocols\n");
	*/
	(void)fprintf(stderr,"-S Specify network sleeptime\n");
	(void)fprintf(stderr,"-v Trace actions, each '-v' adds verbosity\n");
	exit(1);
}


/*
** Parse commandline; main fifo loop
*/
main(argc,argv)
int	argc;
char	*argv[];
{
	int		c;
	extern char	*optarg;
	extern int	optind;
	char		tmp[MAXPATHLEN],*p;
	url		dummy;
	entry		ptr;
	int		stat;

	LINK=element_type("LINK");
	HREF=attrib_type(LINK,"HREF");
	A=element_type("A");
	IMG=element_type("IMG");
	SRC=attrib_type(IMG,"SRC");
	PRE=element_type("PRE");
	P=element_type("P");
	LI=element_type("LI");
	DD=element_type("DD");
	DT=element_type("DT");
	H6=element_type("H6");

	root.scheme="http";
	root.host="www.rz.uni-karlsruhe.de";
	root.port=80;
	root.path="/";
	root.anchor=NULL;

	while ((c=getopt(argc,argv,"rupsPCRNS:vh?")) != EOF)
		switch ((char)c) {
		case 'r':
			if (mode)
				usage(argv[0]);
			mode=DOWN;
			break;
		case 'u':
			if (mode)
				usage(argv[0]);
			mode=UP;
			break;
		case 'p':
			if (mode)
				usage(argv[0]);
			mode=PORT;
			break;
		case 's':
			if (mode)
				usage(argv[0]);
			mode=HOST;
			break;
		case 'P':
			if (mode)
				usage(argv[0]);
			mode=PROTOCOL;
			break;
		case 'C':
			check=TRUE;;
			break;
		case 'R':
			if (retrieve)
				usage(argv[0]);
			resume=TRUE;;
			break;
		case 'N':
			if (resume)
				usage(argv[0]);
			retrieve=TRUE;;
			break;
		case 'S':
			sleeptime=atoi(optarg);;
			break;
		case 'v':
			verbose++;
			break;
		case 'h':
			(void)fprintf(stderr,header);
		case '?':
			usage(argv[0]);
		}

	if (optind+1!=argc)
			usage(argv[0]);

	if (verbose>4)
		fprintf(stderr,"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n");
	gateway.host=NULL;
	p=getenv("WWW_http_GATEWAY");
	if (p) {
		txt2url(&gateway,&root,p);
		if (verbose>3) {
			url2txt(tmp,gateway,FALSE);
			(void)fprintf(stderr,"Using gateway %s\n",tmp);
		}
	}
	stat=process_link(&root,argv[optind],&root);
	if ((stat==OK||stat==FOUND)&&fifo_in) {
		strrchr(root.path,'/')[1]='\0';
		do {
			if (verbose>4)
				fprintf(stderr,"FIFO: %d fifo, %d processed, %d failed\n",size(fifo_out),size(processed),size(failed));
			/* show(fifo_out); */
			process(fifo_out->link,fifo_out->data,fifo_out->len);
		} while(fifo_out=move(fifo_out,&processed_in,&processed));
	}
	return(0);
}
