// WaisDocument.m
//
// Free software created 30 Nov 1992
// by Paul Burchard <burchard@math.utah.edu>.
// Incorporating:
/* 
   WIDE AREA INFORMATION SERVER SOFTWARE:
   No guarantees or restrictions.  See the readme file for the full standard
   disclaimer.

   This is part of the [NeXTstep] user-interface for the WAIS software.
   Do with it as you please.

   Version 0.82
   Wed Apr 24 1991

   jonathan@Think.COM

*/
//

#import "WaisDocument.h"

// Search path for documents.
// Returned by +folderList and replaced by +setFolderList: (which frees the
// previously installed list).  Starts out nil.
static id documentFolderList;

// Mapping of WAIS types to file name extension of contents file.
// Created as a HashTable in +initialize, filled by
// +registerExtension:forType: and queried by +extensionForType:.
static id extensionForType;

// Error panel title.
// Passed as the first argument to the ErrorMsg() calls below and returned
// by +errorTitle.
static char *errorTitle = "WAIS Document Error!";

// Decoders for WAIS structured files.

// Field table for the ":source-id" sub-structure (referenced by the
// ":source" entry of waisDocumentDecoder).  It holds one string field,
// ":filename", and is terminated by a { NULL } entry.
// NOTE(review): the numeric columns after W_FIELD and after each
// reader/writer function are defined by _WaisDecoder, which is declared
// outside this file -- confirm their meaning there.
_WaisDecoder waisSourceIDDecoder[] = 
{
    { ":filename",		W_FIELD,0,0,	ReadString,3,	WriteString,2,
    						MAX_SYMBOL_SIZE },
    { NULL }
};

// Field table for the ":document" sub-structure (referenced by the
// ":document" entry of waisDocumentIDDecoder).
//   - four fields read/written with ReadLongS/WriteLongS,
//   - three fields read/written with ReadString/WriteString,
//   - ":source", a nested ":source-id" structure (waisSourceIDDecoder),
//   - ":doc-id", a nested structure with a NULL decoder: it is handled
//     directly by -readWaisStruct:... and -writeWaisStruct:... below.
// The table is terminated by a { NULL } entry.
_WaisDecoder waisDocumentDecoder[] = 
{
    { ":number-of-lines",	W_FIELD,0,0,	ReadLongS,2,	WriteLongS,2 },
    { ":number-of-bytes",	W_FIELD,0,0,	ReadLongS,2,	WriteLongS,2 },
    { ":number-of-characters",	W_FIELD,0,0,	ReadLongS,2,	WriteLongS,2 },
    { ":best-line",		W_FIELD,0,0,	ReadLongS,2,	WriteLongS,2 },
    { ":date",			W_FIELD,0,0,	ReadString,3,	WriteString,2,
    						MAX_SYMBOL_SIZE },
    { ":headline",		W_FIELD,0,0,	ReadString,3,	WriteString,2,
    						MAX_SYMBOL_SIZE },
    { ":type",			W_FIELD,0,0,	ReadString,3,	WriteString,2,
    						MAX_SYMBOL_SIZE },
    { ":source",		W_STRUCT,
	":source-id",		waisSourceIDDecoder },
    { ":doc-id",		W_STRUCT,
        ":doc-id",		NULL/*special case*/ },
    { NULL }
};

// Field table for the ":fragment" sub-structure.  waisDocumentIDDecoder
// uses it for both its ":start" and its ":end" element.  All three fields
// are read/written with ReadLongS/WriteLongS; the table is terminated by a
// { NULL } entry.
_WaisDecoder waisFragmentDecoder[] = 
{
    { ":para-id",		W_FIELD,0,0,	ReadLongS,2,	WriteLongS,2 },
    { ":line-pos",		W_FIELD,0,0,	ReadLongS,2,	WriteLongS,2 },
    { ":byte-pos",		W_FIELD,0,0,	ReadLongS,2,	WriteLongS,2 },
    { NULL }
};

// Top-level field table for a document file: it is what
// +fileStructDecoder returns, for the structure that +fileStructName
// names ":document-id".  It holds a ":score" field, the nested ":document"
// structure, and the ":start" / ":end" fragments (which share
// waisFragmentDecoder).  The table is terminated by a { NULL } entry.
_WaisDecoder waisDocumentIDDecoder[] = 
{
    { ":score",			W_FIELD,0,0,	ReadLongS,2,	WriteLongS,2 },
    { ":document",		W_STRUCT,
	":document",		waisDocumentDecoder },
    { ":start",			W_STRUCT,
	":fragment",		waisFragmentDecoder },
    { ":end",			W_STRUCT,
	":fragment",		waisFragmentDecoder },
    { NULL }
};


@implementation WaisDocument

// Returns the class-wide list of folders searched for documents, i.e.
// whatever was last handed to +setFolderList: (nil if none was set).
+ folderList
{
    id currentList = documentFolderList;

    return currentList;
}

// Installs aList as the class-wide document folder list and returns self.
// The previously installed list, if any, is freed, so the class takes over
// the list that is passed in.
+ setFolderList:aList
{
    // Re-installing the list that is already current must not free it:
    // an unconditional free would leave documentFolderList dangling.
    if(documentFolderList && documentFolderList != aList)
        [documentFolderList free];
    documentFolderList = aList;
    return self;
}

// Returns the fixed path of the default folder for WAIS documents.
+ (const char *)defaultHomeFolder
{
    static const char homeFolder[] = "/Library/WAIS/documents";

    return homeFolder;
}

// Returns the name of the top-level structure of a document file.
+ (const char *)fileStructName
{
    static const char structName[] = ":document-id";

    return structName;
}

// Returns the decoder table for the top-level structure of a document
// file (waisDocumentIDDecoder, defined above).
+ (WaisDecoder)fileStructDecoder
{
    WaisDecoder topLevel = waisDocumentIDDecoder;

    return topLevel;
}

// Returns the title used for this class's error panels (the file-level
// errorTitle string).
+ (const char *)errorTitle
{
    const char *title = errorTitle;

    return title;
}

// Class initialization.  For WaisDocument itself (not for subclasses that
// inherit this method) it creates the type-to-extension table and
// registers the two built-in mappings: "TEXT" -> "txt", "WSRC" -> "src".
+ initialize
{
    [super initialize];
    if(self != [WaisDocument class]) return self;

    extensionForType = [[HashTable alloc] initKeyDesc:"%" valueDesc:"%"];
    [self registerExtension:"txt" forType:"TEXT"];
    [self registerExtension:"src" forType:"WSRC"];
    return self;
}

// Returns the file name extension registered for waisType with
// +registerExtension:forType:, or NULL if waisType is NULL or has no
// registered extension.
+ (const char *)extensionForType:(const char *)waisType
{
    if(!waisType) return NULL;

    // The table is created with key description "%" and its keys are
    // inserted as NXUniqueString() atoms, so look up with the atom for
    // waisType rather than with the caller's (possibly non-unique) pointer.
    return (const char *)[extensionForType
        valueForKey:(void *)NXUniqueString(waisType)];
}

// Records extn as the contents-file extension for documents of WAIS type
// waisType.  Both strings are converted to unique strings with
// NXUniqueString() before being stored in the table.  Returns self.
+ registerExtension:(const char *)extn forType:(const char *)waisType
{
    const char *extnAtom, *typeAtom;

    extnAtom = NXUniqueString(extn);
    typeAtom = NXUniqueString(waisType);
    [extensionForType insertKey:(void *)typeAtom value:(void *)extnAtom];
    return self;
}

// Returns YES if fileName names a document specification file, i.e. it
// ends with the W_D_EXT extension and has at least one character before
// it; returns NO otherwise (including for a NULL fileName).
// We read in the .wais file corresponding to the doc's content file.
+ (BOOL)checkFileName:(const char *)fileName
{
    size_t nameLen, extLen;

    if(!fileName) return NO;
    nameLen = strlen(fileName);
    extLen = strlen(W_D_EXT);
    if(nameLen <= extLen) return NO;

    // Compare the tail of the name directly.  Searching with strstr()
    // finds the FIRST occurrence of the extension, which wrongly rejects
    // names that contain it more than once (e.g. "a.wais.b.wais").
    if(0 != strcmp(fileName + (nameLen - extLen), W_D_EXT)) return NO;
    return YES;
}

// Releases the document's DocID (if one is set) with s_free() and then
// lets the superclass free the object.
- free
{
    if(waisDocID != NULL)
        s_free(waisDocID);

    return [super free];
}

// Looks up an object by key, ignoring a ".wais" (W_D_EXT) extension.
// A NULL key, or one that does not contain W_D_EXT, is passed to the
// superclass unchanged.  Otherwise a copy of the key is cut off at the
// first occurrence of W_D_EXT and that shortened key is looked up.
// Returns the object found, or nil (also when the copy cannot be
// allocated).
+ objectForCompleteKey:(const char *)aKey
{
    char *stripped, *extStart;
    id match;

    if(aKey == NULL || strstr(aKey, W_D_EXT) == NULL)
        return [super objectForCompleteKey:aKey];

    // Work on a private copy so the caller's string is left untouched.
    stripped = s_malloc(strlen(aKey)+1);
    if(stripped == NULL) return nil;
    strcpy(stripped, aKey);

    extStart = strstr(stripped, W_D_EXT);
    if(extStart == NULL)
    {
        s_free(stripped);
        return nil;
    }
    *extStart = 0;

    match = [super objectForCompleteKey:stripped];
    s_free(stripped);
    return match;
}

// Sets the document's key, ignoring a ".wais" (W_D_EXT) extension.
// A NULL key, or one that does not contain W_D_EXT, is passed to the
// superclass unchanged.  Otherwise a copy of the key is cut off at the
// first occurrence of W_D_EXT and the shortened key is installed.
// Returns whatever the superclass's -setKey: returns, or nil if the
// temporary copy cannot be allocated.
- setKey:(const char *)aKey
{
    char *buf, *endp;
    id rtn;
    
    // First remove any ".wais" extension from keys.
    // (Only test here; the position is located again in the writable
    // copy, so no pointer into the caller's const string is kept.)
    if(!aKey || !strstr(aKey, W_D_EXT))
    	return [super setKey:aKey];
    if(!(buf = s_malloc(strlen(aKey)+1))) return nil;
    strcpy(buf, aKey);
    // Release the copy on this failure path too, as
    // +objectForCompleteKey: does; returning directly would leak it.
    if(!(endp = strstr(buf, W_D_EXT))) { s_free(buf); return nil; }
    *endp = 0;
    rtn = [super setKey:buf];
    s_free(buf);
    return rtn;
}

// Builds the document's key (the path of its local contents file) from
// its ":headline", ":type" and ":filename" info fields and from
// fromSource, then installs it with -setKey:.
// Returns self on success.  Returns nil if the first folder-list entry is
// a NULL string, if that folder does not start with '/', or if the key
// buffer cannot be allocated; the key is left unchanged in those cases.
- setKeyFromInfo
{
    char *buf, *p;
    const char *src, *src_end, *headline, *theType;
    const char **foldp, *fold, *extn, *extn_tmp, *extn_end;
    int i, len, extn_len;
    BOOL lc_extn, match, case_match;
    
    // ATTEMPT MINIMAL PROCESSING TO TURN WAIS HEADLINES INTO FILE NAMES.
    //
    // Unfortunately this is not very pretty since headlines are up to the
    // DB designer's discretion, and type info returned is not always reliable.
    // We use the ":headline", ":type", and [source] ":filename" info fields,
    // and information about fromSource.
    //
    //     1. All '/', TAB, and unprintable chars in the :headline are changed
    //         to '_' (note that '/' interferes with file system operations
    //         while TAB interferes with pasteboard operations).
    //     2. If the :headline is empty it is replaced by "?".
    //     3. IF the :type is non-NULL and different from "TEXT", then a file
    //         name extension is added.  The extensionForType table is checked
    //         first; otherwise the extension defaults to the lowercase form
    //         of the WAIS type.  The extension is not added if already there.
    //     4. ELSE if the :type is NULL or "TEXT", we try a heuristic to
    //         find a file extension within the headline (because many
    //         database maintainers forget to index with the proper type,
    //         and waisindex doesn't always make it easy for them to do it). 
    //     5. As long as the document is not a WAIS source itself (according to
    //         ":type" field), the source's ":filename", excluding the ".src" 
    //         extension but followed by a ':', is prepended.  Or, if that 
    //         ":filename" info field is blank, our ":filename" field is
    //         tried (this would come from a source), or lastly the 
    //         final component of the source's "key" member is used instead.
    //     6. The default folder for WaisDocuments is prepended (with 
    //         separating '/' if necessary), unless the document is a WAIS 
    //         source, in which case the default folder for WaisSources is 
    //         prepended instead.
    //
    theType = [self valueForStringKey:":type"];
    headline = [self valueForStringKey:":headline"];
    if(!headline || !*headline) headline = "?";
    len = strlen(headline);
    
    // Find name of source: the source's ":filename", else our own
    // ":filename", else the source's key, else "?".  Only the last path
    // component is used.
    if(!(fromSource && (src=[fromSource valueForStringKey:":filename"]))
        && !(src = [self valueForStringKey:":filename"])
        && !(fromSource && (src=[fromSource key])))
        src = "?";
    if(strrchr(src, '/')) { src = strrchr(src, '/'); src++; }
    src_end = src + strlen(src);
    // Drop a trailing ".src" only when it is really there.  Both tests
    // must hold ('&&'): with '||' every name of 4+ chars lost its last 4
    // chars, and shorter names were compared starting before the string.
    if(strlen(src)>=4 && 0==strcmp(src_end-4, ".src")) src_end -= 4;

    // Find correct folder to put this in (first entry of the relevant
    // class's folder list; "/" if there is no such entry).
    if(theType && 0==strcmp(theType, "WSRC"))
        foldp = (const char **)[[WaisSource folderList] elementAt:0];
    else foldp = (const char **)[[WaisDocument folderList] elementAt:0];
    if(foldp) fold = *foldp;
    else fold = "/";
    if(!fold) return nil;
    
    // Find file name extension if any.
    extn = extn_end = 0;
    lc_extn = NO;
    if(!theType || 0==strcmp(theType, "TEXT"))
    {
        // Heuristic to locate extension in headline: look for
        // "xxx" in the pattern "aaaaa.xxx /bbbbb", where the '/'
        // is the first '/' in the headline and the '.' is the last
        // '.' before the first '/'.  Don't use "xxx" unless it
        // consists only of alphanumeric chars.
        
        if((extn_end = strchr(headline, '/')))
        {
            // Back up over white space preceding the '/'.
            for(; extn_end>headline && isascii(*(extn_end-1))
                && isspace(*(extn_end-1)); extn_end--);
            // Leave extn just past the last '.' that precedes extn_end.
            for(extn_tmp=strchr(headline, '.'); extn_tmp && extn_tmp<extn_end;
                extn=extn_tmp+1, extn_tmp=strchr(extn, '.'));
            if(extn && extn>=extn_end) extn = 0;
            else if(extn)
            {
                for(extn_tmp=extn; extn_tmp<extn_end; extn_tmp++)
                    if(!isascii(*extn_tmp) || !isalnum(*extn_tmp)) break;
                if(extn_tmp < extn_end) extn = 0;
            }
        }
    }
    else if(!(extn = [WaisDocument extensionForType:theType]))
    {
        // No registered extension: fall back to the lowercased type name.
        extn = theType;
        lc_extn = YES;
    }
    if(extn && !extn_end) extn_end = extn + strlen(extn);
    if(extn) extn_len = extn_end - extn;
    else extn_len = 0;

    // Create buffer for key, prepending folder and source names as necessary.
    // Worst case contents: fold + '/' + src + ':' + headline + '.' + extn,
    // plus one byte for the terminating NUL (the "+ 1" below).
    if(!(buf = s_malloc(strlen(fold) + strlen(src) + strlen("/:.")
        + len + extn_len + 1))) return nil;
    strcpy(buf, fold);
    p = buf + strlen(buf);
    // Folder must be an absolute path (this also rejects an empty folder,
    // so p-1 below is always inside the buffer).
    if(buf[0] != '/') { s_free(buf); return nil; }
    if(*(p-1) != '/') *p++ = '/';
    if(!theType || 0!=strcmp(theType, "WSRC"))
        { for(; src<src_end; src++) *p++ = *src; *p++ = ':'; }
    *p = 0;
    
    // Copy headline, replacing bad chars with '_'.
    for(; *headline; headline++) *p++ =
        ((isascii(*headline) && isprint(*headline)
            && *headline!='/' && *headline!='\t') ? *headline : '_');
    *p = 0;

    // Append file name extension if appropriate.
    // Check if it already ends with the proposed extension
    // (perhaps just in wrong case).
    //   case_match: the name already ends with exactly the extension we
    //               would append -> append nothing.
    //   match:      it ends with the extension in a different case ->
    //               back up over it and rewrite it below.
    if(extn && strlen(buf)>extn_len && *(p-extn_len-1)=='.')
    {
        for(match=YES, case_match=YES, i=0; i<extn_len; i++)
        {
            if(lc_extn && isascii(extn[i]) && isupper(extn[i]))
            {
                if((p-extn_len)[i] != tolower(extn[i]))
                {
                    case_match = NO;
                    if((p-extn_len)[i] != extn[i]) match = NO;
                }
            }
            else if((p-extn_len)[i] != extn[i])
            {
                case_match = NO;
                if( ! ( isascii(extn[i]) && (
                    (isupper(extn[i]) && (p-extn_len)[i]==tolower(extn[i]))
                    || (islower(extn[i]) && (p-extn_len)[i]==toupper(extn[i]))
                    ) ) )
                        match = NO;
            }
        }
        if(case_match) extn = 0;
        else if(match) p -= (extn_len+1);
    }
    if(extn)
    {
        *p++ = '.';
        for(; extn<extn_end; extn++)
            if(lc_extn && isascii(*extn) && isupper(*extn))
                *p++ = tolower(*extn);
            else *p++ = *extn;
    }
    *p = 0;
    
    // Finally use the constructed key.
    [self setKey:buf];
    s_free(buf);
    return self;
}

// Returns the source object this document is associated with (as set by
// -setFromSource:).
- fromSource
{
    id source = fromSource;

    return source;
}

// Associates the document with aSource and marks it as not retrieved.
// The document's own ":filename" field is then set to the source's
// ":filename" if it has one, otherwise to the last path component of the
// source's key if it has one.  With a nil source (or a source that has
// neither) the ":filename" field is left untouched.  Returns self.
- setFromSource:aSource
{
    const char *leaf, *slash;

    fromSource = aSource;
    isRetrieved = NO;
    if(!fromSource) return self;

    // Preferred: the file name recorded in the source itself.
    if([fromSource valueForStringKey:":filename"])
    {
        [self insertStringKey:":filename"
            value:[fromSource valueForStringKey:":filename"]];
        return self;
    }

    // Fallback: final component of the source's key.
    if(![fromSource key]) return self;
    leaf = [fromSource key];
    slash = strrchr(leaf, '/');
    if(slash) leaf = slash + 1;
    [self insertStringKey:":filename" value:leaf];
    return self;
}

// Returns the document's DocID pointer (NULL if none has been set).  The
// pointer is the one held by the document, not a copy.
- (DocID *)waisDocID
{
    DocID *current = waisDocID;

    return current;
}

// Replaces the document's DocID with theDocID and returns self.
// theDocID must be s_free()-able: the document keeps the pointer and
// releases it with s_free() when it is replaced or when the document is
// freed.
- setWaisDocID:(DocID *)theDocID
{
    // Setting the DocID that is already installed must not free it:
    // an unconditional s_free() would leave waisDocID dangling.
    if(waisDocID && waisDocID != theDocID) s_free(waisDocID);
    waisDocID = theDocID;
    return self;
}

// Replaces the document's DocID with one built from docAny and marks the
// document as not retrieved.  If docIDFromAny() cannot produce a DocID, a
// fresh DocID is allocated whose originalLocalID is a copy of docAny.
// Returns self, or nil if that fallback allocation fails (waisDocID is
// then NULL).
- setWaisDocIDFromAny:(any *)docAny
{
    isRetrieved = NO;
    if(waisDocID) s_free(waisDocID);
    if(!(waisDocID = docIDFromAny(docAny)))
    {
        // Check the allocation before writing through it, as the other
        // s_malloc() call sites in this file do.
        // NOTE(review): only originalLocalID is assigned here; the other
        // DocID members keep whatever s_malloc() left in them -- confirm
        // that s_malloc() zero-fills.
        if(!(waisDocID = (DocID *)s_malloc(sizeof(DocID)))) return nil;
        waisDocID->originalLocalID = copy_any(docAny);
    }
    return self;
}

// Returns the document's retrieved flag: set to YES by a successful
// -retrieve (and by -readWaisFile when the contents file is readable),
// reset to NO by the methods that change the document's identity.
- (BOOL)isRetrieved
{
    BOOL retrieved = isRetrieved;

    return retrieved;
}

// Clears the retrieved flag.  Only the flag is changed; nothing on disk is
// touched.  Returns self.
- setUnretrieved
{
    isRetrieved = NO;

    return self;
}

// Clean-up helper for -retrieve: closes file while holding the Wais
// file-I/O lock, releases ptr with s_free() if it is non-NULL, and always
// returns nil.
- cleanUpClose:(FILE *)file free:(any *)ptr
{
    [Wais lockFileIO];
    fclose(file);
    [Wais unlockFileIO];

    if(ptr != NULL)
        s_free(ptr);
    return nil;
}

// Fetches the document's contents from fromSource and writes them to the
// local file named by key.
//
// The contents are requested one byte range ("page") at a time; each page
// received is appended to the file.  The loop ends when the server's reply
// carries a D_PresentRequestOutOfRange diagnostic.
//
// Returns self and sets isRetrieved to YES on success.  Returns nil (with
// isRetrieved left NO) if the source cannot be connected, the local file
// cannot be created, a request cannot be built or sent, a reply lacks
// text, a write fails, or nothing at all was retrieved; all but the first
// of these also report through ErrorMsg().
//
// NOTE(review): no call in this method removes the local file on the
// failure paths, so a partially written file appears to stay on disk
// (-readWaisFile treats a readable file as "retrieved") -- confirm.
// NOTE(review): nothing here releases interp_response; check whether
// readSearchResponseAPDU() allocates it and who is meant to free it.
- retrieve
{
    int i;
    long retrieved_size, page_count, length;
    long request_length, chars_per_page;
    const char *database, *wType;
    // static: one pair of message buffers shared by every call.
    static char request[MAX_MESSAGE_LEN], response[MAX_MESSAGE_LEN];
    FILE *file;
    any* docany;
    WAISDocumentText *data;
    SearchResponseAPDU *interp_response;
    diagnosticRecord **diag;
    BOOL doc_done;

    // Set up source for retrieval.
    // (A nil fromSource also ends up here: the isConnected test fails.)
    isRetrieved = NO;
    [fromSource setConnected:YES];
    if(![fromSource isConnected]) return nil;

    // Open local document file to receive retrieved data.
    [Wais lockFileIO];
    if(!key || !(file = fopen(key, "w")))
    {
	[Wais unlockFileIO];
	ErrorMsg(errorTitle, "Can't create local document file %s.",
	    key ? key : "???");
	return nil;
    }
    [Wais unlockFileIO];

    // parameters for "page-by-page" retrieval loop.
    // docany is released on every exit path via -cleanUpClose:free:.
    chars_per_page = [fromSource bufferLength]-HEADER_LENGTH-1000;/*paranoia?*/    
    docany = anyFromDocID(waisDocID);
    database = [fromSource valueForStringKey:":database-name"];
    if(!(wType = [self valueForStringKey:":type"])) wType = "TEXT";
    
    // Retrieve one page at a time and write to local doc file.
    // We can't use the document length info returned by server in order
    // to terminate this retrieval loop because that info only applies to
    // the server's preferred document type (which might not be ours).
    // Instead we must wait for a D_PresentRequestOutOfRange error.
    for(retrieved_size=0, page_count=0, doc_done=NO; !doc_done; page_count++)
    {
    	// Lock transaction to prevent conflict with port.
	[Wais lockTransaction];
	
	// Create retrieval request message.
	// The request asks for bytes [page_count*chars_per_page,
	// (page_count+1)*chars_per_page) of the document.
	request_length = [fromSource bufferLength];
	if(!generate_retrieval_apdu(request + HEADER_LENGTH,
	    &request_length, docany, CT_byte, page_count * chars_per_page,
	    (page_count+1) * chars_per_page, wType, database))
	{
	    [Wais unlockTransaction]; [self cleanUpClose:file free:docany];
	    ErrorMsg(errorTitle, "Overflow: retrieval request too large for %s.", key);
	    return nil;
	}
	// NOTE(review): presumably generate_retrieval_apdu() leaves the
	// unused space in request_length, making this the used length --
	// confirm against its documentation.
	request_length = [fromSource bufferLength] - request_length;
	writeWAISPacketHeader(request,
	    request_length, (long)Z3950, "WAISclient",
	    (long)NO_COMPRESSION, (long)NO_ENCODING, (long)HEADER_VERSION);
	
	// Send retrieval message.
	if(!interpret_message(request, request_length,
	    response, MAX_MESSAGE_LEN, [fromSource connection], false))
	{
	    [Wais unlockTransaction]; [self cleanUpClose:file free:docany];
	    ErrorMsg(errorTitle,"Warning: missing data for document %s.",key);
	    return nil;
	}

	// Interpret received reply message.
	// Transaction is done; unlock.
	readSearchResponseAPDU(&interp_response, response + HEADER_LENGTH);
	[Wais unlockTransaction];
	
	// Check for errors or "out of range" diagnostic (= we're done).
	// Note on the casts: '->' binds tighter than the cast, so it is the
	// DatabaseDiagnosticRecords member that is cast to
	// (WAISSearchResponse *), not interp_response itself.
	if(interp_response
	    && (WAISSearchResponse *)interp_response
	    	->DatabaseDiagnosticRecords 
	    && (diag = ((WAISSearchResponse *)interp_response
	    	->DatabaseDiagnosticRecords)->Diagnostics)
	    )
	    for(i=0; diag[i]; i++)
	    {
	    	if(strcmp(diag[i]->DIAG, D_PresentRequestOutOfRange) == 0)
		    doc_done = YES;
	    	else if(diag[i]->ADDINFO)
		    ErrorMsg(errorTitle, "Retrieval diagnostics: %s, %s",
			diag[i]->DIAG, diag[i]->ADDINFO);
	    }
	
	// Extract document data chunk from response.
	// If doc is of type "TEXT", strip out weird stuff.
	// (Note "TEXT" type is ASCII-based, not international.)
	// This check runs even when doc_done was just set, so the final
	// ("out of range") reply must carry a Text array as well.
	if(!interp_response
	    || !(WAISSearchResponse *)interp_response
		->DatabaseDiagnosticRecords
	    || !((WAISSearchResponse *)interp_response
		->DatabaseDiagnosticRecords)->Text)
	{
	    [self cleanUpClose:file free:docany];
	    ErrorMsg(errorTitle,"Incomplete transmission for document %s.",key);
	    return nil;
	}
	data = ((WAISSearchResponse *)interp_response
	    ->DatabaseDiagnosticRecords)->Text[0];
	// An empty chunk is skipped; the loop goes on unless doc_done is set.
	if(data->DocumentText->size <= 0) continue;
	if(0 == strcmp(wType, "TEXT"))
	{
	    length = data->DocumentText->size;
	    delete_seeker_codes(data->DocumentText->bytes, &length);
	    data->DocumentText->size = length;
	    replace_controlM(data->DocumentText->bytes, &length);
	    data->DocumentText->size = length;
	}
	retrieved_size += data->DocumentText->size;

	// Write data chunk to file.
	[Wais lockFileIO];
	if(data->DocumentText->size != fwrite(data->DocumentText->bytes,
	    sizeof(char), (size_t)data->DocumentText->size, file))
	{
	    [Wais unlockFileIO]; [self cleanUpClose:file free:docany];
	    ErrorMsg(errorTitle, "Write error on document %s.", key);
	    return nil;
	}
	[Wais unlockFileIO];
    }
    [self cleanUpClose:file free:docany];
    if(retrieved_size <= 0)
    {
    	ErrorMsg(errorTitle, "Document %s is empty.", key);
	return nil;
    }
    
    // Successful retrieval.
    // The flag is set while holding the transaction lock.
    [Wais lockTransaction];
    isRetrieved = YES;
    [Wais unlockTransaction];
    return self;
}

// Reads one WAIS structure from file, with document-specific handling
// layered on top of the superclass's reader:
//   - ":doc-id" is read directly with ReadDocID() and stored through
//     -setWaisDocID:; the superclass is not involved.
//   - after a ":start" or ":end" element, the position just read
//     (":byte-pos", else ":line-pos", else ":para-id") is stored under
//     the element's own name, since both elements are fragments.
//   - after ":source-id", the source named by ":filename" is looked up
//     and attached with -setFromSource:.
//   - after the top-level structure, a key is generated with
//     -setKeyFromInfo if the document still has none.
// Returns the reader's status: the ReadDocID() result for ":doc-id"
// (FALSE if the DocID cannot be allocated), otherwise the superclass's
// result.
- (short)readWaisStruct:(const char *)structName
    forElement:(const char *)elementName
    fromFile:(FILE *)file
    withDecoder:(WaisDecoder)theDecoder
{
    short check_result;
    DocID *docid;
    const char *srcName;
    
    // Use doc-id shortcut routine.
    if(0 == strcmp(structName, ":doc-id"))
    {
        if(!(docid = (DocID *)s_malloc(sizeof(DocID)))) return FALSE;
        check_result = ReadDocID(docid, file);
        if(check_result==FALSE || check_result==END_OF_STRUCT_OR_LIST)
            { s_free(docid); return check_result; }
        // Ownership of docid passes to the document.
        [self setWaisDocID:docid];
        return check_result;
    }
    
    // Standard read.
    check_result = [super readWaisStruct:structName
        forElement:elementName fromFile:file withDecoder:theDecoder];

    // We flatten the WAIS document file structure for convenience,
    // so must avoid confusing ":start", ":end" subfields (both are frags).
    if(0==strcmp(elementName, ":start") || 0==strcmp(elementName, ":end"))
    {
        if([self valueForStringKey:":byte-pos"])
            [self insertStringKey:elementName
                value:[self valueForStringKey:":byte-pos"]];
        else if([self valueForStringKey:":line-pos"])
            [self insertStringKey:elementName
                value:[self valueForStringKey:":line-pos"]];
        else if([self valueForStringKey:":para-id"])
            [self insertStringKey:elementName
                value:[self valueForStringKey:":para-id"]];
    }
    
    // Find source if necessary.
    if(0==strcmp(structName, ":source-id"))
    {
        [self setFromSource:[WaisSource objectForKey:[self 
            valueForStringKey:":filename"]]];
        if(!fromSource)
        {
            // ":filename" may be absent; never hand NULL to "%s"
            // (same guard as the "???" fallback used in -retrieve).
            srcName = [self valueForStringKey:":filename"];
            ErrorMsg(errorTitle, "Unknown source %s.",
                srcName ? srcName : "???");
        }
    }
    
    // Set key from info, if still NULL even tho full doc record has been read.
    if(!key && 0==strcmp(structName, [WaisDocument fileStructName]))
        { isRetrieved = NO; [self setKeyFromInfo]; }
    return check_result;
}

// Reads the document's specification file ("<key>" + W_D_EXT) and then
// sets isRetrieved according to whether the contents file named by the
// key itself is readable.
// Returns self on success.  Returns nil if the document has no key, if
// the specification file name would not fit in MAXPATHLEN, or if the
// superclass fails to read the file; the key is restored to its original
// value in every case.
- readWaisFile
{
    NXAtom orig_key;
    char buf[MAXPATHLEN+1];
    id readResult;
    
    // We read WAIS specification file rather than content file,
    //     so temporarily append ".wais" to key (note call to super
    //     since our -setKey: strips the ".wais").
    if(!key) return nil;
    orig_key = key;
    // Make sure key + extension fits before copying into buf.
    if(strlen(orig_key) + strlen(W_D_EXT) > MAXPATHLEN)
    {
        ErrorMsg(errorTitle, "File name too long for document %s.", key);
        return nil;
    }
    strcpy(buf, orig_key);
    strcat(buf, W_D_EXT);
    [super setKey:buf];
    readResult = [super readWaisFile];
    // Restore the key whether or not the read succeeded; returning early
    // on failure would leave the document keyed by its ".wais" file.
    [self setKey:orig_key];
    if(!readResult) return nil;
    
    // Mark doc as retrieved if file named by (original) key exists.
    [Wais lockFileIO];
    if(0 == access(key, R_OK)) isRetrieved = YES;
    else isRetrieved = NO;
    [Wais unlockFileIO];
    return self;
}

// Writes one WAIS structure to file, with document-specific handling
// layered on top of the superclass's writer:
//   - ":doc-id" is written directly with WriteDocID() followed by a
//     newline; TRUE is returned, or FALSE (after an error message) when
//     the document has no DocID.
//   - before a ":start" or ":end" element is written, the value stored
//     under the element's name is copied into ":byte-pos", and
//     ":line-pos" and ":para-id" are set to NULL.
// In every case but ":doc-id" the superclass's result is returned.
- (short)writeWaisStruct:(const char *)structName
    forElement:(const char *)elementName
    toFile:(FILE *)file
    withDecoder:(WaisDecoder)theDecoder
{
    BOOL isFragment;

    // Use doc-id shortcut routine.
    if(0 == strcmp(structName, ":doc-id"))
    {
        if(!waisDocID)
        {
            ErrorMsg(errorTitle, "No Doc-ID for %s.", key);
            return FALSE;
        }
        WriteDocID(waisDocID, file);
        WriteNewline(file);
        return TRUE;
    }

    // We flatten the WAIS document file structure for convenience,
    // so must avoid confusing ":start", ":end" subfields (both are frags).
    isFragment = (0==strcmp(elementName, ":start")
        || 0==strcmp(elementName, ":end"));
    if(isFragment)
    {
        //!!! note kludge from xwais: we ignore distinctions here!
        [self insertStringKey:":byte-pos"
            value:[self valueForStringKey:elementName]];
        [self insertStringKey:":line-pos" value:NULL];
        [self insertStringKey:":para-id" value:NULL];
    }

    // Standard write.
    return [super writeWaisStruct:structName
        forElement:elementName toFile:file withDecoder:theDecoder];
}

// Writes the document's specification file ("<key>" + W_D_EXT).
// A missing or empty ":date" field is first filled in with "0".
// Returns self on success.  Returns nil if the document has no key, if
// the specification file name would not fit in MAXPATHLEN, or if the
// superclass fails to write the file; the key is restored to its original
// value in every case.
- writeWaisFile
{
    NXAtom orig_key;
    char buf[MAXPATHLEN+1];
    id writeResult;
    
    // Fill in missing fields.
    if(![self valueForStringKey:":date"]
    	|| strlen([self valueForStringKey:":date"])==0)
    	[self insertStringKey:":date" value:"0"];
    
    // We write WAIS specification file rather than content file,
    //     so temporarily append ".wais" to key (note call to super
    //     since our -setKey: strips the ".wais").
    if(!key) return nil;
    orig_key = key;
    // Make sure key + extension fits before copying into buf.
    if(strlen(orig_key) + strlen(W_D_EXT) > MAXPATHLEN)
    {
        ErrorMsg(errorTitle, "File name too long for document %s.", key);
        return nil;
    }
    strcpy(buf, orig_key);
    strcat(buf, W_D_EXT);
    [super setKey:buf];
    writeResult = [super writeWaisFile];
    // Restore the key whether or not the write succeeded; returning early
    // on failure would leave the document keyed by its ".wais" file.
    [self setKey:orig_key];
    if(!writeResult) return nil;
    return self;
}

@end
    





