// WaisDocument.m
//
// Free software created 1 Feb 1992
// by Paul Burchard <burchard@math.utah.edu>.
// Incorporating:
/* 
   WIDE AREA INFORMATION SERVER SOFTWARE:
   No guarantees or restrictions.  See the readme file for the full standard
   disclaimer.

   This is part of the [NeXTstep] user-interface for the WAIS software.
   Do with it as you please.

   Version 0.82
   Wed Apr 24 1991

   jonathan@Think.COM

*/
//

#import "WaisDocument.h"

// Search path for documents: the class-wide list object installed by
// +setFolderList: and handed back by +folderList.  +setFolderList: sends
// -free to the previously stored list before replacing it.
static id documentFolderList;

// Error panel title: passed as the first argument to ErrorMsg() by the
// methods in this file, and returned by +errorTitle.
static char *errorTitle = "WAIS Document Error!";

// Decoders for WAIS structured files.

// Decoder table for the ":source-id" sub-structure (referenced from the
// ":source" entry of waisDocumentDecoder).  It has one string field,
// ":filename", read with ReadString and written with WriteString, and is
// terminated by a { NULL } entry.
// NOTE(review): the integers following each reader/writer are presumably
// argument counts, and MAX_SYMBOL_SIZE the string buffer limit -- confirm
// against the declaration of _WaisDecoder.
_WaisDecoder waisSourceIDDecoder[] = 
{
    { ":filename",		W_FIELD,0,0,	ReadString,3,	WriteString,2,
    						MAX_SYMBOL_SIZE },
    { NULL }
};

// Decoder table for the ":document" sub-structure (referenced from the
// ":document" entry of waisDocumentIDDecoder).
// W_FIELD entries are scalar fields: the numeric ones use
// ReadLongS/WriteLongS, the textual ones use ReadString/WriteString.
// W_STRUCT entries name a nested structure and the table that decodes it;
// ":doc-id" has no table (NULL) because -readWaisStruct:... and
// -writeWaisStruct:... in this file handle that structure themselves via
// ReadDocID()/WriteDocID().  The table is terminated by a { NULL } entry.
_WaisDecoder waisDocumentDecoder[] = 
{
    { ":number-of-lines",	W_FIELD,0,0,	ReadLongS,2,	WriteLongS,2 },
    { ":number-of-bytes",	W_FIELD,0,0,	ReadLongS,2,	WriteLongS,2 },
    { ":number-of-characters",	W_FIELD,0,0,	ReadLongS,2,	WriteLongS,2 },
    { ":best-line",		W_FIELD,0,0,	ReadLongS,2,	WriteLongS,2 },
    { ":date",			W_FIELD,0,0,	ReadString,3,	WriteString,2,
    						MAX_SYMBOL_SIZE },
    { ":headline",		W_FIELD,0,0,	ReadString,3,	WriteString,2,
    						MAX_SYMBOL_SIZE },
    { ":type",			W_FIELD,0,0,	ReadString,3,	WriteString,2,
    						MAX_SYMBOL_SIZE },
    { ":source",		W_STRUCT,
	":source-id",		waisSourceIDDecoder },
    { ":doc-id",		W_STRUCT,
        ":doc-id",		NULL/*special case*/ },
    { NULL }
};

// Decoder table for a ":fragment" sub-structure, used for both the
// ":start" and ":end" elements of waisDocumentIDDecoder.  It has three
// numeric fields, each read with ReadLongS and written with WriteLongS,
// and is terminated by a { NULL } entry.
_WaisDecoder waisFragmentDecoder[] = 
{
    { ":para-id",		W_FIELD,0,0,	ReadLongS,2,	WriteLongS,2 },
    { ":line-pos",		W_FIELD,0,0,	ReadLongS,2,	WriteLongS,2 },
    { ":byte-pos",		W_FIELD,0,0,	ReadLongS,2,	WriteLongS,2 },
    { NULL }
};

// Top-level decoder table for a document file; this is the table returned
// by +fileStructDecoder, paired with the ":document-id" name returned by
// +fileStructName.  It holds a numeric ":score", the nested ":document"
// record, and two ":fragment" records stored under the element names
// ":start" and ":end".  The table is terminated by a { NULL } entry.
_WaisDecoder waisDocumentIDDecoder[] = 
{
    { ":score",			W_FIELD,0,0,	ReadLongS,2,	WriteLongS,2 },
    { ":document",		W_STRUCT,
	":document",		waisDocumentDecoder },
    { ":start",			W_STRUCT,
	":fragment",		waisFragmentDecoder },
    { ":end",			W_STRUCT,
	":fragment",		waisFragmentDecoder },
    { NULL }
};


@implementation WaisDocument

// Returns the class-wide document folder list, i.e. whatever was most
// recently installed with +setFolderList: (nil if nothing was installed).
+ folderList
{
    id currentList = documentFolderList;

    return currentList;
}

// Installs aList as the class-wide document folder list and returns self.
// The previously installed list (if any) is sent -free, so the class takes
// over ownership of whatever list it is given.
+ setFolderList:aList
{
    // Re-installing the list that is already current must not free it:
    // that would leave documentFolderList pointing at a dead object.
    if(aList == documentFolderList) return self;

    if(documentFolderList) [documentFolderList free];
    documentFolderList = aList;
    return self;
}

// Returns the fixed path of the default folder for WAIS documents.
+ (const char *)defaultHomeFolder
{
    const char *homeFolder = "/Library/WAIS/documents";

    return homeFolder;
}

// Returns the name of the top-level structure in a document file.
+ (const char *)fileStructName
{
    const char *structName = ":document-id";

    return structName;
}

// Returns the decoder table for the top-level document file structure
// (the waisDocumentIDDecoder table defined in this file).
+ (WaisDecoder)fileStructDecoder
{
    WaisDecoder topLevelDecoder = waisDocumentIDDecoder;

    return topLevelDecoder;
}

// Returns the title string this class uses for its error panels.
+ (const char *)errorTitle
{
    const char *panelTitle = errorTitle;

    return panelTitle;
}

// Returns YES when fileName names a document specification file, i.e. it
// is non-NULL, ends with the W_D_EXT extension, and has at least one
// character in front of that extension; returns NO otherwise.
// (We read in the .wais file corresponding to the doc's content file.)
+ (BOOL)checkFileName:(const char *)fileName
{
    size_t nameLen, extLen;

    if(!fileName) return NO;
    nameLen = strlen(fileName);
    extLen = strlen(W_D_EXT);
    if(nameLen <= extLen) return NO;

    // Compare the tail of the name directly.  Searching with strstr()
    // finds the FIRST occurrence of the extension text, which wrongly
    // rejects names that also contain it earlier in the path.
    if(0 != strcmp(fileName + (nameLen - extLen), W_D_EXT)) return NO;
    return YES;
}

// Releases the document's DocID storage (if any) with s_free(), then lets
// the superclass finish freeing the object.
- free
{
    if(waisDocID != NULL)
    {
        s_free(waisDocID);
    }
    return [super free];
}

// Looks up an object by key after removing the W_D_EXT extension text.
// Keys that are NULL or do not contain the extension text go straight to
// the superclass.  Otherwise a private copy of the key is cut off at the
// first occurrence of the extension text and that shortened key is looked
// up instead.  Returns the object found, or nil when there is none or the
// copy cannot be allocated.
+ objectForCompleteKey:(const char *)aKey
{
    char *stripped, *ext;
    id match;

    if(aKey == NULL || strstr(aKey, W_D_EXT) == NULL)
        return [super objectForCompleteKey:aKey];

    // Work on a copy; the caller's key is const.
    stripped = s_malloc(strlen(aKey)+1);
    if(!stripped) return nil;
    strcpy(stripped, aKey);

    ext = strstr(stripped, W_D_EXT);
    if(!ext)
    {
        s_free(stripped);
        return nil;
    }
    *ext = 0;

    match = [super objectForCompleteKey:stripped];
    s_free(stripped);
    return match;
}

// Sets the document's key after removing the W_D_EXT extension text.
// Keys that are NULL or do not contain the extension text are passed to
// the superclass unchanged.  Otherwise a private copy of the key is cut
// off at the extension and the shortened key is passed on.  Returns
// whatever the superclass returns, or nil when the copy cannot be made.
// NOTE(review): the cut is made at the FIRST occurrence of the extension
// text, so anything following it in the key is dropped as well.
- setKey:(const char *)aKey
{
    char *buf, *endp;
    id rtn;
    
    // First remove any ".wais" extension from keys.
    if(!aKey || !strstr(aKey, W_D_EXT))
        return [super setKey:aKey];
    if(!(buf = s_malloc(strlen(aKey)+1))) return nil;
    strcpy(buf, aKey);
    // Release the copy on this path too, so it cannot leak.
    if(!(endp = strstr(buf, W_D_EXT))) { s_free(buf); return nil; }
    *endp = 0;
    rtn = [super setKey:buf];
    s_free(buf);
    return rtn;
}

// Builds a file-system key for this document from its info fields and
// installs it with -setKey:.  Returns self on success, or nil when the
// chosen folder is not an absolute path or the work buffer cannot be
// allocated.
- setKeyFromInfo
{
    char *buf, *p, *hname, *extn;
    const char *head, *src, *src_end, *src_ext;
    const char *headline, *theType;
    const char **foldp, *fold;
    int len, extcnt;
    
    // HACKING WAIS HEADLINES INTO FILE NAMES AND TYPES.
    //
    //!!! (Not international enough!)
    //
    // Uses the ":headline", ":type", and [source] ":filename" info fields,
    // information in the fromSource member.
    //
    // The "key" member is set to the headline altered with this recipe:
    //     1. Everything after and including the first '/' is removed.
    //     2. Trailing & leading blanks of the new string are removed,
    //         sequences of blanks are compressed into single space chars,
    //         and non-ascii chars are replaced by '?'.
    //     3. IF the string is now empty it is replaced by "?".
    //     4. ELSE IF the new string ends with a file extension
    //         ("[.][A-Za-z0-9]+"), then the extension is lowercased.
    //     5. If the ":type" field is non-NULL and different from "TEXT",
    //         then the extension is changed to reflect the type
    //         (only "WSRC", "TIFF" and "GIF" are mapped).
    //     6. As long as the document is not a WAIS source itself (according to
    //         ":type" field), the source's ":filename", excluding the ".src" 
    //         extension but followed by a ':', is prepended.  Or, if the 
    //         ":filename" info field is blank, our ":filename" field is
    //         tried (this would come from a source), or lastly the 
    //         final component of the source's "key" member is used instead.
    //     7. The default folder for WaisDocuments is prepended (with 
    //         separating '/' if necessary), unless the document is a WAIS 
    //         source, in which case the default folder for WaisSources is 
    //         prepended instead.
    //
    headline = [self valueForStringKey:":headline"];
    theType = [self valueForStringKey:":type"];
    if(theType && 0==strcmp(theType, "WSRC"))
        foldp = (const char **)[[WaisSource folderList] elementAt:0];
    else foldp = (const char **)[[WaisDocument folderList] elementAt:0];
    // Fall back to "/" when there is no folder entry or the entry is NULL.
    if(foldp && *foldp) fold = *foldp;
    else fold = "/";

    // Only absolute folders are accepted.  Testing before the allocation
    // means this failure path has nothing to release.
    if(fold[0] != '/') return nil;

    if(!(fromSource && (src=[fromSource valueForStringKey:":filename"]))
        && !(src = [self valueForStringKey:":filename"])
        && !(fromSource && (src=[fromSource key])))
        src = "?";
    if(strrchr(src, '/')) { src = strrchr(src, '/'); src++; }
    if(headline) len = strlen(headline);
    else len = strlen("?");

    // Worst case contents: fold + '/' + src + ':' + headline + ".tiff".
    // Two more bytes are needed on top of that: one for the terminating
    // NUL, and one because an empty headline (len 0) is replaced by "?".
    if(!(buf = s_malloc(strlen(fold) + strlen(src) + len
            + strlen("/:.tiff") + 2)))
        return nil;
    strcpy(buf, fold);
    p = buf + strlen(buf);
    if(*(p-1) != '/') *p++ = '/';

    // Step 6: source name (minus ".src") and ':' unless we are a source.
    src_end = src + strlen(src);
    if(strlen(src)<4 || 0!=strcmp((src_ext=src_end-4), ".src"))
        src_ext = src_end;
    if(!theType || 0!=strcmp(theType, "WSRC"))
        { for(; src<src_ext; src++) *p++ = *src; *p++ = ':'; }
    *(hname=p) = 0;

    // Steps 1-2: copy the headline up to the first '/', skipping leading
    // blanks and collapsing runs of blanks into one space.
    if(headline) head = headline;
    else head = "?";
    for(; isascii(*head) && isspace(*head); head++);
    for(; *head && *head!='/'; head++)
        if(!(isascii(*head) && isspace(*head)
            && isascii(*(p-1)) && isspace(*(p-1))))
            *p++ = ((isascii(*head) && isspace(*head)) ? ' ' : *head);
    // Drop trailing blanks and terminate.
    for(p--; p>=hname && isascii(*p) && isspace(*p); p--);
    *++p = 0;
    // After this loop p rests on the terminating NUL, so p == hname
    // exactly when the cleaned headline is empty.
    for(p=hname; *p; p++) if(!isascii(*p)) *p = '?';
    extn = 0;
    if(p == hname) { *p++ = '?'; *p = 0; }
    else
    {
        // Step 4: find a trailing "[.][A-Za-z0-9]+" and lowercase it.
        p = hname + strlen(hname);
        for(extcnt=0, p--; p>hname && isalnum(*p); p--) extcnt++;
        if(*p=='.' && extcnt>0)
            { for(extn=p, p++; *p; p++) if(isupper(*p)) *p = tolower(*p); }
    }

    // Step 5: replace (or append) the extension according to the type.
    if(theType && 0!=strcmp(theType, "TEXT"))
    {
        if(0 == strcmp(theType, "WSRC"))
            { if(extn) strcpy(extn, ".src"); else strcat(buf, ".src"); }
        else if(0 == strcmp(theType, "TIFF"))
            { if(extn) strcpy(extn, ".tiff"); else strcat(buf, ".tiff"); }
        else if(0 == strcmp(theType, "GIF"))
            { if(extn) strcpy(extn, ".gif"); else strcat(buf, ".gif"); }
    }
    [self setKey:buf];
    s_free(buf);
    return self;
}

// Returns the source object this document is associated with (nil if
// none has been set).
- fromSource
{
    id theSource = fromSource;

    return theSource;
}

// Records aSource as the source of this document, marks the document as
// not retrieved, and copies a source file name into our own ":filename"
// field: the source's ":filename" value when it has one, otherwise the
// last path component of the source's key.  Nothing is copied when
// aSource is nil or supplies neither.  Always returns self.
- setFromSource:aSource
{
    const char *sourceFile, *lastSlash;
    
    fromSource = aSource;
    isRetrieved = NO;
    if(!fromSource) return self;

    // Preferred: the file name recorded in the source itself.
    sourceFile = [fromSource valueForStringKey:":filename"];
    if(sourceFile)
    {
        [self insertStringKey:":filename" value:sourceFile];
        return self;
    }

    // Fallback: the final component of the source's key.
    sourceFile = [fromSource key];
    if(sourceFile)
    {
        lastSlash = strrchr(sourceFile, '/');
        if(lastSlash) sourceFile = lastSlash + 1;
        [self insertStringKey:":filename" value:sourceFile];
    }
    return self;
}

// Returns the DocID currently held by this document (may be NULL).
// The pointer stays owned by the document.
- (DocID *)waisDocID
{
    DocID *currentID = waisDocID;

    return currentID;
}

// Replaces the document's DocID with theDocID and returns self.
// theDocID must be s_free()-able: the document takes ownership and
// releases it with s_free() when it is replaced or the document is freed.
- setWaisDocID:(DocID *)theDocID
{
    // Setting the DocID we already hold must not free it: that would
    // leave waisDocID pointing at released memory.
    if(theDocID == waisDocID) return self;

    if(waisDocID) s_free(waisDocID);
    waisDocID = theDocID;
    return self;
}

// Replaces the document's DocID with one built from docAny and marks the
// document as not retrieved.  docIDFromAny() is tried first; when it
// yields nothing, a fresh DocID is allocated and a copy of docAny is
// stored as its originalLocalID.  Returns self, or nil when that
// fallback allocation fails (waisDocID is then NULL).
- setWaisDocIDFromAny:(any *)docAny
{
    isRetrieved = NO;
    if(waisDocID) s_free(waisDocID);
    if(!(waisDocID = docIDFromAny(docAny)))
    {
        waisDocID = (DocID *)s_malloc(sizeof(DocID));
        // Check the allocation like every other s_malloc() in this file
        // instead of writing through a possibly-NULL pointer.
        if(!waisDocID) return nil;
        waisDocID->originalLocalID = copy_any(docAny);
    }
    return self;
}

// Reports the document's retrieved flag, as last set by -retrieve,
// -readWaisFile, or one of the methods that reset it to NO.
- (BOOL)isRetrieved
{
    BOOL retrievedFlag = isRetrieved;

    return retrievedFlag;
}

// Clears the retrieved flag and returns self.
- setUnretrieved
{
    isRetrieved = (BOOL)NO;

    return self;
}

// Shared clean-up helper for -retrieve: closes file while holding the
// Wais file-I/O lock, releases ptr with s_free() when it is non-NULL,
// and always returns nil.
- cleanUpClose:(FILE *)file free:(any *)ptr
{
    [Wais lockFileIO];
    fclose(file);
    [Wais unlockFileIO];

    if(ptr != NULL)
    {
        s_free(ptr);
    }
    return nil;
}

// Fetches the document's contents from its source, one page at a time,
// and writes them to the local file named by the "key" member.
// Returns self and sets isRetrieved to YES on success.  Returns nil
// (after reporting through ErrorMsg() where applicable) when the source
// cannot be connected, the local file cannot be created, the document
// info says it is empty, the transfer buffer is too small, a request
// cannot be built or sent, the reply carries no text, or a write fails.
- retrieve
{
    int i;
    long lines, count, chars, length;
    long request_length, chars_per_page;
    const char *value, *database, *wType;
    static char request[MAX_MESSAGE_LEN], response[MAX_MESSAGE_LEN];
    FILE *file;
    any* docany;
    WAISSearchResponse *info;
    WAISDocumentText *data;
    SearchResponseAPDU *interp_response;
    diagnosticRecord **diag;
    extern char *delete_seeker_codes();/* in ui.c, but not declared in ui.h */

    // Set up source for retrieval.
    isRetrieved = NO;
    [fromSource setConnected:YES];
    if(![fromSource isConnected]) return nil;

    // Open local document file to receive retrieved data.
    [Wais lockFileIO];
    if(!key || !(file = fopen(key, "w")))
    {
        [Wais unlockFileIO];
        ErrorMsg(errorTitle, "Can't create local document file %s.",
                key ? key : "???");
        return nil;
    }
    [Wais unlockFileIO];

    // parameters for "page-by-page" retrieval loop.
    value = [self valueForStringKey:":number-of-lines"];
    lines = value ? atol(value) : 0;
    if((value = [self valueForStringKey:":number-of-bytes"]))
        chars = atol(value);
    else if((value = [self valueForStringKey:":number-of-characters"]))
        chars = atol(value);
    else chars = 0;
    chars_per_page = [fromSource bufferLength]-HEADER_LENGTH-1000;/*paranoia?*/    
    docany = anyFromDocID(waisDocID);
    database = [fromSource valueForStringKey:":database-name"];
    wType = [self valueForStringKey:":type"];
    if(!wType) wType = "TEXT";
    if(lines<=0 || chars<=0)
    {
        [self cleanUpClose:file free:docany];
        ErrorMsg(errorTitle, "Document %s is empty.", key);
        return nil;
    }
    // A non-positive page size would make the loop below run forever,
    // because count*chars_per_page could never reach chars.
    if(chars_per_page <= 0)
    {
        [self cleanUpClose:file free:docany];
        ErrorMsg(errorTitle,
            "Transfer buffer too small to retrieve document %s.", key);
        return nil;
    }
    
    // Retrieve one page at a time and write to local doc file.
    // NOTE(review): the decoded reply (interp_response) is not released
    // anywhere in this loop; this looks like a per-page leak -- confirm
    // which WAIS routine should free it.
    for(count=0; count*chars_per_page<chars; count++)
    {
        // Lock transaction to prevent conflict with port.
        [Wais lockTransaction];
        
        // Create retrieval request message.
        request_length = [fromSource bufferLength];
        if(!generate_retrieval_apdu(request + HEADER_LENGTH,
            &request_length, docany, CT_byte, count * chars_per_page,
            MIN((count + 1) * chars_per_page, chars), wType, database))
        {
            [Wais unlockTransaction]; [self cleanUpClose:file free:docany];
            ErrorMsg(errorTitle, "Overflow: retrieval request too large for %s.", key);
            return nil;
        }
        
        // Send retrieval message.
        if(!interpret_message(request, MAX_MESSAGE_LEN - request_length,
            response, MAX_MESSAGE_LEN, [fromSource connection], false))
        {
            [Wais unlockTransaction]; [self cleanUpClose:file free:docany];
            ErrorMsg(errorTitle,"Warning: missing data for document %s.",key);
            return nil;
        }

        // Interpret received reply message.
        // Transaction is done; unlock.
        // Start from NULL so a reader that stores nothing is detected.
        interp_response = NULL;
        readSearchResponseAPDU(&interp_response, response + HEADER_LENGTH);
        [Wais unlockTransaction];
        info = interp_response
            ? (WAISSearchResponse *)interp_response->DatabaseDiagnosticRecords
            : NULL;
        if(info && (diag = info->Diagnostics))
            for(i=0; diag[i]; i++) if(diag[i]->ADDINFO)
                ErrorMsg(errorTitle, "Retrieval diagnostics: %s, %s",
                    diag[i]->DIAG, diag[i]->ADDINFO);
        
        // Extract document data chunk from response.
        // Every link of the chain is checked, so a missing or empty reply
        // is reported instead of dereferencing a NULL pointer.
        if(!info || !info->Text || !(data = info->Text[0])
            || !data->DocumentText)
        {
            [self cleanUpClose:file free:docany];
            ErrorMsg(errorTitle,"Warning: missing data for document %s.",key);
            return nil;
        }

        // If of file type "TEXT", strip out weird stuff.
        // (Note "TEXT" type is ASCII-based, not international.)
        if(0 == strcmp(wType, "TEXT"))
        {
            length = data->DocumentText->size;
            delete_seeker_codes(data->DocumentText->bytes, &length);
            data->DocumentText->size = length;
            replace_controlM(data->DocumentText->bytes, &length);
            data->DocumentText->size = length;
        }

        // Write data chunk to file.
        [Wais lockFileIO];
        if(data->DocumentText->size
            != fwrite(data->DocumentText->bytes, sizeof(char), 
            (size_t)data->DocumentText->size, file))
        {
            [Wais unlockFileIO]; [self cleanUpClose:file free:docany];
            ErrorMsg(errorTitle, "Write error on document %s.", key);
            return nil;
        }
        [Wais unlockFileIO];
    }
    [self cleanUpClose:file free:docany];
    [Wais lockTransaction];
    isRetrieved = YES;
    [Wais unlockTransaction];
    return self;
}

// Reads one structure named structName (stored under elementName) from
// file.  ":doc-id" structures are read directly with ReadDocID(); all
// others go through the superclass using theDecoder.  After a standard
// read this method also
//   - copies the fragment position into a field named after the element
//     for ":start"/":end",
//   - resolves the source object once a ":source-id" has been read, and
//   - derives the key from the info fields when the whole document record
//     has been read and no key is set yet.
// Returns the status code of the underlying read (FALSE when the DocID
// buffer cannot be allocated).
- (short)readWaisStruct:(const char *)structName
    forElement:(const char *)elementName
    fromFile:(FILE *)file
    withDecoder:(WaisDecoder)theDecoder
{
    short check_result;
    DocID *docid;
    const char *sourceName;
    
    // Use doc-id shortcut routine.
    if(0 == strcmp(structName, ":doc-id"))
    {
        if(!(docid = (DocID *)s_malloc(sizeof(DocID)))) return FALSE;
        check_result = ReadDocID(docid, file);
        if(check_result==FALSE || check_result==END_OF_STRUCT_OR_LIST)
            { s_free(docid); return check_result; }
        [self setWaisDocID:docid];
        return check_result;
    }
    
    // Standard read.
    check_result = [super readWaisStruct:structName
        forElement:elementName fromFile:file withDecoder:theDecoder];

    // We flatten the WAIS document file structure for convenience,
    // so must avoid confusing ":start", ":end" subfields (both are frags).
    if(0==strcmp(elementName, ":start") || 0==strcmp(elementName, ":end"))
    {
        if([self valueForStringKey:":byte-pos"])
            [self insertStringKey:elementName
                value:[self valueForStringKey:":byte-pos"]];
        else if([self valueForStringKey:":line-pos"])
            [self insertStringKey:elementName
                value:[self valueForStringKey:":line-pos"]];
        else if([self valueForStringKey:":para-id"])
            [self insertStringKey:elementName
                value:[self valueForStringKey:":para-id"]];
    }
    
    // Find source if necessary.
    if(0==strcmp(structName, ":source-id"))
    {
        [self setFromSource:[WaisSource objectForKey:[self 
            valueForStringKey:":filename"]]];
        if(!fromSource)
        {
            // The field may be absent; never hand NULL to a %s conversion.
            sourceName = [self valueForStringKey:":filename"];
            ErrorMsg(errorTitle, "Unknown source %s.",
                sourceName ? sourceName : "???");
        }
    }
    
    // Set key from info, if still NULL even tho full doc record has been read.
    if(!key && 0==strcmp(structName, [WaisDocument fileStructName]))
        { isRetrieved = NO; [self setKeyFromInfo]; }
    return check_result;
}

// Reads the document's specification file (the key with W_D_EXT
// appended) through the superclass, then sets isRetrieved according to
// whether the content file named by the key itself is readable.
// Returns self on success; returns nil when there is no key, the
// specification file name would not fit in a path buffer, or the
// superclass read fails.  The key is restored in every case.
- readWaisFile
{
    NXAtom orig_key;
    char buf[MAXPATHLEN+1];
    id readResult;
    
    // We read WAIS specification file rather than content file,
    //     so temporarily append ".wais" to key (note call to super
    //     since our -setKey: strips the ".wais").
    if(!key) return nil;
    orig_key = key;
    // Refuse names that would overflow buf once the extension is added.
    if(strlen(orig_key) + strlen(W_D_EXT) > MAXPATHLEN)
    {
        ErrorMsg(errorTitle, "File name too long: %.64s...", orig_key);
        return nil;
    }
    strcpy(buf, orig_key);
    strcat(buf, W_D_EXT);
    [super setKey:buf];
    readResult = [super readWaisFile];
    // Put the real key back before looking at the result, so a failed
    // read does not leave the temporary ".wais" key installed.
    [self setKey:orig_key];
    if(!readResult) return nil;
    
    // Mark doc as retrieved if file named by (original) key exists.
    [Wais lockFileIO];
    if(0 == access(key, R_OK)) isRetrieved = YES;
    else isRetrieved = NO;
    [Wais unlockFileIO];
    return self;
}

// Writes one structure named structName (stored under elementName) to
// file.  ":doc-id" structures are written directly with WriteDocID()
// followed by a newline (TRUE), or reported as an error when the
// document has no DocID (FALSE).  For ":start"/":end" elements the
// flattened position is first moved back into ":byte-pos" and the other
// fragment fields are cleared.  Everything else is delegated to the
// superclass, whose result is returned.
- (short)writeWaisStruct:(const char *)structName
    forElement:(const char *)elementName
    toFile:(FILE *)file
    withDecoder:(WaisDecoder)theDecoder
{
    int isFragmentElement;

    // Use doc-id shortcut routine.
    if(strcmp(structName, ":doc-id") == 0)
    {
        if(!waisDocID)
        {
            ErrorMsg(errorTitle, "No Doc-ID for %s.", key);
            return FALSE;
        }
        WriteDocID(waisDocID, file);
        WriteNewline(file);
        return TRUE;
    }

    // We flatten the WAIS document file structure for convenience,
    // so must avoid confusing ":start", ":end" subfields (both are frags).
    isFragmentElement = (strcmp(elementName, ":start") == 0
        || strcmp(elementName, ":end") == 0);
    if(isFragmentElement)
    {
        //!!! note kludge from xwais: we ignore distinctions here!
        [self insertStringKey:":byte-pos"
            value:[self valueForStringKey:elementName]];
        [self insertStringKey:":line-pos" value:NULL];
        [self insertStringKey:":para-id" value:NULL];
    }

    // Standard write.
    return [super writeWaisStruct:structName
        forElement:elementName toFile:file withDecoder:theDecoder];
}

// Writes the document's specification file (the key with W_D_EXT
// appended) through the superclass, after defaulting a missing or empty
// ":date" field to "0".  Returns self on success; returns nil when there
// is no key, the specification file name would not fit in a path buffer,
// or the superclass write fails.  The key is restored in every case.
- writeWaisFile
{
    NXAtom orig_key;
    char buf[MAXPATHLEN+1];
    id writeResult;
    
    // Fill in missing fields.
    if(![self valueForStringKey:":date"]
        || strlen([self valueForStringKey:":date"])==0)
        [self insertStringKey:":date" value:"0"];
    
    // We write WAIS specification file rather than content file,
    //     so temporarily append ".wais" to key (note call to super
    //     since our -setKey: strips the ".wais").
    if(!key) return nil;
    orig_key = key;
    // Refuse names that would overflow buf once the extension is added.
    if(strlen(orig_key) + strlen(W_D_EXT) > MAXPATHLEN)
    {
        ErrorMsg(errorTitle, "File name too long: %.64s...", orig_key);
        return nil;
    }
    strcpy(buf, orig_key);
    strcat(buf, W_D_EXT);
    [super setKey:buf];
    writeResult = [super writeWaisFile];
    // Put the real key back before looking at the result, so a failed
    // write does not leave the temporary ".wais" key installed.
    [self setKey:orig_key];
    if(!writeResult) return nil;
    return self;
}

@end
    





