*** irretrvl.c	Tue Apr  7 13:27:08 1992
--- irretrvl.c.new	Tue Apr  7 11:37:44 1992
***************
*** 59,64 ****
--- 59,65 ----
  #include "futil.h"
  #include <ctype.h>  /* for isspace */
  #include "irext.h"
+ #include <sys/stat.h>
  
  /*----------------------------------------------------------------------*/
  
***************
*** 130,140 ****
       char *filename;
       char *database_name;  /* full pathname of the database */
  {
!   /* this is complicated by back compatibility to WAIStation, 
!      which does not give the right database name (it always says 'Quest'). 
!      The solution is to look for Quest in the database_name and always say it
!      is legit.  This is an unfortunate security hole, that hopefully will 
!      be able to be purged in a major version change.*/
  
    /* the help file and catalog file (the .src and .cat files) must also be
       special cased because it is not in the filename table */
--- 131,137 ----
       char *filename;
       char *database_name;  /* full pathname of the database */
  {
!   struct stat sbuf;
  
    /* the help file and catalog file (the .src and .cat files) must also be
       special cased because it is not in the filename table */
***************
*** 148,159 ****
    if(NULL != strstr(filename, ".cat")) /* let it pass */
      return(true);
  
!   if(!strstr(database_name, "Quest") ||
!      !strstr(database_name, "INFO")) { /* it is a request from WAIStation */
!     return(true);
!   }
!   else {     /* check it */
  
      /* name of the file of the filetable for this db (eg  /bar/foo.fn).  confusing, no? */
      char filename_table_filename[MAX_FILE_NAME_LEN +1]; 
      
--- 145,159 ----
    if(NULL != strstr(filename, ".cat")) /* let it pass */
      return(true);
  
!   stat(filename, &sbuf);
!   if(S_ISDIR(sbuf.st_mode)) {
!     waislog(WLOG_HIGH, WLOG_WARNING,
! 	    "File: '%s' is a directory, and cannot be retrieved.",
! 	    filename);
!     return(false);
  
+   }
+   else {
      /* name of the file of the filetable for this db (eg  /bar/foo.fn).  confusing, no? */
      char filename_table_filename[MAX_FILE_NAME_LEN +1]; 
      
***************
*** 183,199 ****
  
  WAISDocumentText* getData(doc, databaseName, errorCode)
  DocObj* doc;
! char *databaseName;
  long* errorCode;
  /* it isn't text, so we can just grab data */
  {
    FILE* file = NULL;
!   char fileName[MAX_FILENAME_LEN + 1];
    WAISDocumentText* data = NULL;
    long start,end;		/* position of the document in the file */
    long startByte,endByte,bytes,bytesRead; /* part of the doc that we want */
    char* buffer = NULL;
    any* bufAny = NULL;
  #ifdef NeXT  
    char tmpFileName[MAX_FILENAME_LEN+1];
  #else
--- 183,201 ----
  
  WAISDocumentText* getData(doc, databaseName, errorCode)
  DocObj* doc;
! char *databaseName; /* ignored - gotten from docid instead */
  long* errorCode;
  /* it isn't text, so we can just grab data */
  {
    FILE* file = NULL;
!   char fileName[MAX_FILENAME_LEN + 1]; 
!   char* dbname = NULL;
    WAISDocumentText* data = NULL;
    long start,end;		/* position of the document in the file */
    long startByte,endByte,bytes,bytesRead; /* part of the doc that we want */
    char* buffer = NULL;
    any* bufAny = NULL;
+   DocID *docid;
  #ifdef NeXT  
    char tmpFileName[MAX_FILENAME_LEN+1];
  #else
***************
*** 200,214 ****
    char *tmpFileName = NULL;
  #endif
  
    /* we can only handle byte chunks here */
    if ((doc->ChunkCode == CT_byte) ||
        (doc->ChunkCode == CT_document)) {
      if (parseDocID(doc,fileName,&start,&end,errorCode) == false)
!       return(NULL);
   
!     *errorCode = GDT_NoError;
!   
!     if(true == check_for_legitimate_file(fileName, databaseName)){
        file = s_fopen(fileName,"rb"); 
  
        if (file == NULL){
--- 202,225 ----
    char *tmpFileName = NULL;
  #endif
  
+ 
+   *errorCode = GDT_NoError;
+   
    /* we can only handle byte chunks here */
    if ((doc->ChunkCode == CT_byte) ||
        (doc->ChunkCode == CT_document)) {
      if (parseDocID(doc,fileName,&start,&end,errorCode) == false)
!      {
!        waislog(WLOG_HIGH,WLOG_WARNING, "can't parse docid");
!        *errorCode = GDT_MissingDocID;
!        return(NULL);
!      }
   
!     docid = docIDFromAny(doc->DocumentID);
!     dbname = anyToString(GetDatabase(docid));
!     freeDocID(docid);
! 
!     if(true == check_for_legitimate_file(fileName, dbname)){
        file = s_fopen(fileName,"rb"); 
  
        if (file == NULL){
***************
*** 232,237 ****
--- 243,249 ----
  		"Attempt to retrieve data for missing doc-id: '%s'",
  		fileName);
  	*errorCode = GDT_MissingDocID;
+ 	s_free(dbname);
  	return(NULL);
        }
  
***************
*** 246,253 ****
  
      waislog(WLOG_LOW, WLOG_RETRIEVE,
  	    "Retrieving DocID: %d %d %s, byte: %d %d, from database %s", 
! 	    start, end, fileName, startByte, endByte, databaseName);
  
      if (endByte > end && end != 0)
        { 
  	waislog(WLOG_HIGH, WLOG_WARNING, 
--- 258,267 ----
  
      waislog(WLOG_LOW, WLOG_RETRIEVE,
  	    "Retrieving DocID: %d %d %s, byte: %d %d, from database %s", 
! 	    start, end, fileName, startByte, endByte, dbname);
  
+     s_free(dbname);
+ 
      if (endByte > end && end != 0)
        { 
  	waislog(WLOG_HIGH, WLOG_WARNING, 
***************
*** 309,315 ****
  
  WAISDocumentText* getDocumentText(doc, databaseName, errorCode)
  DocObj* doc;
! char *databaseName;
  long* errorCode;
  /* find the text for doc, get the sub part if any, finally construct and
     return a WAISDocumentText.  If it can not find the document 
--- 323,329 ----
  
  WAISDocumentText* getDocumentText(doc, databaseName, errorCode)
  DocObj* doc;
! char *databaseName; /* ignored - gotten from docid instead */
  long* errorCode;
  /* find the text for doc, get the sub part if any, finally construct and
     return a WAISDocumentText.  If it can not find the document 
***************
*** 318,323 ****
--- 332,338 ----
  {
    WAISDocumentText* text = NULL;
    FILE* file = NULL;
+   char* dbname = NULL;
    char* buffer = NULL;
    any* bufAny = NULL;
    char filename[MAX_FILENAME_LEN + 1];
***************
*** 333,350 ****
  #endif
    DocID* theDocID = NULL;
    char* local_id = NULL;
!   
    /* we can only handle line chunks for now */
    if (doc->ChunkCode != CT_line)
     { 
       waislog(WLOG_HIGH, WLOG_WARNING, 
  	     "search engine can only use line offsets for now.");
-      
       *errorCode = GDT_UnsupportedChunkType;
       return(NULL);
     }
  
    theDocID = docIDFromAny(doc->DocumentID);
    local_id = anyToString(GetLocalID(theDocID));
    freeDocID(theDocID);
  
--- 348,367 ----
  #endif
    DocID* theDocID = NULL;
    char* local_id = NULL;
! 
!   *errorCode = GDT_NoError;
! 
    /* we can only handle line chunks for now */
    if (doc->ChunkCode != CT_line)
     { 
       waislog(WLOG_HIGH, WLOG_WARNING, 
  	     "search engine can only use line offsets for now.");
       *errorCode = GDT_UnsupportedChunkType;
       return(NULL);
     }
  
    theDocID = docIDFromAny(doc->DocumentID);
+   dbname = anyToString(GetDatabase(theDocID));
    local_id = anyToString(GetLocalID(theDocID));
    freeDocID(theDocID);
  
***************
*** 352,357 ****
--- 369,376 ----
        false) {
      waislog(WLOG_HIGH, WLOG_ERROR,
  	    "Can't parse doc-id: '%s'", local_id);
+     *errorCode = GDT_MissingDocID;
+     s_free(dbname);    
      s_free(local_id);
      return(NULL);
    }
***************
*** 359,367 ****
    waislog(WLOG_LOW, WLOG_RETRIEVE,
  	  "Retrieving DocID: '%s', line range: %d %d, from database %s", 
  	  local_id, doc->ChunkStart.Pos, doc->ChunkEnd.Pos,
! 	  databaseName);
    /* check the database */
!   if(NULL == databaseName){
      waislog(WLOG_HIGH, WLOG_WARNING,
  	    "Missing database for doc-id: '%s'", local_id);
      *errorCode = GDT_MissingDatabase;
--- 378,386 ----
    waislog(WLOG_LOW, WLOG_RETRIEVE,
  	  "Retrieving DocID: '%s', line range: %d %d, from database %s", 
  	  local_id, doc->ChunkStart.Pos, doc->ChunkEnd.Pos,
! 	  dbname);
    /* check the database */
!   if(NULL == dbname){
      waislog(WLOG_HIGH, WLOG_WARNING,
  	    "Missing database for doc-id: '%s'", local_id);
      *errorCode = GDT_MissingDatabase;
***************
*** 369,379 ****
      return(NULL);
    }
    
!   if(check_for_legitimate_file(filename, databaseName) == false){
!     /* report is in check */
      s_free(local_id);
      return(NULL);
    }
    file = s_fopen(filename,"r");
    if (file == NULL)
      if(probe_file_possibly_compressed(filename)) {
--- 388,404 ----
      return(NULL);
    }
    
!   if(check_for_legitimate_file(filename, dbname) == false){
!     waislog(WLOG_HIGH, WLOG_WARNING,
! 	    "doc-id: '%s' not in database '%s'", local_id,dbname);
!     *errorCode = GDT_MissingDocID;
!     s_free(dbname);
      s_free(local_id);
      return(NULL);
    }
+ 
+   s_free(dbname);
+ 
    file = s_fopen(filename,"r");
    if (file == NULL)
      if(probe_file_possibly_compressed(filename)) {
