
/*	
	Document.cpp : implementation of the CSnaggerDoc class
	
	Implements project file persistence for options, tree data and statistics.
	Also performs the actual retrieval of files from the host using the 
	CInet class.

	Author: Steven E. Sipe
*/

#include "stdafx.h"

#include "SiteSnag.h"
#include "Document.h"
#include "View.h"
#include "progress.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

// File version for serialization.  Serialize() writes this value at the start
// of a project file when storing; when loading, the version read back from the
// archive is what gets passed to m_Options.SetVersion().
const long g_lFileVersion = 0x0101;

/////////////////////////////////////////////////////////////////////////////
// CSnaggerDoc

// Run-time class support for CSnaggerDoc (derived from CDocument)
IMPLEMENT_DYNCREATE(CSnaggerDoc, CDocument)

// Message map -- no handlers are registered between the ClassWizard markers
BEGIN_MESSAGE_MAP(CSnaggerDoc, CDocument)
	//{{AFX_MSG_MAP(CSnaggerDoc)
	//}}AFX_MSG_MAP
END_MESSAGE_MAP()


/////////////////////////////////////////////////////////////////////////////
// CSnaggerDoc construction/destruction

// Constructor -- puts the document into its "no project loaded" state and
// establishes the default project options.
CSnaggerDoc::CSnaggerDoc()
{
	// Nothing is loaded or running yet
	m_nLevel = 0;
	m_pProgress = NULL;
	m_bProjectLoaded = FALSE;
	m_bAutoMode = FALSE;

	// Size the hash tables used for the downloaded page and media maps
	m_arrPagesDone.InitHashTable(1200);
	m_arrMediaDone.InitHashTable(2400);

	// Default project options
	m_Options.nMaxDepth = 2;
	m_Options.nMaxPages = 0;
	m_Options.bOffsiteLinks = FALSE;
	m_Options.bFixupLinks = TRUE;
	m_Options.bContents = TRUE;
	m_Options.bMultimedia = TRUE;
}

// Destructor -- frees the entries held by the page and media maps.
CSnaggerDoc::~CSnaggerDoc()
{
	// Anything thrown while the maps are being emptied is deliberately
	// discarded so that no exception escapes the destructor
	try
	{
		ClearCacheMaps();
	}
	catch(...)
	{
	}
}

// Handles creation of a new document.  The very first call creates the
// empty "(No project)" document: only the title is set and the base class
// is invoked.  Every later call starts a real project: the statistics are
// cleared, the project is flagged as loaded and the action title in the
// statistics window is blanked.
//
// Returns TRUE on success, FALSE if CDocument::OnNewDocument() fails.
BOOL CSnaggerDoc::OnNewDocument()
{
	// TRUE only until the first call has been handled
	static BOOL bFirstTime = TRUE;

	// Is this the empty project file?
	if(bFirstTime)
	{
		bFirstTime = FALSE;

		// Yes, set the title to "(No project)"
		CString strDefName;
		strDefName.LoadString(IDS_NO_PROJECT);
		SetTitle(strDefName);

		// Call the base class and get out...
		if(!CDocument::OnNewDocument())
			return FALSE;

		return(TRUE);
	}

	// Call the base class
	if(!CDocument::OnNewDocument())
		return FALSE;

	// Clear the statistics and indicate that we now have a 
	// project loaded
	m_bProjectLoaded = TRUE;
	m_strStartPage.Empty();
	m_nGottenPageCount = 0;
	m_nGottenFileCount = 0;
	m_nQueuedPageCount = 0;
	m_nTotalBytes = 0;

	// Make sure that the info in the statistics window is reset.  The view
	// and its progress window are only touched when they actually exist.
	POSITION pos = GetFirstViewPosition();
	if(pos != NULL)
	{
		CSnaggerView* pView = (CSnaggerView *) GetNextView(pos);
		if(pView != NULL)
		{
			m_pProgress = pView->GetProgress();
			if(m_pProgress != NULL)
				m_pProgress->SetActionTitle("");
		}
	}

	return TRUE;
}

// Opens an existing project file.  The current project is saved first (if it
// was modified), the tree is cleared, and the base class then reads the file
// through Serialize().  On success the project directory and title are derived
// from lpszPathName.
//
// Returns TRUE on success, FALSE if CDocument::OnOpenDocument() fails.
BOOL CSnaggerDoc::OnOpenDocument(LPCTSTR lpszPathName) 
{
	POSITION posView = GetFirstViewPosition();
	CSnaggerView* pSnagView = (CSnaggerView *) GetNextView(posView);

	// Flush the current project (if necessary) and start with an empty tree
	SaveModified();
	pSnagView->ClearTree();

	// Let the base class read the document's contents from disk
	if(!CDocument::OnOpenDocument(lpszPathName))
		return FALSE;

	// Remember where the project lives
	SetPathName(lpszPathName);
	CString strProjectDir = CInet::SplitFileName(lpszPathName,
				CInet::DRIVE|CInet::PATH|CInet::FNAME);
	m_strDirectory = strProjectDir+"\\";

	// The title is the file's name plus extension
	CString strDocTitle = CInet::SplitFileName(lpszPathName,CInet::FNAME|CInet::EXT);
	SetTitle(strDocTitle);

	// The project is now loaded
	m_bProjectLoaded = TRUE;

	// Refresh the statistics window, but only if a progress window has
	// already been attached to this document
	if(m_pProgress != NULL)
	{
		m_pProgress = pSnagView->GetProgress();
		m_pProgress->SetActionTitle("");
		UpdateStatus();
	}

	// Make sure this document gets saved at the end
	SetModifiedFlag(TRUE);

	return TRUE;
}

// Saves the document if it has been changed (DoFileSave() ends up calling the
// document's Serialize() method).  Returns the result of DoFileSave() when a
// save was needed, otherwise TRUE.
BOOL CSnaggerDoc::SaveModified() 
{
	// Nothing to do for an unmodified document
	if(!IsModified())
		return(TRUE);

	return CDocument::DoFileSave();
}

// Prevents the user from exiting the application while a snagging operation
// is in progress.  Returns FALSE while the view reports that it is snagging,
// TRUE otherwise.
BOOL CSnaggerDoc::CanCloseFrame(CFrameWnd* pFrame) 
{
	POSITION posView = GetFirstViewPosition();
	CSnaggerView* pSnagView = (CSnaggerView *) GetNextView(posView);

	if(pSnagView->GetSnagging())
		return(FALSE);

	return(TRUE);
}

// Reloads the empty project (after a delete).
//
// lpszProjName - title to show for the reset document.  When it is NULL or
//                empty the default "(No project)" string resource is used.
//
// The path, directory, modified flag and all statistics are cleared and the
// statistics window (if there is one) is refreshed.
void CSnaggerDoc::Reset(LPCTSTR lpszProjName)
{
	CString strNewProjName;

	// Use the caller's name when one was supplied, otherwise fall back to
	// the default name -- (No Project)
	if(lpszProjName != NULL && *lpszProjName != '\0')
		strNewProjName = lpszProjName;
	else
		strNewProjName.LoadString(IDS_NO_PROJECT);

	m_strPathName.Empty();
	m_strDirectory.Empty();
	m_bProjectLoaded = FALSE;
	SetModifiedFlag(FALSE);
	SetTitle(strNewProjName);

	// Reset the statistics window's information
	m_strStartPage.Empty();
	m_nGottenPageCount = 0;
	m_nGottenFileCount = 0;
	m_nQueuedPageCount = 0;
	m_nTotalBytes = 0;

	// Update the statistics window -- the view and its progress window are
	// only touched when they actually exist
	POSITION pos = GetFirstViewPosition();
	if(pos != NULL)
	{
		CSnaggerView* pView = (CSnaggerView *) GetNextView(pos);
		if(pView != NULL)
		{
			m_pProgress = pView->GetProgress();
			if(m_pProgress != NULL)
				m_pProgress->SetActionTitle("");
		}
	}

	// UpdateStatus() does its own check for a missing progress window
	UpdateStatus();
}

// Handles closing the document -- i.e. the user pressed the X button or chose
// the close menu item.  Any pending changes are saved before the base class
// closes the document.
void CSnaggerDoc::OnCloseDocument() 
{
	// Save first; the result of the save is not examined
	SaveModified();

	// Let the framework do the actual close
	CDocument::OnCloseDocument();
}

// Copies the project's depth, page-limit, fixup, contents, multimedia and
// offsite-link settings into the caller's Options object.
void CSnaggerDoc::GetOptions(CSnagOptions& Options)
{
	const CSnagOptions& current = m_Options;

	// Limits
	Options.nMaxDepth = current.nMaxDepth;
	Options.nMaxPages = current.nMaxPages;

	// Switches
	Options.bFixupLinks = current.bFixupLinks;
	Options.bContents = current.bContents;
	Options.bMultimedia = current.bMultimedia;
	Options.bOffsiteLinks = current.bOffsiteLinks;
}

// Replaces the project's depth, page-limit, fixup, contents, multimedia and
// offsite-link settings with the values in Options -- this is called during
// the command line mode of operation.
void CSnaggerDoc::SetOptions(CSnagOptions& Options) 
{ 
	CSnagOptions& target = m_Options;

	// Limits
	target.nMaxDepth = Options.nMaxDepth;
	target.nMaxPages = Options.nMaxPages;

	// Switches
	target.bFixupLinks = Options.bFixupLinks;
	target.bContents = Options.bContents;
	target.bMultimedia = Options.bMultimedia;
	target.bOffsiteLinks = Options.bOffsiteLinks;
}

/////////////////////////////////////////////////////////////////////////////
// CSnaggerDoc serialization

// Handles document persistence -- called by the framework during
// OnOpenDocument() and during OnSaveDocument().  The archive layout is:
// file version, page count, file count, total bytes, the project options
// and finally the tree contents (serialized by the view).
void CSnaggerDoc::Serialize(CArchive& ar)
{
	POSITION posView = GetFirstViewPosition();
	CSnaggerView* pSnagView = (CSnaggerView *) GetNextView(posView);	
	long lVersion;

	if(!ar.IsStoring())
	{
		// Loading: read the SiteSnagger version first.  It is only handed
		// on to the options object below.
		ar >> lVersion;

		// Statistics window information
		ar >> m_nGottenPageCount;
		ar >> m_nGottenFileCount;
		ar >> m_nTotalBytes;

		// The queued page count is not stored, so start it at zero
		m_nQueuedPageCount = 0;
	}
	else
	{
		// Storing: the current version always goes out first
		lVersion = g_lFileVersion;
		ar << g_lFileVersion;

		// Statistics window information
		ar << m_nGottenPageCount;
		ar << m_nGottenFileCount;
		ar << m_nTotalBytes;
	}

	// Get/Save the project's options using the version determined above
	m_Options.SetVersion(lVersion);
	m_Options.Serialize(ar);

	// Get/Save the tree's contents
	pSnagView->SerializeTree(ar);

	m_nLevel = 0;
}

// Retrieves the specified page using CInet, or reads it from disk when it has
// already been downloaded.
//
// strPage     - URL of the page; passed by reference to CInet::GetPage().
// strFileName - local file name.  For a fresh download the page is saved under
//               this name; for an already-downloaded page it is overwritten
//               with the name recorded in the page map.
// linkEntry   - receives the page's links, their offsite flags and (for a
//               fresh download only) its multimedia references.  Only links
//               accepted by ShouldQueuePage() and media accepted by
//               ShouldGetMedia() are added.
//
// Returns TRUE if the page data was obtained and parsed, FALSE if the
// download or the local read failed.  As a side effect m_nTotalBytes and
// m_strPageTitle are updated.
BOOL CSnaggerDoc::GetPage(CString& strPage, CString& strFileName, LINKS& linkEntry)
{
	BYTE *pbyBuffer = m_byBuffer;
	int nLen = 0;
	BOOL bPageInCache = FALSE;
	BOOL bRet = FALSE;
	CInet::RESULTS ret;
	MAP_FILES* pMapEntry = NULL;
	int i;		// shared by both loops below

	// Initialize the link stack entry
	linkEntry.arrLinks.SetSize(0,100);
	linkEntry.arrMedia.SetSize(0,100);
	linkEntry.arrOffsite.SetSize(0,100);
	linkEntry.nIndex = 0;

	// Should we request the page from the host or use a local cached copy?
	if(ShouldGetPage(strPage,pMapEntry))
	{
		// Ask the web server to transmit the page
		ret = m_Inet.GetPage(strPage,&pbyBuffer,nLen,TRUE);
		if(ret == CInet::SUCCESS)
		{
			// Add the total bytes to the statistics window count
			bRet = TRUE;
			m_nTotalBytes += nLen;
		}
	}
	else
	{
		// Using a local cached copy...open the file and read it.  This is
		// done because we still may need the links for a previously saved
		// page.
		CFile fileIn;
		CFileException ex;

		// Make sure we get the full location of the file
		strFileName = pMapEntry->strFileName;
		CString strTempFileName = m_strDirectory+strFileName;

		// Open the local file for reading
		if(fileIn.Open(strTempFileName,CFile::modeRead,&ex))
		{
			// Read the data (at most one buffer's worth)
			nLen = fileIn.Read(pbyBuffer,MAX_INET_BUFFER);
			fileIn.Close();
			bRet = TRUE;
		}

		// Indicate that we didn't download a new page from the INet
		bPageInCache = TRUE;
	}

	// Did everything work okay?
	if(bRet)
	{
		// Yes...get the links and media info for this page
		CSnaggerHtmlParser Parser;
		Parser.SetPageURL(strPage);
		m_pProgress->SetActionTitle("Parsing Page: "+strPage);

		// Just for safety
		if(nLen > MAX_INET_BUFFER)
			nLen = MAX_INET_BUFFER;

		// Initialize and call the parser.  The data is always parsed from
		// m_byBuffer, whatever the download call left in pbyBuffer.
		pbyBuffer = m_byBuffer;
		Parser.SetFixupMode(FALSE);
		Parser.ResetArrays();
		Parser.SetGetMedia(m_Options.bMultimedia);
		Parser.ParseText((char *)pbyBuffer,nLen);
		m_strPageTitle = Parser.GetTitle();

		// Save the page to a local file (if it doesn't already exist)
		if(!bPageInCache)
		{
			m_pProgress->SetActionTitle("Saving Page: "+strPage);
			pbyBuffer = m_byBuffer;
			m_Inet.SaveFile(strFileName,m_strDirectory,pbyBuffer,nLen);
		}

		// Determine the number of linked pages that we have
		int nLinks;
		BOOL bOffsite;
		CString strNewPage;
		nLinks = Parser.GetLinks().GetSize();

		// Evaluate each of the links for this page to determine if we need to 
		// add them to the download queue
		for(i = 0; i < nLinks; i++)
		{
			// Get the page's URL
			strNewPage = Parser.GetLinks().GetAt(i);

			// Get the offsite link flag for this page
			bOffsite = Parser.GetOffsiteFlags().GetAt(i);

			// See if we should add it to the download queue
			if(ShouldQueuePage(strNewPage,bOffsite)) 
			{
				linkEntry.arrLinks.Add(strNewPage);
				linkEntry.arrOffsite.Add(bOffsite);
			}
		}

		// Don't need the images if we've already parsed this page
		// before
		if(!bPageInCache)
		{
			// New page, so get all of the media information
			int nMedia = Parser.GetMedia().GetSize();
			CString strMedia;
			for(i = 0; i < nMedia; i++)
			{
				strMedia = Parser.GetMedia().GetAt(i);
				if(ShouldGetMedia(strMedia,pMapEntry))
					linkEntry.arrMedia.Add(strMedia);
			}
		}

		// Success
		bRet = TRUE;
	}

	return(bRet);
}

// Uses CInet to retrieve the multimedia item specified by strMedia and saves
// it in the project directory under the name given in strFileName.  The
// number of bytes received is added to m_nTotalBytes.
//
// Returns TRUE if the download succeeded, FALSE otherwise.
BOOL CSnaggerDoc::GetMedia(CString& strMedia, CString& strFileName)
{
	int nBytes;
	BYTE *pbyData = m_byBuffer;

	// Get the file from the INet; bail out on anything but success
	CInet::RESULTS result = m_Inet.GetFile(strMedia,&pbyData,nBytes);
	if(result != CInet::SUCCESS)
		return(FALSE);

	// Add the size to the statistics count
	m_nTotalBytes += nBytes;

	// Write the file 
	m_pProgress->SetActionTitle("Saving File: "+strMedia);
	m_Inet.SaveFile(strFileName,m_strDirectory,pbyData,nBytes);

	return(TRUE);
}

// Returns TRUE if the page has NOT been retrieved yet (i.e. it is not in the
// list of retrieved pages) and therefore should be requested from the INet.
// Returns FALSE if it has already been retrieved; in that case pMapEntry is
// set to its entry in the list of retrieved pages.
//
// The lookup key is the lowercased URL with everything from the first '#'
// removed, and with a trailing "/" added when the URL has no file extension.
BOOL CSnaggerDoc::ShouldGetPage(CString& strPage, MAP_FILES*& pMapEntry)
{
	// Page names shouldn't be case sensitive
	CString strKey = strPage;
	strKey.MakeLower();
	strKey = strKey.SpanExcluding("#");

	// Allow for pages that contain a relative URL (SS 1.2 FIX)
	CString strKeyExt = CInet::SplitFileName(strKey,CInet::EXT);
	if(strKeyExt.IsEmpty())
	{
		if(strKey.Right(1) != "/")
			strKey += "/";
	}

	// Did we find it??
	BOOL bAlreadyDone = m_arrPagesDone.Lookup(strKey,(CObject *&) pMapEntry);
	return(!bAlreadyDone);
}

// Returns TRUE if the multimedia file has NOT been retrieved yet (i.e. it is
// not in the list of retrieved multimedia files) and therefore should be
// requested from the INet.  Returns FALSE if it has already been retrieved;
// in that case pMapEntry is set to its entry in that list.
//
// The lookup key is the lowercased URL with everything from the first '#'
// removed.
BOOL CSnaggerDoc::ShouldGetMedia(CString& strMedia, MAP_FILES*& pMapEntry)
{
	// Media names shouldn't be case sensitive
	CString strKey = strMedia;
	strKey.MakeLower();
	strKey = strKey.SpanExcluding("#");

	// Did we find it??
	BOOL bAlreadyDone = m_arrMediaDone.Lookup(strKey,(CObject *&) pMapEntry);
	return(!bAlreadyDone);
}

// Returns TRUE if the specified page should be added to the download queue.
//
//  - Page not downloaded yet: it is rejected if it is already waiting in the
//    link queue of any level below the current one.
//  - Page already downloaded: it is accepted straight away when a maximum
//    depth is configured and the current level is at or beyond the level
//    recorded in the page's map entry.
//  - In every other case the page is accepted unless it is an offsite page
//    and offsite links are disabled in the project options.
BOOL CSnaggerDoc::ShouldQueuePage(CString& strNewPage, BOOL bOffsite)
{
	MAP_FILES* pMapEntry;

	// Have we downloaded this page yet???
	const BOOL bNotDownloaded = ShouldGetPage(strNewPage,pMapEntry);

	if(!bNotDownloaded)
	{
		// Yes...compare the current level with the level stored for the page
		if(m_Options.nMaxDepth && pMapEntry->nMaxLevel <= m_nLevel)
			return(TRUE);
	}
	else
	{
		// No...then look for it in the queue of waiting pages at previous
		// levels
		for(int nLevel = 0; nLevel < m_nLevel; nLevel++)
		{
			for(int nLink = 0; nLink < m_aLinks[nLevel].arrLinks.GetSize(); nLink++)
			{
				if(strNewPage == m_aLinks[nLevel].arrLinks.GetAt(nLink))
					return(FALSE);
			}
		}
	}

	// Offsite pages are only queued when offsite links are allowed
	if(bOffsite && !m_Options.bOffsiteLinks)
		return(FALSE);

	return(TRUE);
}

// Initializes the specified link stack entry
void CSnaggerDoc::ResetLink(int nLevel)
{
	m_aLinks[nLevel].nIndex = 0;
	m_aLinks[nLevel].arrLinks.SetSize(0,100);
	m_aLinks[nLevel].arrMedia.SetSize(0,100);
	m_aLinks[nLevel].arrOffsite.SetSize(0,100);
}

// Updates the information in the statistics window (if m_pProgress contains
// a valid window class pointer)
void CSnaggerDoc::UpdateStatus()
{
	// Does the statistics window exist?
	if(m_pProgress)
	{
		// Yep...update the info in its fields
		m_pProgress->SetQueuedFiles(m_nQueuedPageCount);
		m_pProgress->SetDownloadedPages(m_nGottenPageCount);
		m_pProgress->SetDownloadedFiles(m_nGottenFileCount);
		m_pProgress->SetKBDownloaded(m_nTotalBytes);
		m_pProgress->SetLevel(m_nLevel+1);
	}
}

// The workhouse thread routine that recursively navigates linked web pages and
// retrieves each of them along with their multimedia files.  This process is
// spawned indirectrly in RecursiveDownload() using the AfxBeginThread() call.
UINT CSnaggerDoc::DownloadThread(LPVOID lpvData)
{
	HTREEITEM htreePage;

	// Static methods can't have a "this" pointer to get the parent class's 
	// pointer which the call passes as a parameter
	CSnaggerDoc *pThis = (CSnaggerDoc *) lpvData;

	int nMaxDepth = pThis->m_Options.nMaxDepth-1;
	int nCount;
	CString strPage = pThis->m_strStartPage;
	CString strFileName;
	CString strLogData;
	CString strText;
	POSITION pos = pThis->GetFirstViewPosition();
	CSnaggerView* pView = (CSnaggerView *) pThis->GetNextView(pos);	
	BOOL bIsOffsite = FALSE;

	// Establish the WinInet Session
	try
	{
		pThis->m_Inet.OpenSession(pThis->m_Options.bUseProxy,pThis->m_Options.strProxyName);
	}
	catch(...)
	{
	}

	// Create the log file
	pThis->m_fileLog.Open(pThis->m_strDirectory+"sitesnag.log",
						CFile::modeCreate|CFile::modeWrite);

	// Create the table of contents file
	if(pThis->m_Options.bContents)
	{
		pThis->m_fileContents.Open(pThis->m_strDirectory+"SnagCon1.htm",
								CFile::modeCreate|CFile::modeWrite);

		// Add the TOC to the list of downloaded files 
		pThis->SetPageCacheEntry("snagcon1.htm","SnagCon1.htm",0);

		// Add the TOC to the tree control
		CString strTitle = "Contents Page 1 (SnagCon1.htm)";
		pView->AddTreeContent(strTitle);

		// Write the beginning of the first TOC page
		strText = "<HTML>\r\n<HEAD>\r\n<TITLE>SiteSnagger Contents</TITLE>\r\n";
		strText += "</HEAD\r\n<BODY>\r\n";
		strText += "<H1><center>SiteSnagger Table of Contents</center><br><br></H1>\r\n<UL>\r\n";
		pThis->m_fileContents.Write(strText,strText.GetLength());
	}

	// Initialize the index for the first link level, start with the first level
	pThis->m_nLevel = 0;
	pThis->m_aLinks[0].nIndex = 0;
	pThis->m_Inet.ResetUniqueCount();

	// Recusively search web links until either we've searched them all (m_nLevel is
	// -1 or if the user decides to abort
	while(pThis->m_nLevel >= 0 && !pThis->m_pProgress->IsAborted())
	{
		// Get the name of a new page in a second dimension element
		if(pThis->m_aLinks[pThis->m_nLevel].nIndex > 0)
		{
			// Save the URL and whether it's offsite
			int nIndex = pThis->m_aLinks[pThis->m_nLevel].nIndex;
			strPage = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetAt(nIndex);
			bIsOffsite = pThis->m_aLinks[pThis->m_nLevel].arrOffsite.GetAt(nIndex);

			// Bump to the next level so we can get the page's links
			pThis->m_nLevel++;
		}

		// Generate a unique filename for this page
		pThis->m_Inet.GenerateUniqueFileName(strPage,strFileName,
						pThis->m_arrPagesDone,TRUE);
		pThis->m_pProgress->SetActionTitle("Getting Page: "+strPage);

		// Write a log entry for this page -- leave room for the result
		strLogData.Format("[%02d] Getting page %s ",pThis->m_nLevel+1,strPage);
		pThis->m_fileLog.Write(strLogData,strLogData.GetLength());

		CString strOrigPage = strPage;

		// Get the page from Inet or from local file
		if(pThis->GetPage(strPage,strFileName,pThis->m_aLinks[pThis->m_nLevel]))
		{
			MAP_FILES *pMapEntry;
		
			// Get the count of links
			nCount = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetSize();

			// Did we just download this new page??
			if(pThis->ShouldGetPage(strPage,pMapEntry))
			{
				// Yes, add it to the list of retrieved pages
				pThis->SetPageCacheEntry(strPage,strFileName,pThis->m_nLevel);

				// If the page was redirected then add its original name too
				if(strPage != strOrigPage && pThis->ShouldGetPage(strOrigPage,pMapEntry))
					pThis->SetPageCacheEntry(strOrigPage,strFileName,pThis->m_nLevel);

				// Prefix offsite pages with their URL (i.e. http://www.xxx.yyy)
				if(bIsOffsite)
					strText = strPage+" - ";
				else strText.Empty();

				// Add the page's title and local filename 
				strText += pThis->m_strPageTitle+"  ("+
							strFileName.SpanExcluding("#")+")";

				htreePage = pView->AddTreePage(strText,bIsOffsite);
				strText.Format("<a href=%s><li> %s (%s - %s)<br>\r\n",strFileName,
									pThis->m_strPageTitle,
									strFileName.SpanExcluding("#"),strPage);
				if(pThis->m_Options.bContents)
					pThis->m_fileContents.Write(strText,strText.GetLength());

				// Update the statistics
				pThis->m_nGottenPageCount++;
				pThis->m_nGottenFileCount++;
				pThis->UpdateStatus();
			}
			else
			{
				// Set the new depth level if necessary
				if(nMaxDepth)
				{
					// Have we gone to the max level yet???
					if(pThis->m_nLevel >= pMapEntry->nMaxLevel)
						nCount = 0;
					else pMapEntry->nMaxLevel = pThis->m_nLevel;
				}
			}

			// Log the results
			pThis->m_fileLog.Write("[OK]\n",5);

			// Check for offsite links, don't follow the current page's
			// links if it is an offsite page
			if(bIsOffsite)
				nCount = 0;

			// Should we get multimedia files??
			if(pThis->m_Options.bMultimedia)
			{
				// Iterate through the list of multimedia links
				CString strMedia;
				for(int j = 0; j < pThis->m_aLinks[pThis->m_nLevel].arrMedia.GetSize() &&
							!pThis->m_pProgress->IsAborted(); j++)
				{
					strMedia = pThis->m_aLinks[pThis->m_nLevel].arrMedia.GetAt(j);

					// Should we get this file?
					if(pThis->ShouldGetMedia(strMedia,pMapEntry))
					{
						// Yep, make sure it has a unique name
						pThis->m_Inet.GenerateUniqueFileName(strMedia,
										strFileName,pThis->m_arrMediaDone,FALSE);
						pThis->m_pProgress->SetActionTitle("Getting File: "+strFileName);

						// Log the info
						strLogData.Format("[%02d] Getting media %s ",pThis->m_nLevel,
														strMedia);
						pThis->m_fileLog.Write(strLogData,strLogData.GetLength());

						// We don't need to download EMAIL links so just make
						// them look like a successful file entry
						BOOL bMail;
						if(strMedia.Left(7) == "mailto:")
						{
							bMail = TRUE;
							strFileName = strMedia;
						}
						else bMail = FALSE;

						// Did everything work okay??
						if(bMail || pThis->GetMedia(strMedia,strFileName))
						{
							// Yep...add this file to our file list and to the tree
							pThis->SetMediaCacheEntry(strMedia,strFileName);
							pView->AddTreeMedia(strFileName.SpanExcluding("#"),
											CTree::GetMediaType(strFileName));

							// Increment the statistics count
							if(!bMail)
								pThis->m_nGottenFileCount++;
							pThis->UpdateStatus();

							// Log the results
							pThis->m_fileLog.Write("[OK]\n",5);
						}
						else
						{
							// Log the results
							pThis->m_fileLog.Write("[FAILED] ",9);

							// Show a detailed error -- if possible
							CString strError = pThis->m_Inet.GetErrorText();
							pThis->m_fileLog.Write(strError,strError.GetLength());
							pThis->m_fileLog.Write("\n",1);
						}
					}
				}
			}
		}
		else
		{
			// Log the results
			pThis->m_fileLog.Write("[FAILED] ",9);

			// Show a detailed error -- if possible
			CString strError = pThis->m_Inet.GetErrorText();
			pThis->m_fileLog.Write(strError,strError.GetLength());
			pThis->m_fileLog.Write("\n",1);
			
			nCount = 0;
		}

		// Make sure the statistics window is updated properly
		pThis->UpdateStatus();

		// If we've hit the max page count then just get out
		if(pThis->m_Options.nMaxPages > 0 && 
				pThis->m_nGottenPageCount >= pThis->m_Options.nMaxPages) 
			break;

		// Continue recursion if we haven't hit maximum depth yet
		// and as long as we have links on this page
		if(pThis->m_nLevel < nMaxDepth && nCount > 0)
		{
			// Get the next page to parse
			strPage = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetAt(0);
			bIsOffsite = pThis->m_aLinks[pThis->m_nLevel].arrOffsite.GetAt(0);

			// Move to the next level, initialize its link info
			pThis->m_nLevel++;
			pThis->ResetLink(pThis->m_nLevel);

			// Queue the links
			pThis->m_nQueuedPageCount += nCount;
			continue;
		}

		// Finished will all links on this page, reset its link info
		pThis->ResetLink(pThis->m_nLevel);

		// Move back to the previous level
		pThis->m_nLevel--;

		// Find the next page on the second dimension
		if(pThis->m_nLevel >= 0)
		{
			int nMaxCount;

			// Find another page that has links
			while(pThis->m_nLevel >= 0)
			{
				// How many second dimension entries do we have??
				nMaxCount = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetSize();

				// Did we have another valid page at this level?
				if(pThis->m_aLinks[pThis->m_nLevel].nIndex < nMaxCount-1)
				{
					// Yes, get the next page
					pThis->m_aLinks[pThis->m_nLevel].nIndex++;
					pThis->m_nQueuedPageCount--;
					break;
				}
				else 
				{
					// No, back up a level in the tree
					pThis->m_nLevel--;
					pThis->m_nQueuedPageCount--;
				}
			}
		}
	}

	// Make sure the "stopping, please wait" message isn't displayed
	pView->EndWait();

	// Make sure that the tree was correctly parsed
	// nNodeCount should always be 0
	pThis->m_nLevel = pThis->m_nQueuedPageCount;

	// Should we fix up the links for browsing??
	if(pThis->m_Options.bFixupLinks)
	{
		// Yes, but the user has canceled the download so ask if they want
		// to fix up anyway
		if(pThis->m_pProgress->IsAborted() && 
			pView->ShowPrompt(IDS_FIXUP_PAGES,MB_ICONQUESTION|MB_YESNO) != IDYES)
			;
		else
		{
			// Everything was normal so just fixup the links
			pThis->m_pProgress->SetAborted(FALSE);
			pThis->FixupLinks();
		}
	}

	// Close the log file
	pThis->m_fileLog.Close();

	// Are we doing the TOC??
	if(pThis->m_Options.bContents)
	{
		// Yep...write the bottom of it and close the file
		CString strText;
		strText = "</UL>\r\n</BODY>\r\n</HTML>\r\n";
		pThis->m_fileContents.Write(strText,strText.GetLength());
		pThis->m_fileContents.Close();
	}

	// Let the user know that we've finished
	pThis->m_pProgress->SetActionTitle("********* Processing Finished *********");
	pView->SetSnagging(FALSE);

	// Finished with the maps, so free the memory
	pThis->ClearCacheMaps();

	// Make sure the "stopping, please wait" message isn't displayed
	pView->EndWait();

	// Close the main window if we're in automatic mode
	if(pThis->m_bAutoMode)
		AfxGetMainWnd()->PostMessage(WM_CLOSE);

	// Shutdown the WinInet Session
	try
	{
		pThis->m_Inet.CloseSession();
	}
	catch(...)
	{
	}

	pThis->m_nLevel = 0;

	return(0);
}

// Removes all of the entries in the page and file download lists.  These are
// CMap's so it must navigate the linked list and delete each of the CObject
// entries.
void CSnaggerDoc::ClearCacheMaps()
{
	MAP_FILES *pMapEntry;
	CString strPage;
	POSITION pos;
	int i;

	// Zap the page list
	pos = m_arrPagesDone.GetStartPosition();
	for(i = 0; i < m_arrPagesDone.GetCount(); i++)
	{
		m_arrPagesDone.GetNextAssoc(pos,strPage,(CObject *&) pMapEntry);
		delete pMapEntry;
	}
	m_arrPagesDone.RemoveAll();

	// Zap the multimedia list
	pos = m_arrMediaDone.GetStartPosition();
	for(i = 0; i < m_arrMediaDone.GetCount(); i++)
	{
		m_arrMediaDone.GetNextAssoc(pos,strPage,(CObject *&) pMapEntry);
		delete pMapEntry;
	}
	m_arrMediaDone.RemoveAll();
}

// Adds a new entry to the retrieved page list.
//
// lpszPage     - URL of the page.  The map key is the lowercased URL with
//                everything from the first '#' removed and a trailing "/"
//                added when the URL has no file extension (the same key that
//                ShouldGetPage() builds).
// lpszFileName - local file name the page was saved under.
// nLevel       - level at which the page was retrieved.
//
// When the local file name is the generated default "index.htm" and the URL
// itself does not mention it, entries for the common default page names on
// the same server are added as well so that link fixup can resolve them.
//
// The map owns its entries: an entry that is replaced is deleted here.
void CSnaggerDoc::SetPageCacheEntry(LPCTSTR lpszPage, LPCTSTR lpszFileName, int nLevel)
{
	CString strTempPage = lpszPage;
	CString strLocalFile = lpszFileName;
	MAP_FILES *pOldEntry = NULL;
	MAP_FILES *pMapEntry = new MAP_FILES();

	// The page name should always be saved as lowercase
	pMapEntry->strFileName = strLocalFile;
	pMapEntry->nMaxLevel = nLevel;
	strTempPage.MakeLower();
	strTempPage = strTempPage.SpanExcluding("#");

	// Allow for pages that contain a relative URL (SS 1.2 FIX)
	CString strExt = CInet::SplitFileName(strTempPage,CInet::EXT);
	if(strExt.IsEmpty() && strTempPage.Right(1) != "/")
		strTempPage += "/";

	// Assimilate it into the collective, releasing any entry it replaces
	if(m_arrPagesDone.Lookup(strTempPage,(CObject *&) pOldEntry))
		delete pOldEntry;
	m_arrPagesDone.SetAt(strTempPage,(CObject *&) pMapEntry);

	// The URL is passed as an argument, never as the format string, because
	// URLs may contain '%' characters
	TRACE("Pages Done: %s\n",(LPCTSTR) strTempPage);

	// Check for a generated default page name...if we have one then add some
	// other default names so we don't break during link fixup
	if(strLocalFile == "index.htm" && strTempPage.Find("index.htm") == -1)
	{
		// Common default page names, for links such as "www.mycompany.com/"
		static const char* const aszDefPages[4] = { "index.htm", "index.html",
										"default.htm", "default.html"};

		MAP_FILES *pTempMapEntry;

		CString strServer, strObject, strUser, strPassword;
		INTERNET_PORT nPort;
		DWORD dwServiceType;
		AfxParseURLEx(strTempPage,dwServiceType,strServer,strObject,nPort,
									strUser,strPassword,
									ICU_NO_ENCODE);

		// Add the default names
		if(!strServer.IsEmpty())
		{
			strServer = "http://"+strServer+"/";

			for(int i = 0; i < 4; i++)
			{
				CString strDefKey = strServer+aszDefPages[i];

				// The new entry is filled from the function arguments rather
				// than from pMapEntry, which may be the entry replaced below
				pTempMapEntry = new MAP_FILES();
				pTempMapEntry->strFileName = strLocalFile;
				pTempMapEntry->nMaxLevel = nLevel;

				pOldEntry = NULL;
				if(m_arrPagesDone.Lookup(strDefKey,(CObject *&) pOldEntry))
					delete pOldEntry;
				m_arrPagesDone.SetAt(strDefKey,(CObject *&) pTempMapEntry);
				TRACE("Pages Done: %s\n",(LPCTSTR) strDefKey);
			}
		}
	}	
}

// Adds a new entry to the retrieved file list.  The map key is the lowercased
// media URL with everything from the first '#' removed; the entry records the
// local file name and a level of -1.
void CSnaggerDoc::SetMediaCacheEntry(LPCTSTR lpszMedia, LPCTSTR lpszFileName)
{
	// Build the key -- it should always be saved as lowercase
	CString strKey = lpszMedia;
	strKey.MakeLower();
	strKey = strKey.SpanExcluding("#");

	// Build the entry
	MAP_FILES *pEntry = new MAP_FILES();
	pEntry->nMaxLevel = -1;
	pEntry->strFileName = lpszFileName;

	// Assimilate it into the collective
	m_arrMediaDone.SetAt(strKey,(CObject *&) pEntry);
}

// Sets up a snagging session for the URL given in lpszURL and calls
// AfxBeginThread() to start DownloadThread().  The statistics and the
// page/media maps are cleared first and the statistics window is refreshed.
void CSnaggerDoc::RecursiveDownload(LPCTSTR lpszURL)
{
	// Start from a clean slate
	m_strStartPage = lpszURL;
	m_nTotalBytes = 0;
	m_nQueuedPageCount = 0;
	m_nGottenFileCount = 0;
	m_nGottenPageCount = 0;
	ClearCacheMaps();

	// Attach the view's progress window and clear any previous abort request
	POSITION posView = GetFirstViewPosition();
	CSnaggerView* pSnagView = (CSnaggerView *) GetNextView(posView);
	m_pProgress = pSnagView->GetProgress();
	m_pProgress->SetAborted(FALSE);
	UpdateStatus();

	// Start the download thread, handing it this document
	AfxBeginThread(DownloadThread,this);
}

// Fixs up each of the downloaded pages to allow local browsing.  It
// basically goes through the downloaded page list and opens each 
// file specified then parses the page and fixes up each of the 
// links found.
void CSnaggerDoc::FixupLinks()
{
	BYTE *pbyBuffer = m_byBuffer;
	int nLen;
	CFile fileIn, fileOut;
	CFileException ex;
	MAP_FILES* pMapEntry;
	int nIndex;
	int nBytesWritten;
	int nSectionLength;
	int nOffset;
	CString strFixupFileName;
	CString strPage;
	CString strFileName;
	CString strTempName = m_strDirectory+"$ssnag$.htm";
	BOOL bFixup;
	int nFiles = m_arrPagesDone.GetCount();
	BOOL bIndexDone = FALSE;

	// Update the statistics window to indicate the number of files
	// we have left to do
	m_pProgress->SetQueuedFiles(nFiles);

	// Iterate through the downloaded page list
	POSITION pos = m_arrPagesDone.GetStartPosition();
	for(int i = 0; i < m_arrPagesDone.GetCount() && !m_pProgress->IsAborted(); i++)
	{
		m_arrPagesDone.GetNextAssoc(pos,strPage,(CObject *&) pMapEntry);

		if(pMapEntry->strFileName == "index.htm" || 
						pMapEntry->strFileName == "index.html")
		{
			if(bIndexDone)
			{
				// Decrement the queued file count in the stastics window
				m_pProgress->SetQueuedFiles(--nFiles);
				continue;
			}	
			else bIndexDone = TRUE;
		}

		// Display the current file in that statistics window
		strFileName = m_strDirectory+pMapEntry->strFileName;
		m_pProgress->SetActionTitle("Fixing up links in file: "+pMapEntry->strFileName);

		// Open the local file for create 
		TRACE("Fixing up: "+strFileName+"\n");
		if(fileIn.Open(strFileName.SpanExcluding("#"),CFile::modeRead,&ex))
		{
			pbyBuffer = m_byBuffer;

			// Read the file into memory
			nLen = fileIn.Read(pbyBuffer,MAX_INET_BUFFER);
			fileIn.Close();

			// Parse it (tell the parser to look for only links, i.e. fixup mode)
			CSnaggerHtmlParser Parser;
			Parser.SetFixupMode(TRUE);
			Parser.ResetArrays();
			Parser.SetGetMedia(m_Options.bMultimedia);
			Parser.SetPageURL(strPage);
			Parser.ParseText((char *)pbyBuffer,nLen);

			CString strServer, strObject, strUser, strPassword;
			INTERNET_PORT nPort;
			DWORD dwServiceType;
			AfxParseURLEx(strPage,dwServiceType,strServer,strObject,nPort,
									strUser,strPassword,
									ICU_NO_ENCODE);
			CString strOrigHost = "http://"+strServer+"/";

			// Open a temporary workfile
			if(fileOut.Open(strTempName,CFile::modeCreate|CFile::modeWrite,&ex))
			{
				CString strURL;

				nIndex = 0;
				nBytesWritten = 0;
				nOffset = 0;

				// Write the file and perform fix-ups (if necessary)
				for(int j = 0; j < Parser.GetFixups().arrURL.GetSize(); j++)
				{
					strURL = Parser.GetFixups().arrURL.GetAt(j);

					// Text page fixup??
					if(Parser.GetFixups().arrTextPage.GetAt(j))
					{
						// Did we download this page??
						if(!ShouldGetPage(strURL,pMapEntry))
						{
							// Yep...get it's local filename
							strFixupFileName = pMapEntry->strFileName;
							bFixup = TRUE;
						}
						else
						{							
							AfxParseURLEx(strURL,dwServiceType,strServer,
									strObject,nPort,strUser,strPassword,
									ICU_NO_ENCODE);

							if(!strServer.IsEmpty())
							{
								strFixupFileName = strURL;
								bFixup = TRUE;
							}
							else bFixup = FALSE;
						}
					}
					else // Media fixup
					{
						// Did we download this multimedia file??
						if(!ShouldGetMedia(strURL,pMapEntry))
						{
							// Yep...get it's local filename
							strFixupFileName = pMapEntry->strFileName;
							bFixup = TRUE;
						}
						else bFixup = FALSE;
					}

					// Write the data leading up to the reference that we are
					// going to fix up
					nSectionLength = Parser.GetFixups().arrIndex.GetAt(j)-nIndex-nOffset;

					try
					{
						fileOut.Write(pbyBuffer,nSectionLength);
					}
					catch(...)
					{
					}

					nBytesWritten += nSectionLength;
					pbyBuffer += nSectionLength;
					nOffset = 0;

					// Do we have an entry to fix up??
					if(bFixup)
					{
						// Yes, write the local filename
						try
						{
							fileOut.Write(" \"",2);
							fileOut.Write(strFixupFileName,strFixupFileName.GetLength());
						}
						catch(...)
						{
						}

						// Find the start of the old filename
						while(*pbyBuffer == ' ') 
						{
							pbyBuffer++;
							nBytesWritten++;
							nOffset++;
						}

						// Is the filename in quotes?
						if(*pbyBuffer == '"') 
						{
							// Yep, look for the end
							do
							{
								pbyBuffer++;
								nBytesWritten++;
								nOffset++;
							}
							while(*pbyBuffer != '"');

							pbyBuffer++;
							nBytesWritten++;
							nOffset++;
						}
						else
						{
							// Not in quotes, so just look for the first
							// trailing blank
							do
							{
								pbyBuffer++;
								nBytesWritten++;
								nOffset++;
							}
							while(*pbyBuffer != ' ');
						}

						// Write a trailing quote and blank, just to be safe
						fileOut.Write("\" ",2);
					}

					// Save the location where this fixup started
					nIndex = Parser.GetFixups().arrIndex.GetAt(j);
				}

				// Finished fixing up, so write the rest of the data left
				// in the file buffer
				try
				{
					if(nLen-nBytesWritten > 0)
						fileOut.Write(pbyBuffer,nLen-nBytesWritten);
				}
				catch(...)
				{
				}

				fileOut.Close();
			}
		}

		// Replace the original page with this new fixed up page
		remove(strFileName.SpanExcluding("#"));
		rename(strTempName,strFileName.SpanExcluding("#"));

		// Decrement the queued file count in the stastics window
		m_pProgress->SetQueuedFiles(--nFiles);
	}

	// Get rid if any leftover work files (this might happen if the
	// user aborted in the middle)
	remove(strTempName);
}

/////////////////////////////////////////////////////////////////////////////
// CSnaggerDoc diagnostics

#ifdef _DEBUG

// Debug-only validity check; defers entirely to the base class.
void CSnaggerDoc::AssertValid() const
{
	// No additional checks of our own
	CDocument::AssertValid();
}

// Debug-only dump of the object to dc; defers entirely to the base class.
void CSnaggerDoc::Dump(CDumpContext& dc) const
{
	// Nothing of our own is written
	CDocument::Dump(dc);
}
#endif //_DEBUG

