//==================================
// EXESIZE.C - Matt Pietrek 1993
//==================================
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "newexe.h"

// 16-bit signature words, written as they compare after being read
// directly into a 16-bit integer on a little-endian host.
#define OLDEXE_SIGNATURE 0x5A4D
#define NEWEXE_SIGNATURE 0x454E
#define DEBUG_SIGNATURE  0x424E

// 6 bytes in prologue, 1 byte in epilogue
#define PROLOGUE_EXTRA_SIZE 7

// Name of the file being examined; set by ParseCommandLine().
char *filename;
// Stream for the file being examined; opened by OpenAndVerifyFile()
// and closed by DoFileReport().
FILE *file;
// Copy of the 'NE' header, filled in by OpenAndVerifyFile().
NEWEXEHEADER nehdr;
// File offset of the 'NE' header, read from offset 3Ch of the file.
UINT32 nehdr_offset;
// Total size of the file, set by CalculateOptimalAlignSize().
UINT32 FileSize;
// Size of trailing debug information, set by CalculateDebugInfoSize();
// stays 0 when none is detected.
UINT32 DebugInfoSize=0;
// Number of entry table entries whose flags have bit 0 set, counted by
// CalculateUnneededEntries().
UINT16 ExportedEntries = 0;

// Smallest usable alignment, computed by CalculateOptimalAlignSize().
UINT16 OptimalAlignment;
// Alignment the file was linked with (1 << nehdr.shift_count), set by
// CalculateWastedAlignSpace().
UINT16 CurrentAlignment;

// Running totals accumulated by the Calculate*/Examine* routines and
// printed by DoReports().
UINT32 WastedSegmentAlignSpace = 0;
UINT32 WastedResourceAlignSpace = 0;
UINT32 WastedEntrySpace = 0;
UINT16 WastedEntryCount = 0;
UINT32 WastedResNamesSpace = 0;
UINT16 WastedResNamesCount = 0;
UINT32 ExportedPrologues = 0;
// NOTE(review): ExportedPrologueSize is not referenced anywhere else in
// this file - confirm whether it is still needed.
UINT32 ExportedPrologueSize = 0;

// Opcode bytes that identify an exportable prologue
char ExportPrologue[] = {0x1E, 0x8E, 0xD8}; // PUSH DS,  MOV DS,AX

//
// Single choke point for normal program output, so that printf() calls
// are not scattered all over the file.  Takes a printf-style format
// string plus arguments and writes the result to standard output.
//
void output( char *format, ... )
{
    va_list args;

    va_start( args, format );
    vfprintf( stdout, format, args );
    va_end( args );
}

//
// Writes a printf-style message to standard output and then terminates
// the program with exit status 1.  Does not return to the caller.
//
void FatalExit(char *format, ...)
{
    va_list args;

    va_start( args, format );
    vfprintf( stdout, format, args );
    va_end( args );

    exit(1);
}

//
// Validates the command line and records the name of the file to
// examine in the global 'filename'.  Exactly one argument is required;
// otherwise the usage text is printed and the program exits through
// FatalExit().
//
void ParseCommandLine(int argc, char *argv[])
{
    if ( argc != 2 )
        FatalExit("Syntax: EXESIZE filename\n"
          "  EXESIZE scans New Executable files for wasted space\n");

    // The argv strings stay valid for the whole run of the program, so
    // the pointer is kept as-is.  No copy is made, which means there is
    // no allocation that could fail and nothing that needs freeing.
    filename = argv[1];
}

//
// Opens 'filename' for binary reading and verifies that it is a New
// Executable.  On success the globals 'file', 'nehdr_offset' and
// 'nehdr' are filled in.  Any failure - including a file too short to
// contain the fields being examined - terminates the program through
// FatalExit().
//
void OpenAndVerifyFile(void)
{
    UINT16 oldexe_signature;
    UINT16 reloc_offset;

    // Open the input file
    file = fopen(filename, "rb");
    if ( !file )
        FatalExit("Couldn't open %s\n", filename);

    // Look for the DOS 'MZ' signature.  A short read (an empty file, for
    // example) is rejected here instead of testing an uninitialized value.
    if ( (fread(&oldexe_signature, sizeof(oldexe_signature), 1, file) != 1)
        || (oldexe_signature != OLDEXE_SIGNATURE) )
        FatalExit("Not a valid .EXE file\n");

    // Verify that the value at offset 18h is 40h.  Although the Windows
    // loader ignores this field, it is documented as being necessary.
    if ( (fseek(file, 0x18, SEEK_SET) != 0)
        || (fread(&reloc_offset, sizeof(reloc_offset), 1, file) != 1)
        || (reloc_offset != 0x40) )     // EXCEL.EXE is oddball case
        FatalExit("Not a New Executable file\n");

    // Seek to and read in the offset of the 'NE' header.  Then, go
    // read in the 'NE' header.
    if ( (fseek(file, 0x3C, SEEK_SET) != 0)
        || (fread(&nehdr_offset, sizeof(nehdr_offset), 1, file) != 1)
        || (fseek(file, (long)nehdr_offset, SEEK_SET) != 0)
        || (fread(&nehdr, sizeof(nehdr), 1, file) != 1) )
        FatalExit("Not a New Executable file\n");

    // Verify that it's an 'NE' file, as opposed to an 'LE', 'LX', or
    // 'PE' file.
    if ( nehdr.signature != NEWEXE_SIGNATURE )
        FatalExit("Not a New Executable file\n");
}

//
// Routine to calculate how much of the file is debug information at
// the end.  Detects both Borland and MS debug information.
//
// The last 8 bytes of the file are read as two UINT32 fields: a
// signature followed by a length.  If the low 16 bits of the signature
// are 'NB', the length is taken as the distance from the end of the
// file back to the start of the debug information, where a second 'NB'
// signature must be found.  DebugInfoSize is set only when both
// signatures check out; on any mismatch or read failure it is left
// untouched.
//
void CalculateDebugInfoSize(void)
{
    UINT32 DebugSignature;
    UINT32 DebugLength;
    long fileEnd;

    // Find the end of the file so that every later seek can use an
    // absolute, non-negative position.
    if ( fseek(file, 0L, SEEK_END) != 0 )
        return;
    fileEnd = ftell(file);
    if ( fileEnd < 8 )          // Also covers ftell() failing with -1
        return;

    // Look for the 'NB' signature near the end of the file
    if ( fseek(file, fileEnd - 8, SEEK_SET) != 0 )
        return;
    if ( fread(&DebugSignature, sizeof(DebugSignature), 1, file) != 1 )
        return;
    if ( (UINT16)DebugSignature != DEBUG_SIGNATURE )
        return;

    // The length of the debug info immediately follows the signature.
    if ( fread(&DebugLength, sizeof(DebugLength), 1, file) != 1 )
        return;

    // A length larger than the file cannot be valid.  Checking it here
    // also makes the conversion to long below safe.
    if ( (unsigned long)DebugLength > (unsigned long)fileEnd )
        return;

    // Seek back to the start of the debug info and verify the 2nd 'NB'
    // signature there.  The position is computed in signed arithmetic;
    // negating the unsigned length would yield a large positive offset
    // wherever long is wider than UINT32.
    if ( fseek(file, fileEnd - (long)DebugLength, SEEK_SET) != 0 )
        return;
    if ( fread(&DebugSignature, sizeof(DebugSignature), 1, file) != 1 )
        return;
    if ( (UINT16)DebugSignature != DEBUG_SIGNATURE )
        return;

    DebugInfoSize = DebugLength;
}

//
// Based upon the size of the file (not counting debug information),
// calculates the smallest alignment size that can be used for the file.
// If the size would be below 16 bytes, it sets the size to 16 bytes.
//
void CalculateOptimalAlignSize(void)
{
    UINT32 RealFileSize;
    UINT16 shiftcount=0;
    
    fseek(file, 0, SEEK_END);
    FileSize = ftell(file);
    
    CalculateDebugInfoSize();
    RealFileSize = FileSize - DebugInfoSize;
        
    while ( (1L<<shiftcount) <= (RealFileSize>>16) )
        shiftcount++;

    if ( shiftcount < 4 )
        shiftcount = 4;
    
    OptimalAlignment = 1 << shiftcount;
}

//
// Iterates through the segment table, calculating how much space
// the linker is wasting between the end of one segment, and the start
// of the next sector boundary.  The waste that would have occured even
// at the optimal alignment size is also calculated, and not counted
// as wasted space.
//
// After calculating the waste for the segments, the routine walks
// through the resources, and applies an average amount of waste for
// each resource.  It's not easy to calculate the real waste from
// resources, as that would involve parsing each resource type to find
// the actual end of the resource.
//
void CalculateWastedAlignSpace(void)
{
    UINT16 i;
    SEGMENT_RECORD segrec;
    RESOURCE_HEADER rsrc_hdr;
    UINT16 AverageResourceWaste;

    //
    // Don't bother doing anything if the alignment is already optimal.
    //
    CurrentAlignment = 1 << nehdr.shift_count;
    if ( CurrentAlignment <= OptimalAlignment )
        return;
    
    for ( i=0; i < nehdr.segment_count; i++ )
    {
        UINT16 WasteWithOptimalAlign;
        UINT16 WasteWithCurrentAlign;
        UINT32 SegLengthWithFixups;

        // Seek to and read in the next segment in the segment table
        fseek(file, nehdr_offset+nehdr.segment_offset+(i*sizeof(segrec)),
            SEEK_SET);
        fread(&segrec, sizeof(segrec), 1, file); 

        SegLengthWithFixups = segrec.segment_length;

        // If the segment has fixups, we have to factor them in too,
        // in order to accurately find the end of the segment.
        if ( segrec.reloc_info )
        {
            UINT16 cFixups;
            
            fseek(file,
                    (segrec.sector_offset * (UINT32)CurrentAlignment)
                    + segrec.segment_length,
                    SEEK_SET);
            fread(&cFixups, sizeof(cFixups), 1, file);
            SegLengthWithFixups += ((cFixups * 8L) +2);
        }
                
        WasteWithOptimalAlign =
            OptimalAlignment - (SegLengthWithFixups%OptimalAlignment);
        WasteWithCurrentAlign =
            CurrentAlignment - (SegLengthWithFixups%CurrentAlignment);

        // Take care of the special case where the segment fits
        // exactly into a multiple of a sector length.
        if ( WasteWithOptimalAlign == OptimalAlignment )
            WasteWithOptimalAlign = 0;       
        if ( WasteWithCurrentAlign == CurrentAlignment )
            WasteWithCurrentAlign = 0;       

        WastedSegmentAlignSpace += 
            (WasteWithCurrentAlign - WasteWithOptimalAlign);
    }

    // Any resources?  We're done if not.
    if ( (nehdr.resident_offset - nehdr.resource_offset) == 0 )
        return;

    AverageResourceWaste = (CurrentAlignment - OptimalAlignment) / 2;
    
    fseek( file, nehdr_offset + nehdr.resource_offset + 2, SEEK_SET );  
    while ( 1 )
    {
        // Read in the next resource header
        fread(&rsrc_hdr, sizeof(rsrc_hdr), 1, file);
        if ( rsrc_hdr.type_id == 0 )
            break;
        
        // Compute waste from this group of resources
        WastedResourceAlignSpace +=
            ( rsrc_hdr.resource_count * AverageResourceWaste );

        // Seek to the start of the next resource header
        fseek( file, 
            rsrc_hdr.resource_count * sizeof(RESOURCE_ENTRY), SEEK_CUR);
    }
}

//
// Scans a code segment image held in memory for exportable prologue
// code, incrementing ExportedPrologues for every occurrence found.
//
//   data     - first byte of the segment image
//   length   - number of bytes at 'data'
//   prologue - opcode bytes to look for; sizeof(ExportPrologue) bytes
//              of it are compared
//
// A match is only counted when a 55h byte (PUSH BP) appears somewhere
// in the (up to) nine bytes in front of it.  The final
// PROLOGUE_EXTRA_SIZE bytes of the segment are never used as the
// starting point of a match.
//
void ScanSegmentForPrologues(char *data, UINT16 length, char *prologue)
{
    char *cursor;       // Where the search currently stands
    char *limit;        // One past the last position worth searching
    UINT16 span;        // Bytes left between cursor and limit
    UINT16 lookBehind;  // How far back to hunt for the PUSH BP

    // Too short to hold a prologue at all
    if ( length <= PROLOGUE_EXTRA_SIZE )
        return;

    limit = data + (length - PROLOGUE_EXTRA_SIZE);

    for ( cursor = data; cursor < limit; )
    {
        span = limit - cursor;

        // Jump straight to the next candidate first byte, if any
        cursor = memchr(cursor, prologue[0], span);
        if ( cursor == NULL )
            return;

        // First byte matched, but the rest didn't: move on by one
        if ( memcmp(cursor, prologue, sizeof(ExportPrologue)) != 0 )
        {
            cursor++;
            continue;
        }

        // Confirm the match by finding a PUSH BP shortly before it.
        // The look-behind distance is clamped so that it never reaches
        // in front of the start of the segment.
        lookBehind = (cursor-data >= 9) ? 9 : (cursor-data);
        if ( memchr(cursor-lookBehind, 0x55, lookBehind) != NULL )
        {
            ExportedPrologues++;
            cursor += PROLOGUE_EXTRA_SIZE;
        }
        else
            cursor++;
    }
}

//
// Walks the segment table and, for every segment that is not a data
// segment, loads the segment's bytes from the file and hands them to
// ScanSegmentForPrologues(), which counts export prologues in the
// global ExportedPrologues.
//
// Segment file positions are computed from CurrentAlignment, so this
// must run after CalculateWastedAlignSpace() has set it.
//
void CalculateWastedExportPrologues(void)
{
    SEGMENT_RECORD segrec;
    UINT16 i;
    char *segdata;
    
    for ( i=0; i < nehdr.segment_count; i++ )
    {
        // Seek to and read in the next segment table entry.  Stop if
        // the table can't be read (truncated file).
        if ( fseek(file, nehdr_offset+nehdr.segment_offset
                    +(i*sizeof(segrec)), SEEK_SET) != 0 )
            break;
        if ( fread(&segrec, sizeof(segrec), 1, file) != 1 )
            break;
        if (segrec.segment_type & 1)    // Is it a data segment?
            continue;                   // Ignore it if so.

        // Nothing to scan in an empty segment.  Skipping it here also
        // avoids malloc(0), which is allowed to return NULL and would
        // otherwise end the scan of all remaining segments.
        if ( segrec.segment_length == 0 )
            continue;
        
        if ( fseek(file, segrec.sector_offset * (UINT32)CurrentAlignment,
                    SEEK_SET) != 0 )
            continue;

        segdata = malloc(segrec.segment_length);
        if ( !segdata )
            return;

        // Only scan when the whole segment was read; after a short read
        // part of the buffer would be uninitialized.
        if ( fread(segdata, segrec.segment_length, 1, file) == 1 )
            ScanSegmentForPrologues(segdata, segrec.segment_length,
                ExportPrologue);
        free(segdata);
    }   
}

//
// Walks the entry table bundle by bundle.  Every entry whose flags
// have bit 0 clear is counted in WastedEntryCount, and its size is
// added to WastedEntrySpace; every entry with bit 0 set is counted in
// ExportedEntries.
//
// Each bundle starts with a count byte and a segment indicator byte:
//   count 0          - end of the table
//   indicator FFh    - 'count' MOV_ENTRY records follow
//   indicator 0      - no entry records are read for the bundle
//   anything else    - 'count' FIX_ENTRY records follow
//
// A short read (truncated or corrupt file) stops the walk.
//
void CalculateUnneededEntries(void)
{
    UINT16 size;
    UINT8 bundleCount;
    UINT8 segnum;
    UINT8 i;
    FIX_ENTRY fixedEntry;
    MOV_ENTRY moveableEntry;
    
    // Nothing to do when the entry table is empty
    size = nehdr.nonres_name_offset - nehdr.entry_offset - nehdr_offset;
    if ( size == 0 )
        return;
    
    if ( fseek(file, nehdr.entry_offset + nehdr_offset, SEEK_SET) != 0 )
        return;
    
    while ( 1 )
    {
        // Without these checks a failed read would leave the previous
        // (non-zero) values in place and the loop would never end.
        if ( fread(&bundleCount, sizeof(bundleCount), 1, file) != 1 )
            break;
        if (bundleCount == 0)
            break;
        if ( fread(&segnum, sizeof(segnum), 1, file) != 1 )
            break;
    
        if ( segnum == 0xFF )
        {
            for (i=0; i < bundleCount; i++)
            {
                if ( fread(&moveableEntry, sizeof(moveableEntry),
                            1, file) != 1 )
                    return;
                if ( (moveableEntry.eflags & 1) == 0 )
                {
                    WastedEntrySpace += sizeof(moveableEntry);
                    WastedEntryCount++;
                }
                else
                    ExportedEntries++;
            }           
        }
        else if (segnum != 0 )
        {
            for (i=0; i < bundleCount; i++)
            {
                if ( fread(&fixedEntry, sizeof(fixedEntry),
                            1, file) != 1 )
                    return;
                if ( (fixedEntry.eflags & 1) == 0 )
                {
                    WastedEntrySpace += sizeof(fixedEntry);
                    WastedEntryCount++;
                }
                else
                    ExportedEntries++;
            }
        }
    }
}

//
// Walks the resident names table.  Each record is a length byte, that
// many name characters, and an ordinal; a length of 0 ends the table.
//
// A name is counted as possibly unneeded (WastedResNamesCount, with
// its record size added to WastedResNamesSpace) unless its ordinal is
// 0, it is exactly "WEP", or it contains "EXPORTEDSTUB".
//
// A short read (truncated or corrupt file) stops the walk.
//
void ExamineResidentNamesTable(void)
{
    UINT16 ordinal;
    UINT8 length;
    char buffer[256];
    
    if ( fseek(file, nehdr.resident_offset + nehdr_offset, SEEK_SET) != 0 )
        return;
    
    while ( 1 )
    { 
        // Without these checks a failed read would leave the previous
        // (non-zero) length in place and the loop would never end.
        if ( fread(&length, sizeof(length), 1, file) != 1 )
            break;
        if ( length == 0 )
            break;
        if ( fread(buffer, length, 1, file) != 1 )
            break;
        buffer[length] = 0;     // Names in the file aren't terminated
        if ( fread(&ordinal, sizeof(ordinal), 1, file) != 1 )
            break;
        
        if ( (ordinal != 0) &&
            (strcmp(buffer, "WEP") != 0) &&
            (strstr(buffer,"EXPORTEDSTUB") == 0) )
        {
            // The length byte, the name itself, and the ordinal
            WastedResNamesSpace += (length+3);
            WastedResNamesCount++;
        }
    }
}

//
// Prints the results gathered by the Calculate*/Examine* routines.
// Categories with nothing to report are left out.
//
// The total is the sum of the segment alignment, resource alignment,
// entry table, debug information and export prologue figures.  The
// resident names figure is reported but is not part of the total.
//
// Every value handed to a %lu or %u conversion is cast to the exact
// type the conversion expects, so the output doesn't depend on how
// UINT16 and UINT32 happen to be defined.
//
void DoReports(void)
{
    UINT32 totalWastedSpace;
    UINT32 wastedExportPrologueSpace=0;
    
    output("File: %s\n", filename);

    output("Current alignment: %u\n"
           "Optimal alignment: %u\n",
               (unsigned)CurrentAlignment, (unsigned)OptimalAlignment);
        
    if ( WastedSegmentAlignSpace )
        output("Wasted segment align space: %lu bytes\n",
            (unsigned long)WastedSegmentAlignSpace);
    
    if ( WastedResourceAlignSpace )
        output("Estimated wasted resource align space: %lu bytes\n",
            (unsigned long)WastedResourceAlignSpace);

    // More prologues found in the code than entries that are actually
    // exported means the surplus prologues are unneeded.
    if ( ExportedPrologues >  ExportedEntries )
    {
        UINT32 unneededPrologues = ExportedPrologues - ExportedEntries;
        wastedExportPrologueSpace = unneededPrologues * PROLOGUE_EXTRA_SIZE;
        output("%lu unneeded export prologues using %lu bytes",
            (unsigned long)unneededPrologues,
            (unsigned long)wastedExportPrologueSpace);
        output("\n");
    }

    if ( WastedEntrySpace )
        output("%u unneeded real mode entries - %lu bytes\n",
            (unsigned)WastedEntryCount, (unsigned long)WastedEntrySpace);

    if ( WastedResNamesSpace )
        output("%u possibly unneeded resident names - %lu bytes\n",
            (unsigned)WastedResNamesCount,
            (unsigned long)WastedResNamesSpace);
        
    if ( DebugInfoSize )
        output("Debug Information - %lu bytes\n",
            (unsigned long)DebugInfoSize);
        
    totalWastedSpace =  WastedSegmentAlignSpace +
                        WastedResourceAlignSpace +
                        WastedEntrySpace +
                        DebugInfoSize +
                        wastedExportPrologueSpace;

    if (totalWastedSpace)
        output("Total wasted space: %lu bytes (%u%%)\n\n",
            (unsigned long)totalWastedSpace,
            // Guard the division in case FileSize was never set
            FileSize ? (unsigned)((totalWastedSpace*100L)/FileSize) : 0U);
    else
        output("File checks out fine\n\n");
} 

//
// Runs the complete analysis of the file named by 'filename' and
// prints the report.  The order of the steps matters: the file has to
// be opened and its header read before anything else, and
// CalculateWastedAlignSpace() sets CurrentAlignment, which
// CalculateWastedExportPrologues() needs to locate segment data.
//
void DoFileReport(void)
{       
    // Opens 'file' and loads the 'NE' header (exits on failure)
    OpenAndVerifyFile();

    // Sets FileSize, DebugInfoSize and OptimalAlignment
    CalculateOptimalAlignSize();
    
    // Sets CurrentAlignment; totals segment and resource align waste
    CalculateWastedAlignSpace();

    // Counts export prologues found in the non-data segments
    CalculateWastedExportPrologues();

    // Counts exported and non-exported entry table entries
    CalculateUnneededEntries();

    // Counts resident names that may not be needed
    ExamineResidentNamesTable();

    DoReports();
    
    fclose(file);
}

//
// Program entry point.  The banner is printed before the command line
// is checked, so it appears ahead of the usage message as well.
// Returns 0 when the report completes; fatal errors leave the program
// through FatalExit() instead of returning here.
//
int main(int argc, char *argv[])
{
    output("EXESIZE - 1993 Matt Pietrek\n");
    
    ParseCommandLine(argc, argv);
    
    DoFileReport();
    
    return 0;
}
