/*
 *  This file is part of ixemul.library for the Amiga.
 *  Copyright (C) 1991, 1992  Markus M. Wild
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public
 *  License along with this library; if not, write to the Free
 *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *  buddy-alloc.c,v 1.1.1.1 1994/04/04 04:29:43 amiga Exp
 *
 *  buddy-alloc.c,v
 * Revision 1.1.1.1  1994/04/04  04:29:43  amiga
 * Initial CVS check in.
 *
 *  Revision 1.3  1992/10/20  16:11:01  mwild
 *  fixed NASTY bug: MINSIZE must cover the whole size of a block, before
 *  a block on the freelist would cover 16 bytes, but was only assigned 8 bytes...
 *  added numerous additional debugging-section (ifdef'able). Mainly to find the
 *  above bug;-)
 *  resort to Forbid/Permit locking when dealing with PUBLIC memory, as that
 *  can be used (and is used) inside Forbid.
 *
 *  Revision 1.2  1992/09/14  01:40:24  mwild
 *  change from using aligned blocks (obtained thru an AllocMem/FreeMem/AllocAbs
 *  hack) to using non-aligned blocks. The price for this is an additional
 *  field in every allocated block.
 *
 *  In the same run, change Forbid/Permit into Semaphore locking.
 *
 *  Revision 1.1  1992/05/22  01:42:33  mwild
 *  Initial revision
 *
 */
#define KERNEL
#include "ixemul.h"
#include <exec/memory.h>
#include <stddef.h>

/* #define TEST_EXEC_BLOCKS */
/* #define CLOBBER_CHECK */
/* if you enable CLOBBER_CHECK, make sure to adjust MINLOG2 accordingly!! */

#undef DEBUG

#ifdef TEST
#ifndef PRETTY_STABLE
#define AllocMem(size,attr)	test_allocmem(size,attr)
#define AllocAbs(size,buf)	test_allocabs(size,buf)
#define FreeMem(buf,size)	test_freemem(buf,size)
#define Forbid()
#define Permit()
#define ix_panic(str) puts(str)
#define P(arg) printf arg
#else
#define P(arg)
#define ix_panic(arg)
#endif
#else
#ifdef DEBUG
#define P(arg) kprintf arg
#else
#define P(arg)
#endif
#endif

/* this provides a straight replacement for AllocMem() and FreeMem().
   Being this, it does *not* remember the size of allocation, the
   clients have to do this instead. */

/* NOTE: currently only two pools are supported, MEMF_PUBLIC and
         ! MEMF_PUBLIC. No MEMF_CHIP pools are needed by the library
         and are thus not supported */




/* TUNING: The two parameters that can be adjusted to fine tune
           allocation strategy are MAXSIZE and BUDDY_LIMIT. By setting
           MAXSIZE larger than BUDDY_LIMIT results in less Exec
           overhead, since blocks stay longer in the buddy system.
           Setting MAXSIZE==BUDDY_LIMIT sets memory usage to the
           minimum, at the cost of more Exec calls. */


/* no request for memory can be lower than this */
#define MINLOG2		4
#define MINSIZE		(1 << MINLOG2)

/* this is the size the buddy system gets memory pieces from Exec */
#ifdef TEST_EXEC_BLOCKS
#define MAXLOG2		20	/* get (one) 1M chunk */
#define MAXSIZE		(1 << MAXLOG2)
void *exec_block_address[2] = { 0 };
#else
#define MAXLOG2		15	/* get 32K chunks */
#define MAXSIZE		(1 << MAXLOG2)
#endif

/* this is the limit for b_alloc to go straight to Exec */
#define BUDDY_LIMIT	(1 << (MAXLOG2 - 5))	/* but serve only upto 1K */



#define PRIVATE_POOL	0
#define PUBLIC_POOL	1
#define NUMPOOLS	2	/* public and !public */
/* attention: don't go larger than 3 pools, or you'll have to change the
              encoding in free_block (only 2 bits for now) */

struct free_list {
  u_int  exec_attr;
  struct SignalSemaphore sem;
  struct MinList buckets[MAXLOG2 - MINLOG2];
} free_list[NUMPOOLS] = { { 0, }, { MEMF_PUBLIC, } };


struct free_block {
  /* to make the smallest allocatable block 16, and not 32 byte, stuff both
     the freelist information and the exec-block address into one long. */
  u_int	pool:2,		/* 0: block is free, > 0: POOL + 1 */
  	exec_block:30;	/* shift left twice to get the real address */

#ifdef CLOBBER_CHECK
  u_int  magic[5];
#endif

  /* from here on, fields only exist while the block is on the free list.
     The application sees a block as a chunk of memory starting at &next */
  struct free_block *next, *prev;	/* min-node compatible */
  int index;
};


void
init_buddy (void)
{
  int i, l; 

  /* don't want such a nightmare of bug-hunt any more... */
  if (sizeof (struct free_block) > MINSIZE)
    {
      ix_panic ("buddy-system: MINSIZE/MINLOG2 too small, increase!");
      Wait (0);
    }

  for (l = 0; l < NUMPOOLS; l++)
    {
      for (i = 0; i < MAXLOG2 - MINLOG2; i++)
        NewList ((struct List *) &free_list[l].buckets[i]);

      InitSemaphore (&free_list[l].sem);
    }
}

static inline struct free_block *
unlink_block (u_int free_pool, u_char ind, void *block)
{
  struct free_block *fb = (struct free_block *) block;
  struct free_list *fl = free_list + free_pool;

  if (! fb)
    {
      fb = (struct free_block *) RemHead ((struct List *) &fl->buckets[ind]);
      if (fb)
	{
	  fb = (struct free_block *) ((int)fb - offsetof (struct free_block, next));
	  fb->pool = free_pool + 1;
  P(("    unlink_block (%s, %ld) == $%lx\n", 
     free_pool == PRIVATE_POOL ? "PRIVATE" : (free_pool == PUBLIC_POOL ? "PUBLIC" : "BOGOUS"), ind, fb));

	}
    }
  else
    {
  P(("    unlink_block (%s, %ld, $%lx)\n", 
     free_pool == PRIVATE_POOL ? "PRIVATE" : (free_pool == PUBLIC_POOL ? "PUBLIC" : "BOGOUS"), ind, fb));

      fb->pool = free_pool + 1;
      Remove ((struct Node *) &fb->next);
    }

  return fb;
}

#if defined(DEBUG) || defined(TEST)
void
check_empty (int pool)
{
  int i;
  struct free_block *fb;
  
  for (i = 0; i < MAXLOG2 - MINLOG2; i++)
    {
      while (fb = unlink_block (pool, i, 0))
        {
          P(("%s POOL: $%lx block @$%lx\n",
             pool ? "PUBLIC" : "PRIVATE", (1 << (i + MINLOG2)), fb));
	}
    }
}
#endif

static void inline
link_block (u_int free_pool, u_char ind, void *block)
{
  struct free_block *fb = (struct free_block *) block;
  struct free_list *fl = free_list + free_pool;

  P(("    link_block (%s, %ld, $%lx)\n", 
     free_pool == PRIVATE_POOL ? "PRIVATE" : (free_pool == PUBLIC_POOL ? "PUBLIC" : "BOGOUS"), ind, fb));

  fb->pool = 0;	/* we're on the freelist of this pool */
  fb->index = ind; /* and of this size */
  AddHead ((struct List *) &fl->buckets[ind], (struct Node *) &fb->next);
}

/* this is a very special log2() function that knows the upper bound
   of its argument, and also automatically rounds to the next upper
   power of two */

static inline int const
log2 (int size)
{
  int pow = MAXLOG2;
  int lower_bound = 1 << (MAXLOG2 - 1);

  for (;;)
    {
      if (size > lower_bound)
        return pow;

      lower_bound >>= 1;
      pow--;
    }
}


static inline struct free_block *
get_block (u_int free_pool, u_char index)
{
  struct free_block *fb, *buddy;
  struct free_list *fl = free_list + free_pool;

  P(("  get_block (%s, %ld)\n", 
     free_pool == PRIVATE_POOL ? "PRIVATE" : (free_pool == PUBLIC_POOL ? "PUBLIC" : "BOGOUS"), index, fb));

#ifdef DEBUG
  if (free_pool >= NUMPOOLS)
    {
      ix_panic ("get_block: illegal free list!!");
      return;
    }
#endif

  if (index == (MAXLOG2 - MINLOG2))
    {
      fb = (struct free_block *) AllocMem (MAXSIZE, fl->exec_attr);
      if (! fb)
        return 0;

#ifdef TEST_EXEC_BLOCKS
      if (! exec_block_address[0])
	exec_block_address[0] = fb;
      else if (! exec_block_address[1])
        exec_block_address[1] = fb;
      else
	;
#endif

      fb->exec_block = (int)fb >> 2; /* buddies are relative to this base address */
      fb->pool = free_pool + 1; /* not free */

      return fb;
    }
  else if (fb = unlink_block (free_pool, index, 0))
    return fb;

  fb = get_block (free_pool, index + 1);

#ifdef TEST_EXEC_BLOCKS
  if ((void*)(fb->exec_block<<2) != exec_block_address[0] && (void*)(fb->exec_block<<2) != exec_block_address[1])
    {
      u_int *sp = get_sp(), i;
      kprintf ("get_block: invalid exec_block: $%lx, eb[0] = $%lx, eb[1] = $%lx\n", fb->exec_block<<2, exec_block_address[0], exec_block_address[1]);
      for (i = 0; i < 48; i++)
	kprintf ("  %08lx", *sp++);
      kprintf("\n");
    }
#endif

#ifdef DEBUG
  if (! fb->exec_block || fb->exec_block<<2 < 0x7400000)
    {
      *(volatile long *)0xD5ADD5AD = fb;
      ix_panic ("get_block: invalid exec_block!");
      Wait (0);
    }
#endif

  if (fb)
    {
      /* when splitting a block, we always free the upper buddy. So
         we can just add the size, instead of or'ing the offset to the
         Exec memory block */
      buddy = (struct free_block *)((int)fb + (1 << (index + MINLOG2)));

#ifdef DEBUG
      if (fb == buddy)
	ix_panic ("get_block: | operator should be ^ !!");
#endif

      buddy->exec_block = fb->exec_block;
      link_block (free_pool, index, buddy);
    }

  return fb;
}


static inline void
free_block (u_int free_pool, u_char index, struct free_block *fb)
{
  struct free_block *buddy;

  P(("  free_block (%s, %ld, $%lx)\n", 
     free_pool == PRIVATE_POOL ? "PRIVATE" : (free_pool == PUBLIC_POOL ? "PUBLIC" : "BOGOUS"), index, fb));

#ifdef DEBUG
  if (! fb)
    {
      ix_panic ("free_block: zero block!!");
      return;
    }
 
  if (free_pool >= NUMPOOLS)
    {
      ix_panic ("free_block: illegal free list in block!!");
      return;
    }
#endif

#ifdef TEST_EXEC_BLOCKS
  if ((void*)(fb->exec_block<<2) != exec_block_address[0] && (void*)(fb->exec_block<<2) != exec_block_address[1])
    {
      u_int *sp = get_sp(), i;
      kprintf ("free_block: invalid exec_block: $%lx, eb[0] = $%lx, eb[1] = $%lx\n", fb->exec_block<<2, exec_block_address[0], exec_block_address[1]);
      for (i = 0; i < 48; i++)
	kprintf ("  %08lx", *sp++);
      kprintf("\n");
    }
#endif

#ifdef DEBUG
  if (! fb->exec_block || (int)fb->exec_block<<2 < 0x7400000)
    {
      *(volatile long *)0xD5ADD5AD = fb;
      ix_panic ("free_block: invalid exec_block!");
      Wait (0);
    }
#endif

  buddy = (struct free_block *)
	  ((((int)fb - (fb->exec_block<<2)) ^ (1 << (index + MINLOG2)))
	   + (fb->exec_block<<2));

  if (index == (MAXLOG2 - MINLOG2))
    {
#ifdef DEBUG
      if (fb != fb->exec_block<<2)
        ix_panic ("free_block: fb != fb->exec_block!");
#endif 
      FreeMem (fb, MAXSIZE);
      return;
    }
  else if (buddy->pool || buddy->index != index)
    {
      /* too bad, buddy is not on freelist or of wrong size */
      link_block (free_pool, index, fb);
      return;
    }

  /* reserve the buddy, then recombine both */
  unlink_block (free_pool, index, buddy);

  /* since the buddy is free as well, recombine both blocks
     and free the twice as large block */
  free_block (free_pool, index + 1, fb < buddy ? fb : buddy);
}


void *
b_alloc (int size, unsigned pool)
{
  u_char bucket;
  struct free_block *block;
  struct free_list *fl = free_list + pool;

#ifdef DEBUG
  if (pool >= NUMPOOLS)
    {
      ix_panic ("b_alloc: illegal pool spezified!");
      return 0;
    }
#endif

  if (size < MINSIZE)
    size = MINSIZE;

  /* the additional bytes are needed for the freelist pointer at
     the beginning of each block in use and the base block originally
     obtained from Exec. */
    
  if (size >= BUDDY_LIMIT - offsetof (struct free_block, next))
    return AllocMem (size, pool == PUBLIC_POOL ? MEMF_PUBLIC : 0);

  size += offsetof (struct free_block, next);

  bucket = log2 (size) - MINLOG2;

  /* have to differentiate between PUBLIC and PRIVATE memory here, sigh. 
     PRIVATE memory can safely be accessed by using a semaphore, PUBLIC
     memory however is allocated and free'd inside Forbid(), and using a
     semaphore there would possibly break a Forbid..
     Note: this is safe for use in GigaMem, as GigaMem only uses non-PUBLIC
	   memory, if you don't fiddle with attribute masks.. */
  if (pool == PRIVATE_POOL)
    ObtainSemaphore (&fl->sem);
  else
    Forbid();
  block = get_block (pool, bucket);
  if (pool == PRIVATE_POOL)
    ReleaseSemaphore (&fl->sem);
  else
    Permit();
 
#ifdef DEBUG
  if (!block)
    ix_panic ("b_alloc: out of memory");
#endif

  P(("b_alloc ($%lx, %ld) = $%lx\n", size, pool, block));

#ifdef CLOBBER_CHECK
  { int i;
    for (i = 0; i < sizeof(block->magic)/4; i++)
      block->magic[i] = 0x12345678;
  }
#endif

  if (block)
    return (void *) & block->next;
  else
    return block;
}


void
b_free (struct free_block *fb, int size)
{
  u_char bucket;
  struct free_list *fl;
  int free_pool;

#ifdef DEBUG
  if (! fb)
    {
      *(volatile char *)0xDEADDEAD;
      ix_panic ("b_free called with 0 fb");
      _exit (20);
    }
#endif

  if (size < MINSIZE)
    size = MINSIZE;
    
  if (size >= BUDDY_LIMIT - offsetof (struct free_block, next))
    {
      FreeMem ((void *) fb, size);
      return;
    }

  size += offsetof (struct free_block, next);

  bucket = log2 (size) - MINLOG2;
  fb = (struct free_block *) ((int)fb - offsetof (struct free_block, next));
  free_pool = fb->pool - 1;
  fl = free_list + free_pool;

#ifdef CLOBBER_CHECK
{
  int i;
  for (i = 0; i < sizeof (fb->magic)/4; i++)
    if (fb->magic[i] != 0x12345678)
      {
        u_int *sp = get_sp();
        kprintf ("b_free: block clobbered before start!\n");
        kprintf ("area-dump:\n");
        for (i = -4; i < (sizeof (fb->magic)+3)/4; i++)
          kprintf ("  %08lx", ((int*)fb)[i]);
        kprintf ("\nstack-dump:\n");
        for (i = 0; i < 48; i++)
	  kprintf ("  %08lx", *sp++);
        kprintf("\n");
	break;
      }
}
#endif /* CLOBBER_CHECK */

#ifdef DEBUG
  if (free_pool >= NUMPOOLS)
    {
      u_int *sp = get_sp();
      int i;
      kprintf ("b_free: invalid freelist [$%lx]\n", fl);
      kprintf ("area-dump:\n");
      for (i = -13; i < 3; i++)
        kprintf ("  %08lx", ((int*)fb)[i]);
      kprintf ("\nstack-dump:\n");
      for (i = 0; i < 48; i++)
	kprintf ("  %08lx", *sp++);
      kprintf("\n");
      /* _exit (20);*/ /* might recurse... */
      Wait (0);
    }
#endif

  P(("b_free (@$%lx, $%lx, $%lx)\n", fl, fb, size));

  if (free_pool == PRIVATE_POOL)
    ObtainSemaphore (&fl->sem);
  else
    Forbid();
  free_block (free_pool, bucket, fb);
  if (free_pool == PRIVATE_POOL)
    ReleaseSemaphore (&fl->sem);
  else
    Permit();
}


#ifdef TEST
int
main (int argc, char *argv[])
{
#ifndef PRETTY_STABLE
  int i;
  void *allocs[16], *bllocs[13];

  init_buddy ();
  
  printf ("13 allocs of pool 0\n");
  for (i = 0; i < sizeof (allocs)/sizeof (void*); i++)
    {
      allocs[i] = b_alloc ((i + 3) * (i + 3), 0);
      memset (allocs[i], 0x34, (i + 3) * (i + 3));
    }

  printf ("13 allocs of pool 1\n");
  for (i = 0; i < sizeof (bllocs)/sizeof (void*); i++)
    {
      bllocs[i] = b_alloc ((i + 3) * (i + 3) * (i + 3), 1);
      memset (bllocs[i], 0x34, (i + 3) * (i + 3) * (i + 3));
    }

  printf ("13 frees of pool 1\n");
  for (i = sizeof (bllocs)/sizeof (void*) - 1; i >= 0 ; i--)
    b_free (bllocs[i], (i + 3) * (i + 3) * (i + 3));

  printf ("13 frees of pool 0\n");
  for (i = sizeof (allocs)/sizeof (void*) - 1; i >= 0 ; i--)
    b_free (allocs[i], (i + 3) * (i + 3));


  printf ("13 allocs of pool 0\n");
  for (i = 0; i < sizeof (allocs)/sizeof (void*); i++)
    {
      allocs[i] = b_alloc ((i + 3) * (i + 3), 0);
      memset (allocs[i], 0x34, (i + 3) * (i + 3));
    }

  printf ("13 allocs of pool 1\n");
  for (i = 0; i < sizeof (bllocs)/sizeof (void*); i++)
    {
      bllocs[i] = b_alloc ((i + 3) * (i + 3) * (i + 3), 1);
      memset (bllocs[i], 0x34, (i + 3) * (i + 3) * (i + 3));
    }

  printf ("13 frees of pool 1\n");
  for (i = sizeof (bllocs)/sizeof (void*) - 1; i >= 0 ; i--)
    b_free (bllocs[i], (i + 3) * (i + 3) * (i + 3));

  printf ("13 frees of pool 0\n");
  for (i = sizeof (allocs)/sizeof (void*) - 1; i >= 0 ; i--)
    b_free (allocs[i], (i + 3) * (i + 3));
#else
  /* run a performance comparison */
  void *allocs[10000];
  struct timeval tstart, tend, tdiff1, tdiff2;
  int size;
  int i;
  
  init_buddy ();

  if (argc <= 1)
    size = 50;
  else
    size = atoi (argv[1]) ? : 50;

#if 1
  gettimeofday (&tstart, 0);
  for (i = 0; i < sizeof (allocs)/sizeof (void*); i++)
    {
      allocs[i] = b_alloc (size, 0);
      memset (allocs[i], 0x34, size);
    }
    
  for (i = 0; i < sizeof (allocs)/sizeof (void*); i++)
    b_free (allocs[i], size);

  for (i = 0; i < sizeof (allocs)/sizeof (void*); i++)
    {
      allocs[i] = b_alloc (size, 0);
      memset (allocs[i], 0x34, size);
    }
    
  for (i = 0; i < sizeof (allocs)/sizeof (void*); i++)
    b_free (allocs[i], size);

#if 0
  for (i = 0; i < sizeof (allocs)/sizeof (void*); i++)
    allocs[i] = b_alloc (size, 0);
    
  for (i = 0; i < sizeof (allocs)/sizeof (void*); i++)
    b_free (allocs[i], size);
#endif
  gettimeofday (&tend, 0);
  tdiff1.tv_sec = tend.tv_sec - tstart.tv_sec;
  tdiff1.tv_usec = tend.tv_usec -tstart.tv_usec;
  if (tdiff1.tv_usec < 0)
    {
      tdiff1.tv_usec += 1000000;
      tdiff1.tv_sec--;
    }
#endif

#if 0
  gettimeofday (&tstart, 0);
  for (i = 0; i < sizeof (allocs)/sizeof (void*); i++)
    allocs[i] = AllocMem (size, 0);
    
  for (i = 0; i < sizeof (allocs)/sizeof (void*); i++)
    FreeMem (allocs[i], size);

  for (i = 0; i < sizeof (allocs)/sizeof (void*); i++)
    allocs[i] = AllocMem (size, 0);
    
  for (i = 0; i < sizeof (allocs)/sizeof (void*); i++)
    FreeMem (allocs[i], size);

  for (i = 0; i < sizeof (allocs)/sizeof (void*); i++)
    allocs[i] = AllocMem (size, 0);
    
  for (i = 0; i < sizeof (allocs)/sizeof (void*); i++)
    FreeMem (allocs[i], size);
  gettimeofday (&tend, 0);
  tdiff2.tv_sec = tend.tv_sec - tstart.tv_sec;
  tdiff2.tv_usec = tend.tv_usec -tstart.tv_usec;
  if (tdiff2.tv_usec < 0)
    {
      tdiff2.tv_usec += 1000000;
      tdiff2.tv_sec--;
    }
#endif

  check_empty (0);

#if 1
  printf ("delta with buddy system: %d.%06d\n", tdiff1.tv_sec, tdiff1.tv_usec);
#endif
#if 0
  printf ("delta without buddy system: %d.%06d\n", tdiff2.tv_sec, tdiff2.tv_usec);
#endif
#endif
}

#ifndef PRETTY_STABLE
test_allocmem (int size, int attr)
{
  int res = malloc (size);
  printf ("AllocMem ($%lx, %d) = $%lx\n", size, attr, res);
  return res;
}

test_freemem (void *buf, int size)
{
  printf ("FreeMem ($%lx, $%lx)\n", buf, size);
}

test_allocabs (int size, void *buf)
{
  return buf;
}
#endif
#endif
