/* $Id: regiondbdef.h 222139 2020-03-13 00:15:01Z twu $ */
#ifndef REGIONDBDEF_INCLUDED
#define REGIONDBDEF_INCLUDED
#ifdef HAVE_CONFIG_H
#include <config.h>		/* For HAVE_PTHREAD */
#endif

#include "genomicpos.h"
#include "access.h"
#include "types.h"

#ifdef PMAP
#include "alphabet.h"
#endif


/* BADVAL: -1 converted to Univcoord_T; presumably the marker for an
   invalid coordinate (confirm against callers).  The expansion is fully
   parenthesized so it always acts as a single operand: without the
   outer parentheses, "sizeof BADVAL" would parse as
   "sizeof(Univcoord_T) - 1". */
#define BADVAL ((Univcoord_T) -1)

/* Compression types: integer codes identifying a compression scheme.
   NOTE(review): the code that reads or writes these codes is not
   visible in this header. */
#define NO_COMPRESSION 0	/* No compression applied */
#define BITPACK64_COMPRESSION 1	/* presumably 64-bit bit-packing -- confirm against the user of this code */


/* A regiondb has a series of regional offsets and regional positions blocks.
   It is designed to allow GSNAP to find 6-mers within a given genomic
   region quickly.  Starting with the 2020 versions of this package, 
   the offsets and positions blocks are not compressed.
   The fixed size of offsets and positions blocks allows them to be
   located without any tables.

   A region is 65536 bp (4^8 = 2^16) in the genome.  Oligos are 6-mers,
   so there are 4^6 = 4096 distinct oligos.  The positions in a region
   can be represented by an unsigned short with 16 bits, because
   2^16 = 65536.  The regional offsets are a set of 4^6 pointers into
   the regional positions.  These offsets require 4^6 = 4096 unsigned
   shorts.

   To enhance memory access, the offsets and positions for a region
   are placed next to each other, occupying 4096 + 65536 = 69632
   unsigned shorts.
*/


/* Number of genomic positions per region (2^16).  The literal is
   unsigned in the default build and unsigned long long when
   LARGE_GENOMES is defined. */
#ifndef LARGE_GENOMES
#define REGION_LENGTH 65536U	/* 2^16 */
#else
#define REGION_LENGTH 65536ULL	/* 2^16 */
#endif


/* T is a temporary alias for the struct tag Regiondb_T; it is undefined
   again right after the struct so the short name does not leak into
   files that include this header. */
#define T Regiondb_T
/* Descriptor for a regiondb: the parameters that fix the size of each
   region's block, plus the bookkeeping for, and pointer to, the data
   array itself. */
struct T {
#ifdef PMAP
  /* These two fields exist only when PMAP is defined. */
  Alphabet_T alphabet;
  int alphabet_size;		/* presumably the number of symbols in alphabet -- confirm */
#endif

  Width_T region1part;		/* generally 6 */
  Width_T region1interval;	/* always 1 */
  size_t offsets_size;		/* e.g., 4^6 = 4096 */
  size_t region_size;		/* offsets_size + REGION_LENGTH */

  /* NOTE(review): the fields below appear to describe how the regiondb
     array was obtained (access mode, shared-memory id/key, file
     descriptor, length); their exact semantics are defined by the code
     that fills them in, which is not visible here.  In particular,
     confirm whether regiondb_len is counted in bytes or in elements. */
  Access_T regiondb_access;
  int regiondb_shmid;
  key_t regiondb_key;
  int regiondb_fd;
  size_t regiondb_len;
  UINT2 *regiondb;		/* Values range from 0..65535 */
};

#undef T
#endif

