#ifndef DYNAMITEsequenceHEADERFILE
#define DYNAMITEsequenceHEADERFILE
/* NOTE(review): C++ compilers predefine __cplusplus (two leading
 * underscores); _cplusplus is not a standard macro, so this guard looks
 * inert for C++ callers. If it is corrected, the matching closing guard
 * at the end of this header must be changed in the same commit, otherwise
 * the extern "C" brace becomes unbalanced. */
#ifdef _cplusplus
extern "C" {
#endif

#include "wisebase.h"
#include "codon.h"

#ifdef LINUX
#include "posix.h"
#endif

/* NOTE(review): presumably the increment (in chars) by which the seq
 * buffer of a Sequence is grown while parsing - this header does not
 * use the constant itself, so confirm against the implementation file. */
#define SEQUENCEBLOCK 128

/* Type codes compared against the type field of a Sequence by the
 * is_*_SequenceType / is_*_Sequence macros. Numbering starts at 64 and
 * runs consecutively; the values are spelled out so that they cannot
 * shift if an enumerator is ever inserted. */
enum SequenceType {
    SEQUENCE_UNKNOWN = 64,
    SEQUENCE_PROTEIN = 65,
    SEQUENCE_DNA     = 66,
    SEQUENCE_CDNA    = 67,
    SEQUENCE_GENOMIC = 68,
    SEQUENCE_EST     = 69,
    SEQUENCE_RNA     = 70
};

/*
 * Type-code predicates. Each expands to TRUE or FALSE.
 *
 *   is_dna_SequenceType     - TRUE for SEQUENCE_DNA, SEQUENCE_CDNA,
 *                             SEQUENCE_GENOMIC or SEQUENCE_EST
 *   is_rna_SequenceType     - TRUE for SEQUENCE_RNA only
 *   is_protein_SequenceType - TRUE for SEQUENCE_PROTEIN only
 *
 * The argument is parenthesised at every use so that an expression
 * argument (for example `flags & 0xFF`) is compared as a whole instead
 * of being split by operator precedence.
 *
 * is_dna_SequenceType may evaluate its argument up to four times, so do
 * not pass an expression with side effects.
 */
#define is_dna_SequenceType(type) (((type) == SEQUENCE_DNA || (type) == SEQUENCE_CDNA || (type) == SEQUENCE_GENOMIC || (type) == SEQUENCE_EST) ? TRUE : FALSE)
#define is_rna_SequenceType(type) (((type) == SEQUENCE_RNA) ? TRUE : FALSE)
#define is_protein_SequenceType(type) (((type) == SEQUENCE_PROTEIN) ? TRUE : FALSE)

/*
 * Object-level predicates: apply the matching is_*_SequenceType test to
 * the type field of the Sequence that seq points to.
 *
 * seq is parenthesised before the member access so that pointer
 * expressions such as `&obj` or `array + 1` can be passed directly.
 * seq must be a valid, non-NULL pointer; it is dereferenced
 * unconditionally.
 */
#define is_dna_Sequence(seq) (is_dna_SequenceType((seq)->type))
#define is_rna_Sequence(seq) (is_rna_SequenceType((seq)->type))
#define is_protein_Sequence(seq) (is_protein_SequenceType((seq)->type))

/* Object Sequence
 *
 * Descrip: The basic sequence object. It tries to hold
 *        little more than the name and the residues
 *        of a DNA or protein sequence.
 *
 *        len is the actual length of the sequence
 *        (strlen(obj->seq)); maxlen is the amount of
 *        memory allocated in obj->seq and is there
 *        mainly for parsing purposes.
 *
 *        You are strongly encouraged to use the typed
 *        counterparts of Sequence, namely Protein,
 *        cDNA and Genomic. By doing so you are much
 *        less likely to mess up algorithms which
 *        expect a specific sequence type.
 *
 */
struct bp_sw_Sequence {
    int    dynamite_hard_link; /* NOTE(review): presumably the reference count bumped by hard_link_Sequence - confirm */
    char * name;               /* name of the sequence */
    char * seq;                /* actual sequence */
    int    len;                /* length of the sequence */
    int    maxlen;             /* internal counter: how much space there is in seq */
    int    offset;             /* start (in bio-coords) of the sequence; not called start for legacy reasons */
    int    end;                /* end (in bio-coords == C coords) of the sequence */
    int    type;               /* guess of protein/dna type */
};

/* Sequence defined: the typedef and the short alias are emitted only
 * once, however many headers try to declare them. */
#ifndef DYNAMITE_DEFINED_Sequence
#define DYNAMITE_DEFINED_Sequence
typedef struct bp_sw_Sequence bp_sw_Sequence;
#define Sequence bp_sw_Sequence
#endif




    /***************************************************/
    /* Callable functions                              */
    /* These are the functions you are expected to use */
    /***************************************************/



/* Function:  new_Sequence_from_strings(name,seq)
 *
 * Descrip:    Makes a new sequence from the strings given.
 *             Separate memory is allocated for them
 *             and they are copied into it.
 *
 *             Either can be NULL, in which case
 *             o  a dummy name SequenceName will be assigned
 *             o  no sequence is placed and the length is zero.
 *
 *             A sequence-less object is dangerous later on, though.
 *
 *             The sequence type is calculated automatically using
 *             /best_guess_type. If you want a DNA sequence but are
 *             unsure of its content (for example, IUPAC codes),
 *             please use /force_to_dna_Sequence before using the
 *             sequence. Most of the rest of dynamite relies on a
 *             five letter A,T,G,C,N alphabet, but this function
 *             will allow any sequence type to be stored, so please
 *             check if you want to save yourself a lot of grief.
 *
 *             In perl and other interfaces, this is a much safer
 *             constructor than the raw "new" type
 *
 *
 * Arg:        name [READ ] name of sequence, memory is allocated for it. [char *]
 * Arg:         seq [READ ] char * of sequence, memory is allocated for it. [char *]
 *
 * Return [UNKN ]  the newly made sequence. NOTE(review): ownership and the
 *                 value returned on failure are not documented - confirm [Sequence *]
 *
 */
Sequence * bp_sw_new_Sequence_from_strings(char * name,char * seq);
#define new_Sequence_from_strings bp_sw_new_Sequence_from_strings


/* Function:  looks_like_accession(name)
 *
 * Descrip:    Returns true if name looks like [A-Za-z]+[0-9]+,
 *             i.e. it should be an accession number.
 *
 *
 * Arg:        name [READ ] name to be tested [char *]
 *
 * Return [UNKN ]  true if name matches the pattern above [boolean]
 *
 */
boolean bp_sw_looks_like_accession(char * name);
#define looks_like_accession bp_sw_looks_like_accession


/* Function:  make_len_type_Sequence(seq)
 *
 * Descrip:    Makes seq->len and seq->end match the length
 *             of the string held in seq->seq.
 *
 *             It also checks the type of the sequence with
 *             /best_guess_type
 *
 *
 * Arg:        seq [RW   ] Sequence object [Sequence *]
 *
 */
void bp_sw_make_len_type_Sequence(Sequence * seq);
#define make_len_type_Sequence bp_sw_make_len_type_Sequence


/* Function:  best_guess_type(seq)
 *
 * Descrip:    Guesses DNA or protein by adding up all
 *             the A,T,G,C residues. The sequence is considered
 *             to be DNA if
 *               len < 300 and the fraction is > 95%, or
 *               len > 300 and the fraction is > 75%.
 *             NB - Ns are not counted.
 *
 *
 * Arg:        seq [READ ] Sequence to be guessed [Sequence *]
 *
 * Return [OWNER]  SEQUENCE_DNA or SEQUENCE_PROTEIN [int]
 *
 */
int bp_sw_best_guess_type(Sequence * seq);
#define best_guess_type bp_sw_best_guess_type


/* Function:  Sequence_type_to_string(type)
 *
 * Descrip:    Converts a sequence type (SEQUENCE_*) to a string
 *
 *
 * Arg:        type [UNKN ] type, eg SEQUENCE_PROTEIN [int]
 *
 * Return [UNKN ]  string for the type. NOTE(review): whether the caller
 *                 may modify or free it is not documented - confirm [char *]
 *
 */
char * bp_sw_Sequence_type_to_string(int type);
#define Sequence_type_to_string bp_sw_Sequence_type_to_string


/* Function:  uppercase_Sequence(seq)
 *
 * Descrip:    Makes the whole sequence uppercase
 *
 *
 * Arg:        seq [RW   ] Sequence to be uppercased [Sequence *]
 *
 */
void bp_sw_uppercase_Sequence(Sequence * seq);
#define uppercase_Sequence bp_sw_uppercase_Sequence


/* Function:  force_to_dna_Sequence(seq,fraction,number_of_conver)
 *
 * Descrip:    This
 *              a) sees how many non ATGCN characters there are in seq
 *              b) if the level is below fraction
 *                 a) flips non ATGC chars to N
 *                 b) writes the number of conversions to number_of_conver
 *                 c) returns TRUE
 *              c) else returns FALSE
 *
 *             fraction of 0.0 means completely intolerant of errors
 *             fraction of 1.0 means completely tolerant of errors
 *
 *
 *
 * Arg:                     seq [RW   ] sequence object read and converted  [Sequence *]
 * Arg:                fraction [READ ] number 0..1 for tolerance of conversion [double]
 * Arg:        number_of_conver [WRITE] number of conversions actually made [int *]
 *
 * Return [READ ]  TRUE for conversion to DNA, FALSE if not [boolean]
 *
 */
boolean bp_sw_force_to_dna_Sequence(Sequence * seq,double fraction,int * number_of_conver);
#define force_to_dna_Sequence bp_sw_force_to_dna_Sequence


/* Function:  is_reversed_Sequence(seq)
 *
 * Descrip:    Currently the sequence object stores
 *             reversed sequences as start > end.
 *
 *             This tests for that and returns true if the
 *             sequence is reversed.
 *
 *
 * Arg:        seq [READ ] sequence to test [Sequence *]
 *
 * Return [UNKN ]  true if the sequence is stored reversed [boolean]
 *
 */
boolean bp_sw_is_reversed_Sequence(Sequence * seq);
#define is_reversed_Sequence bp_sw_is_reversed_Sequence


/* Function:  translate_Sequence(dna,ct)
 *
 * Descrip:    This translates a DNA sequence to a protein.
 *             It assumes that translation starts at the first
 *             residue (use trunc_Sequence to chop a sequence up).
 *
 *
 * Arg:        dna [READ ] DNA sequence to be translated [Sequence *]
 * Arg:         ct [READ ] Codon table to do codon->aa mapping [CodonTable *]
 *
 * Return [OWNER]  new protein sequence [Sequence *]
 *
 */
Sequence * bp_sw_translate_Sequence(Sequence * dna,CodonTable * ct);
#define translate_Sequence bp_sw_translate_Sequence


/* Function:  reverse_complement_Sequence(seq)
 *
 * Descrip:    This both complements and reverses a sequence,
 *             - a common wish!
 *
 *             The start/end are correct with respect to the start/end
 *             of the sequence (ie start = end, end = start).
 *
 *
 * Arg:        seq [READ ] Sequence that is the source to be reversed (a new Sequence is made) [Sequence *]
 *
 * Return [OWNER]  new Sequence which is reversed [Sequence *]
 *
 */
Sequence * bp_sw_reverse_complement_Sequence(Sequence * seq);
#define reverse_complement_Sequence bp_sw_reverse_complement_Sequence


/* Function:  magic_trunc_Sequence(seq,start,end)
 *
 * Descrip:    Clever function for dna sequences.
 *
 *             When start < end, truncates normally.
 *
 *             When start > end, truncates end,start and then
 *             reverse complements.
 *
 *             ie. if you have a coordinate system where reverse
 *             sequences are labelled in the reverse start/end way,
 *             then this routine produces the correct sequence.
 *
 *
 * Arg:          seq [READ ] sequence that is the source to be truncated [Sequence *]
 * Arg:        start [READ ] start point [int]
 * Arg:          end [READ ] end point [int]
 *
 * Return [OWNER]  new Sequence which is truncated/reversed [Sequence *]
 *
 */
Sequence * bp_sw_magic_trunc_Sequence(Sequence * seq,int start,int end);
#define magic_trunc_Sequence bp_sw_magic_trunc_Sequence


/* Function:  trunc_Sequence(seq,start,end)
 *
 * Descrip:    Truncates a sequence. It produces a new memory structure
 *             which is filled from sequence start to end.
 *
 *             Please notice
 *
 *               Truncation is in C coordinates. That is,
 *             the first residue is 0 and end is the number of the
 *             residue after the cut-point. In other words,
 *             2 - 3 would be a single residue truncation. So - if
 *             you want to work in the more usual, 'inclusive' molecular
 *             biology numbers, which start at 1, then you need to say
 *
 *               trunc_Sequence(seq,start-1,end);
 *
 *             (NB, should be (end - 1 + 1) = end for the last coordinate).
 *
 *               Truncation occurs against the *absolute* coordinate
 *             system of the Sequence, not the offset/end pair inside.
 *             So, this is a very bad error
 *
 *               ** wrong code, and also leaks memory **
 *
 *               tru = trunc_Sequence(trunc_Sequence(seq,50,80),55,75);
 *
 *             This is the most portable way of doing this
 *
 *               temp = trunc_Sequence(seq,50,80);
 *
 *               tru  = trunc_Sequence(temp,55-temp->offset,75-temp->offset);
 *
 *               free_Sequence(temp);
 *
 *
 *
 * Arg:          seq [READ ] object holding the sequence to be truncated [Sequence *]
 * Arg:        start [READ ] start point of truncation [int]
 * Arg:          end [READ ] end point of truncation [int]
 *
 * Return [OWNER]  newly allocated sequence structure [Sequence *]
 *
 */
Sequence * bp_sw_trunc_Sequence(Sequence * seq,int start,int end);
#define trunc_Sequence bp_sw_trunc_Sequence


/* Function:  read_SRS_db_Sequence(datastring,srsstring)
 *
 * Descrip:    A function for you to easily specify the sequence name
 *             and the database separately. Just concatenates the two
 *             strings with : between them. Therefore you should use
 *             "swissprot-id", for example, as your datastring.
 *
 *             calls /read_SRS_Sequence
 *
 *
 * Arg:        datastring [READ ] string representing the database (swissprot-id) [char *]
 * Arg:         srsstring [READ ] string for the name (eg, ROA1_HUMAN) [char *]
 *
 * Return [UNKN ]  the sequence read. NOTE(review): ownership and the value
 *                 returned on failure are not documented - confirm [Sequence *]
 *
 */
Sequence * bp_sw_read_SRS_db_Sequence(char * datastring,char * srsstring);
#define read_SRS_db_Sequence bp_sw_read_SRS_db_Sequence


/* Function:  read_SRS_Sequence(srsstring)
 *
 * Descrip:    Reads an SRS specified sequence. Calls popen
 *             with getz -f using srs4 syntax. Will only read
 *             the first sequence if there is more than one in the
 *             SRS spec, and does not warn you about additional
 *             sequences
 *
 *
 * Arg:        srsstring [READ ] srs spec'd string, eg swissprot-id:ROA1_HUMAN [char *]
 *
 * Return [UNKN ]  the sequence read. NOTE(review): ownership and the value
 *                 returned on failure are not documented - confirm [Sequence *]
 *
 */
Sequence * bp_sw_read_SRS_Sequence(char * srsstring);
#define read_SRS_Sequence bp_sw_read_SRS_Sequence


/* Function:  read_efetch_Sequence(efetchstring)
 *
 * Descrip:    Reads an efetch specified sequence. Calls popen to
 *             efetch. There is a hack around accession numbers so that
 *             if the thing looks like WP:acc number, efetch is called
 *             with -a... otherwise it assumes you have both database
 *             and name in the efetchstring
 *
 *
 * Arg:        efetchstring [READ ] efetch valid string [char *]
 *
 * Return [UNKN ]  the sequence read. NOTE(review): ownership and the value
 *                 returned on failure are not documented - confirm [Sequence *]
 *
 */
Sequence * bp_sw_read_efetch_Sequence(char * efetchstring);
#define read_efetch_Sequence bp_sw_read_efetch_Sequence


/* Function:  read_fasta_file_Sequence(filename)
 *
 * Descrip:    Just a call to
 *               a) open filename
 *               b) read a sequence with /read_fasta_Sequence
 *               c) close the file.
 *
 *
 * Arg:        filename [READ ] filename to open  [char *]
 *
 * Return [UNKN ]  the sequence read. NOTE(review): the value returned when
 *                 the file cannot be opened is not documented - confirm [Sequence *]
 *
 */
Sequence * bp_sw_read_fasta_file_Sequence(char * filename);
#define read_fasta_file_Sequence bp_sw_read_fasta_file_Sequence


/* Function:  read_Sequence_EMBL_seq(buffer,maxlen,ifp)
 *
 * Descrip:    Reads the sequence part of an EMBL file.
 *
 *             This function can either take a file which
 *             starts
 *
 *             NOTE(review): the description above is cut off in the
 *             generated header; the alternatives it was about to list
 *             need to be recovered from the implementation file.
 *
 *
 * Arg:        buffer [RW   ] buffer containing the first line. [char *]
 * Arg:        maxlen [READ ] length of buffer [int]
 * Arg:           ifp [READ ] input file to read from [FILE *]
 *
 * Return [UNKN ]  Undocumented return value [Sequence *]
 *
 */
Sequence * bp_sw_read_Sequence_EMBL_seq(char * buffer,int maxlen,FILE * ifp);
#define read_Sequence_EMBL_seq bp_sw_read_Sequence_EMBL_seq


/* Function:  read_fasta_Sequence(ifp)
 *
 * Descrip:    Reads a sequence from a fasta file. The format is
 *
 *             >name
 *             sequence
 *
 *             Allocates a structure and puts the sequence
 *             in it. Calls /make_len_type_Sequence to
 *             check type and length.
 *
 *             It leaves the '>' of the next fasta sequence
 *             in the stream, so that multiple sequences can be read
 *
 *
 * Arg:        ifp [READ ] input file to read from [FILE *]
 *
 * Return [OWNER]  new Sequence structure  [Sequence *]
 *
 */
Sequence * bp_sw_read_fasta_Sequence(FILE * ifp);
#define read_fasta_Sequence bp_sw_read_fasta_Sequence


/* Function:  show_Sequence_residue_list(seq,start,end,ofp)
 *
 * Descrip:    Shows a region of a sequence as
 *                124  A
 *                125  T
 *
 *             etc from start to end. The numbers
 *             are in C coordinates (ie, 0 is the first
 *             letter).
 *
 *             Useful for debugging
 *
 *
 * Arg:          seq [READ ] Sequence to show [Sequence *]
 * Arg:        start [READ ] start of list [int]
 * Arg:          end [READ ] end of list [int]
 * Arg:          ofp [UNKN ] NOTE(review): presumably the file the list is written to - confirm [FILE *]
 *
 */
void bp_sw_show_Sequence_residue_list(Sequence * seq,int start,int end,FILE * ofp);
#define show_Sequence_residue_list bp_sw_show_Sequence_residue_list


/* Function:  empty_Sequence_from_dynamic_memory(name)
 *
 * Descrip:    Only allocates the sequence structure and name
 *
 *
 * Arg:        name [UNKN ] NOTE(review): the function name suggests this is
 *                          dynamically allocated memory that the new object
 *                          takes over rather than copies - confirm [char *]
 *
 * Return [UNKN ]  Undocumented return value [Sequence *]
 *
 */
Sequence * bp_sw_empty_Sequence_from_dynamic_memory(char * name);
#define empty_Sequence_from_dynamic_memory bp_sw_empty_Sequence_from_dynamic_memory


/* Function:  Sequence_alloc_len(len)
 *
 * Descrip:    Allocates a sequence structure with enough
 *             space in char for a sequence of length len.
 *
 *
 * Arg:        len [READ ] length of blank sequence space [int]
 *
 * Return [UNKN ]  the allocated structure. NOTE(review): the value returned
 *                 on allocation failure is not documented - confirm [Sequence *]
 *
 */
Sequence * bp_sw_Sequence_alloc_len(int len);
#define Sequence_alloc_len bp_sw_Sequence_alloc_len


/* Function:  write_fasta_Sequence(seq,ofp)
 *
 * Descrip:    Writes the sequence in fasta format, ie
 *             >name
 *             Sequence
 *
 *
 * Arg:        seq [READ ] sequence to be written [Sequence *]
 * Arg:        ofp [UNKN ] file to write to [FILE *]
 *
 */
void bp_sw_write_fasta_Sequence(Sequence * seq,FILE * ofp);
#define write_fasta_Sequence bp_sw_write_fasta_Sequence


/* Function:  hard_link_Sequence(obj)
 *
 * Descrip:    Bumps up the reference count of the object,
 *             meaning that multiple pointers can 'own' it
 *
 *
 * Arg:        obj [UNKN ] Object to be hard linked [Sequence *]
 *
 * Return [UNKN ]  NOTE(review): presumably obj itself, so the call can be
 *                 used in an assignment - confirm [Sequence *]
 *
 */
Sequence * bp_sw_hard_link_Sequence(Sequence * obj);
#define hard_link_Sequence bp_sw_hard_link_Sequence


/* Function:  Sequence_alloc(void)
 *
 * Descrip:    Allocates the structure and assigns defaults, if any are given
 *
 *
 *
 * Return [UNKN ]  the allocated structure. NOTE(review): the value returned
 *                 on allocation failure is not documented - confirm [Sequence *]
 *
 */
Sequence * bp_sw_Sequence_alloc(void);
#define Sequence_alloc bp_sw_Sequence_alloc


/* Function:  free_Sequence(obj)
 *
 * Descrip:    Free function: releases the memory held by obj.
 *             Will chain up to owned members and clear all lists
 *
 *
 * Arg:        obj [UNKN ] Object that is freed [Sequence *]
 *
 * Return [UNKN ]  NOTE(review): presumably NULL, so that callers can write
 *                 obj = free_Sequence(obj) - confirm [Sequence *]
 *
 */
Sequence * bp_sw_free_Sequence(Sequence * obj);
#define free_Sequence bp_sw_free_Sequence


  /* Unplaced functions */
  /* There has been no indication of the use of these functions */


    /***************************************************/
    /* Internal functions                              */
    /* you are not expected to have to call these      */
    /***************************************************/
/* Field accessors and low-level constructors. Every prototype is paired
 * with a #define that maps the short name onto the bp_sw_ prefixed symbol.
 *
 * NOTE(review): going by the names and signatures, access_X_Sequence
 * returns field X of obj and replace_X_Sequence stores a new value into
 * it, with the boolean presumably reporting success. None of this is
 * documented in the header - confirm against the implementation file
 * before relying on it (in particular whether replace_seq/replace_name
 * copy the string or take over the pointer). */
boolean bp_sw_replace_seq_Sequence(Sequence * obj,char * seq);
#define replace_seq_Sequence bp_sw_replace_seq_Sequence
int bp_sw_access_len_Sequence(Sequence * obj);
#define access_len_Sequence bp_sw_access_len_Sequence
boolean bp_sw_replace_maxlen_Sequence(Sequence * obj,int maxlen);
#define replace_maxlen_Sequence bp_sw_replace_maxlen_Sequence
boolean bp_sw_replace_name_Sequence(Sequence * obj,char * name);
#define replace_name_Sequence bp_sw_replace_name_Sequence
int bp_sw_access_type_Sequence(Sequence * obj);
#define access_type_Sequence bp_sw_access_type_Sequence
int bp_sw_access_maxlen_Sequence(Sequence * obj);
#define access_maxlen_Sequence bp_sw_access_maxlen_Sequence
char * bp_sw_access_seq_Sequence(Sequence * obj);
#define access_seq_Sequence bp_sw_access_seq_Sequence
boolean bp_sw_replace_offset_Sequence(Sequence * obj,int offset);
#define replace_offset_Sequence bp_sw_replace_offset_Sequence
boolean bp_sw_replace_len_Sequence(Sequence * obj,int len);
#define replace_len_Sequence bp_sw_replace_len_Sequence
int bp_sw_access_offset_Sequence(Sequence * obj);
#define access_offset_Sequence bp_sw_access_offset_Sequence
boolean bp_sw_replace_type_Sequence(Sequence * obj,int type);
#define replace_type_Sequence bp_sw_replace_type_Sequence
boolean bp_sw_replace_end_Sequence(Sequence * obj,int end);
#define replace_end_Sequence bp_sw_replace_end_Sequence
char * bp_sw_access_name_Sequence(Sequence * obj);
#define access_name_Sequence bp_sw_access_name_Sequence
int bp_sw_access_end_Sequence(Sequence * obj);
#define access_end_Sequence bp_sw_access_end_Sequence
/* Appends more to the sequence held in seq (going by the name; confirm). */
boolean bp_sw_add_string_to_Sequence(Sequence * seq,char * more);
#define add_string_to_Sequence bp_sw_add_string_to_Sequence
/* NOTE(review): the static/dynamic suffixes presumably say whether the
 * strings passed in are copied or adopted by the new object - confirm. */
Sequence * bp_sw_Sequence_from_static_memory (char * name,char * seq);
#define Sequence_from_static_memory  bp_sw_Sequence_from_static_memory 
Sequence * bp_sw_Sequence_from_dynamic_memory(char * name,char * seq);
#define Sequence_from_dynamic_memory bp_sw_Sequence_from_dynamic_memory

#ifdef _cplusplus
}
#endif

#endif
