\name{CORNA-package}
\alias{CORNA-package}
\alias{CORNA}
\docType{package}
\title{
Statistical Enrichment of microRNA target relationships in a sample of genes
}
\description{
CORNA is a package for R that analyses a sample of genes and microRNA target prediction data to find statistically 
over-represented miRNA-target relationships. The gene sample can be any group of genes of interest: 
differentially expressed genes from a microarray analysis, genes within a particular metabolic pathway, or genes associated 
with particular GO terms, etc.
}
\details{
\tabular{ll}{
Package: \tab CORNA\cr
Type: \tab Package\cr
Version: \tab 1.0\cr
Date: \tab 2008-11-11\cr
License: \tab GPL\cr
}
In the case of microarray analysis, the gene sample is taken in the form of a list of probes. These probes 
are then linked to the transcript identifiers used in the miRNA-target data. The source of data is flexible, but methods 
exist for reading the Ensembl BioMart database. Genes can then be linked to miRNA target information using any source, 
but methods exist for reading miRBase and microRNA.org target data.
The gene sample can also come from other resources such as KEGG and GO, for which several methods are available to manipulate the data.
}
\author{
Xikun Wu and Michael Watson

Maintainer: Xikun Wu (xikun.wu@bbsrc.ac.uk)
}
\keyword{ package }
\keyword{ manip }

\examples{
##### miRNA-target relationships: transcript-miRNA and gene-miRNA links

# mouse target data from miRBase
mirbase.url <- "ftp://ftp.sanger.ac.uk/pub/mirbase/targets/v5/arch.v5.txt.mus_musculus.zip"
tran2mir.df <- miRBase2df.fun(url=mirbase.url)

# transcript-to-gene links from BioMart; the BioMart column names
# are renamed to the short names "tran" and "gene"
tran2gene.df <- BioMart2df.fun(
    biomart="ensembl",
    dataset="mmusculus_gene_ensembl",
    col.old=c("ensembl_transcript_id", "ensembl_gene_id"),
    col.new=c("tran", "gene")
)

# combine both data frames into the gene-miRNA data frame
gene2mir.df <- corna.map.fun(tran2mir.df, tran2gene.df, "gene", "mir")


##### microarray analysis

# the microarray data is provided as a data set
data(CORNA.DATA)

# alternatively, read it from a downloaded GEO SOFT format file
microarray.df <- GEO2df.fun(file="GSE7333_family.soft")

# transcript-to-probe links from BioMart
tran2probe.df <- BioMart2df.fun(
    biomart="ensembl",
    dataset="mmusculus_gene_ensembl",
    col.old=c("ensembl_transcript_id", "affy_mouse430_2"),
    col.new=c("tran", "probe")
)

# the population in this study is the set of transcripts associated with
# both the probes in microarray.df and the miRNAs in tran2mir.df

# all transcripts on the microarray (the probe IDs are the row names)
microarray.probe.vec <- rownames(microarray.df)
microarray.tran.vec <- corna.map.fun(tran2probe.df, microarray.probe.vec, "probe", "tran")
# population
pop.tran.vec <- corna.map.fun(tran2mir.df, microarray.tran.vec, "tran")


# the sample in this study comes from a list of probes, which is provided
# and can also be read from a local text file
sam.probe.vec <- sam.probe.fun(file="sam.probe.txt")
# get the transcripts linked to the sample probes
my.tran.vec <- corna.map.fun(tran2probe.df, sam.probe.vec, "probe", "tran")
# the sample transcripts should be within the population
sam.tran.vec <- corna.map.fun(my.tran.vec, pop.tran.vec)

# run CORNA test: hypergeometric test (default), Fisher's exact test and chi-squared test (optional)
# TRUE is spelled out because the shorthand T is an ordinary variable that can be reassigned
corna.microarray_test.df <- corna.test.fun(sam.tran.vec, pop.tran.vec, tran2mir.df,
                                           fisher=TRUE, chi.square=TRUE)

# significant results: rows whose hypergeometric p-value is at most 0.01
sig.microarray_test.df <- corna.microarray_test.df[corna.microarray_test.df[, "hypergeometric"]<=0.01, ] 

# show significant results
sig.microarray_test.df
#                 total expectation observation hypergeometric       fisher   chi.square
# mmu-mir-878-5p    640          38          59   0.0006316418 0.0009079053 0.0006061438
# mmu-mir-449b      738          44          66   0.0006984588 0.0011225561 0.0007210760
# hsa-mir-516a-5p   797          48          70   0.0007975266 0.0013057629 0.0008660369
# mmu-mir-7a       1215          73          94   0.0058538345 0.0104086201 0.0092306588


# CORNA also provides methods for drawing figures from microarray data

# miRNA IDs taken from the row names of the significant list
my.mir.vec <- rownames(sig.microarray_test.df)

# transcript IDs associated with these miRNAs
my.tran.vec <- corna.map.fun(tran2mir.df, my.mir.vec, "mir", "tran")

# probe IDs in the sample which are associated with these miRNAs:
# first map the transcripts to probes, then match against the sample probes
my.tran.probe.vec <- corna.map.fun(tran2probe.df, my.tran.vec, "tran", "probe")
my.probe.vec <- corna.map.fun(my.tran.probe.vec, sam.probe.vec)

# subset of microarray.df that only contains these probes
my.microarray.df <- corna.sub.fun(microarray.df, my.probe.vec)

# line plot for all probes
corna.line.fun(my.microarray.df)

# barplots for the first 9 probes of my.microarray.df, arranged in one 3 x 3 figure
corna.barplot.fun(my.microarray.df, row=3, column=3)


##### given a group of interesting genes; here we use the targets of one particular miRNA

# the miRNA whose targets form the group
my.mir <- "mmu-mir-155"

# genes associated with this miRNA
my.gene.vec <- corna.map.fun(gene2mir.df, my.mir, "mir", "gene")

# transcripts associated with this miRNA
my.tran.vec <- corna.map.fun(tran2mir.df, my.mir, "mir", "tran")


##### KEGG pathway analysis

# get links between genes and pathways
gene2path.df <- KEGG2df.fun(org="mmu")

# get links between pathway ID and name
path2name.df <- unique(gene2path.df[c("path", "name")])
# renumber the rows consecutively; seq_len() is also safe when there are no rows
rownames(path2name.df) <- seq_len(nrow(path2name.df))

# population here is the set of all genes in gene2path.df
pop.gene.vec <- as.vector(unique(gene2path.df[, "gene"]))

# sample here is the set of genes in both my.gene.vec and population
sam.gene.vec <- corna.map.fun(pop.gene.vec, my.gene.vec) 

# run CORNA test: Fisher's exact test only (the default hypergeometric test is
# switched off), with results sorted by the "fisher" column
corna.pathway_test.df <- corna.test.fun(sam.gene.vec, pop.gene.vec, gene2path.df, 
                                        hypergeometric=FALSE, fisher=TRUE,
                                        fisher.alternative="greater", sort="fisher", min.pop=10, 
                                        desc=path2name.df) 

# significant list: rows whose Fisher p-value is at most 0.05
corna.pathway_test.df[corna.pathway_test.df[, "fisher"] <= 0.05, ]


##### GO term analysis

# get links between transcript and GO term (molecular function) from BioMart
tran2gomf.df  <- BioMart2df.fun(biomart="ensembl", dataset="mmusculus_gene_ensembl",  
                                col.old=c("ensembl_transcript_id", "go_molecular_function_id"),
                                col.new=c("tran", "gomf"))

# get links between GO id and term
go2term.df <- GO2df.fun(url="ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene2go.gz")

# population here is the set of all transcripts in tran2gomf.df
pop.tran.vec <- as.vector(unique(tran2gomf.df[, "tran"]))

# sample here is the set of transcripts in both my.tran.vec and population
sam.tran.vec <- corna.map.fun(pop.tran.vec, my.tran.vec) 

# run CORNA test with Fisher's exact test added, sorted by the "fisher" column
# TRUE is spelled out because the shorthand T is an ordinary variable that can be reassigned
corna.gomf_test.df <- corna.test.fun(sam.tran.vec, pop.tran.vec, tran2gomf.df, 
                                     fisher=TRUE, sort="fisher", min.pop=5, 
                                     desc=go2term.df) 

# significant list: rows whose Fisher p-value is at most 0.05
corna.gomf_test.df[corna.gomf_test.df[, "fisher"] <= 0.05, ]
}
