MALAT1 chr11:65,265,233-65,273,939 st=+


r2
FUBP2-IgG minus 1000

r1
FUBP2-IgG minus 400

r8 all 200binSize vs Input
r7
bamFiles=c(
"PHR23r-FUBP2.bam",
"PHR24r-IgG.bam")
cNAME <- "IgG"
outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_IgG_minus2") 
binSize <- 200     # automatically determine bin size
minBinSize <- 150	 # min bin size in automatic bin size selection
maxBinSize <- 300	 # max bin size in automatic bin size selection
multicore <- TRUE		 # use multicore
biomart <- "ensembl"           # use archive to get ensembl 65
biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name	
goAnno <- "org.Hs.eg.db"                    # GO annotation database
seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME,
                   reverseComplement = TRUE, genomeBuild = "hg19",
		   strandType = "-",  
                   uniqueHit = TRUE, assignMultihits = TRUE, 
                   rerunWithDisambiguatedMultihits = TRUE,
                   binSize=binSize, minBinSize = minBinSize, 
                   maxBinSize = maxBinSize,
                   biomart=biomart, goAnno = goAnno,
                   biomaRt_dataset = biomaRt_dataset,
                   multicore=multicore, outDir=outDir)

r6
bamFiles=c(
"PHR23r-FUBP2.bam",
"PHR24r-IgG.bam")
cNAME <- "IgG"
outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_IgG_plus") 
binSize <- 200     # automatically determine bin size
minBinSize <- 150	 # min bin size in automatic bin size selection
maxBinSize <- 300	 # max bin size in automatic bin size selection
multicore <- TRUE		 # use multicore
biomart <- "ensembl"           # use archive to get ensembl 65
biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name	
goAnno <- "org.Hs.eg.db"                    # GO annotation database
seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME,
                   reverseComplement = TRUE, genomeBuild = "hg19",
		   strandType = "+",  
                   uniqueHit = TRUE, assignMultihits = TRUE, 
                   rerunWithDisambiguatedMultihits = TRUE,
                   binSize=binSize, minBinSize = minBinSize, 
                   maxBinSize = maxBinSize,
                   biomart=biomart, goAnno = goAnno,
                   biomaRt_dataset = biomaRt_dataset,
                   multicore=multicore, outDir=outDir)


r5
bamFiles=c(
"PHR22r-TCF4.bam",
"PHR19r-Input.bam")
cNAME <- "IgG"
outDir <- file.path(getwd(), "RIPSeeker_TCF4_vs_Input_minus")
binSize <- 200     # automatically determine bin size
seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME,
                   reverseComplement = TRUE, genomeBuild = "hg19",
		   strandType = "-",  
                   uniqueHit = TRUE, assignMultihits = TRUE, 
                   rerunWithDisambiguatedMultihits = TRUE,
                   binSize=binSize, minBinSize = minBinSize, 
                   maxBinSize = maxBinSize,
                   biomart=biomart, goAnno = goAnno,
                   biomaRt_dataset = biomaRt_dataset,
                   multicore=multicore, outDir=outDir)

r4
bamFiles=c(
"PHR22r-TCF4.bam",
"PHR19r-Input.bam")
cNAME <- "IgG"
outDir <- file.path(getwd(), "RIPSeeker_TCF4_vs_Input_plus")
binSize <- 200     # automatically determine bin size
seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME,
                   reverseComplement = TRUE, genomeBuild = "hg19",
		   strandType = "+",  
                   uniqueHit = TRUE, assignMultihits = TRUE, 
                   rerunWithDisambiguatedMultihits = TRUE,
                   binSize=binSize, minBinSize = minBinSize, 
                   maxBinSize = maxBinSize,
                   biomart=biomart, goAnno = goAnno,
                   biomaRt_dataset = biomaRt_dataset,
                   multicore=multicore, outDir=outDir)


r2
bamFiles=c(
"PHR22r-TCF4.bam",
"PHR24r-IgG.bam")
cNAME <- "IgG"
outDir <- file.path(getwd(), "RIPSeeker_TCF4_vs_IgG_minus")
binSize <- 200     # automatically determine bin size
seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME,
                   reverseComplement = TRUE, genomeBuild = "hg19",
		   strandType = "-",  
                   uniqueHit = TRUE, assignMultihits = TRUE, 
                   rerunWithDisambiguatedMultihits = TRUE,
                   binSize=binSize, minBinSize = minBinSize, 
                   maxBinSize = maxBinSize,
                   biomart=biomart, goAnno = goAnno,
                   biomaRt_dataset = biomaRt_dataset,
                   multicore=multicore, outDir=outDir)


r1
bamFiles=c(
"PHR22r-TCF4.bam",
"PHR24r-IgG.bam")
cNAME <- "IgG"
outDir <- file.path(getwd(), "RIPSeeker_TCF4_vs_IgG_plus") 
binSize <- 200     # automatically determine bin size
minBinSize <- 150	 # min bin size in automatic bin size selection
maxBinSize <- 300	 # max bin size in automatic bin size selection
multicore <- TRUE		 # use multicore
biomart <- "ensembl"           # use archive to get ensembl 65
biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name	
goAnno <- "org.Hs.eg.db"                    # GO annotation database
seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME,
                   reverseComplement = TRUE, genomeBuild = "hg19",
		   strandType = "+",  
                   uniqueHit = TRUE, assignMultihits = TRUE, 
                   rerunWithDisambiguatedMultihits = TRUE,
                   binSize=binSize, minBinSize = minBinSize, 
                   maxBinSize = maxBinSize,
                   biomart=biomart, goAnno = goAnno,
                   biomaRt_dataset = biomaRt_dataset,
                   multicore=multicore, outDir=outDir)


#@ below here, before PH meeting, 11052017
viewRIP(seekOut.CCNT1$RIPGRList$chr2, seekOut.CCNT1$mainSeekOutputRIP$alignGal, seekOut.CCNT1$mainSeekOutputCTL$alignGal, scoreType="eFDR")
UCSCView of :33031597-33041570 
trackNames(13): 'aligned RIP reads' 'aligned CTL reads' ... 'RepeatMasker'

df <- elementMetadata(seekOut.CCNT1$annotatedRIPGR$sigGRangesAnnotated)

r9
bamFiles=c(
"PHR22r-TCF4.bam",
"PHR24r-IgG.bam")
cNAME <- "IgG"
minBinSize <- 50	 # min bin size in automatic bin size selection
maxBinSize <- 205	 # max bin size in automatic bin size selection
outDir <- file.path(getwd(), "RIPSeeker_TCF4_vs_IgG_bothStrands2")


r8
bamFiles=c(
"PHR21r-bcatMono.bam",
"PHR24r-IgG.bam")
cNAME <- "IgG"
minBinSize <- 50	 # min bin size in automatic bin size selection
maxBinSize <- 205	 # max bin size in automatic bin size selection
outDir <- file.path(getwd(), "RIPSeeker_bcatMono_vs_IgG_bothStrands2")

r5
bamFiles=c(
"PHR22r-TCF4.bam",
"PHR19r-Input.bam")
cNAME <- "Input"
minBinSize <- 50	 # min bin size in automatic bin size selection
maxBinSize <- 250	 # max bin size in automatic bin size selection
outDir <- file.path(getwd(), "RIPSeeker_TCF4_vs_Input_bothStrands2")

r4
bamFiles=c(
"PHR23r-FUBP2.bam",
"PHR19r-Input.bam")
cNAME <- "Input"
minBinSize <- 50	 # min bin size in automatic bin size selection
maxBinSize <- 250	 # max bin size in automatic bin size selection
outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_Input_bothStrands2")

bamFiles=c(
"PHR21r-bcatMono.bam",
"PHR19r-Input.bam")
cNAME <- "Input"
minBinSize <- 50	 # min bin size in automatic bin size selection
maxBinSize <- 250	 # max bin size in automatic bin size selection
outDir <- file.path(getwd(), "RIPSeeker_bcatMono_vs_Input_bothStrands")

bamFiles=c(
"PHR20r-bcatPoly.bam",
"PHR24r-IgG.bam")
cNAME <- "IgG"
minBinSize <- 50	 # min bin size in automatic bin size selection
maxBinSize <- 250	 # max bin size in automatic bin size selection
outDir <- file.path(getwd(), "RIPSeeker_bcatPoly_vs_IgG_bothStrands")

bamFiles=c(
"PHR20r-bcatPoly.bam",
"PHR19r-Input.bam")
cNAME <- "Input"
minBinSize <- 50	 # min bin size in automatic bin size selection
maxBinSize <- 250	 # max bin size in automatic bin size selection
outDir <- file.path(getwd(), "RIPSeeker_bcatPoly_vs_Input_bothStrands")


bamFiles=c(
"PHR23r-FUBP2.bam",
"PHR24r-IgG.bam")
cNAME <- "IgG"
minBinSize <- 50	 # min bin size in automatic bin size selection
maxBinSize <- 250	 # max bin size in automatic bin size selection
outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_IgG_bothStrands2")

bamFiles=c(
"PHR23r-FUBP2.bam",
"PHR24r-IgG.bam")
cNAME <- "IgG"
minBinSize <- 20	 # min bin size in automatic bin size selection
maxBinSize <- 60	 # max bin size in automatic bin size selection
outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_IgG_bothStrands3")


require("RIPSeeker")
bamFiles=c(
"PHR23r-FUBP2.bam",
"PHR19r-Input.bam")
cNAME <- "Input"
outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_Input_bothStrands")
binSize <- NULL     # automatically determine bin size
minBinSize <- 150	 # min bin size in automatic bin size selection
maxBinSize <- 300	 # max bin size in automatic bin size selection
multicore <- TRUE		 # use multicore
biomart <- "ensembl"           # use archive to get ensembl 65
biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name	
goAnno <- "org.Hs.eg.db"                    # GO annotation database
seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME,
                   reverseComplement = TRUE, genomeBuild = "hg19", 
                   uniqueHit = TRUE, assignMultihits = TRUE, 
                   rerunWithDisambiguatedMultihits = TRUE,
                   binSize=binSize, minBinSize = minBinSize, 
                   maxBinSize = maxBinSize,
                   biomart=biomart, goAnno = goAnno,
                   biomaRt_dataset = biomaRt_dataset,
                   multicore=multicore, outDir=outDir)

bamFiles=c(
"PHR22r-TCF4.bam",
"PHR19r-Input.bam")
cNAME <- "Input"
outDir <- file.path(getwd(), "RIPSeeker_TCF4_vs_Input_bothStrands")
binSize <- NULL     # automatically determine bin size
minBinSize <- 150	 # min bin size in automatic bin size selection
maxBinSize <- 300	 # max bin size in automatic bin size selection
multicore <- TRUE		 # use multicore
biomart <- "ensembl"           # use archive to get ensembl 65
biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name	
goAnno <- "org.Hs.eg.db"                    # GO annotation database
seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME,
                   reverseComplement = TRUE, genomeBuild = "hg19", 
                   uniqueHit = TRUE, assignMultihits = TRUE, 
                   rerunWithDisambiguatedMultihits = TRUE,
                   binSize=binSize, minBinSize = minBinSize, 
                   maxBinSize = maxBinSize,
                   biomart=biomart, goAnno = goAnno,
                   biomaRt_dataset = biomaRt_dataset,
                   multicore=multicore, outDir=outDir)


bamFiles=c(
"PHR21r-bcatMono.bam",
"PHR24r-IgG.bam")
cNAME <- "IgG"
outDir <- file.path(getwd(), "RIPSeeker_bcatMono_vs_IgG_bothStrands")
binSize <- NULL     # automatically determine bin size
minBinSize <- 150	 # min bin size in automatic bin size selection
maxBinSize <- 300	 # max bin size in automatic bin size selection
multicore <- TRUE		 # use multicore
biomart <- "ensembl"           # use archive to get ensembl 65
biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name	
goAnno <- "org.Hs.eg.db"                    # GO annotation database
seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME,
                   reverseComplement = TRUE, genomeBuild = "hg19", 
                   uniqueHit = TRUE, assignMultihits = TRUE, 
                   rerunWithDisambiguatedMultihits = TRUE,
                   binSize=binSize, minBinSize = minBinSize, 
                   maxBinSize = maxBinSize,
                   biomart=biomart, goAnno = goAnno,
                   biomaRt_dataset = biomaRt_dataset,
                   multicore=multicore, outDir=outDir)


bamFiles=c(
"PHR22r-TCF4.bam",
"PHR24r-IgG.bam")
cNAME <- "IgG"
outDir <- file.path(getwd(), "RIPSeeker_TCF4_vs_IgG_bothStrands")
binSize <- NULL     # automatically determine bin size
minBinSize <- 150	 # min bin size in automatic bin size selection
maxBinSize <- 300	 # max bin size in automatic bin size selection
multicore <- TRUE		 # use multicore
biomart <- "ensembl"           # use archive to get ensembl 65
biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name	
goAnno <- "org.Hs.eg.db"                    # GO annotation database
seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME,
                   reverseComplement = TRUE, genomeBuild = "hg19", 
                   uniqueHit = TRUE, assignMultihits = TRUE, 
                   rerunWithDisambiguatedMultihits = TRUE,
                   binSize=binSize, minBinSize = minBinSize, 
                   maxBinSize = maxBinSize,
                   biomart=biomart, goAnno = goAnno,
                   biomaRt_dataset = biomaRt_dataset,
                   multicore=multicore, outDir=outDir)

bamFiles=c(
"PHR23r-FUBP2.bam",
"PHR24r-IgG.bam")
cNAME <- "IgG"
outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_IgG_bothStrands_noRC")
# Parameters setting
binSize <- NULL     # automatically determine bin size
minBinSize <- 200	 # min bin size in automatic bin size selection
maxBinSize <- 500	 # max bin size in automatic bin size selection
multicore <- TRUE		 # use multicore
biomart <- "ensembl"           # use archive to get ensembl 65
biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name	
goAnno <- "org.Hs.eg.db"                    # GO annotation database
seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME,
                   reverseComplement = TRUE, genomeBuild = "hg19", 
                   uniqueHit = TRUE, assignMultihits = TRUE, 
                   rerunWithDisambiguatedMultihits = TRUE,
                   binSize=binSize, minBinSize = minBinSize, 
                   maxBinSize = maxBinSize,
                   biomart=biomart, goAnno = goAnno,
                   biomaRt_dataset = biomaRt_dataset,
                   multicore=multicore, outDir=outDir)


#@
outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_IgG_minus")
seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME,
                   reverseComplement = TRUE, genomeBuild = "hg19", 
                   strandType = "-", 
                   uniqueHit = TRUE, assignMultihits = TRUE, 
                   rerunWithDisambiguatedMultihits = TRUE,
                   binSize=binSize, minBinSize = minBinSize, 
                   maxBinSize = maxBinSize,
                   biomart=biomart, goAnno = goAnno,
                   biomaRt_dataset = biomaRt_dataset,
                   multicore=multicore, outDir=outDir)


#@
bamFiles=c(
"PHR21r-bcatMono.bam",
"PHR23r-FUBP2.bam",
"PHR20r-bcatPoly.bam",
"PHR22r-TCF4.bam",
"PHR24r-IgG.bam")
cNAME <- "IgG"


bamFiles=c(
"PHR19r-Input.bam",
"PHR21r-bcatMono.bam",
"PHR23r-FUBP2.bam",
"PHR20r-bcatPoly.bam",
"PHR22r-TCF4.bam")
cNAME <- "Input"

# output file directory
outDir <- file.path(getwd(), "RIPSeeker_IgG1")

# Parameters setting
binSize <- 10000     # automatically determine bin size
minBinSize <- NULL	 # min bin size in automatic bin size selection
maxBinSize <- NULL	 # max bin size in automatic bin size selection
multicore <- TRUE		 # use multicore
strandType <- "+"		 # set strand type to minus strand

biomart <- "ensembl"           # use archive to get ensembl 65
biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name	
goAnno <- "org.Hs.eg.db"                    # GO annotation database
seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME,
                   reverseComplement = TRUE, genomeBuild = "hg19", 
                   strandType = strandType, 
                   uniqueHit = TRUE, assignMultihits = TRUE, 
                   rerunWithDisambiguatedMultihits = TRUE,
                   binSize=binSize, minBinSize = minBinSize, 
                   maxBinSize = maxBinSize,
                   biomart=biomart, goAnno = goAnno,
                   biomaRt_dataset = biomaRt_dataset,
                   multicore=multicore, outDir=outDir)


#@

bamFiles=c(
"PHR23r-FUBP2.bam",
"PHR24r-IgG.bam")
cNAME <- "IgG"
outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_IgG")
seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME,
                   reverseComplement = TRUE, genomeBuild = "hg19", 
                   uniqueHit = TRUE, assignMultihits = TRUE, 
                   rerunWithDisambiguatedMultihits = TRUE,
                   binSize=binSize, minBinSize = minBinSize, 
                   maxBinSize = maxBinSize,
                   biomart=biomart, goAnno = goAnno,
                   biomaRt_dataset = biomaRt_dataset,
                   multicore=multicore, outDir=outDir)


txDbName <- "biomart"
     biomart <- "ENSEMBL_MART_ENSEMBL"               # use archive to get ensembl 65
#     dataset <- "mmusculus_gene_ensembl"             
#     host <- "dec2011.archive.ensembl.org"   # use ensembl 65 for annotation
host="grch37.ensembl.org"
dataset="hsapiens_gene_ensembl"

     resultlist <- rulebaseRIPSeek(bamFiles, cNAME, dataset=dataset, 
                     txDbName=txDbName, biomart=biomart, host=host, by="tx",moreRIPGeneInfo=FALSE,moreGeneInfo=FALSE)
df <- resultlist$rpkmDF
write.table(df, file="/mnt/max/c/genomics_facility/proton/run341/www/RIPSeeker_FUBP2_vs_IgG_RPKM.csv", append=FALSE, quote=TRUE, sep = ";", eol = "\n", na = "NA", dec = ".", row.names=TRUE, col.names=TRUE, qmethod = c("escape", "double"), fileEncoding = "")
"ENST00000416931";3;1.146037227204;156;48.3139550209789;0.0237206253701724
"ENST00000457540";35;4.76417774527334;352;38.8448185373506;0.122646415266233
"ENST00000414273";48;4.42074761913041;616;45.9944805364481;0.0961147417596598

rpkmDFData frame containing read count, RPKM for the RIP (or treatment) and control, foldchange


ripRPKM <- computeRPKM(bamFiles[1], dataset="hsapiens_gene_ensembl", paired=FALSE, moreGeneInfo=TRUE, justRPKM=FALSE, idType="ensembl_transcript_id", featureType="exon",txDbName="biomart", biomart="ensembl", by="tx", saveData=file.path(outDir, "ripRPKM"))

rulebase.results <- rulebaseRIPSeek(bamFiles=bamFiles, cNAME="PHR24r-IgG.bam", myMin=1, featureGRanges=ripRPKM$featureGRanges, rpkmCutoff = 0.4, fcCutoff = 3, moreRIPGeneInfo=TRUE, idType="ensembl_transcript_id", biomart="ensembl",dataset="hsapiens_gene_ensembl", saveData=fiel.path(outDir,"rulebase.results"))

head(ripRPKM$rpkmDF)

df <- rulebase.results$rpkmDF

df <- df[order(df$foldchange, decreasing=TRUE), ]

write.table(df, file="/home/yourpath/csv", append=FALSE, quote=TRUE, sep = ";", eol = "\n", na = "NA", dec = ".", row.names=TRUE, col.names=TRUE, qmethod = c("escape", "double"), fileEncoding = "")

6) now_look_for_coverage

RipGal <- readGappedAlignments("/data1/home/yourpath/control_accepted_hits.bam")

CtlGal <- readGappedAlignments("/data1/home/yourpath/RIP_accepted_hits.bam")

ripGR <- as(RipGal, "GRanges")

ctlGR <- as(CtlGal, "GRanges")

ripGRList <- split(ripGR, seqnames(ripGR))


ripGRList <- ripGRList[elementLengths(ripGRList) != 0]

ctlGRList <- GRangesList(as.list(split(ctlGR, seqnames(ctlGR))))

ctlGRList <- ctlGRList[lapply(ctlGRList, length) != 0]

binSize <- 1000

#c(bottom, left, top, right)

par(mfrow=c(1, 2), mar=c(2, 2, 2, 0) + 0.1)

tmp <- lapply(ripGRList, plotStrandedCoverage, binSize=binSize, xlab="", ylab="", plotLegend=TRUE, legend.cex=1.5, main="RIP", cex.main=1.5)

tmp <- lapply(ctlGRList, plotStrandedCoverage, binSize=binSize, xlab="", ylab="", plotLegend=TRUE, legend.cex=1.5, main="CTL", cex.main=1.5)


#@


outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_IgG_bothStrands")

# Parameters setting
binSize <- NULL     # automatically determine bin size
minBinSize <- 200	 # min bin size in automatic bin size selection
maxBinSize <- 1000	 # max bin size in automatic bin size selection
multicore <- TRUE		 # use multicore
biomart <- "ensembl"           # use archive to get ensembl 65
biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name	
goAnno <- "org.Hs.eg.db"                    # GO annotation database
seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME,
                   reverseComplement = TRUE, genomeBuild = "hg19", 
                   uniqueHit = TRUE, assignMultihits = TRUE, 
                   rerunWithDisambiguatedMultihits = TRUE,
                   binSize=binSize, minBinSize = minBinSize, 
                   maxBinSize = maxBinSize,
                   biomart=biomart, goAnno = goAnno,
                   biomaRt_dataset = biomaRt_dataset,
                   multicore=multicore, outDir=outDir)

#@
txDbName <- "biomart"
     biomart <- "ENSEMBL_MART_ENSEMBL"               # use archive to get ensembl 65
#     dataset <- "mmusc