MALAT1 chr11:65,265,233-65,273,939 st=+ r2 FUBP2-IgG minus 1000 r1 FUBP2-IgG minus 400 r8 all 200binSize vs Input r7 bamFiles=c( "PHR23r-FUBP2.bam", "PHR24r-IgG.bam") cNAME <- "IgG" outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_IgG_minus2") binSize <- 200 # automatically determine bin size minBinSize <- 150 # min bin size in automatic bin size selection maxBinSize <- 300 # max bin size in automatic bin size selection multicore <- TRUE # use multicore biomart <- "ensembl" # use archive to get ensembl 65 biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name goAnno <- "org.Hs.eg.db" # GO annotation database seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, reverseComplement = TRUE, genomeBuild = "hg19", strandType = "-", uniqueHit = TRUE, assignMultihits = TRUE, rerunWithDisambiguatedMultihits = TRUE, binSize=binSize, minBinSize = minBinSize, maxBinSize = maxBinSize, biomart=biomart, goAnno = goAnno, biomaRt_dataset = biomaRt_dataset, multicore=multicore, outDir=outDir) r6 bamFiles=c( "PHR23r-FUBP2.bam", "PHR24r-IgG.bam") cNAME <- "IgG" outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_IgG_plus") binSize <- 200 # automatically determine bin size minBinSize <- 150 # min bin size in automatic bin size selection maxBinSize <- 300 # max bin size in automatic bin size selection multicore <- TRUE # use multicore biomart <- "ensembl" # use archive to get ensembl 65 biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name goAnno <- "org.Hs.eg.db" # GO annotation database seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, reverseComplement = TRUE, genomeBuild = "hg19", strandType = "+", uniqueHit = TRUE, assignMultihits = TRUE, rerunWithDisambiguatedMultihits = TRUE, binSize=binSize, minBinSize = minBinSize, maxBinSize = maxBinSize, biomart=biomart, goAnno = goAnno, biomaRt_dataset = biomaRt_dataset, multicore=multicore, outDir=outDir) r5 bamFiles=c( "PHR22r-TCF4.bam", "PHR19r-Input.bam") cNAME <- "IgG" outDir <- file.path(getwd(), "RIPSeeker_TCF4_vs_Input_minus") binSize <- 200 # automatically determine bin size seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, reverseComplement = TRUE, genomeBuild = "hg19", strandType = "-", uniqueHit = TRUE, assignMultihits = TRUE, rerunWithDisambiguatedMultihits = TRUE, binSize=binSize, minBinSize = minBinSize, maxBinSize = maxBinSize, biomart=biomart, goAnno = goAnno, biomaRt_dataset = biomaRt_dataset, multicore=multicore, outDir=outDir) r4 bamFiles=c( "PHR22r-TCF4.bam", "PHR19r-Input.bam") cNAME <- "IgG" outDir <- file.path(getwd(), "RIPSeeker_TCF4_vs_Input_plus") binSize <- 200 # automatically determine bin size seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, reverseComplement = TRUE, genomeBuild = "hg19", strandType = "+", uniqueHit = TRUE, assignMultihits = TRUE, rerunWithDisambiguatedMultihits = TRUE, binSize=binSize, minBinSize = minBinSize, maxBinSize = maxBinSize, biomart=biomart, goAnno = goAnno, biomaRt_dataset = biomaRt_dataset, multicore=multicore, outDir=outDir) r2 bamFiles=c( "PHR22r-TCF4.bam", "PHR24r-IgG.bam") cNAME <- "IgG" outDir <- file.path(getwd(), "RIPSeeker_TCF4_vs_IgG_minus") binSize <- 200 # automatically determine bin size seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, reverseComplement = TRUE, genomeBuild = "hg19", strandType = "-", uniqueHit = TRUE, assignMultihits = TRUE, rerunWithDisambiguatedMultihits = TRUE, binSize=binSize, minBinSize = minBinSize, maxBinSize = maxBinSize, biomart=biomart, goAnno = goAnno, biomaRt_dataset = biomaRt_dataset, multicore=multicore, outDir=outDir) r1 bamFiles=c( "PHR22r-TCF4.bam", "PHR24r-IgG.bam") cNAME <- "IgG" outDir <- file.path(getwd(), "RIPSeeker_TCF4_vs_IgG_plus") binSize <- 200 # automatically determine bin size minBinSize <- 150 # min bin size in automatic bin size selection maxBinSize <- 300 # max bin size in automatic bin size selection multicore <- TRUE # use multicore biomart <- "ensembl" # use archive to get ensembl 65 biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name goAnno <- "org.Hs.eg.db" # GO annotation database seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, reverseComplement = TRUE, genomeBuild = "hg19", strandType = "+", uniqueHit = TRUE, assignMultihits = TRUE, rerunWithDisambiguatedMultihits = TRUE, binSize=binSize, minBinSize = minBinSize, maxBinSize = maxBinSize, biomart=biomart, goAnno = goAnno, biomaRt_dataset = biomaRt_dataset, multicore=multicore, outDir=outDir) #@ below here, before PH meeting, 11052017 viewRIP(seekOut.CCNT1$RIPGRList$chr2, seekOut.CCNT1$mainSeekOutputRIP$alignGal, seekOut.CCNT1$mainSeekOutputCTL$alignGal, scoreType="eFDR") UCSCView of :33031597-33041570 trackNames(13): 'aligned RIP reads' 'aligned CTL reads' ... 'RepeatMasker' df <- elementMetadata(seekOut.CCNT1$annotatedRIPGR$sigGRangesAnnotated) r9 bamFiles=c( "PHR22r-TCF4.bam", "PHR24r-IgG.bam") cNAME <- "IgG" minBinSize <- 50 # min bin size in automatic bin size selection maxBinSize <- 205 # max bin size in automatic bin size selection outDir <- file.path(getwd(), "RIPSeeker_TCF4_vs_IgG_bothStrands2") r8 bamFiles=c( "PHR21r-bcatMono.bam", "PHR24r-IgG.bam") cNAME <- "IgG" minBinSize <- 50 # min bin size in automatic bin size selection maxBinSize <- 205 # max bin size in automatic bin size selection outDir <- file.path(getwd(), "RIPSeeker_bcatMono_vs_IgG_bothStrands2") r5 bamFiles=c( "PHR22r-TCF4.bam", "PHR19r-Input.bam") cNAME <- "Input" minBinSize <- 50 # min bin size in automatic bin size selection maxBinSize <- 250 # max bin size in automatic bin size selection outDir <- file.path(getwd(), "RIPSeeker_TCF4_vs_Input_bothStrands2") r4 bamFiles=c( "PHR23r-FUBP2.bam", "PHR19r-Input.bam") cNAME <- "Input" minBinSize <- 50 # min bin size in automatic bin size selection maxBinSize <- 250 # max bin size in automatic bin size selection outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_Input_bothStrands2") bamFiles=c( "PHR21r-bcatMono.bam", "PHR19r-Input.bam") cNAME <- "Input" minBinSize <- 50 # min bin size in automatic bin size selection maxBinSize <- 250 # max bin size in automatic bin size selection outDir <- file.path(getwd(), "RIPSeeker_bcatMono_vs_Input_bothStrands") bamFiles=c( "PHR20r-bcatPoly.bam", "PHR24r-IgG.bam") cNAME <- "IgG" minBinSize <- 50 # min bin size in automatic bin size selection maxBinSize <- 250 # max bin size in automatic bin size selection outDir <- file.path(getwd(), "RIPSeeker_bcatPoly_vs_IgG_bothStrands") bamFiles=c( "PHR20r-bcatPoly.bam", "PHR19r-Input.bam") cNAME <- "Input" minBinSize <- 50 # min bin size in automatic bin size selection maxBinSize <- 250 # max bin size in automatic bin size selection outDir <- file.path(getwd(), "RIPSeeker_bcatPoly_vs_Input_bothStrands") bamFiles=c( "PHR23r-FUBP2.bam", "PHR24r-IgG.bam") cNAME <- "IgG" minBinSize <- 50 # min bin size in automatic bin size selection maxBinSize <- 250 # max bin size in automatic bin size selection outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_IgG_bothStrands2") bamFiles=c( "PHR23r-FUBP2.bam", "PHR24r-IgG.bam") cNAME <- "IgG" minBinSize <- 20 # min bin size in automatic bin size selection maxBinSize <- 60 # max bin size in automatic bin size selection outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_IgG_bothStrands3") require("RIPSeeker") bamFiles=c( "PHR23r-FUBP2.bam", "PHR19r-Input.bam") cNAME <- "Input" outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_Input_bothStrands") binSize <- NULL # automatically determine bin size minBinSize <- 150 # min bin size in automatic bin size selection maxBinSize <- 300 # max bin size in automatic bin size selection multicore <- TRUE # use multicore biomart <- "ensembl" # use archive to get ensembl 65 biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name goAnno <- "org.Hs.eg.db" # GO annotation database seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, reverseComplement = TRUE, genomeBuild = "hg19", uniqueHit = TRUE, assignMultihits = TRUE, rerunWithDisambiguatedMultihits = TRUE, binSize=binSize, minBinSize = minBinSize, maxBinSize = maxBinSize, biomart=biomart, goAnno = goAnno, biomaRt_dataset = biomaRt_dataset, multicore=multicore, outDir=outDir) bamFiles=c( "PHR22r-TCF4.bam", "PHR19r-Input.bam") cNAME <- "Input" outDir <- file.path(getwd(), "RIPSeeker_TCF4_vs_Input_bothStrands") binSize <- NULL # automatically determine bin size minBinSize <- 150 # min bin size in automatic bin size selection maxBinSize <- 300 # max bin size in automatic bin size selection multicore <- TRUE # use multicore biomart <- "ensembl" # use archive to get ensembl 65 biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name goAnno <- "org.Hs.eg.db" # GO annotation database seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, reverseComplement = TRUE, genomeBuild = "hg19", uniqueHit = TRUE, assignMultihits = TRUE, rerunWithDisambiguatedMultihits = TRUE, binSize=binSize, minBinSize = minBinSize, maxBinSize = maxBinSize, biomart=biomart, goAnno = goAnno, biomaRt_dataset = biomaRt_dataset, multicore=multicore, outDir=outDir) bamFiles=c( "PHR21r-bcatMono.bam", "PHR24r-IgG.bam") cNAME <- "IgG" outDir <- file.path(getwd(), "RIPSeeker_bcatMono_vs_IgG_bothStrands") binSize <- NULL # automatically determine bin size minBinSize <- 150 # min bin size in automatic bin size selection maxBinSize <- 300 # max bin size in automatic bin size selection multicore <- TRUE # use multicore biomart <- "ensembl" # use archive to get ensembl 65 biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name goAnno <- "org.Hs.eg.db" # GO annotation database seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, reverseComplement = TRUE, genomeBuild = "hg19", uniqueHit = TRUE, assignMultihits = TRUE, rerunWithDisambiguatedMultihits = TRUE, binSize=binSize, minBinSize = minBinSize, maxBinSize = maxBinSize, biomart=biomart, goAnno = goAnno, biomaRt_dataset = biomaRt_dataset, multicore=multicore, outDir=outDir) bamFiles=c( "PHR22r-TCF4.bam", "PHR24r-IgG.bam") cNAME <- "IgG" outDir <- file.path(getwd(), "RIPSeeker_TCF4_vs_IgG_bothStrands") binSize <- NULL # automatically determine bin size minBinSize <- 150 # min bin size in automatic bin size selection maxBinSize <- 300 # max bin size in automatic bin size selection multicore <- TRUE # use multicore biomart <- "ensembl" # use archive to get ensembl 65 biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name goAnno <- "org.Hs.eg.db" # GO annotation database seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, reverseComplement = TRUE, genomeBuild = "hg19", uniqueHit = TRUE, assignMultihits = TRUE, rerunWithDisambiguatedMultihits = TRUE, binSize=binSize, minBinSize = minBinSize, maxBinSize = maxBinSize, biomart=biomart, goAnno = goAnno, biomaRt_dataset = biomaRt_dataset, multicore=multicore, outDir=outDir) bamFiles=c( "PHR23r-FUBP2.bam", "PHR24r-IgG.bam") cNAME <- "IgG" outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_IgG_bothStrands_noRC") # Parameters setting binSize <- NULL # automatically determine bin size minBinSize <- 200 # min bin size in automatic bin size selection maxBinSize <- 500 # max bin size in automatic bin size selection multicore <- TRUE # use multicore biomart <- "ensembl" # use archive to get ensembl 65 biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name goAnno <- "org.Hs.eg.db" # GO annotation database seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, reverseComplement = TRUE, genomeBuild = "hg19", uniqueHit = TRUE, assignMultihits = TRUE, rerunWithDisambiguatedMultihits = TRUE, binSize=binSize, minBinSize = minBinSize, maxBinSize = maxBinSize, biomart=biomart, goAnno = goAnno, biomaRt_dataset = biomaRt_dataset, multicore=multicore, outDir=outDir) #@ outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_IgG_minus") seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, reverseComplement = TRUE, genomeBuild = "hg19", strandType = "-", uniqueHit = TRUE, assignMultihits = TRUE, rerunWithDisambiguatedMultihits = TRUE, binSize=binSize, minBinSize = minBinSize, maxBinSize = maxBinSize, biomart=biomart, goAnno = goAnno, biomaRt_dataset = biomaRt_dataset, multicore=multicore, outDir=outDir) #@ bamFiles=c( "PHR21r-bcatMono.bam", "PHR23r-FUBP2.bam", "PHR20r-bcatPoly.bam", "PHR22r-TCF4.bam", "PHR24r-IgG.bam") cNAME <- "IgG" bamFiles=c( "PHR19r-Input.bam", "PHR21r-bcatMono.bam", "PHR23r-FUBP2.bam", "PHR20r-bcatPoly.bam", "PHR22r-TCF4.bam") cNAME <- "Input" # output file directory outDir <- file.path(getwd(), "RIPSeeker_IgG1") # Parameters setting binSize <- 10000 # automatically determine bin size minBinSize <- NULL # min bin size in automatic bin size selection maxBinSize <- NULL # max bin size in automatic bin size selection multicore <- TRUE # use multicore strandType <- "+" # set strand type to minus strand biomart <- "ensembl" # use archive to get ensembl 65 biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name goAnno <- "org.Hs.eg.db" # GO annotation database seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, reverseComplement = TRUE, genomeBuild = "hg19", strandType = strandType, uniqueHit = TRUE, assignMultihits = TRUE, rerunWithDisambiguatedMultihits = TRUE, binSize=binSize, minBinSize = minBinSize, maxBinSize = maxBinSize, biomart=biomart, goAnno = goAnno, biomaRt_dataset = biomaRt_dataset, multicore=multicore, outDir=outDir) #@ bamFiles=c( "PHR23r-FUBP2.bam", "PHR24r-IgG.bam") cNAME <- "IgG" outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_IgG") seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, reverseComplement = TRUE, genomeBuild = "hg19", uniqueHit = TRUE, assignMultihits = TRUE, rerunWithDisambiguatedMultihits = TRUE, binSize=binSize, minBinSize = minBinSize, maxBinSize = maxBinSize, biomart=biomart, goAnno = goAnno, biomaRt_dataset = biomaRt_dataset, multicore=multicore, outDir=outDir) txDbName <- "biomart" biomart <- "ENSEMBL_MART_ENSEMBL" # use archive to get ensembl 65 # dataset <- "mmusculus_gene_ensembl" # host <- "dec2011.archive.ensembl.org" # use ensembl 65 for annotation host="grch37.ensembl.org" dataset="hsapiens_gene_ensembl" resultlist <- rulebaseRIPSeek(bamFiles, cNAME, dataset=dataset, txDbName=txDbName, biomart=biomart, host=host, by="tx",moreRIPGeneInfo=FALSE,moreGeneInfo=FALSE) df <- resultlist$rpkmDF write.table(df, file="/mnt/max/c/genomics_facility/proton/run341/www/RIPSeeker_FUBP2_vs_IgG_RPKM.csv", append=FALSE, quote=TRUE, sep = ";", eol = "\n", na = "NA", dec = ".", row.names=TRUE, col.names=TRUE, qmethod = c("escape", "double"), fileEncoding = "") "ENST00000416931";3;1.146037227204;156;48.3139550209789;0.0237206253701724 "ENST00000457540";35;4.76417774527334;352;38.8448185373506;0.122646415266233 "ENST00000414273";48;4.42074761913041;616;45.9944805364481;0.0961147417596598 rpkmDFData frame containing read count, RPKM for the RIP (or treatment) and control, foldchange ripRPKM <- computeRPKM(bamFiles[1], dataset="hsapiens_gene_ensembl", paired=FALSE, moreGeneInfo=TRUE, justRPKM=FALSE, idType="ensembl_transcript_id", featureType="exon",txDbName="biomart", biomart="ensembl", by="tx", saveData=file.path(outDir, "ripRPKM")) rulebase.results <- rulebaseRIPSeek(bamFiles=bamFiles, cNAME="PHR24r-IgG.bam", myMin=1, featureGRanges=ripRPKM$featureGRanges, rpkmCutoff = 0.4, fcCutoff = 3, moreRIPGeneInfo=TRUE, idType="ensembl_transcript_id", biomart="ensembl",dataset="hsapiens_gene_ensembl", saveData=fiel.path(outDir,"rulebase.results")) head(ripRPKM$rpkmDF) df <- rulebase.results$rpkmDF df <- df[order(df$foldchange, decreasing=TRUE), ] write.table(df, file="/home/yourpath/csv", append=FALSE, quote=TRUE, sep = ";", eol = "\n", na = "NA", dec = ".", row.names=TRUE, col.names=TRUE, qmethod = c("escape", "double"), fileEncoding = "") 6) now_look_for_coverage RipGal <- readGappedAlignments("/data1/home/yourpath/control_accepted_hits.bam") CtlGal <- readGappedAlignments("/data1/home/yourpath/RIP_accepted_hits.bam") ripGR <- as(RipGal, "GRanges") ctlGR <- as(CtlGal, "GRanges") ripGRList <- split(ripGR, seqnames(ripGR)) ripGRList <- ripGRList[elementLengths(ripGRList) != 0] ctlGRList <- GRangesList(as.list(split(ctlGR, seqnames(ctlGR)))) ctlGRList <- ctlGRList[lapply(ctlGRList, length) != 0] binSize <- 1000 #c(bottom, left, top, right) par(mfrow=c(1, 2), mar=c(2, 2, 2, 0) + 0.1) tmp <- lapply(ripGRList, plotStrandedCoverage, binSize=binSize, xlab="", ylab="", plotLegend=TRUE, legend.cex=1.5, main="RIP", cex.main=1.5) tmp <- lapply(ctlGRList, plotStrandedCoverage, binSize=binSize, xlab="", ylab="", plotLegend=TRUE, legend.cex=1.5, main="CTL", cex.main=1.5) #@ outDir <- file.path(getwd(), "RIPSeeker_FUBP2_vs_IgG_bothStrands") # Parameters setting binSize <- NULL # automatically determine bin size minBinSize <- 200 # min bin size in automatic bin size selection maxBinSize <- 1000 # max bin size in automatic bin size selection multicore <- TRUE # use multicore biomart <- "ensembl" # use archive to get ensembl 65 biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name goAnno <- "org.Hs.eg.db" # GO annotation database seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, reverseComplement = TRUE, genomeBuild = "hg19", uniqueHit = TRUE, assignMultihits = TRUE, rerunWithDisambiguatedMultihits = TRUE, binSize=binSize, minBinSize = minBinSize, maxBinSize = maxBinSize, biomart=biomart, goAnno = goAnno, biomaRt_dataset = biomaRt_dataset, multicore=multicore, outDir=outDir) #@ txDbName <- "biomart" biomart <- "ENSEMBL_MART_ENSEMBL" # use archive to get ensembl 65 # dataset <- "mmusc