wt=cbind(log2(1+(all_m0[,33])),log2(1+(all_m0[,17])))
ko=cbind(log2(1+(all_m0[,32])),log2(1+(all_m0[,16])))
tot=cbind(wt,ko)
#wiss={};  for (i in 2:20){k=kmeans(tot, centers=i, iter.max=200);wiss=c(wiss,k$tot.withinss);print(c(i, k$tot.withinss)) }
#plot(wiss)
#opt
i=12;
tot=cbind(wt,ko)
k=kmeans(tot, centers=i, iter.max=200);
pdf("m0_rna_vs_poly.pdf")
plot(k$centers[,1], k$centers[,2], pch="w", col="blue",xlab="log2(RPKM_abundance)",ylab="log2(RPKM_translation)",xlim=c(0,8.5),ylim=c(0,8.5))
points(k$centers[,3], k$centers[,4], pch="k", col="red") #+48 to get labels correct
dev.off()


#all2_m0=all_m0[(log2(1+all_m0[,17])<1)&(log2(1+all_m0[,16])<1),]
#ks.test(log2(1+(all2_m0[,33])),log2(1+(all2_m0[,32])))



wt=cbind(log2(1+(all_m1[,33])),log2(1+(all_m1[,17])))
ko=cbind(log2(1+(all_m1[,32])),log2(1+(all_m1[,16])))
i=12;
tot=cbind(wt,ko)
k=kmeans(tot, centers=i, iter.max=200);
pdf("m1_rna_vs_poly.pdf")
plot(k$centers[,1], k$centers[,2], pch="w", col="blue",xlab="log2(RPKM_abundance)",ylab="log2(RPKM_translation)",xlim=c(0,8.5),ylim=c(0,8.5))
points(k$centers[,3], k$centers[,4], pch="k", col="red") #+48 to get labels correct
dev.off()

wt=cbind(log2(1+(all_m2[,33])),log2(1+(all_m2[,17])))
ko=cbind(log2(1+(all_m2[,32])),log2(1+(all_m2[,16])))
i=12;
tot=cbind(wt,ko)
k=kmeans(tot, centers=i, iter.max=200);
pdf("m2_rna_vs_poly.pdf")
plot(k$centers[,1], k$centers[,2], pch="w", col="blue",xlab="log2(RPKM_abundance)",ylab="log2(RPKM_translation)",xlim=c(0,8.5),ylim=c(0,8.5))
points(k$centers[,3], k$centers[,4], pch="k", col="red") #+48 to get labels correct
dev.off()

wt=cbind(log2(1+(all_2h[,33])),log2(1+(all_2h[,17])))
ko=cbind(log2(1+(all_2h[,32])),log2(1+(all_2h[,16])))
i=12;
tot=cbind(wt,ko)
k=kmeans(tot, centers=i, iter.max=200);
#plotcluster(k, k$cluster)      #from library(fpc)
#plot (tot[,1],tot[,2])
#plot(k$centers[,1], k$centers[,2], pch=k$cluster+48, col=k$cluster) #+48 to get labels correc,xlim=c(0,8.5),ylim=c(0,8.5)t
#points(k$centers[,1], k$centers[,2], pch=k$cluster+48, col=k$cluster) #+48 to get labels correct
pdf("2h_rna_vs_poly.pdf")
plot(k$centers[,1], k$centers[,2], pch="w", col="blue",xlab="log2(RPKM_abundance)",ylab="log2(RPKM_translation)",xlim=c(0,8.5),ylim=c(0,8.5))
points(k$centers[,3], k$centers[,4], pch="k", col="red") #+48 to get labels correct
dev.off()


wt=cbind(log2(1+(all_6h[,33])),log2(1+(all_6h[,17])))
ko=cbind(log2(1+(all_6h[,32])),log2(1+(all_6h[,16])))
k=kmeans(tot, centers=i, iter.max=200);
pdf("6h_rna_vs_poly.pdf")
plot(k$centers[,1], k$centers[,2], pch="w", col="blue",xlab="log2(RPKM_abundance)",ylab="log2(RPKM_translation)",xlim=c(0,8.5),ylim=c(0,8.5))
points(k$centers[,3], k$centers[,4], pch="k", col="red") #+48 to get labels correct
dev.off()



library(MASS)
library(RColorBrewer)
rf <- colorRampPalette(rev(brewer.pal(11,'Spectral')))
r <- rf(32)


Welcome!
Here you will find daily news and tutorials about R, contributed by over 750 bloggers.
There are many ways to follow us -
By e-mail:
On Facebook:
If you are an R blogger yourself you are invited to add your own R content feed to this site (Non-English R bloggers should add themselves- here)
RSS Jobs for R-users

    Customer Success Representative
    Movement Building Analyst
    Business Intelligence Analyst
    Innovation Fellow
    Postdoctoral position Stats, Comp. Biol. @ Madrid, Spain.

Recent Posts

    Create Animation in R : Learn by Examples
    Predicting Car Battery Failure With R And H2O – Study
    Practical Data Science with R, half off sale!
    Rstudio & ThinkR roadshow – June 6 – Paris
    [R]eady for Production: a Joint Event with RStudio and EODA
    Royal Society of Biology: Introduction to Reproducible Analyses in R
    Spotlight on: Julia Silge, Stack Overflow
    Comparing Frequentist, Bayesian and Simulation methods and conclusions
    Analysing the HIV pandemic, Part 4: Classification of lab samples
    MRAN snapshots, and you
    Deep (learning) like Jacques Cousteau – Part 5 – Vector addition
    New Color Palette for R
    Easy quick PCA analysis in R
    Create a CLI for R with npm
    Bug when Creating Reference Maps with Choroplethr

Other sites

    SAS blogs
    Jobs for R-users

5 Ways to Do 2D Histograms in R
September 1, 2014
By Myles Harrison

(This article was first published on everyday analytics, and kindly contributed to R-bloggers)

Share
Tweet
Introduction

Lately I was trying to put together some 2D histograms in R and found that there are many ways to do it, with directions on how to do so scattered across the internet in blogs, forums and of course, Stackoverflow.

As such I thought I’d give each a go and also put all of them together here for easy reference while also highlighting their difference.

For those not “in the know” a 2D histogram is an extensions of the regular old histogram, showing the distribution of values in a data set across the range of two quantitative variables. It can be considered a special case of the heat map, where the intensity values are just the count of observations in the data set within a particular area of the 2D space (bucket or bin).

So, quickly, here are 5 ways to make 2D histograms in R, plus one additional figure which is pretty neat.

First and foremost I get the palette looking all pretty using RColorBrewer, and then chuck some normally distributed data into a data frame (because I’m lazy). Also one scatterplot to justify the use of histograms.

# Color housekeeping
library(RColorBrewer)
rf <- colorRampPalette(rev(brewer.pal(11,'Spectral')))
r <- rf(32)

# Create normally distributed data for plotting
x <- rnorm(mean=1.5, 5000)
y <- rnorm(mean=1.6, 5000)
df <- data.frame(x,y)

# Plot
plot(df, pch=16, col='black', cex=0.5)

Option 1: hexbin
The hexbin package slices the space into 2D hexagons and then counts the number of points in each hexagon. The nice thing about hexbin is that it provides a legend for you, which adding manually in R is always a pain. The default invocation provides a pretty sparse looking monochrome figure. Adding the colramp parameter with a suitable vector produced from colorRampPalette makes things nicer. The legend placement is a bit strange – I adjusted it after the fact though you just as well do so in the R code.

##### OPTION 1: hexbin from package 'hexbin' #######
library(hexbin)
# Create hexbin object and plot
h <- hexbin(df)
plot(h)
plot(h, colramp=rf)

Using the hexbinplot function provides greater flexibility, allowing specification of endpoints for the bin counting, and also allowing the provision of a transformation functions. Here I did log scaling. Also it appears to handle the legend placement better; no adjustment was required for these figures.

# hexbinplot function allows greater flexibility
hexbinplot(y~x, data=df, colramp=rf)
# Setting max and mins
hexbinplot(y~x, data=df, colramp=rf, mincnt=2, maxcnt=60)

# Scaling of legend - must provide both trans and inv functions
hexbinplot(y~x, data=df, colramp=rf, trans=log, inv=exp)

Option 2: hist2d
Another simple way to get a quick 2D histogram is to use the hist2d function from the gplots package. Again, the default invocation leaves a lot to be desired:

##### OPTION 2: hist2d from package 'gplots' #######
library(gplots)

# Default call
h2 <- hist2d(df)

Setting the colors and adjusting the bin sizing coarser yields a more desirable result. We can also scale so that the intensity is logarithmic as before.

# Coarser binsizing and add colouring
h2 <- hist2d(df, nbins=25, col=r)

# Scaling with log as before
h2 <- hist2d(df, nbins=25, col=r, FUN=function(x) log(length(x)))

Option 3: stat_2dbin from ggplot
And of course, where would a good R article be without reference to the ggplot way to do things? Here we can use the stat_bin2d function, other added to a ggplot object or as a type of geometry in the call to qplot.

##### OPTION 3: stat_bin2d from package 'ggplot' #######
library(ggplot2)

# Default call (as object)
p <- ggplot(df, aes(x,y))
h3 <- p + stat_bin2d()
h3

# Default call (using qplot)
qplot(x,y,data=df, geom='bin2d')

Again, we probably want to adjust the bin sizes to a desired number, and also ensure that ggplot uses our colours that we created before. The latter is done by adding the scale_fill_gradientn function with our colour vector as the colours argument. Log scaling is also easy to add using the trans parameter.

# Add colouring and change bins
h3 <- p + stat_bin2d(bins=25) + scale_fill_gradientn(colours=r)
h3

# Log scaling
h3 <- p + stat_bin2d(bins=25) + scale_fill_gradientn(colours=r, trans="log")
h3

Option 4: kde2d
Option #4 is to do kernel density estimation using kde2d from the MASS library. Here we are actually starting to stray from discrete bucketing of histograms to true density estimation, as this function does interpolation.
The default invocation uses n = 25 which is actually what we’ve been going with in this case. You can then plot the output using image().

Setting n higher does interpolation and we are into the realm of kernel density estimation, as you can set your “bin size” lower than how your data actually appear. Hadley Wickham notes that in R there are over 20 packages [PDF] with which to do density estimation so we’ll keep that to a separate discussion.

##### OPTION 4: kde2d from package 'MASS' #######
# Not a true heatmap as interpolated (kernel density estimation)




library(MASS)
library(RColorBrewer)
rf <- colorRampPalette(rev(brewer.pal(11,'Spectral')))
r <- rf(32)

# Default call 
k <- kde2d(log2(1+(all_m0[,33])),log2(1+(all_m0[,17])))
image(k, col=r)

# Adjust binning (interpolate - can be computationally intensive for large datasets)
k <- kde2d(log2(1+(all_m0[,33])),log2(1+(all_m0[,17])),n=100)
image(k, col=r)
# Contour plot overlayed on heat map image of results
#contour(k, add = TRUE)     # from base graphics package

png("dens_m0_ko_rna_vs_poly.png",width=1024,height=1024,pointsize = 22)
k <- kde2d(log2(1+(all_m0[,32])),log2(1+(all_m0[,16])),n=200,lims=c(0,13,0,13))
k2=k
k2$z=log10(k$z+1e-3)
image(k2, col=r,xlab="log2(RPKM_abundance)",ylab="log2(RPKM_translation)")
#contour(k2, add = TRUE,labcex=1)     # from base graphics package
contour(k2, add = TRUE,vfont = c("sans serif", "plain"),labcex=0.65)     # from base graphics package
dev.off()

png("dens_m0_wt_rna_vs_poly.png",width=1024,height=1024,pointsize = 22)
k <- kde2d(log2(1+(all_m0[,33])),log2(1+(all_m0[,17])),n=200,lims=c(0,13,0,13))
k2=k
k2$z=log10(k$z+1e-3)
image(k2, col=r,xlab="log2(RPKM_abundance)",ylab="log2(RPKM_translation)")
#contour(k2, add = TRUE,labcex=1)     # from base graphics package
contour(k2, add = TRUE,vfont = c("sans serif", "plain"),labcex=0.65)     # from base graphics package
dev.off()

png("dens_m1_ko_rna_vs_poly.png",width=1024,height=1024,pointsize = 22)
k <- kde2d(log2(1+(all_m1[,32])),log2(1+(all_m1[,16])),n=200,lims=c(0,13,0,13))
k2=k
k2$z=log10(k$z+1e-3)
image(k2, col=r,xlab="log2(RPKM_abundance)",ylab="log2(RPKM_translation)")
contour(k2, add = TRUE,vfont = c("sans serif", "bold"),labcex=0.65)     # from base graphics package
dev.off()
png("dens_m1_wt_rna_vs_poly.png",width=1024,height=1024,pointsize = 22)
k <- kde2d(log2(1+(all_m1[,33])),log2(1+(all_m1[,17])),n=200,lims=c(0,13,0,13))
k2=k
k2$z=log10(k$z+1e-3)
image(k2, col=r,xlab="log2(RPKM_abundance)",ylab="log2(RPKM_translation)")
contour(k2, add = TRUE,vfont = c("sans serif", "bold"),labcex=0.65)     # from base graphics package
dev.off()

png("dens_m2_ko_rna_vs_poly.png",width=1024,height=1024,pointsize = 22)
k <- kde2d(log2(1+(all_m2[,32])),log2(1+(all_m2[,16])),n=200,lims=c(0,13,0,13))
k2=k
k2$z=log10(k$z+1e-3)
image(k2, col=r,xlab="log2(RPKM_abundance)",ylab="log2(RPKM_translation)")
contour(k2, add = TRUE,vfont = c("sans serif", "bold"),labcex=0.65)     # from base graphics package
dev.off()
png("dens_m2_wt_rna_vs_poly.png",width=1024,height=1024,pointsize = 22)
k <- kde2d(log2(1+(all_m2[,33])),log2(1+(all_m2[,17])),n=200,lims=c(0,13,0,13))
k2=k
k2$z=log10(k$z+1e-3)
image(k2, col=r,xlab="log2(RPKM_abundance)",ylab="log2(RPKM_translation)")
contour(k2, add = TRUE,vfont = c("sans serif", "bold"),labcex=0.65)     # from base graphics package
dev.off()


png("dens_2h_ko_rna_vs_poly.png",width=1024,height=1024,pointsize = 22)
k <- kde2d(log2(1+(all_2h[,32])),log2(1+(all_2h[,16])),n=200,lims=c(0,13,0,13))
k2=k
k2$z=log10(k$z+1e-3)
image(k2, col=r,xlab="log2(RPKM_abundance)",ylab="log2(RPKM_translation)")
contour(k2, add = TRUE,vfont = c("sans serif", "bold"),labcex=0.65)     # from base graphics package
dev.off()
png("dens_2h_wt_rna_vs_poly.png",width=1024,height=1024,pointsize = 22)
k <- kde2d(log2(1+(all_2h[,33])),log2(1+(all_2h[,17])),n=200,lims=c(0,13,0,13))
k2=k
k2$z=log10(k$z+1e-3)
image(k2, col=r,xlab="log2(RPKM_abundance)",ylab="log2(RPKM_translation)")
contour(k2, add = TRUE,vfont = c("sans serif", "bold"),labcex=0.65)     # from base graphics package
dev.off()

png("dens_6h_ko_rna_vs_poly.png",width=1024,height=1024,pointsize = 22)
k <- kde2d(log2(1+(all_6h[,32])),log2(1+(all_6h[,16])),n=200,lims=c(0,13,0,13))
k2=k
k2$z=log10(k$z+1e-3)
image(k2, col=r,xlab="log2(RPKM_abundance)",ylab="log2(RPKM_translation)")
contour(k2, add = TRUE,vfont = c("sans serif", "bold"),labcex=0.65)     # from base graphics package
dev.off()
png("dens_6h_wt_rna_vs_poly.png",width=1024,height=1024,pointsize = 22)
k <- kde2d(log2(1+(all_6h[,33])),log2(1+(all_6h[,17])),n=200,lims=c(0,13,0,13))
k2=k
k2$z=log10(k$z+1e-3)
image(k2, col=r,xlab="log2(RPKM_abundance)",ylab="log2(RPKM_translation)")
#image(k2, col=r,xlab="log2(RPKM_abundance)",ylab="log2(RPKM_translation)")
contour(k2, add = TRUE,vfont = c("sans serif", "bold"),labcex=0.65)     # from base graphics package
dev.off()





#@ worse:
require(hexbin)
hbin=hexbin(log2(1+(all_m0[,33])),log2(1+(all_m0[,17])))
png("dens_m0_wt_rna_vs_poly.png")
plot(hbin)
dev.off()
hbin=hexbin(log2(1+(all_m0[,32])),log2(1+(all_m0[,16])))
png("dens_m0_ko_rna_vs_poly.png")
plot(hbin)
dev.off()

hbin=hexbin(log2(1+(all_6h[,33])),log2(1+(all_6h[,17])))
png("dens_m0_wt_rna_vs_poly.png")
plot(hbin)
dev.off()
hbin=hexbin(log2(1+(all_6h[,32])),log2(1+(all_6h[,16])))
png("dens_m0_ko_rna_vs_poly.png")
plot(hbin)
dev.off()
<
