R - Automatically order the x axis of a ggplot2 histogram in a nice way
I have a dataset like this one (but with hundreds of samples):
# Example data: 12 samples x 13 classes, one integer count per combination
# (156 rows). Rows are grouped by sample and, within each sample, ordered by
# class. Built with structure() so the object carries the tibble classes
# without needing the tibble package to construct it.
data <- structure(
  list(
    # Each sample id is repeated once per class (13 rows per sample).
    sample = rep(
      c(
        "c001", "c002", "c003", "c004", "c007", "c009",
        "c011", "c012", "c014", "c015", "c016", "c018"
      ),
      each = 13L
    ),
    # One line of 13 counts per sample, in the same order as `sample`.
    count = c(
      0L, 130L, 0L, 10L, 0L, 20L, 568L, 23L, 6L, 77L, 616L, 230734L, 177L,
      10L, 6396L, 0L, 5747L, 0L, 208L, 115189L, 13130L, 1L, 38L, 200L, 2604L, 3104L,
      0L, 95476L, 0L, 3591L, 0L, 7L, 26359L, 83L, 5L, 1L, 1521L, 36004L, 9779L,
      12L, 852L, 0L, 13L, 5L, 329L, 152053L, 288L, 2L, 0L, 0L, 530L, 1023L,
      57L, 84L, 98060L, 122L, 0L, 8552L, 668L, 209L, 7L, 0L, 155L, 10159L, 4934L,
      15L, 47L, 83L, 1L, 0L, 54L, 462L, 89L, 43L, 0L, 127476L, 2614L, 3659L,
      12L, 1L, 1L, 1061L, 0L, 84199L, 845L, 898L, 0L, 29L, 10L, 63L, 1834L,
      87L, 36L, 7L, 407L, 20167L, 39969L, 1429L, 51072L, 0L, 0L, 27L, 9560L, 3643L,
      2899L, 10L, 0L, 380L, 0L, 82L, 1543L, 55L, 765L, 25172L, 29791L, 39805L, 922L,
      6L, 843L, 5L, 110L, 0L, 174L, 134582L, 575L, 15L, 65L, 37L, 19240L, 830L,
      1L, 1L, 0L, 0L, 0L, 63L, 156446L, 22L, 1L, 15L, 76L, 9710L, 793L,
      128L, 4L, 1L, 2L, 0L, 1904L, 199L, 98779L, 0L, 0L, 11436L, 91L, 1813L
    ),
    # Factor stored as integer codes into the 14 levels below; code 10 ("j")
    # never occurs, so that level is present but unused.
    class = structure(
      rep(c(1:9, 11:14), times = 12L),
      .Label = c(
        "a", "b", "c", "d", "e", "f", "g",
        "h", "i", "j", "k", "l", "m", "n"
      ),
      class = "factor"
    )
  ),
  .Names = c("sample", "count", "class"),
  # Compact row names: c(NA, -n) means "automatic row names 1..n".
  row.names = c(NA, -156L),
  class = c("tbl_df", "tbl", "data.frame")
)
And I want to plot a histogram of the data:
# Attach the tidyverse (provides ggplot2).
library(tidyverse)

# One bar per sample, stacked by class. position = "fill" rescales every bar
# to height 1, so the bars show class proportions rather than raw counts.
# stat = "identity" uses the precomputed `count` column as the bar heights.
ggplot(data = data, aes(x = sample)) +
  geom_bar(
    aes(y = count, fill = class),
    color = "black",
    position = "fill",
    stat = "identity"
  )
But as you can see, the bars are not well ordered, and it is not easy to compare the different samples.
So I reorganized them by hand to make the plot more "beautiful" (in some ways):
# Fix the x-axis order by hand: ggplot2 draws a discrete axis in factor-level
# order, so the samples are listed here in the desired left-to-right order.
data$sample <- factor(
  data$sample,
  levels = c(
    "c001", "c014", "c009", "c018", "c012", "c004",
    "c016", "c002", "c015", "c011", "c003", "c007"
  )
)

# Same proportional stacked bar chart as before, now in the manual order.
ggplot(data = data, aes(x = sample)) +
  geom_bar(
    aes(y = count, fill = class),
    color = "black",
    position = "fill",
    stat = "identity"
  )
It is not the best order, but it is easier to compare proportions between similar samples.
In the end, I want to make plots like these (with facet_grid), but let's start at the beginning.
There is no clear best way to do this. The first thing you have to do is define some sort of dissimilarity measure between the samples. One minus the correlation seems to be one (of many) possible candidates. Then you can look at how to order the results based on that similarity measure. Hierarchical clustering gives one possible order.
In the following code I relied on the sample data being ordered and complete. Otherwise you may have to adjust it.
# Unique samples, in order of first appearance. as.character() keeps the
# comparison below working whether data$sample is a character vector or has
# already been converted to a factor.
samples <- as.character(unique(data$sample))

# Counts matrix with one column per sample. This relies on every sample
# having the same classes in the same row order, so fail early with a clear
# message if the samples do not even have the same number of rows.
counts_by_sample <- lapply(samples, function(s) data$count[data$sample == s])
stopifnot(length(unique(lengths(counts_by_sample))) == 1L)
count_mat <- do.call(cbind, counts_by_sample)

# Dissimilarity measure: 1 - correlation between the count profiles of each
# pair of samples. cor() on a matrix returns all pairwise column correlations
# at once.
dm <- 1 - cor(count_mat)

# Single linkage clustering
hc <- hclust(as.dist(dm), method = "single")

# Reorder: use the leaf order of the dendrogram as the factor-level order,
# which is the order ggplot2 uses for the x axis.
data$sample <- factor(data$sample, levels = samples[hc$order])

# Plot
ggplot(data = data, aes(x = sample)) +
  geom_bar(
    aes(y = count, fill = class),
    color = "black",
    position = "fill",
    stat = "identity"
  )
Comments
Post a Comment