r - Automatically order the x axis on a ggplot2 histogram in a nice way -


I have this dataset (but with hundreds of samples):

# Example data in long format: one row per (sample, class) pair,
# 12 samples x 13 observed classes = 156 rows.
#   sample - sample identifier (character)
#   count  - integer count for that class within the sample
#   class  - factor with 14 levels "a".."n"; level "j" (code 10) is declared
#            but never occurs, so every sample has 13 rows, not 14.
# Built with structure() so the object carries the tibble classes without
# needing the tibble package at definition time.
data <- structure(
  list(
    sample = rep(
      c("c001", "c002", "c003", "c004", "c007", "c009",
        "c011", "c012", "c014", "c015", "c016", "c018"),
      each = 13L
    ),
    count = c(
      # c001
      0L, 130L, 0L, 10L, 0L, 20L, 568L, 23L, 6L, 77L, 616L, 230734L, 177L,
      # c002
      10L, 6396L, 0L, 5747L, 0L, 208L, 115189L, 13130L, 1L, 38L, 200L,
      2604L, 3104L,
      # c003
      0L, 95476L, 0L, 3591L, 0L, 7L, 26359L, 83L, 5L, 1L, 1521L, 36004L,
      9779L,
      # c004
      12L, 852L, 0L, 13L, 5L, 329L, 152053L, 288L, 2L, 0L, 0L, 530L, 1023L,
      # c007
      57L, 84L, 98060L, 122L, 0L, 8552L, 668L, 209L, 7L, 0L, 155L, 10159L,
      4934L,
      # c009
      15L, 47L, 83L, 1L, 0L, 54L, 462L, 89L, 43L, 0L, 127476L, 2614L, 3659L,
      # c011
      12L, 1L, 1L, 1061L, 0L, 84199L, 845L, 898L, 0L, 29L, 10L, 63L, 1834L,
      # c012
      87L, 36L, 7L, 407L, 20167L, 39969L, 1429L, 51072L, 0L, 0L, 27L, 9560L,
      3643L,
      # c014
      2899L, 10L, 0L, 380L, 0L, 82L, 1543L, 55L, 765L, 25172L, 29791L,
      39805L, 922L,
      # c015
      6L, 843L, 5L, 110L, 0L, 174L, 134582L, 575L, 15L, 65L, 37L, 19240L,
      830L,
      # c016
      1L, 1L, 0L, 0L, 0L, 63L, 156446L, 22L, 1L, 15L, 76L, 9710L, 793L,
      # c018
      128L, 4L, 1L, 2L, 0L, 1904L, 199L, 98779L, 0L, 0L, 11436L, 91L, 1813L
    ),
    # Codes 1-9 and 11-14 repeat for each of the 12 samples; all 14 levels
    # are kept so the unused level "j" stays part of the factor.
    class = factor(
      rep(letters[c(1:9, 11:14)], times = 12L),
      levels = letters[1:14]
    )
  ),
  row.names = c(NA, -156L),
  class = c("tbl_df", "tbl", "data.frame")
)

and I want to plot a histogram of the data:

library(tidyverse)

# One stacked bar per sample. position = "fill" rescales every bar to
# height 1, so segments show the within-sample proportion of each class.
# geom_col() is the ggplot2 shorthand for geom_bar(stat = "identity").
ggplot(data, aes(x = sample, y = count, fill = class)) +
  geom_col(color = "black", position = "fill")

enter image description here

But as you can see, the bars are not well ordered and it's not easy to compare the different samples.

So I reorganized them by hand to make the plot more "beautiful" (in some ways):

# Hand-picked bar order: the factor levels of `sample` set the left-to-right
# order of the bars on the x axis.
data$sample <- factor(
  data$sample,
  levels = c("c001", "c014", "c009", "c018", "c012", "c004",
             "c016", "c002", "c015", "c011", "c003", "c007")
)

# Same proportional stacked bar chart as before, now in the manual order.
# geom_col() is the ggplot2 shorthand for geom_bar(stat = "identity").
ggplot(data, aes(x = sample, y = count, fill = class)) +
  geom_col(color = "black", position = "fill")

enter image description here

It's not the best order, but it's easier to compare proportions between similar samples.

In the end, I want to make plots like these (with facet_grid), but let's start from the beginning.

enter image description here source

There is no clear best way to do this. The first thing you have to do is define some sort of dissimilarity measure between samples. One minus the correlation seems to be one (of many) possible candidates. Then you can look at how to order the results based on that similarity measure. Hierarchical clustering gives one possible order.

The following code was written for sample data that is ordered and complete. Otherwise you may have to adjust it.

# Unique samples, in order of first appearance. as.character() makes this
# work whether `sample` is still character or has already become a factor.
samples <- unique(as.character(data$sample))

# Class x sample matrix of counts (columns follow `samples`). Building the
# matrix by class aligns the profiles by class label rather than by row
# position, so the result does not depend on how the rows are sorted.
# Class levels that never occur, and missing (class, sample) pairs, show up
# as NA and are skipped pairwise by cor() below.
count_matrix <- tapply(
  data$count,
  list(data$class, factor(as.character(data$sample), levels = samples)),
  sum
)

## Dissimilarity measure: 1 - Pearson correlation between sample profiles.
dm <- 1 - cor(count_matrix, use = "pairwise.complete.obs")

# hclust() cannot handle missing dissimilarities; these arise when a sample
# has constant counts (zero variance) or shares too few classes with another.
if (anyNA(dm)) {
  stop(
    "Correlation is undefined for at least one pair of samples ",
    "(constant counts or too few shared classes).",
    call. = FALSE
  )
}

# Single linkage clustering; the dendrogram's leaf order puts similar
# samples next to each other.
hc <- hclust(as.dist(dm), method = "single")

# Reorder the factor levels so the bars follow the clustering order.
data$sample <- factor(data$sample, levels = samples[hc$order])

# Plot: proportional stacked bars in the clustered order.
# geom_col() is the ggplot2 shorthand for geom_bar(stat = "identity").
ggplot(data, aes(x = sample, y = count, fill = class)) +
  geom_col(color = "black", position = "fill")

ordered plot


Comments

Popular posts from this blog

Is there a better way to structure post methods in Class Based Views -

performance - Why is XCHG reg, reg a 3 micro-op instruction on modern Intel architectures? -

c# - Asp.net web api : redirect unauthorized requst to forbidden page -