The solutions given below are just one way of obtaining the desired plots! There are probably several different ways you could code to get the same plots.
To look at the differences between the normalization methods, all you need to do is replace the filename counts_raw.txt
with each of the other count files (counts_filtered.txt, counts_vst.txt and counts_deseq2.txt).
Task Plot 1:
# Read the four gene-count tables (raw, filtered, vst, deseq2). Each file is
# tab-separated with a header row. `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
gc_raw <- read.table(file = "data/counts_raw.txt", sep = "\t", header = TRUE)
gc_filt <- read.table(
  file = "data/counts_filtered.txt", sep = "\t", header = TRUE
)
gc_vst <- read.table(file = "data/counts_vst.txt", sep = "\t", header = TRUE)
gc_deseq <- read.table(
  file = "data/counts_deseq2.txt", sep = "\t", header = TRUE
)

# The sample metadata is semicolon-separated, also with a header row.
md <- read.table(file = "data/metadata.csv", sep = ";", header = TRUE)
# Combine the four count tables into one long table with one row per
# Gene x Sample_ID x Method, then attach the sample metadata.
#
# Each table is first reshaped so that the sample columns (everything except
# `Gene`) become `Sample_ID` / value pairs; the tables are then joined on
# Gene + Sample_ID, giving one value column per method.
# pivot_longer() is used instead of the superseded gather().
gene_counts_all <-
  gc_raw %>%
  pivot_longer(-Gene, names_to = "Sample_ID", values_to = "Raw") %>%
  full_join(
    gc_filt %>%
      pivot_longer(-Gene, names_to = "Sample_ID", values_to = "Filtered"),
    by = c("Gene", "Sample_ID")
  ) %>%
  full_join(
    gc_vst %>%
      pivot_longer(-Gene, names_to = "Sample_ID", values_to = "VST"),
    by = c("Gene", "Sample_ID")
  ) %>%
  full_join(
    gc_deseq %>%
      pivot_longer(-Gene, names_to = "Sample_ID", values_to = "DESeq2"),
    by = c("Gene", "Sample_ID")
  ) %>%
  # Stack the four method columns into Method / count. Gene x sample
  # combinations that are missing from a table produce NA after the full
  # joins; values_drop_na = TRUE removes those rows (previously done with a
  # separate filter(!is.na(count)) step).
  pivot_longer(
    Raw:DESeq2,
    names_to = "Method",
    values_to = "count",
    values_drop_na = TRUE
  ) %>%
  full_join(md, by = "Sample_ID")
# Turn the grouping columns into factors with explicit level order, so that
# axes, legends and facets follow this order instead of alphabetical order.
gene_counts_all <- gene_counts_all %>%
  mutate(
    Time = factor(Time, levels = c("t0", "t2", "t6", "t24")),
    Replicate = factor(Replicate, levels = c("A", "B", "C")),
    Method = factor(Method, levels = c("Raw", "Filtered", "DESeq2", "VST"))
  )
# Plot 1: dodged bar chart of the mean log10(count + 1) per time point and
# replicate, with error bars of +/- se, in one facet per method.
# NOTE(review): se() is not defined in this section; it is presumably a
# standard-error helper defined earlier in the material - confirm it is
# available before running.
gene_counts_all %>%
  group_by(Time, Replicate, Method) %>%
  summarise(
    mean = mean(log10(count + 1)),
    se = se(log10(count + 1)),
    # Return an ungrouped result; this also silences the "summarise() has
    # grouped output" message.
    .groups = "drop"
  ) %>%
  ggplot(aes(x = Time, y = mean, fill = Replicate)) +
  # geom_col() is the direct equivalent of geom_bar(stat = "identity").
  geom_col(position = position_dodge2()) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    position = position_dodge2(.9, padding = .6)
  ) +
  # Free scales: each method gets its own axis ranges.
  facet_wrap(~Method, scales = "free")
Task Plot 2:
# Plot 2: box plots of log10(count + 1) per sample, filled by time point, with
# one panel per Method x Replicate combination (three panels per row).
# The group_by() step that used to precede ggplot() was dropped: ggplot2 does
# not use dplyr grouping, so it had no effect on the plot.
gene_counts_all %>%
  ggplot() +
  geom_boxplot(
    mapping = aes(x = Sample_Name, y = log10(count + 1), fill = Time)
  ) +
  # `Method + Replicate` is the documented way to facet by two variables and
  # yields the same panels as the former `Method * Replicate`.
  facet_wrap(~ Method + Replicate, ncol = 3, scales = "free")
# Line plot of the mean log10(count + 1) over time, one panel per replicate.
# NOTE(review): gc_long and se() are not defined in this section; both are
# expected to exist already - confirm before running.
gc_long %>%
  group_by(Time, Replicate) %>%
  summarise(
    mean = mean(log10(count + 1)),
    se = se(log10(count + 1))
  ) %>%
  ggplot(aes(x = Time, y = mean, color = Replicate)) +
  # group = 1 joins all time points within a panel into a single line.
  geom_line(aes(group = 1), size = 2) +
  facet_wrap(~Replicate) +
  # Only these three time points are kept on the x axis.
  scale_x_discrete(limits = c("t0", "t2", "t24")) +
  scale_y_continuous(
    limits = c(0.4, 0.8),
    breaks = seq(0.4, 0.8, by = 0.05)
  ) +
  # The colour legend is hidden.
  guides(color = "none") +
  labs(y = "mean(log10(count + 1))") +
  theme_light() +
  theme(
    axis.text = element_text(face = "bold", size = 12),
    axis.title = element_text(face = "bold", color = "#C84DF9", size = 14),
    axis.ticks = element_blank()
  )
# Main panel: sepal length vs. sepal width for the iris data, coloured by
# species.
p4 <- ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Species)) +
  geom_point(alpha = 0.6, size = 3) +
  theme_classic(base_size = 12) +
  border()

# Density of sepal length per species; placed above the scatter plot in the
# layout below, without its own legend.
d1 <- ggplot(iris, aes(Sepal.Length, fill = Species)) +
  geom_density(alpha = 0.6) +
  theme_classic() +
  clean_theme() +
  theme(legend.position = "none")

# Density of sepal width per species, rotated; placed to the right of the
# scatter plot in the layout below, without its own legend.
d2 <- ggplot(iris, aes(Sepal.Width, fill = Species)) +
  geom_density(alpha = 0.6) +
  theme_classic() +
  clean_theme() +
  theme(legend.position = "none") +
  rotate()

# 2 x 2 layout filled row by row: d1 top-left, empty top-right, p4
# bottom-left, d2 bottom-right. The first column and the second row get three
# times the space of the others, and a single legend is shared.
ggarrange(
  d1, NULL,
  p4, d2,
  nrow = 2, ncol = 2,
  widths = c(3, 1), heights = c(1, 3),
  align = "hv",
  common.legend = TRUE
)
# Scatter plot of the first two MDS coordinates, coloured by time point and
# shaped by replicate, with the share of variance per axis in the axis titles.
# NOTE(review): gc_mds is not defined in this section; it must provide `$eig`
# and `$points` (presumably the result of cmdscale(..., eig = TRUE) - confirm).

# Share of the eigenvalue sum carried by each axis, as a percentage with three
# significant digits. Rounding after scaling to percent avoids floating-point
# artefacts from multiplying an already rounded fraction by 100.
Eigenvalues <- gc_mds$eig
Variance <- Eigenvalues / sum(Eigenvalues)
Variance1 <- signif(100 * Variance[1], 3)
Variance2 <- signif(100 * Variance[2], 3)

# Coordinates as a data frame (columns V1, V2, ...) with the row names moved
# into Sample_ID so that the sample metadata can be joined on.
gc_mds_long <- gc_mds$points %>%
  as.data.frame() %>%
  rownames_to_column("Sample_ID") %>%
  full_join(md, by = "Sample_ID")

# Fix the level order of the metadata columns used in the plot.
gc_mds_long$Sample_Name <- factor(
  gc_mds_long$Sample_Name,
  levels = c(
    "t0_A", "t0_B", "t0_C",
    "t2_A", "t2_B", "t2_C",
    "t6_A", "t6_B", "t6_C",
    "t24_A", "t24_B", "t24_C"
  )
)
gc_mds_long$Time <- factor(
  gc_mds_long$Time,
  levels = c("t0", "t2", "t6", "t24")
)
gc_mds_long$Replicate <- factor(
  gc_mds_long$Replicate,
  levels = c("A", "B", "C")
)

ggplot(gc_mds_long, aes(x = V1, y = V2, color = Time)) +
  geom_point(size = 3, aes(shape = Replicate)) +
  # paste0() avoids the extra separator spaces that paste() inserted
  # ("PCO1:  57.6 %" became "PCO1: 57.6%").
  xlab(paste0("PCO1: ", Variance1, "%")) +
  ylab(paste0("PCO2: ", Variance2, "%")) +
  # Dashed reference lines through the origin.
  geom_vline(xintercept = 0, linetype = 2) +
  geom_hline(yintercept = 0, linetype = 2) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
# Print the R version, platform and attached/loaded packages of the current
# session; the output recorded for these solutions follows as `##` lines.
sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] shiny_1.7.2 ggrepel_0.9.1 wesanderson_0.3.6
## [4] gridExtra_2.3 jpeg_0.1-9 ggpubr_0.4.0
## [7] cowplot_1.1.1 ggthemes_4.2.4 scales_1.2.1
## [10] forcats_0.5.2 stringr_1.4.1 purrr_0.3.5
## [13] readr_2.1.3 tidyr_1.2.1 tibble_3.1.8
## [16] tidyverse_1.3.2 reshape2_1.4.4 ggplot2_3.3.6
## [19] formattable_0.2.1 kableExtra_1.3.4 dplyr_1.0.10
## [22] lubridate_1.8.0 leaflet_2.1.1 yaml_2.3.5
## [25] fontawesome_0.3.0.9000 captioner_2.2.3 bookdown_0.29
## [28] knitr_1.40
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-155 fs_1.5.2 webshot_0.5.4
## [4] httr_1.4.4 tools_4.1.3 backports_1.4.1
## [7] bslib_0.4.0 utf8_1.2.2 R6_2.5.1
## [10] DBI_1.1.3 mgcv_1.8-39 colorspace_2.0-3
## [13] withr_2.5.0 processx_3.7.0 tidyselect_1.2.0
## [16] compiler_4.1.3 cli_3.4.1 rvest_1.0.3
## [19] xml2_1.3.3 labeling_0.4.2 sass_0.4.2
## [22] callr_3.7.2 systemfonts_1.0.4 digest_0.6.29
## [25] rmarkdown_2.17 svglite_2.1.0 pkgconfig_2.0.3
## [28] htmltools_0.5.3 dbplyr_2.2.1 fastmap_1.1.0
## [31] highr_0.9 htmlwidgets_1.5.4 rlang_1.0.6
## [34] readxl_1.4.1 xaringan_0.26 rstudioapi_0.14
## [37] jquerylib_0.1.4 generics_0.1.3 farver_2.1.1
## [40] jsonlite_1.8.2 crosstalk_1.2.0 car_3.1-0
## [43] googlesheets4_1.0.1 magrittr_2.0.3 Matrix_1.5-1
## [46] Rcpp_1.0.9 munsell_0.5.0 fansi_1.0.3
## [49] abind_1.4-5 lifecycle_1.0.3 stringi_1.7.8
## [52] carData_3.0-5 plyr_1.8.7 promises_1.2.0.1
## [55] crayon_1.5.2 lattice_0.20-45 haven_2.5.1
## [58] splines_4.1.3 hms_1.1.2 ps_1.7.1
## [61] pillar_1.8.1 ggsignif_0.6.3 reprex_2.0.2
## [64] glue_1.6.2 evaluate_0.17 modelr_0.1.9
## [67] httpuv_1.6.6 vctrs_0.4.2 tzdb_0.3.0
## [70] cellranger_1.1.0 gtable_0.3.1 assertthat_0.2.1
## [73] cachem_1.0.6 xfun_0.33 mime_0.12
## [76] xtable_1.8-4 broom_1.0.1 later_1.3.0
## [79] rstatix_0.7.0 googledrive_2.0.0 viridisLite_0.4.1
## [82] gargle_1.2.1 memoise_2.0.1 ellipsis_0.3.2