<- read.table(file = "../data/counts_raw.txt", sep = "\t", header = T)
gc_raw <- read.table(file = "../data/counts_filtered.txt", sep = "\t", header = T)
gc_filt <- read.table(file = "../data/counts_vst.txt", sep = "\t", header = T)
gc_vst <- read.table(file = "../data/counts_deseq2.txt", sep = "\t", header = T)
gc_deseq <- read.table("../data/metadata.csv", header = T, sep = ";")
md rownames(md) <- md$Sample_ID
<- function(x) sqrt(var(x)/length(x))
se
<-
gene_counts_all %>% gather(Sample_ID, Raw, -Gene) %>%
gc_raw full_join(gc_filt %>% gather(Sample_ID, Filtered, -Gene), by = c("Gene", "Sample_ID")) %>%
full_join(gc_vst %>% gather(Sample_ID, VST, -Gene), by = c("Gene", "Sample_ID")) %>%
full_join(gc_deseq %>% gather(Sample_ID, DESeq2, -Gene), by = c("Gene", "Sample_ID")) %>%
gather(Method, count, Raw:DESeq2) %>%
filter(!is.na(count)) %>%
full_join(md, by = "Sample_ID")
$Time <- factor(gene_counts_all$Time, levels = c("t0","t2","t6","t24"))
gene_counts_all$Replicate <- factor(gene_counts_all$Replicate, levels = c("A","B","C"))
gene_counts_all$Method <- factor(gene_counts_all$Method, levels = c("Raw","Filtered","DESeq2","VST"))
gene_counts_all
%>%
gene_counts_all group_by(Time, Replicate, Method) %>%
summarise(mean=mean(log10(count +1)),se=se(log10(count +1))) %>%
ggplot(aes(x= Time, y= mean, fill = Replicate)) +
geom_bar(position = position_dodge2(), stat = "identity") +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), position = position_dodge2(.9, padding = .6)) +
facet_wrap(~Method, scales = "free")
The solutions given below are just one way of obtaining the desired plots! There are probably several different ways you could code to get the same plots.
1 ggplot basics
1.1 Exercise I and II
For these exercises the solutions are already part of the material, all you need to do is to replace the filename counts_raw.txt
with each of the different files to look at the differences between different normalization methods.
1.2 Exercise III
1.2.1 Task 3.1
1.2.2 Task 3.2
%>%
gene_counts_all group_by(Time, Replicate, Method) %>%
ggplot() +
geom_boxplot(mapping = aes(x = Sample_Name, y = log10(count + 1), fill = Time)) +
facet_wrap(~Method*Replicate, ncol = 3, scales = "free")
2 Advanced ggplot
2.1 Exercise I
<- gc_raw %>%
gc_long gather(Sample_ID, count, -Gene) %>%
full_join(md, by = "Sample_ID") %>%
select(Sample_ID, everything()) %>%
select(-c(Gene,count), c(Gene,count))
$Sample_Name <- factor(gc_long$Sample_Name, levels = c("t0_A","t0_B","t0_C","t2_A","t2_B","t2_C","t6_A","t6_B","t6_C","t24_A","t24_B","t24_C"))
gc_long$Time <- factor(gc_long$Time, levels = c("t0","t2","t6","t24"))
gc_long$Replicate <- factor(gc_long$Replicate, levels = c("A","B","C"))
gc_long
%>%
gc_long group_by(Time, Replicate) %>%
summarise(mean=mean(log10(count +1)),se=se(log10(count +1))) %>%
ggplot(aes(x=Time, y=mean, color = Replicate)) +
facet_wrap(~Replicate) +
geom_line(aes(group=1), stat= "identity", size = 2) +
scale_x_discrete(limits= c("t0", "t2", "t24")) +
scale_y_continuous(limits = c(0.4,0.8), breaks = seq(0.4,0.8,0.05)) +
guides(color="none") +
ylab(label = "mean(log10(count + 1))") +
theme_light() +
theme(axis.text = element_text(face="bold", size=12),
axis.title = element_text(face="bold", color = "#C84DF9", size=14),
axis.ticks = element_blank())
2.2 Exercise II
library(ggpubr)
<- ggplot(data=iris,mapping=aes(x=Sepal.Length, y = Sepal.Width, color = Species)) +
p4 geom_point(size = 3, alpha = 0.6) +
theme_classic(base_size = 12) +
border()
<- ggplot(data=iris,mapping=aes(Sepal.Length, fill = Species)) +
d1 geom_density(alpha = 0.6) +
theme_classic() +
clean_theme() +
theme(legend.position = "none")
<- ggplot(data=iris,mapping=aes(Sepal.Width, fill = Species)) +
d2 geom_density(alpha = 0.6) +
theme_classic() +
clean_theme() +
theme(legend.position = "none") +
rotate()
ggarrange(d1, NULL, p4, d2,
ncol = 2, nrow = 2, align = "hv",
widths = c(3, 1), heights = c(1, 3),
common.legend = TRUE)
3 MDR and Gene Expression
3.1 Exercise I
row.names(gc_vst) <- gc_vst$Gene
$Gene <- NULL
gc_vst<- dist(t(gc_vst))
gc_dist <- cmdscale(gc_dist,eig=TRUE, k=2)
gc_mds <- gc_mds$eig
Eigenvalues <- Eigenvalues / sum(Eigenvalues)
Variance <- 100 * signif(Variance[1], 3)
Variance1 <- 100 * signif(Variance[2], 3)
Variance2
<- gc_mds$points %>%
gc_mds_long as.data.frame() %>%
rownames_to_column("Sample_ID") %>%
full_join(md, by = "Sample_ID")
$Sample_Name <- factor(gc_mds_long$Sample_Name, levels = c("t0_A","t0_B","t0_C","t2_A","t2_B","t2_C","t6_A","t6_B","t6_C","t24_A","t24_B","t24_C"))
gc_mds_long$Time <- factor(gc_mds_long$Time, levels = c("t0","t2","t6","t24"))
gc_mds_long$Replicate <- factor(gc_mds_long$Replicate, levels = c("A","B","C"))
gc_mds_long
ggplot(gc_mds_long, aes(x=V1, y=V2, color = Time)) +
geom_point(size = 3, aes(shape = Replicate)) +
xlab(paste("PCO1: ", Variance1, "%")) +
ylab(paste("PCO2: ", Variance2, "%")) +
geom_vline(xintercept = 0, linetype=2) +
geom_hline(yintercept = 0, linetype=2) +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
3.2 Exercise II
3.2.1 Task 2.1
<- read.table("../data/Time_t2_vs_t0.txt", sep = "\t", header = TRUE, row.names = 1)
t2_vs_t0
%>%
t2_vs_t0 ggplot() +
geom_point(aes(x = baseMean, y = log2FoldChange), color = "grey70") +
geom_point(data=filter(t2_vs_t0, padj < 0.05), aes(x = baseMean, y = log2FoldChange), color = "blue") +
geom_hline(yintercept = 0) +
scale_x_log10("Mean of normalized counts") +
ylab("log fold change") +
theme_bw(base_size = 14)
3.2.2 Task 2.2
library(ggrepel)
<- read.table("../data/human_biomaRt_annotation.csv", sep = ";", header = TRUE, row.names = 1)
gene_info names(gene_info) <- c('gene_id', 'gene_name')
<- t2_vs_t0 %>%
de_w_names rownames_to_column('gene_id') %>%
left_join(gene_info, by = "gene_id")
ggplot(de_w_names, aes(x = log2FoldChange, y = -log10(padj))) +
geom_point(color = "grey70") +
geom_text_repel(data=filter(de_w_names, padj < 0.05 & abs(log2FoldChange) > 1.5), aes(x = log2FoldChange, y = -log10(padj), label=gene_name)) +
geom_hline(yintercept = 1.3, linetype = 2) +
geom_point(data=filter(t2_vs_t0, padj < 0.05), color = "red") +
ylab("Adjusted P-values in -log10") +
theme_bw(base_size = 14)