Solutions

Workshop on Advanced Data Visualization

Author

Lokesh Mano

Published

2026-May-08

The solutions given below are just one way of obtaining the desired plots! There are probably several different ways you could code to get the same plots.

1 ggplot basics

1.1 Exercise I and II

For these exercises, the solutions are already part of the material; all you need to do is replace the filename counts_raw.txt with each of the other count files to see how the different normalization methods compare.

1.2 Exercise III

1.2.1 Task 3.1

# Load the four count tables (tab-separated, with a header row) and the
# sample metadata (semicolon-separated, with a header row).
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
gc_raw <- read.table(file = "../data/counts_raw.txt", sep = "\t", header = TRUE)
gc_filt <- read.table(file = "../data/counts_filtered.txt", sep = "\t", header = TRUE)
gc_vst <- read.table(file = "../data/counts_vst.txt", sep = "\t", header = TRUE)
gc_deseq <- read.table(file = "../data/counts_deseq2.txt", sep = "\t", header = TRUE)
md <- read.table("../data/metadata.csv", header = TRUE, sep = ";")
# Use the sample identifiers as row names of the metadata table.
rownames(md) <- md$Sample_ID
# Standard error of the mean: square root of the sample variance divided by
# the number of observations in `x`.
se <- function(x) {
  n_obs <- length(x)
  sqrt(var(x) / n_obs)
}

# Reshape each count table to long format (one row per gene and sample),
# merge the four tables on gene and sample, stack the method columns into a
# Method/count pair, drop missing counts and attach the sample metadata.
# Time, Replicate and Method are then turned into factors with a fixed order.
gene_counts_all <- list(
  gather(gc_raw, Sample_ID, Raw, -Gene),
  gather(gc_filt, Sample_ID, Filtered, -Gene),
  gather(gc_vst, Sample_ID, VST, -Gene),
  gather(gc_deseq, Sample_ID, DESeq2, -Gene)
) %>%
  Reduce(f = function(left, right) full_join(left, right, by = c("Gene", "Sample_ID"))) %>%
  gather(Method, count, Raw:DESeq2) %>%
  filter(!is.na(count)) %>%
  full_join(md, by = "Sample_ID") %>%
  mutate(
    Time = factor(Time, levels = c("t0", "t2", "t6", "t24")),
    Replicate = factor(Replicate, levels = c("A", "B", "C")),
    Method = factor(Method, levels = c("Raw", "Filtered", "DESeq2", "VST"))
  )

# Mean and standard error of log10(count + 1) for every combination of time
# point, replicate and normalization method, shown as dodged bars with error
# bars; one panel per method, each with its own axis scales.
gene_counts_all %>%
  mutate(log_count = log10(count + 1)) %>%
  group_by(Time, Replicate, Method) %>%
  summarise(mean = mean(log_count), se = se(log_count)) %>%
  ggplot(mapping = aes(x = Time, y = mean, fill = Replicate)) +
  geom_col(position = position_dodge2()) +
  geom_errorbar(
    mapping = aes(ymin = mean - se, ymax = mean + se),
    position = position_dodge2(.9, padding = .6)
  ) +
  facet_wrap(~Method, scales = "free")

1.2.2 Task 3.2

# Box plots of log10(count + 1) per sample, filled by time point, with one
# panel per combination of normalization method and replicate.
gene_counts_all %>%
  # Sample_Name is still plain text in gene_counts_all, so the x axis would be
  # sorted alphabetically; fix the order by time point and replicate instead.
  mutate(Sample_Name = factor(Sample_Name, levels = c("t0_A","t0_B","t0_C","t2_A","t2_B","t2_C","t6_A","t6_B","t6_C","t24_A","t24_B","t24_C"))) %>%
  ggplot() +
  geom_boxplot(mapping = aes(x = Sample_Name, y = log10(count + 1), fill = Time)) +
  facet_wrap(~Method*Replicate, ncol = 3, scales = "free")

2 Advanced ggplot

2.1 Exercise I

# Long-format raw counts joined with the sample metadata. Sample_ID is moved
# to the front and Gene/count to the end; Sample_Name, Time and Replicate
# become factors with a fixed order.
gc_long <- gc_raw %>%
  gather(Sample_ID, count, -Gene) %>%
  full_join(md, by = "Sample_ID") %>%
  select(Sample_ID, everything()) %>%
  select(-c(Gene, count), c(Gene, count)) %>%
  mutate(
    # Builds t0_A, t0_B, t0_C, t2_A, ... t24_C.
    Sample_Name = factor(
      Sample_Name,
      levels = paste(rep(c("t0", "t2", "t6", "t24"), each = 3), c("A", "B", "C"), sep = "_")
    ),
    Time = factor(Time, levels = c("t0", "t2", "t6", "t24")),
    Replicate = factor(Replicate, levels = c("A", "B", "C"))
  )

# Mean log10(count + 1) per time point and replicate, drawn as one line in
# each replicate panel with a customised axis theme.
gc_long %>%
  group_by(Time, Replicate) %>%
  summarise(mean = mean(log10(count + 1)), se = se(log10(count + 1))) %>%
  ggplot(aes(x = Time, y = mean, color = Replicate)) +
  facet_wrap(~Replicate) +
  # `linewidth` controls line thickness; using `size` for lines is deprecated
  # since ggplot2 3.4.0 and raises a warning.
  geom_line(aes(group = 1), stat = "identity", linewidth = 2) +
  # Only the time points listed here are shown on the x axis.
  scale_x_discrete(limits = c("t0", "t2", "t24")) +
  scale_y_continuous(limits = c(0.4, 0.8), breaks = seq(0.4, 0.8, 0.05)) +
  # The panels already identify the replicate, so the colour legend is hidden.
  guides(color = "none") +
  ylab(label = "mean(log10(count + 1))") +
  theme_light() +
  theme(axis.text = element_text(face = "bold", size = 12),
        axis.title = element_text(face = "bold", color = "#C84DF9", size = 14),
        axis.ticks = element_blank())

2.2 Exercise II

library(ggpubr)
# Main panel: sepal length against sepal width, coloured by species, with a
# border around the plot.
p4 <- ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Species)) +
  geom_point(alpha = 0.6, size = 3) +
  theme_classic(base_size = 12) +
  border()

# Top margin: density of sepal length per species, without axes or legend.
d1 <- ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_density(alpha = 0.6) +
  theme_classic() +
  clean_theme() +
  theme(legend.position = "none")

# Right margin: density of sepal width per species, rotated to run along the
# y axis of the main panel.
d2 <- ggplot(iris, aes(x = Sepal.Width, fill = Species)) +
  geom_density(alpha = 0.6) +
  theme_classic() +
  clean_theme() +
  theme(legend.position = "none") +
  rotate()

# 2 x 2 layout; the top-right cell is left empty.
ggarrange(
  d1, NULL,
  p4, d2,
  nrow = 2, ncol = 2,
  align = "hv",
  widths = c(3, 1),
  heights = c(1, 3),
  common.legend = TRUE
)

3 PCA and Gene Expression

3.1 Exercise I

# Move the gene identifiers into the row names so only count columns remain,
# then run classical multidimensional scaling on the distances between samples.
rownames(gc_vst) <- gc_vst[["Gene"]]
gc_vst[["Gene"]] <- NULL
gc_dist <- dist(t(gc_vst))
gc_mds <- cmdscale(gc_dist, k = 2, eig = TRUE)

# Share of each eigenvalue in the eigenvalue total; the first two are
# expressed as percentages for the axis labels.
Eigenvalues <- gc_mds[["eig"]]
Variance <- Eigenvalues / sum(Eigenvalues)
Variance1 <- 100 * signif(Variance[1], 3)
Variance2 <- 100 * signif(Variance[2], 3)

# MDS coordinates as a data frame with one row per sample, joined with the
# sample metadata; Sample_Name, Time and Replicate become ordered factors.
gc_mds_long <- as.data.frame(gc_mds$points) %>%
  rownames_to_column("Sample_ID") %>%
  full_join(md, by = "Sample_ID") %>%
  mutate(
    # Builds t0_A, t0_B, t0_C, t2_A, ... t24_C.
    Sample_Name = factor(
      Sample_Name,
      levels = paste(rep(c("t0", "t2", "t6", "t24"), each = 3), c("A", "B", "C"), sep = "_")
    ),
    Time = factor(Time, levels = c("t0", "t2", "t6", "t24")),
    Replicate = factor(Replicate, levels = c("A", "B", "C"))
  )

# Scatter plot of the two MDS coordinates: colour encodes time point, shape
# encodes replicate, dashed lines mark the origin, and the axis labels carry
# the percentages computed for the first two coordinates.
ggplot(gc_mds_long, mapping = aes(x = V1, y = V2, color = Time)) +
  geom_point(mapping = aes(shape = Replicate), size = 3) +
  labs(
    x = paste("PCO1: ", Variance1, "%"),
    y = paste("PCO2: ", Variance2, "%")
  ) +
  geom_vline(xintercept = 0, linetype = 2) +
  geom_hline(yintercept = 0, linetype = 2) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

3.2 Exercise II

3.2.1 Task 2.1

# Differential-expression table for t2 versus t0; the first column supplies
# the row names.
t2_vs_t0 <- read.table("../data/Time_t2_vs_t0.txt", sep = "\t", header = TRUE, row.names = 1)

# MA plot: all genes in grey, genes with padj < 0.05 overlaid in blue, on a
# log10 x axis with a reference line at zero fold change.
ggplot(t2_vs_t0, mapping = aes(x = baseMean, y = log2FoldChange)) +
  geom_point(color = "grey70") +
  geom_point(data = filter(t2_vs_t0, padj < 0.05), color = "blue") +
  geom_hline(yintercept = 0) +
  scale_x_log10(name = "Mean of normalized counts") +
  labs(y = "log fold change") +
  theme_bw(base_size = 14)

3.2.2 Task 2.2

library(ggrepel)
# Gene annotation table; the first column supplies the row names and the two
# remaining columns are renamed to gene_id and gene_name.
gene_info <- read.table("../data/human_biomaRt_annotation.csv", sep = ";", header = TRUE, row.names = 1)
names(gene_info) <- c('gene_id', 'gene_name')

# Differential-expression results with the gene names attached by gene_id.
de_w_names <- t2_vs_t0 %>%
  rownames_to_column('gene_id') %>%
  left_join(gene_info, by = "gene_id")

# Volcano plot: all genes in grey, genes with padj < 0.05 in red, and name
# labels for genes with padj < 0.05 and an absolute log2 fold change above 1.5.
# Every layer draws from de_w_names, and the red points are added before the
# labels so that they do not cover the label text.
ggplot(de_w_names, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(color = "grey70") +
  geom_point(data = filter(de_w_names, padj < 0.05), color = "red") +
  geom_text_repel(data = filter(de_w_names, padj < 0.05 & abs(log2FoldChange) > 1.5), aes(label = gene_name)) +
  # 1.3 is approximately -log10(0.05), the significance threshold used above.
  geom_hline(yintercept = 1.3, linetype = 2) +
  ylab("Adjusted P-values in -log10") +
  theme_bw(base_size = 14)

4 Dynamic Plotting

4.1 Exercise 2.1

library(palmerpenguins)
library(leaflet)

# One colour and one name per island, in matching order.
island_colors <- c("darkorange","purple","green")
island_names <- c("Biscoe", "Dream", "Torgersen")

# Map with one circle marker per island and a colour legend.
# NOTE(review): island_coordinates is not defined in this section; presumably
# it is created earlier in the lab material — confirm.
leaflet() %>%
  addTiles() %>%
  #addProviderTiles(providers$Esri.WorldImagery) %>%
  setView(lat = -65.5, lng = -65.5, zoom = 6) %>%
  addCircleMarkers(data = island_coordinates, color = island_colors, popup = island_names) %>%
  addLegend(title = "Islands", position = "bottomright", colors = island_colors, labels = island_names)

4.2 Exercise 3.1

library(ggplot2)
library(plotly)

# Interactive scatter plot of REM sleep against total sleep, coloured by diet
# using the "Set1" palette.
msleep %>%
  plot_ly(
    x = ~sleep_rem,
    y = ~sleep_total,
    color = ~vore,
    colors = "Set1",
    size = 2,
    type = "scatter",
    mode = "markers"
  )
library(ggplot2)
library(plotly)

# Diet colours shared by both panels, defined once so the panels cannot drift
# apart.
diet_palette <- c("#88CCEE", "#CC6677", "#DDCC77", "#117733")

# Left panel: number of mammals per diet.
p1 <- plot_ly(msleep, x = ~vore, color = ~vore, colors = diet_palette) %>%
  add_histogram()

# Right panel: total sleep per diet as box plots. The legend is switched off
# here; `FALSE` is spelled out because `F` is a reassignable variable.
p2 <- plot_ly(msleep,  x = ~vore, y = ~sleep_total, color = ~vore, type = "box", size = 2, colors = diet_palette, showlegend = FALSE)

fig <- subplot(p1, p2)

# Titles for the figure, both pairs of axes and the legend.
fig %>% 
    layout(title = "Mammals Sleep Data",
    xaxis = list(title = "Diet"), 
    yaxis = list(title = "Number of mammals"),
    xaxis2 = list(title = "Diet"), 
    yaxis2 = list(title = "Total sleep (hours)"), 
    legend = list(title = list(text = "Diet")))

4.3 Exercise 3.2

library(ggplot2)
library(plotly)

# 10 x 10 matrix of random normal values; rows are named Gene1..Gene10 and
# columns Sample1..Sample10.
data <- matrix(
  rnorm(100),
  nrow = 10,
  ncol = 10,
  dimnames = list(
    paste0("Gene", seq_len(10)),
    paste0("Sample", seq_len(10))
  )
)

# Interactive heatmap of the matrix with samples on x and genes on y.
plot_ly(
  z = data,
  x = colnames(data),
  y = rownames(data),
  type = "heatmap",
  colorscale = "Viridis"
) %>%
  layout(
    title = "Random plotly heatmap",
    xaxis = list(title = "Samples"),
    yaxis = list(title = "Genes")
  )

4.4 Exercise 4.1

library(ggiraph)
# The package is called ggplot2; `library(ggplot)` fails because no package of
# that name exists.
library(ggplot2)

# Interactive box plot of total sleep per diet; hovering over a box shows the
# diet as tooltip.
PlotData <- ggplot(msleep, aes(x = vore, y = sleep_total, color = vore, tooltip = vore)) +
    geom_boxplot_interactive() +
    theme_minimal() +
    scale_color_manual(values = c("#88CCEE", "#CC6677", "#DDCC77", "#117733", "#999999")) + 
    labs(title = "Mammals Sleep Data", x = "Diet", y = "Total Sleep (hours)", color = "Diet")
  
girafe(ggobj = PlotData)

4.5 Exercise 4.2

library(patchwork)

# Box plot of total sleep per diet; tooltip and data_id are both the diet.
ggiraph1 <- ggplot(msleep, mapping = aes(x = vore, y = sleep_total, color = vore)) +
  geom_boxplot_interactive(mapping = aes(tooltip = vore, data_id = vore)) +
  theme_minimal() +
  scale_color_manual(
    values = c("#88CCEE", "#CC6677", "#DDCC77", "#117733", "#999999")
  ) +
  labs(
    title = "Mammals Sleep Data",
    x = "Diet",
    y = "Total Sleep (hours)",
    color = "Diet"
  )

# Scatter plot of REM sleep against total sleep; the tooltip is the animal
# name and data_id is the diet, the same key the box plot uses.
ggiraph2 <- ggplot(msleep, mapping = aes(x = sleep_rem, y = sleep_total, color = vore)) +
  geom_point_interactive(mapping = aes(tooltip = name, data_id = vore)) +
  theme_minimal() +
  scale_color_manual(
    values = c("#88CCEE", "#CC6677", "#DDCC77", "#117733", "#999999")
  ) +
  labs(
    title = "Mammals Sleep Data",
    x = "REM Sleep (hours)",
    y = "Total Sleep (hours)",
    color = "Diet"
  )

# Each plot on its own.
girafe(ggobj = ggiraph1)
girafe(ggobj = ggiraph2)

# Both plots side by side in one widget, with custom CSS for the hovered
# elements and for the elements that are not hovered.
girafe(
  ggobj = ggiraph1 + ggiraph2,
  options = list(
    opts_hover(css = "fill:black;stroke:black;r:3pt;"),
    opts_hover_inv(css = "opacity:0.2;")
  )
)

4.6 Exercise 5.1

library(DT)

# Raw counts (first column supplies the row names) and sample metadata.
# `TRUE` is spelled out because `T` is a reassignable variable.
# NOTE(review): `gc` has the same name as the base R function gc(), and `md`
# replaces the metadata object loaded in the first section; the names are kept
# unchanged here. These paths use "../../data/" while the other sections read
# from "../data/" — confirm which is intended.
gc <- read.table("../../data/counts_raw.txt", header = TRUE, row.names = 1, sep = "\t")
md <- read.table("../../data/metadata.csv", header = TRUE, sep = ";")

# Count table, 10 rows per page: Sample_1 gets an in-cell bar scaled to its
# values; Sample_3 gets text and background colours chosen by value interval.
datatable(gc, options = list(pageLength = 10), caption = "Table 1: Raw gene counts") %>% 
  formatStyle(
    'Sample_1',
    background = styleColorBar(gc$Sample_1, 'steelblue'),
    backgroundSize = '100% 90%',
    backgroundRepeat = 'no-repeat',
    backgroundPosition = 'center') %>%
  formatStyle('Sample_3', color = styleInterval(c(8500, 15000), c('white', 'blue', 'green')),
    backgroundColor = styleInterval(11000, c('gray', 'orange')))

# Metadata table, 10 rows per page, initially sorted in descending order on
# the column with index 4.
datatable(md, options = list(pageLength = 10, order = list(list(4, 'desc'))), caption = "Table 1: Metadata")

4.7 Exercise 6.1

library(plotly)
library(ggplot2)
library(crosstalk)

# Wrap msleep in a shared data object so the filter widgets and the plot work
# on the same data.
shared_msleep <- SharedData$new(msleep)

# Static scatter plot of REM sleep against total sleep, coloured by diet.
gg_plot1 <- ggplot(shared_msleep, mapping = aes(x = sleep_rem, y = sleep_total, color = vore)) +
  geom_point() +
  theme_bw()

# Interactive version of the same plot.
plotly_plot1 <- ggplotly(gg_plot1)

# Layout: a column of three filter widgets (diet check boxes, total-sleep
# slider, conservation-status selector) next to the plot.
bscols(
  list(
    filter_checkbox(id = "vore", label = "Diet", shared_msleep, ~vore, inline = TRUE),
    filter_slider(id = "sleep_total", label = "Total Sleep (hours)", shared_msleep, ~sleep_total, width = "100%"),
    filter_select(id = "conservation", label = "Conservation Status", shared_msleep, ~conservation)
  ),
  plotly_plot1,
  widths = c(3, NA)
)

4.8 Exercise 7.1

4.9 ````

---
title: "OJS example"
author: "NBIS"
date: last-modified
date-format: "YYYY-MMM-DD"
format: html
---

```{r}
# ggplot2 is loaded before msleep is used in the next chunk.
library(ggplot2)
```
```{r}
# Copy msleep and hand it to the OJS cells under the name `ojsd`.
dt_sleep <- msleep
ojs_define(ojsd = dt_sleep)
```
```{ojs}
// Pass the R data through transpose() before it is used below
// (presumably converts column-wise data into an array of row objects — confirm).
ojsdata = transpose(ojsd)
```
```{ojs}
// Show the full data set as a table.
viewof raw_table = Inputs.table(ojsdata)
```
```{ojs}
// Check boxes for the diet; "herbi" and "carni" are ticked initially.
viewof diets = Inputs.checkbox(
  ["herbi", "carni", "omni", "insecti"], 
  { value: ["herbi", "carni"], 
    label: "Diet:"
  }
)
```
```{ojs}
// Keep only the mammals whose `vore` value is among the ticked diets.
filtered = ojsdata.filter(function(mammal) {
    return diets.includes(mammal.vore);
})
```
```{ojs}
// Dot plot of REM sleep against total sleep for the filtered mammals,
// filled by diet, with grid lines.
Plot.plot({
  marks: [
    Plot.dot(filtered, {
      x:"sleep_rem",
      y: "sleep_total",
      fill: "vore"
    })
  ],
  grid: true
})

```