Spatial transcriptomics


Spatial transcriptomic data with the Visium platform is in many ways similar to scRNAseq data. It contains UMI counts for 5-20 cells instead of single cells, but is still quite sparse in the same way as scRNAseq data is, but with the additional information about spatial location in the tissue.

Here we will first run quality control in a similar manner to scRNAseq data, then QC filtering, dimensionality reduction, integration and clustering. Then we will use scRNAseq data from mouse cortex to run LabelTransfer to predict celltypes in the Visium spots.

We will use two Visium spatial transcriptomics dataset of the mouse brain (Sagittal), which are publicly available from the 10x genomics website. Note, that these dataset have already been filtered for spots that does not overlap with the tissue.

Load packages

webpath <- "https://cf.10xgenomics.com/samples/spatial-exp/1.1.0/V1_Mouse_Brain_Sagittal_Posterior/"
PATH <- "./data/visium/Posterior"
if (!dir.exists(PATH)) {
    dir.create(PATH, recursive = T)
}
file_list <- c("V1_Mouse_Brain_Sagittal_Posterior_filtered_feature_bc_matrix.tar.gz",
    "V1_Mouse_Brain_Sagittal_Posterior_spatial.tar.gz")
for (i in file_list) {
    download.file(url = paste0(webpath, i), destfile = paste0("./data/raw/", i))
    system(paste0("tar xvzf ./data/raw/", i))
}

webpath <- "https://cf.10xgenomics.com/samples/spatial-exp/1.1.0/V1_Mouse_Brain_Sagittal_Anterior/"
PATH <- "./data/visium/Anterior"
if (!dir.exists(PATH)) {
    dir.create(PATH, recursive = T)
}
file_list <- c("V1_Mouse_Brain_Sagittal_Anterior_filtered_feature_bc_matrix.tar.gz",
    "V1_Mouse_Brain_Sagittal_Anterior_spatial.tar.gz")
for (i in file_list) {
    download.file(url = paste0(webpath, i), destfile = paste0("./data/raw/", i))
    system(paste0("tar xvzf ./data/raw/", i))
}
# BiocManager::install('DropletUtils',update = F)
devtools::install_github("RachelQueen1/Spaniel", ref = "Development", upgrade = F,
    dependencies = F)
## ── R CMD build ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
##   
   checking for file ‘/private/var/folders/f_/vj_w4xx933z1rr95yf4rhphr0000gp/T/Rtmph6h3eR/remotes18342fd17b60/RachelQueen1-Spaniel-bb1bb99/DESCRIPTION’ ...
  
✔  checking for file ‘/private/var/folders/f_/vj_w4xx933z1rr95yf4rhphr0000gp/T/Rtmph6h3eR/remotes18342fd17b60/RachelQueen1-Spaniel-bb1bb99/DESCRIPTION’
## 
  
─  preparing ‘Spaniel’:
##    checking DESCRIPTION meta-information ...
  
✔  checking DESCRIPTION meta-information
## 
  
─  checking for LF line-endings in source and make files and shell scripts
## 
  
─  checking for empty or unneeded directories
## 
  
   Omitted ‘LazyData’ from DESCRIPTION
## 
  
─  building ‘Spaniel_1.2.0.tar.gz’
## 
  
   
## 
library(Spaniel)
library(biomaRt)

suppressPackageStartupMessages(require(Matrix))
suppressPackageStartupMessages(require(dplyr))
suppressPackageStartupMessages(require(scran))
suppressPackageStartupMessages(require(SingleR))
suppressPackageStartupMessages(require(scater))
suppressPackageStartupMessages(require(ggplot2))
suppressPackageStartupMessages(require(patchwork))
suppressPackageStartupMessages(require(cowplot))

Load ST data

We can first load and merge the objects into one SCE object.

sce.a <- Spaniel::createVisiumSCE(tenXDir = "data/visium/Anterior", resolution = "Low")
sce.p <- Spaniel::createVisiumSCE(tenXDir = "data/visium/Posterior", resolution = "Low")

sce <- cbind(sce.a, sce.p)
sce$Sample <- sub(".*[/]", "", sub("/filtered_feature_bc_matrix", "", sce$Sample))

lll <- list(sce.a, sce.p)
lll <- lapply(lll, function(x) x@metadata)
names(lll) <- c("Anterior", "Posterior")
sce@metadata <- lll

We can further convert the gene ensembl IDs to gene names.

mart <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = "mmusculus_gene_ensembl")
annot <- biomaRt::getBM(attributes = c("ensembl_gene_id", "external_gene_name", "gene_biotype"),
    mart = mart, useCache = F)

gene_names <- as.character(annot[match(rownames(sce), annot[, "ensembl_gene_id"]),
    "external_gene_name"])
gene_names[is.na(gene_names)] <- ""

sce <- sce[gene_names != "", ]
rownames(sce) <- gene_names[gene_names != ""]
dim(sce)
## [1] 32160  6050

Quality control


Similar to scRNAseq we use statistics on number of counts, number of features and percent mitochondria for quality control.

Now the counts and feature counts are calculated on the Spatial assay, so they are named “nCount_Spatial” and “nFeature_Spatial”.

# Mitochondrial genes
mito_genes <- rownames(sce)[grep("^mt-", rownames(sce))]

# Ribosomal genes
ribo_genes <- rownames(sce)[grep("^Rp[sl]", rownames(sce))]

# Hemoglobin genes - includes all genes starting with HB except HBP.
hb_genes <- rownames(sce)[grep("^Hb[^(p)]", rownames(sce))]

sce <- addPerCellQC(sce, flatten = T, subsets = list(mt = mito_genes, hb = hb_genes,
    ribo = ribo_genes))

head(colData(sce))
## DataFrame with 6 rows and 24 columns
##        Sample            Barcode   Section    Spot_Y    Spot_X   Image_Y
##   <character>        <character> <integer> <integer> <integer> <integer>
## 1    Anterior AAACAAGTATCTCCCA-1         1        50       102      7474
## 2    Anterior AAACACCAATAACTGC-1         1        59        19      8552
## 3    Anterior AAACAGAGCGACTCCT-1         1        14        94      3163
## 4    Anterior AAACAGCTTTCAGAAG-1         1        43         9      6636
## 5    Anterior AAACAGGGTCTATATT-1         1        47        13      7115
## 6    Anterior AAACATGGTGAGAGGA-1         1        62         0      8912
##     Image_X   pixel_x   pixel_y       sum  detected     total       sum
##   <integer> <numeric> <numeric> <numeric> <integer> <numeric> <numeric>
## 1      8500   438.898   214.079     13991      4462     13991     13960
## 2      2788   143.959   158.417     39797      8126     39797     39743
## 3      7950   410.499   436.678     29951      6526     29951     29905
## 4      2100   108.434   257.349     42333      8190     42333     42263
## 5      2375   122.633   232.616     35700      8090     35700     35660
## 6      1480    76.420   139.828     22148      6518     22148     22098
##    detected subsets_mt_sum subsets_mt_detected subsets_mt_percent
##   <integer>      <numeric>           <integer>          <numeric>
## 1      4458           1521                  12           10.89542
## 2      8117           3977                  12           10.00679
## 3      6520           4265                  12           14.26183
## 4      8182           2870                  12            6.79081
## 5      8083           1831                  13            5.13460
## 6      6511           2390                  12           10.81546
##   subsets_hb_sum subsets_hb_detected subsets_hb_percent subsets_ribo_sum
##        <numeric>           <integer>          <numeric>        <numeric>
## 1             60                   4           0.429799              826
## 2            831                   6           2.090934             2199
## 3            111                   5           0.371175             1663
## 4            117                   5           0.276838             3129
## 5             73                   5           0.204711             2653
## 6            134                   5           0.606390             1478
##   subsets_ribo_detected subsets_ribo_percent     total
##               <integer>            <numeric> <numeric>
## 1                    85              5.91691     13960
## 2                    89              5.53305     39743
## 3                    88              5.56094     29905
## 4                    88              7.40364     42263
## 5                    90              7.43971     35660
## 6                    84              6.68839     22098
plot_grid(plotColData(sce, y = "detected", x = "Sample", colour_by = "Sample"), plotColData(sce,
    y = "total", x = "Sample", colour_by = "Sample"), plotColData(sce, y = "subsets_mt_percent",
    x = "Sample", colour_by = "Sample"), plotColData(sce, y = "subsets_ribo_percent",
    x = "Sample", colour_by = "Sample"), plotColData(sce, y = "subsets_hb_percent",
    x = "Sample", colour_by = "Sample"), ncol = 3)