Spatial transcriptomic data with the Visium platform is in many ways similar to scRNAseq data. It contains UMI counts for 5-20 cells instead of single cells, but is still quite sparse in the same way as scRNAseq data is, but with the additional information about spatial location in the tissue.
Here we will first run quality control in a similar manner to scRNAseq data, then QC filtering, dimensionality reduction, integration and clustering. Then we will use scRNAseq data from mouse cortex to run LabelTransfer to predict celltypes in the Visium spots.
We will use two Visium spatial transcriptomics dataset of the mouse brain (Sagittal), which are publicly available from the 10x genomics website. Note, that these dataset have already been filtered for spots that does not overlap with the tissue.
<- "https://cf.10xgenomics.com/samples/spatial-exp/1.1.0/V1_Mouse_Brain_Sagittal_Posterior/"
webpath <- "./data/visium/Posterior"
PATH if (!dir.exists(PATH)) {
dir.create(PATH, recursive = T)
}<- c("V1_Mouse_Brain_Sagittal_Posterior_filtered_feature_bc_matrix.tar.gz",
file_list "V1_Mouse_Brain_Sagittal_Posterior_spatial.tar.gz")
for (i in file_list) {
download.file(url = paste0(webpath, i), destfile = paste0("./data/raw/", i))
system(paste0("tar xvzf ./data/raw/", i))
}
<- "https://cf.10xgenomics.com/samples/spatial-exp/1.1.0/V1_Mouse_Brain_Sagittal_Anterior/"
webpath <- "./data/visium/Anterior"
PATH if (!dir.exists(PATH)) {
dir.create(PATH, recursive = T)
}<- c("V1_Mouse_Brain_Sagittal_Anterior_filtered_feature_bc_matrix.tar.gz",
file_list "V1_Mouse_Brain_Sagittal_Anterior_spatial.tar.gz")
for (i in file_list) {
download.file(url = paste0(webpath, i), destfile = paste0("./data/raw/", i))
system(paste0("tar xvzf ./data/raw/", i))
}
# BiocManager::install('DropletUtils',update = F)
::install_github("RachelQueen1/Spaniel", ref = "Development", upgrade = F,
devtoolsdependencies = F)
## ── R CMD build ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
##
checking for file ‘/private/var/folders/f_/vj_w4xx933z1rr95yf4rhphr0000gp/T/Rtmph6h3eR/remotes18342fd17b60/RachelQueen1-Spaniel-bb1bb99/DESCRIPTION’ ...
✔ checking for file ‘/private/var/folders/f_/vj_w4xx933z1rr95yf4rhphr0000gp/T/Rtmph6h3eR/remotes18342fd17b60/RachelQueen1-Spaniel-bb1bb99/DESCRIPTION’
##
─ preparing ‘Spaniel’:
## checking DESCRIPTION meta-information ...
✔ checking DESCRIPTION meta-information
##
─ checking for LF line-endings in source and make files and shell scripts
##
─ checking for empty or unneeded directories
##
Omitted ‘LazyData’ from DESCRIPTION
##
─ building ‘Spaniel_1.2.0.tar.gz’
##
##
library(Spaniel)
library(biomaRt)
suppressPackageStartupMessages(require(Matrix))
suppressPackageStartupMessages(require(dplyr))
suppressPackageStartupMessages(require(scran))
suppressPackageStartupMessages(require(SingleR))
suppressPackageStartupMessages(require(scater))
suppressPackageStartupMessages(require(ggplot2))
suppressPackageStartupMessages(require(patchwork))
suppressPackageStartupMessages(require(cowplot))
We can first load and merge the objects into one SCE object.
<- Spaniel::createVisiumSCE(tenXDir = "data/visium/Anterior", resolution = "Low")
sce.a <- Spaniel::createVisiumSCE(tenXDir = "data/visium/Posterior", resolution = "Low")
sce.p
<- cbind(sce.a, sce.p)
sce $Sample <- sub(".*[/]", "", sub("/filtered_feature_bc_matrix", "", sce$Sample))
sce
<- list(sce.a, sce.p)
lll <- lapply(lll, function(x) x@metadata)
lll names(lll) <- c("Anterior", "Posterior")
@metadata <- lll sce
We can further convert the gene ensembl IDs to gene names.
<- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = "mmusculus_gene_ensembl")
mart <- biomaRt::getBM(attributes = c("ensembl_gene_id", "external_gene_name", "gene_biotype"),
annot mart = mart, useCache = F)
<- as.character(annot[match(rownames(sce), annot[, "ensembl_gene_id"]),
gene_names "external_gene_name"])
is.na(gene_names)] <- ""
gene_names[
<- sce[gene_names != "", ]
sce rownames(sce) <- gene_names[gene_names != ""]
dim(sce)
## [1] 32160 6050
Similar to scRNAseq we use statistics on number of counts, number of features and percent mitochondria for quality control.
Now the counts and feature counts are calculated on the Spatial assay, so they are named “nCount_Spatial” and “nFeature_Spatial”.
# Mitochondrial genes
<- rownames(sce)[grep("^mt-", rownames(sce))]
mito_genes
# Ribosomal genes
<- rownames(sce)[grep("^Rp[sl]", rownames(sce))]
ribo_genes
# Hemoglobin genes - includes all genes starting with HB except HBP.
<- rownames(sce)[grep("^Hb[^(p)]", rownames(sce))]
hb_genes
<- addPerCellQC(sce, flatten = T, subsets = list(mt = mito_genes, hb = hb_genes,
sce ribo = ribo_genes))
head(colData(sce))
## DataFrame with 6 rows and 24 columns
## Sample Barcode Section Spot_Y Spot_X Image_Y
## <character> <character> <integer> <integer> <integer> <integer>
## 1 Anterior AAACAAGTATCTCCCA-1 1 50 102 7474
## 2 Anterior AAACACCAATAACTGC-1 1 59 19 8552
## 3 Anterior AAACAGAGCGACTCCT-1 1 14 94 3163
## 4 Anterior AAACAGCTTTCAGAAG-1 1 43 9 6636
## 5 Anterior AAACAGGGTCTATATT-1 1 47 13 7115
## 6 Anterior AAACATGGTGAGAGGA-1 1 62 0 8912
## Image_X pixel_x pixel_y sum detected total sum
## <integer> <numeric> <numeric> <numeric> <integer> <numeric> <numeric>
## 1 8500 438.898 214.079 13991 4462 13991 13960
## 2 2788 143.959 158.417 39797 8126 39797 39743
## 3 7950 410.499 436.678 29951 6526 29951 29905
## 4 2100 108.434 257.349 42333 8190 42333 42263
## 5 2375 122.633 232.616 35700 8090 35700 35660
## 6 1480 76.420 139.828 22148 6518 22148 22098
## detected subsets_mt_sum subsets_mt_detected subsets_mt_percent
## <integer> <numeric> <integer> <numeric>
## 1 4458 1521 12 10.89542
## 2 8117 3977 12 10.00679
## 3 6520 4265 12 14.26183
## 4 8182 2870 12 6.79081
## 5 8083 1831 13 5.13460
## 6 6511 2390 12 10.81546
## subsets_hb_sum subsets_hb_detected subsets_hb_percent subsets_ribo_sum
## <numeric> <integer> <numeric> <numeric>
## 1 60 4 0.429799 826
## 2 831 6 2.090934 2199
## 3 111 5 0.371175 1663
## 4 117 5 0.276838 3129
## 5 73 5 0.204711 2653
## 6 134 5 0.606390 1478
## subsets_ribo_detected subsets_ribo_percent total
## <integer> <numeric> <numeric>
## 1 85 5.91691 13960
## 2 89 5.53305 39743
## 3 88 5.56094 29905
## 4 88 7.40364 42263
## 5 90 7.43971 35660
## 6 84 6.68839 22098
plot_grid(plotColData(sce, y = "detected", x = "Sample", colour_by = "Sample"), plotColData(sce,
y = "total", x = "Sample", colour_by = "Sample"), plotColData(sce, y = "subsets_mt_percent",
x = "Sample", colour_by = "Sample"), plotColData(sce, y = "subsets_ribo_percent",
x = "Sample", colour_by = "Sample"), plotColData(sce, y = "subsets_hb_percent",
x = "Sample", colour_by = "Sample"), ncol = 3)