Load the required packages:
library(tidyverse)
library(glue)
library(knitr)
library(Matrix)
library(scRNAseq)
library(scater)
library(scran)
library(velociraptor)
library(dittoSeq)
library(viridis)# Load the Hermann et al. (2018) mouse spermatogenic cells dataset
sce <- HermannSpermatogenesisData()
sce## class: SingleCellExperiment
## dim: 54448 2325
## metadata(0):
## assays(2): spliced unspliced
## rownames(54448): ENSMUSG00000102693.1 ENSMUSG00000064842.1 ...
## ENSMUSG00000064369.1 ENSMUSG00000064372.1
## rowData names(0):
## colnames(2325): CCCATACTCCGAAGAG AATCCAGTCATCTGCC ... ATCCACCCACCACCAG
## ATTGGTGGTTACCGAT
## colData names(1): celltype
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
# Cell metadata
colData(sce)## DataFrame with 2325 rows and 1 column
## celltype
## <character>
## CCCATACTCCGAAGAG DIplotene/Secondary ..
## AATCCAGTCATCTGCC DIplotene/Secondary ..
## GACTGCGGTTTGACTG DIplotene/Secondary ..
## ACACCAATCTTCGGTC DIplotene/Secondary ..
## TGACAACAGGACAGAA Mid Round spermatids
## ... ...
## CCCTCCTCATCGGACC NA
## GAAACTCCACTATCTT NA
## AACGTTGTCATCGATG NA
## ATCCACCCACCACCAG NA
## ATTGGTGGTTACCGAT NA
# Cell type summary
sce$celltype %>%
table(useNA = "ifany") %>%
enframe() %>%
arrange(desc(value))| name | value |
|---|---|
| Mid Round spermatids | 878 |
| NA | 447 |
| Late Round spermatids | 423 |
| Early Round spermatids | 410 |
| DIplotene/Secondary spermatocytes | 136 |
| Pachytene spermatocytes | 10 |
| A3-A4-In-B Differentiating spermatogonia | 7 |
| Leptotene/Zygotene spermatocytes | 7 |
| Preleptotene spermatocytes | 3 |
| Sertoli cells | 2 |
| A1-A2 Differentiating spermatogonia | 1 |
| Undifferentiated spermatogonia | 1 |
# Prepare the cell type labels
sce$cell_type <- sce$celltype %>%
fct_explicit_na(na_level = "Other") %>%
fct_lump_min(min = 400, other_level = "Other") %>%
fct_relevel("Early Round spermatids", "Mid Round spermatids",
"Late Round spermatids", "Other") %>%
fct_drop()
table(sce$cell_type)##
## Early Round spermatids Mid Round spermatids Late Round spermatids
## 410 878 423
## Other
## 614
# Set the RNG seed for reproducible results
set.seed(42)
# Data normalization
quick_clusters <- quickCluster(sce, assay.type = "spliced")
table(quick_clusters)## quick_clusters
## 1 2 3 4 5 6 7 8 9 10 11 12 13
## 133 139 244 254 176 117 196 177 157 201 213 197 121
sce <- computeSumFactors(sce, clusters = quick_clusters, assay.type = "spliced")
sce <- logNormCounts(sce, assay.type = "spliced")
# Selecting highly variable genes
dec <- modelGeneVar(sce)
top_hvgs <- getTopHVGs(dec, fdr.threshold = 0.05)
length(top_hvgs)## [1] 975
# Principal component analysis
sce <- denoisePCA(sce, dec, subset.row = top_hvgs, min.rank = 5, max.rank = 50)
ncol(reducedDim(sce, "PCA"))## [1] 5
# Dimensionality reduction by UMAP
sce <- runUMAP(sce, dimred = "PCA")dittoDimPlot(sce, "cell_type", reduction.use = "UMAP", size = 0.5,
legend.show = FALSE, do.label = TRUE,
labels.highlight = FALSE, labels.size = 4)dittoDimPlot(sce, "cell_type", reduction.use = "UMAP", size = 0.1,
split.by = "cell_type", split.ncol = 2, legend.show = FALSE)# Set the RNG seed for reproducible results
set.seed(42)
# Run scVelo
sce_velo_steady_state <- scvelo(sce,
mode = "steady_state",
assay.X = "spliced",
assay.spliced = "spliced",
assay.unspliced = "unspliced",
subset.row = top_hvgs,
use.dimred = "PCA")
sce_velo_steady_state## class: SingleCellExperiment
## dim: 975 2325
## metadata(4): neighbors velocity_params velocity_graph
## velocity_graph_neg
## assays(6): X spliced ... Mu velocity
## rownames(975): ENSMUSG00000038015.6 ENSMUSG00000022501.6 ...
## ENSMUSG00000049098.4 ENSMUSG00000116409.1
## rowData names(3): velocity_gamma velocity_r2 velocity_genes
## colnames(2325): CCCATACTCCGAAGAG AATCCAGTCATCTGCC ... ATCCACCCACCACCAG
## ATTGGTGGTTACCGAT
## colData names(7): velocity_self_transition root_cells ...
## velocity_confidence velocity_confidence_transition
## reducedDimNames(1): X_pca
## mainExpName: NULL
## altExpNames(0):
# Add RNA velocity information to the SingleCellExperiment object
sce$velocity_pseudotime_steady_state <- sce_velo_steady_state$velocity_pseudotime
sce$velocity_length_steady_state <- sce_velo_steady_state$velocity_length
sce$velocity_confidence_steady_state <- sce_velo_steady_state$velocity_confidence
# Project velocity vectors to the UMAP coordinates
embedded_steady_state <- embedVelocity(reducedDim(sce, "UMAP"),
sce_velo_steady_state)
grid_df_steady_state <- gridVectors(reducedDim(sce, "UMAP"),
embedded_steady_state, resolution = 30)dittoDimPlot(sce, "velocity_pseudotime_steady_state",
reduction.use = "UMAP", size = 0.5) +
geom_segment(
data = grid_df_steady_state,
mapping = aes(x = start.1, y = start.2, xend = end.1, yend = end.2),
arrow = arrow(length = unit(0.05, "inches"), type = "open")
)dittoDimPlot(sce, "velocity_pseudotime_steady_state",
cells.use = rep(FALSE, ncol(sce)),
reduction.use = "UMAP", size = 0.5) +
geom_segment(
data = grid_df_steady_state,
mapping = aes(x = start.1, y = start.2, xend = end.1, yend = end.2),
arrow = arrow(length = unit(0.05, "inches"), type = "open")
)dittoDimPlot(sce, "velocity_pseudotime_steady_state",
reduction.use = "UMAP", size = 0.5)dittoPlot(sce, "velocity_pseudotime_steady_state", group.by = "cell_type",
x.reorder = rev(seq(length(levels(sce$cell_type)))),
plots = c("ridgeplot"), ridgeplot.lineweight = 0.5,
legend.show = FALSE)dittoDimPlot(sce, "velocity_length_steady_state",
reduction.use = "UMAP", size = 0.5)dittoDimPlot(sce, "velocity_confidence_steady_state",
reduction.use = "UMAP", size = 0.5)# Set the RNG seed for reproducible results
set.seed(42)
# Run scVelo
sce_velo_stochastic <- scvelo(sce,
mode = "stochastic",
assay.X = "spliced",
assay.spliced = "spliced",
assay.unspliced = "unspliced",
subset.row = top_hvgs,
use.dimred = "PCA")
sce_velo_stochastic## class: SingleCellExperiment
## dim: 975 2325
## metadata(4): neighbors velocity_params velocity_graph
## velocity_graph_neg
## assays(7): X spliced ... velocity variance_velocity
## rownames(975): ENSMUSG00000038015.6 ENSMUSG00000022501.6 ...
## ENSMUSG00000049098.4 ENSMUSG00000116409.1
## rowData names(3): velocity_gamma velocity_r2 velocity_genes
## colnames(2325): CCCATACTCCGAAGAG AATCCAGTCATCTGCC ... ATCCACCCACCACCAG
## ATTGGTGGTTACCGAT
## colData names(7): velocity_self_transition root_cells ...
## velocity_confidence velocity_confidence_transition
## reducedDimNames(1): X_pca
## mainExpName: NULL
## altExpNames(0):
# Add RNA velocity information to the SingleCellExperiment object
sce$velocity_pseudotime_stochastic <- sce_velo_stochastic$velocity_pseudotime
sce$velocity_length_stochastic <- sce_velo_stochastic$velocity_length
sce$velocity_confidence_stochastic <- sce_velo_stochastic$velocity_confidence
# Project velocity vectors to the UMAP coordinates
embedded_stochastic <- embedVelocity(reducedDim(sce, "UMAP"),
sce_velo_stochastic)
grid_df_stochastic <- gridVectors(reducedDim(sce, "UMAP"),
embedded_stochastic, resolution = 30)dittoDimPlot(sce, "velocity_pseudotime_stochastic",
reduction.use = "UMAP", size = 0.5) +
geom_segment(
data = grid_df_stochastic,
mapping = aes(x = start.1, y = start.2, xend = end.1, yend = end.2),
arrow = arrow(length = unit(0.05, "inches"), type = "open")
)dittoDimPlot(sce, "velocity_pseudotime_stochastic",
cells.use = rep(FALSE, ncol(sce)),
reduction.use = "UMAP", size = 0.5) +
geom_segment(
data = grid_df_stochastic,
mapping = aes(x = start.1, y = start.2, xend = end.1, yend = end.2),
arrow = arrow(length = unit(0.05, "inches"), type = "open")
)dittoDimPlot(sce, "velocity_pseudotime_stochastic",
reduction.use = "UMAP", size = 0.5)dittoPlot(sce, "velocity_pseudotime_stochastic", group.by = "cell_type",
x.reorder = rev(seq(length(levels(sce$cell_type)))),
plots = c("ridgeplot"), ridgeplot.lineweight = 0.5,
legend.show = FALSE)dittoDimPlot(sce, "velocity_length_stochastic",
reduction.use = "UMAP", size = 0.5)dittoDimPlot(sce, "velocity_confidence_stochastic",
reduction.use = "UMAP", size = 0.5)# Set the RNG seed for reproducible results
set.seed(42)
# Run scVelo
sce_velo_dynamical <- scvelo(sce,
mode = "dynamical",
assay.X = "spliced",
assay.spliced = "spliced",
assay.unspliced = "unspliced",
subset.row = top_hvgs,
use.dimred = "PCA")
sce_velo_dynamical## class: SingleCellExperiment
## dim: 975 2325
## metadata(5): neighbors recover_dynamics velocity_params velocity_graph
## velocity_graph_neg
## assays(10): X spliced ... velocity velocity_u
## rownames(975): ENSMUSG00000038015.6 ENSMUSG00000022501.6 ...
## ENSMUSG00000049098.4 ENSMUSG00000116409.1
## rowData names(18): fit_r2 fit_alpha ... velocity_genes varm
## colnames(2325): CCCATACTCCGAAGAG AATCCAGTCATCTGCC ... ATCCACCCACCACCAG
## ATTGGTGGTTACCGAT
## colData names(8): velocity_self_transition root_cells ...
## velocity_confidence velocity_confidence_transition
## reducedDimNames(1): X_pca
## mainExpName: NULL
## altExpNames(0):
# Add RNA velocity information to the SingleCellExperiment object
sce$velocity_pseudotime_dynamical <- sce_velo_dynamical$velocity_pseudotime
sce$velocity_length_dynamical <- sce_velo_dynamical$velocity_length
sce$velocity_confidence_dynamical <- sce_velo_dynamical$velocity_confidence
# Project velocity vectors to the UMAP coordinates
embedded_dynamical <- embedVelocity(reducedDim(sce, "UMAP"),
sce_velo_dynamical)
grid_df_dynamical <- gridVectors(reducedDim(sce, "UMAP"),
embedded_dynamical, resolution = 30)dittoDimPlot(sce, "velocity_pseudotime_dynamical",
reduction.use = "UMAP", size = 0.5) +
geom_segment(
data = grid_df_dynamical,
mapping = aes(x = start.1, y = start.2, xend = end.1, yend = end.2),
arrow = arrow(length = unit(0.05, "inches"), type = "open")
)dittoDimPlot(sce, "velocity_pseudotime_dynamical",
cells.use = rep(FALSE, ncol(sce)),
reduction.use = "UMAP", size = 0.5) +
geom_segment(
data = grid_df_dynamical,
mapping = aes(x = start.1, y = start.2, xend = end.1, yend = end.2),
arrow = arrow(length = unit(0.05, "inches"), type = "open")
)dittoDimPlot(sce, "velocity_pseudotime_dynamical",
reduction.use = "UMAP", size = 0.5)dittoPlot(sce, "velocity_pseudotime_dynamical", group.by = "cell_type",
x.reorder = rev(seq(length(levels(sce$cell_type)))),
plots = c("ridgeplot"), ridgeplot.lineweight = 0.5,
legend.show = FALSE)dittoDimPlot(sce, "velocity_length_dynamical",
reduction.use = "UMAP", size = 0.5)dittoDimPlot(sce, "velocity_confidence_dynamical",
reduction.use = "UMAP", size = 0.5)sessionInfo()## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=pl_PL.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=pl_PL.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=pl_PL.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=pl_PL.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] viridis_0.6.1 viridisLite_0.4.0
## [3] dittoSeq_1.4.1 velociraptor_1.2.0
## [5] scran_1.20.1 scater_1.20.1
## [7] scuttle_1.2.0 scRNAseq_2.6.1
## [9] SingleCellExperiment_1.14.1 SummarizedExperiment_1.22.0
## [11] Biobase_2.52.0 GenomicRanges_1.44.0
## [13] GenomeInfoDb_1.28.1 IRanges_2.26.0
## [15] S4Vectors_0.30.0 BiocGenerics_0.38.0
## [17] MatrixGenerics_1.4.0 matrixStats_0.59.0
## [19] Matrix_1.3-4 knitr_1.33
## [21] glue_1.4.2 forcats_0.5.1
## [23] stringr_1.4.0 dplyr_1.0.7
## [25] purrr_0.3.4 readr_1.4.0
## [27] tidyr_1.1.3 tibble_3.1.2
## [29] ggplot2_3.3.5 tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1
## [3] AnnotationHub_3.0.1 BiocFileCache_2.0.0
## [5] plyr_1.8.6 igraph_1.2.6
## [7] lazyeval_0.2.2 BiocParallel_1.26.1
## [9] digest_0.6.27 ensembldb_2.16.2
## [11] htmltools_0.5.1.1 fansi_0.5.0
## [13] magrittr_2.0.1 memoise_2.0.0
## [15] ScaledMatrix_1.0.0 cluster_2.1.2
## [17] limma_3.48.1 Biostrings_2.60.1
## [19] modelr_0.1.8 prettyunits_1.1.1
## [21] colorspace_2.0-2 ggrepel_0.9.1
## [23] blob_1.2.1 rvest_1.0.0
## [25] rappdirs_0.3.3 haven_2.4.1
## [27] xfun_0.24 crayon_1.4.1
## [29] RCurl_1.98-1.3 jsonlite_1.7.2
## [31] gtable_0.3.0 zlibbioc_1.38.0
## [33] XVector_0.32.0 DelayedArray_0.18.0
## [35] BiocSingular_1.8.1 scales_1.1.1
## [37] pheatmap_1.0.12 edgeR_3.34.0
## [39] DBI_1.1.1 Rcpp_1.0.7
## [41] xtable_1.8-4 progress_1.2.2
## [43] reticulate_1.20 dqrng_0.3.0
## [45] bit_4.0.4 rsvd_1.0.5
## [47] metapod_1.0.0 httr_1.4.2
## [49] RColorBrewer_1.1-2 dir.expiry_1.0.0
## [51] ellipsis_0.3.2 farver_2.1.0
## [53] pkgconfig_2.0.3 XML_3.99-0.6
## [55] sass_0.4.0 dbplyr_2.1.1
## [57] locfit_1.5-9.4 utf8_1.2.1
## [59] labeling_0.4.2 tidyselect_1.1.1
## [61] rlang_0.4.11 later_1.2.0
## [63] AnnotationDbi_1.54.1 munsell_0.5.0
## [65] BiocVersion_3.13.1 cellranger_1.1.0
## [67] tools_4.1.0 cachem_1.0.5
## [69] cli_3.0.1 generics_0.1.0
## [71] RSQLite_2.2.7 ExperimentHub_2.0.0
## [73] ggridges_0.5.3 broom_0.7.8
## [75] evaluate_0.14 fastmap_1.1.0
## [77] yaml_2.2.1 bit64_4.0.5
## [79] fs_1.5.0 KEGGREST_1.32.0
## [81] AnnotationFilter_1.16.0 sparseMatrixStats_1.4.0
## [83] mime_0.11 xml2_1.3.2
## [85] biomaRt_2.48.2 compiler_4.1.0
## [87] rstudioapi_0.13 beeswarm_0.4.0
## [89] filelock_1.0.2 curl_4.3.2
## [91] png_0.1-7 interactiveDisplayBase_1.30.0
## [93] reprex_2.0.0 statmod_1.4.36
## [95] bslib_0.2.5.1 stringi_1.7.3
## [97] highr_0.9 basilisk.utils_1.4.0
## [99] GenomicFeatures_1.44.0 bluster_1.2.1
## [101] lattice_0.20-44 ProtGenerics_1.24.0
## [103] vctrs_0.3.8 pillar_1.6.1
## [105] lifecycle_1.0.0 BiocManager_1.30.16
## [107] jquerylib_0.1.4 BiocNeighbors_1.10.0
## [109] cowplot_1.1.1 bitops_1.0-7
## [111] irlba_2.3.3 httpuv_1.6.1
## [113] rtracklayer_1.52.0 R6_2.5.0
## [115] BiocIO_1.2.0 promises_1.2.0.1
## [117] gridExtra_2.3 vipor_0.4.5
## [119] codetools_0.2-18 zellkonverter_1.2.1
## [121] assertthat_0.2.1 rjson_0.2.20
## [123] withr_2.4.2 GenomicAlignments_1.28.0
## [125] Rsamtools_2.8.0 GenomeInfoDbData_1.2.6
## [127] hms_1.1.0 grid_4.1.0
## [129] beachmat_2.8.0 basilisk_1.4.0
## [131] rmarkdown_2.9 DelayedMatrixStats_1.14.0
## [133] shiny_1.6.0 lubridate_1.7.10
## [135] ggbeeswarm_0.6.0 restfulr_0.0.13