Preparing the environment

Load the required packages:

library(tidyverse)
library(glue)
library(knitr)
library(Matrix)
library(scRNAseq)
library(scater)
library(scran)
library(velociraptor)
library(dittoSeq)
library(viridis)

Preparing the data

# Load the Hermann et al. (2018) mouse spermatogenic cells dataset
sce <- HermannSpermatogenesisData()
sce
## class: SingleCellExperiment 
## dim: 54448 2325 
## metadata(0):
## assays(2): spliced unspliced
## rownames(54448): ENSMUSG00000102693.1 ENSMUSG00000064842.1 ...
##   ENSMUSG00000064369.1 ENSMUSG00000064372.1
## rowData names(0):
## colnames(2325): CCCATACTCCGAAGAG AATCCAGTCATCTGCC ... ATCCACCCACCACCAG
##   ATTGGTGGTTACCGAT
## colData names(1): celltype
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
# Cell metadata
colData(sce)
## DataFrame with 2325 rows and 1 column
##                                celltype
##                             <character>
## CCCATACTCCGAAGAG DIplotene/Secondary ..
## AATCCAGTCATCTGCC DIplotene/Secondary ..
## GACTGCGGTTTGACTG DIplotene/Secondary ..
## ACACCAATCTTCGGTC DIplotene/Secondary ..
## TGACAACAGGACAGAA   Mid Round spermatids
## ...                                 ...
## CCCTCCTCATCGGACC                     NA
## GAAACTCCACTATCTT                     NA
## AACGTTGTCATCGATG                     NA
## ATCCACCCACCACCAG                     NA
## ATTGGTGGTTACCGAT                     NA
# Cell type summary
sce$celltype %>%
    table(useNA = "ifany") %>%
    enframe() %>%
    arrange(desc(value))
name value
Mid Round spermatids 878
NA 447
Late Round spermatids 423
Early Round spermatids 410
DIplotene/Secondary spermatocytes 136
Pachytene spermatocytes 10
A3-A4-In-B Differentiating spermatogonia 7
Leptotene/Zygotene spermatocytes 7
Preleptotene spermatocytes 3
Sertoli cells 2
A1-A2 Differentiating spermatogonia 1
Undifferentiated spermatogonia 1
# Prepare the cell type labels
sce$cell_type <- sce$celltype %>%
    fct_explicit_na(na_level = "Other") %>%
    fct_lump_min(min = 400, other_level = "Other") %>%
    fct_relevel("Early Round spermatids", "Mid Round spermatids",
                "Late Round spermatids", "Other") %>%
    fct_drop()
table(sce$cell_type)
## 
## Early Round spermatids   Mid Round spermatids  Late Round spermatids 
##                    410                    878                    423 
##                  Other 
##                    614

Basic analysis

# Set the RNG seed for reproducible results
set.seed(42)
# Data normalization
quick_clusters <- quickCluster(sce, assay.type = "spliced")
table(quick_clusters)
## quick_clusters
##   1   2   3   4   5   6   7   8   9  10  11  12  13 
## 133 139 244 254 176 117 196 177 157 201 213 197 121
sce <- computeSumFactors(sce, clusters = quick_clusters, assay.type = "spliced")
sce <- logNormCounts(sce, assay.type = "spliced")
# Selecting highly variable genes
dec <- modelGeneVar(sce)
top_hvgs <- getTopHVGs(dec, fdr.threshold = 0.05)
length(top_hvgs)
## [1] 975
# Principal component analysis
sce <- denoisePCA(sce, dec, subset.row = top_hvgs, min.rank = 5, max.rank = 50)
ncol(reducedDim(sce, "PCA"))
## [1] 5
# Dimensionality reduction by UMAP
sce <- runUMAP(sce, dimred = "PCA")

UMAP plots

dittoDimPlot(sce, "cell_type", reduction.use = "UMAP", size = 0.5,
             legend.show = FALSE, do.label = TRUE,
             labels.highlight = FALSE, labels.size = 4)

dittoDimPlot(sce, "cell_type", reduction.use = "UMAP", size = 0.1,
             split.by = "cell_type", split.ncol = 2, legend.show = FALSE)

RNA velocity analysis

Steady-state mode

scVelo

# Set the RNG seed for reproducible results
set.seed(42)
# Run scVelo
sce_velo_steady_state <- scvelo(sce,
                                mode = "steady_state",
                                assay.X = "spliced",
                                assay.spliced = "spliced",
                                assay.unspliced = "unspliced",
                                subset.row = top_hvgs,
                                use.dimred = "PCA")
sce_velo_steady_state
## class: SingleCellExperiment 
## dim: 975 2325 
## metadata(4): neighbors velocity_params velocity_graph
##   velocity_graph_neg
## assays(6): X spliced ... Mu velocity
## rownames(975): ENSMUSG00000038015.6 ENSMUSG00000022501.6 ...
##   ENSMUSG00000049098.4 ENSMUSG00000116409.1
## rowData names(3): velocity_gamma velocity_r2 velocity_genes
## colnames(2325): CCCATACTCCGAAGAG AATCCAGTCATCTGCC ... ATCCACCCACCACCAG
##   ATTGGTGGTTACCGAT
## colData names(7): velocity_self_transition root_cells ...
##   velocity_confidence velocity_confidence_transition
## reducedDimNames(1): X_pca
## mainExpName: NULL
## altExpNames(0):
# Add RNA velocity information to the SingleCellExperiment object
sce$velocity_pseudotime_steady_state <- sce_velo_steady_state$velocity_pseudotime
sce$velocity_length_steady_state <- sce_velo_steady_state$velocity_length
sce$velocity_confidence_steady_state <- sce_velo_steady_state$velocity_confidence
# Project velocity vectors to the UMAP coordinates
embedded_steady_state <- embedVelocity(reducedDim(sce, "UMAP"),
                                       sce_velo_steady_state)
grid_df_steady_state <- gridVectors(reducedDim(sce, "UMAP"),
                                    embedded_steady_state, resolution = 30)

RNA velocity vectors & pseudotime

dittoDimPlot(sce, "velocity_pseudotime_steady_state",
             reduction.use = "UMAP", size = 0.5) +
    geom_segment(
        data = grid_df_steady_state,
        mapping = aes(x = start.1, y = start.2, xend = end.1, yend = end.2),
        arrow = arrow(length = unit(0.05, "inches"), type = "open")
    )

RNA velocity vectors

dittoDimPlot(sce, "velocity_pseudotime_steady_state",
             cells.use = rep(FALSE, ncol(sce)),
             reduction.use = "UMAP", size = 0.5) +
    geom_segment(
        data = grid_df_steady_state,
        mapping = aes(x = start.1, y = start.2, xend = end.1, yend = end.2),
        arrow = arrow(length = unit(0.05, "inches"), type = "open")
    )

RNA velocity pseudotime

dittoDimPlot(sce, "velocity_pseudotime_steady_state",
             reduction.use = "UMAP", size = 0.5)

RNA velocity pseudotime by cell type

dittoPlot(sce, "velocity_pseudotime_steady_state", group.by = "cell_type",
          x.reorder = rev(seq(length(levels(sce$cell_type)))),
          plots = c("ridgeplot"), ridgeplot.lineweight = 0.5,
          legend.show = FALSE)

RNA velocity length

dittoDimPlot(sce, "velocity_length_steady_state",
             reduction.use = "UMAP", size = 0.5)

RNA velocity confidence

dittoDimPlot(sce, "velocity_confidence_steady_state",
             reduction.use = "UMAP", size = 0.5)

Stochastic mode

scVelo

# Set the RNG seed for reproducible results
set.seed(42)
# Run scVelo
sce_velo_stochastic <- scvelo(sce,
                              mode = "stochastic",
                              assay.X = "spliced",
                              assay.spliced = "spliced",
                              assay.unspliced = "unspliced",
                              subset.row = top_hvgs,
                              use.dimred = "PCA")
sce_velo_stochastic
## class: SingleCellExperiment 
## dim: 975 2325 
## metadata(4): neighbors velocity_params velocity_graph
##   velocity_graph_neg
## assays(7): X spliced ... velocity variance_velocity
## rownames(975): ENSMUSG00000038015.6 ENSMUSG00000022501.6 ...
##   ENSMUSG00000049098.4 ENSMUSG00000116409.1
## rowData names(3): velocity_gamma velocity_r2 velocity_genes
## colnames(2325): CCCATACTCCGAAGAG AATCCAGTCATCTGCC ... ATCCACCCACCACCAG
##   ATTGGTGGTTACCGAT
## colData names(7): velocity_self_transition root_cells ...
##   velocity_confidence velocity_confidence_transition
## reducedDimNames(1): X_pca
## mainExpName: NULL
## altExpNames(0):
# Add RNA velocity information to the SingleCellExperiment object
sce$velocity_pseudotime_stochastic <- sce_velo_stochastic$velocity_pseudotime
sce$velocity_length_stochastic <- sce_velo_stochastic$velocity_length
sce$velocity_confidence_stochastic <- sce_velo_stochastic$velocity_confidence
# Project velocity vectors to the UMAP coordinates
embedded_stochastic <- embedVelocity(reducedDim(sce, "UMAP"),
                                     sce_velo_stochastic)
grid_df_stochastic <- gridVectors(reducedDim(sce, "UMAP"),
                                  embedded_stochastic, resolution = 30)

RNA velocity vectors & pseudotime

dittoDimPlot(sce, "velocity_pseudotime_stochastic",
             reduction.use = "UMAP", size = 0.5) +
    geom_segment(
        data = grid_df_stochastic,
        mapping = aes(x = start.1, y = start.2, xend = end.1, yend = end.2),
        arrow = arrow(length = unit(0.05, "inches"), type = "open")
    )

RNA velocity vectors

dittoDimPlot(sce, "velocity_pseudotime_stochastic",
             cells.use = rep(FALSE, ncol(sce)),
             reduction.use = "UMAP", size = 0.5) +
    geom_segment(
        data = grid_df_stochastic,
        mapping = aes(x = start.1, y = start.2, xend = end.1, yend = end.2),
        arrow = arrow(length = unit(0.05, "inches"), type = "open")
    )

RNA velocity pseudotime

dittoDimPlot(sce, "velocity_pseudotime_stochastic",
             reduction.use = "UMAP", size = 0.5)

RNA velocity pseudotime by cell type

dittoPlot(sce, "velocity_pseudotime_stochastic", group.by = "cell_type",
          x.reorder = rev(seq(length(levels(sce$cell_type)))),
          plots = c("ridgeplot"), ridgeplot.lineweight = 0.5,
          legend.show = FALSE)

RNA velocity length

dittoDimPlot(sce, "velocity_length_stochastic",
             reduction.use = "UMAP", size = 0.5)

RNA velocity confidence

dittoDimPlot(sce, "velocity_confidence_stochastic",
             reduction.use = "UMAP", size = 0.5)

Dynamical mode

scVelo

# Set the RNG seed for reproducible results
set.seed(42)
# Run scVelo
sce_velo_dynamical <- scvelo(sce,
                             mode = "dynamical",
                             assay.X = "spliced",
                             assay.spliced = "spliced",
                             assay.unspliced = "unspliced",
                             subset.row = top_hvgs,
                             use.dimred = "PCA")
sce_velo_dynamical
## class: SingleCellExperiment 
## dim: 975 2325 
## metadata(5): neighbors recover_dynamics velocity_params velocity_graph
##   velocity_graph_neg
## assays(10): X spliced ... velocity velocity_u
## rownames(975): ENSMUSG00000038015.6 ENSMUSG00000022501.6 ...
##   ENSMUSG00000049098.4 ENSMUSG00000116409.1
## rowData names(18): fit_r2 fit_alpha ... velocity_genes varm
## colnames(2325): CCCATACTCCGAAGAG AATCCAGTCATCTGCC ... ATCCACCCACCACCAG
##   ATTGGTGGTTACCGAT
## colData names(8): velocity_self_transition root_cells ...
##   velocity_confidence velocity_confidence_transition
## reducedDimNames(1): X_pca
## mainExpName: NULL
## altExpNames(0):
# Add RNA velocity information to the SingleCellExperiment object
sce$velocity_pseudotime_dynamical <- sce_velo_dynamical$velocity_pseudotime
sce$velocity_length_dynamical <- sce_velo_dynamical$velocity_length
sce$velocity_confidence_dynamical <- sce_velo_dynamical$velocity_confidence
# Project velocity vectors to the UMAP coordinates
embedded_dynamical <- embedVelocity(reducedDim(sce, "UMAP"),
                                    sce_velo_dynamical)
grid_df_dynamical <- gridVectors(reducedDim(sce, "UMAP"),
                                 embedded_dynamical, resolution = 30)

RNA velocity vectors & pseudotime

dittoDimPlot(sce, "velocity_pseudotime_dynamical",
             reduction.use = "UMAP", size = 0.5) +
    geom_segment(
        data = grid_df_dynamical,
        mapping = aes(x = start.1, y = start.2, xend = end.1, yend = end.2),
        arrow = arrow(length = unit(0.05, "inches"), type = "open")
    )

RNA velocity vectors

dittoDimPlot(sce, "velocity_pseudotime_dynamical",
             cells.use = rep(FALSE, ncol(sce)),
             reduction.use = "UMAP", size = 0.5) +
    geom_segment(
        data = grid_df_dynamical,
        mapping = aes(x = start.1, y = start.2, xend = end.1, yend = end.2),
        arrow = arrow(length = unit(0.05, "inches"), type = "open")
    )

RNA velocity pseudotime

dittoDimPlot(sce, "velocity_pseudotime_dynamical",
             reduction.use = "UMAP", size = 0.5)

RNA velocity pseudotime by cell type

dittoPlot(sce, "velocity_pseudotime_dynamical", group.by = "cell_type",
          x.reorder = rev(seq(length(levels(sce$cell_type)))),
          plots = c("ridgeplot"), ridgeplot.lineweight = 0.5,
          legend.show = FALSE)

RNA velocity length

dittoDimPlot(sce, "velocity_length_dynamical",
             reduction.use = "UMAP", size = 0.5)

RNA velocity confidence

dittoDimPlot(sce, "velocity_confidence_dynamical",
             reduction.use = "UMAP", size = 0.5)

Session Info

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=pl_PL.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=pl_PL.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=pl_PL.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=pl_PL.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] viridis_0.6.1               viridisLite_0.4.0          
##  [3] dittoSeq_1.4.1              velociraptor_1.2.0         
##  [5] scran_1.20.1                scater_1.20.1              
##  [7] scuttle_1.2.0               scRNAseq_2.6.1             
##  [9] SingleCellExperiment_1.14.1 SummarizedExperiment_1.22.0
## [11] Biobase_2.52.0              GenomicRanges_1.44.0       
## [13] GenomeInfoDb_1.28.1         IRanges_2.26.0             
## [15] S4Vectors_0.30.0            BiocGenerics_0.38.0        
## [17] MatrixGenerics_1.4.0        matrixStats_0.59.0         
## [19] Matrix_1.3-4                knitr_1.33                 
## [21] glue_1.4.2                  forcats_0.5.1              
## [23] stringr_1.4.0               dplyr_1.0.7                
## [25] purrr_0.3.4                 readr_1.4.0                
## [27] tidyr_1.1.3                 tibble_3.1.2               
## [29] ggplot2_3.3.5               tidyverse_1.3.1            
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1                  backports_1.2.1              
##   [3] AnnotationHub_3.0.1           BiocFileCache_2.0.0          
##   [5] plyr_1.8.6                    igraph_1.2.6                 
##   [7] lazyeval_0.2.2                BiocParallel_1.26.1          
##   [9] digest_0.6.27                 ensembldb_2.16.2             
##  [11] htmltools_0.5.1.1             fansi_0.5.0                  
##  [13] magrittr_2.0.1                memoise_2.0.0                
##  [15] ScaledMatrix_1.0.0            cluster_2.1.2                
##  [17] limma_3.48.1                  Biostrings_2.60.1            
##  [19] modelr_0.1.8                  prettyunits_1.1.1            
##  [21] colorspace_2.0-2              ggrepel_0.9.1                
##  [23] blob_1.2.1                    rvest_1.0.0                  
##  [25] rappdirs_0.3.3                haven_2.4.1                  
##  [27] xfun_0.24                     crayon_1.4.1                 
##  [29] RCurl_1.98-1.3                jsonlite_1.7.2               
##  [31] gtable_0.3.0                  zlibbioc_1.38.0              
##  [33] XVector_0.32.0                DelayedArray_0.18.0          
##  [35] BiocSingular_1.8.1            scales_1.1.1                 
##  [37] pheatmap_1.0.12               edgeR_3.34.0                 
##  [39] DBI_1.1.1                     Rcpp_1.0.7                   
##  [41] xtable_1.8-4                  progress_1.2.2               
##  [43] reticulate_1.20               dqrng_0.3.0                  
##  [45] bit_4.0.4                     rsvd_1.0.5                   
##  [47] metapod_1.0.0                 httr_1.4.2                   
##  [49] RColorBrewer_1.1-2            dir.expiry_1.0.0             
##  [51] ellipsis_0.3.2                farver_2.1.0                 
##  [53] pkgconfig_2.0.3               XML_3.99-0.6                 
##  [55] sass_0.4.0                    dbplyr_2.1.1                 
##  [57] locfit_1.5-9.4                utf8_1.2.1                   
##  [59] labeling_0.4.2                tidyselect_1.1.1             
##  [61] rlang_0.4.11                  later_1.2.0                  
##  [63] AnnotationDbi_1.54.1          munsell_0.5.0                
##  [65] BiocVersion_3.13.1            cellranger_1.1.0             
##  [67] tools_4.1.0                   cachem_1.0.5                 
##  [69] cli_3.0.1                     generics_0.1.0               
##  [71] RSQLite_2.2.7                 ExperimentHub_2.0.0          
##  [73] ggridges_0.5.3                broom_0.7.8                  
##  [75] evaluate_0.14                 fastmap_1.1.0                
##  [77] yaml_2.2.1                    bit64_4.0.5                  
##  [79] fs_1.5.0                      KEGGREST_1.32.0              
##  [81] AnnotationFilter_1.16.0       sparseMatrixStats_1.4.0      
##  [83] mime_0.11                     xml2_1.3.2                   
##  [85] biomaRt_2.48.2                compiler_4.1.0               
##  [87] rstudioapi_0.13               beeswarm_0.4.0               
##  [89] filelock_1.0.2                curl_4.3.2                   
##  [91] png_0.1-7                     interactiveDisplayBase_1.30.0
##  [93] reprex_2.0.0                  statmod_1.4.36               
##  [95] bslib_0.2.5.1                 stringi_1.7.3                
##  [97] highr_0.9                     basilisk.utils_1.4.0         
##  [99] GenomicFeatures_1.44.0        bluster_1.2.1                
## [101] lattice_0.20-44               ProtGenerics_1.24.0          
## [103] vctrs_0.3.8                   pillar_1.6.1                 
## [105] lifecycle_1.0.0               BiocManager_1.30.16          
## [107] jquerylib_0.1.4               BiocNeighbors_1.10.0         
## [109] cowplot_1.1.1                 bitops_1.0-7                 
## [111] irlba_2.3.3                   httpuv_1.6.1                 
## [113] rtracklayer_1.52.0            R6_2.5.0                     
## [115] BiocIO_1.2.0                  promises_1.2.0.1             
## [117] gridExtra_2.3                 vipor_0.4.5                  
## [119] codetools_0.2-18              zellkonverter_1.2.1          
## [121] assertthat_0.2.1              rjson_0.2.20                 
## [123] withr_2.4.2                   GenomicAlignments_1.28.0     
## [125] Rsamtools_2.8.0               GenomeInfoDbData_1.2.6       
## [127] hms_1.1.0                     grid_4.1.0                   
## [129] beachmat_2.8.0                basilisk_1.4.0               
## [131] rmarkdown_2.9                 DelayedMatrixStats_1.14.0    
## [133] shiny_1.6.0                   lubridate_1.7.10             
## [135] ggbeeswarm_0.6.0              restfulr_0.0.13