Python ‘virtual environment’ to integrate scVI with Seurat

Multiomics
Biology
A short
Author

Rishi Das Roy

Published

October 13, 2025

This post provides an alternative to the conda environment for using scVI-tools through the Seurat wrappers. For R users (who are not Python experts), SeuratWrappers provides the scVIIntegration method, which is really useful and is explained here. However, it depends on a conda environment, which may not be available on every platform, such as some of the HPCs I use.

Here I discuss how to use a Python virtual environment instead, and the challenges that come with it. The hurdles are:

library(Seurat, quietly = TRUE, verbose = FALSE)
library(SeuratData, quietly = TRUE, verbose = FALSE)
library(SeuratWrappers, quietly = TRUE, verbose = FALSE)
library(ggplot2, quietly = TRUE, verbose = FALSE)
library(patchwork, quietly = TRUE, verbose = FALSE)
options(future.globals.maxSize = 1e9)
library(reticulate, quietly = TRUE, verbose = FALSE)

1 Installation

Declare a path in which to install the Python packages and use the following commands from the reticulate package. The downloaded files can be very big, so it is useful to declare a cache-dir for a smooth installation.

# Directory in which the Python virtual environment is created.
install_dirPath <- "/your/path_to/install/"

# The pip cache option is defined once so that both calls below always use the
# same location (the downloads can be large).
pip_cache_option <- c("--cache-dir=/your/path_to/tmp/")

# numpy is kept below 2. The constraint is repeated when installing scvi-tools
# so that pip has to honour it while resolving scvi-tools' dependencies,
# instead of being free to upgrade the numpy installed at creation time.
numpy_pin <- "numpy<2"

# force = TRUE recreates the environment even if it already exists.
reticulate::virtualenv_create(envname = install_dirPath, force = TRUE,
                              version = ">=3.11", packages = c(numpy_pin),
                              pip_options = pip_cache_option)
reticulate::virtualenv_install(envname = install_dirPath,
                               packages = c("scvi-tools", numpy_pin),
                               pip_version = FALSE,
                               pip_options = pip_cache_option)

Verify the installed numpy version (it should be below 2):

# Point reticulate at the virtual environment, then print the numpy version
# reported by the resulting Python configuration.
reticulate::use_virtualenv(virtualenv = install_dirPath)
config <- reticulate::py_config()
config[["numpy"]][["version"]]
[1] '1.26.4'

2 Rewrite the scVIIntegration

The environment option is hard-coded in the original function. Therefore, the function needs to be redefined as shown below. Can you spot the lines that differ from the original?

#' scVI integration through a Python virtual environment
#'
#' Activates a Python virtualenv with reticulate, builds an AnnData object
#' from the joined `counts` layer, trains an scVI model with the batch of each
#' cell as `batch_key`, and returns the latent representation as a
#' dimensional reduction. Intended to be passed as `method` to
#' `IntegrateLayers()`.
#'
#' @param object Object handed over by `IntegrateLayers()`; it is passed to
#'   `.FindBatches()`, `JoinLayers()` and `LayerData()` (presumably a Seurat
#'   v5 assay -- confirm against the caller).
#' @param features Features (genes) used to train the model. When `NULL`, all
#'   features of the joined `counts` layer are used.
#' @param layers Layer name(s) given to `.FindBatches()` to assign each cell
#'   to a batch. The expression values themselves are always read from the
#'   joined `counts` layer.
#' @param venv_env Name or path of the Python virtual environment to activate.
#' @param new.reduction Name of the returned reduction; also used as its key
#'   and as the prefix of the embedding column names.
#' @param ndims Number of latent dimensions (`n_latent`).
#' @param nlayers Number of layers of the model (`n_layers`).
#' @param gene_likelihood Forwarded to `scvi.model.SCVI`.
#' @param max_epochs Number of training epochs. `NULL` is passed to Python as
#'   `None`; any other value is coerced to integer.
#' @param ... Accepted so that extra arguments supplied through
#'   `IntegrateLayers()` do not raise an error; not used in the body.
#' @return A named list of length one holding the dimensional reduction
#'   object, named `new.reduction`.
venv_scVIIntegration <- function(object, features = NULL, layers = "counts",
                                 venv_env = NULL,
                                 new.reduction = "integrated.dr", ndims = 30,
                                 nlayers = 2, gene_likelihood = "nb",
                                 max_epochs = NULL, ...) {
  reticulate::use_virtualenv(virtualenv = venv_env, required = TRUE)
  # AnnData is taken from the anndata module directly, so scanpy does not
  # have to be installed in the environment just to reach that class.
  scvi <- reticulate::import("scvi", convert = FALSE)
  anndata <- reticulate::import("anndata", convert = FALSE)
  scipy <- reticulate::import("scipy", convert = FALSE)

  if (is.null(max_epochs)) {
    # An R NULL becomes a Python None.
    max_epochs <- reticulate::r_to_py(max_epochs)
  } else {
    max_epochs <- as.integer(max_epochs)
  }

  # Batch labels must be derived while the layers are still split.
  batches <- .FindBatches(object, layers = layers)
  object <- JoinLayers(object = object, layers = "counts")
  counts <- LayerData(object, layer = "counts")

  # Indexing with NULL would select zero features and yield an empty AnnData.
  if (is.null(features)) {
    features <- rownames(counts)
  }

  # drop = FALSE keeps the matrix / data.frame shape even for a single
  # feature or a single metadata column.
  adata <- anndata$AnnData(
    X = scipy$sparse$csr_matrix(Matrix::t(counts[features, , drop = FALSE])),
    obs = batches,
    var = object[[]][features, , drop = FALSE]
  )

  scvi$model$SCVI$setup_anndata(adata, batch_key = "batch")
  model <- scvi$model$SCVI(
    adata = adata,
    n_latent = as.integer(x = ndims),
    n_layers = as.integer(x = nlayers),
    gene_likelihood = gene_likelihood
  )
  model$train(max_epochs = max_epochs)

  latent <- model$get_latent_representation()
  latent <- as.matrix(latent)
  rownames(latent) <- reticulate::py_to_r(adata$obs$index$values)
  colnames(latent) <- paste0(new.reduction, "_", seq_len(ncol(latent)))

  # NOTE(review): the warning silenced here presumably concerns the format of
  # `key` -- confirm before removing suppressWarnings().
  suppressWarnings(latent.dr <- CreateDimReducObject(embeddings = latent,
    key = new.reduction))
  output.list <- list(latent.dr)
  names(output.list) <- new.reduction
  return(output.list)
}

Additionally, we have to define the helper function .FindBatches ourselves, because SeuratWrappers does not export it for external use.

Code
# Build a data.frame with one row per cell and a single `batch` column.
#
# For anything that is not an `SCTAssay`, the batch of a cell is the layer it
# comes from. For an `SCTAssay`, the transformation is expected to have been
# run on each batch individually before merging, so the model identifier
# serves as the batch label.
.FindBatches <- function(object, layers) {
  if (!inherits(object, what = "SCTAssay")) {
    # LogMap telling which of the requested layers each cell belongs to
    membership <- slot(object, name = "cells")[, layers]
    # named vector: cell -> layer
    cell.to.layer <- labels(
      membership,
      values = Cells(object, layer = layers)
    )
    batches <- as.data.frame(cell.to.layer)
    names(batches) <- "batch"
    return(batches)
  }

  # SCTAssay: start from an empty data.frame indexed by the cell identifiers,
  # then label the cells of each model with that model's identifier
  batches <- SeuratObject::EmptyDF(n = ncol(object))
  row.names(batches) <- Cells(object)
  for (model.id in levels(object)) {
    model.cells <- Cells(object, layer = model.id)
    batches[model.cells, "batch"] <- model.id
  }

  return(batches)
}

3 Reproduce

This section reproduces the example given in the documentation of scVIIntegration.

# Load the PBMC systematic comparative analysis dataset.
obj <- LoadData("pbmcsca")

# Split the RNA assay by the `Method` metadata column.
obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method)

# Standard preprocessing up to PCA.
obj <- NormalizeData(object = obj)
obj <- FindVariableFeatures(object = obj)
obj <- ScaleData(object = obj)
obj <- RunPCA(object = obj)

# Print a summary of the object.
obj
An object of class Seurat 
33694 features across 31021 samples within 1 assay 
Active assay: RNA (33694 features, 2000 variable features)
 19 layers present: counts.Smart-seq2, counts.CEL-Seq2, counts.10x_Chromium_v2_A, counts.10x_Chromium_v2_B, counts.10x_Chromium_v3, counts.Drop-seq, counts.Seq-Well, counts.inDrops, counts.10x_Chromium_v2, data.Smart-seq2, data.CEL-Seq2, data.10x_Chromium_v2_A, data.10x_Chromium_v2_B, data.10x_Chromium_v3, data.Drop-seq, data.Seq-Well, data.inDrops, data.10x_Chromium_v2, scale.data
 1 dimensional reduction calculated: pca
# Integrate the layers with the virtualenv-based scVI method; `venv_env` is
# the environment argument declared by `venv_scVIIntegration`.
obj <- IntegrateLayers(
  object = obj,
  method = venv_scVIIntegration,
  orig.reduction = "pca",
  new.reduction = "integrated.scvi",
  venv_env = install_dirPath,
  verbose = FALSE
)

Training:   0%|          | 0/258 [00:00<?, ?it/s]
Epoch 1/258:   0%|          | 0/258 [00:00<?, ?it/s]
# Neighbour graph and clustering on the scVI embedding.
obj <- FindNeighbors(
  object = obj,
  reduction = "integrated.scvi",
  dims = 1:30,
  verbose = FALSE
)
obj <- FindClusters(
  object = obj,
  resolution = 2,
  cluster.name = "scvi_clusters",
  verbose = FALSE
)

# UMAP computed from the same embedding, stored as "umap.scvi".
obj <- RunUMAP(
  object = obj,
  reduction = "integrated.scvi",
  dims = 1:30,
  reduction.name = "umap.scvi",
  verbose = FALSE
)

# One panel per grouping variable.
DimPlot(
  object = obj,
  reduction = "umap.scvi",
  group.by = c("Method", "predicted.celltype.l2", "scvi_clusters"),
  combine = TRUE,
  label.size = 2
)

sessionInfo
# Record the R version, platform and package versions used for this post.
utils::sessionInfo()
R version 4.4.0 (2024-04-24)
Platform: x86_64-pc-linux-gnu
Running under: Rocky Linux 8.9 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /opt/intel/oneapi/mkl/2024.1/lib/libmkl_gf_lp64.so.2;  LAPACK version 3.11.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: Europe/Helsinki
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] reticulate_1.36.1        patchwork_1.2.0          ggplot2_3.5.1           
 [4] SeuratWrappers_0.3.5     pbmcsca.SeuratData_3.0.0 pbmcref.SeuratData_1.0.0
 [7] SeuratData_0.2.2.9001    Seurat_5.1.0             SeuratObject_5.0.2      
[10] sp_2.1-4                

loaded via a namespace (and not attached):
  [1] RColorBrewer_1.1-3     rstudioapi_0.16.0      jsonlite_1.8.8        
  [4] magrittr_2.0.3         spatstat.utils_3.0-4   farver_2.1.2          
  [7] rmarkdown_2.27         vctrs_0.6.5            ROCR_1.0-11           
 [10] spatstat.explore_3.2-7 htmltools_0.5.8.1      sctransform_0.4.1     
 [13] parallelly_1.37.1      KernSmooth_2.23-22     htmlwidgets_1.6.4     
 [16] ica_1.0-3              plyr_1.8.9             plotly_4.10.4         
 [19] zoo_1.8-12             igraph_2.0.3           mime_0.12             
 [22] lifecycle_1.0.4        pkgconfig_2.0.3        rsvd_1.0.5            
 [25] Matrix_1.7-0           R6_2.5.1               fastmap_1.2.0         
 [28] fitdistrplus_1.1-11    future_1.33.2          shiny_1.8.1.1         
 [31] digest_0.6.35          colorspace_2.1-0       tensor_1.5            
 [34] RSpectra_0.16-1        irlba_2.3.5.1          labeling_0.4.3        
 [37] unixtools_0.1-1        progressr_0.14.0       fansi_1.0.6           
 [40] spatstat.sparse_3.0-3  httr_1.4.7             polyclip_1.10-6       
 [43] abind_1.4-5            compiler_4.4.0         remotes_2.5.0         
 [46] withr_3.0.0            fastDummies_1.7.3      R.utils_2.12.3        
 [49] MASS_7.3-60.2          rappdirs_0.3.3         tools_4.4.0           
 [52] lmtest_0.9-40          httpuv_1.6.15          future.apply_1.11.2   
 [55] goftest_1.2-3          R.oo_1.26.0            glue_1.7.0            
 [58] nlme_3.1-164           promises_1.3.0         grid_4.4.0            
 [61] Rtsne_0.17             cluster_2.1.6          reshape2_1.4.4        
 [64] generics_0.1.3         gtable_0.3.5           spatstat.data_3.0-4   
 [67] R.methodsS3_1.8.2      tidyr_1.3.1            data.table_1.15.4     
 [70] utf8_1.2.4             spatstat.geom_3.2-9    RcppAnnoy_0.0.22      
 [73] ggrepel_0.9.5          RANN_2.6.1             pillar_1.9.0          
 [76] stringr_1.5.1          spam_2.10-0            RcppHNSW_0.6.0        
 [79] later_1.3.2            splines_4.4.0          dplyr_1.1.4           
 [82] lattice_0.22-6         survival_3.5-8         deldir_2.0-4          
 [85] tidyselect_1.2.1       miniUI_0.1.1.1         pbapply_1.7-2         
 [88] knitr_1.46             gridExtra_2.3          scattermore_1.2       
 [91] xfun_0.44              matrixStats_1.3.0      stringi_1.8.4         
 [94] lazyeval_0.2.2         yaml_2.3.8             evaluate_0.23         
 [97] codetools_0.2-20       tibble_3.2.1           BiocManager_1.30.23   
[100] cli_3.6.2              uwot_0.2.2             xtable_1.8-4          
[103] munsell_0.5.1          Rcpp_1.0.12            globals_0.16.3        
[106] spatstat.random_3.2-3  png_0.1-8              parallel_4.4.0        
[109] dotCall64_1.1-1        listenv_0.9.1          viridisLite_0.4.2     
[112] scales_1.3.0           ggridges_0.5.6         leiden_0.4.3.1        
[115] purrr_1.0.2            crayon_1.5.2           rlang_1.1.3           
[118] cowplot_1.1.3         
Back to top