diff --git a/DESCRIPTION b/DESCRIPTION
index 5fb3f22..5e36344 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: rliger
-Version: 2.1.0.9001
-Date: 2024-11-13
+Version: 2.1.0.9002
+Date: 2024-11-15
 Type: Package
 Title: Linked Inference of Genomic Experimental Relationships
 Description: Uses an extension of nonnegative matrix factorization to identify shared and dataset-specific factors. See Welch J, Kozareva V, et al (2019) <doi:10.1016/j.cell.2019.05.006>, and Liu J, Gao C, Sodicoff J, et al (2020) <doi:10.1038/s41596-020-0391-8> for more details.
diff --git a/NAMESPACE b/NAMESPACE
index f6197d9..1e952ae 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -51,6 +51,9 @@ S3method(scaleNotCenter,dgCMatrix)
 S3method(scaleNotCenter,liger)
 S3method(scaleNotCenter,ligerDataset)
 S3method(scaleNotCenter,ligerMethDataset)
+S3method(selectBatchHVG,dgCMatrix)
+S3method(selectBatchHVG,liger)
+S3method(selectBatchHVG,ligerDataset)
 S3method(selectGenes,Seurat)
 S3method(selectGenes,liger)
 S3method(writeH5,default)
@@ -212,6 +215,7 @@ export(runWilcoxon)
 export(scaleData)
 export(scaleNotCenter)
 export(scaleUnsharedData)
+export(selectBatchHVG)
 export(selectGenes)
 export(selectGenesVST)
 export(seuratToLiger)
diff --git a/NEWS.md b/NEWS.md
index ac1d5d0..daa94ff 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -13,8 +13,9 @@
   - Pseudo-bulk should be easy because we are just aggregating cells.
   - Wilcoxon might be a bit harder because ranks are calculated per gene but the H5 sparse data is column majored. Might need to find a fast on-disk transposition method, which would also enhance RcppPlanc performance when running ANLS on H5 data.
 
-## rliger 2.1.0.9001
+## rliger 2.1.0.9002
 
+- Added `selectBatchHVG()` which implements another HVG selection strategy, credit to SCIB
 - Fixed Wilcoxon rank-sum test bug when using ATAC peak counts
 
 ## rliger 2.1.0
diff --git a/R/selectBatchHVG.R b/R/selectBatchHVG.R
new file mode 100644
index 0000000..20b91e6
--- /dev/null
+++ b/R/selectBatchHVG.R
@@ -0,0 +1,226 @@
+#' `r lifecycle::badge("experimental")` Batch-aware highly variable gene selection
+#' @rdname selectBatchHVG
+#' @description
+#' Method to select HVGs based on mean dispersions of genes that are highly
+#' variable genes in all batches. Using a the top target_genes per batch by
+#' average normalize dispersion. If target genes still hasn't been reached,
+#' then HVGs in all but one batches are used to fill up. This is continued
+#' until HVGs in a single batch are considered.
+#'
+#' This is an *rliger* implementation of the method originally published in
+#' [SCIB](https://scib.readthedocs.io/en/latest/api/scib.preprocessing.hvg_batch.html).
+#' We found the potential that it can improve integration under some
+#' circumstances, and is currently testing it.
+#'
+#' This function currently only works for shared features across all datasets.
+#' For selection from only part of the datasets and selection for
+#' dataset-specific unshared features, please use \code{\link{selectGenes}()}.
+#' @references
+#' Luecken, M.D., Büttner, M., Chaichoompu, K. et al. (2022), Benchmarking
+#' atlas-level data integration in single-cell genomics. *Nat Methods*, 19,
+#' 41–50. https://doi.org/10.1038/s41592-021-01336-8.
+#' @seealso [selectGenes()]
+#' @param object A \code{\linkS4class{liger}} object,
+#' \code{\linkS4class{ligerDataset}} object or a sparse/dense matrix. The liger
+#' objects must have raw counts available. A direct matrix input is preferably
+#' log-1p transformed from CPM normalized counts in cell per column orientation.
+#' @param nGenes Integer number of target genes to select. Default \code{2000}.
+#' @param features For ligerDataset method, the feature subset to limit the
+#' selection to, due to liger downstream requires non-zero features to be
+#' considered. Default \code{NULL} selects from all features.
+#' @param verbose Logical. Whether to show a progress bar. Default
+#' \code{getOption("ligerVerbose")} or \code{TRUE} if users have not set.
+#' @param returnStats Logical, for dgCMatrix-method, whether to return a data
+#' frame of statistics for all features, or by default \code{FALSE} just return
+#' a character vector of selected features.
+#' @param ... Arguments passed to S3 methods.
+#' @return
+#' \itemize{
+#' \item{liger-method: Returns the input liger object with the selected genes
+#' updated in \code{varFeatures} slot, which can be accessed with
+#' \code{varFeatures(object)}. Additionally, the statistics are updated in
+#' the \code{featureMeta} slot of each ligerDataset object within the
+#' \code{datasets} slot of the \code{object}.}
+#' \item{ligerDataset-method: Returns the input ligerDataset object with the
+#' statistics updated in the \code{featureMeta} slot.}
+#' \item{dgCMatrix-method: By default returns a character vector of selected
+#' variable features. If \code{returnStats = TRUE}, returns a data.frame of the
+#' statistics.}
+#' }
+#' @export
+#' @examples
+#' pbmc <- selectBatchHVG(pbmc, nGenes = 10)
+#' varFeatures(pbmc)
+selectBatchHVG <- function(
+        object,
+        ...
+) {
+    UseMethod("selectBatchHVG", object)
+}
+
+#' @export
+#' @method selectBatchHVG liger
+#' @rdname selectBatchHVG
+selectBatchHVG.liger <- function(
+        object,
+        nGenes = 2000,
+        verbose = getOption("ligerVerbose", TRUE),
+        ...
+) {
+    object <- recordCommand(object, ...)
+    sharedFeature <- Reduce(intersect, lapply(datasets(object), rownames))
+    hvgDFList <- list()
+    for (d in names(object)) {
+        if (isTRUE(verbose))
+            cli::cli_alert_info("Selecting variable features for dataset {.val {d}}")
+        ld <- dataset(object, d)
+        ld <- selectBatchHVG(ld, nGenes = nGenes, features = sharedFeature,
+                             verbose = verbose, ...)
+        object@datasets[[d]] <- ld
+        fm <- featureMeta(ld)[sharedFeature, , drop = FALSE]
+        fm$dataset <- d
+        fm$feature <- rownames(fm)
+        hvgDFList[[d]] <- as.data.frame(fm)
+    }
+    hvgDFAgg <- Reduce(rbind, hvgDFList)
+    hvgDFAgg <- hvgDFAgg %>%
+        dplyr::mutate(feature = factor(.data[['feature']], levels = sharedFeature)) %>%
+        dplyr::group_by(.data[['feature']]) %>%
+        dplyr::summarise(
+            means = mean(.data[['means']]),
+            dispersion = mean(.data[['dispersion']]),
+            dispersion_norm = mean(.data[['dispersion_norm']]),
+            n_batch = sum(.data[['highly_variable']])
+        )
+    res <- character()
+    enough <- FALSE
+    nSelected <- 0
+    nNeeded <- nGenes
+    n_batch_look <- length(object)
+    while (!enough && n_batch_look > 0) {
+        hvgDFSub <- hvgDFAgg %>%
+            dplyr::filter(.data[['n_batch']] == n_batch_look) %>%
+            dplyr::arrange(-.data[['dispersion_norm']])
+        if (nrow(hvgDFSub) < nNeeded) {
+            if (isTRUE(verbose)) {
+                cli::cli_alert_info(
+                    "Selected {nrow(hvgDFSub)} gene{?s} that {?is/are} highly variable in {n_batch_look} batch{?es}"
+                )
+            }
+            nSelected <- nrow(hvgDFSub)
+            nNeeded <- nGenes - nSelected
+            n_batch_look <- n_batch_look - 1
+        } else {
+            if (isTRUE(verbose)) {
+                cli::cli_alert_info(
+                    "Selected {nNeeded} gene{?s} that {?is/are} highly variable in {n_batch_look} batch{?es}"
+                )
+            }
+            hvgDFSub <- hvgDFSub %>% dplyr::slice_head(n = nNeeded)
+            enough <- TRUE
+        }
+        res <- c(res, as.character(hvgDFSub$feature))
+    }
+    if (isTRUE(verbose)) {
+        if (!enough) {
+            cli::cli_alert_warning("Only {length(res)} gene{?s} {?is/are} selected while {nGenes} {?is/are} requested.")
+        } else {
+            cli::cli_alert_success("Totally {length(res)} gene{?s} {?is/are} selected.")
+        }
+    }
+    varFeatures(object) <- res
+    return(object)
+}
+
+#' @export
+#' @method selectBatchHVG ligerDataset
+#' @rdname selectBatchHVG
+selectBatchHVG.ligerDataset <- function(
+        object,
+        nGenes = 2000,
+        features = NULL,
+        verbose = getOption("ligerVerbose", TRUE),
+        ...
+) {
+    features <- .idxCheck(object, features, orient = "feature")
+    mat <- rawData(object)
+    mat <- normalize(mat, log = TRUE, scaleFactor = 1e6)
+    fm <- featureMeta(object)
+    idx <- rep(FALSE, nrow(object))
+    idx[features] <- TRUE
+    idx <- idx & (fm$nCell > 0)
+    mat <- mat[idx, , drop = FALSE]
+    stats <- selectBatchHVG(mat, nGenes = nGenes, verbose = verbose,
+                            returnStats = TRUE, ...)
+    fm$means <- 0
+    fm$dispersion <- 0
+    fm$dispersion_norm <- 0
+    fm$highly_variable <- FALSE
+    fm[stats$feature, "means"] <- stats$means
+    fm[stats$feature, "dispersion"] <- stats$dispersion
+    fm[stats$feature, "dispersion_norm"] <- stats$dispersion_norm
+    fm[stats$feature, "highly_variable"] <- stats$highly_variable
+    featureMeta(object) <- fm
+    return(object)
+}
+
+#' @export
+#' @method selectBatchHVG dgCMatrix
+#' @rdname selectBatchHVG
+selectBatchHVG.dgCMatrix <- function(
+        object,
+        nGenes = 2000,
+        returnStats = FALSE,
+        verbose = getOption("ligerVerbose", TRUE),
+        ...
+) {
+    # Get mean and var
+    means <- Matrix::rowMeans(object)
+    vars <- rowVars_sparse_rcpp(object, means)
+    stats <- .selectBatchHVG.by.metric(
+        feature = rownames(object),
+        means = means,
+        vars = vars,
+        nGenes = nGenes,
+        verbose = verbose
+    )
+    if (isTRUE(returnStats)) {
+        return(stats)
+    } else {
+        stats %>%
+            dplyr::arrange(-.data[['dispersion_norm']]) %>%
+            dplyr::filter(.data[['highly_variable']]) %>%
+            dplyr::pull(.data[['feature']])
+    }
+}
+
+.selectBatchHVG.by.metric <- function(
+        feature,
+        means,
+        vars,
+        nGenes,
+        verbose
+) {
+    means[means == 0] <- 1e-12
+    dispersion <- vars / means
+    df <- data.frame(
+        feature = feature,
+        means = means,
+        dispersion = dispersion
+    )
+    bins <- c(-Inf, stats::quantile(means, seq(0.1, 1, 0.05)), Inf)
+    df$mean_bin <- cut(means, breaks = bins)
+    df <- df %>%
+        dplyr::group_by(.data[['mean_bin']]) %>%
+        dplyr::mutate(
+            dispersion_norm = (.data[['dispersion']] - stats::median(.data[['dispersion']]))/.mad(.data[['dispersion']])
+        )
+    if (nGenes > nrow(df)) nGenes <- nrow(df)
+    df$highly_variable <- FALSE
+    df$highly_variable[order(df$dispersion_norm, decreasing = TRUE)[seq(nGenes)]] <- TRUE
+    df
+}
+
+.mad <- function(x) {
+    stats::median(abs(x - stats::median(x)))/0.6744897501960817
+}
diff --git a/man/selectBatchHVG.Rd b/man/selectBatchHVG.Rd
new file mode 100644
index 0000000..aa0f350
--- /dev/null
+++ b/man/selectBatchHVG.Rd
@@ -0,0 +1,97 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/selectBatchHVG.R
+\name{selectBatchHVG}
+\alias{selectBatchHVG}
+\alias{selectBatchHVG.liger}
+\alias{selectBatchHVG.ligerDataset}
+\alias{selectBatchHVG.dgCMatrix}
+\title{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Batch-aware highly variable gene selection}
+\usage{
+selectBatchHVG(object, ...)
+
+\method{selectBatchHVG}{liger}(
+  object,
+  nGenes = 2000,
+  verbose = getOption("ligerVerbose", TRUE),
+  ...
+)
+
+\method{selectBatchHVG}{ligerDataset}(
+  object,
+  nGenes = 2000,
+  features = NULL,
+  verbose = getOption("ligerVerbose", TRUE),
+  ...
+)
+
+\method{selectBatchHVG}{dgCMatrix}(
+  object,
+  nGenes = 2000,
+  returnStats = FALSE,
+  verbose = getOption("ligerVerbose", TRUE),
+  ...
+)
+}
+\arguments{
+\item{object}{A \code{\linkS4class{liger}} object,
+\code{\linkS4class{ligerDataset}} object or a sparse/dense matrix. The liger
+objects must have raw counts available. A direct matrix input is preferably
+log-1p transformed from CPM normalized counts in cell per column orientation.}
+
+\item{...}{Arguments passed to S3 methods.}
+
+\item{nGenes}{Integer number of target genes to select. Default \code{2000}.}
+
+\item{verbose}{Logical. Whether to show a progress bar. Default
+\code{getOption("ligerVerbose")} or \code{TRUE} if users have not set.}
+
+\item{features}{For ligerDataset method, the feature subset to limit the
+selection to, due to liger downstream requires non-zero features to be
+considered. Default \code{NULL} selects from all features.}
+
+\item{returnStats}{Logical, for dgCMatrix-method, whether to return a data
+frame of statistics for all features, or by default \code{FALSE} just return
+a character vector of selected features.}
+}
+\value{
+\itemize{
+\item{liger-method: Returns the input liger object with the selected genes
+updated in \code{varFeatures} slot, which can be accessed with
+\code{varFeatures(object)}. Additionally, the statistics are updated in
+the \code{featureMeta} slot of each ligerDataset object within the
+\code{datasets} slot of the \code{object}.}
+\item{ligerDataset-method: Returns the input ligerDataset object with the
+statistics updated in the \code{featureMeta} slot.}
+\item{dgCMatrix-method: By default returns a character vector of selected
+variable features. If \code{returnStats = TRUE}, returns a data.frame of the
+statistics.}
+}
+}
+\description{
+Method to select HVGs based on mean dispersions of genes that are highly
+variable genes in all batches. Using a the top target_genes per batch by
+average normalize dispersion. If target genes still hasn't been reached,
+then HVGs in all but one batches are used to fill up. This is continued
+until HVGs in a single batch are considered.
+
+This is an \emph{rliger} implementation of the method originally published in
+\href{https://scib.readthedocs.io/en/latest/api/scib.preprocessing.hvg_batch.html}{SCIB}.
+We found the potential that it can improve integration under some
+circumstances, and is currently testing it.
+
+This function currently only works for shared features across all datasets.
+For selection from only part of the datasets and selection for
+dataset-specific unshared features, please use \code{\link{selectGenes}()}.
+}
+\examples{
+pbmc <- selectBatchHVG(pbmc, nGenes = 10)
+varFeatures(pbmc)
+}
+\references{
+Luecken, M.D., Büttner, M., Chaichoompu, K. et al. (2022), Benchmarking
+atlas-level data integration in single-cell genomics. \emph{Nat Methods}, 19,
+41–50. https://doi.org/10.1038/s41592-021-01336-8.
+}
+\seealso{
+\code{\link[=selectGenes]{selectGenes()}}
+}
diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R
index f861156..b64f166 100644
--- a/tests/testthat/test_preprocessing.R
+++ b/tests/testthat/test_preprocessing.R
@@ -242,6 +242,18 @@ test_that("selectGenes - on disk", {
     )
 })
 
+test_that("selectBatchHVG", {
+    expect_no_error(pbmc <- selectBatchHVG(pbmc, nGenes = 100))
+    expect_length(varFeatures(pbmc), 100)
+    expect_message(pbmc <- selectBatchHVG(pbmc, nGenes = 300),
+                   "Only 249 genes")
+    ctrl.raw <- rawData(pbmc, "ctrl")
+    sel1 <- selectBatchHVG(ctrl.raw, nGenes = 100)
+    expect_length(sel1, 100)
+    sel1 <- selectBatchHVG(ctrl.raw, nGenes = 100, returnStats = TRUE)
+    expect_true(any(is.nan(sel1$dispersion_norm)))
+})
+
 #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Scaling
 #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%