Skip to content

Commit

Permalink
Bug fixes in R/ATAC.R and R/selectBatchHVG.R
Browse files Browse the repository at this point in the history
  • Loading branch information
mvfki committed Dec 5, 2024
1 parent c70c724 commit c074c34
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 48 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rliger
Version: 2.1.0.9002
Date: 2024-11-15
Version: 2.1.0.9003
Date: 2024-12-04
Type: Package
Title: Linked Inference of Genomic Experimental Relationships
Description: Uses an extension of nonnegative matrix factorization to identify shared and dataset-specific factors. See Welch J, Kozareva V, et al (2019) <doi:10.1016/j.cell.2019.05.006>, and Liu J, Gao C, Sodicoff J, et al (2020) <doi:10.1038/s41596-020-0391-8> for more details.
Expand Down
7 changes: 5 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@
- Pseudo-bulk should be easy because we are just aggregating cells.
- Wilcoxon might be a bit harder because ranks are calculated per gene but the H5 sparse data is column majored. Might need to find a fast on-disk transposition method, which would also enhance RcppPlanc performance when running ANLS on H5 data.

## rliger 2.1.0.9002
## rliger 2.1.0.9003

- Added `selectBatchHVG()` which implements another HVG selection strategy, credit to SCIB
- Fixed Wilcoxon rank-sum test bug when using ATAC peak counts
- Fixed multiple problems related to ATAC analysis
- Fixed Wilcoxon rank-sum test bug when using ATAC peak counts
- Fixed gene coordinate parsing bug from BED file
- Optimized peak parsing speed

## rliger 2.1.0

Expand Down
95 changes: 52 additions & 43 deletions R/ATAC.R
Original file line number Diff line number Diff line change
Expand Up @@ -420,8 +420,11 @@ exportInteractTrack <- function(
colClasses = c("character", "integer", "integer",
"character", "NULL", "NULL")
)
genesCoords <- genesCoords[stats::complete.cases(genesCoords$V4), ]
rownames(genesCoords) <- genesCoords[, 4]
mask <- stats::complete.cases(genesCoords$V4) & genesCoords$V4 != ""
genesCoords <- genesCoords[mask, , drop = FALSE]
uniqnames <- make.unique(genesCoords$V4)
rownames(genesCoords) <- uniqnames

# split peak names into chrom and coordinates
regions <- .splitPeakRegion(rownames(corrMat))

Expand All @@ -440,41 +443,45 @@ exportInteractTrack <- function(
'visibility=full')
write(trackDoc, file = outputPath)
for (gene in useGenes) {
peaksSel <- corrMat[, gene] != 0
track <- data.frame(
chrom = regions$chrs[peaksSel],
chromStart = regions$chrsStart[peaksSel],
chromEnd = regions$chrsEnd[peaksSel],
name = paste0(gene, "/", rownames(corrMat)[peaksSel]),
score = 0,
value = as.numeric(corrMat[peaksSel, gene]),
exp = ".",
color = 5,
sourceChrom = regions$chrs[peaksSel],
sourceStart = regions$chrsStart[peaksSel],
sourceEnd = regions$chrsStart[peaksSel] + 1,
sourceName = ".",
sourceStrand = ".",
targetChrom = genesCoords[gene, 1],
targetStart = genesCoords[gene, 2],
targetEnd = genesCoords[gene, 2] + 1,
targetName = gene,
targetStrand = "."
)
utils::write.table(
track,
file = outputPath,
append = TRUE,
quote = FALSE,
sep = "\t",
eol = "\n",
na = "NA",
dec = ".",
row.names = FALSE,
col.names = FALSE,
qmethod = c("escape", "double"),
fileEncoding = ""
)
coordSel <- which(genesCoords$V4 == gene)
for (geneUniqIdx in coordSel) {
peaksSel <- corrMat[, gene] != 0
track <- data.frame(
chrom = regions$chrs[peaksSel],
chromStart = regions$chrsStart[peaksSel],
chromEnd = regions$chrsEnd[peaksSel],
name = paste0(gene, "/", rownames(corrMat)[peaksSel]),
score = 0,
value = as.numeric(corrMat[peaksSel, gene]),
exp = ".",
color = 5,
sourceChrom = regions$chrs[peaksSel],
sourceStart = regions$chrsStart[peaksSel],
sourceEnd = regions$chrsStart[peaksSel] + 1,
sourceName = ".",
sourceStrand = ".",
targetChrom = genesCoords[geneUniqIdx, 1],
targetStart = genesCoords[geneUniqIdx, 2],
targetEnd = genesCoords[geneUniqIdx, 2] + 1,
targetName = gene,
targetStrand = "."
)
utils::write.table(
track,
file = outputPath,
append = TRUE,
quote = FALSE,
sep = "\t",
eol = "\n",
na = "NA",
dec = ".",
row.names = FALSE,
col.names = FALSE,
qmethod = c("escape", "double"),
fileEncoding = ""
)
}

}
cli::cli_alert_success("Result written at: {.file {outputPath}}")
invisible(NULL)
Expand Down Expand Up @@ -516,10 +523,12 @@ makeInteractTrack <- function(

.splitPeakRegion <- function(peakNames) {
peakNames <- strsplit(peakNames, "[:-]")
chrs <- Reduce(append, lapply(peakNames, function(peak) peak[1]))
chrsStart <- Reduce(append, lapply(peakNames, function(peak) peak[2]))
chrsEnd <- Reduce(append, lapply(peakNames, function(peak) peak[3]))
list(chrs = chrs,
chrsStart = as.numeric(chrsStart),
chrsEnd = as.numeric(chrsEnd))
# chrs <- Reduce(append, lapply(peakNames, function(peak) peak[1]))
# chrsStart <- Reduce(append, lapply(peakNames, function(peak) peak[2]))
# chrsEnd <- Reduce(append, lapply(peakNames, function(peak) peak[3]))
list(
chrs = sapply(peakNames, `[`, i = 1),
chrsStart = as.numeric(sapply(peakNames, `[`, i = 2)),
chrsEnd = as.numeric(sapply(peakNames, `[`, i = 3))
)
}
2 changes: 1 addition & 1 deletion R/selectBatchHVG.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ selectBatchHVG.liger <- function(
"Selected {nrow(hvgDFSub)} gene{?s} that {?is/are} highly variable in {n_batch_look} batch{?es}"
)
}
nSelected <- nrow(hvgDFSub)
nSelected <- nSelected + nrow(hvgDFSub)
nNeeded <- nGenes - nSelected
n_batch_look <- n_batch_look - 1
} else {
Expand Down

0 comments on commit c074c34

Please sign in to comment.