-
Notifications
You must be signed in to change notification settings - Fork 0
/
numbat_preprocess.R
36 lines (30 loc) · 1.38 KB
/
numbat_preprocess.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
library(Seurat)
library(numbat)
# Rscript /numbat/inst/bin/pileup_and_phase.R \
# --label AH3 \
# --samples AH3 \
# --bams /numbat/data/KMA7/KMA7_possorted_genome_bam.bam \
# --barcodes /numbat/data/KMA7/AH3/myeloid_aml_cells_barcodes.tsv \
# --outdir /numbat/data/KMA7/AH3/output \
# --gmap /Eagle_v2.4.1/tables/genetic_map_hg38_withX.txt.gz \
# --snpvcf /data/genome1K.phase3.SNP_AF5e2.chr1toX.hg38.vcf \
# --paneldir /data/1000G_hg38 \
# --ncores 32
data = Read10X(data.dir = "/numbat/data/KMA7/")
seurat_obj = CreateSeuratObject(counts=data$`Gene Expression`)
counts_matrix = GetAssayData(seurat_obj, slot="counts")
cell_annot = read.csv('/numbat/data/KMA7/AH3/normal_cells_annotations.csv')
ref_internal = aggregate_counts(counts_matrix, cell_annot)
myeloid_aml_cell_barcodes = read.csv('/numbat/data/KMA7/AH3/myeloid_aml_cells_barcodes.tsv', sep='\t', header=FALSE)
counts_matrix_myeloid_aml_cells = counts_matrix[, myeloid_aml_cell_barcodes[,]]
df_allele = read.csv('/numbat/data/KMA7/AH3/output/AH3_allele_counts.tsv.gz', sep='\t')
out = run_numbat(
counts_matrix_myeloid_aml_cells, # gene x cell integer UMI count matrix
ref_internal, # reference expression profile, a gene x cell type normalized expression level matrix
df_allele, # allele dataframe generated by pileup_and_phase script
genome = "hg38",
t = 1e-5,
ncores = 32,
plot = TRUE,
out_dir = '/numbat/data/KMA7/AH3/output'
)