Skip to content

Commit

Permalink
Add examples with real data.
Browse files Browse the repository at this point in the history
  • Loading branch information
emunozdc authored and emunozdc committed Jun 21, 2024
1 parent 0d07345 commit 71840c3
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 0 deletions.
Binary file added examples/fig_4.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
14 changes: 14 additions & 0 deletions examples/pr_fig_4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import pyranges as pr
import pyranges_plot as prp

# Select plotly as the pyranges_plot engine for this figure.
prp.set_engine("plotly")

# SARS-CoV-2 annotation, available (gzipped) from Ensembl Genomes:
# http://ftp.ensemblgenomes.org/pub/viruses/gtf/sars_cov_2/Sars_cov_2.ASM985889v3.101.gtf.gz
# The path below points at the uncompressed .gtf; modify it if needed.
gtf_path = "../../performance/sars_cov_2/Sars_cov_2.ASM985889v3.101.gtf"
annotation = pr.read_gtf(gtf_path)

# Keep only the rows whose Feature column is "CDS".
is_cds = annotation["Feature"] == "CDS"
cds_only = annotation[is_cds]

# Plot the CDS intervals with gene_name as the id column and export the
# result to fig_4.png.
prp.plot(
    cds_only,
    id_col="gene_name",
    text=False,
    legend=True,
    to_file="fig_4.png",
)
59 changes: 59 additions & 0 deletions examples/time_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import pyranges as pr
import pyranges_plot as prp
import time
from memory_profiler import memory_usage


# Benchmark script: measures elapsed time and memory growth of three phases
# (loading a GTF, subsetting it, plotting/exporting it) and prints a report.

prp.set_engine("plt")
prp.set_warnings(False)


def _checkpoint():
    """Sample the clock and the current process memory.

    Returns a ``(t_before, mem, t_after)`` tuple where ``mem`` is the first
    value reported by ``memory_usage()`` and the two timestamps bracket that
    sampling call. Use ``t_before`` as the *end* of the phase that just
    finished and ``t_after`` as the *start* of the next one, so that the cost
    of taking the memory sample is never attributed to a timed phase.
    """
    # perf_counter() is monotonic and high resolution; time.time() follows
    # the wall clock and can jump if the system clock is adjusted.
    t_before = time.perf_counter()
    mem = memory_usage()[0]
    t_after = time.perf_counter()
    return t_before, mem, t_after


# https://ftp.ensembl.org/pub/release-112/gtf/drosophila_melanogaster/Drosophila_melanogaster.BDGP6.46.112.gtf.gz
chrom = "X"
feat = "CDS"
path = "drosophila/Drosophila_melanogaster.BDGP6.46.112.gtf"  ## modify if needed

# --- Phase 1: loading ------------------------------------------------------
_, start_loading_m, start_loading_t = _checkpoint()

d = pr.read_gtf(path)

end_loading_t, end_loading_m, start_subset_t = _checkpoint()

# --- Phase 2: subsetting ---------------------------------------------------
# Get CDS
d_cds = d[d["Feature"] == feat]

# Optional diagnostic, disabled by default: number of genes in each chrom.
# d_cds.groupby("Chromosome").apply(
#     lambda x: print(
#         "CHROM:"
#         + str(x["Chromosome"].iloc[0])
#         + ": "
#         + str(len(x.groupby("gene_id")))
#     )
# )

# Chromosome subset
d_cds_chrom = d_cds[d_cds["Chromosome"] == chrom]

end_subset_t, end_subset_m, start_plot_t = _checkpoint()

# --- Phase 3: plotting and exporting ---------------------------------------
prp.plot(d_cds_chrom, id_col="gene_id", to_file="_tmp.png", max_shown=3000)

end_plot_t, end_plot_m, _ = _checkpoint()

# --- Report ----------------------------------------------------------------
# Memory figures are differences between consecutive samples; time figures
# are measured from the end of one sample to the start of the next.
print("==============================================================")
print(f"Drosophila melanogaster | Chromosome {chrom} | Feature {feat}\n")

print(f"Loading time: \t{end_loading_t - start_loading_t} (s)")
print(f"Loading memory:\t{end_loading_m - start_loading_m} (MiB)")
print()
print(f"Subsetting time: \t{end_subset_t - start_subset_t} (s)")
print(f"Subsetting memory: \t{end_subset_m - end_loading_m} (MiB)")
print()
print(f"Plotting and exporting time: \t{end_plot_t - start_plot_t} (s)")
print(f"Plotting and exporting memory: \t{end_plot_m - end_subset_m} (MiB)")
print("\n\n\n")

0 comments on commit 71840c3

Please sign in to comment.