Skip to content

Commit

Permalink
add load function for coessentiality
Browse files Browse the repository at this point in the history
  • Loading branch information
abearab committed Jun 6, 2024
1 parent 1ba375a commit 4fd9dd0
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 3 deletions.
7 changes: 4 additions & 3 deletions CanDI/candi/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from CanDI.candi import data
data = data.Data() #Global object data instantiated on import required for access by GeneQuery Objects
from CanDI.candi.candi import (Gene, CellLine, Organelle, Cancer, CellLineCluster, GeneCluster)
from . import load

from . import data
data = data.Data() #Global object data instantiated on import required for access by GeneQuery Objects
from .candi import (Gene, CellLine, Organelle, Cancer, CellLineCluster, GeneCluster)
37 changes: 37 additions & 0 deletions CanDI/candi/load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import numpy as np
import pandas as pd
import polars as pl
from CanDI import candi
from pathlib import Path


def coessentiality(pvalue_threshold = 10**-3, data_dir='auto'):
    """Load co-essentiality gene pairs that pass a p-value threshold.

    Reads ``genes.txt``, ``GLS_sign.npy`` and ``GLS_p.npy`` from *data_dir*,
    computes the signed score ``-log10(GLS_p) * GLS_sign`` for every gene
    pair, and returns the pairs whose score is strictly greater than
    ``-log10(pvalue_threshold)``. Because the comparison is on the signed
    score, pairs with a negative sign never pass the filter.

    Args:
        pvalue_threshold: P-value cut-off; a pair is kept when its signed
            score exceeds ``-log10(pvalue_threshold)``.
        data_dir: Directory holding the three input files, as a string or
            path-like object. The default ``'auto'`` resolves to
            ``setup/data/coessentiality`` next to the ``candi`` package
            directory.

    Returns:
        A pandas DataFrame with columns ``gene_1``, ``gene_2`` and
        ``coessentiality``. Rows are ordered by ``gene_2`` (matrix column)
        and then ``gene_1`` (matrix row). Pairs whose two gene names are
        equal are excluded. Every (row, column) cell of the matrix is
        considered, so a pair may appear in both orientations.

    Raises:
        ValueError: If *data_dir* or one of the required files does not
            exist, or if the score matrix is not square with one row and
            column per gene name.
    """
    # TODO(review): a collaborator asked whether the project's Grabber class
    # could be used to load the coessentiality data instead of this function.
    if data_dir == 'auto':
        data_dir = (
            Path(candi.__path__[0]).parent.absolute()
            / 'setup' / 'data' / 'coessentiality'
        )
    else:
        data_dir = Path(data_dir)

    # Validate up front (for the bundled location too) so a missing dataset
    # is reported with a clear message instead of a low-level reader error.
    if not data_dir.exists():
        raise ValueError(f"Path {data_dir} does not exist")
    for required in ('genes.txt', 'GLS_sign.npy', 'GLS_p.npy'):
        if not (data_dir / required).exists():
            raise ValueError(f"Path {data_dir}/{required} does not exist")

    gene_names = np.asarray(
        pd.read_csv(
            data_dir / 'genes.txt', header=None, names=['gene_name']
        )['gene_name']
    )

    GLS_sign = np.load(data_dir / 'GLS_sign.npy')
    GLS_p = np.load(data_dir / 'GLS_p.npy')

    score = (-1 * np.log10(GLS_p)) * GLS_sign

    n_genes = len(gene_names)
    if score.shape != (n_genes, n_genes):
        raise ValueError(
            f"Expected a {n_genes}x{n_genes} matrix to match genes.txt, "
            f"got shape {score.shape}"
        )

    cutoff = -np.log10(pvalue_threshold)

    # Threshold the matrix directly and materialise only the surviving cells.
    # Scanning the transpose yields column-major order (all rows for the
    # first column, then the second, ...), i.e. the order a melt would give.
    col_idx, row_idx = np.nonzero(score.T > cutoff)

    gene_1 = gene_names[row_idx]
    gene_2 = gene_names[col_idx]
    values = score[row_idx, col_idx]

    # Self-pairs are identified by gene name rather than matrix position.
    distinct = np.asarray(gene_1 != gene_2, dtype=bool)

    out = pd.DataFrame({
        'gene_1': gene_1[distinct],
        'gene_2': gene_2[distinct],
        'coessentiality': values[distinct],
    })

    return out

0 comments on commit 4fd9dd0

Please sign in to comment.