Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

debug, new modules, and code improvements #46

Merged
merged 51 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
1ba375a
update .gitignore file
abearab Jun 6, 2024
4fd9dd0
add load function for coessentiality
abearab Jun 6, 2024
4fa9aa8
add `diffexp` module
abearab Jun 12, 2024
1324a41
add `cfig_path` argument
abearab Jun 23, 2024
dc54404
make `Manager` as parent class
abearab Jun 23, 2024
f47ff47
add additional packages
abearab Jun 23, 2024
79ed21a
Merge branch 'master' into abe-dev
abearab Jun 23, 2024
538cf57
fix import
abearab Jun 23, 2024
2aa82b6
mend
abearab Jun 23, 2024
00e2543
minor fixes
abearab Jun 23, 2024
026bb75
minor fixes
abearab Jun 23, 2024
f0904af
mend
abearab Jun 23, 2024
fe07d58
mend
abearab Jun 23, 2024
119f541
mend
abearab Jun 23, 2024
654f98f
bump version 0.1.2
abearab Jun 23, 2024
1a9a0d1
relative import
abearab Jun 24, 2024
65effc7
switch to python >3.11
abearab Jun 24, 2024
dcb70b0
mend
abearab Jun 24, 2024
6675812
mend
abearab Jun 24, 2024
1a6703a
switch to python >3.11
abearab Jun 24, 2024
2c8ca70
draft `Manager` class test
abearab Jun 24, 2024
87b92bc
update `.gitignore`
abearab Jun 24, 2024
b0a3d18
relative import
abearab Jun 24, 2024
686cd03
add `data_paths`
abearab Jun 24, 2024
2b511fa
switch to python >3.11
abearab Jun 24, 2024
8254f37
update README
abearab Jun 24, 2024
6dc9482
mend
abearab Jun 24, 2024
cee3f98
mend
abearab Jun 24, 2024
de52809
mend
abearab Jun 24, 2024
52f39b5
mend
abearab Jun 24, 2024
1b00131
mend
abearab Jun 24, 2024
422b2b3
mend
abearab Jun 24, 2024
3f045bc
mend
abearab Jun 24, 2024
a6cdb68
mend
abearab Jun 24, 2024
410d298
mend
abearab Jun 24, 2024
33cf352
add citation
abearab Jun 24, 2024
11ee76f
mend
abearab Jun 24, 2024
7403413
add badge
abearab Jun 24, 2024
c97223c
add downloads
abearab Jun 24, 2024
198c163
mend
abearab Jun 24, 2024
33083a5
update .gitignore file
abearab Jun 24, 2024
940eae3
set `include_package_data` as true
abearab Jun 24, 2024
0c81203
add uninstall scripts
abearab Jun 24, 2024
1d0e9b1
mend
abearab Jun 24, 2024
3d1ac9e
mend
abearab Jun 24, 2024
d74c0fa
minor debug
abearab Jun 24, 2024
04db890
debug
abearab Jun 24, 2024
bd64917
debug
abearab Jun 24, 2024
e6003d2
major changes in CanDI setup scripts and harmonize coessentiality ins…
abearab Jun 24, 2024
f2f2aba
draft coessentiality module
abearab Jun 24, 2024
fbdfee2
bump version 0.2.0
abearab Jun 24, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .gitignore
Binary file not shown.
2 changes: 1 addition & 1 deletion CanDI/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = "0.1.1"
version = "0.1.2"
4 changes: 3 additions & 1 deletion CanDI/candi/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from . import load
from . import data

data = data.Data() #Global object data instantiated on import required for access by GeneQuery Objects
from . import (Gene, CellLine, Organelle, Cancer, CellLineCluster, GeneCluster)

from .candi import (Gene, CellLine, Organelle, Cancer, CellLineCluster, GeneCluster)
2 changes: 1 addition & 1 deletion CanDI/candi/candi.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pandas as pd
import numpy as np
from . import data, grabber
from . import entity
from ..structures import entity

class SubsetHandler(object):

Expand Down
13 changes: 9 additions & 4 deletions CanDI/candi/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,15 @@ class Data(object):
can be tuned to load specific datasets upon import by editing config.ini
can call Data.load() to load any specific dataset
"""
def __init__(self):

self._file_path = Path(os.path.dirname(os.path.realpath(__file__))).parent.absolute() / 'setup'
config_path = self._file_path / 'data/config.ini'
def __init__(self, config_path='auto', verbose=False):

if config_path == 'auto':
self._file_path = Path(os.path.dirname(os.path.realpath(__file__))).parent.absolute() / 'setup'
config_path = self._file_path / 'data/config.ini'
elif os.path.exists(config_path) == False:
raise FileNotFoundError("Config file not found at {}".format(config_path))
elif os.path.exists(config_path) == True:
if verbose: print("Using config file at {}".format(config_path))

parser = configparser.ConfigParser() #parses config for data sources
parser.read(config_path)
Expand Down
37 changes: 37 additions & 0 deletions CanDI/candi/load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import numpy as np
import pandas as pd
import polars as pl
from CanDI import candi
from pathlib import Path


def coessentiality(pvalue_threshold = 10**-3, data_dir='auto'):
    """Load the coessentiality matrices and return significant gene pairs.

    The signed score of a gene pair is ``-log10(GLS_p) * GLS_sign``. The
    square score matrix is melted into long format, self-pairs are removed,
    and only rows whose score is strictly greater than
    ``-log10(pvalue_threshold)`` are kept. Because the score is signed,
    pairs with a non-positive sign never pass this filter.

    Parameters
    ----------
    pvalue_threshold : float
        P-value cutoff; converted to a ``-log10`` score cutoff.
    data_dir : str or path-like
        Directory holding ``genes.txt``, ``GLS_sign.npy`` and ``GLS_p.npy``.
        ``'auto'`` resolves to ``setup/data/coessentiality`` next to the
        ``candi`` package directory.

    Returns
    -------
    pandas.DataFrame
        Columns ``gene_1`` (row gene), ``gene_2`` (column gene) and
        ``coessentiality`` (signed ``-log10`` p-value).

    Raises
    ------
    ValueError
        If an explicit ``data_dir`` or one of the required files is missing.
    """
    required_files = ('genes.txt', 'GLS_sign.npy', 'GLS_p.npy')

    if data_dir == 'auto':
        data_dir = str(Path(candi.__path__[0]).parent.absolute()) + '/setup/data/coessentiality'
    else:
        # Normalize to str so pathlib.Path (or other path-like) arguments work
        # with the string-based path building below instead of raising
        # TypeError on concatenation.
        data_dir = str(data_dir)

        # check if the path exists and it contains the necessary files
        if not Path(data_dir).exists():
            raise ValueError(f"Path {data_dir} does not exist")
        for file_name in required_files:
            if not Path(f'{data_dir}/{file_name}').exists():
                raise ValueError(f"Path {data_dir}/{file_name} does not exist")

    gene_names = pd.read_csv(
        f'{data_dir}/genes.txt', header=None, names=['gene_name']
    )['gene_name']

    GLS_sign = np.load(f'{data_dir}/GLS_sign.npy')
    GLS_p = np.load(f'{data_dir}/GLS_p.npy')

    # Signed significance matrix, labelled by gene on both axes; the index is
    # reset so the row gene becomes a regular 'gene_name' column for melting.
    coessentiality_mat = pd.DataFrame(
        (-1 * np.log10(GLS_p)) * GLS_sign,
        columns=gene_names,
        index=gene_names,
    ).reset_index()
    coessentiality_mat = pl.from_dataframe(coessentiality_mat)

    # Wide -> long: one row per (row gene, column gene) pair.
    coessentiality_df = coessentiality_mat.melt('gene_name')
    coessentiality_df.columns = ['gene_1', 'gene_2', 'coessentiality']
    # Drop the diagonal (a gene paired with itself).
    coessentiality_df = coessentiality_df.filter(~(pl.col('gene_1') == pl.col('gene_2')))
    coessentiality_df = coessentiality_df.filter(pl.col('coessentiality') > -np.log10(pvalue_threshold))

    out = coessentiality_df.to_pandas()

    return out
Empty file added CanDI/pipelines/__init__.py
Empty file.
52 changes: 52 additions & 0 deletions CanDI/pipelines/diffexp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import numpy as np
import pandas as pd
import anndata as ad

from pydeseq2.dds import DeseqDataSet
from pydeseq2.default_inference import DefaultInference
from pydeseq2.ds import DeseqStats
from adpbulk import ADPBulk


def pseudobulk_by_group(adt, groups, method="mean"):
    """Pseudobulk ``adt`` by ``groups`` and wrap the result in an AnnData.

    An ``ADPBulk`` object is built from ``adt`` with ``groupby=groups`` and
    the given aggregation ``method``. Its ``fit_transform()`` output becomes
    ``X`` and its sample metadata, indexed by the ``'SampleName'`` column,
    becomes ``obs`` of the returned ``anndata.AnnData``.
    """
    bulker = ADPBulk(adt, groupby=groups, method=method)

    # Aggregate first; the sample metadata is read from the fitted object.
    bulk_matrix = bulker.fit_transform()
    bulk_meta = bulker.get_meta().set_index('SampleName')

    return ad.AnnData(X=bulk_matrix, obs=bulk_meta)


def run_deseq(adata, design, tested_level, ref_level, n_cpus=8):
    """Run a PyDESeq2 differential-expression test and return its results.

    Counts come from ``adata.to_df()`` cast to ``int`` and sample metadata
    from ``adata.obs``. ``design`` is used both as the design factor and as
    the first element of the contrast ``[design, tested_level, ref_level]``.
    The same ``DefaultInference`` object (built with ``n_cpus``) is shared by
    the dataset and the statistics step.

    Returns the ``results_df`` attribute of the ``DeseqStats`` object after
    ``summary()`` has been called.
    """
    engine = DefaultInference(n_cpus=n_cpus)

    count_table = adata.to_df().astype(int)
    dataset = DeseqDataSet(
        counts=count_table,
        metadata=adata.obs,
        design_factors=design,  # compare samples based on the "condition"
        refit_cooks=True,
        inference=engine,
    )
    dataset.deseq2()

    contrast_spec = [design, tested_level, ref_level]
    stats = DeseqStats(dataset, contrast=contrast_spec, inference=engine)
    stats.summary()

    return stats.results_df
9 changes: 5 additions & 4 deletions CanDI/setup/install.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
import argparse
from .manager import Manager
from manager import DataverseDepMap, BroadDepMap


def main():
parser = argparse.ArgumentParser()
parser.add_argument("--source", help="Specify the download source", default="dataverse")
parser.add_argument("--data_dir", help="Specify the data directory", default=None)
parser.add_argument("--data_dir", help="Specify the data directory", default='auto')
args = parser.parse_args()

if args.source == 'dataverse':
print("Downloading data from Dataverse")
m = Manager(download_source=args.source, data_dir=args.data_dir)
m = DataverseDepMap(manager_path=args.data_dir, verbose=True)
m.download_reformatted_data()
m.write_config(m.cfig_path, m.parser)

elif args.source == 'depmap':
print("Downloading data from DepMap")
m = Manager(download_source=args.source, data_dir=args.data_dir)
m = BroadDepMap(manager_path=args.data_dir, verbose=True)
m.get_depmap_info()
m.write_config(m.cfig_path, m.parser)
m.download_defaults()
Expand Down
Loading
Loading