Skip to content

Commit

Permalink
#97 taxonomy config issues fixed
Browse files: browse the repository at this point in the history
  • Loading branch information
hkir-dev committed Sep 10, 2021
1 parent 04e290b commit e486ecc
Show file tree
Hide file tree
Showing 8 changed files with 327 additions and 329 deletions.
7 changes: 7 additions & 0 deletions src/config/config_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@
"description": "Brain region abbreviation."
}
},
"Reference_gene_list": {
"type": "array",
"items": {
"type": "string",
"description": "Reference gene list."
}
},
"Root_nodes": {
"type": "array",
"items": {
Expand Down
22 changes: 6 additions & 16 deletions src/dendrograms/taxonomy_details.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
- UBERON:0001384
Brain_region_abbv:
- MOp
Ensemble_data: Ensmusg
Reference_gene_list:
- Ensmusg
Root_nodes:
- Node: CS202002013_123
Cell_type: CL:0011005
Expand Down Expand Up @@ -144,7 +145,8 @@
- NCBITaxon:9606
Brain_region:
- UBERON:0001384
Ensemble_data: Ensg
Reference_gene_list:
- Ensg
Root_nodes:
- Node: CS201912131_149
Cell_type: CL:0011005
Expand Down Expand Up @@ -173,34 +175,27 @@
non_taxonomy_roots:
- Node: CS201912131_128
Cell_type: CL:4023011
Location_relation: has_soma_location
#Lamp5
- Node: CS201912131_129
Cell_type: CL:4023015
Location_relation: has_soma_location
#Sncg
- Node: CS201912131_130
Cell_type: CL:4023016
Location_relation: has_soma_location
#Vip
- Node: CS201912131_131
Cell_type: CL:4023017
Location_relation: has_soma_location
#Sst
- Node: CS201912131_132
Cell_type: CL:4023018
Location_relation: part_of
#Pvalb
- Node: CS201912131_147
Cell_type: CL:0002453
Location_relation: part_of
#OPC
- Node: CS201912131_151
Cell_type: CL:0000125
# non-neuronal currently mapped as glial cell
- Node: CS201912131_140
Cell_type: CL:0000128
Location_relation: part_of
#Oligodendrocyte
-
Taxonomy_id: CCN201912132
Expand All @@ -212,7 +207,8 @@
- NCBITaxon:9483
Brain_region:
- UBERON:0001384
Ensemble_data: Enscjag
Reference_gene_list:
- Enscjag
Root_nodes:
- Node: CS201912132_116
Cell_type: CL:0011005
Expand Down Expand Up @@ -249,27 +245,21 @@
non_taxonomy_roots:
- Node: CS201912132_105
Cell_type: CL:0000128
Location_relation: part_of
#Oligodendrocyte
- Node: CS201912132_95
Cell_type: CL:4023011
Location_relation: has_soma_location
#Lamp5
- Node: CS201912132_96
Cell_type: CL:4023015
Location_relation: has_soma_location
#Sncg
- Node: CS201912132_97
Cell_type: CL:4023016
Location_relation: has_soma_location
#Vip
- Node: CS201912132_98
Cell_type: CL:4023017
Location_relation: has_soma_location
#Sst
- Node: CS201912132_99
Cell_type: CL:4023018
Location_relation: part_of
#Pvalb
- Node: CS201912131_151
Cell_type: CL:0000125
Expand Down
392 changes: 196 additions & 196 deletions src/markers/CS202002013_markers_denormalized.tsv

Large diffs are not rendered by default.

198 changes: 99 additions & 99 deletions src/patterns/data/default/CCN202002013_class_base.tsv

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions src/scripts/dosdp_pattern_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os

from dendrogram_tools import dend_json_2_nodes_n_edges
from template_generation_utils import read_taxonomy_config, get_subtrees, read_dendrogram_tree, read_ensemble_data, \
from template_generation_utils import read_taxonomy_config, get_subtrees, read_dendrogram_tree, read_gene_data, \
read_markers, get_gross_cell_type

MARKER_PATH = '../markers/CS{}_markers.tsv'
Expand All @@ -23,13 +23,13 @@ def generate_pattern_table_denormalised_markers(dend_json_path, output_filepath)
dend_tree = read_dendrogram_tree(dend_json_path)

marker_path = MARKER_PATH.format(str(taxon).replace("CCN", ""))
ensemble_path = ENSEMBLE_PATH.format(str(taxonomy_config["Ensemble_data"]).strip().lower())
gene_db_path = ENSEMBLE_PATH.format(str(taxonomy_config["Reference_gene_list"][0]).strip().lower())

if taxonomy_config:
subtrees = get_subtrees(dend_tree, taxonomy_config)
ensmusg_names = read_ensemble_data(ensemble_path)
denorm_markers = get_denorm_markers(taxon, ensmusg_names)
minimal_markers = read_markers(marker_path, ensmusg_names)
gene_names = read_gene_data(gene_db_path)
denorm_markers = get_denorm_markers(taxon, gene_names)
minimal_markers = read_markers(marker_path, gene_names)

dl = []
for o in dend['nodes']:
Expand Down
2 changes: 1 addition & 1 deletion src/scripts/marker_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def generate_marker_table(marker_data, output_filepath):
d = dict()
d['Taxonomy_node_ID'] = o
d['clusterName'] = marker_data[o][CLUSTER]
d['Markers'] = EXPRESSION_SEPARATOR.join(marker_data[o][EXPRESSIONS])
d['Markers'] = EXPRESSION_SEPARATOR.join(sorted(marker_data[o][EXPRESSIONS]))
for k in robot_marker_template_seed.keys():
if not (k in d.keys()):
d[k] = ''
Expand Down
10 changes: 5 additions & 5 deletions src/scripts/template_generation_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from dendrogram_tools import dend_json_2_nodes_n_edges
from template_generation_utils import get_synonyms_from_taxonomy, get_synonym_pairs, read_taxonomy_config, \
get_subtrees, read_dendrogram_tree, get_dend_subtrees, index_dendrogram,\
read_csv, read_ensemble_data, read_markers, get_gross_cell_type, merge_tables
read_csv, read_gene_data, read_markers, get_gross_cell_type, merge_tables


log = logging.getLogger(__name__)
Expand Down Expand Up @@ -85,13 +85,13 @@ def generate_base_class_template(dend_json_path, output_filepath):

marker_path = MARKER_PATH.format(str(taxon).replace("CCN", ""))
allen_marker_path = ALLEN_MARKER_PATH.format(str(taxon).replace("CCN", ""))
ensemble_path = ENSEMBLE_PATH.format(str(taxonomy_config["Ensemble_data"]).strip().lower())
gene_db_path = ENSEMBLE_PATH.format(str(taxonomy_config["Reference_gene_list"][0]).strip().lower())

if taxonomy_config:
subtrees = get_subtrees(dend_tree, taxonomy_config)
ensmusg_names = read_ensemble_data(ensemble_path)
minimal_markers = read_markers(marker_path, ensmusg_names)
allen_markers = read_markers(allen_marker_path, ensmusg_names)
gene_names = read_gene_data(gene_db_path)
minimal_markers = read_markers(marker_path, gene_names)
allen_markers = read_markers(allen_marker_path, gene_names)

robot_class_curation_seed = ['defined_class',
'prefLabel',
Expand Down
15 changes: 8 additions & 7 deletions src/scripts/template_generation_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ def get_synonyms_from_taxonomy(node):
"""
synonym_properties = ['cell_set_preferred_alias', 'original_label', 'cell_set_label', 'cell_set_aligned_alias',
'cell_set_additional_aliases']
synonyms = {node[prop] for prop in synonym_properties if prop in node.keys() and node[prop]}

return OR_SEPARATOR.join({node[prop] for prop in synonym_properties if prop in node.keys() and node[prop]})
return OR_SEPARATOR.join(sorted(synonyms))


def get_synonym_pairs(node):
Expand Down Expand Up @@ -270,17 +271,17 @@ def index_dendrogram(dend):
return dend_dict


def read_ensemble_data(ensemble_path):
ensemble = {}
with open(ensemble_path) as fd:
def read_gene_data(gene_db_path):
genes = {}
with open(gene_db_path) as fd:
rd = csv.reader(fd, delimiter="\t", quotechar='"')
# skip first 2 rows
# skip first 2 header rows
next(rd)
next(rd)
for row in rd:
_id = row[0]
ensemble[_id] = row[2]
return ensemble
genes[_id] = row[2]
return genes


def read_markers(marker_path, ensmusg_names):
Expand Down

0 comments on commit e486ecc

Please sign in to comment.