Skip to content

Commit

Permalink
Add support for using TES distances (breaking changes from previous v…
Browse files Browse the repository at this point in the history
…ersions).
  • Loading branch information
pjbriggs committed Feb 5, 2024
1 parent 47ae014 commit d49672d
Show file tree
Hide file tree
Showing 4 changed files with 479 additions and 37 deletions.
34 changes: 22 additions & 12 deletions rnachipintegrator/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
# distances from an arbitrary zero, and then calculating distances
# between things with reference to that?

def find_nearest_features(peaks,features,distance=None,tss_only=False,
def find_nearest_features(peaks,features,distance=None,use_edge='BOTH',
only_differentially_expressed=False):
"""
Locate nearest features for each peak
Expand All @@ -38,9 +38,10 @@ def find_nearest_features(peaks,features,distance=None,tss_only=False,
features (FeatureList): list of features
peaks (PeakList): list of peaks
distance (int): optional cut-off distance to apply
tss_only (bool): only consider distances from the
feature TSS (default is to consider distances from
both the TSS and TES)
use_edge (str): one of 'BOTH' (the default), 'TSS'
or 'TSS'. 'Both' considers distances from both the
feature TSS and TES and picks the nearest, whereas
'tss' and 'tes' only consider the specified edge.
only_differentially_expressed (bool): only consider
features that are flagged as differentially expressed
pad (bool): add extra 'None' items to output
Expand All @@ -58,17 +59,22 @@ def find_nearest_features(peaks,features,distance=None,tss_only=False,
# Differentially-expressed features only?
if only_differentially_expressed:
feature_list = feature_list.filterByFlag(1)
if tss_only:
if use_edge == "TSS":
sort_features_by_tss_distances(peak,feature_list)
elif use_edge == "TES":
sort_features_by_tes_distances(peak,feature_list)
else:
sort_features_by_edge_distances(peak,feature_list)
# Apply distance cut-off
if distance is not None:
closest = FeatureSet()
for feature in feature_list:
if tss_only:
if use_edge == "TSS":
if distances.distance_tss(peak,feature) > distance:
break
elif use_edge == "TES":
if distances.distance_tes(peak,feature) > distance:
break
else:
if distances.distance_closest_edge(peak,feature) > distance:
break
Expand All @@ -80,7 +86,7 @@ def find_nearest_features(peaks,features,distance=None,tss_only=False,
# Return result
yield (peak,feature_list)

def find_nearest_peaks(features,peaks,distance=None,tss_only=False,
def find_nearest_peaks(features,peaks,distance=None,use_edge='BOTH',
only_differentially_expressed=False):
"""
Locate nearest peaks for each feature
Expand All @@ -89,9 +95,10 @@ def find_nearest_peaks(features,peaks,distance=None,tss_only=False,
features (FeatureList): list of features
peaks (PeakList): list of peaks
distance (int): optional cut-off distance to apply
tss_only (bool): only consider distances from the
feature TSS (default is to consider distances from
both the TSS and TES)
use_edge (str): one of 'BOTH' (the default), 'TSS'
or 'TSS'. 'Both' considers distances from both the
feature TSS and TES and picks the nearest, whereas
'tss' and 'tes' only consider the specified edge.
only_differentially_expressed (bool): only consider
features that are flagged as differentially expressed
Expand All @@ -102,9 +109,12 @@ def find_nearest_peaks(features,peaks,distance=None,tss_only=False,
"""
# Set functions according to edges
if tss_only:
if use_edge == "TSS":
sort_peaks = sort_peaks_by_tss_distances
get_distance = distances.distance_tss
elif use_edge == "TES":
sort_peaks = sort_peaks_by_tes_distances
get_distance = distances.distance_tes
else:
sort_peaks = sort_peaks_by_edge_distances
get_distance = distances.distance_closest_edge
Expand Down Expand Up @@ -145,7 +155,7 @@ def sort_features_by_edge_distances(peak,features):
"""
features.features = sorted(features.features,
key = lambda f:
distances.edge_distances(peak,f))
(distances.edge_distances(peak,f),f.id))

def sort_features_by_tss_distances(peak,features):
"""
Expand Down
40 changes: 22 additions & 18 deletions rnachipintegrator/cli.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/usr/bin/env python
#
# RnaChipIntegrator.py: analyse genomic features (genes) with peak data
# Copyright (C) University of Manchester 2011-19 Peter Briggs, Leo Zeef
# & Ian Donaldson
# Copyright (C) University of Manchester 2011-19,2024 Peter Briggs,
# Leo Zeef & Ian Donaldson
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the Artistic License 2.0 (see the file LICENSE
Expand Down Expand Up @@ -207,13 +207,13 @@ def add_edge_option(self,group=None):
"""
self.add_option('--edge',
action='store',dest="edge",
choices=('tss','both'),
choices=('tss','tes','both'),
default='tss',
help="Gene edges to consider when calculating "
"distances between genes and peaks, either: "
"'tss' (default: only use gene TSS) or 'both' "
"(use whichever of TSS or TES gives shortest "
"distance)",
"'tss' (default: only use gene TSS), 'tes' "
"(only use gene TES), or 'both' (use whichever "
"of TSS or TES gives shortest distance)",
group=group)

def add_only_de_option(self,group=None):
Expand Down Expand Up @@ -497,7 +497,8 @@ class AnalysisParams(object):
genes: FeatureSet instance
peaks: PeakSet instance
cutoff: cutoff distance
tss_only: whether to use TSS or both TSS and TES
use_edge: whether to use both TSS and TES, TSS only
or TES only
only_differentially_expressed: whether to restrict
genes to those flagged as differenitally
expressed
Expand All @@ -514,7 +515,7 @@ def __init__(self,**kws):
genes=None,
peaks=None,
cutoff=None,
tss_only=False,
use_edge="BOTH",
only_differentially_expressed=False)
for key in kws:
if key not in self._params:
Expand All @@ -531,8 +532,8 @@ def peaks(self):
def cutoff(self):
return self._params['cutoff']
@property
def tss_only(self):
return self._params['tss_only']
def use_edge(self):
return self._params['use_edge']
@property
def only_differentially_expressed(self):
return self._params['only_differentially_expressed']
Expand Down Expand Up @@ -675,7 +676,7 @@ def find_nearest_features(params):
for peak,genes in analysis.find_nearest_features(
params.peaks,
params.genes,
tss_only=params.tss_only,
use_edge=params.use_edge,
distance=params.cutoff,
only_differentially_expressed=\
params.only_differentially_expressed):
Expand All @@ -695,7 +696,7 @@ def find_nearest_peaks(params):
for gene,peaks in analysis.find_nearest_peaks(
params.genes,
params.peaks,
tss_only=params.tss_only,
use_edge=params.use_edge,
distance=params.cutoff,
only_differentially_expressed=\
params.only_differentially_expressed):
Expand Down Expand Up @@ -853,9 +854,11 @@ def main(args=None):

# Gene edge to use
if options.edge == 'tss':
tss_only = True
use_edge = "TSS"
elif options.edge == 'tes':
use_edge = "TES"
else:
tss_only = False
use_edge = "BOTH"

# Promoter region
promoter = (abs(int(options.promoter_region.split(',')[0])),
Expand Down Expand Up @@ -960,8 +963,7 @@ def main(args=None):
print("Cutoffs (bp) : %s" % ','.join([str(d) if d is not None
else "no cutoff"
for d in cutoffs]))
print("Edge : %s" % ('TSS only' if tss_only
else 'TSS or TES'))
print("Edge : %s" % use_edge)
print("DE only : %s" % ('yes' if options.only_diff_expressed
else 'no'))
print("Nprocs : %s" % options.nprocs)
Expand Down Expand Up @@ -1021,7 +1023,7 @@ def main(args=None):
genes=gene_lists[genes],
peaks=peak_lists[peaks],
cutoff=cutoff,
tss_only=tss_only,
use_edge=use_edge,
only_differentially_expressed=
options.only_diff_expressed
))
Expand Down Expand Up @@ -1182,8 +1184,10 @@ def main(args=None):
else "%d" % options.max_closest))
xlsx.append_to_notes("Promoter region (bp from TSS)\t-%d to %d" %
promoter)
if tss_only:
if use_edge == "TSS":
xlsx.append_to_notes("Distances calculated from\tTSS only")
elif use_edge == "TES":
xlsx.append_to_notes("Distances calculated from\tTES only")
else:
xlsx.append_to_notes("Distances calculated from\tTSS or TES")
xlsx.append_to_notes("Only use differentially expressed %ss\t%s" %
Expand Down
Loading

0 comments on commit d49672d

Please sign in to comment.