From 4a3e42c0906d4a841ea9cddf7edb10fc994f7735 Mon Sep 17 00:00:00 2001 From: Peter Briggs Date: Mon, 5 Feb 2024 17:39:34 +0000 Subject: [PATCH 1/4] distances: new function 'tes_distances' (dist btwn peak edges and feature TES). --- rnachipintegrator/distances.py | 38 ++++++++++++++++++++++++++++++++-- test/test_distances.py | 32 +++++++++++++++++++++++++++- 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/rnachipintegrator/distances.py b/rnachipintegrator/distances.py index 360c968..54f0b32 100644 --- a/rnachipintegrator/distances.py +++ b/rnachipintegrator/distances.py @@ -1,8 +1,8 @@ #!/bin/env python # # distances.py: functions for determining distances and overlaps -# Copyright (C) University of Manchester 2011-2019 Peter Briggs, Leo Zeef -# & Ian Donaldson +# Copyright (C) University of Manchester 2011-2019,2024 Peter Briggs, +# Leo Zeef & Ian Donaldson # """ distances.py @@ -218,6 +218,40 @@ def tss_distances(peak,feature): d_tss.sort() return tuple(d_tss) +def tes_distances(peak,feature): + """ + Return distances between peak edges and feature TES + + Arguments: + peak (Peak): peak instance + feature (Feature): feature instance + + Returns: + tuple: a pair of distances, the first being the + smallest distance between an edge of the peak + region to the TES of the feature region, and + the second being the distance from the other peak + edge to the TES. + If the TES position is entirely contained within + the peak then the smallest distance is returned as + zero, the second is the average of the distances + to the two peak edges from the TES. + + """ + # TES is considered as a point + if (feature.tes >= peak.start and + feature.tes <= peak.end): + # TES entirely contained in the peak + # Rank using the average distance of the peak + # edges from the TES + return (0,(abs(feature.tes - peak.start) + + abs(feature.tes - peak.end))/2) + # Possible distances to TES + d_tes = [abs(feature.tes - peak.start), + abs(feature.tes - peak.end)] + d_tes.sort() + return tuple(d_tes) + def distance_closest_edge(peak,feature): """ Get distance from a peak to a feature diff --git a/test/test_distances.py b/test/test_distances.py index fd7a54d..abeb261 100644 --- a/test/test_distances.py +++ b/test/test_distances.py @@ -1,6 +1,6 @@ # # test_analysis.py: unit tests for analysis module -# Copyright (C) University of Manchester 2011-5 Peter Briggs +# Copyright (C) University of Manchester 2011-5,2024 Peter Briggs from common import * from rnachipintegrator.Peaks import Peak @@ -9,6 +9,7 @@ from rnachipintegrator.distances import closestDistanceToRegion from rnachipintegrator.distances import edge_distances from rnachipintegrator.distances import tss_distances +from rnachipintegrator.distances import tes_distances from rnachipintegrator.distances import direction import unittest @@ -160,6 +161,35 @@ def test_tss_distances_peak_contains_TSS(self): Feature('NM4','chr1','200','300','-')), (0,50)) +######################################################################## +# +# TestTESDistancesFunction +# +######################################################################### + +class TestTESDistancesFunction(unittest.TestCase): + def test_tes_distances_peak_before_TES(self): + self.assertEqual(tes_distances(Peak('chr1','100','200'), + Feature('NM1','chr1','250','400','+')), + (200,300)) + self.assertEqual(tes_distances(Peak('chr1','100','200'), + Feature('NM1','chr1','250','400','-')), + (50,150)) + def test_tes_distances_TES_before_peak(self): + self.assertEqual(tes_distances(Peak('chr1','250','400'), + Feature('NM2','chr1','100','200','+')), + (50,200)) + self.assertEqual(tes_distances(Peak('chr1','250','400'), + Feature('NM2','chr1','100','200','-')), + (150,300)) + def test_tes_distances_peak_contains_TES(self): + self.assertEqual(tes_distances(Peak('chr1','100','250'), + Feature('NM3','chr1','50','200','+')), + (0,75)) + self.assertEqual(tes_distances(Peak('chr1','250','350'), + Feature('NM4','chr1','300','400','-')), + (0,50)) + ######################################################################## # # TestDirectionFunction From db3b64a434dd617839ba5795e2d04c25134b5028 Mon Sep 17 00:00:00 2001 From: Peter Briggs Date: Mon, 5 Feb 2024 17:41:46 +0000 Subject: [PATCH 2/4] analysis: add functions for sorting peaks and features by TES distances. --- rnachipintegrator/analysis.py | 38 ++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/rnachipintegrator/analysis.py b/rnachipintegrator/analysis.py index fc61c57..7e8d343 100644 --- a/rnachipintegrator/analysis.py +++ b/rnachipintegrator/analysis.py @@ -1,8 +1,8 @@ #!/bin/env python # # analysis.py: analyses of peaks vs features and vice versa -# Copyright (C) University of Manchester 2011-2019 Peter Briggs, Leo Zeef -# & Ian Donaldson +# Copyright (C) University of Manchester 2011-2019,2024 Peter Briggs, +# Leo Zeef & Ian Donaldson # """ analysis.py @@ -161,7 +161,23 @@ def sort_features_by_tss_distances(peak,features): """ features.features = sorted(features.features, key = lambda f: - distances.tss_distances(peak,f)) + (distances.tss_distances(peak,f),f.id)) + +def sort_features_by_tes_distances(peak,features): + """ + Sort features by TES-to-edge distances to a peak + + Arguments: + peak (Peak): peak instance + features (FeatureSet): set of features that will + be sorted into order according to the smallest + distance of their TES positions to the edges of + the peak. The sorting is done in place. + + """ + features.features = sorted(features.features, + key = lambda f: + (distances.tes_distances(peak,f),f.id)) def sort_peaks_by_edge_distances(feature,peaks): """ @@ -194,3 +210,19 @@ def sort_peaks_by_tss_distances(feature,peaks): peaks.peaks = sorted(peaks.peaks, key = lambda p: distances.tss_distances(p,feature)) + +def sort_peaks_by_tes_distances(feature,peaks): + """ + Sort peaks by edge-to-TES distances to a feature + + Arguments: + feature (Feature): feature instance + peaks (PeakSet): set of peaks that will be + sorted into order according to the smallest + distance of their edges from the TES position + of the feature. The sorting is done in place. + + """ + peaks.peaks = sorted(peaks.peaks, + key = lambda p: + distances.tes_distances(p,feature)) From 98e8ddf9d43eba7e97ef9da2cd2485b30a1550ca Mon Sep 17 00:00:00 2001 From: Peter Briggs Date: Mon, 5 Feb 2024 17:45:31 +0000 Subject: [PATCH 3/4] Add support for using TES distances (breaking changes from previous versions). --- rnachipintegrator/analysis.py | 34 ++- rnachipintegrator/cli.py | 40 ++-- test/test_analysis.py | 436 +++++++++++++++++++++++++++++++++- test/test_cli.py | 6 +- 4 files changed, 479 insertions(+), 37 deletions(-) diff --git a/rnachipintegrator/analysis.py b/rnachipintegrator/analysis.py index 7e8d343..73e221c 100644 --- a/rnachipintegrator/analysis.py +++ b/rnachipintegrator/analysis.py @@ -29,7 +29,7 @@ # distances from an arbitrary zero, and then calculating distances # between things with reference to that? -def find_nearest_features(peaks,features,distance=None,tss_only=False, +def find_nearest_features(peaks,features,distance=None,use_edge='BOTH', only_differentially_expressed=False): """ Locate nearest features for each peak @@ -38,9 +38,10 @@ def find_nearest_features(peaks,features,distance=None,tss_only=False, features (FeatureList): list of features peaks (PeakList): list of peaks distance (int): optional cut-off distance to apply - tss_only (bool): only consider distances from the - feature TSS (default is to consider distances from - both the TSS and TES) + use_edge (str): one of 'BOTH' (the default), 'TSS' + or 'TSS'. 'Both' considers distances from both the + feature TSS and TES and picks the nearest, whereas + 'tss' and 'tes' only consider the specified edge. only_differentially_expressed (bool): only consider features that are flagged as differentially expressed pad (bool): add extra 'None' items to output @@ -58,17 +59,22 @@ def find_nearest_features(peaks,features,distance=None,tss_only=False, # Differentially-expressed features only? if only_differentially_expressed: feature_list = feature_list.filterByFlag(1) - if tss_only: + if use_edge == "TSS": sort_features_by_tss_distances(peak,feature_list) + elif use_edge == "TES": + sort_features_by_tes_distances(peak,feature_list) else: sort_features_by_edge_distances(peak,feature_list) # Apply distance cut-off if distance is not None: closest = FeatureSet() for feature in feature_list: - if tss_only: + if use_edge == "TSS": if distances.distance_tss(peak,feature) > distance: break + elif use_edge == "TES": + if distances.distance_tes(peak,feature) > distance: + break else: if distances.distance_closest_edge(peak,feature) > distance: break @@ -80,7 +86,7 @@ def find_nearest_features(peaks,features,distance=None,tss_only=False, # Return result yield (peak,feature_list) -def find_nearest_peaks(features,peaks,distance=None,tss_only=False, +def find_nearest_peaks(features,peaks,distance=None,use_edge='BOTH', only_differentially_expressed=False): """ Locate nearest peaks for each feature @@ -89,9 +95,10 @@ def find_nearest_peaks(features,peaks,distance=None,tss_only=False, features (FeatureList): list of features peaks (PeakList): list of peaks distance (int): optional cut-off distance to apply - tss_only (bool): only consider distances from the - feature TSS (default is to consider distances from - both the TSS and TES) + use_edge (str): one of 'BOTH' (the default), 'TSS' + or 'TSS'. 'Both' considers distances from both the + feature TSS and TES and picks the nearest, whereas + 'tss' and 'tes' only consider the specified edge. only_differentially_expressed (bool): only consider features that are flagged as differentially expressed @@ -102,9 +109,12 @@ def find_nearest_peaks(features,peaks,distance=None,tss_only=False, """ # Set functions according to edges - if tss_only: + if use_edge == "TSS": sort_peaks = sort_peaks_by_tss_distances get_distance = distances.distance_tss + elif use_edge == "TES": + sort_peaks = sort_peaks_by_tes_distances + get_distance = distances.distance_tes else: sort_peaks = sort_peaks_by_edge_distances get_distance = distances.distance_closest_edge @@ -145,7 +155,7 @@ def sort_features_by_edge_distances(peak,features): """ features.features = sorted(features.features, key = lambda f: - distances.edge_distances(peak,f)) + (distances.edge_distances(peak,f),f.id)) def sort_features_by_tss_distances(peak,features): """ diff --git a/rnachipintegrator/cli.py b/rnachipintegrator/cli.py index 8bb2e17..043f746 100644 --- a/rnachipintegrator/cli.py +++ b/rnachipintegrator/cli.py @@ -1,8 +1,8 @@ #!/usr/bin/env python # # RnaChipIntegrator.py: analyse genomic features (genes) with peak data -# Copyright (C) University of Manchester 2011-19 Peter Briggs, Leo Zeef -# & Ian Donaldson +# Copyright (C) University of Manchester 2011-19,2024 Peter Briggs, +# Leo Zeef & Ian Donaldson # # This code is free software; you can redistribute it and/or modify it # under the terms of the Artistic License 2.0 (see the file LICENSE @@ -207,13 +207,13 @@ def add_edge_option(self,group=None): """ self.add_option('--edge', action='store',dest="edge", - choices=('tss','both'), + choices=('tss','tes','both'), default='tss', help="Gene edges to consider when calculating " "distances between genes and peaks, either: " - "'tss' (default: only use gene TSS) or 'both' " - "(use whichever of TSS or TES gives shortest " - "distance)", + "'tss' (default: only use gene TSS), 'tes' " + "(only use gene TES), or 'both' (use whichever " + "of TSS or TES gives shortest distance)", group=group) def add_only_de_option(self,group=None): @@ -497,7 +497,8 @@ class AnalysisParams(object): genes: FeatureSet instance peaks: PeakSet instance cutoff: cutoff distance - tss_only: whether to use TSS or both TSS and TES + use_edge: whether to use both TSS and TES, TSS only + or TES only only_differentially_expressed: whether to restrict genes to those flagged as differenitally expressed @@ -514,7 +515,7 @@ def __init__(self,**kws): genes=None, peaks=None, cutoff=None, - tss_only=False, + use_edge="BOTH", only_differentially_expressed=False) for key in kws: if key not in self._params: @@ -531,8 +532,8 @@ def peaks(self): def cutoff(self): return self._params['cutoff'] @property - def tss_only(self): - return self._params['tss_only'] + def use_edge(self): + return self._params['use_edge'] @property def only_differentially_expressed(self): return self._params['only_differentially_expressed'] @@ -675,7 +676,7 @@ def find_nearest_features(params): for peak,genes in analysis.find_nearest_features( params.peaks, params.genes, - tss_only=params.tss_only, + use_edge=params.use_edge, distance=params.cutoff, only_differentially_expressed=\ params.only_differentially_expressed): @@ -695,7 +696,7 @@ def find_nearest_peaks(params): for gene,peaks in analysis.find_nearest_peaks( params.genes, params.peaks, - tss_only=params.tss_only, + use_edge=params.use_edge, distance=params.cutoff, only_differentially_expressed=\ params.only_differentially_expressed): @@ -853,9 +854,11 @@ def main(args=None): # Gene edge to use if options.edge == 'tss': - tss_only = True + use_edge = "TSS" + elif options.edge == 'tes': + use_edge = "TES" else: - tss_only = False + use_edge = "BOTH" # Promoter region promoter = (abs(int(options.promoter_region.split(',')[0])), @@ -960,8 +963,7 @@ def main(args=None): print("Cutoffs (bp) : %s" % ','.join([str(d) if d is not None else "no cutoff" for d in cutoffs])) - print("Edge : %s" % ('TSS only' if tss_only - else 'TSS or TES')) + print("Edge : %s" % use_edge) print("DE only : %s" % ('yes' if options.only_diff_expressed else 'no')) print("Nprocs : %s" % options.nprocs) @@ -1021,7 +1023,7 @@ def main(args=None): genes=gene_lists[genes], peaks=peak_lists[peaks], cutoff=cutoff, - tss_only=tss_only, + use_edge=use_edge, only_differentially_expressed= options.only_diff_expressed )) @@ -1182,8 +1184,10 @@ def main(args=None): else "%d" % options.max_closest)) xlsx.append_to_notes("Promoter region (bp from TSS)\t-%d to %d" % promoter) - if tss_only: + if use_edge == "TSS": xlsx.append_to_notes("Distances calculated from\tTSS only") + elif use_edge == "TES": + xlsx.append_to_notes("Distances calculated from\tTES only") else: xlsx.append_to_notes("Distances calculated from\tTSS or TES") xlsx.append_to_notes("Only use differentially expressed %ss\t%s" % diff --git a/test/test_analysis.py b/test/test_analysis.py index 1a64a9a..55879e7 100644 --- a/test/test_analysis.py +++ b/test/test_analysis.py @@ -162,7 +162,7 @@ def test_find_nearest_features_summits_tss_only(self): # Run the analysis results = list(find_nearest_features(self.summits, self.features, - tss_only=True)) + use_edge="TSS")) # Correct number of results self.assertEqual(len(results),5) # TSS distances for 1st peak @@ -252,6 +252,100 @@ def test_find_nearest_features_summits_tss_only(self): self.assertEqual(features[1].id,'CG14448-RA') self.assertEqual(features[2].id,'CG9130-RB') + def test_find_nearest_features_summits_tes_only(self): + # Run the analysis + results = list(find_nearest_features(self.summits, + self.features, + use_edge="TES")) + # Correct number of results + self.assertEqual(len(results),5) + # TES distances for 1st summit + # CG31973 41409 + # CG2674-RC 47621 + # CG2674-RE 47621 + # CG2674-RA 47621 + # CG3625-RA 216573 + # CG3625-RB 216573 + # CG3625-RC 216573 + # CG2762-RA 473730 + # CG17941-RA 573209 + peak,features = results[0] + self.assertEqual(peak,Peak('chr2L','66811','66812')) + self.assertEqual(len(features),9) + self.assertEqual(features[0].id,'CG31973') + self.assertEqual(features[1].id,'CG2674-RA') + self.assertEqual(features[2].id,'CG2674-RC') + self.assertEqual(features[3].id,'CG2674-RE') + self.assertEqual(features[4].id,'CG3625-RA') + self.assertEqual(features[5].id,'CG3625-RB') + self.assertEqual(features[6].id,'CG3625-RC') + self.assertEqual(features[7].id,'CG2762-RA') + self.assertEqual(features[8].id,'CG17941-RA') + # TSS distances for 2nd summit + # CG3625-RA 34207 + # CG3625-RB 34207 + # CG3625-RC 34207 + # CG2674-RC 134744 + # CG2674-RE 134744 + # CG2674-RA 134744 + # CG31973 223775 + # CG2762-RA 291364 + # CG17941-RA 390843 + peak,features = results[1] + self.assertEqual(peak,Peak('chr2L','249177','249178')) + self.assertEqual(len(features),9) + self.assertEqual(features[0].id,'CG3625-RA') + self.assertEqual(features[1].id,'CG3625-RB') + self.assertEqual(features[2].id,'CG3625-RC') + self.assertEqual(features[3].id,'CG2674-RA') + self.assertEqual(features[4].id,'CG2674-RC') + self.assertEqual(features[5].id,'CG2674-RE') + self.assertEqual(features[6].id,'CG31973') + self.assertEqual(features[7].id,'CG2762-RA') + self.assertEqual(features[8].id,'CG17941-RA') + # TSS distances for 3rd summit + # CG17941-RA 34070 + # CG2762-RA 65408 + # CG3625-RA 322565 + # CG3625-RB 322565 + # CG3625-RC 322565 + # CG2674-RC 491517 + # CG2674-RE 491517 + # CG2674-RA 491517 + # CG31973 580548 + peak,features = results[2] + self.assertEqual(peak,Peak('chr2L','605950','605951')) + self.assertEqual(len(features),9) + self.assertEqual(features[0].id,'CG17941-RA') + self.assertEqual(features[1].id,'CG2762-RA') + self.assertEqual(features[2].id,'CG3625-RA') + self.assertEqual(features[3].id,'CG3625-RB') + self.assertEqual(features[4].id,'CG3625-RC') + self.assertEqual(features[5].id,'CG2674-RA') + self.assertEqual(features[6].id,'CG2674-RC') + self.assertEqual(features[7].id,'CG2674-RE') + self.assertEqual(features[8].id,'CG31973') + # TSS distances for 4th summit + # CG9130-RB 1002200 + # CG13051-RA 13999724 + # CG14448-RA 20524164 + peak,features = results[3] + self.assertEqual(peak,Peak('chr3L','2258189','2258190')) + self.assertEqual(len(features),3) + self.assertEqual(features[0].id,'CG9130-RB') + self.assertEqual(features[1].id,'CG13051-RA') + self.assertEqual(features[2].id,'CG14448-RA') + # TSS distances for 5th peak + # CG13051-RA 3240126 + # CG14448-RA 3284313 + # CG9130-RB 18242051 + peak,features = results[4] + self.assertEqual(peak,Peak('chr3L','19498040','19498041')) + self.assertEqual(len(features),3) + self.assertEqual(features[0].id,'CG13051-RA') + self.assertEqual(features[1].id,'CG14448-RA') + self.assertEqual(features[2].id,'CG9130-RB') + def test_find_nearest_features_summits_differentially_expressed(self): # Run the analysis results = list(find_nearest_features(self.summits, @@ -508,7 +602,7 @@ def test_find_nearest_features_regions_tss_only(self): # Run the analysis results = list(find_nearest_features(self.peaks, self.features, - tss_only=True)) + use_edge="TSS")) # Correct number of results self.assertEqual(len(results),5) # Closest distances for 1st peak @@ -598,6 +692,100 @@ def test_find_nearest_features_regions_tss_only(self): self.assertEqual(features[1].id,'CG14448-RA') self.assertEqual(features[2].id,'CG9130-RB') + def test_find_nearest_features_regions_tes_only(self): + # Run the analysis + results = list(find_nearest_features(self.peaks, + self.features, + use_edge="TES")) + # Correct number of results + self.assertEqual(len(results),5) + # Closest distances for 1st peak + # CG31973 41309 + # CG2674-RC 47522 + # CG2674-RE 47522 + # CG2674-RA 47522 + # CG3625-RA 216474 + # CG3625-RB 216474 + # CG3625-RC 216474 + # CG2762-RA 473631 + # CG17941-RA 573110 + peak,features = results[0] + self.assertEqual(peak,Peak('chr2L','66711','66911')) + self.assertEqual(len(features),9) + self.assertEqual(features[0].id,'CG31973') + self.assertEqual(features[1].id,'CG2674-RA') + self.assertEqual(features[2].id,'CG2674-RC') + self.assertEqual(features[3].id,'CG2674-RE') + self.assertEqual(features[4].id,'CG3625-RA') + self.assertEqual(features[5].id,'CG3625-RB') + self.assertEqual(features[6].id,'CG3625-RC') + self.assertEqual(features[7].id,'CG2762-RA') + self.assertEqual(features[8].id,'CG17941-RA') + # Closest distances for 2nd peak + # CG3625-RA 34108 + # CG3625-RB 34108 + # CG3625-RC 34108 + # CG2674-RC 134644 + # CG2674-RE 134644 + # CG2674-RA 134644 + # CG31973 223675 + # CG2762-RA 291265 + # CG17941-RA 390744 + peak,features = results[1] + self.assertEqual(peak,Peak('chr2L','249077','249277')) + self.assertEqual(len(features),9) + self.assertEqual(features[0].id,'CG3625-RA') + self.assertEqual(features[1].id,'CG3625-RB') + self.assertEqual(features[2].id,'CG3625-RC') + self.assertEqual(features[3].id,'CG2674-RA') + self.assertEqual(features[4].id,'CG2674-RC') + self.assertEqual(features[5].id,'CG2674-RE') + self.assertEqual(features[6].id,'CG31973') + self.assertEqual(features[7].id,'CG2762-RA') + self.assertEqual(features[8].id,'CG17941-RA') + # Closest distances for 3rd peak + # CG17941-RA 33971 + # CG2762-RA 65308 + # CG3625-RA 322465 + # CG3625-RB 322465 + # CG3625-RC 322465 + # CG2674-RC 491417 + # CG2674-RE 491417 + # CG2674-RA 491417 + # CG31973 580448 + peak,features = results[2] + self.assertEqual(peak,Peak('chr2L','605850','606050')) + self.assertEqual(len(features),9) + self.assertEqual(features[0].id,'CG17941-RA') + self.assertEqual(features[1].id,'CG2762-RA') + self.assertEqual(features[2].id,'CG3625-RA') + self.assertEqual(features[3].id,'CG3625-RB') + self.assertEqual(features[4].id,'CG3625-RC') + self.assertEqual(features[5].id,'CG2674-RA') + self.assertEqual(features[6].id,'CG2674-RC') + self.assertEqual(features[7].id,'CG2674-RE') + self.assertEqual(features[8].id,'CG31973') + # Closest distances for 4th peak + # CG9130-RB 1002100 + # CG13051-RA 13999624 + # CG14448-RA 20524064 + peak,features = results[3] + self.assertEqual(peak,Peak('chr3L','2258089','2258290')) + self.assertEqual(len(features),3) + self.assertEqual(features[0].id,'CG9130-RB') + self.assertEqual(features[1].id,'CG13051-RA') + self.assertEqual(features[2].id,'CG14448-RA') + # Closest distances for 5th peak + # CG13051-RA 3240026 + # CG14448-RA 3284214 + # CG9130-RB 18241951 + peak,features = results[4] + self.assertEqual(peak,Peak('chr3L','19497940','19498140')) + self.assertEqual(len(features),3) + self.assertEqual(features[0].id,'CG13051-RA') + self.assertEqual(features[1].id,'CG14448-RA') + self.assertEqual(features[2].id,'CG9130-RB') + def test_find_nearest_features_regions_differentially_expressed(self): # Run the analysis results = list(find_nearest_features(self.peaks, @@ -877,7 +1065,7 @@ def test_find_nearest_peaks_summits_tss_only(self): # Run the analysis results = list(find_nearest_peaks(self.features, self.summits, - tss_only=True)) + use_edge="TSS")) # Convenience variables for summits summits = (Peak('chr2L','66811','66812'), Peak('chr2L','249177','249178'), @@ -993,6 +1181,126 @@ def test_find_nearest_peaks_summits_tss_only(self): expected.addPeak(summits[0]) self.assertEqual(peaks,expected) + def test_find_nearest_peaks_summits_tes_only(self): + # Run the analysis + results = list(find_nearest_peaks(self.features, + self.summits, + use_edge="TES")) + # Convenience variables for summits + summits = (Peak('chr2L','66811','66812'), + Peak('chr2L','249177','249178'), + Peak('chr2L','605950','605951')) + # Correct number of results + self.assertEqual(len(results),12) + # Closest distances for 1st feature + # #0 41409 + # #1 223775 + # #2 580548 + feature,peaks = results[0] + self.assertEqual(feature,Feature('CG31973', + 'chr2L','25402','59243','-')) + expected = PeakSet() + expected.addPeak(summits[0]) + expected.addPeak(summits[1]) + expected.addPeak(summits[2]) + self.assertEqual(peaks,expected) + # Closest distances for 2nd feature + # #0 47621 + # #1 134744 + # #2 491517 + feature,peaks = results[1] + self.assertEqual(feature,Feature('CG2674-RC', + 'chr2L','107926','114433','+')) + expected = PeakSet() + expected.addPeak(summits[0]) + expected.addPeak(summits[1]) + expected.addPeak(summits[2]) + self.assertEqual(peaks,expected) + # Closest distances for 3rd feature + # #0 47621 + # #1 134744 + # #2 491517 + feature,peaks = results[2] + self.assertEqual(feature,Feature('CG2674-RE', + 'chr2L','106903','114433','+')) + expected = PeakSet() + expected.addPeak(summits[0]) + expected.addPeak(summits[1]) + expected.addPeak(summits[2]) + self.assertEqual(peaks,expected) + # Closest distances for 4th feature + # #0 47621 + # #1 134744 + # #2 491517 + feature,peaks = results[3] + self.assertEqual(feature,Feature('CG2674-RA', + 'chr2L','107760','114433','+')) + expected = PeakSet() + expected.addPeak(summits[0]) + expected.addPeak(summits[1]) + expected.addPeak(summits[2]) + self.assertEqual(peaks,expected) + # Closest distances for 5th feature + # #1 34207 + # #0 216573 + # #2 322565 + feature,peaks = results[4] + self.assertEqual(feature,Feature('CG3625-RA', + 'chr2L','283385','286528','-')) + expected = PeakSet() + expected.addPeak(summits[1]) + expected.addPeak(summits[0]) + expected.addPeak(summits[2]) + self.assertEqual(peaks,expected) + # Closest distances for 6th feature + # #1 34207 + # #0 216573 + # #2 322565 + feature,peaks = results[5] + self.assertEqual(feature,Feature('CG3625-RB', + 'chr2L','283385','285777','-')) + expected = PeakSet() + expected.addPeak(summits[1]) + expected.addPeak(summits[0]) + expected.addPeak(summits[2]) + self.assertEqual(peaks,expected) + # Closest distances for 7th feature + # #1 34207 + # #0 216573 + # #2 322565 + feature,peaks = results[6] + self.assertEqual(feature,Feature('CG3625-RC', + 'chr2L','283385','291011','-')) + expected = PeakSet() + expected.addPeak(summits[1]) + expected.addPeak(summits[0]) + expected.addPeak(summits[2]) + self.assertEqual(peaks,expected) + # Closest distances for 8th feature + # #2 65408 + # #1 291364 + # #0 473730 + feature,peaks = results[7] + self.assertEqual(feature,Feature('CG2762-RA', + 'chr2L','523467','540542','+')) + expected = PeakSet() + expected.addPeak(summits[2]) + expected.addPeak(summits[1]) + expected.addPeak(summits[0]) + self.assertEqual(peaks,expected) + # Closest distances for 9th feature + # #2 34070 + # #1 390843 + # #0 573209 + feature,peaks = results[8] + self.assertEqual(feature,Feature('CG17941-RA', + 'chr2L','640021','714969','-')) + expected = PeakSet() + expected.addPeak(summits[2]) + expected.addPeak(summits[1]) + expected.addPeak(summits[0]) + self.assertEqual(peaks,expected) + def test_find_nearest_peaks_summits_differentially_expressed(self): # Run the analysis results = list(find_nearest_peaks(self.features, @@ -1286,7 +1594,7 @@ def test_find_nearest_peaks_regions_tss_only(self): # Run the analysis results = list(find_nearest_peaks(self.features, self.peaks, - tss_only=True)) + use_edge="TSS")) # Convenience variables for summits peaks_ = (Peak('chr2L','66711','66911'), Peak('chr2L','249077','249277'), @@ -1402,6 +1710,126 @@ def test_find_nearest_peaks_regions_tss_only(self): expected.addPeak(peaks_[0]) self.assertEqual(peaks,expected) + def test_find_nearest_peaks_regions_tes_only(self): + # Run the analysis + results = list(find_nearest_peaks(self.features, + self.peaks, + use_edge="TES")) + # Convenience variables for summits + peaks_ = (Peak('chr2L','66711','66911'), + Peak('chr2L','249077','249277'), + Peak('chr2L','605850','606050')) + # Correct number of results + self.assertEqual(len(results),12) + # Closest TES distances for 1st feature + # #0 41309 + # #1 223675 + # #2 580448 + feature,peaks = results[0] + self.assertEqual(feature,Feature('CG31973', + 'chr2L','25402','59243','-')) + expected = PeakSet() + expected.addPeak(peaks_[0]) + expected.addPeak(peaks_[1]) + expected.addPeak(peaks_[2]) + self.assertEqual(peaks,expected) + # Closest distances for 2nd feature + # #0 47522 + # #1 134644 + # #2 491417 + feature,peaks = results[1] + self.assertEqual(feature,Feature('CG2674-RC', + 'chr2L','107926','114433','+')) + expected = PeakSet() + expected.addPeak(peaks_[0]) + expected.addPeak(peaks_[1]) + expected.addPeak(peaks_[2]) + self.assertEqual(peaks,expected) + # Closest distances for 3rd feature + # #0 47522 + # #1 134644 + # #2 491417 + feature,peaks = results[2] + self.assertEqual(feature,Feature('CG2674-RE', + 'chr2L','106903','114433','+')) + expected = PeakSet() + expected.addPeak(peaks_[0]) + expected.addPeak(peaks_[1]) + expected.addPeak(peaks_[2]) + self.assertEqual(peaks,expected) + # Closest distances for 4th feature + # #0 47522 + # #1 134644 + # #2 491417 + feature,peaks = results[3] + self.assertEqual(feature,Feature('CG2674-RA', + 'chr2L','107760','114433','+')) + expected = PeakSet() + expected.addPeak(peaks_[0]) + expected.addPeak(peaks_[1]) + expected.addPeak(peaks_[2]) + self.assertEqual(peaks,expected) + # Closest distances for 5th feature + # #1 34108 + # #0 216474 + # #2 322465 + feature,peaks = results[4] + self.assertEqual(feature,Feature('CG3625-RA', + 'chr2L','283385','286528','-')) + expected = PeakSet() + expected.addPeak(peaks_[1]) + expected.addPeak(peaks_[0]) + expected.addPeak(peaks_[2]) + self.assertEqual(peaks,expected) + # Closest distances for 6th feature + # #1 34108 + # #0 216474 + # #2 322465 + feature,peaks = results[5] + self.assertEqual(feature,Feature('CG3625-RB', + 'chr2L','283385','285777','-')) + expected = PeakSet() + expected.addPeak(peaks_[1]) + expected.addPeak(peaks_[0]) + expected.addPeak(peaks_[2]) + self.assertEqual(peaks,expected) + # Closest distances for 7th feature + # #1 34108 + # #0 216474 + # #2 322465 + feature,peaks = results[6] + self.assertEqual(feature,Feature('CG3625-RC', + 'chr2L','283385','291011','-')) + expected = PeakSet() + expected.addPeak(peaks_[1]) + expected.addPeak(peaks_[0]) + expected.addPeak(peaks_[2]) + self.assertEqual(peaks,expected) + # Closest distances for 8th feature + # #2 65308 + # #1 291265 + # #0 473631 + feature,peaks = results[7] + self.assertEqual(feature,Feature('CG2762-RA', + 'chr2L','523467','540542','+')) + expected = PeakSet() + expected.addPeak(peaks_[2]) + expected.addPeak(peaks_[1]) + expected.addPeak(peaks_[0]) + self.assertEqual(peaks,expected) + # Closest distances for 9th feature + # #2 33971 + # #1 390744 + # #0 573110 + feature,peaks = results[8] + self.assertEqual(feature,Feature('CG17941-RA', + 'chr2L','640021','714969','-')) + expected = PeakSet() + expected.addPeak(peaks_[2]) + expected.addPeak(peaks_[1]) + expected.addPeak(peaks_[0]) + self.assertEqual(peaks,expected) + def test_find_nearest_peaks_regions_differentially_expressed(self): # Run the analysis results = list(find_nearest_peaks(self.features, diff --git a/test/test_cli.py b/test/test_cli.py index 88c0397..ea4e767 100644 --- a/test/test_cli.py +++ b/test/test_cli.py @@ -183,19 +183,19 @@ def test_defaults(self): self.assertEqual(params.peaks,None) self.assertEqual(params.genes,None) self.assertEqual(params.cutoff,None) - self.assertFalse(params.tss_only) + self.assertEqual(params.use_edge,"BOTH") self.assertFalse(params.only_differentially_expressed) def test_set_values(self): params = AnalysisParams(peaks="peaks", genes="genes", cutoff=100000, - tss_only=True, + use_edge="TSS", only_differentially_expressed=True) self.assertEqual(params.peaks,"peaks") self.assertEqual(params.genes,"genes") self.assertEqual(params.cutoff,100000) - self.assertTrue(params.tss_only) + self.assertEqual(params.use_edge,"TSS") self.assertTrue(params.only_differentially_expressed) class TestBatchNamer(unittest.TestCase): From 17638a6e950b29ce70234cca2a9bbe51f3f96156 Mon Sep 17 00:00:00 2001 From: Peter Briggs Date: Mon, 5 Feb 2024 17:52:35 +0000 Subject: [PATCH 4/4] Update the '--edge' option documentation in "Usage". --- docs/usage.rst | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index f0bf971..7d146d0 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -58,9 +58,18 @@ example: .. image:: nearest_tss.png :align: center -Alternatively distances can be calculated as the shortest distance -between either of the peak edges to either the TSS or the TES of -the gene, by specifying the ``--edge=both`` option:: +(This behaviour can be made explicit by specifying the +``--edge=tss`` option.) + +Alternatively distances can be calculated as the distance from +the gene TES (by specifying the ``--edge=tes``), or as the shortest +distance between either of the peak edges to whichever of the TSS or +the TES of the gene is closest (by specifying the ``--edge=both`` +option). + +For example: + +:: RnaChipIntegrator --edge=both GENES PEAKS