forked from reefgenomics/SymPortal_framework
-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_sub_collection_run.py
executable file
·4232 lines (3662 loc) · 248 KB
/
data_sub_collection_run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
from __future__ import division
from dbApp.models import data_set, reference_sequence, data_set_sample_sequence, analysis_type, analysis_group, \
data_set_sample, data_analysis, clade_collection, clade_collection_type
# import math
import itertools
import numpy as np
from scipy.stats import gaussian_kde
import operator
import subprocess
import os
import re
import json
import string
from collections import defaultdict
import timeit
from multiprocessing import Queue, Process, Manager, current_process
from django import db
import pickle
import shutil
import sys
from general import writeListToDestination, readDefinedFileToList
from distance import generate_within_clade_UniFrac_distances_ITS2_type_profiles, generate_within_clade_BrayCurtis_distances_ITS2_type_profiles
from output import formatOutput_ord
from plotting import plot_between_its2_type_prof_dist_scatter
# Console colour handling: when hosted inside PyCharm, explicitly disable
# conversion and stripping; otherwise leave both as None (auto-detect).
_hosted_in_pycharm = 'PYCHARM_HOSTED' in os.environ
convert = False if _hosted_in_pycharm else None
strip = False if _hosted_in_pycharm else None
###### Profile Discovery functions ######
def profileDiscovery(nProcessors):
    """Discover and instantiate ITS2 type profiles for the module-level analysisObj.

    Phase 1 (only when ``analysisObj.initialTypeDiscoComplete`` is False):
      - collect the footprint (set of reference sequences) of every
        cladeCollection of the analysis via a pool of worker processes;
      - collapse the raw footprints into supported initial types;
      - create and save an ``analysis_type`` for each collapsed footprint;
      - run ``checkForAdditionalArtefactTypes()`` and pickle its four result
        dicts under ``temp/<analysis id>/`` as a resume point.
    Phase 2 (always): reassess support of types containing artefact DIVs via
    ``reassessSupportOfArtefactDIVContainingTypes()``.

    :param nProcessors: number of worker processes to spawn.

    Side effects: writes analysis_type rows to the DB, changes the process
    cwd to the temp dir, and sets ``analysisObj.initialTypeDiscoComplete``
    and ``analysisObj.analysisTypesDefined``.
    NOTE(review): relies on module-level names ``analysisObj``,
    ``workerDiscoveryTwoWorker``, ``collapsePotentialProfiles_initType_objects``,
    ``checkForAdditionalArtefactTypes`` and
    ``reassessSupportOfArtefactDIVContainingTypes`` defined elsewhere in this file.
    """
    if not analysisObj.initialTypeDiscoComplete:
        # ---------- FIND RAW FOOTPRINTS ----------
        cladeList = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']
        cladeCollectionsOfAnalysis = analysisObj.getCladeCollections()
        # One dict per clade. key = footprint (set of ref seqs);
        # value = [[cladeCollections containing the footprint],
        #          [[maj seq] for each of those cladeCollections]]
        # The maj seqs are each nested in their own list (3D rather than 2D)
        # so that a footprint can later carry several maj seqs per CC.
        masterCladalListOfFootprintDicts = [{} for clade in cladeList]
        taskQueue = Queue()
        outputQueue = Queue()
        for cladecollection in cladeCollectionsOfAnalysis:
            taskQueue.put(cladecollection)
        numProcessors = nProcessors
        for N in range(numProcessors):
            taskQueue.put('STOP')
        allProcesses = []
        # Close all DB connections so each child process recreates its own.
        # http://stackoverflow.com/questions/8242837/django-multiprocessing-and-database-connections
        db.connections.close_all()
        for N in range(numProcessors):
            p = Process(target=workerDiscoveryTwoWorker,
                        args=(taskQueue, outputQueue, analysisObj.withinCladeCutOff))
            allProcesses.append(p)
            p.start()
        # Collect worker output in the main process. Queues have a fairly
        # small size limit, so the populated master dict could not be passed
        # back whole from a dedicated listener process; see
        # http://stackoverflow.com/questions/21641887/python-multiprocessing-process-hangs-on-join-for-large-queue
        killNum = 0
        while 1:
            passedElement = outputQueue.get()
            if passedElement == 'kill':
                killNum += 1
                if killNum == numProcessors:
                    break
            else:
                # passedElement = (footprint, cladeIndex, cladeCollection, majSeq)
                if passedElement[0] not in masterCladalListOfFootprintDicts[passedElement[1]]:
                    masterCladalListOfFootprintDicts[passedElement[1]][passedElement[0]] = \
                        [[passedElement[2]], [[passedElement[3]]]]
                else:
                    masterCladalListOfFootprintDicts[passedElement[1]][passedElement[0]][0].append(passedElement[2])
                    masterCladalListOfFootprintDicts[passedElement[1]][passedElement[0]][1].append([passedElement[3]])
        # Join only after the output queue has been drained.
        for p in allProcesses:
            p.join()
        # ---------- COLLAPSE RAW FOOTPRINTS INTO SUPPORTED INITIAL TYPES ----------
        for footPrintDict in masterCladalListOfFootprintDicts:
            if footPrintDict:  # if there are cladeCollections for this clade
                # Support threshold is fixed at 4 cladeCollections.
                collapsedFootPrintDict = collapsePotentialProfiles_initType_objects(footprintList=footPrintDict,
                                                                                    reqsupport=4,
                                                                                    nprocessors=nProcessors)
                # DEBUG: dump the collapsed profiles for clade C (index 2).
                if masterCladalListOfFootprintDicts.index(footPrintDict) == 2:
                    for initT in collapsedFootPrintDict:
                        print(str(initT.profile))
                        print(','.join([str(cc) for cc in initT.cladeCollection_list]))
                # ---------- CREATE ANALYSIS TYPES FROM THE COLLAPSED FOOTPRINTS ----------
                timeOne = 0
                timeTwo = 0
                print('\n\nCreating analysis types clade {}'.format(
                    cladeList[masterCladalListOfFootprintDicts.index(footPrintDict)]))
                for initialType in collapsedFootPrintDict:
                    timeitone = timeit.default_timer()
                    timeOne += timeit.default_timer() - timeitone
                    timeittwo = timeit.default_timer()
                    # More than one maj ref seq across the supporting
                    # cladeCollections means the type is co-dominant (coDom).
                    coDom = len(initialType.set_of_maj_ref_seqs) > 1
                    newAnalysisType = analysis_type(coDom=coDom, dataAnalysisFrom=analysisObj,
                                                    clade=cladeList[
                                                        masterCladalListOfFootprintDicts.index(footPrintDict)])
                    newAnalysisType.setMajRefSeqSet(initialType.set_of_maj_ref_seqs)
                    newAnalysisType.initTypeAttributes(initialType.cladeCollection_list, initialType.profile)
                    newAnalysisType.save()
                    print('\rCreating analysis type: {}'.format(newAnalysisType.name), end='')
                    # BUG FIX: previously accumulated "timeit.default_timer() - timeitone",
                    # re-counting the interval already credited to timeOne.
                    timeTwo += timeit.default_timer() - timeittwo
                print('\nTimeOne = {}'.format(timeOne))
                # BUG FIX: message previously read 'TimeTne'.
                print('TimeTwo = {}'.format(timeTwo))
        # ---------- CHECK FOR TYPES MISSED DUE TO INTRAS SPANNING THE WITHINCLADECUTOFF ----------
        print('\n\nChecking for additional artefact types')
        CCToTotalSeqsDict, CCToRefSeqListAndAbundances, typeFootPrintDict, CCToInitialTypeDict = \
            checkForAdditionalArtefactTypes(nProcessors)
        # Pickle the four dicts as a resume point (consumed by the else
        # branch below on a subsequent run).
        testDirPath = os.path.join(os.path.dirname(__file__), 'temp/{}'.format(analysisObj.id))
        os.makedirs(testDirPath, exist_ok=True)
        os.chdir(testDirPath)
        # Manager().dict proxies cannot be reloaded with pickle.load, so
        # convert them to plain dicts before dumping.
        # BUG FIX: the dump/load file handles were never closed; use context
        # managers so they are released deterministically.
        with open("CCToTotalSeqsDict_{}".format(analysisObj.id), "wb") as dump_file:
            pickle.dump(dict(CCToTotalSeqsDict), dump_file)
        with open("CCToRefSeqListAndAbundances_{}".format(analysisObj.id), "wb") as dump_file:
            pickle.dump(dict(CCToRefSeqListAndAbundances), dump_file)
        with open("typeFootPrintDict_{}".format(analysisObj.id), "wb") as dump_file:
            pickle.dump(dict(typeFootPrintDict), dump_file)
        with open("CCToInitialTypeDict_{}".format(analysisObj.id), "wb") as dump_file:
            pickle.dump(dict(CCToInitialTypeDict), dump_file)
        analysisObj.initialTypeDiscoComplete = True
        analysisObj.save()
        reassessSupportOfArtefactDIVContainingTypes(CCToTotalSeqsDict, CCToRefSeqListAndAbundances, typeFootPrintDict,
                                                    CCToInitialTypeDict, nProcessors)
    else:
        # Initial discovery already completed on a previous run: restore the
        # four dicts from the pickled resume point and go straight to
        # artefact-DIV reassessment. Paths are relative to testDirPath after
        # the chdir.
        testDirPath = os.path.join(os.path.dirname(__file__), 'temp/{}'.format(analysisObj.id))
        os.chdir(testDirPath)
        with open("CCToTotalSeqsDict_{}".format(analysisObj.id), "rb") as load_file:
            CCToTotalSeqsDict = Manager().dict(pickle.load(load_file))
        with open("CCToRefSeqListAndAbundances_{}".format(analysisObj.id), "rb") as load_file:
            CCToRefSeqListAndAbundances = Manager().dict(pickle.load(load_file))
        with open("typeFootPrintDict_{}".format(analysisObj.id), "rb") as load_file:
            typeFootPrintDict = Manager().dict(pickle.load(load_file))
        with open("CCToInitialTypeDict_{}".format(analysisObj.id), "rb") as load_file:
            CCToInitialTypeDict = Manager().dict(pickle.load(load_file))
        reassessSupportOfArtefactDIVContainingTypes(CCToTotalSeqsDict, CCToRefSeqListAndAbundances, typeFootPrintDict,
                                                    CCToInitialTypeDict, nProcessors)
    analysisObj.analysisTypesDefined = True
    analysisObj.save()
    return
def reassessSupportOfArtefactDIVContainingTypes(CCToTotalSeqsDict, CCToRefSeqListAndAbundances, typeFootPrintDict,
                                                CCToInitialTypeDict, cores):
    '''Re-evaluate which cladeCollections (CCs) support types that contain artefact DIVs.

    Although DIVs found at low abundance are "unlocked" during type assignment,
    many CCs still cannot be assigned to the unlocked types because the
    acceptable ranges of the other DIVs are defined only by the initially
    associated CCs. So, for every type that contains an artefact DIV, every CC
    of that clade is rechecked against the normal requirement (>= 0.03 relative
    abundance per DIV) but with the relaxed cutoff for the unlocked DIVs. A CC
    that fits, and for which this type represents more sequences than its
    current initial type, is moved across. Types that lose CCs are reassessed:
    still-supported types are reinitiated, unsupported ones are deleted and
    their hanging ("stranded") CCs are rehomed (to an existing type with a
    matching footprint, to a newly created type, or redistributed).

    A CC may only support one type per basal group (C3 / C1 / C15), so when a
    CC is associated with several types the one sharing the potential new
    type's basal sequence is the one affected.

    :param CCToTotalSeqsDict: Manager proxy dict, CC id -> total seqs (unused here directly; passed through).
    :param CCToRefSeqListAndAbundances: Manager proxy dict, CC id -> ref seq abundances (passed to workers/helpers).
    :param typeFootPrintDict: Manager proxy dict, type id -> indexed container where, judging from the
        indexing below, [0] = non-artefact DIV ids, [1] = footprint as a set of ref seq ids,
        [2] = artefact DIV ids, [3] = ref seq objects of the type.
        NOTE(review): inferred from usage; confirm against checkForAdditionalArtefactTypes.
    :param CCToInitialTypeDict: Manager proxy dict, CC id -> list of initial type ids. Mutated in place.
    :param cores: number of worker processes to spawn per type check.

    Relies on module-level names analysisObj and unlockedAbund (the relaxed
    abundance cutoff for unlocked DIVs; defined elsewhere in this file), and on
    the worker/helper functions referenced below. Mutates analysis_type rows in
    the DB (reinitiation and deletion) and prints progress to stdout.
    '''
    # Get list of all types of this analysis
    allTypesFromDataAnalysis = analysis_type.objects.filter(dataAnalysisFrom=analysisObj)
    # Get the set of clades that are represented by the types
    cladeList = set()
    for at in allTypesFromDataAnalysis:
        cladeList.add(at.clade)
    for currentClade in cladeList:
        checked = []  # type ids already processed for this clade
        while 1:
            restart = False
            # Re-fetch the types from scratch each pass, as types may have
            # been deleted/created since the last pass.
            allTypesFromDataAnalysis = analysis_type.objects.filter(dataAnalysisFrom=analysisObj)
            cladalTypesIDs = [at.id for at in allTypesFromDataAnalysis if at.clade == currentClade]
            for typeToCheckID in cladalTypesIDs:
                # Skip types already checked in a previous pass
                if typeToCheckID not in checked:
                    print('\n\nChecking {}'.format(analysis_type.objects.get(id=typeToCheckID)))
                    # Only types that contain artefact DIVs need rechecking
                    if typeFootPrintDict[typeToCheckID][2]:
                        # CCs that should be added to this type's initial CC list
                        supportList = []
                        artefactDIVIDs = typeFootPrintDict[typeToCheckID][2]
                        nonArtefactDIVIDs = typeFootPrintDict[typeToCheckID][0]
                        refSeqObjsOfTypeList = typeFootPrintDict[typeToCheckID][3]
                        # Requirement dict for this type ("pnt"): DIV -> required
                        # relative abundance; artefact DIVs get the relaxed cutoff.
                        requirementDict = {refSeqObj: (0.03 if refSeqObj.id in nonArtefactDIVIDs else unlockedAbund) for
                                           refSeqObj in refSeqObjsOfTypeList}
                        listOfCCsToCheck = [cc for cc in analysisObj.getCladeCollections() if cc.clade == currentClade]
                        taskQueue = Queue()
                        supportListMan = Manager()
                        supportList = supportListMan.list()
                        for CC in listOfCCsToCheck:
                            taskQueue.put(CC)
                        for N in range(cores):
                            taskQueue.put('STOP')
                        allProcesses = []
                        # Close DB connections so each worker recreates its own
                        # (connections are lazily created; the DB path must be
                        # resolvable from the workers - settings.py now holds a
                        # location relative to itself for this reason).
                        db.connections.close_all()
                        for N in range(cores):
                            p = Process(target=workerArtefactTwo, args=(
                                taskQueue, supportList, CCToInitialTypeDict, typeToCheckID, CCToRefSeqListAndAbundances,
                                refSeqObjsOfTypeList, requirementDict))
                            allProcesses.append(p)
                            p.start()
                        for p in allProcesses:
                            p.join()
                        if supportList:
                            # CCs were found that should move to the type in question
                            restart = True
                            # Several CCs may leave the same type, so process the
                            # removals type by type rather than CC by CC.
                            setOfTypesAffected = set()
                            typeToCCToBeRemovedDict = defaultdict(
                                list)  # affected type id -> CCs to remove from it
                            for CC in supportList:
                                if CC.id in CCToInitialTypeDict.keys():
                                    initialTypeID = None
                                    if len(CCToInitialTypeDict[CC.id]) == 1:
                                        initialTypeID = CCToInitialTypeDict[CC.id][0]
                                        setOfTypesAffected.add(initialTypeID)
                                        typeToCCToBeRemovedDict[initialTypeID].append(CC)
                                    else:
                                        # CC supports several types: the affected one is
                                        # the type with the same basal seq as the pnt
                                        initialTypeID = find_which_type_is_same_basal_type_as_pnt(
                                            requirementDict,
                                            analysis_type.objects.filter(id__in=CCToInitialTypeDict[CC.id])).id
                                        ### DEBUG ###
                                        if not initialTypeID:
                                            foo = 'bar'
                                        ### DEBUG ###
                                        setOfTypesAffected.add(initialTypeID)
                                        typeToCCToBeRemovedDict[initialTypeID].append(CC)
                                else:  # nothing to remove; just record the new association
                                    CCToInitialTypeDict[CC.id] = [typeToCheckID]
                            # For each affected type: remove the CCs, update the dict,
                            # and either reinitiate the type or delete it, stranding its CCs.
                            print('Reassessing support of affected types')
                            strandedCCs = []
                            for anType in typeToCCToBeRemovedDict.keys():
                                anTypeInQ = analysis_type.objects.get(id=anType)
                                # Remove the departing CCs from the type
                                listOfCCsToBeRemovedStrID = [str(cc.id) for cc in typeToCCToBeRemovedDict[anType]]
                                anTypeInQ.removeCCListFromInitialCladeCollectionList(listOfCCsToBeRemovedStrID)
                                # Re-point the departing CCs at the type being checked
                                for ccstrid in listOfCCsToBeRemovedStrID:
                                    CCToInitialTypeDict[int(ccstrid)].remove(anType)
                                    CCToInitialTypeDict[int(ccstrid)].append(typeToCheckID)
                                # Reassess support: single-intra types keep whatever
                                # support remains (some never had 4 CCs); others need >= 4.
                                listOfCCsInType = [cc for cc in clade_collection.objects.filter(
                                    id__in=[int(x) for x in anTypeInQ.listOfCladeCollectionsFoundInInitially.split(',')
                                            if x != ''])]
                                if listOfCCsInType and len(anTypeInQ.getOrderedFootprintList()) == 1:
                                    print('Short Type {} supported by {} CCs. Reinitiating.'.format(anTypeInQ.name,
                                                                                                    len(
                                                                                                        listOfCCsInType)))
                                    # Still sufficiently supported: reinitiate the type
                                    anTypeInQ.initTypeAttributes(listOfCC=listOfCCsInType,
                                                                 footprintlistofrefseqs=anTypeInQ.getOrderedFootprintList())
                                elif len(listOfCCsInType) >= 4:
                                    print('Type {} supported by {} CCs. Reinitiating.'.format(anTypeInQ.name,
                                                                                              len(listOfCCsInType)))
                                    # Still sufficiently supported: reinitiate the type
                                    anTypeInQ.initTypeAttributes(listOfCC=listOfCCsInType,
                                                                 footprintlistofrefseqs=anTypeInQ.getOrderedFootprintList())
                                else:
                                    # Insufficient support: delete the type, strand its
                                    # CCs and drop them from the association dict.
                                    print(
                                        'Type {} no longer supported. Deleting. {} CCs stranded.'.format(anTypeInQ.name,
                                                                                                         str(len(
                                                                                                             listOfCCsInType))))
                                    del typeFootPrintDict[anTypeInQ.id]
                                    anTypeInQ.delete()
                                    for cc in listOfCCsInType:
                                        if len(CCToInitialTypeDict[cc.id]) > 1:
                                            # Other types still associated: drop only this one
                                            CCToInitialTypeDict[cc.id].remove(anType)
                                        else:
                                            # Only this type was associated: the CC is stranded
                                            del CCToInitialTypeDict[cc.id]
                                        strandedCCs.append(cc)
                            # Attempt to rehome stranded CCs. Only build a new type
                            # for them when there is sufficient support (>= 4 CCs).
                            if len(strandedCCs) >= 4:
                                # Footprint intras common to all stranded CCs
                                totalIntraSet = set()
                                for CC in strandedCCs:
                                    totalIntraSet.update(CC.cutOffFootprint(analysisObj.withinCladeCutOff))
                                refSeqsToRemove = set()
                                for CC in strandedCCs:
                                    intrasInCCInQ = CC.cutOffFootprint(analysisObj.withinCladeCutOff)
                                    for refSeq in list(totalIntraSet):
                                        if refSeq not in intrasInCCInQ:
                                            refSeqsToRemove.add(refSeq)
                                intrasInCommonList = [refSeq for refSeq in list(totalIntraSet) if
                                                      refSeq not in refSeqsToRemove]
                                exists = False
                                if intrasInCommonList:
                                    # Does a type with this footprint already exist?
                                    pntFootprint = set([refSeq.id for refSeq in intrasInCommonList])
                                    typeThatExistsID = 0
                                    for key, footprintdictvalues in typeFootPrintDict.items():
                                        if footprintdictvalues[1] == pntFootprint:
                                            exists = True
                                            typeThatExistsID = key
                                            break
                                if exists:
                                    # Footprint already exists: associate the stranded CCs
                                    # to that type (dict update + reinitiation inside).
                                    associateCCsToExistingTypeAndUpdateDicts(
                                        cctocurrentinitialtypedict=CCToInitialTypeDict, strandedCCs=strandedCCs,
                                        typeThatExistsID=typeThatExistsID, typefootprintdict=typeFootPrintDict)
                                elif not exists and intrasInCommonList:
                                    # No such type yet: create one, unless the intras in
                                    # common mix multiple basal seqs (C3/C1/C15).
                                    if not check_if_intrasInCommonList_contains_multiple_basal_seqs(intrasInCommonList):
                                        makeNewTypeAndAssociateCCsAndUpdateDicts(
                                            cctocurrentinitialtypedict=CCToInitialTypeDict, clade=currentClade,
                                            intrasInCommonList=intrasInCommonList, strandedCCs=strandedCCs,
                                            typefootprintdict=typeFootPrintDict)
                            else:
                                # Too few stranded CCs for a new type: rehome them
                                # among existing types instead.
                                reassociateCCsToExistingTypesAndUpdateDicts(
                                    cctocurrentinitialtypedict=CCToInitialTypeDict,
                                    cctorefabunddict=CCToRefSeqListAndAbundances, clade=currentClade,
                                    strandedCCs=strandedCCs, typefootprintdict=typeFootPrintDict)
                            # Finally, add the supporting CCs to the type being checked
                            # and reinitiate it (their dict entries were updated above).
                            typeToCheckObj = analysis_type.objects.get(id=typeToCheckID)
                            currentListOfCCsInType = [cc for cc in clade_collection.objects.filter(
                                id__in=[int(x) for x in typeToCheckObj.listOfCladeCollectionsFoundInInitially.split(',')
                                        if x != ''])]
                            updatedListOfCCsInType = []
                            updatedListOfCCsInType.extend(currentListOfCCsInType)
                            updatedListOfCCsInType.extend(supportList)
                            typeToCheckObj.initTypeAttributes(listOfCC=updatedListOfCCsInType,
                                                              footprintlistofrefseqs=typeToCheckObj.getOrderedFootprintList())
                            print('Added {} CCs to {}'.format(len(supportList), typeToCheckObj))
                        checked.append(typeToCheckID)
                        if restart:
                            # Types changed; restart the scan of this clade's types
                            break
                    else:
                        # No artefact DIVs: nothing to recheck for this type
                        checked.append(typeToCheckID)
            # A full pass completed without changes: this clade is done
            if not restart:
                break
    return
def check_if_intrasInCommonList_contains_multiple_basal_seqs(intras_in_common_list):
    '''Return True if the reference sequences span more than one basal lineage.

    The basal Symbiodinium lineages recognised here are C3, C1 and the C15
    radiation. C3 and C1 each count individually by exact name match, while
    every sequence whose name contains 'C15' (e.g. C15, C15h) counts
    collectively as a single basal lineage.

    :param intras_in_common_list: iterable of reference_sequence objects
        (anything exposing a ``.name`` string attribute)
    :return: True if more than one basal lineage is represented, else False
    '''
    basal_count = 0
    c15_found = False
    for rs in intras_in_common_list:
        if rs.name == 'C3' or rs.name == 'C1':
            basal_count += 1
        elif 'C15' in rs.name and not c15_found:
            # The whole C15 radiation counts only once, however many variants appear
            basal_count += 1
            c15_found = True
        if basal_count > 1:
            # Early exit: a second distinct basal lineage has been found
            return True
    return False
def check_whether_pnt_needs_comparing_to_current_type(requirementDict, list_of_analysis_types):
    '''Return the first analysis type whose basal sequence matches the pnt's.

    The potential new type (pnt) is described by requirementDict, a mapping of
    reference_sequence -> required relative abundance. Its basal lineage is
    taken to be 'C3' or 'C1' on an exact name match, 'C15' if any sequence
    name contains 'C15', or False if none of these are present. Each analysis
    type's basal lineage is derived the same way from its ordered footprint.

    :param requirementDict: dict of reference_sequence -> required abundance
        describing the potential new type
    :param list_of_analysis_types: iterable of analysis_type objects to test
    :return: the first analysis_type whose basal lineage equals the pnt's
        (including the case where both lack a basal sequence), else False.
        A CC is only allowed to support a pnt that shares a basal lineage
        with one of the types it already associates with.
    '''
    # First determine which basal lineage (if any) the pnt contains.
    # Fixed: the original re-set basalpnt to False on every loop iteration,
    # a dead assignment since each branch breaks immediately after assigning.
    basalpnt = False
    for rs, abund in requirementDict.items():
        if 'C3' == rs.name:
            basalpnt = 'C3'
            break
        elif 'C1' == rs.name:
            basalpnt = 'C1'
            break
        elif 'C15' in rs.name:
            basalpnt = 'C15'
            break
    # For each analysis type, derive its basal lineage and compare to the pnt's
    for at in list_of_analysis_types:
        basal = False
        for rs in at.getOrderedFootprintList():
            if 'C3' == rs.name:
                basal = 'C3'
                break
            elif 'C1' == rs.name:
                basal = 'C1'
                break
            elif 'C15' in rs.name:
                basal = 'C15'
                break
        if basalpnt == basal:
            # Both the pnt and this type carry the same basal lineage
            return at
    # No type shares the pnt's basal lineage; the CC may not support the pnt
    return False
def find_which_type_is_same_basal_type_as_pnt(requirementDict, list_of_analysis_types):
    '''Return the analysis type that shares the pnt's basal sequence.

    The potential new type (pnt) is given as requirementDict (a mapping of
    reference_sequence -> required abundance). Its basal lineage is one of
    'C3', 'C1', 'C15' (any name containing 'C15'), or False if absent. The
    first analysis type in list_of_analysis_types whose footprint yields the
    same basal lineage is returned; False if none does.
    '''
    def _basal_of(ref_seq_iterable):
        # Identify the basal lineage of a collection of reference sequences:
        # exact matches for C3/C1, substring match for the C15 radiation.
        for ref_seq in ref_seq_iterable:
            if ref_seq.name == 'C3':
                return 'C3'
            if ref_seq.name == 'C1':
                return 'C1'
            if 'C15' in ref_seq.name:
                return 'C15'
        return False

    pnt_basal = _basal_of(requirementDict.keys())
    for analysis_type_obj in list_of_analysis_types:
        if _basal_of(analysis_type_obj.getOrderedFootprintList()) == pnt_basal:
            # Matching basal lineage (or both lacking one) -> this is our type
            return analysis_type_obj
    return False
def workerArtefactTwo(input, supportList, CCToInitialTypeDict, typeToCheckID, CCToRefSeqListAndAbundances,
                      refSeqObjsOfTypeList, requirementDict):
    '''Multiprocessing worker: collect clade collections (CCs) that support the
    analysis type identified by typeToCheckID.

    CCs are pulled from the ``input`` queue until a 'STOP' sentinel is read.
    A CC is appended to the managed list ``supportList`` when:
      1. every DIV of the type (refSeqObjsOfTypeList) is present in the CC,
      2. each DIV meets its relative-abundance requirement (requirementDict),
      3. and either the CC currently has no initial type of the same basal
         lineage as the type in question (in which case its maj sequence must
         be one of the type's sequences), or the type in question covers a
         greater summed relative abundance of the CC's sequences than the
         CC's current same-basal initial type does.

    NOTE(review): the first parameter shadows the builtin ``input``; renaming
    it would alter the positional interface used at the Process() call sites,
    so it is left unchanged here.
    '''
    # Now that we have created all of the type this analysis is going to have, i.e. by doing the artefact checks
    # we now need to check to see if any of the CCs want to support these new types
    # hence here we go through each of the types and each of the CCs in turn for each of these types
    # If we find support then we change the current type of the CC
    for CC in iter(input.get, 'STOP'):
        print('\r{}'.format(CC), end='')
        # Historical debug note: some CC IDs were missing from CCToInitialTypeDict
        # because that dict is built only from CCs that received an initial type,
        # while this worker iterates the full 'list of CCs to check'
        # (listOfCCsToCheck = every CC of the analysis in the current clade).
        # A CC with no initial type is therefore legitimately absent from the
        # dict, so membership is checked before any lookup below; such CCs are
        # still allowed to be assessed for supporting the type in question.
        # If the CC's current type is the type in question, go to next CC.
        if CC.id in CCToInitialTypeDict.keys():
            if typeToCheckID in CCToInitialTypeDict[CC.id]:
                # then the type we are checking is already associated with the CC in question
                continue
        # else if the CC doesn't currently have a type associated to it then there is no problem continuting to see
        # if it could support the current type in Q
        # Check to see that each of the types DIVs are found in the CC
        CCRelAbundDict = CCToRefSeqListAndAbundances[CC.id]
        refSeqsInCC = CCRelAbundDict.keys()
        # We don't need to do a maj seq check in here because of the check_whether_pnt_needs_comparing_to_current_type
        # function below that will only allow a CC to support a type if it already contains a type of the same basal
        # sequence. This way it is very unlikely that a new pnt will be able to represent more seqs than the current
        # type unless it has the maj seq in it. And even if it does, fair enough, it covers more seqs.
        if set(refSeqObjsOfTypeList).issubset(set(refSeqsInCC)):
            # Then the CC in question contains the intras of the type in question
            # Now check to see if the rel abund requirements are met
            notMet = False
            for refSeqKey in requirementDict.keys():
                if CCRelAbundDict[refSeqKey] < requirementDict[refSeqKey]:
                    # Then this CC does not have the required DIVs at the required rel abund for typeInQ
                    # Move on to check the next CC
                    notMet = True
                    break
            if notMet:
                continue
            # If we have got here then the CC does have the required DIVs at the required
            # rel abundance for the typeInQ.
            # Now must check to see if typeInQ covers more seqs than its initial type
            # Get tot abundance for current inital type
            # 08/12/17
            # So here the question is 1 - does the CC already associate with a type of the basal sequence of the type
            # in question. If yes then we need to compare against this type. If no, then we can carry on with a
            # attempt at finding an association
            if CC.id in CCToInitialTypeDict.keys():
                # this will also check to make sure that the maj of pnt's basal type is found in the CC
                # if it is not then we should not let the CC associate with the potential new type
                currentInitialType = check_whether_pnt_needs_comparing_to_current_type(requirementDict,
                                                                                       analysis_type.objects.filter(
                                                                                           id__in=CCToInitialTypeDict[
                                                                                               CC.id]))
                # we will only allow a CC to support a potential new type if the pnt is of the same basal as one of the CCs current types
                # This is because we run into trouble in knowing which type to compare the pnt to if they are not the same basal type
                if currentInitialType:
                    # then the clade in question already associated to a type that has the same basal type as the type in question
                    # so we need to check if the new type represents more of the CCs sequences
                    # Summed rel. abundance of the CC's sequences covered by its current initial type
                    currentTypeSeqRelAbundForCC = []
                    for refSeq in currentInitialType.getOrderedFootprintList():
                        relAbund = CCRelAbundDict[refSeq]
                        currentTypeSeqRelAbundForCC.append(relAbund)
                    # Summed rel. abundance covered by the type in question
                    typeInQAbundForCC = []
                    for refSeq in refSeqObjsOfTypeList:
                        relAbund = CCRelAbundDict[refSeq]
                        typeInQAbundForCC.append(relAbund)
                    if sum(typeInQAbundForCC) > sum(currentTypeSeqRelAbundForCC):
                        # Then this CC should be transfered to support the typeInQ
                        # For the time being we will simply hold these CCs in a list
                        # As we can then process all of the CCs from a given type at once
                        # because we will need to update and assess the types that they have come from
                        # to see whether they still have support
                        # Processing the CCs one by one would be far slower
                        supportList.append(CC)
                    # else: # We should only allow a CC to support a type if they are differnt basal types.
                    #     # supportList.append(CC)
            else:
                # if the CC doesn't currently have a type associated to it then there is no need to see
                # if the current type in Q is a better fit. we can simply add it to the supportList
                # Only allow the CC to give support to the type if the CC's Maj is in the potential type
                if CC.maj().referenceSequenceOf in [rs for rs, abund in requirementDict.items()]:
                    supportList.append(CC)
                # else do not allow CC to support potential new type
    return
def checkTypePairingForArtefactType(typeA, typeB, typefootprintdict, clade, cctocurrentinitialtypedict, cctototdict,
cctorefabunddict, cores):
# NB that the cctocurrentinitialtypedict is based on IDs rather than actual db objects
################## CREATE POTENTIAL NEW TYPE PROGRAMATICALLY ###############
# Create the potential new type that will be tested. This will be made up of all of the combined intras of the two
# types in question. The abundance requriement for those intras that are not artefact effected will still
# be 0.03 but for those that are potentially artefact effected it will be lowered to 0.005
totalListOfIntraIDs = set(typefootprintdict[typeA.id][1])
totalListOfIntraIDs.update(typefootprintdict[typeB.id][1])
totalListOfArtefactIntras = set(typefootprintdict[typeA.id][2])
totalListOfArtefactIntras.update(typefootprintdict[typeB.id][2])
# We will programatically represent the potential new type (pnt) as a list of tuples, one for each intra
# first item in tuple will be the refseq of the intra and second item the required abundance for that intra
pnt = []
for intraID in totalListOfIntraIDs:
if intraID in totalListOfArtefactIntras:
requiredAbundance = unlockedAbund
else:
requiredAbundance = 0.03
pnt.append((reference_sequence.objects.get(id=intraID), requiredAbundance))
############################################################################
# Check to see if the potential newtypes footprint already exists
pntFootprint = set([item[0].id for item in pnt])
exists = False
for key, footprintdictvalues in typefootprintdict.items():
try:
if footprintdictvalues[1] == pntFootprint:
exists = True
break
except:
apples = 'kjhg'
if exists:
print('Assessing new type:{}'.format(
[reference_sequence.objects.get(id=refSeqID).name for refSeqID in pntFootprint]))
print('Potential new type already exists')
return False
# Go through each of the CCs and see if the relative intra abundances meet the requirements of the pnt
# if they do then add the id of the CC to the supportList
# This will be used to count if there is sufficient support and to add the CCs to the new type once it is created
# and to remove these CCs from the old types
# this is causing a problem as some CCs are not being considered even though they would fit into this type
# This is because they were not found in either of types A or B initially.
# I think that really we should be looking through all CCs at this point. At least all CCs that are of the clade
# The only thing we need to bear in mind is whether we would still enforce the rule of allowing one CC to only
# support one initial type profile. I think we do need to enforce this rule else, you will have really basic
# types being supported by CCs. e.g. C3 would get loads of support.
# This could end up being very expensive so I would consider the following work flow to get this working.
# Have a dictionary that is CC: current initial typeProfile found in. This will need to be kept upto date.
# Then for each profile we are testing support for, get list of CCs of the clade, go through each CC as
# before looking to see if there is support. When you find a CC that matches the requirements, look up the
# type it was found in initially. Then only give support if current type in consideration represents more of its
# seqs than its current type. If this is so then add this CC to a support type.
# Once the list of CCs that support has been made, then check to see if support is great enough. If it is then
# create the new type as before, but now go through each of the CCs and remove it from the type it was previously
# associated to. Once you have removed it from the type, check to see if they type still has support.
# If it is now below support, add the CC to a list of stranded CCs and delete the type
# Once this is completed we have the new type and a list of stranded CCs. We then need to get the intras in common
# for the stranded CCs (use 0.03 cutoff). If this profile already exists, then do nothing. else if it doesn't
# already exisit then we can create a new type from the CC collection and footprint just identified.
# when doing all of the above, be sure to keep the footprint dict up to date.
# also the CC to type dict will need to be kept uptodate.
# Get list of CCs in this analysis that are also of the clade in Q
# I'm making a change. We can include all CCs here and simply modify the workerOne code so that if a CC doesn't have an itnitial type
# currently it can give support to the PNT if the PNT's DIVs are found in the CC.
listOfCCsToCheck = [cc for cc in analysisObj.getCladeCollections() if cc.clade == typeA.clade if
cc.id in cctocurrentinitialtypedict.keys()]
# listOfCCsToCheck = [cc for cc in analysisObj.getCladeCollections() if cc.clade == typeA.clade if cc.id in cctocurrentinitialtypedict.keys()]
supportList = []
print('Assessing support for potential new type:{}'.format(
[reference_sequence.objects.get(id=refSeqID).name for refSeqID in pntFootprint]))
######################### CHECK EVERY CC OF THE ANALYSISOBJ ################################
# To see whether the CC supports the pnt
############# NEW MP CODE #############
taskQueue = Queue()
supportListManager = Manager()
supportList = supportListManager.list()
# outputQueue = Queue()
for CC in listOfCCsToCheck:
taskQueue.put(CC)
for N in range(cores):
taskQueue.put('STOP')
allProcesses = []
db.connections.close_all()
for N in range(cores):
p = Process(target=workerArtefactOne,
args=(taskQueue, supportList, cctorefabunddict, pnt, cctocurrentinitialtypedict))
allProcesses.append(p)
p.start()
for p in allProcesses:
p.join()
#######################################
####################### IF PNT SUPPORTED CREATE PNT AND REDISTRIBUTE SUPPORTING CCs ############
# Once the list of CCs that support has been made, then check to see if support is great enough. If it is then
# create the new type as before, but now go through each of the CCs and remove it from the type it was previously
# associated to. Once you have removed it from the type, check to see if the type still has support.
# If it is now below support, add the CC to a list of stranded CCs and delete the type
if len(supportList) >= 4:
##### CREATE NEW TYPE #########
newAnalysisType = analysis_type(dataAnalysisFrom=analysisObj, clade=clade)
# listOfCCs = [cc for cc in clade_collection.objects.filter(id__in=supportList)]
newAnalysisType.initTypeAttributes(supportList, [pntItem[0] for pntItem in pnt])
newAnalysisType.save()
print('\nSupport found. Creating new type:{}'.format(newAnalysisType))
# We need to keep the typefootprintdict upto date when types are created or deleted
# get list of refseqs in type
####### UPDATE TYPEFOOTPRINT ########
refSeqIDs = set([refSeq.id for refSeq in newAnalysisType.getOrderedFootprintList()])
artefactIntraIDs = set([int(x) for x in newAnalysisType.artefactIntras.split(',') if x != ''])
nonArtefactIDs = [id for id in refSeqIDs if id not in artefactIntraIDs]
footprint = newAnalysisType.getOrderedFootprintList()
typefootprintdict[newAnalysisType.id] = [nonArtefactIDs, refSeqIDs, artefactIntraIDs, footprint]
################# REDISTRIBUTE SUPPORTING TYPES ###############
# Now remove the CCs from the types they were previously associated to and update the
# cctocurrentinitialtypedict accordingly. # We will also likely need to reinitiate each of the types
# There are likely to be types that have more than one CC being removed from them so the most effective
# way to process this is not to go CC by CC but rather type by type
# So first get a list of the types that have been effected
setOfTypesAffected = set()
# at the same time create a dict that tracks which CCs need to be remvoed from each type
typeToCCToBeRemovedDict = defaultdict(list) # we may have to do this manually instead of defaultdict
for CC in supportList:
if CC.id in cctocurrentinitialtypedict.keys():
# 08/12/17 again we need to work out which of the types if there are multiples, was effected: DONE but check the new method check_which_type...
initialTypeID = None
if len(cctocurrentinitialtypedict[CC.id]) == 1:
initialTypeID = cctocurrentinitialtypedict[CC.id][0]
else:
# this will return the analysis_type from the list of analysis types that matches the pnt
initial_type = check_which_type_has_same_basal_seq(pnt, analysis_type.objects.filter(
id__in=cctocurrentinitialtypedict[CC.id]))
if initial_type:
initialTypeID = initial_type.id
else:
# If False then this means that the pnt didn't share any basal seqs in common with the current
# init types. In this case, we don't need to remove any of the current initTypes