-
Notifications
You must be signed in to change notification settings - Fork 0
/
EulerPath.py
69 lines (56 loc) · 1.84 KB
/
EulerPath.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from typing import Sequence
import networkx, matplotlib.pyplot as plot
def kmers(seq,k):
return [seq[i:i+k] for i in range(len(seq)-k+1)]
def DeBruijnGraph(reads,k):
dicti={}
E=[]
for read in reads:
KMers=kmers(read,k)
edges=[kmers(mer,k-1) for mer in KMers]
for edge in edges:
if edge[0] not in dicti.keys(): dicti[edge[0]]=[]
if edge[1] not in dicti.keys(): dicti[edge[1]]=[]
dicti[edge[0]].append(edge[1])
E.append(edge)
V=list(dicti.keys())
graph={'nodes':V,'edges':E}
#print(graph)
return (graph,dicti)
def visualizeDBGraph(graph):
dbGraph = networkx.DiGraph()
dbGraph.add_nodes_from(graph['nodes']) #Add the nodes to the graph
dbGraph.add_edges_from(graph['edges']) #Add the edges to the graph
networkx.draw(dbGraph, with_labels=True, node_size=1000)
plot.show()
graph,dicti=DeBruijnGraph(['TTACGTT','CCGTTA','GTTAC','GTTCGA','CGTTC'],5)
#graph,dicti=DeBruijnGraph(['TAATGCCATGGGATGTT'],3)
#graph,dicti=DeBruijnGraph(['ATGG', 'TGCC', 'TAAT', 'CCAT', 'GGG', 'GGATG', 'ATGTT'],3)
#visualizeDBGraph(graph)
#print(dicti)
#[No. Of Nodes (out)] - [No. Of Nodes (in)]
Diff={}
for v in dicti.keys(): Diff[v]=len(dicti[v])
for v in dicti.keys():
for u in dicti[v]: Diff[u]-=1
#print(Diff)
startNode =""
for i in Diff.keys():
if(Diff[i] == 1):
startNode = i
#Euler Path
path = []
def EulerPath(dicti,startNode):
for i in dicti[startNode]:
dicti[startNode].remove(i)
EulerPath(dicti,i)
path.append(startNode)
EulerPath(dicti,startNode)
path.reverse()
#print(path)
def getSequence(path):
sequence = path[0]
for i in range(1,len(path)):
sequence += path[i][-1]
return sequence
print(getSequence(path))