-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathVisualize_compare_paths.py
executable file
·120 lines (105 loc) · 5.09 KB
/
Visualize_compare_paths.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import pandas as pd
from comparison_sol import str_path
from comparison_sol import path_str
from comparison_sol import solution_to_bps_size
import subprocess
import shlex
import os
import shutil
import signal
# This script visualizes LU paths as arrow plots and dot plots.
# However, newer scripts (arrowplot.py and dot_plot.py) do this better using only Python, so this script will be removed soon.
# Note: all Perl scripts were taken from (Shen et al., 2016); their author is Yun Wang. https://github.com/baderzone/scramble_2016
# All Python and bash scripts were created by Marco Monti.
def save_paths_in_file(paths, paths_name="A_path", file_name="A_test", PATH="./"):
    """Write one or more paths to the text file ``PATH + file_name + ".txt"``.

    Every output line holds a path name, a tab character, and the path
    rendered by ``path_str``.

    Args:
        paths: A single path (flat list), several paths (list of lists), or a
            string that ``str_path`` converts into one of those forms.
        paths_name: One name (str) or a list of names, one per path. When it
            is left at the default and several paths are given, the paths are
            named ``path_0``, ``path_1``, ...
        file_name: Output file name without the ".txt" extension.
        PATH: Prefix prepended verbatim to ``file_name`` (include the
            trailing separator when it is a directory).

    Returns:
        None. Nothing is written when ``paths`` is empty
        (``[]``, ``[[]]``, ``"[]"`` or ``"[[]]"``).

    Raises:
        IndexError: If fewer names than paths are supplied.
    """
    if paths == [] or paths == [[]] or paths == "[]" or paths == "[[]]":
        return None
    if isinstance(paths, str):
        paths = str_path(paths)
    if isinstance(paths_name, str):
        paths_name = [paths_name]

    # Build every line before touching the file system, so that a failure in
    # path_str() or a missing name does not leave a truncated file behind.
    if isinstance(paths[0], list):
        use_generated_names = paths_name == ["A_path"]
        lines = []
        for index, path in enumerate(paths):
            name = "path_" + str(index) if use_generated_names else paths_name[index]
            lines.append(name + "\t" + path_str(path) + "\n")
    else:
        lines = [paths_name[0] + "\t" + path_str(paths) + "\n"]

    # The context manager closes the handle even if the write fails.
    with open(PATH + file_name + ".txt", "w") as out_file:
        out_file.writelines(lines)
    return None
def arrow_plot_csv(file_name="", Linux=True, debug=False):
    """Run ``./bin/SCBINgrams.pl`` on ``<file_name>.csv`` and wait for it to finish.

    The script is called with ``./bin/linearGradient.rgb.tsv``, the input
    file, ``-prefix <file_name>`` and ``-outdir arrow_plots``.

    Args:
        file_name: Input file name without the ".csv" extension; also used as
            the ``-prefix`` value.
        Linux: When true, the child process is started in its own session
            (the equivalent of calling ``os.setsid`` in the child).
        debug: When true, print the command line before running it.

    Returns:
        None. The exit status of the Perl script is not checked.
    """
    # An argument list (no shell) keeps file names containing spaces or shell
    # metacharacters intact and works the same way on every platform.
    command = [
        "perl",
        "./bin/SCBINgrams.pl",
        "./bin/linearGradient.rgb.tsv",
        file_name + ".csv",
        "-prefix",
        file_name,
        "-outdir",
        "arrow_plots",
    ]
    if debug:
        print(" ".join(shlex.quote(arg) for arg in command))
    # run() blocks until the script exits; the child reads EOF on stdin.
    subprocess.run(command, stdin=subprocess.DEVNULL, start_new_session=bool(Linux))
    return None
def arrow_plot(paths, paths_name="A_path", file_name="", Linux=True, debug=False):
    """Save ``paths`` to ``Arrows_<file_name>.txt`` and run ``./bin/SCBINgrams.pl`` on it.

    The script is called with ``./bin/linearGradient.rgb.tsv``, the saved
    text file, ``-prefix <file_name>`` and ``-outdir arrow_plots``.

    Args:
        paths: Path or paths, in any form accepted by ``save_paths_in_file``.
        paths_name: Name or list of names forwarded to ``save_paths_in_file``.
        file_name: Suffix of the intermediate text file and the ``-prefix``
            value given to the Perl script.
        Linux: When true, the child process is started in its own session
            (the equivalent of calling ``os.setsid`` in the child).
        debug: When true, print the command line before running it.

    Returns:
        None. The exit status of the Perl script is not checked.
    """
    # Save the paths in a text file that the Perl script reads.
    save_paths_in_file(paths, paths_name=paths_name, file_name="Arrows_" + file_name)

    # An argument list (no shell) keeps file names containing spaces or shell
    # metacharacters intact and works the same way on every platform.
    command = [
        "perl",
        "./bin/SCBINgrams.pl",
        "./bin/linearGradient.rgb.tsv",
        "Arrows_" + file_name + ".txt",
        "-prefix",
        file_name,
        "-outdir",
        "arrow_plots",
    ]
    if debug:
        print(" ".join(shlex.quote(arg) for arg in command))
    # run() blocks until the script exits; the child reads EOF on stdin.
    subprocess.run(command, stdin=subprocess.DEVNULL, start_new_session=bool(Linux))
    return None
def dot_plot(path1, path2, paths_name=[""], Linux=True, debug=False):
    """Compare two paths by running ``./bin/SCBdotplot_compr.pl`` on them.

    Each path is saved to ``A_<name>.txt`` in the current directory, the Perl
    script is run with ``-prefix dot_plot -outdir arrow_plots``, and the two
    intermediate files are deleted afterwards unless ``debug`` is true.

    Args:
        path1: First path, in any form accepted by ``save_paths_in_file``.
        path2: Second path, in any form accepted by ``save_paths_in_file``.
        paths_name: Two names, one per path. ``[""]`` or ``""`` selects the
            names ``path1`` and ``path2``. If both names are equal, ``_2`` is
            appended to the second one so the two files do not collide.
        Linux: When true, the child process is started in its own session
            (the equivalent of calling ``os.setsid`` in the child).
        debug: When true, print the command line and keep the intermediate
            text files.

    Returns:
        None. The exit status of the Perl script is not checked.
    """
    # The default list is only compared, never modified, so sharing it
    # between calls is harmless.
    if paths_name == [""] or paths_name == "":
        names = ["path1", "path2"]
    else:
        # Work on a copy so the caller's list is never modified.
        names = list(paths_name)
    if names[0] == names[1]:
        names[1] = names[1] + "_2"

    # Save path1 and path2 in text files that the Perl script reads.
    save_paths_in_file(path1, paths_name=names[0], file_name="A_" + names[0])
    save_paths_in_file(path2, paths_name=names[1], file_name="A_" + names[1])
    temp_files = ["A_" + names[0] + ".txt", "A_" + names[1] + ".txt"]

    # An argument list (no shell) keeps file names containing spaces or shell
    # metacharacters intact and works the same way on every platform.
    command = [
        "perl",
        "./bin/SCBdotplot_compr.pl",
        temp_files[0],
        temp_files[1],
        "-prefix",
        "dot_plot",
        "-outdir",
        "arrow_plots",
    ]
    if debug:
        print(" ".join(shlex.quote(arg) for arg in command))
    try:
        # run() blocks until the script exits; the child reads EOF on stdin.
        subprocess.run(command, stdin=subprocess.DEVNULL, start_new_session=bool(Linux))
    finally:
        if not debug:
            for temp_file in temp_files:
                try:
                    os.remove(temp_file)
                except FileNotFoundError:
                    # save_paths_in_file writes nothing for an empty path,
                    # so the file may legitimately not exist.
                    pass
    return None
# test the code
if __name__ == "__main__":
    paths = [[1,2,3,4,5,6,7,8,9,10], [-2,-3,4,5,-4,-3,-2,-1,6,7,8,8,8,-7]]
    # The output directory must exist before the file can be opened for writing.
    os.makedirs("subpaths_SIM", exist_ok=True)
    save_paths_in_file(paths, paths_name="A_path", file_name="A_test_A", PATH="subpaths_SIM/")
    #dot_plot(paths[0], paths[1], paths_name=["path_A_test", "path_B_test"], Linux=True, debug=True)
    #arrow_plot_csv(file_name="Syn9R_MM_sol_vs_Chantal-arrows_5", Linux=True, debug=True)
    #arrow_plot(paths, paths_name=["path_A_test", "path_B_test"], file_name="A_TEST", Linux=True, debug=True)

# Disabled batch driver, kept as a bare string literal so it never runs.
# It reads the compared solutions from an Excel sheet and calls dot_plot()
# for every row whose Percentage_score differs from 1.
"""
# Dot plot of all syn9R strains
if __name__ == "__main__":
compared_sol = pd.read_excel("Syn9R_MM_sol_vs_Chantal-16-scores_sorted.xlsx", engine="openpyxl")
ID_MM = list(compared_sol["ID_MM"])
ID_Chantal = list(compared_sol["ID_Chantal"])
sol_MM = list(compared_sol["MM_seq"])
sol_Chantal = list(compared_sol["Chantal_seq"])
Percentage_score = list(compared_sol["Percentage_score"])
JS094 = list(range(1, 45))
for i in range(len(ID_MM)):
ID_MM_new = ID_MM[i] + "_LR"
ID_Chantal_new = ID_Chantal[i] + "_SR"
if Percentage_score[i] != 1:
print(i, Percentage_score[i])
dot_plot(sol_MM[i], sol_Chantal[i], paths_name=[ID_MM_new, ID_Chantal_new], Linux=True, debug=False)
dot_plot(JS094, sol_MM[i], paths_name=["JS094", ID_MM_new], Linux=True, debug=False)
"""