Skip to content

Commit

Permalink
Added 'first n most repeated' for cogs parameter
Browse files: browse the repository at this point in the history
  • Loading branch information
vsaona committed Nov 2, 2022
1 parent c434e80 commit c8161f9
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 33 deletions.
30 changes: 22 additions & 8 deletions scripts/GenoVi.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def visualiseGenome(input_file, status, output_file = "circos",
cogs_unclassified = True, deepnog_confidence_threshold = 0, alignment = "center", scale = "variable", keep_temporary_files = False, reuse_predictions = False, window = 5000, verbose = False,
captions = True, captionsPosition = "auto", title = "", title_position = "center", italic_words = 2, size = False,
colour_scheme = "auto", background_colour = "transparent", font_colour = "0, 0, 0", GC_content = "auto", GC_skew ='auto', tRNA = 'auto', rRNA = 'auto', CDS_positive = 'auto', CDS_negative = 'auto', skew_line_colour = '0, 0, 0',
wanted_cogs = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '+']):
wanted_cogs = "ABCDEFGHIJKLMNOPQRSTUVWXYZ+"):

if not cairo:
print("There's been an error finding cairoSVG library, so PNG images might be different from expected. Please prefer using SVG output.")
Expand All @@ -83,8 +83,23 @@ def visualiseGenome(input_file, status, output_file = "circos",
if verbose:
print("DeepNOG lower bound must be between 0 and 1")
raise Exception("DeepNOG lower bound must be between 0 and 1")
if len(wanted_cogs) > 26:
wanted_cogs = wanted_cogs.upper()
if wanted_cogs == "MET-" or wanted_cogs == "METABOLISM":
wanted_cogs = ['C', 'E', 'F', 'G', 'H', 'I', 'P', 'Q']
elif wanted_cogs == "CEL-" or wanted_cogs == "CELLULAR PROCESSES AND SIGNALING":
wanted_cogs = ['D', 'M', 'N', 'O', 'Y', 'U', 'V', 'W', 'Y', 'Z']
elif wanted_cogs == "INF-" or wanted_cogs == "INFORMATION STORAGE AND PROCESSING":
wanted_cogs = ['A', 'B', 'J', 'K', 'L', 'X']
elif wanted_cogs == "POO-" or wanted_cogs == "POORLY CHARACTERIZED":
wanted_cogs = ["R", "S", "None"]
elif len(wanted_cogs) > 26:
wanted_cogs = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'None']
else:
try:
wanted_cogs = int(wanted_cogs)
except ValueError:
wanted_cogs = list(wanted_cogs)


if which("circos") == None:
if verbose:
Expand Down Expand Up @@ -127,7 +142,7 @@ def visualiseGenome(input_file, status, output_file = "circos",
file = temp_folder + "/" + str(i) + ".gbk"
if (not reuse_predictions) and os.path.exists(temp_folder + "/" + output_file_part + "_prediction_deepnog.csv"):
os.remove(temp_folder + "/contig_" + str(i) + "-" + output_file + "_prediction_deepnog.csv")
sizes, cogs_p, cogs_n, lengths, chrms, hist = base(file, temp_folder + "/" + output_file_part, output_file + "/" + output_file, True, True, cogs_unclassified, cogs_unclassified, False, True, deepnog_confidence_threshold, verbose, wanted_cogs=wanted_cogs)
sizes, cogs_p, cogs_n, lengths, chrms, hist, wanted_cogs = base(file, temp_folder + "/" + output_file_part, output_file + "/" + output_file, True, True, cogs_unclassified, cogs_unclassified, False, True, deepnog_confidence_threshold, verbose, wanted_cogs=wanted_cogs)
#sizes_full = sizes_full + sizes
lengths_full = lengths_full + lengths
chrms_full = chrms_full + chrms
Expand Down Expand Up @@ -210,14 +225,13 @@ def visualiseGenome(input_file, status, output_file = "circos",
else:
if (not reuse_predictions) and os.path.exists(temp_folder + "/" + output_file + "_prediction_deepnog.csv"):
os.remove(temp_folder + "/" + output_file + "_prediction_deepnog.csv")
sizes, cogs_p, cogs_n, lengths, chrms, hist = base(input_file, temp_folder + "/" + output_file, output_file + "/" + output_file, True, True, cogs_unclassified, cogs_unclassified, False, True, deepnog_confidence_threshold, verbose, wanted_cogs=wanted_cogs)

sizes, cogs_p, cogs_n, lengths, chrms, hist, wanted_cogs = base(input_file, temp_folder + "/" + output_file, output_file + "/" + output_file, True, True, cogs_unclassified, cogs_unclassified, False, True, deepnog_confidence_threshold, verbose, wanted_cogs=wanted_cogs)
if hist is not None:
draw_histogram(hist, output_file + "/" + output_file)

cogs_p = set(map(lambda x : "None" if x == None else x[0], cogs_p))
cogs_n = set(map(lambda x : "None" if x == None else x[0], cogs_n))

gbkToFna(input_file, temp_folder + "/" + output_file + ".fna", verbose)
maxmins, gc_avg = makeGC(temp_folder + "/" + output_file + ".fna", temp_folder + "/" + output_file, window)

Expand Down Expand Up @@ -295,7 +309,7 @@ def get_args():
parser.add_argument("-o", "--output_file", type=str, help="Directory for output files. Default: circos", default = "circos")
parser.add_argument("-cu", "--cogs_unclassified", action='store_false', help="Do not classify each protein sequence into COG categories.", required = False)
parser.add_argument("-b", "--deepnog_confidence_threshold", type=float, help="Lower threshold for DeepNOG prediction certainty to be considered. Values in range [0,1] Default: 0", default = 0)
parser.add_argument("--cogs", type=str, help="Symbol of each COG to draw. For example, for drawing only information storage and processing related, use 'ABJKLX'. By default, draws all of them.", default = "ABCDEFGHIJKLMNOPQRSTUVWXYZ+")
parser.add_argument("--cogs", type=str, help="Symbol of each COG to draw. For example, for drawing only information storage and processing related, use 'ABJKLX'. When using a number n, the n most common categories will be drawn. By default, draws all of them.", default = "ABCDEFGHIJKLMNOPQRSTUVWXYZ+")
parser.add_argument("-a", "--alignment", type=str, choices=["center", "top", "bottom", "A", "<", "U", "matrix", "two_lines"], help="When using --status complete, this defines the vertical alignment of every circular representation. Options: center, top, bottom, A (First on top), < (first to the left), U (Two on top, the rest below), matrix (multiple rows). By default this is defined by contig sizes", default = "auto")
parser.add_argument("--scale", type=str, choices=["variable", "linear", "sqrt"], help="To select the scale-up ratio between each circular representations when the file is processes as a complete genome. This is useful to ensure visibility of each representation when the length difference is too high. Options: variable, linear, sqrt. Default: sqrt", default = "sqrt")
parser.add_argument("-k", "--keep_temporary_files", action='store_true', help="Do not delete files used for circos image generation, including protein categories prediction by Deepnog.", required = False)
Expand Down Expand Up @@ -333,7 +347,7 @@ def get_args():
args.cogs_unclassified, args.deepnog_confidence_threshold, args.alignment, args.scale, args.keep_temporary_files, args.reuse_predictions, args.window, args.verbose,
args.captions_not_included, args.captions_position, args.title, args.title_position, args.italic_words, args.size,
args.colour_scheme, args.background, args.font_colour, args.GC_content_colour, args.GC_skew_colour, args.tRNA_colour, args.rRNA_colour, args.CDS_positive_colour, args.CDS_negative_colour, args.GC_skew_line_colour,
list(args.cogs))
args.cogs)

def main():
visualiseGenome(*get_args())
Expand Down
38 changes: 15 additions & 23 deletions scripts/colours.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,43 +36,35 @@ def parseColours(colour_scheme = "auto", background_colour = "none", GC_content
CDS_positive = "191, 204, 217" if CDS_positive == "auto" else CDS_positive
CDS_negative = "171, 178, 217" if CDS_negative == "auto" else CDS_negative
skew_line_colour = "163, 191, 217" if skew_line_colour == "auto" else skew_line_colour
elif colour_scheme == "strong" or colour_scheme == "super":
GC_content = "0, 158, 115" if GC_content == "auto" else GC_content
GC_skew = 'eval(sprintf("%d,%d,%d",remap_int(var(value),0,0,213,0),remap_int(var(value),0,0,94,0),remap_int(var(value),0,0,0,0)))' if GC_skew == "auto" else GC_skew
tRNA = "230, 159, 0" if tRNA == "auto" else tRNA
rRNA = "204, 121, 167" if rRNA == "auto" else rRNA
CDS_positive = "86, 180, 233" if CDS_positive == "auto" else CDS_positive
CDS_negative = "0, 114, 178" if CDS_negative == "auto" else CDS_negative
skew_line_colour = "163, 163, 217" if skew_line_colour == "auto" else skew_line_colour
elif colour_scheme == "paradise" or colour_scheme == "tropical":
GC_content = "246,232,195" if GC_content == "auto" else GC_content
GC_skew = 'eval(sprintf("brbg-11-div-%d",remap_int(var(value),0,0,0,1)))' if GC_skew == "auto" else GC_skew
tRNA = "140,81,10" if tRNA == "auto" else tRNA
rRNA = "191,129,45" if rRNA == "auto" else rRNA
rRNA = "223,194,125" if rRNA == "auto" else rRNA
CDS_positive = "1,102,94" if CDS_positive == "auto" else CDS_positive
CDS_negative = "128,205,193" if CDS_negative == "auto" else CDS_negative
skew_line_colour = "163, 163, 217" if skew_line_colour == "auto" else skew_line_colour
elif colour_scheme == "blossom" or colour_scheme == "cherry":
elif colour_scheme == "blossom" or colour_scheme == "cherry" or colour_scheme == "sakura":
GC_content = "230,245,208" if GC_content == "auto" else GC_content
GC_skew = 'eval(sprintf("piyg-11-div-%d",remap_int(var(value),0,0,11,10)))' if GC_skew == "auto" else GC_skew
tRNA = "184,134,224" if tRNA == "auto" else tRNA
rRNA = "127,188,65" if rRNA == "auto" else rRNA
tRNA = "241,182,218" if tRNA == "auto" else tRNA
rRNA = "184,225,134" if rRNA == "auto" else rRNA
CDS_positive = "142,1,82" if CDS_positive == "auto" else CDS_positive
CDS_negative = "222,119,174" if CDS_negative == "auto" else CDS_negative
skew_line_colour = "163, 163, 217" if skew_line_colour == "auto" else skew_line_colour
elif colour_scheme == "dawn" or colour_scheme == "sunrise":
GC_content = "0,0,0" if GC_content == "auto" else GC_content
GC_skew = 'eval(sprintf("%d,%d,%d",remap_int(var(value),0,0,230,213),remap_int(var(value),0,0,159,94),remap_int(var(value),0,0,0,0)))' if GC_skew == "auto" else GC_skew
tRNA = "86, 180, 233" if tRNA == "auto" else tRNA
tRNA = "240, 228, 66" if tRNA == "auto" else tRNA
rRNA = "0, 158, 115" if rRNA == "auto" else rRNA
CDS_positive = "204, 121, 167" if CDS_positive == "auto" else CDS_positive
CDS_negative = "0, 114, 178" if CDS_negative == "auto" else CDS_negative
skew_line_colour = "163, 163, 217" if skew_line_colour == "auto" else skew_line_colour
elif colour_scheme == "autumn" or colour_scheme == "fall":
elif colour_scheme == "autumn" or colour_scheme == "fall": # Color Universal Design, Suggested by Okabe, Ito, in "Color Universal Design (CUD) - How to make figures and presentations that are friendly to Colorblind people", available at https://jfly.uni-koeln.de/color/
GC_content = "0,0,0" if GC_content == "auto" else GC_content
GC_skew = 'eval(sprintf("%d,%d,%d",remap_int(var(value),0,0,0,240),remap_int(var(value),0,0,158,228),remap_int(var(value),0,0,115,66)))' if GC_skew == "auto" else GC_skew
tRNA = "0, 114, 178" if tRNA == "auto" else tRNA
rRNA = "106, 200, 253" if rRNA == "auto" else rRNA
tRNA = "204, 121, 167" if tRNA == "auto" else tRNA
rRNA = "86, 180, 233" if rRNA == "auto" else rRNA
CDS_positive = "213, 94, 0" if CDS_positive == "auto" else CDS_positive
CDS_negative = "230, 159, 0" if CDS_negative == "auto" else CDS_negative
skew_line_colour = "163, 163, 217" if skew_line_colour == "auto" else skew_line_colour
Expand Down Expand Up @@ -228,13 +220,13 @@ def parseColours(colour_scheme = "auto", background_colour = "none", GC_content
CDS_positive = "186, 186, 186" if CDS_positive == "auto" else CDS_positive
CDS_negative = "140, 140, 140" if CDS_negative == "auto" else CDS_negative
skew_line_colour = "171, 171, 171" if skew_line_colour == "auto" else skew_line_colour
else: # Color Universal Design, Suggested by Okabe, Ito, in "Color Universal Design (CUD) - How to make figures and presentations that are friendly to Colorblind people", available at https://jfly.uni-koeln.de/color/
GC_content = "204, 121, 167" if GC_content == "auto" else GC_content
GC_skew = 'eval(sprintf("%d,%d,%d",remap_int(var(value),0,0,86,0),remap_int(var(value),0,0,180,158),remap_int(var(value),0,0,233,115)))' if GC_skew == "auto" else GC_skew
else: # if colour_scheme == "strong" # Color-blind friendly palette
GC_content = "0, 158, 115" if GC_content == "auto" else GC_content
GC_skew = 'eval(sprintf("%d,%d,%d",remap_int(var(value),0,0,213,0),remap_int(var(value),0,0,94,0),remap_int(var(value),0,0,0,0)))' if GC_skew == "auto" else GC_skew
tRNA = "230, 159, 0" if tRNA == "auto" else tRNA
rRNA = "0, 114, 178" if rRNA == "auto" else rRNA
CDS_positive = "240, 228, 66" if CDS_positive == "auto" else CDS_positive
CDS_negative = "213, 94, 0" if CDS_negative == "auto" else CDS_negative
skew_line_colour = "171, 171, 200" if skew_line_colour == "auto" else skew_line_colour
rRNA = "204, 121, 167" if rRNA == "auto" else rRNA
CDS_positive = "86, 180, 233" if CDS_positive == "auto" else CDS_positive
CDS_negative = "0, 114, 178" if CDS_negative == "auto" else CDS_negative
skew_line_colour = "163, 163, 217" if skew_line_colour == "auto" else skew_line_colour

return colour_scheme, background_colour, GC_content, GC_skew, tRNA, rRNA, CDS_positive, CDS_negative, skew_line_colour
5 changes: 3 additions & 2 deletions scripts/create_raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,6 @@ def write_lines(locations, output_, chrx, locus, cogs, verbose = False):
return hist

def write_cog_files(locations, output, chrx, locus, cogs, verbose = False, categories = None):

if len(cogs) == 0:
return
cogs_df = pd.DataFrame.from_dict(cogs)
Expand Down Expand Up @@ -631,6 +630,8 @@ def base(gbk_file, tmp, output, cds, trna, get_cats, divided, complete, rrna = F
flag = False
elif get_cats:
cogs_dict = get_categories(gbk_file, tmp, deepnog_confidence)
if type(wanted_cogs) == type(1): #If it is a number
wanted_cogs = list(map(lambda x: x[1], sorted([(list(map(lambda cat: cat[0], cogs_dict.values())).count(x), x) for x in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"], reverse = True)[:wanted_cogs])) # Transform it to n more repeated COGs
else:
cogs_dict = None

Expand Down Expand Up @@ -662,7 +663,7 @@ def base(gbk_file, tmp, output, cds, trna, get_cats, divided, complete, rrna = F
cogs_p, cogs_n, _, _, hist = create_feature(gbk_file, tmp, output, sizes, "CDS", cogs_dict, divided, verbose = verbose, complete=complete, wanted_cogs = wanted_cogs)


return ((sizes, cogs_p, cogs_n, lengths, chrms, hist))
return ((sizes, cogs_p, cogs_n, lengths, chrms, hist, wanted_cogs))

def createRaw():
gbk_file, output, cds, trna, rrna, get_cats, divided, complete = getArgs()[:]
Expand Down

0 comments on commit c8161f9

Please sign in to comment.