From 84e6c57bdbd5f663ca11eb9bef72b9e9f8561c2b Mon Sep 17 00:00:00 2001 From: Josiah Seaman Date: Mon, 28 Jan 2019 16:11:14 +0000 Subject: [PATCH] #78 dice_self_contigs for large chromosomes, inserting fake entries. Mouse retrieval changed to work with streaming. --- DDV/TileLayout.py | 34 +++++++++++++++++++++++---- DDV/fluentdna.py | 2 +- DDV/html_template/nucleotideNumber.js | 17 +++++++------- 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/DDV/TileLayout.py b/DDV/TileLayout.py index 7d857c4..8b47926 100644 --- a/DDV/TileLayout.py +++ b/DDV/TileLayout.py @@ -247,12 +247,14 @@ def draw_nucleotides(self, verbose=True): def output_fasta(self, output_folder, fasta, no_webpage, extract_contigs, sort_contigs, append_fasta_sources=True): + """This method is destructive. It does not preserve the original self.contigs""" bare_file = os.path.basename(fasta) if append_fasta_sources: self.fasta_sources.append(bare_file) #also make single file if not no_webpage: + self.dice_self_contigs(1000000) write_contigs_to_chunks_dir(output_folder, bare_file, self.contigs) self.remember_contig_spacing() fasta_destination = os.path.join(output_folder, 'sources', bare_file) @@ -541,10 +543,13 @@ def contig_struct(self): break # I don't want to use a slice operator on the for loop because that will copy it xy_seq_start += contig.reset_padding + contig.title_padding xy_seq_end = xy_seq_start + len(contig.seq) - json.append({"name": contig.name.replace("'", ""), "xy_seq_start": xy_seq_start, "xy_seq_end": xy_seq_end, - "title_padding": contig.title_padding, "tail_padding": contig.tail_padding, - "xy_title_start": xy_seq_start - contig.title_padding, - "nuc_title_start": contig.nuc_title_start, "nuc_seq_start": contig.nuc_seq_start}) + fake_start = 0 if not hasattr(contig, 'fake_start') else contig.fake_start + json.append( + {"name": contig.name.replace("'", ""), "xy_seq_start": xy_seq_start, "xy_seq_end": xy_seq_end, + "title_padding": contig.title_padding, "tail_padding": 
contig.tail_padding, + "xy_title_start": xy_seq_start - contig.title_padding, + "nuc_title_start": contig.nuc_title_start, "nuc_seq_start": contig.nuc_seq_start, + "fake_start": fake_start}) xy_seq_start += len(contig.seq) + contig.tail_padding return json @@ -600,6 +605,27 @@ def all_layouts_json(self): def remember_contig_spacing(self): self.contig_memory.append(self.contig_struct()) + def dice_self_contigs(self, chunk_size): + for i in reversed(range(len(self.contigs))): + print(i, end='') + if len(self.contigs[i].seq) > chunk_size: + temp = self.contigs[i] + positions= range(0, len(temp.seq), chunk_size) + self.contigs[i:i+1] = [ContigChunk(temp, start, chunk_size) for start in positions] + + +class ContigChunk(Contig): + def __init__(self, original, fake_start, size): + super(ContigChunk, self).__init__(original.name, original.seq[fake_start: fake_start+size]) + self.fake_start=fake_start + at_the_end = len(self.seq) != size + first_chunk = not fake_start + self.title_padding = original.title_padding if first_chunk else 0 + self.nuc_title_start = original.nuc_title_start if first_chunk else fake_start + self.nuc_seq_start = original.nuc_seq_start + fake_start + self.tail_padding = original.tail_padding if at_the_end else 0 + self.reset_padding = original.reset_padding if at_the_end else 0 + def write_contigs_to_chunks_dir(project_dir, fasta_name, contigs): chunks_dir = os.path.join(project_dir, 'chunks', fasta_name) diff --git a/DDV/fluentdna.py b/DDV/fluentdna.py index 2e79233..45dc593 100644 --- a/DDV/fluentdna.py +++ b/DDV/fluentdna.py @@ -121,7 +121,7 @@ def done(args, output_dir): Otherwise system exit.""" if args.run_server: run_server(output_dir) - # beep() + beep() hold_console_for_windows() if __name__ == "__main__": sys.exit(0) diff --git a/DDV/html_template/nucleotideNumber.js b/DDV/html_template/nucleotideNumber.js index aecc345..ea8a53f 100644 --- a/DDV/html_template/nucleotideNumber.js +++ b/DDV/html_template/nucleotideNumber.js @@ -251,8 +251,8 
@@ function showNucleotideNumber(event, viewer) { start = Nucleotide - 1; stop = Nucleotide; } - if(contigs[position_info.fasta_index].hasOwnProperty(position_info.contig_name)){ - theSequence = contigs[position_info.fasta_index][position_info.contig_name].substring(start, stop); + if(contigs[position_info.fasta_index].hasOwnProperty(position_info.contig_index)){ + theSequence = contigs[position_info.fasta_index][position_info.contig_index].substring(start, stop); //theSequence = theSequence.replace(/\s+/g, '') fragmentid = position_info.contig_name + ": (" + numberWithCommas(start + 1) + " - " + numberWithCommas(stop) + ")"; @@ -334,21 +334,22 @@ function getSequence(fasta_index, contig_index) { contentType: "text/html", success: function (sequence_received) { file_transfer_in_progress = false; - read_contigs(sequence_received, fasta_index); + read_contigs(sequence_received, fasta_index, contig_index); }, error: processInitSequenceError }); } } -function read_contigs(sequence_received, fasta_index) { +function read_contigs(sequence_received, fasta_index, contig_index) { //read_contigs equiv in javascript theSequenceSplit = sequence_received.split(/\r?\n(?=>)/);// begin line, caret ">"); - for (let contig_s of theSequenceSplit) { - var lines = contig_s.split(/\r?\n/); - var title = lines[0].slice(1) + //for (var contig_index =0; contig_index < theSequenceSplit.length; contig_index++) + {//Each chunk file is only one contig, no need to loop + var lines = theSequenceSplit[0].split(/\r?\n/); + //var title = lines[0].slice(1) var seq = lines.slice(1).join(''); - contigs[fasta_index][title] = seq; + contigs[fasta_index][contig_index] = seq; } return contigs }