Skip to content

Commit

Permalink
#78 dice_self_contigs for large chromosomes, inserting fake entries. …
Browse files Browse the repository at this point in the history
…Mouse retrieval changed to work with streaming.
  • Loading branch information
josiahseaman committed Jan 28, 2019
1 parent 0ff9afa commit 84e6c57
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 13 deletions.
34 changes: 30 additions & 4 deletions DDV/TileLayout.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,12 +247,14 @@ def draw_nucleotides(self, verbose=True):


def output_fasta(self, output_folder, fasta, no_webpage, extract_contigs, sort_contigs, append_fasta_sources=True):
"""This method is destructive. It does not preserve the original self.contigs"""
bare_file = os.path.basename(fasta)
if append_fasta_sources:
self.fasta_sources.append(bare_file)

#also make single file
if not no_webpage:
self.dice_self_contigs(1000000)
write_contigs_to_chunks_dir(output_folder, bare_file, self.contigs)
self.remember_contig_spacing()
fasta_destination = os.path.join(output_folder, 'sources', bare_file)
Expand Down Expand Up @@ -541,10 +543,13 @@ def contig_struct(self):
break # I don't want to use a slice operator on the for loop because that will copy it
xy_seq_start += contig.reset_padding + contig.title_padding
xy_seq_end = xy_seq_start + len(contig.seq)
json.append({"name": contig.name.replace("'", ""), "xy_seq_start": xy_seq_start, "xy_seq_end": xy_seq_end,
"title_padding": contig.title_padding, "tail_padding": contig.tail_padding,
"xy_title_start": xy_seq_start - contig.title_padding,
"nuc_title_start": contig.nuc_title_start, "nuc_seq_start": contig.nuc_seq_start})
fake_start = 0 if not hasattr(contig, 'fake_start') else contig.fake_start
json.append(
{"name": contig.name.replace("'", ""), "xy_seq_start": xy_seq_start, "xy_seq_end": xy_seq_end,
"title_padding": contig.title_padding, "tail_padding": contig.tail_padding,
"xy_title_start": xy_seq_start - contig.title_padding,
"nuc_title_start": contig.nuc_title_start, "nuc_seq_start": contig.nuc_seq_start,
"fake_start": fake_start})
xy_seq_start += len(contig.seq) + contig.tail_padding
return json

Expand Down Expand Up @@ -600,6 +605,27 @@ def all_layouts_json(self):
def remember_contig_spacing(self):
    """Record the current contig layout in ``self.contig_memory``.

    Calls ``self.contig_struct()`` and appends its result to the
    ``self.contig_memory`` list.
    """
    layout_snapshot = self.contig_struct()
    self.contig_memory.append(layout_snapshot)

def dice_self_contigs(self, chunk_size):
    """Split every oversized contig in ``self.contigs`` into ContigChunk pieces.

    Contigs whose sequence is longer than ``chunk_size`` are replaced, in
    place and in order, by consecutive ``ContigChunk`` objects that each
    cover at most ``chunk_size`` characters.  Shorter contigs are kept as
    the same objects.  ``self.contigs`` is mutated in place, so other
    references to the list see the diced result.

    :param chunk_size: maximum sequence length per chunk; must be >= 1.
    :raises ValueError: if ``chunk_size`` is less than 1.
    """
    # A negative step would yield an empty range and silently delete the
    # contig; zero would fail inside range() with an unrelated message.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer, got %r" % (chunk_size,))

    diced = []
    for contig in self.contigs:
        seq_length = len(contig.seq)
        if seq_length > chunk_size:
            diced.extend(ContigChunk(contig, start, chunk_size)
                         for start in range(0, seq_length, chunk_size))
        else:
            diced.append(contig)
    # Slice-assign so the existing list object is updated rather than rebound.
    self.contigs[:] = diced


class ContigChunk(Contig):
    """A slice of a larger contig that carries the original's layout padding.

    The chunk keeps the original contig's name and holds
    ``original.seq[fake_start:fake_start + size]``.  Title padding is kept
    only on the first chunk; tail and reset padding only on the last one.
    All other chunks get zero for those fields.
    """

    def __init__(self, original, fake_start, size):
        """Build the chunk of ``original`` that begins at ``fake_start``.

        :param original: contig being diced; its ``name``, ``seq`` and
            padding / nucleotide-offset attributes are read.
        :param fake_start: offset of this chunk within ``original.seq``.
        :param size: requested chunk length (the final chunk may be shorter).
        """
        super(ContigChunk, self).__init__(original.name, original.seq[fake_start: fake_start + size])
        self.fake_start = fake_start
        first_chunk = not fake_start
        # Compare against the end of the original sequence rather than the
        # chunk's own length: when len(original.seq) is an exact multiple of
        # size the final chunk is full-length and would otherwise not be
        # recognised as the last one, dropping its padding.
        at_the_end = fake_start + size >= len(original.seq)
        self.title_padding = original.title_padding if first_chunk else 0
        self.nuc_title_start = original.nuc_title_start if first_chunk else fake_start
        self.nuc_seq_start = original.nuc_seq_start + fake_start
        self.tail_padding = original.tail_padding if at_the_end else 0
        # NOTE(review): contig_struct adds reset_padding *before* the title;
        # confirm it is intended to live on the last chunk, not the first.
        self.reset_padding = original.reset_padding if at_the_end else 0


def write_contigs_to_chunks_dir(project_dir, fasta_name, contigs):
chunks_dir = os.path.join(project_dir, 'chunks', fasta_name)
Expand Down
2 changes: 1 addition & 1 deletion DDV/fluentdna.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def done(args, output_dir):
Otherwise system exit."""
if args.run_server:
run_server(output_dir)
# beep()
beep()
hold_console_for_windows()
if __name__ == "__main__":
sys.exit(0)
Expand Down
17 changes: 9 additions & 8 deletions DDV/html_template/nucleotideNumber.js
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,8 @@ function showNucleotideNumber(event, viewer) {
start = Nucleotide - 1;
stop = Nucleotide;
}
if(contigs[position_info.fasta_index].hasOwnProperty(position_info.contig_name)){
theSequence = contigs[position_info.fasta_index][position_info.contig_name].substring(start, stop);
if(contigs[position_info.fasta_index].hasOwnProperty(position_info.contig_index)){
theSequence = contigs[position_info.fasta_index][position_info.contig_index].substring(start, stop);
//theSequence = theSequence.replace(/\s+/g, '')
fragmentid = position_info.contig_name + ": (" +
numberWithCommas(start + 1) + " - " + numberWithCommas(stop) + ")";
Expand Down Expand Up @@ -334,21 +334,22 @@ function getSequence(fasta_index, contig_index) {
contentType: "text/html",
success: function (sequence_received) {
file_transfer_in_progress = false;
read_contigs(sequence_received, fasta_index);
read_contigs(sequence_received, fasta_index, contig_index);
},
error: processInitSequenceError
});
}
}

function read_contigs(sequence_received, fasta_index) {
function read_contigs(sequence_received, fasta_index, contig_index) {
//read_contigs equiv in javascript
theSequenceSplit = sequence_received.split(/\r?\n(?=>)/);// begin line, caret ">");
for (let contig_s of theSequenceSplit) {
var lines = contig_s.split(/\r?\n/);
var title = lines[0].slice(1)
//for (var contig_index =0; contig_index < theSequenceSplit.length; contig_index++)
{//Each chunk file is only one contig, no need to loop
var lines = theSequenceSplit[0].split(/\r?\n/);
//var title = lines[0].slice(1)
var seq = lines.slice(1).join('');
contigs[fasta_index][title] = seq;
contigs[fasta_index][contig_index] = seq;
}
return contigs
}
Expand Down

0 comments on commit 84e6c57

Please sign in to comment.