Skip to content

Commit

Permalink
Fixed a bug that was causing some custom sequencing error models to e…
Browse files Browse the repository at this point in the history
…rror out when running
  • Loading branch information
joshfactorial committed Oct 11, 2024
1 parent 2d8c252 commit 587029a
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 5 deletions.
29 changes: 26 additions & 3 deletions neat/model_sequencing_error/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@ def expand_counts(count_array: list, scores: list):
return np.array(ret_list)


def _make_gen(reader):
"""
solution from stack overflow to quickly count lines in a file.
https://stackoverflow.com/questions/19001402/how-to-count-the-total-number-of-lines-in-a-text-file-using-python
"""
b = reader(1024 * 1024)
while b:
yield b
b = reader(1024 * 1024)


def parse_file(input_file: str, quality_scores: list, max_reads: int, qual_offset: int, readlen: int):
"""
Parses an individual file for statistics
Expand Down Expand Up @@ -84,6 +96,13 @@ def parse_file(input_file: str, quality_scores: list, max_reads: int, qual_offse
line = fq_in.readline().strip()
readlens.append(len(line))

# solution from stack overflow to quickly count lines in a file.
# https://stackoverflow.com/questions/19001402/how-to-count-the-total-number-of-lines-in-a-text-file-using-python
if max_reads == np.inf:
f = open(input_file, 'rb')
f_gen = _make_gen(f.raw.read)
max_reads = sum(buf.count(b'\n') for buf in f_gen)

readlens = np.array(readlens)

# Using the statistical mode seems like the right approach here. We expect the readlens to be roughly the same.
Expand Down Expand Up @@ -153,8 +172,12 @@ def parse_file(input_file: str, quality_scores: list, max_reads: int, qual_offse
for i in range(read_length):
this_counts = q_count_by_pos[i]
expanded_counts = expand_counts(this_counts, quality_scores)
average_q = np.average(expanded_counts)
st_d_q = np.std(expanded_counts)
if not expanded_counts:
average_q = 0
st_d_q = 0
else:
average_q = np.average(expanded_counts)
st_d_q = np.std(expanded_counts)
avg_std_by_pos.append((average_q, st_d_q))

# TODO In progress, working on ensuring the error model produces the right shape
Expand Down Expand Up @@ -191,7 +214,7 @@ def plot_stuff(init_q, real_q, q_range, prob_q, actual_readlen, plot_path):
plt.figure(1)
z = np.array(init_q).T
x, y = np.meshgrid(range(0, len(z[0]) + 1), range(0, len(z) + 1))
plt.pcolormesh(x, Y, z, vmin=0., vmax=0.25)
plt.pcolormesh(x, y, z, vmin=0., vmax=0.25)
plt.axis([0, len(z[0]), 0, len(z)])
plt.yticks(range(0, len(z), 10), range(0, len(z), 10))
plt.xticks(range(0, len(z[0]), 10), range(0, len(z[0]), 10))
Expand Down
3 changes: 1 addition & 2 deletions neat/read_simulator/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,7 @@ def initialize_all_models(options: Options):
homozygous_freq=default_cancer_homozygous_freq,
variant_probs=default_cancer_variant_probs,
insert_len_model=default_cancer_insert_len_model,
is_cancer=True,
rng=options.rng
is_cancer=True
)

_LOG.debug("Mutation models loaded")
Expand Down

0 comments on commit 587029a

Please sign in to comment.