diff --git a/CESAR_wrapper.py b/CESAR_wrapper.py
index d16c058..90fc6ed 100755
--- a/CESAR_wrapper.py
+++ b/CESAR_wrapper.py
@@ -60,6 +60,8 @@
 EXP_REG_EXTRA_FLANK = 50
 EXON_SEQ_FLANK = 10
 
+ERR_CODE_FRAGM_ERR = 2
+
 # alias; works for Hillerlab-only
 two_bit_templ = "/projects/hillerlab/genome/gbdb-HL/{0}/{0}.2bit"
 chain_alias_template = "/projects/hillerlab/genome/gbdb-HL/{0}/lastz/vs_{1}/axtChain/{0}.{1}.allfilled.chain.gz"
@@ -82,6 +84,8 @@
     LOCATION, "modules", "extract_subchain_slib.so"
 )
 DEFAULT_CESAR = os.path.join(LOCATION, "CESAR2.0", "cesar")
+OPT_CESAR_LOCATION = os.path.join(LOCATION, "cesar_input_optimiser.py")
+
 ex_lib = ctypes.CDLL(extract_subchain_lib_path)
 ex_lib.extract_subchain.argtypes = [ctypes.c_char_p, ctypes.c_char_p, ctypes.c_char_p]
 ex_lib.extract_subchain.restype = ctypes.POINTER(ctypes.c_char_p)
@@ -176,6 +180,8 @@
 EQ_ACC_PROFILE = os.path.join(LOCATION, "supply", "eq_acc_profile.txt")
 EQ_DO_PROFILE = os.path.join(LOCATION, "supply", "eq_donor_profile.txt")
 
+ORTH_LOC_LINE_SUFFIX = "#ORTHLOC"
+
 FRAGMENT = -1
@@ -359,7 +365,8 @@ def parse_args():
     app.add_argument(
         "--exon_flanks_file",
         default=None,
-        help="Temporary file containing exon flanks ",
+        help=("Temporary file containing exon flanks; use only if you run "
+              "the optimized CESAR version"),
     )
     app.add_argument(
         "--predefined_regions",
@@ -369,7 +376,44 @@
         "for CESAR optimisation. If set: call optimised CESAR without LASTZ",
     )
     app.add_argument(
-        "--opt_precompute", action="store_true", dest="opt_precompute", help="later"
+        "--opt_precompute",
+        action="store_true",
+        dest="opt_precompute",
+        help="Only precompute memory consumption for the optimized CESAR version"
+    )
+    app.add_argument(
+        "--save_orth_locus",
+        action="store_true",
+        dest="save_orth_locus",
+        help="Only precompute orthologous regions for the optimized CESAR version"
+    )
+    app.add_argument(
+        "--precomputed_orth_loci",
+        default=None,
+        help="Path to loci saved with --save_orth_locus"
+    )
+    app.add_argument(
+        "--do_not_check_exon_chain_intersection",
+        default=False,
+        action="store_true",
+        dest="do_not_check_exon_chain_intersection",
+        help="Do not extract chain blocks (not recommended)"
+    )
+    app.add_argument(
+        "--alt_frame_del",
+        "--lfd",
+        default=False,
+        action="store_true",
+        dest="alt_frame_del",
+        help="Consider codons in alternative frame (between compensated FS) deleted"
+    )
+    app.add_argument(
+        "--mask_all_first_10p",
+        "--m_f10p",
+        action="store_true",
+        dest="mask_all_first_10p",
+        help="Automatically mask all inactivating mutations in the first 10%% of "
+             "the reading frame, ignoring the ATG codons distribution."
     )

     if len(sys.argv) == 1:
@@ -1415,6 +1459,8 @@ def run_cesar(
         cesar_cmd += " --no_lastz"
     if opt_precompute:
         cesar_cmd += " --memory_consumption"
+
+    verbose(f"Calling CESAR command:\n{cesar_cmd}")
     p = subprocess.Popen(
         cesar_cmd, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE
     )
@@ -1880,9 +1926,10 @@ def process_cesar_out(cesar_raw_out, query_loci, inverts):
         # extract protein sequences also here, just to do it in one place
         part_pIDs, part_blosums, prot_seqs_part = compute_score(codons_data)
         prot_seqs[chain_id] = prot_seqs_part
-        # convert codon list to {"ref": [t_codons], "que": [q_codons]}
+        # convert codon list to {"ref": [t_codons], "que": [q_codons]}
         codon_seqs_part = extract_codon_data(codons_data)
         codon_seqs[chain_id] = codon_seqs_part
+
         (
             exon_query_seqs,
             exon_ref_seqs,
@@ -1925,6 +1972,7 @@
         percIDs[chain_id] = part_pIDs
         blosums[chain_id] = part_blosums
         query_coords[chain_id] = abs_coords
+
     ret = (
         exon_queries,
         exon_refs,
@@ -1940,6 +1988,18 @@
     return ret


+def __check_fragm_coords_intersect_in_q(query_coords):
+    """Check whether coordinates of predicted exons intersect in the query."""
+    exon_ranges_w_chrom = list(query_coords.values())
+    exon_ranges = [x.split(":")[1].split("-") for x in exon_ranges_w_chrom]
+    exon_starts = [int(x[0]) for x in exon_ranges]
+    exon_ends = [int(x[1]) for x in exon_ranges]
+    exon_intervals = list(zip(exon_starts, exon_ends))
+    exon_intervals_sorted = sorted(exon_intervals, key=lambda x: x[0])
+    intersecting_intervals = _check_seq_of_intervals_intersect(exon_intervals_sorted)
+    return intersecting_intervals
+
+
 def process_cesar_out__fragments(cesar_raw_out, fragm_data, query_loci, inverts):
     """Process CESAR output for a gene assembled from fragments."""
     exon_queries, exon_refs, percIDs, blosums, query_coords, = (
@@ -1953,7 +2013,7 @@
     cesar_raw_lines = cesar_raw_out.split("\n")
     target_seq_raw = cesar_raw_lines[1]
     query_seq_raw = cesar_raw_lines[3]
-    chain_id_to_codon_table = {}
+    chain_id_to_codon_table = {}  # chain id -> raw codon table
     codons_data = parse_cesar_out(target_seq_raw, query_seq_raw, v=VERBOSE)
     chain_id_to_codon_table[FRAGMENT] = codons_data
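A note on the overlap check introduced here: `__check_fragm_coords_intersect_in_q` sorts the predicted exon intervals by start coordinate and delegates to `_check_seq_of_intervals_intersect` (added further down in this patch), which only needs to compare each interval with its immediate successor. A minimal sketch with invented query coordinates:

    def _check_seq_of_intervals_intersect(intervals):
        # assumes intervals are pre-sorted by start coordinate
        for i in range(len(intervals) - 1):
            curr_one = intervals[i]
            next_one = intervals[i + 1]
            if next_one[0] < curr_one[1]:  # next starts before current ends
                return (curr_one[0], curr_one[1])
        return None

    exons = sorted([(462931, 463100), (463050, 463400)])  # toy intervals
    assert _check_seq_of_intervals_intersect(exons) == (462931, 463100)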
@@ -2031,6 +2091,14 @@
             # this is completely OK
             exon_grange = f"{q_chrom}:{exon_abs_start}-{exon_abs_end}"
             abs_coords[exon_num] = exon_grange
+
+    # check that abs coords do not intersect
+    coords_intersect = __check_fragm_coords_intersect_in_q(abs_coords)
+    if coords_intersect:  # error, cannot go further
+        print("Error! Cannot stitch fragments properly, exon intervals intersect after merge")
+        print(f"Intersecting intervals are: {coords_intersect}")
+        print("Abort")
+        sys.exit(ERR_CODE_FRAGM_ERR)
     # add to the global dicts
     exon_queries[FRAGMENT] = exon_query_seqs
     exon_refs[FRAGMENT] = exon_ref_seqs
@@ -2048,7 +2116,7 @@
         prot_seqs,
         codon_seqs,
         aa_sat_seq,
-        chain_id_to_codon_table
+        chain_id_to_codon_table,
     )
     return ret
@@ -2185,10 +2253,12 @@ def invert_complement(seq):
     return reverse_complement


-def save_prot(gene_name, prot_seq, prot_out):
+def save_prot(gene_name, prot_seq, prot_out, del_mis_exons_):
     """Save protein sequences."""
     if prot_out is None:
         return
+    del_mis_exons = set() if del_mis_exons_ is None else set(del_mis_exons_)
+
     f = open(prot_out, "w") if prot_out != "stdout" else sys.stdout
     for chain_id, seq_collection in prot_seq.items():
         proj_name = f"{gene_name}.{chain_id}"
@@ -2197,8 +2267,11 @@
         que_seqs = []
         for exon in exons:
             ref_part = seq_collection[exon]["ref"]
-            que_part = seq_collection[exon]["que"]
             ref_seqs.append(ref_part)
+            if exon in del_mis_exons:
+                que_part = ["-" for _ in ref_part]
+            else:
+                que_part = seq_collection[exon]["que"]
             que_seqs.append(que_part)
         ref_aa_seq = "".join(ref_seqs)
         que_aa_seq = "".join(que_seqs)
@@ -2353,7 +2426,7 @@ def extend_rel_regions(extended_regions, query_len):
     return ext_regions


-def read_predefined_regions(arg):
+def read_predefined_regions(arg, gene):
     """Read predefined regions."""
     ret = defaultdict(list)
     if arg is None:
@@ -2361,23 +2434,67 @@
     f = open(arg, "r")
     for line in f:
         line_data = line.rstrip().split("\t")
-        line_strip = line.rstrip()
-        chain_id = int(line_data[0])
-        ret[chain_id].append(line_strip)
+        line_gene = line_data[0]
+        if line_gene != gene:
+            continue
+        chain_id = int(line_data[1])
+        start = line_data[2]
+        end = line_data[3]
+        line_to_out = f"{chain_id}\t{start}\t{end}"
+        ret[chain_id].append(line_to_out)
     f.close()
     return ret


+def _check_seq_of_intervals_intersect(intervals):
+    intervals_num = len(intervals)
+    for i in range(intervals_num - 1):
+        # (start, end)
+        curr_one = intervals[i]
+        next_one = intervals[i + 1]
+        # sorted by beginning
+        # if start of the next < end of the curr
+        # -> they intersect
+        if next_one[0] < curr_one[1]:
+            return (curr_one[0], curr_one[1])
+    return None  # nothing suspicious found
+
+
+def parse_precomp_orth_loci(transcript_name, path):
+    """Read precomputed orthologous loci from a file."""
+    ret_1 = {}  # chain to search locus
+    ret_2 = {}  # chain to subchain locus
+
+    f = open(path, "r")
+    # sample file line:
+    # #ORTHLOC	ENST00000262455	1169	JH567521:462931-522892	JH567521:462931-522892
+    # marker - transcript - chain - search locus - subchain locus
+    for line in f:
+        ld = line.rstrip().split("\t")
+        if ld[1] != transcript_name:
+            continue
+        chain_id = int(ld[2])
+        ret_1[chain_id] = ld[3]
+        ret_2[chain_id] = ld[4]
+    f.close()
+
+    return ret_1, ret_2
+
+
 def redo_codon_sequences(codon_tables, del_mis_exons):
     """Rebuild codon alignments: now excluding deleted and missing exons."""
-    ret = {}
+    codon_ret = {}
     for chain_id, codon_table in codon_tables.items():
         excl_exons = del_mis_exons.get(str(chain_id), set())
         codon_seqs_upd = extract_codon_data(codon_table, excl_exons=excl_exons)
-        ret[chain_id] = codon_seqs_upd
-    return ret
+        codon_ret[chain_id] = codon_seqs_upd
+    return codon_ret


+def extract_prot_sequences_from_codon(codon_s):
+    """Extract protein sequences from codon sequences."""
+    # TODO: not implemented yet, returns an empty placeholder
+    return []
+
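`parse_precomp_orth_loci` above and the `--save_orth_locus` branch inside `realign_exons` below are two halves of one round trip: the precompute step prints `#ORTHLOC`-prefixed lines to stdout, and later runs read them back. A sketch of the format, reusing the sample line from the comment above:

    line = "#ORTHLOC\tENST00000262455\t1169\tJH567521:462931-522892\tJH567521:462931-522892"
    ld = line.rstrip().split("\t")
    # ld[0] is the ORTH_LOC_LINE_SUFFIX marker; the rest follows the documented order
    transcript_name, chain_id = ld[1], int(ld[2])
    search_locus, subch_locus = ld[3], ld[4]
    assert search_locus == "JH567521:462931-522892"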
 def realign_exons(args):
     """Entry point."""
@@ -2419,11 +2536,26 @@
     aa_block_sat_chain = {}  # one of dicts to mark exceptionally good exon predictions
     fragments_data = []  # required for fragmented genomes
     chains_in_input = True
-    force_include_regions_opt_v = (
-        []
-    )  # regions we force to include in the optimized version
+
+    # regions we force to include in the optimized version
+    # these regions != orthologous loci in the query; they are used in the optimised
+    # CESAR version as regions within the orthologous loci that must be included
+    # in the CESAR run itself
+    force_include_regions_opt_v = []
+
+    verbose("Reading query regions")
-    chain_to_predefined_regions = read_predefined_regions(args["predefined_regions"])
+    chain_to_predefined_regions = read_predefined_regions(args["predefined_regions"], args["gene"])
+
+    # no need to extract chain blocks and compute where the orthologous locus is:
+    # it's already precomputed
+    chain_to_precomp_search_loci = {}
+    chain_to_precomp_subch_loci = {}
+    if args["precomputed_orth_loci"]:
+        plc_ = parse_precomp_orth_loci(args["gene"], args["precomputed_orth_loci"])
+        chain_to_precomp_search_loci = plc_[0]
+        chain_to_precomp_subch_loci = plc_[1]
+
     # if chains and args["fragments"]:
     # the normal branch: call CESAR vs 1+ query sequences
     for chain_id in chains:  # in region mode this part is skipped
@@ -2435,9 +2567,34 @@
         # most likely we need only the chain part that intersects the gene
         # and skip the rest:
         verbose("Cutting the chain...")
-        search_locus, subch_locus, chain_data = chain_cut(
-            chain_str, gene_range, args["gene_flank"], args["extra_flank"]
-        )
+
+        if args["precomputed_orth_loci"]:
+            search_locus = chain_to_precomp_search_loci[chain_id]
+            subch_locus = chain_to_precomp_subch_loci[chain_id]
+            # TODO: chain_data contains unnecessary information
+            # it's already present in the chain_header variable
+            # can be optimised (historical reasons)
+            _t_strand = True if chain_header[2] == "+" else False
+            _q_strand = True if chain_header[7] == "+" else False
+            _t_size = int(chain_header[3])
+            _q_size = int(chain_header[8])
+            chain_data = (_t_strand, _t_size, _q_strand, _q_size)
+        else:
+            search_locus, subch_locus, chain_data = chain_cut(
+                chain_str, gene_range, args["gene_flank"], args["extra_flank"]
+            )
+
+        if args["save_orth_locus"]:
+            # write STDOUT LINE about orthologous locus
+            # warning: to be used only on the precompute step
+            # do not use it for main CESAR calls (those that actually run CESAR)
+            # if you still want to do this in the main pipeline: exclude lines starting
+            # with ORTH_LOC_LINE_SUFFIX from the CESAR wrapper output
+            g_ = args["gene"]
+            line = f"{ORTH_LOC_LINE_SUFFIX}\t{g_}\t{chain_id}\t{search_locus}\t{subch_locus}\n"
+            sys.stdout.write(line)
+
+        # chain data: t_strand, t_size, q_strand, q_size
         chain_qStrand = chain_data[2]
         chain_qSize = chain_data[3]
@@ -2470,6 +2627,8 @@
             _query_seq_ext, subch_locus, args["gap_size"], directed
         )
         # blocks are [target_start, target_end, query_start, query_end]
+        # TODO: can be optimised here
+        # and also can be written to log - if same chain & bed - same output
         subchain_blocks_raw = extract_subchain(chain_str, subch_locus)
         # swap blocks in correct orientation and fill interblock ranges
         subchain_blocks = orient_blocks(subchain_blocks_raw, chain_data)
@@ -2581,6 +2740,7 @@
         fragments_data = sorted(fragments_data, key=lambda x: x[5])
     else:  # gene is - -> reverse sort of chains
         fragments_data = sorted(fragments_data, key=lambda x: x[6], reverse=True)
+
     # merge query feat dictionaries
     exon_gap = merge_dicts(chain_exon_gap.values())
     exon_class = merge_dicts(chain_exon_class.values())
@@ -2588,6 +2748,7 @@
     aa_block_sat = merge_dicts(aa_block_sat_chain.values())
     missing_exons = intersect_lists(chain_missed.values())
+
     query_seq_chunks = []
     for elem in fragments_data:
         # stitch query seq in a proper order; elem[0] -> chain_id
@@ -2643,7 +2804,18 @@
     append_u12(args["u12"], args["gene"], ref_ss_data)
     make_cesar_in(prepared_exons, query_sequences, cesar_in_filename, ref_ss_data)
     # run cesar itself
+    # TODO: exclude later
+    # print(force_include_regions_opt_v)
     cesar_bin = args.get("cesar_binary") if args.get("cesar_binary") else DEFAULT_CESAR
+    if args.get("cesar_binary"):
+        cesar_bin = args.get("cesar_binary")
+    elif args["opt_cesar"] is True:
+        cesar_bin = OPT_CESAR_LOCATION
+    elif args["opt_precompute"]:
+        cesar_bin = OPT_CESAR_LOCATION
+    else:
+        cesar_bin = DEFAULT_CESAR
+
     if not args["cesar_output"]:
         cesar_raw_out = run_cesar(
             cesar_in_filename,
@@ -2662,8 +2834,6 @@
     else:  # very specific case, load already saved CESAR output
         with open(args["cesar_output"], "r") as f:
             cesar_raw_out = f.read()
-    # if force_include_reg_file:
-    #     os.remove(force_include_reg_file) if os.path.isfile(force_include_reg_file) else None
     os.remove(cesar_in_filename) if is_temp else None  # wipe temp if temp
     # save raw CESAR output and close if required
     save(cesar_raw_out, args["raw_output"], t0) if args["raw_output"] else None
@@ -2682,13 +2852,12 @@
     query_coords = proc_out[4]  # genomic coordinates in the query
     exon_num_corr = proc_out[5]  # in case of intron del: ref/que correspondence
     prot_s = proc_out[6]  # protein sequences in query
-    codon_s = proc_out[7]
+    codon_s = proc_out[7]  # dict containing sequences of ref and query codons
     aa_cesar_sat = proc_out[8]  # says whether an exon has outstanding quality
-    # raw codon table \ superset of "codon_s" basically
+    # raw codon table \ superset of "codon_s" basically
     # after changes in TOGA1.1 is needed again
     # TODO: needs refactoring
     codon_tables = proc_out[9]
-
     aa_eq_len = aa_eq_len_check(exon_sequences, query_exon_sequences)

     if chains:
@@ -2697,6 +2866,7 @@
         chain_exon_class = get_a_plus(
             chain_exon_class, aa_cesar_sat, aa_block_sat_chain, aa_eq_len
         )
+
     # time to arrange all these data altogether
     final_output, chain_ex_inc = arrange_output(
         args["gene"],
@@ -2734,21 +2904,25 @@
             ref_ss=ref_ss_data,
             sec_codons=sec_codons,
             no_fpi=args["no_fpi"],
+            alt_f_del=args["alt_frame_del"],
+            mask_all_first_10p=args["mask_all_first_10p"],
         )
     else:  # do not call inact mut scanner
         loss_report = None
         del_mis_exons = None
-
+
     # del_mis_exons contains:
     # chain_id(string) = [0-based exon nums]
-    if del_mis_exons is not None and len(del_mis_exons.keys()) > 0:
+    need_correct_codon_and_prot = del_mis_exons is not None and len(del_mis_exons.keys()) > 0
+    if need_correct_codon_and_prot:
         # if exists, need to filter codon alignment accordingly
         # chain id is numeric in "codon_table"
         codon_s = redo_codon_sequences(codon_tables, del_mis_exons)
+        prot_s = extract_prot_sequences_from_codon(codon_s)

     # save protein/codon ali and text output
-    save_prot(args["gene"], prot_s, args["prot_out"])
+    # save_prot(args["gene"], prot_s, args["prot_out"], del_mis_exons)
     save_codons(args["gene"], codon_s, args["codon_out"])
     save(final_output, args["output"], t0, loss_report)
     sys.exit(0)
diff --git a/cesar_runner.py b/cesar_runner.py
index fcf0b72..f59999a 100755
--- a/cesar_runner.py
+++ b/cesar_runner.py
@@ -12,6 +12,9 @@
 __credits__ = ["Michael Hiller", "Virag Sharma", "David Jebb"]

 MAX_ATTEMPTS = 2
+ZERO_CODE = 0
+ERR_CODE = 1
+FRAGM_CHAIN_ISSUE_CODE = 2


 def parse_args():
@@ -23,6 +26,7 @@
         "--check_loss", default=None, help="File to save gene loss data if requested"
     )
     app.add_argument("--rejected_log", default=None, help="Log gene rejection events")
+    app.add_argument("--unproc_log", "--ul", default=None, help="Log unprocessed genes")
     # print help if there are no args
     if len(sys.argv) < 2:
         app.print_help()
@@ -43,13 +47,22 @@ def call_job(cmd):
         rc = p.returncode
         cmd_out = b_stdout.decode("utf-8")
         err_msg = b_stderr.decode("utf-8").replace("\n", " ")
-        if rc == 0:
-            return cmd_out, 0
+        if rc == ZERO_CODE:
+            return cmd_out, ZERO_CODE
+        elif rc == FRAGM_CHAIN_ISSUE_CODE:
+            err_msg = f"CESAR_wrapper.py detected that fragments overlap for {cmd}, abort"
+            return err_msg, FRAGM_CHAIN_ISSUE_CODE
         else:
             eprint(err_msg)
             eprint(f"\n{cmd} FAILED")
             attempts += 1
-    return err_msg, 1  # send failure signal
+    return err_msg, ERR_CODE  # send failure signal
+
+
+def __job_to_transcript(job):
+    """Extract transcript ID from job."""
+    fields = job.split()
+    return fields[1]


 def main():
@@ -60,6 +73,7 @@
         # text file, a command per line
         jobs = [x.rstrip() for x in f.readlines()]
     jobs_num = len(jobs)
+    unprocessed_genes = []

     out = open(args.output, "w")  # handle output file
     gene_loss_data = []  # list to keep gene loss detector out
@@ -69,6 +83,12 @@
         eprint(f"Calling:\n{job}")
         # catch job stdout
         job_out, rc = call_job(job)
+        if rc == FRAGM_CHAIN_ISSUE_CODE:
+            # very special case -> nothing we can do
+            # mark as missing, I guess
+            rejected.append(f"{job}\tfragment chains overlap\n")
+            unprocessed_genes.append(__job_to_transcript(job))
+            continue
         if rc == 1:
             # a job failed with code 1 -> send the signal upstream
             # abort execution, write what job exactly failed
@@ -136,6 +156,11 @@
         f.write("".join(rejected))
         f.close()

+    if args.unproc_log and len(unprocessed_genes) > 0:
+        f = open(args.unproc_log, "w")
+        for elem in unprocessed_genes:
+            f.write(f"{elem}\n")
+        f.close()

 if __name__ == "__main__":
     main()
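The three constants added here spell out the exit-code contract with CESAR_wrapper.py, which now exits with ERR_CODE_FRAGM_ERR (2) when stitched fragment exons overlap. For illustration, a compact summary of how `main()` reacts to each code; this condenses logic visible in the hunks above rather than adding new behaviour:

    EXIT_CODE_TO_ACTION = {
        0: "ZERO_CODE: job succeeded, keep stdout, write results",
        1: "ERR_CODE: hard failure, abort and signal upstream",
        2: "FRAGM_CHAIN_ISSUE_CODE: fragment chains overlap, log the "
           "transcript to --rejected_log / --unproc_log and continue",
    }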
"FS_DEL" FS_INS = "FS_INS" BIG_DEL = "BIG_DEL" diff --git a/modules/classify_chains.py b/modules/classify_chains.py index 3ec1255..e7e080d 100755 --- a/modules/classify_chains.py +++ b/modules/classify_chains.py @@ -91,6 +91,7 @@ def classify_chains( # -> then this is a proc pseudogene # move trans chains to a different dataframe + # TODO: rename trans -> spanning # trans chain -> a syntenic chain that passes throw the gene body # but has no aligning bases in the CDS trans_lines = df[(df["exon_cover"] == 0) & (df["synt"] > 1)] diff --git a/modules/gene_losses_summary.py b/modules/gene_losses_summary.py index fee28a9..7d21130 100755 --- a/modules/gene_losses_summary.py +++ b/modules/gene_losses_summary.py @@ -144,13 +144,13 @@ def read_loss_data(loss_dir): perc = float(line_data[2].split()[1]) projection_to_p_intact_M_intact[projection_id] = perc continue - elif line_data[2].startswith("MIDDLE_80%_INTACT"): - # flag: are there inact mutations in the middle 80% of CDS? + elif line_data[2].startswith("MIDDLE_IS_INTACT"): + # flag: are there inact mutations in the first 90%/mid 80% of CDS? raw_val = line_data[2].split()[1] val = True if raw_val == "TRUE" else False proj_to_80_p_intact[projection_id] = val continue - elif line_data[2].startswith("MIDDLE_80%_PRESENT"): + elif line_data[2].startswith("MIDDLE_IS_PRESENT"): # flag: any missing fragment in the middle 80% if CDS? raw_val = line_data[2].split()[1] val = True if raw_val == "TRUE" else False @@ -246,6 +246,7 @@ def get_projection_classes( p_intact_M_ign = p_to_pint_m_ign.get(projection, -1) p_intact_M_int = p_to_pint_m_int.get(projection, -1) p_i_codons = p_to_i_codon_prop.get(projection, -1) + # TODO: rename to NO LOSS IN FIRST 90% no_loss_in_80_p = p_80_int.get(projection, None) m_80_present = p_80_pre.get(projection, None) frame_oub = p_to_p_out_of_bord.get(projection, 0.0) diff --git a/modules/inact_mut_check.py b/modules/inact_mut_check.py index c4809d2..059fa8d 100755 --- a/modules/inact_mut_check.py +++ b/modules/inact_mut_check.py @@ -124,7 +124,17 @@ def mask_mut(mut): return upd_mut -def analyse_splice_sites(ref, query, gene, chain, u12_data=None, v=None): +def analyse_splice_sites( + ref, + query, + gene, + chain, + codon_table, + atg_codons_data, + mask_all_first_10p=False, + u12_data=None, + v=None, +): """Check correctness of the splice sites.""" u12_data = set() if not u12_data else u12_data # load U12 data if provided mut_counter = 1 # mut counter -> for mutations IDs @@ -172,57 +182,99 @@ def analyse_splice_sites(ref, query, gene, chain, u12_data=None, v=None): # so we can get splice site coordinates # if there is N in the splice site -> we don't know what's there # -> we are not sure -> mask this mutation - left_splice_site = query[start - 2 : start] - left_splice_site_N = "n" in left_splice_site or "N" in left_splice_site - right_splice_site = query[end + 1 : end + 3] - right_splice_size_N = "n" in right_splice_site or "N" in right_splice_site + acceptor_splice_site = query[start - 2 : start] # donor + acceptor_splice_site_N = ( + "n" in acceptor_splice_site or "N" in acceptor_splice_site + ) + donor_splice_site = query[end + 1 : end + 3] # acceptor + donor_splice_size_N = "n" in donor_splice_site or "N" in donor_splice_site + + # assign to the last / first codon of the exon (depends on what splice site is affected) + codon_pos_at_exon = [ + n for n, c in enumerate(codon_table) if c["t_exon_num"] == exon_num_ + ] + acceptor_codon_num = ( + codon_pos_at_exon[0] if len(codon_pos_at_exon) > 0 else None + ) + 
diff --git a/modules/inact_mut_check.py b/modules/inact_mut_check.py
index c4809d2..059fa8d 100755
--- a/modules/inact_mut_check.py
+++ b/modules/inact_mut_check.py
@@ -124,7 +124,17 @@ def mask_mut(mut):
     return upd_mut


-def analyse_splice_sites(ref, query, gene, chain, u12_data=None, v=None):
+def analyse_splice_sites(
+    ref,
+    query,
+    gene,
+    chain,
+    codon_table,
+    atg_codons_data,
+    mask_all_first_10p=False,
+    u12_data=None,
+    v=None,
+):
     """Check correctness of the splice sites."""
     u12_data = set() if not u12_data else u12_data  # load U12 data if provided
     mut_counter = 1  # mut counter -> for mutations IDs
@@ -172,57 +182,99 @@
         # so we can get splice site coordinates
         # if there is N in the splice site -> we don't know what's there
         # -> we are not sure -> mask this mutation
-        left_splice_site = query[start - 2 : start]
-        left_splice_site_N = "n" in left_splice_site or "N" in left_splice_site
-        right_splice_site = query[end + 1 : end + 3]
-        right_splice_size_N = "n" in right_splice_site or "N" in right_splice_site
+        acceptor_splice_site = query[start - 2 : start]  # acceptor, left, AG
+        acceptor_splice_site_N = (
+            "n" in acceptor_splice_site or "N" in acceptor_splice_site
+        )
+        donor_splice_site = query[end + 1 : end + 3]  # donor, right, GT/GC
+        donor_splice_site_N = "n" in donor_splice_site or "N" in donor_splice_site
+
+        # assign to the last / first codon of the exon (depending on which splice site is affected)
+        codon_pos_at_exon = [
+            n for n, c in enumerate(codon_table) if c["t_exon_num"] == exon_num_
+        ]
+        acceptor_codon_num = (
+            codon_pos_at_exon[0] if len(codon_pos_at_exon) > 0 else None
+        )
+        donor_codon_num = codon_pos_at_exon[-1] if len(codon_pos_at_exon) > 0 else None

         # check that splice sites are canonical
-        left_site_wrong = left_splice_site not in LEFT_SPLICE_CORR
-        right_site_wrong = right_splice_site not in RIGHT_SPLICE_CORR
+        acceptor_site_wrong = acceptor_splice_site not in LEFT_SPLICE_CORR
+        donor_site_wrong = donor_splice_site not in RIGHT_SPLICE_CORR
+
         eprint(
-            f"Exon {exon_num}; L_SPS: {left_splice_site}; R_SPS: {right_splice_site}"
+            f"Exon {exon_num}; L_SPS: {acceptor_splice_site}; R_SPS: {donor_splice_site}"
         ) if v else None

-        if exon_num != 1 and left_site_wrong:
+        # print(exon_num)
+        # print(acceptor_site_wrong)
+        # print(donor_site_wrong)
+
+        if exon_num != 1 and acceptor_site_wrong and acceptor_codon_num:
             # add mutation for left (acceptor) splice site
             # doesn't apply to the first exon obviously
             # mask this mutation if it's suspected to be U12 splice site
             mask = True if (exon_num, 0) in u12_data else False
             # if N in the splice site -> also mask it
-            mask = True if left_splice_site_N else mask
+            mask = True if acceptor_splice_site_N else mask
+
+            # if the splice site mutation is in the first 10% but followed by an ATG
+            # in the first 10%, then mask it
+            atg_mask = _define_whether_mask(
+                acceptor_codon_num,
+                atg_codons_data["left_t"],
+                atg_codons_data["right_t"],
+                atg_codons_data["atg_codon_nums"],
+                mask_all_first_10p=mask_all_first_10p,
+            )
+
+            mask = True if atg_mask is True else mask
+
             # create mutation object, describe what happened
-            mut_ = f"{LEFT_SPLICE_CORR}->{left_splice_site}"
-            mut_id = f"SSM_{mut_counter}"
+            mut_ = f"{LEFT_SPLICE_CORR}->{acceptor_splice_site}"
+            mut_id = f"{SSM_A}_{mut_counter}"
+            # print(mut_id)
             mut_counter += 1
             mut = Mutation(
                 gene=gene,
                 chain=chain,
                 exon=exon_num,
-                position=0,
-                mclass=SSM,
+                position=acceptor_codon_num,
+                mclass=SSM_A,
                 mut=mut_,
                 masked=mask,
                 mut_id=mut_id,
             )
             sps_report.append(mut)  # add mutation to the list
-        if exon_num != exons_num and right_site_wrong:
+        if exon_num != exons_num and donor_site_wrong and donor_codon_num:
             # add mutation for right (donor) splice site
             # doesn't apply to the last exon
             # mask this mutation if it's suspected to be U12 splice site
             mask = True if (exon_num, 1) in u12_data else False
-            mask = True if right_splice_size_N else mask  # if N in mutation -> mask it
+            mask = True if donor_splice_site_N else mask  # if N in mutation -> mask it
+
+            # if the splice site mutation is in the first 10% but followed by an ATG
+            # in the first 10%, then mask it
+            atg_mask = _define_whether_mask(
+                donor_codon_num,
+                atg_codons_data["left_t"],
+                atg_codons_data["right_t"],
+                atg_codons_data["atg_codon_nums"],
+                mask_all_first_10p=mask_all_first_10p,
+            )
+
+            mask = True if atg_mask is True else mask
+
             # create mutation object
-            mut_ = f"{RIGHT_SPLICE_CORR}->{right_splice_site}"
-            mut_id = f"SSM_{mut_counter}"
+            mut_ = f"{RIGHT_SPLICE_CORR}->{donor_splice_site}"
+            mut_id = f"{SSM_D}_{mut_counter}"
+            # print(mut_id)
             mut_counter += 1
             mut = Mutation(
                 gene=gene,
                 chain=chain,
                 exon=exon_num,
-                position=1,
-                mclass=SSM,
+                position=donor_codon_num,
+                mclass=SSM_D,
                 mut=mut_,
                 masked=mask,
                 mut_id=mut_id,
@@ -242,16 +294,22 @@
         curr = sps_report[i]
         curr_exon = curr.exon
         prev_exon = prev.exon
+
         if curr_exon != prev_exon + 1:
             # if current exon doesn't follow the previous immediately -> not the case
             # like prev mut exon is 3 and current is 6
             continue
+        # if exons follow each other (like 3 and 4) then continue
         curr_pos = curr.position
         prev_pos = prev.position
+        curr_type = curr.mclass
+        prev_type = prev.mclass
         # they must belong to the same intron, check this
-        if not (curr_pos == 0 and prev_pos == 1):
+        if not (prev_type == SSM_D and curr_type == SSM_A):
             continue
+
         prev_to_what = prev.mut.split("->")[1]
         curr_to_what = curr.mut.split("->")[1]
         # if it was -- in both cases: this is intron deletion
@@ -323,15 +381,78 @@ def corr_exon_num_or_no_fs(codon, exon_num):
     return exon_num


+def _find_atg_codons(codon_table):
+    """Find codon positions whose query triplets contain an ATG."""
+    atg_codon_nums = []
+    for num, codon in enumerate(codon_table, 1):
+        que_codon = codon["que_codon"]
+        que_codon_no_gap = que_codon.replace("-", "")
+        if not que_codon_no_gap:
+            # there are only gaps -> nothing to catch
+            continue
+        triplets = parts(que_codon_no_gap, n=3)
+        start_triplets = [x for x in triplets if x == "ATG"]
+        if len(start_triplets) > 0:
+            atg_codon_nums.append(num)
+    return atg_codon_nums
+
+
+def _get_next_bigger_num(num, lst):
+    for elem in lst:
+        if elem >= num:
+            return elem
+    return 999999999  # TODO: ideally, last codon position
+
+
+def _define_whether_mask(
+    num, left_t, right_t, atg_codon_nums, mask_all_first_10p=False
+):
+    """Check whether the mutation is going to be masked due to first/last 10% or not."""
+    # TODO: optimise this part, the only ATG position needed is the closest to 10%
+    # which is not above 10%, that's it.
+    if num >= right_t:
+        return True
+    elif left_t < num < right_t:
+        return False
+    if num <= left_t and mask_all_first_10p is True:
+        # automatically mask mut in first 10%
+        # don't account for ATG codons distribution
+        return True
+    # num is in the first 10%, need to find the next start
+    next_atg_pos = _get_next_bigger_num(num, atg_codon_nums)
+    return next_atg_pos <= left_t
+
+
+def make_atg_data(codon_table):
+    codons_num = len(codon_table)
+
+    # get first or last 10% of CDS: mask mutations in these regions:
+    perc_10 = codons_num // 10
+    left_t = perc_10
+    right_t = codons_num - perc_10
+
+    atg_codon_nums = _find_atg_codons(codon_table)
+
+    # TODO: refactor this
+    atg_codon_nums_data = {
+        "left_t": left_t,
+        "right_t": right_t,
+        "atg_codon_nums": atg_codon_nums,
+    }
+    return atg_codon_nums_data
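A worked example of `_define_whether_mask` under the new ATG-aware rule, with toy thresholds for a 100-codon reading frame:

    atg_data = {"left_t": 10, "right_t": 90, "atg_codon_nums": [4, 25]}
    # last 10% -> always masked:
    assert _define_whether_mask(95, **atg_data) is True
    # middle 80% -> never masked here:
    assert _define_whether_mask(50, **atg_data) is False
    # first 10% with a downstream ATG (codon 4) still inside the first 10%:
    # translation could restart, so the mutation is masked
    assert _define_whether_mask(3, **atg_data) is True
    # first 10% but the next ATG (codon 25) lies beyond left_t -> not masked,
    # unless mask_all_first_10p=True is passed
    assert _define_whether_mask(5, left_t=10, right_t=90, atg_codon_nums=[25]) is False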
 def scan_rf(
     codon_table,
     gene,
     chain,
+    atg_codon_nums_data,
     exon_stat=None,
     v=False,
     big_indel_thrs=None,
     sec_codons=None,
     no_fpi=False,
+    mask_all_first_10p=False,
 ):
     """Scan codon table for inactivating mutations."""
     # sec_codons -> selenocysteine-coding codons in reference
@@ -339,13 +460,9 @@
     codons_num = len(codon_table)
     in_mut_report = []  # save mutations here

-    # get first or last 10% of CDS: mask mutations in this region:
-    perc_10 = codons_num // 10
-    left_t = perc_10
-    right_t = codons_num - perc_10
-
     # init mutation counters: for IDs
     mut_number_stop = 1
+    mut_number_atg = 1  # not inact, but need to track
     mut_number_fs = 1
     mut_number_big_indel = 1
     q_dels_in_a_row = 0  # number of deletions in a row: for big indel detection
@@ -353,7 +470,14 @@
     for num, codon in enumerate(codon_table, 1):
         # go codon-by-codon
         # if in first/last 10%: it will be masked
-        mask = True if num <= left_t or num >= right_t else False
+        # mask = True if num <= left_t or num >= right_t else False
+        mask = _define_whether_mask(
+            num,
+            atg_codon_nums_data["left_t"],
+            atg_codon_nums_data["right_t"],
+            atg_codon_nums_data["atg_codon_nums"],
+            mask_all_first_10p=mask_all_first_10p,
+        )
         # determine whether it's the first or last exon:
         last_codon = True if num == codons_num else False
         first_codon = True if num == 1 else False
@@ -407,7 +531,10 @@
                 position=1,
                 mclass=START_MISSING,
                 mut=que_codon_,
-                masked=mask,
+                # this mutation should not affect the classification
+                # so it's always masked, but saved to indicate that
+                # the 1st ATG is missing:
+                masked=True,
                 mut_id=mut_id,
             )
             in_mut_report.append(mut)
@@ -487,6 +614,8 @@
             # need to split query sequence in triplets
             # check that any of them is a stop-codon
             stop_triplets = [x for x in triplets if x in STOPS]
+            start_triplets = [x for x in triplets if x == "ATG"]
+
             if len(stop_triplets) > 0 and not last_codon:
                 # we have premature stop codon
                 mut_ = f"{ref_codon.replace('-', '')}->{stop_triplets[0]}"
@@ -532,6 +661,27 @@
                 if v:
                     eprint("Detected STOP")
                     eprint(codon)
+            if len(start_triplets) > 0:
+                # not an inactivating mutation but needs to be saved
+                mut_ = f"{ref_codon.replace('-', '')}->ATG"
+                mclass = ATG
+                mut_id = f"ATG_{mut_number_atg}"
+                st_ex_num = ex_num
+                mut_number_atg += 1
+                mut = Mutation(
+                    gene=gene,
+                    chain=chain,
+                    exon=st_ex_num,
+                    position=num,
+                    mclass=mclass,
+                    mut=mut_,
+                    masked=True,
+                    mut_id=mut_id,
+                )
+                in_mut_report.append(mut)
+                if v:
+                    eprint("Detected ATG (non-inactivating)")
+                    eprint(codon)

     # I can infer number of codons in each exon directly from codon table
     return in_mut_report
@@ -550,7 +700,7 @@ def detect_compensations(inact_mut, codon_table):
     fs_num = len(fs)
     if fs_num <= 1:
         # need at least 2 frameshifts, otherwise there is nothing to compensate
-        return answer
+        return answer, []
     # first iteration: detect potential compensatory events, based only
     # on their sizes
     potent_compensations = []
@@ -583,10 +733,12 @@
                 potent_compensations.append(fs_ids)
                 break
     if len(potent_compensations) == 0:
-        return []  # no potential compensations, skip this
+        return [], []  # no potential compensations, skip this

     # verify potential compensations, check for stop codons in alt frame
     what_is_compensated = set()  # to avoid twice compensated FS
+    alt_frame_codons = []  # lengths of codons in alt frame
+
     for comp in potent_compensations:
         # comp -> a list of potentially compensated FS IDs
         if len(what_is_compensated.intersection(comp)) > 0:
@@ -610,7 +762,7 @@
             # add compensation track
             # no stop codons
             # to create a mut object we need gene name, chain id, exon num etc
-            # I just use the first mut in the row for that (ethalon)
+            alt_frame_codons.append((start_pos - 1, end_pos))
             ethalon_mut = comp_muts[0]
             gene = ethalon_mut.gene
             chain = ethalon_mut.chain
@@ -620,7 +772,11 @@
             mut_id = f"C_{m_counter}"
             # mutation -> comma-separated list of compensated mutation IDs
             fs_ids = [x.split("_")[1] for x in comp]
-            mut = "FS_{}".format(",".join(fs_ids))
+            # mut = "FS_{}".format(",".join(fs_ids))
+            # changed format FEB 2022: FS_{start_num}-{end_num}
+            # just a comma-separated list can be too long
+            _ids_range = f"{fs_ids[0]}-{fs_ids[-1]}"
+            mut = f"FS_{_ids_range}"
             comp_mut = Mutation(
                 gene=gene,
                 chain=chain,
@@ -632,11 +788,12 @@
                 mut_id=mut_id,
             )
             answer.append(comp_mut)
+            # compute positions of the first and last alt-frame codons
             for c in comp:
                 # add compensated muts to comp muts set
                 # to avoid adding comp mutations twice
                 what_is_compensated.add(c)
             m_counter += 1
-    return answer
+    return answer, alt_frame_codons
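Since the FEB 2022 format change, a COMPENSATION record stores its frameshift IDs as a range rather than a comma-separated list. Encoding (in detect_compensations above) and decoding (in mask_compensated_fs and compute_intact_perc below) work like this; note that the decode re-expands the full contiguous range, so the format assumes the compensated FS IDs have no gaps:

    fs_ids = ["12", "13", "15"]            # numeric parts of the compensated FS IDs
    mut = f"FS_{fs_ids[0]}-{fs_ids[-1]}"   # encoded as "FS_12-15"
    start_, end_ = (int(x) for x in mut.split("_")[1].split("-"))
    comp_fs = [f"FS_{i}" for i in range(start_, end_ + 1)]
    # -> ["FS_12", "FS_13", "FS_14", "FS_15"]; FS_14 is included here although
    #    it was never part of the compensation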
 def mask_compensated_fs(mut_list):
@@ -648,7 +805,11 @@
     comp_fs_ids = []  # there are compensatory events
     for cmp in comp_muts:  # get list of comp FS IDs
-        fs_nums = cmp.mut.split("_")[1].split(",")
+        # fs_nums = cmp.mut.split("_")[1].split(",")
+        comp_ids_range_str = cmp.mut.split("_")[1].split("-")
+        _comp_start = int(comp_ids_range_str[0])
+        _comp_end = int(comp_ids_range_str[1])
+        fs_nums = list(range(_comp_start, _comp_end + 1))
         fs_ids = [f"FS_{x}" for x in fs_nums]
         comp_fs_ids.extend(fs_ids)
     comp_fs_ids = set(comp_fs_ids)
@@ -680,6 +841,13 @@ def compute_percent_id(seq_1, seq_2):
     return pid


+def _get_last_codon_for_each_exon(codon_table):
+    ret = {}
+    for num, elem in enumerate(codon_table, 1):
+        ret[elem["t_exon_num"] + 1] = num
+    return ret
+
+
 def classify_exons(
     gene,
     que,
@@ -690,16 +858,20 @@
     exon_blosum,
     missing_exons,
     ex_inc,
+    atg_codons_data,
+    mask_all_first_10p=False,
     v=False,
 ):
     """Classify exons as intact, deleted and missing."""
     del_num, miss_num = 1, 1  # counters for mutation IDs
     # get a list of exon numbers:
     exon_nums = list(range(codon_table[-1]["t_exon_num"] + 1))
+    exon_to_last_codon_of_exon = _get_last_codon_for_each_exon(codon_table)

     exons_report = []  # save data here
     exon_stat = [
         "X",
     ]  # exon status, start with 1, X - placeholder
+
     del_miss_nums = []

     for exon_num in exon_nums:
@@ -734,7 +906,7 @@
         # classify whether it's deleted or not:
         # print(f"Exon {exon_num} classification with the following params: ") if v else None
         # print(ex_class, exon_excl, ex_pid, ex_blosum) if v else None
-        del_, q = classify_exon(ex_class, exon_excl, ex_pid, ex_blosum, v=v)
+        ex_non_del, q = classify_exon(ex_class, exon_excl, ex_pid, ex_blosum, v=v)
         # print(f"Results are: {del_} {q}") if v else None

         if ex_class == "M" or ex_gap:
@@ -755,8 +927,18 @@
             exon_stat.append("M")
             del_miss_nums.append(exon_num)
             continue
-        elif del_ is False:
+        elif ex_non_del is False:
             # exon is deleted: need to write about this
+            last_codon_num = exon_to_last_codon_of_exon.get(ex_num_, 0)
+
+            atg_mask = _define_whether_mask(
+                last_codon_num,
+                atg_codons_data["left_t"],
+                atg_codons_data["right_t"],
+                atg_codons_data["atg_codon_nums"],
+                mask_all_first_10p=mask_all_first_10p,
+            )
+
             mut = Mutation(
                 gene=gene,
                 chain=que,
@@ -764,7 +946,7 @@
                 position=0,
                 mut="-",
                 mclass=DEL_EXON,
-                masked=False,
+                masked=atg_mask,
                 mut_id=f"DEL_{del_num}",
             )
             del_num += 1
@@ -817,11 +999,11 @@ def muts_to_text(
         strings.append(out_of_ch_line)
     for q, val in m_80_i.items():
         val_str = "TRUE" if val is True else "FALSE"
-        m_80_line = f"# {gene}\t{q}\tMIDDLE_80%_INTACT {val_str}"
+        m_80_line = f"# {gene}\t{q}\tMIDDLE_IS_INTACT {val_str}"
         strings.append(m_80_line)
     for q, val in m_80_p.items():
         val_str = "TRUE" if val is True else "FALSE"
-        m_80_line = f"# {gene}\t{q}\tMIDDLE_80%_PRESENT {val_str}"
+        m_80_line = f"# {gene}\t{q}\tMIDDLE_IS_PRESENT {val_str}"
         strings.append(m_80_line)
     return "\n".join(strings) + "\n"
@@ -836,7 +1018,15 @@ def get_exon_num_corr(codons_data):
     return ans


-def compute_intact_perc(codon_table, mutations, q_name, v=False):
+def compute_intact_perc(
+    codon_table,
+    mutations,
+    q_name,
+    alt_frame_ranges,
+    alt_f_del=False,
+    v=False,
+    mask_all_first_10p=False,
+):
     """Compute intact %ID-related features."""
     # compute per query
     query_muts = [m for m in mutations if m.chain == q_name]
@@ -855,6 +1045,8 @@
     del_codon_nums = [
         n for n, c in enumerate(codon_table) if c["t_exon_num"] in del_exons
     ]
+    # read codons in alt frame as deleted
+
     safe_del_codon_nums = [
         n for n, c in enumerate(codon_table) if c["t_exon_num"] in safe_del_exons
     ]
@@ -862,6 +1054,14 @@
         n for n, c in enumerate(codon_table) if c["t_exon_num"] in miss_exons
     ]

+    if alt_f_del is True:
+        # if so, we consider codons in alternative frame deleted
+        # alt frame -> between compensated frameshifts
+        for elem in alt_frame_ranges:
+            s_, e_ = elem
+            for codon_num in range(s_, e_):
+                safe_del_codon_nums.append(codon_num)
+
     # update codons status -> what is Missing, Deleted or Lost (not-safely deleted exons)
     for del_codon in del_codon_nums:
         codon_status[del_codon] = "L"
@@ -871,9 +1071,26 @@
         codon_status[miss_codon] = "M"

     # get IDs of compensated FS
-    compensations = [m.mut for m in query_muts if m.mclass == COMPENSATION]
-    comp_nums = ",".join([c.split("_")[1] for c in compensations]).split(",")
-    comp_fs = {f"FS_{c}" for c in comp_nums}
+    compensations = [m for m in query_muts if m.mclass == COMPENSATION]
+    comp_fs = []  # list to keep compensated frameshifts
+    for comp in compensations:
+        comp_field = comp.mut
+        comp_ids_range_str = comp_field.split("_")[1].split("-")
+        # fmt: FS_{start}-{end}
+        _comp_start = int(comp_ids_range_str[0])
+        _comp_end = int(comp_ids_range_str[1])
+        comp_ids = list(range(_comp_start, _comp_end + 1))
+        comp_fs_strings = [f"FS_{i}" for i in comp_ids]
+        comp_fs.extend(comp_fs_strings)
+
+    # comp_nums = ",".join([c.split("_")[1] for c in compensations]).split(",")
+    # comp_ids_range_str = comp_field.split("_")[1].split("-")
+    # # fmt: FS_{start}-{end}
+    # _comp_start = int(comp_ids_range_str[0])
+    # _comp_end = int(comp_ids_range_str[1])
+    # comp_ids = list(range(_comp_start, _comp_end + 1))
+    # comp_pairs = get_comp_pairs(comp_ids)
+    # comp_fs = {f"FS_{c}" for c in comp_nums}

     # put inactivating mutations coordinates in codon status table
     for m in query_muts:
@@ -890,7 +1107,7 @@
             continue
         elif m.masked is True:
             continue
-        elif m.mclass == SSM:
+        elif m.mclass == SSM_A or m.mclass == SSM_D:
             # deal with splice site mutations
             if m.masked is True:
                 # U12 or N-containing splice site -> do not consider
@@ -972,8 +1189,14 @@
     ten_perc = gene_len // 10
     # cut middle 80% codons states:
     middle = codon_status_string[ten_perc:-ten_perc]
-    middle_80_intact = False if "L" in middle else True
-    middle_80_present = False if "M" in middle else True
+    first_90 = codon_status_string[:-ten_perc]
+
+    if mask_all_first_10p is True:
+        middle_is_intact = False if "L" in middle else True
+    else:
+        middle_is_intact = False if "L" in first_90 else True
+    middle_is_present = False if "M" in middle else True
+
     # compute non-missing sequence
     not_m = len(codon_status) - codon_status.count("M")
     if not_m > 0:  # beware of zero division error
@@ -985,8 +1208,9 @@
         p_intact_ignore_m,
         p_intact_intact_m,
         num_of_I_codons,
-        middle_80_intact,
-        middle_80_present,
+        middle_is_intact,
+        # first_90_intact,
+        middle_is_present,
     )
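With --alt_frame_del enabled, the codons lying between two compensated frameshifts (the alternative-frame stretch) are folded into the safely-deleted set before the intact-percentage math. A minimal sketch using the (start - 1, end) pairs that detect_compensations produces:

    alt_f_del = True                 # --alt_frame_del / --lfd
    alt_frame_ranges = [(14, 17)]    # toy pair from detect_compensations()
    safe_del_codon_nums = []
    if alt_f_del:
        for s_, e_ in alt_frame_ranges:
            # codons 14..16 will be marked "D" (safely deleted) downstream
            safe_del_codon_nums.extend(range(s_, e_))
    assert safe_del_codon_nums == [14, 15, 16]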
@@ -1198,13 +1422,17 @@ def get_exon_pairs(exon_stat):
     return pairs


-def detect_split_stops(codon_table, gene, q_name, exon_stat):
+def detect_split_stops(
+    codon_table, gene, q_name, exon_stat, atg_codons_data, mask_all_first_10p=False
+):
     """Considering all exon deletions find all split stop codons."""
     # we need to get pairs of intact (not deleted or missing) exons
     # between which there is a row of Deleted exons (but not missing)
     # if there is a missing exon in between -> do not make any conclusions
     # split stop codons may occur between these pairs
     i_exon_pairs = get_exon_pairs(exon_stat)
+    exon_to_last_codon_of_exon = _get_last_codon_for_each_exon(codon_table)
+
     if len(i_exon_pairs) == 0:
         # no such pairs -> no way to detect split stop
         return []  # return nothing
@@ -1212,6 +1440,7 @@
     mut_num = 1
     muts = []
     for pair in i_exon_pairs:
+        # those are 1-based
         first_exon = pair[0]
         second_exon = pair[1]
         # in codon table numbers are 0-based, so correct
@@ -1248,14 +1477,23 @@
         # ex_num = second_exon if s_part_len > f_part_len else first_exon
         mut_id = f"SP_STOP_{mut_num}"
         mut_num += 1
+        position = exon_to_last_codon_of_exon[first_exon]
+        atg_mask = _define_whether_mask(
+            position,
+            atg_codons_data["left_t"],
+            atg_codons_data["right_t"],
+            atg_codons_data["atg_codon_nums"],
+            mask_all_first_10p=mask_all_first_10p,
+        )
+
         mut = Mutation(
             gene=gene,
             chain=q_name,
             exon=second_exon,
-            position=0,
+            position=position,
             mclass=STOP,
             mut=mut_,
-            masked=False,
+            masked=atg_mask,
             mut_id=mut_id,
         )
         muts.append(mut)
@@ -1285,6 +1523,8 @@ def inact_mut_check(
     ref_ss=None,
     sec_codons=None,
     no_fpi=False,
+    alt_f_del=False,
+    mask_all_first_10p=False,  # mask all inact mutations in 1st 10% regardless of ATGs
 ):
     """Detect inactivating mutations in the CESAR output."""
     # read cesar output
@@ -1304,12 +1544,11 @@
     # initiate lists/ dicts for them
     p_intact_ignore_M = {}
     p_intact_intact_M = {}
-    middle_80_intact = {}
-    middle_80_present = {}
+    middle_is_intact = {}
+    middle_is_present = {}
     i_codons_prop = {}
     out_of_b_vals = {}
     mutations = []
-
     ex_prop_provided = ex_prop is not None
     del_miss_exons = {}
     for cesar_fraction in cesar_fractions:
@@ -1354,21 +1593,15 @@
         #     err_msg = f"Cannot find CESAR wrapper features for query {q_name}"
         #     raise ValueError(err_msg)

-        # now we extract inactivation mutations
-        # then add them to fraction_mutations list
-
-        # extract splice site mutations
-        sps_mutations = analyse_splice_sites(
-            ref, query, gene, q_name, u12_introns_data, v=v
-        )
-        fraction_mutations.extend(sps_mutations)
-        # create codon table to extract other mutations
+        # create codon table to extract mutations
         # codon table: list of objects, describing a codon
         # such as sequence in reference and query, exon number and so on
         codon_table = parse_cesar_out(ref, query)
+        atg_codons_data = make_atg_data(codon_table)

-        # next loop -> for deleted/missed exons
+        # next loop -> for deleted/missed exons
         if ex_prop:
             # if extra data provided by CESAR wrapper we can classify exons
+            # dm list -> list of 0-based exon nums which are del or missing
             exon_del_miss_, exon_stat_, dm_list = classify_exons(
                 gene,
                 q_name,
@@ -1379,6 +1612,8 @@
                 exon_blosum,
                 missing_exons,
                 ex_inc,
+                atg_codons_data,
+                mask_all_first_10p=mask_all_first_10p,
                 v=v,
             )
             # get lists of deleted/missing exons
@@ -1398,32 +1633,59 @@
         # big indels may be classified as inactivating mutations
         # but the bigger the exon: the bigger an indel should be
         # define the thresholds
-        big_indel_thrs = infer_big_indel_thresholds(ex_lens)
+        del_miss_exons[q_name] = set(dm_list)
+        big_indel_thrs = infer_big_indel_thresholds(ex_lens)

         # scan reading frame (codon table) for the rest of inact mutations
         inact_muts = scan_rf(
             codon_table,
             gene,
             q_name,
+            atg_codons_data,
             exon_stat=exon_stat,
             v=v,
             big_indel_thrs=big_indel_thrs,
             sec_codons=sec_codons,
             no_fpi=no_fpi,
+            mask_all_first_10p=mask_all_first_10p,
         )
         # save this data
         fraction_mutations.extend(inact_muts)

+        # now we extract inactivating mutations
+        # then add them to the fraction_mutations list
+        # extract splice site mutations
+        sps_mutations = analyse_splice_sites(
+            ref,
+            query,
+            gene,
+            q_name,
+            codon_table,
+            atg_codons_data,
+            mask_all_first_10p,
+            u12_introns_data,
+            v=v,
+        )
+        fraction_mutations.extend(sps_mutations)
+
+
         # get a list of split stop codons: stop codons that appear after exon deletions
         # such as:
         # GCAAACGCAGCt-------------[DELETED EXON]-------agTCCCATTTCCAACTGATC
         # exon deletion raises an inframe stop codon: t + ag
-        split_stop_codons = detect_split_stops(codon_table, gene, q_name, exon_stat)
+        split_stop_codons = detect_split_stops(
+            codon_table,
+            gene,
+            q_name,
+            exon_stat,
+            atg_codons_data,
+            mask_all_first_10p=mask_all_first_10p,
+        )
         fraction_mutations.extend(split_stop_codons)

         # detect compensated frameshifts
-        compensations = detect_compensations(inact_muts, codon_table)
+        compensations, alt_frame_ranges = detect_compensations(inact_muts, codon_table)
         fraction_mutations.extend(compensations)
         # also mask compensated frameshifts:
         fraction_mutations = mask_compensated_fs(fraction_mutations)
@@ -1434,13 +1696,19 @@
         # extract and save %intact-related features
         pintact_features = compute_intact_perc(
-            codon_table, fraction_mutations, q_name, v=v
+            codon_table,
+            fraction_mutations,
+            q_name,
+            alt_frame_ranges,
+            alt_f_del=alt_f_del,
+            v=v,
+            mask_all_first_10p=mask_all_first_10p,
         )
         p_intact_ignore_M[q_name] = pintact_features[0]
         p_intact_intact_M[q_name] = pintact_features[1]
         i_codons_prop[q_name] = pintact_features[2]
-        middle_80_intact[q_name] = pintact_features[3]
-        middle_80_present[q_name] = pintact_features[4]
+        middle_is_intact[q_name] = pintact_features[3]
+        middle_is_present[q_name] = pintact_features[4]

         # compute % of gene that lies outside chain borders
         out_of_borders_prop = get_out_of_borders_prop(codon_table, missing_exons)
@@ -1454,8 +1722,8 @@
         p_intact_intact_M,
         i_codons_prop,
         out_of_b_vals,
-        middle_80_intact,
-        middle_80_present,
+        middle_is_intact,
+        middle_is_present,
         gene,
     )
     return report, del_mis_exons
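For reference, a sketch of the caller side after this change, as wired up in realign_exons in CESAR_wrapper.py (the first file in this diff); the surrounding variable names are taken from that caller and other positional or keyword arguments are omitted here:

    loss_report, del_mis_exons = inact_mut_check(
        cesar_raw_out,
        ref_ss=ref_ss_data,
        sec_codons=sec_codons,
        no_fpi=args["no_fpi"],
        alt_f_del=args["alt_frame_del"],                # --alt_frame_del / --lfd
        mask_all_first_10p=args["mask_all_first_10p"],  # --mask_all_first_10p / --m_f10p
    )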
" + "Likely is not needed for standalone script scenario. ") + ) app.add_argument( "--predefined_glp_class_path", default=None, @@ -198,6 +208,28 @@ def parse_args(): "(i) Projections with too short query region (L or M) and " "(ii) Projections with very long query region (M)", ) + app.add_argument( + "--unprocessed_log", + "--unp", + default=None, + help="Store unprocessed genes in a separate file." + ) + app.add_argument( + "--debug", + "-d", + action="store_true", + dest="debug", + help="Log debugging data" + ) + app.add_argument( + "--mask_all_first_10p", + "--m_f10p", + action="store_true", + dest="mask_all_first_10p", + help="Automatically mask all inactivating mutations in first 10% of " + "the reading frame, ignoring ATG codons distribution. " + "(Default mode in V1.0, not recommended to use in later versions)" + ) # print help if there are no args if len(sys.argv) < 2: app.print_help() @@ -495,7 +527,8 @@ def fill_buckets(buckets, all_jobs): # memlim[5] -> jobs that require <= 5Gb # memlim[10] -> jobs that require > 5Gb AND <= 10Gb buckets[memlim] = [ - job for job, job_mem in all_jobs.items() if prev_lim < job_mem <= memlim + f"{job} --memlim {memlim}" for job, job_mem in all_jobs.items() + if prev_lim < job_mem <= memlim ] prev_lim = memlim # remove empty @@ -507,10 +540,17 @@ def save_jobs(filled_buckets, bucket_jobs_num, jobs_dir): """Save cesar calls in the dir assigned.""" os.mkdir(jobs_dir) if not os.path.isdir(jobs_dir) else None file_num, to_combine = 0, [] + bucket_saved = {k: True for k in filled_buckets.keys()} + for bucket_id, jobs in filled_buckets.items(): num_of_files = bucket_jobs_num[bucket_id] # just in case num_of_files = len(jobs) if num_of_files >= len(jobs) else num_of_files + if num_of_files == 0: + # avoiding zero division error + bucket_saved[bucket_id] = False + print(f"Warning! No files to save jobs for bucket {bucket_id}") + continue size_of_file = len(jobs) // num_of_files # size_of_file = size_of_file + 1 if len(jobs) % num_of_files != 0 else size_of_file jobs_split = parts(jobs, n=size_of_file) @@ -522,6 +562,11 @@ def save_jobs(filled_buckets, bucket_jobs_num, jobs_dir): f.write("\n".join(part) + "\n") f.close() to_combine.append(file_path) + # check if anything saved + if all(x is False for x in bucket_saved.values()): + print("Could not create any CESAR job. 
@@ -507,10 +540,17 @@ def save_jobs(filled_buckets, bucket_jobs_num, jobs_dir):
     """Save cesar calls in the dir assigned."""
     os.mkdir(jobs_dir) if not os.path.isdir(jobs_dir) else None
     file_num, to_combine = 0, []
+    bucket_saved = {k: True for k in filled_buckets.keys()}
+
     for bucket_id, jobs in filled_buckets.items():
         num_of_files = bucket_jobs_num[bucket_id]
         # just in case
         num_of_files = len(jobs) if num_of_files >= len(jobs) else num_of_files
+        if num_of_files == 0:
+            # avoiding zero division error
+            bucket_saved[bucket_id] = False
+            print(f"Warning! No files to save jobs for bucket {bucket_id}")
+            continue
         size_of_file = len(jobs) // num_of_files
         # size_of_file = size_of_file + 1 if len(jobs) % num_of_files != 0 else size_of_file
         jobs_split = parts(jobs, n=size_of_file)
@@ -522,6 +562,11 @@
             f.write("\n".join(part) + "\n")
             f.close()
             to_combine.append(file_path)
+    # check whether anything was saved
+    if all(x is False for x in bucket_saved.values()):
+        print("Could not create any CESAR job. Probably, ALL genes require much more memory than is available.")
+        print("If this result is unexpected, please contact the developers.")
+        sys.exit(1)
     return to_combine
@@ -551,7 +596,13 @@

 def save_combined_joblist(
-    to_combine, combined_file, results_dir, inact_mut_dat, rejected_log, name=""
+    to_combine,
+    combined_file,
+    results_dir,
+    inact_mut_dat,
+    rejected_log,
+    unproc_log,
+    name=""
 ):
     """Save joblist of joblists (combined joblist)."""
     f = open(combined_file, "w")
@@ -565,6 +616,9 @@
         if rejected_log:
             log_path = os.path.join(rejected_log, f"{basename}.txt")
             combined_command += f" --rejected_log {log_path}"
+        if unproc_log:
+            log_path = os.path.join(unproc_log, f"{basename}.txt")
+            combined_command += f" --unproc_log {log_path}"
         f.write(combined_command + "\n")
     f.close()
@@ -603,6 +657,133 @@ def read_precomp_mem(precomp_file):
     return ret


+def get_trans_to_regions_file(precomp_regions_data_dir):
+    ret = {}
+    if precomp_regions_data_dir is None:
+        return ret
+    files_in = os.listdir(precomp_regions_data_dir)
+    for filename in files_in:
+        path = os.path.abspath(os.path.join(precomp_regions_data_dir, filename))
+        f = open(path, "r")
+        transcripts_in_file = set(x.split("\t")[0] for x in f)
+        f.close()
+        for t in transcripts_in_file:
+            ret[t] = path
+    return ret
+
+
+def build_job(gene, chains_arg, args, gene_fragments, trans_to_reg_precomp_file, u12_this_gene, mask_all_first_10p=False):
+    """Build CESAR job."""
+    # 0 gene; 1 chains; 2 bed_file; 3 bdb chain_file; 4 tDB; 5 qDB; 6 output; 7 cesar_bin
+    job = WRAPPER_TEMPLATE.format(
+        gene,
+        chains_arg,
+        os.path.abspath(args.bdb_bed_file),
+        os.path.abspath(args.bdb_chain_file),
+        os.path.abspath(args.tDB),
+        os.path.abspath(args.qDB),
+        os.path.abspath(args.cesar_binary),
+        args.uhq_flank,
+        # gig,
+    )
+    # add some flags if required
+    job = job + " --mask_stops" if args.mask_stops else job
+    job = job + " --check_loss" if args.check_loss else job
+    job = job + " --no_fpi" if args.no_fpi else job
+    job = job + " --fragments" if gene_fragments else job
+    job = job + " --opt_cesar" if args.opt_cesar else job
+    job = job + " --alt_frame_del"  # TODO: toga master script parameter
+
+    precomp_file = trans_to_reg_precomp_file.get(gene)
+    job = job + f" --predefined_regions {precomp_file}" if precomp_file else job
+
+    # add U12 introns data if this gene has them:
+    job = job + f" --u12 {os.path.abspath(args.u12)}" if u12_this_gene else job
+
+    # add the mask_all_first_10p flag if needed
+    job = job + " --mask_all_first_10p" if mask_all_first_10p else job
+    return job
+
+
+def compute_ref_part_of_mem(block_sizes):
+    """Compute num_states and r_length."""
+    num_states, r_length = 0, 0
+    for block_size in block_sizes:
+        # num_states += 6 + 6 * reference->num_codons + 1 + 2 + 2 + 22 + 6;
+        # /* 22 and 6 for acc and donor states */
+        num_codons = block_size // 3
+        num_states += 6 + 6 * num_codons + 1 + 2 + 2 + 22 + 6
+        # r_length += 11 + 6 * fasta.references[i]->length
+        #          + donors[i]->length + acceptors[i]->length;
+        r_length += block_size
+    return num_states, r_length
+
+
+def compute_memory_gig_for_qlen(num_states, r_length, q_length_max):
+    memory = (
+        (num_states * 4 * 8)
+        + (num_states * q_length_max * 4)
+        + (num_states * 304)
+        + (2 * q_length_max + r_length) * 8
+        + (q_length_max + r_length) * 2 * 1
+        + EXTRA_MEM
+    )
+    gig = math.ceil(memory / 1000000000) + 0.25
+    return gig
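A worked example of the two helpers above, with two toy reference exons and a 100 kb query locus; EXTRA_MEM is the module-level constant (100000 bytes):

    block_sizes = [150, 300]  # reference exon lengths in bp
    num_states, r_length = compute_ref_part_of_mem(block_sizes)
    # 150 bp -> 50 codons -> 6 + 6*50 + 1 + 2 + 2 + 22 + 6 = 339 states
    # 300 bp -> 100 codons -> 639 states; total 978 states, r_length = 450
    gig = compute_memory_gig_for_qlen(num_states, r_length, q_length_max=100_000)
    # memory ~ 978 * 100000 * 4 plus smaller terms ~ 0.39e9 bytes,
    # so gig = math.ceil(0.39) + 0.25 = 1.25
    assert gig == 1.25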
+def _get_chain_arg_and_gig_arg(chains_list, chain_to_mem):
+    gig_arg = max([chain_to_mem[c] for c in chains_list])
+    return gig_arg
+
+
+def compute_memory(chains, precomp_gig, block_sizes, gene_chains_data, gene_fragments, mem_limit):
+    """Compute memory requirements for different chains."""
+    if precomp_gig:
+        # chains arg: just all chains, everything is precomputed
+        return {tuple(chains): (precomp_gig, MEM_FIT)}
+    # proceed to memory estimation
+    # the same procedure as inside the CESAR2.0 code
+    # required memory depends on numerous params
+    # first, we need reference transcript-related parameters
+    # query-related parameters come later
+    num_states, r_length = compute_ref_part_of_mem(block_sizes)
+
+    # branch 2: fragmented gene, here we have to use the sum of query lengths
+    if gene_fragments:
+        # in case of a fragmented genome: we stitch queries together
+        # so query length = sum of all queries
+        q_length_max = sum([v for v in gene_chains_data.values()])
+        gig = compute_memory_gig_for_qlen(num_states, r_length, q_length_max)
+        return {tuple(chains): (gig, MEM_FIT)}
+
+    # branch 3, maybe the most common one
+    ret = {}
+    chain_to_mem_consumption = {}
+    for chain_id, q_length in gene_chains_data.items():
+        chain_gig = compute_memory_gig_for_qlen(num_states, r_length, q_length)
+        chain_to_mem_consumption[chain_id] = chain_gig
+    # for every chain, we have the memory consumption
+    chains_that_fit = [c_id for c_id, gig in chain_to_mem_consumption.items() if gig <= mem_limit]
+    chains_that_goto_bigmem = [c_id for c_id, gig in chain_to_mem_consumption.items() if mem_limit < gig <= BIGMEM_LIM]
+    chains_that_dont_fit = [c_id for c_id, gig in chain_to_mem_consumption.items() if gig > BIGMEM_LIM]
+
+    # process each bucket of chains individually
+    if len(chains_that_fit) > 0:
+        # good, there are some chains that can be easily processed
+        mem = _get_chain_arg_and_gig_arg(chains_that_fit, chain_to_mem_consumption)
+        ret[tuple(chains_that_fit)] = (mem, MEM_FIT)
+    if len(chains_that_goto_bigmem) > 0:
+        # there are some bigmem jobs -> to be executed separately
+        # Deprecated branch, TODO: check whether it makes sense nowadays
+        mem = _get_chain_arg_and_gig_arg(chains_that_goto_bigmem, chain_to_mem_consumption)
+        ret[tuple(chains_that_goto_bigmem)] = (mem, MEM_BIGMEM)
+    if len(chains_that_dont_fit) > 0:
+        mem = _get_chain_arg_and_gig_arg(chains_that_dont_fit, chain_to_mem_consumption)
+        ret[tuple(chains_that_dont_fit)] = (mem, MEM_DONTFIT)
+    return ret
+
+
 def main():
     """Entry point."""
     t0 = dt.now()
@@ -618,6 +799,10 @@

     # if memory is precomputed: use it
     precomp_mem = read_precomp_mem(args.precomp_memory_data)
+    # precomp_regions = read_precomp_reg(args.precomp_regions_data)
+
+    # TODO: to optimize later
+    trans_to_reg_precomp_file = get_trans_to_regions_file(args.precomp_regions_data_dir)
     # get lists of orthologous chains per each gene
     # skipped_1 - no chains found -> log them
     predefined_glp_class = {}  # for projections which are M and L without CESAR
@@ -658,6 +843,7 @@
         args.qDB,
     )
     predefined_glp_class.update(predef_glp)
+    predef_glp_class__chains_step = {}

     # start making the jobs
     all_jobs = {}
@@ -667,8 +853,8 @@
     for gene in batch.keys():
         u12_this_gene = u12_data.get(gene)
         block_sizes = bed_data[gene][3]
-
         gene_chains_data = regions.get(gene)
+
         # check that there is something for this gene
         if not gene_chains_data:
             continue
@@ -681,94 +867,47 @@
             gene_chains_data = {
                 k: v for k, v in gene_chains_data.items() if k in gene_fragments
             }
+
         chains = gene_chains_data.keys()
         if len(chains) == 0:
             continue
+
+
 def main():
     """Entry point."""
     t0 = dt.now()
@@ -618,6 +799,10 @@ def main():
     # if memory is precomputed: use it
     precomp_mem = read_precomp_mem(args.precomp_memory_data)
+    # precomp_regions = read_precomp_reg(args.precomp_regions_data)
+
+    # TODO: to optimize later
+    trans_to_reg_precomp_file = get_trans_to_regions_file(args.precomp_regions_data_dir)
     # get lists of orthologous chains per each gene
     # skipped_1 - no chains found -> log them
     predefined_glp_class = {}  # for projections which are M and L without CESAR
@@ -658,6 +843,7 @@ def main():
         args.qDB,
     )
     predefined_glp_class.update(predef_glp)
+    predef_glp_class__chains_step = {}

     # start making the jobs
     all_jobs = {}
@@ -667,8 +853,8 @@ def main():
     for gene in batch.keys():
         u12_this_gene = u12_data.get(gene)
         block_sizes = bed_data[gene][3]
-
         gene_chains_data = regions.get(gene)
+
+        # check that there is something for this gene
         if not gene_chains_data:
             continue
@@ -681,94 +867,47 @@ def main():
             gene_chains_data = {
                 k: v for k, v in gene_chains_data.items() if k in gene_fragments
             }
+
         chains = gene_chains_data.keys()
         if len(chains) == 0:
             continue
-        chains_arg = ",".join(chains)  # chain ids -> one of the cmd args
-
-        # if memory is precomputed then use it
         precomp_gig = precomp_mem.get(gene, None)
-        if precomp_gig is None:
-            # proceed to memory estimation
-            # the same procedure as inside CESAR2.0 code
-            num_states, r_length = 0, 0
-
-            # required memory depends on numerous params
-            # first, we need reference transcript-related parameters
-            # query-related parameters will be later
-            for block_size in block_sizes:
-                # num_states += 6 + 6 * reference->num_codons + 1 + 2 + 2 + 22 + 6;
-                # /* 22 and 6 for acc and donor states */
-                num_codons = block_size // 3
-                num_states += 6 + 6 * num_codons + 1 + 2 + 2 + 22 + 6
-                # r_length += 11 + 6 * fasta.references[i]->length
-                # + donors[i]->length + acceptors[i]->length;
-                r_length += block_size
-
-            # now compute query sequence-related parameters
-            query_lens = [v for v in gene_chains_data.values()]
-            if (
-                gene_fragments
-            ):  # in case of fragmented genome: we stitch queries together
-                # so query length = sum of all queries
-                q_length_max = sum(query_lens)
-            else:  # not fragmented genome: processins queries separately
-                # thus we need only the max length
-                q_length_max = max(query_lens)
-            # and now compute the amount of required memory
-            memory = (
-                (num_states * 4 * 8)
-                + (num_states * q_length_max * 4)
-                + (num_states * 304)
-                + (2 * q_length_max + r_length) * 8
-                + (q_length_max + r_length) * 2 * 1
-                + EXTRA_MEM
-            )
-            gig = math.ceil(memory / 1000000000) + 0.25
-        else:
-            # memory was precomputed
-            gig = precomp_gig
-
-        # gig = compute_amount_of_memory(block_sizes, q_length_max, args.opt_cesar)
-        # # 0 gene; 1 chains; 2 bed_file; 3 bdb chain_file; 4 tDB; 5 qDB; 6 output; 7 cesar_bin
-        job = WRAPPER_TEMPLATE.format(
-            gene,
-            chains_arg,
-            os.path.abspath(args.bdb_bed_file),
-            os.path.abspath(args.bdb_chain_file),
-            os.path.abspath(args.tDB),
-            os.path.abspath(args.qDB),
-            os.path.abspath(args.cesar_binary),
-            args.uhq_flank,
-            gig,
-        )
-        # add some flags if required
-        job = job + " --mask_stops" if args.mask_stops else job
-        job = job + " --check_loss" if args.check_loss else job
-        job = job + " --no_fpi" if args.no_fpi else job
-        job = job + " --fragments" if gene_fragments else job
-        job = job + " --opt_cesar" if args.opt_cesar else job
-
-        # add U12 introns data if this gene has them:
-        job = job + f" --u12 {os.path.abspath(args.u12)}" if u12_this_gene else job
-
-        # define whether it's an ordinary or a bigmem job
-        # depending on the memory requirements
-        if gig <= mem_limit:  # ordinary job
-            all_jobs[job] = gig
-        elif gig <= BIGMEM_LIM:
-            skipped_3.append((gene, ",".join(chains), f"requires {gig}) -> bigmem job"))
-            predef_glp[gene] = f"{TRANSCRIPT}\tM"
-            bigmem_jobs.append(job)
-        else:
-            skipped_3.append(
-                (
-                    gene,
-                    ",".join(chains),
-                    f"big mem limit ({BIGMEM_LIM} gig) exceeded (needs {gig})",
-                )
-            )
-            predef_glp[gene] = f"{TRANSCRIPT}\tM"
+        chain_arg_to_gig = compute_memory(chains,
+                                          precomp_gig,
+                                          block_sizes,
+                                          gene_chains_data,
+                                          gene_fragments,
+                                          mem_limit)
+        for chains_tup, (gig, stat) in chain_arg_to_gig.items():
+            chains_arg = ",".join(chains_tup)
+            job = build_job(gene,
+                            chains_arg,
+                            args,
+                            gene_fragments,
+                            trans_to_reg_precomp_file,
+                            u12_this_gene,
+                            mask_all_first_10p=args.mask_all_first_10p)
+
+            # define whether it's an ordinary or a bigmem job
+            # depending on the memory requirements
+            if stat == MEM_FIT:  # ordinary job
+                all_jobs[job] = gig
+            elif stat == MEM_BIGMEM:
+                to_app = (gene, chains_arg, f"requires {gig} gig -> bigmem job")
+                skipped_3.append(to_app)
+                bigmem_jobs.append(job)
+                for chain_id in chains_tup:
+                    proj_id = f"{gene}.{chain_id}"
+                    predef_glp_class__chains_step[proj_id] = f"{PROJECTION}\tM"
+            else:
+                to_app = (
+                    gene,
+                    chains_arg,
+                    f"big mem limit ({BIGMEM_LIM} gig) exceeded (needs {gig})",
                )
+                skipped_3.append(to_app)
+                for chain_id in chains_tup:
+                    proj_id = f"{gene}.{chain_id}"
+                    predef_glp_class__chains_step[proj_id] = f"{PROJECTION}\tM"
+
+    # TODO: predefined GLP classes to be refactored
+    predefined_glp_class.update(predef_glp_class__chains_step)

     eprint(f"\nThere are {len(all_jobs.keys())} jobs in total.")
     eprint("Splitting the jobs.")
@@ -796,7 +935,12 @@ def main():

     # save joblist of joblists
     save_combined_joblist(
-        to_combine, args.combined, args.results, args.check_loss, args.rejected_log
+        to_combine,
+        args.combined,
+        args.results,
+        args.check_loss,
+        args.rejected_log,
+        args.unprocessed_log
     )

     # save bigmem jobs, a bit different logic
@@ -808,6 +952,7 @@ def main():
         args.results,
         args.check_loss,
         args.rejected_log,
+        None,  # TODO: decide what we do with this branch
         name="bigmem",
     )
diff --git a/supply/extract_codon_alignment.py b/supply/extract_codon_alignment.py
index e87b0ee..bb7016c 100644
--- a/supply/extract_codon_alignment.py
+++ b/supply/extract_codon_alignment.py
@@ -27,7 +27,6 @@
 # actually, it was nearly random CESAR alignment for this exon
 # v2.2 fixed a bug: wrong handling of completely deleted exons

-
 SEQ_NUMBER_LIMIT = 1500
 CESAR_RESULTS_FILE = "cesar_results.txt"
 EXON_SEQ_CLASSES = {"query_exon", "reference_exon"}
@@ -217,6 +216,22 @@ def parse_args():
         action="store_true",
         help="Do not consider UL projections as orthologous (NOT IMPLEMENTED YET)",
     )
+    app.add_argument(
+        "--force_repair",
+        "--fr",
+        dest="force_repair",
+        action="store_true",
+        help=("Force repair of missing parts of the alignment. "
+              "Please use it if the script continuously fails to produce "
+              "a result, which can happen for massive alignments with "
+              "abundant missing or corrupted sequence.")
+    )
+    app.add_argument(
+        "--save_aligner_commands",
+        default=None,
+        help="Save the sequence of MACSE commands to the specified location. "
+        "Temporary files will not be deleted!"
+    )

     # if no args: print help message
     if len(sys.argv) < MIN_ARG_NUM:
@@ -610,13 +625,16 @@ def macse_alignment(in_fasta, temp_dir, macse_caller, v=False):
     )
     cmd = f"{macse_caller} -prog alignSequences -seq {STDIN} -out_NT {out_file} -out_AA {to_del_file}"
     if v:
-        print_stderr("Aligning:")
-        print_stderr(in_fasta)
+        print_stderr("Calling:\n")
+        print_stderr(cmd)
+        # print_stderr("Aligning:")
+        # print_stderr(in_fasta)
     p = subprocess.Popen(cmd, stdin=PIPE, stderr=PIPE, stdout=PIPE, shell=True)
     _, stderr_ = p.communicate(input=in_fasta.encode())
     rc = p.returncode
     if rc != 0:  # MACSE crashed
         print_stderr("# Error! 
Macse CRASHED") + # print_stderr(f"Input file: {in_fasta}") err_msg = stderr_.decode(UTF_8) print_stderr(err_msg) sys.exit(1) @@ -742,19 +760,20 @@ def split_into_exons(sp_to_codon_alis, codon_to_exon, codon_to_seq, debug=False) ret = {} ali_id_to_seqs = defaultdict(dict) codon_tot_num_ = len(codon_to_exon) - if debug: - print_stderr("Codon to exon_dict") - print_stderr(codon_to_exon) - print_stderr(f"In total {codon_tot_num_} codons") + # if debug: + # TODO: add to level 2 verbosity + # print_stderr("Codon to exon_dict") + # print_stderr(codon_to_exon) + # print_stderr(f"In total {codon_tot_num_} codons") - print_stderr(f"Codon sequences:\n{codon_to_seq}") + # print_stderr(f"Codon sequences:\n{codon_to_seq}") for seq_id, seq in sp_to_codon_alis.items(): sp, proj_id, is_ref = seq_id key_ = (sp, proj_id) ali_id_to_seqs[key_][is_ref] = seq for k, v in ali_id_to_seqs.items(): - print_stderr(f"processing {k}") if debug else None + # print_stderr(f"processing {k}") if debug else None ref_codons = v[True].split() que_codons = v[False].split() codon_num = len(ref_codons) @@ -767,13 +786,13 @@ def split_into_exons(sp_to_codon_alis, codon_to_exon, codon_to_seq, debug=False) for i_num, (r_c, q_c) in enumerate(zip(ref_codons, que_codons)): exon_num = codon_to_exon[ref_codon_num] debug_line = f"{k}: extracting codon {ref_codon_num} out of {codon_tot_num_} | exon {exon_num}" - print_stderr(debug_line) if debug else None + # print_stderr(debug_line) if debug else None expected_ref_codon = codon_to_seq[ref_codon_num] debug_line = ( f"{r_c} -> {expected_ref_codon} {q_c} {ref_codon_num} {exon_num}" ) - print_stderr(debug_line) if debug else None + # print_stderr(debug_line) if debug else None ref_is_gap = r_c == CODON_GAP ref_is_XXX = r_c == CODON_XXX ref_has_N = "N" in r_c @@ -781,7 +800,7 @@ def split_into_exons(sp_to_codon_alis, codon_to_exon, codon_to_seq, debug=False) ref_is_undefined = ref_is_gap or ref_is_XXX or ref_has_N debug_line = f"# ref gap: {ref_is_gap} ref unknown: {ref_is_XXX} ref expected: {ref_is_exp}" - print_stderr(debug_line) if debug else None + # print_stderr(debug_line) if debug else None if not ref_is_gap: ref_codon_num += 1 @@ -790,12 +809,12 @@ def split_into_exons(sp_to_codon_alis, codon_to_exon, codon_to_seq, debug=False) and ref_is_undefined is False and exp_codon_lock is False ): - print_stderr(f"Warning! {k}: Fixing ref codon_num..") + # print_stderr(f"Warning! {k}: Fixing ref codon_num..") wrong_codon_num = ref_codon_num ref_codon_num = fix_codon_num( wrong_codon_num, expected_ref_codon, codon_to_seq ) - print_stderr(f"Corrected from {wrong_codon_num} to {ref_codon_num}") + # print_stderr(f"Corrected from {wrong_codon_num} to {ref_codon_num}") codon_pair = (r_c, q_c) exon_to_sequences[exon_num].append(codon_pair) @@ -804,7 +823,7 @@ def split_into_exons(sp_to_codon_alis, codon_to_exon, codon_to_seq, debug=False) # this will cause an index error # need to handle this overflow correctrly exp_codon_lock = True - print_stderr(f"Warning! {k}: Codon sequence overflow...") + # print_stderr(f"Warning! 
{k}: Codon sequence overflow...") ref_codon_num = codon_tot_num_ - 1 ret[k] = exon_to_sequences @@ -816,19 +835,28 @@ def reformat_codon_alis(sp_to_exon_codon_alis, all_exons): ret = defaultdict(dict) for (sp, proj_id), v in sp_to_exon_codon_alis.items(): for exon in all_exons: - exon_related_seq = v[exon] - ref_codons = [x[0] for x in exon_related_seq] - que_codons = [x[1] for x in exon_related_seq] - ref_seq = "".join(ref_codons) - que_seq = "".join(que_codons) + exon_related_seq = v.get(exon, None) ref_seq_key = (sp, proj_id, True) que_seq_key = (sp, proj_id, False) - ret[exon][ref_seq_key] = ref_seq - ret[exon][que_seq_key] = que_seq + + if exon_related_seq: + ref_codons = [x[0] for x in exon_related_seq] + que_codons = [x[1] for x in exon_related_seq] + ref_seq = "".join(ref_codons) + que_seq = "".join(que_codons) + ret[exon][ref_seq_key] = ref_seq + ret[exon][que_seq_key] = que_seq + else: + # print(sp_to_exon_codon_alis) + # print(f"Error here, cannot find key: {exon}") + # print(v) + # sys.exit(1) + ret[exon][ref_seq_key] = "NNN" + ret[exon][que_seq_key] = "NNN" return ret -def merge_exon_fastas(fastas_list, fasta_headers): +def merge_exon_fastas(fastas_list, fasta_headers, force_repair=False): """Merge per-exon fastas.""" id_to_chunks = defaultdict(list) fasta_lines = [] @@ -1221,6 +1249,7 @@ def main(): ) # ALIGNING EXON-BY-EXON else: + commands_sequence = [] fasta_headers_all = set([f"{x[0]}\t{x[1]}" for x in sp_to_codon_alis.keys()]) # we like to align exons separately, first of all get # codon number -> exon number mapping @@ -1260,7 +1289,9 @@ def main(): aligned_fasta_exon, f"aligned_exon_{exon}", args.intermediate_data ) aligned_fastas.append(aligned_fasta_exon) - aligned_fasta = merge_exon_fastas(aligned_fastas, fasta_headers_all) + aligned_fasta = merge_exon_fastas(aligned_fastas, + fasta_headers_all, + force_repair=args.force_repair) f = open(args.output, "w") if args.output else sys.stdout f.write(aligned_fasta) diff --git a/toga.py b/toga.py index c61df23..e68931c 100755 --- a/toga.py +++ b/toga.py @@ -23,8 +23,8 @@ from modules.merge_chains_output import merge_chains_output from modules.make_pr_pseudogenes_anno import create_ppgene_track from modules.merge_cesar_output import merge_cesar_output -from modules.gene_losses_summary import I, gene_losses_summary -from modules.orthology_type_map import orthology_type_map, trim_prefix +from modules.gene_losses_summary import gene_losses_summary +from modules.orthology_type_map import orthology_type_map from modules.classify_chains import classify_chains from modules.get_transcripts_quality import classify_transcripts from modules.make_query_isoforms import get_query_isoforms_data @@ -57,10 +57,19 @@ CESAR_RUNNER_TMP = "{0} {1} {2} --check_loss {3} --rejected_log {4}" CESAR_PRECOMPUTED_REGIONS_DIRNAME = "cesar_precomputed_regions" CESAR_PRECOMPUTED_MEMORY_DIRNAME = "cesar_precomputed_memory" +CESAR_PRECOMPUTED_ORTHO_LOCI_DIRNAME = "cesar_precomputed_orthologous_loci" + CESAR_PRECOMPUTED_MEMORY_DATA = "cesar_precomputed_memory.tsv" +CESAR_PRECOMPUTED_REGIONS_DATA = "cesar_precomputed_regions.tsv" +CESAR_PRECOMPUTED_ORTHO_LOCI_DATA = "cesar_precomputed_orthologous_loci.tsv" + +NUM_CESAR_MEM_PRECOMP_JUBS = 500 + + TEMP_CHAIN_CLASS = "temp_chain_trans_class" MODULES_DIR = "modules" RUNNING = "RUNNING" +CRASHED = "CRASHED" TEMP = "temp" # automatically enable flush @@ -92,10 +101,11 @@ def __init__(self, args): else: # if none: we cannot call os.path.abspath method self.nextflow_bigmem_config = None self.__check_nf_config() + # to 
avoid crash on filesystem without locks: - os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" # otherwise it could crash + os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" # temporary fix for DSL error in recent NF versions - os.environ["NXF_DEFAULT_DSL"] = "1" + os.environ["NXF_DEFAULT_DSL"] = "1" chain_basename = os.path.basename(args.chain_input) @@ -184,7 +194,11 @@ def __init__(self, args): self.cesar_binary = ( self.DEFAULT_CESAR if not args.cesar_binary else args.cesar_binary ) + self.opt_cesar_binary = os.path.abspath( + os.path.join(LOCATION, "cesar_input_optimiser.py") + ) self.cesar_is_opt = args.using_optimized_cesar + self.output_opt_cesar_regions = args.output_opt_cesar_regions self.time_log = args.time_marks self.stop_at_chain_class = args.stop_at_chain_class self.rejected_log = os.path.join(self.wd, "genes_rejection_reason.tsv") @@ -207,6 +221,7 @@ def __init__(self, args): self.keep_nf_logs = args.do_not_del_nf_logs self.exec_cesar_parts_sequentially = args.cesar_exec_seq self.ld_model_arg = args.ld_model + self.mask_all_first_10p = args.mask_all_first_10p self.cesar_ok_merged = ( None # Flag: indicates whether any cesar job BATCHES crashed @@ -237,12 +252,20 @@ def __init__(self, args): self.gene_loss_data = os.path.join(self.temp_wd, "inact_mut_data") self.query_annotation = os.path.join(self.wd, "query_annotation.bed") self.loss_summ = os.path.join(self.wd, "loss_summ_data.tsv") + # directory to store intermediate files with technically unprocessable transcripts: + self.technical_cesar_err = os.path.join(self.temp_wd, "technical_cesar_err") + # unprocessed transcripts to be considered Missing: + self.technical_cesar_err_merged = os.path.join( + self.temp_wd, "technical_cesar_err.txt" + ) + self.bed_fragm_exons_data = os.path.join( self.temp_wd, "bed_fragments_to_exons.tsv" ) self.precomp_mem_cesar = os.path.join( self.temp_wd, CESAR_PRECOMPUTED_MEMORY_DATA ) + self.precomp_reg_dir = None self.cesar_mem_was_precomputed = False self.u12_arg = args.u12 self.u12 = None # assign after U12 file check @@ -517,14 +540,14 @@ def __check_isoforms_file(self, t_in_bed): f.write(f"{gene}\t{trans}\n") print("Isoforms file is OK") - @staticmethod - def die(msg, rc=1): + def die(self, msg, rc=1): """Show msg in stderr, exit with the rc given.""" print(msg) print(f"Program finished with exit code {rc}\n") # for t_file in self.temp_files: # remove temp files if required # os.remove(t_file) if os.path.isfile(t_file) and not self.keep_temp else None # shutil.rmtree(t_file) if os.path.isdir(t_file) and not self.keep_temp else None + self.__mark_crashed() sys.exit(rc) def __modules_addr(self): @@ -695,7 +718,7 @@ def __call_proc(self, cmd, extra_msg=None): rc = subprocess.call(cmd, shell=True) if rc != 0: print(extra_msg) if extra_msg else None - self.die(f"Error! Process {cmd} died! Abort.") + self.die(f"Error! Process:\n{cmd}\ndied! Abort.") print(f"{cmd} done with code 0") def __find_two_bit(self, db): @@ -770,7 +793,7 @@ def run(self): # 9) classify projections/genes as lost/intact # also measure projections confidence levels print("#### STEP 9: Gene loss pipeline classification\n") - self.__transcript_quality() + self.__transcript_quality() # maybe remove -> not used anywhere self.__gene_loss_summary() self.__time_mark("Got gene loss summary") @@ -793,7 +816,6 @@ def run(self): print(f"{self.cesar_crashed_batches_log}\n") print(f"Saved results to {self.wd}") self.__left_done_mark() - self.die(f"Done! 
Estimated time: {dt.now() - self.t0}", rc=0) def __mark_start(self): """Indicate that TOGA process have started.""" @@ -803,6 +825,16 @@ def __mark_start(self): f.write(f"TOGA process started at {now_}\n") f.close() + def __mark_crashed(self): + """Indicate that TOGA process died.""" + running_f = os.path.join(self.wd, RUNNING) + crashed_f = os.path.join(self.wd, CRASHED) + os.remove(running_f) if os.path.isfile(running_f) else None + f = open(crashed_f, "w") + now_ = str(dt.now()) + f.write(f"TOGA CRASHED AT {now_}\n") + f.close() + def __make_indexed_chain(self): """Make chain index file.""" # make *.bb file @@ -920,7 +952,9 @@ def __classify_chains(self): self.pred_scores = os.path.join(self.temp_wd, "orthology_scores.tsv") self.se_model = os.path.join(self.LOCATION, "models", "se_model.dat") self.me_model = os.path.join(self.LOCATION, "models", "me_model.dat") - self.ld_model = os.path.join(self.LOCATION, "long_distance_model", "long_dist_model.dat") + self.ld_model = os.path.join( + self.LOCATION, "long_distance_model", "long_dist_model.dat" + ) cl_rej_log = os.path.join(self.rejected_dir, "classify_chains_rejected.txt") ld_arg_ = self.ld_model if self.ld_model_arg else None if not os.path.isfile(self.se_model) or not os.path.isfile(self.me_model): @@ -933,7 +967,7 @@ def __classify_chains(self): rejected=cl_rej_log, raw_out=self.pred_scores, annot_threshold=self.orth_score_threshold, - ld_model=ld_arg_ + ld_model=ld_arg_, ) # extract not classified transcripts # first column in the rejected log @@ -968,6 +1002,128 @@ def __split_file(src, dst_dir, pieces_num): f.write("".join(piece)) f.close() return paths_to_pieces + + def __get_transcript_to_strand(self): + """Get """ + ret = {} + f = open(self.ref_bed, 'r') + for line in f: + ld = line.rstrip().split("\t") + trans = ld[3] + direction = ld[5] + ret[trans] = direction + f.close() + return ret + + def __get_chain_to_qstrand(self): + ret = {} + f = open(self.chain_file, "r") + for line in f: + if not line.startswith("chain"): + continue + fields = line.rstrip().split() + chain_id = int(fields[-1]) + q_strand = fields[9] + ret[chain_id] = q_strand + f.close() + return ret + + def __fold_exon_data(self, exons_data, out_bed): + """Convert exon data into bed12.""" + projection_to_exons = defaultdict(list) + # 1: make projection: + f = open(exons_data, "r") + for line in f: + ld = line.rstrip().split("\t") + transcript, _chain, _start, _end = ld + chain = int(_chain) + start = int(_start) + end = int(_end) + region = (start, end) + projection_to_exons[(transcript, chain)].append(region) + # 2 - get search loci for each projection + projection_to_search_loc = {} + + # CESAR_PRECOMPUTED_ORTHO_LOCI_DATA = "cesar_precomputed_orthologous_loci.tsv" + f = open(self.precomp_query_loci_path, "r") + for elem in f: + elem_data = elem.split("\t") + transcript = elem_data[1] + chain = int(elem_data[2]) + projection = f"{transcript}.{chain}" + search_locus = elem_data[3] + chrom, s_e = search_locus.split(":") + s_e_split = s_e.split("-") + start, end = int(s_e_split[0]), int(s_e_split[1]) + projection_to_search_loc[(transcript, chain)] = (chrom, start, end) + f.close() + trans_to_strand = self.__get_transcript_to_strand() + chain_to_qstrand = self.__get_chain_to_qstrand() + # 3 - save bed 12 + f = open(out_bed, "w") + # print(projection_to_search_loc) + for (transcript, chain), exons in projection_to_exons.items(): + # print(transcript, chain) + projection = f"{transcript}.{chain}" + trans_strand = trans_to_strand[transcript] + chain_strand = 
chain_to_qstrand[chain] + to_invert = trans_strand != chain_strand + + exons_sort = sorted(exons, key=lambda x: x[0]) + if to_invert: + exons_sort = exons_sort[::-1] + + # crash here + search_locus = projection_to_search_loc[(transcript, chain)] + chrom = search_locus[0] + search_start = search_locus[1] + search_end = search_locus[2] + + if to_invert: + bed_start = search_end - exons_sort[0][1] + bed_end = search_end - exons_sort[-1][0] + else: + bed_start = exons_sort[0][0] + search_start + bed_end = exons_sort[-1][1] + search_start + + block_sizes, block_starts = [], [] + for exon in exons_sort: + abs_start_in_s, abs_end_in_s = exon + # print(exon) + + if to_invert: + abs_start = search_end - abs_end_in_s + abs_end = search_end - abs_start_in_s + else: + abs_start = abs_start_in_s + search_start + abs_end = abs_end_in_s + search_start + + # print(abs_start, abs_end) + + rel_start = abs_start - bed_start + block_size = abs_end - abs_start + block_sizes.append(block_size) + block_starts.append(rel_start) + block_sizes_field = ",".join(map(str, block_sizes)) + block_starts_field = ",".join(map(str, block_starts)) + all_fields = ( + chrom, + bed_start, + bed_end, + projection, + 0, + 0, + bed_start, + bed_end, + "0,0,0", + len(exons), + block_sizes_field, + block_starts_field, + ) + f.write("\t".join(map(str, all_fields))) + f.write("\n") + f.close() + f.close() def __precompute_data_for_opt_cesar(self): """Precompute memory data for optimised CESAR. @@ -977,28 +1133,56 @@ def __precompute_data_for_opt_cesar(self): """ if not self.cesar_is_opt: return # in case we use standard CESAR this procedure is not needed + + # need gene: chains + # artificial bed file with all possible exons per gene, maybe the longest if intersection... or not? + # what if there is a giant exon that does not align but smaller versions do? 
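+        # What follows, in order (descriptive summary of the code below):
+        #   1) create dirs for per-part memory, precomputed-region and
+        #      orthologous-locus files (all registered as temp files);
+        #   2) split the gene-to-chains table into NUM_CESAR_MEM_PRECOMP_JUBS pieces;
+        #   3) build one precompute command per piece (--ro / --ol outputs) and
+        #      push the joblist via para or nextflow;
+        #   4) merge the per-part outputs; with --output_opt_cesar_regions,
+        #      additionally fold the exon regions into a bed file and exit.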
print("COMPUTING MEMORY REQUIREMENTS FOR OPTIMISED CESAR") mem_dir = os.path.join(self.wd, CESAR_PRECOMPUTED_MEMORY_DIRNAME) + self.precomp_reg_dir = os.path.join(self.wd, CESAR_PRECOMPUTED_REGIONS_DIRNAME) chain_class_temp_dir = os.path.join(self.wd, TEMP_CHAIN_CLASS) + self.precomp_query_loci_dir = os.path.join( + self.wd, CESAR_PRECOMPUTED_ORTHO_LOCI_DIRNAME + ) + self.precomp_query_loci_path = os.path.join( + self.temp_wd, CESAR_PRECOMPUTED_ORTHO_LOCI_DATA + ) + os.mkdir(mem_dir) if not os.path.isdir(mem_dir) else None os.mkdir(chain_class_temp_dir) if not os.path.isdir( chain_class_temp_dir ) else None + os.mkdir(self.precomp_reg_dir) if not os.path.isdir( + self.precomp_reg_dir + ) else None + os.mkdir(self.precomp_query_loci_dir) if not os.path.isdir( + self.precomp_query_loci_dir + ) else None + self.temp_files.append(mem_dir) self.temp_files.append(chain_class_temp_dir) + self.temp_files.append(self.precomp_reg_dir) + self.temp_files.append(self.precomp_query_loci_dir) + # split file containing gene: orthologous/paralogous chains into 100 pieces - class_pieces = self.__split_file(self.orthologs, chain_class_temp_dir, 100) + class_pieces = self.__split_file( + self.orthologs, chain_class_temp_dir, NUM_CESAR_MEM_PRECOMP_JUBS + ) precompute_jobs = [] # for nextflow abspath is better: precomp_abspath = os.path.abspath(self.PRECOMPUTE_OPT_CESAR_DATA) - cesar_bin_abspath = os.path.abspath(self.cesar_binary) + # cesar_bin_abspath = os.path.abspath(self.cesar_binary) + # create joblist: one job per piece for num, class_piece in enumerate(class_pieces, 1): - out_path = os.path.join(mem_dir, f"part_{num}") + out_m_path = os.path.join(mem_dir, f"part_{num}") + out_reg_path = os.path.join(self.precomp_reg_dir, f"part_{num}") + out_ol_path = os.path.join(self.precomp_query_loci_dir, f"path_{num}") + cmd = ( - f"{precomp_abspath} {class_piece} {self.wd} {cesar_bin_abspath} " - f"{self.t_2bit} {self.q_2bit} {out_path}" + f"{precomp_abspath} {class_piece} {self.wd} {self.opt_cesar_binary} " + f"{self.t_2bit} {self.q_2bit} {out_m_path} --ro {out_reg_path} --ol {out_ol_path}" ) precompute_jobs.append(cmd) # save joblist @@ -1016,7 +1200,7 @@ def __precompute_data_for_opt_cesar(self): print(f"Project path: {project_path}") if self.para: # run jobs with para, skip nextflow - cmd = f'para make {project_name} {precomp_mem_joblist} -q="short"' + cmd = f'para make {project_name} {precomp_mem_joblist} -q="shortmed"' print(f"Calling {cmd}") rc = subprocess.call(cmd, shell=True) else: # calling jobs with nextflow @@ -1038,8 +1222,18 @@ def __precompute_data_for_opt_cesar(self): shutil.rmtree(project_path) if os.path.isdir(project_path) else None # merge files and quit self.__merge_dir(mem_dir, self.precomp_mem_cesar) + # self.__merge_dir(precomp_reg_dir, self.precomp_reg_cesar) self.cesar_mem_was_precomputed = True + if self.output_opt_cesar_regions: + save_to = os.path.join(self.wd, "precomp_regions_for_cesar.bed") + exon_regions_df = os.path.join(self.wd, "precomp_regions_exons.tsv") + self.__merge_dir(self.precomp_reg_dir, exon_regions_df) + self.__merge_dir(self.precomp_query_loci_dir, self.precomp_query_loci_path) + self.__fold_exon_data(exon_regions_df, save_to) + print(f"Bed file containing precomputed regions is saved to: {save_to}") + sys.exit(0) + def __split_cesar_jobs(self): """Call split_exon_realign_jobs.py.""" if not self.t_2bit or not self.q_2bit: @@ -1073,9 +1267,14 @@ def __split_cesar_jobs(self): self.cesar_combined = os.path.join(self.temp_wd, "cesar_combined") self.cesar_results = 
os.path.join(self.temp_wd, "cesar_results") self.cesar_bigmem_jobs = os.path.join(self.temp_wd, "cesar_bigmem") + self.temp_files.append(self.cesar_jobs_dir) self.temp_files.append(self.cesar_combined) self.temp_files.append(self.predefined_glp_cesar_split) + self.temp_files.append(self.technical_cesar_err) + os.mkdir(self.technical_cesar_err) if not os.path.isdir( + self.technical_cesar_err + ) else None # different field names depending on --ml flag @@ -1083,6 +1282,9 @@ def __split_cesar_jobs(self): self.temp_files.append(self.gene_loss_data) skipped_path = os.path.join(self.rejected_dir, "SPLIT_CESAR.txt") self.paralogs_log = os.path.join(self.temp_wd, "paralogs.txt") + cesar_binary_to_use = ( + self.opt_cesar_binary if self.cesar_is_opt else self.cesar_binary + ) split_cesar_cmd = ( f"{self.SPLIT_EXON_REALIGN_JOBS} " @@ -1099,15 +1301,17 @@ def __split_cesar_jobs(self): f"--chains_limit {self.cesar_chain_limit} " f"--skipped_genes {skipped_path} " f"--rejected_log {self.rejected_dir} " - f"--cesar_binary {self.cesar_binary} " + f"--cesar_binary {cesar_binary_to_use} " f"--paralogs_log {self.paralogs_log} " f"--uhq_flank {self.uhq_flank} " - f"--predefined_glp_class_path {self.predefined_glp_cesar_split}" + f"--predefined_glp_class_path {self.predefined_glp_cesar_split} " + f"--unprocessed_log {self.technical_cesar_err}" ) if self.annotate_paralogs: # very rare occasion split_cesar_cmd += f" --annotate_paralogs" - + if self.mask_all_first_10p: + split_cesar_cmd += f" --mask_all_first_10p" # split_cesar_cmd = split_cesar_cmd + f" --cesar_binary {self.cesar_binary}" \ # if self.cesar_binary else split_cesar_cmd split_cesar_cmd = ( @@ -1131,6 +1335,7 @@ def __split_cesar_jobs(self): split_cesar_cmd += f" --fragments_data {fragm_dict_file}" if self.cesar_mem_was_precomputed: split_cesar_cmd += f" --precomp_memory_data {self.precomp_mem_cesar}" + split_cesar_cmd += f" --precomp_regions_data_dir {self.precomp_reg_dir}" self.__call_proc(split_cesar_cmd, "Could not split CESAR jobs!") def __get_cesar_jobs_for_bucket(self, comb_file, bucket_req): @@ -1255,7 +1460,7 @@ def __run_cesar_jobs(self): nf_project_path = os.path.join(self.nextflow_dir, nf_project_name) project_paths.append(nf_project_path) - # create subprocess object + # create subprocess object if not self.para: # create nextflow cmd os.mkdir(nf_project_path) if not os.path.isdir( nf_project_path @@ -1271,7 +1476,7 @@ def __run_cesar_jobs(self): if memory_mb > 0: cmd += f" --memoryMb={memory_mb}" p = subprocess.Popen(cmd, shell=True) - + sys.stderr.write(f"Pushed cluster jobs with {cmd}\n") # wait for the process if calling processes sequentially @@ -1389,8 +1594,11 @@ def __rebuild_crashed_jobs(self, crashed_jobs): chain_ids = self.__read_chain_arg(chains_arg) if chain_ids is None: continue - memlim_arg_ind = elem_args.index(MEMLIM_ARG) + 1 - mem_val = float(elem_args[memlim_arg_ind]) + try: + memlim_arg_ind = elem_args.index(MEMLIM_ARG) + 1 + mem_val = float(elem_args[memlim_arg_ind]) + except ValueError: + mem_val = self.cesar_mem_limit bucket_lim = self.__get_bucket_val(mem_val, buckets) if FRAGM_ARG in elem_args: @@ -1502,6 +1710,8 @@ def __check_cesar_completeness(self): p_objects.append(p) time.sleep(CESAR_PUSH_INTERVAL) self.__monitor_jobs(p_objects, project_paths, die_if_sc_1=True) + # TODO: maybe some extra sanity check + # need to check whether anything crashed again crashed_twice = [] for elem in err_log_files: @@ -1521,6 +1731,19 @@ def __check_cesar_completeness(self): err_msg = f"Some CESAR jobs crashed twice, please 
check {crashed_log}; Abort"
             self.die(err_msg, 1)

+    def __append_technical_err_to_predef_class(self, transcripts_path, out_path):
+        """Append technically unprocessable transcripts to the predefined classifications file."""
+        if not os.path.isfile(transcripts_path):
+            # in this case, we don't have transcripts with tech error
+            # can simply quit the function
+            return
+        with open(transcripts_path, "r") as f:
+            transcripts_list = [x.rstrip() for x in f]
+        f = open(out_path, "a")
+        for elem in transcripts_list:
+            f.write(f"TRANSCRIPT\t{elem}\tM\n")
+        f.close()
+
     def __merge_cesar_output(self):
         """Merge CESAR output, save final fasta and bed."""
         print("Merging CESAR output to make fasta and bed files.")
@@ -1538,6 +1761,17 @@
             self.trash_exons,
             fragm_data=self.bed_fragm_exons_data,
         )
+
+        # need to merge files containing transcripts that were not processed
+        # for some technical reason, such as intersecting fragments in the query
+        self.__merge_dir(
+            self.technical_cesar_err, self.technical_cesar_err_merged, ignore_empty=True
+        )
+
+        self.__append_technical_err_to_predef_class(
+            self.technical_cesar_err_merged, self.predefined_glp_cesar_split
+        )
+
         if len(all_ok) == 0:
             # there are no empty output files -> parsed without errors
             print("CESAR results merged")
@@ -1617,11 +1851,16 @@ def __orthology_type_map(self):
         )

     @staticmethod
-    def __merge_dir(dir_name, output):
+    def __merge_dir(dir_name, output, ignore_empty=False):
         """Merge all files in a directory into one."""
         files_list = os.listdir(dir_name)
-        if len(files_list) == 0:
+        if len(files_list) == 0 and ignore_empty is False:
             sys.exit(f"Error! {dir_name} is empty")
+        elif len(files_list) == 0 and ignore_empty is True:
+            # in this case we allow empty directories
+            # just remove the directory and return
+            shutil.rmtree(dir_name)
+            return
         buffer = open(output, "w")
         for filename in files_list:
             path = os.path.join(dir_name, filename)
@@ -1632,7 +1871,7 @@
             # buffer.write(lines)
             buffer.write(content)
         buffer.close()
-        shutil.rmtree(dir_name)
+        # shutil.rmtree(dir_name)

     def __check_crashed_cesar_jobs(self):
         """Check whether any CESAR jobs crashed.
@@ -1649,6 +1888,11 @@
             if "CESAR" not in line:
                 # not related to CESAR
                 continue
+            if "fragment chains oevrlap" in line:
+                # they are not really "crashed"
+                # those jobs could not produce a meaningful result
+                # only intersecting exons
+                continue
             # extract cesar wrapper command from the log
             cesar_cmd = line.split("\t")[0]
             self.crashed_cesar_jobs.append(cesar_cmd)
@@ -1771,9 +2015,6 @@ def parse_args():
         default=None,
         help="Find orthologs " "for a single reference chromosome only",
     )
-    # app.add_argument("--limit_to_query_chrom", default=None, help="Annotate "
-    #                  "a particular query scaffold/chromosome only")
-
     # nextflow related
     app.add_argument(
         "--nextflow_dir",
         "--nd",
@@ -1857,6 +2098,14 @@
         dest="using_optimized_cesar",
         help="Instead of CESAR, use lastz-based optimized version",
     )
+    app.add_argument(
+        "--output_opt_cesar_regions",
+        "--oocr",
+        action="store_true",
+        dest="output_opt_cesar_regions",
+        help="If optimised CESAR is used, save the regions it would process "
+        "and quit. This parameter is intended for debugging only.",
+    )
     app.add_argument(
         "--mask_stops",
         "--ms",
@@ -1880,7 +2129,7 @@
         action="store_true",
         dest="cesar_exec_seq",
         help="Execute different CESAR jobs partitions sequentially, "
-        "not in parallel."
+ "not in parallel.", ) app.add_argument( "--cesar_chain_limit", @@ -1946,13 +2195,30 @@ def parse_args(): dest="annotate_paralogs", action="store_true", help="Annotate paralogs instead of orthologs. " - "(experimental feature for very specific needs)", + "(experimental feature for very specific needs)", + ) + app.add_argument( + "--mask_all_first_10p", + "--m_f10p", + action="store_true", + dest="mask_all_first_10p", + help="Automatically mask all inactivating mutations in first 10 percent of " + "the reading frame, ignoring ATG codons distribution. " + "(Default mode in V1.0, not recommended to use in later versions)", ) # print help if there are no args if len(sys.argv) < 2: app.print_help() sys.exit(0) args = app.parse_args() + + # some sanity checks + if args.output_opt_cesar_regions and not args.using_optimized_cesar: + err_msg = ( + "Error! Please use --output_opt_cesar_regions parameter " + " with --using_optimized_cesar flag only" + ) + sys.exit(err_msg) return args diff --git a/ucsc_browser_visualisation/bb_schema.as b/ucsc_browser_visualisation/bb_schema.as new file mode 100644 index 0000000..d867477 --- /dev/null +++ b/ucsc_browser_visualisation/bb_schema.as @@ -0,0 +1,43 @@ +table togaBigBed +"TOGA predicted gene model" +( + string chrom; "Reference sequence chromosome or scaffold" + uint chromStart; "Start position in chromosome" + uint chromEnd; "End position in chromosome" + string name; "Name or ID of item, ideally both human readable and unique" + uint score; "Score (0-1000)" + char[1] strand; "+ or - for strand" + uint thickStart; "Start of where display should be thick (start codon)" + uint thickEnd; "End of where display should be thick (stop codon)" + uint itemRgb; "RGB value (use R,G,B string in input file)" + int blockCount; "Number of blocks" + int[blockCount] blockSizes; "Comma separated list of block sizes" + int[blockCount] chromStarts; "Start positions relative to chromStart" + + string ref_trans_id; "Reference transcript ID" + string ref_region; "Transcript region in the reference" + string query_region; "Region in the query" + float chain_score; "Chain orthology probability score" + + float chain_synteny; "Chain synteny log10 value" + float chain_flank; "Chain flank feature" + float chain_gl_cds_fract; "Chain global CDS fraction value" + float chain_loc_cds_fract; "Chain local CDS fraction value" + float chain_exon_cov; "Chain exon coverage value" + + float chain_intron_cov; "Chain intron coverage value" + string status; "Gene loss classification" + float perc_intact_ign_M; "% intact ignoring missing" + float perc_intact_int_M; "% intact considering missing as intact" + float intact_codon_prop; "% intact codons" + + float ouf_prop; "% out of chain" + string mid_intact; "Is middle 80% intact" + string mid_pres; "Is middle 80% fully present" + lstring prot_alignment; "HTML-formatted protein alignment" + lstring svg_line; "SVG inactivating mutations visualization" + + lstring ref_link; "Reference transcript link" + lstring inact_mut_html_table; "HTML-formatted inactivating mutations table" + lstring exon_ali_html; "HTML-formatted exon alignment" +) diff --git a/ucsc_browser_visualisation/generate_tab_files.py b/ucsc_browser_visualisation/generate_tab_files.py index 6933d88..c17c583 100755 --- a/ucsc_browser_visualisation/generate_tab_files.py +++ b/ucsc_browser_visualisation/generate_tab_files.py @@ -16,8 +16,8 @@ "INTACT_PERC_INTACT_M", "INTACT_CODONS_PROP", "OUT_OF_CHAIN_PROP", - "MIDDLE_80%_INTACT", - "MIDDLE_80%_PRESENT", + "MIDDLE_IS_INTACT", + 
"MIDDLE_IS_PRESENT", ] # for assembled from fragments: we cannot get chain class features FRAGM_FEATS = (0, 0.0, 0.0, 0.0, 0.0, 0.0) @@ -61,6 +61,11 @@ def parse_args(): "data from default output fasta files. Will be " "the default behaviour in the future"), ) + app.add_argument("--no_plots", + "--np", + dest="no_plots", + action="store_true", + help="If inactivating mutation plots are already generated, do not recreate them") if len(sys.argv) < 3: app.print_help() sys.exit(0) @@ -449,7 +454,7 @@ def get_seq_data_from_fasta(wd, all_projections): return [], projection_to_prot_ali, projection_to_codon_ali -def get_sequence_data(wd, all_projections): +def get_sequence_data(wd, all_projections, exon_to_stat): """Parse nucleotide and protein sequence data.""" print("Reading sequence data") gigafasta = os.path.join(wd, TEMP, CESAR_RESULTS) @@ -525,6 +530,7 @@ def get_sequence_data(wd, all_projections): ali_class = header_data[7] exp_range = header_data[8] in_exp = ONE_S if header_data[9] == "INC" else ZERO_S + is_del_or_no = exon_to_stat.get(exon_id, "I") exon_data = ( location, pid, @@ -533,6 +539,7 @@ def get_sequence_data(wd, all_projections): ali_class, exp_range, in_exp, + is_del_or_no, # added 26 Aug 2022 sequence, ) query_exon_to_nucl_data[exon_id] = exon_data @@ -604,6 +611,7 @@ def get_inact_data(wd, all_projections): projection_to_inact_muts = [] proj_to_inact_features = defaultdict(dict) proj_to_inact_features_rows = [] + exon_to_del_miss = {} f = open(inact_mut_file, "r") for line in f: if not line.startswith("#"): @@ -614,25 +622,35 @@ def get_inact_data(wd, all_projections): projection = f"{trans}.{chain}" if projection not in all_projections: continue + if len(line_data) == 8: # inactivating mutation data + + # IF Deleted exon OR Missing exon -> also save the data + # needed in exon ali visualizations mask_field = line_data[6] is_inact = ZERO_S if mask_field == "masked" else ONE_S line_data[6] = is_inact + mut_trimmed = line_data[5][:20] + line_data[5] = mut_trimmed mut_track = line_data[2:] sql_row = (projection, *mut_track) projection_to_inact_muts.append(sql_row) + if line_data[4] == "Deleted exon": + exon_to_del_miss[(projection, int(line_data[2]))] = "D" + elif line_data[4] == "Missing exon": + exon_to_del_miss[(projection, int(line_data[2]))] = "M" continue else: # features feat, val_ = line_data[2].split() # boolean features to num (for SQL table) - if feat == "MIDDLE_80%_PRESENT": + if feat == "MIDDLE_IS_PRESENT": if val_ == "TRUE": val = ONE_S else: val = ZERO_S - elif feat == "MIDDLE_80%_INTACT": + elif feat == "MIDDLE_IS_INTACT": if val_ == "TRUE": val = ONE_S else: @@ -657,7 +675,7 @@ def get_inact_data(wd, all_projections): vals.append(val) sql_row = (proj, *vals) proj_to_inact_features_rows.append(sql_row) - return proj_to_inact_features_rows, projection_to_inact_muts + return proj_to_inact_features_rows, projection_to_inact_muts, exon_to_del_miss def save_toga_inact_mut_tab(out_dir, inact_mut_data): @@ -694,16 +712,17 @@ def main(): proj_to_chain_score = get_chain_scores(args.wd, all_projections) proj_to_chain_features = get_chain_features(args.wd, all_projections) projection_to_loss_class = get_projection_class(args.wd) + proj_to_inact_feat, proj_to_inact_mut, exon_to_stat = get_inact_data(args.wd, all_projections) + if args.no_raw_cesar_output: raise NotImplementedError("Cancelled branch") seq_data = get_seq_data_from_fasta(args.wd, all_projections) exit() else: - seq_data = get_sequence_data(args.wd, all_projections) + seq_data = get_sequence_data(args.wd, 
all_projections, exon_to_stat) projection_exon_data = seq_data[0] projection_to_prot_ali = seq_data[1] projection_to_codon_ali = seq_data[2] - proj_to_inact_feat, proj_to_inact_mut = get_inact_data(args.wd, all_projections) save_toga_info_tab( args.output, all_projections, diff --git a/ucsc_browser_visualisation/get_names_from_bed.py b/ucsc_browser_visualisation/get_names_from_bed.py new file mode 100755 index 0000000..1cc316e --- /dev/null +++ b/ucsc_browser_visualisation/get_names_from_bed.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +"""Just extract names from toga output bed file. + +Works like xenoRefGenelx.pl""" +import sys + +if len(sys.argv) != 2: + to_read = None + sys.exit(f"Usage: {sys.argv[0]} [query_annotation.bed] | sort -u > ix.txt") +else: + to_read = sys.argv[1] + +f = open(to_read, "r") +for line in f: + id_field = line.rstrip().split("\t")[3] + no_chain_id = ".".join(id_field.split(".")[:-1]) + dot_split = no_chain_id.split(".") + if len(dot_split) > 1: + to_out = [id_field, no_chain_id] + dot_split + else: + to_out = [id_field, no_chain_id] + line = "\t".join(to_out) + print(line) +f.close() diff --git a/ucsc_browser_visualisation/make_bigbed_data_public.py b/ucsc_browser_visualisation/make_bigbed_data_public.py new file mode 100755 index 0000000..8a2a118 --- /dev/null +++ b/ucsc_browser_visualisation/make_bigbed_data_public.py @@ -0,0 +1,608 @@ +#!/usr/bin/env python3 +"""Perform all operations to create BigBed file for UCSC browser.""" +import os +from re import sub +import sys +import argparse +import subprocess +from collections import defaultdict +import getpass + + +GENERATE_TAB_FILES = "generate_tab_files.py" +GENERATE_PLOTS = "make_togaPlot.py" +LOCATION = os.path.dirname(__file__) +SCHEMA_LOCATION = os.path.join(LOCATION, "bb_schema.as") + +IFEAT_PLACEHOLDER = ["0.0" for _ in range(6)] +PROT_PLACEHOLDER = ["NO_DATA"] +UNDEF = "UNDEFINED" +SVG_PLACEHOLDER = """ + + + + +""" +PLOT_PLACEHOLDER = [SVG_PLACEHOLDER, ] + + +def parse_args(): + """Parse args.""" + app = argparse.ArgumentParser() + app.add_argument("project_dir", help="Directory containing TOGA output") + app.add_argument("--do_not_cleanup", + action="store_true", + dest="do_not_cleanup", + help="Do not clean tabs dir up.") + app.add_argument("--bb_version", + default="v1", + help="Version tag") + app.add_argument("--no_plots", + "--np", + dest="no_plots", + action="store_true", + help="If inactivating mutation plots are already generated, do not recreate them") + # TODO: chroms sizes if not standard or not HL + if len(sys.argv) < 2: + app.print_help() + sys.exit(0) + args = app.parse_args() + return args + + +def call_gen_tab_files(project_dir): + """Call Generate tab files subprocess.""" + print(f"Calling {GENERATE_TAB_FILES}") + out_dir = os.path.join(project_dir, "tabs") + os.mkdir(out_dir) if not os.path.isdir(out_dir) else None + exe_ = os.path.join(LOCATION, GENERATE_TAB_FILES) + cmd = f"{exe_} {project_dir} {out_dir}" + rc = subprocess.call(cmd, shell=True) + if rc != 0: + raise ValueError(f"Command {cmd} died - cannot generate tab files") + print("5 of 6 tab files generated") + + +def call_gen_plot_files(project_dir, no_plots=False): + """Generate svg plots.""" + print(f"Calling {GENERATE_PLOTS}") + out_file = os.path.join(project_dir, "tabs", "togaPlot.tab") + if no_plots is True and os.path.isfile(out_file): + print(f"--no_plots flag is on, found {out_file} --> skip making plots") + return + elif no_plots is True and not os.path.isfile(out_file): + print(f"Error! 
--no_plots flag is on, but {out_file} not found\nAbort") + sys.exit(1) + exe_ = os.path.join(LOCATION, GENERATE_PLOTS) + cmd = f"{exe_} {project_dir} {out_file}" + rc = subprocess.call(cmd, shell=True) + if rc != 0: + raise ValueError(f"Command {cmd} died - cannot generate plots") + print("All tab files generated") + + +def read_tab_file(tab_file): + """Make trans_id: data track.""" + trans_to_dat = {} + f = open(tab_file, "r") + for line in f: + line_data = line.rstrip().split("\t") + trans = line_data[0] + data = line_data[1:] + trans_to_dat[trans] = data + f.close() + return trans_to_dat + + + +def __gen_info_placeholder(p_name): + """Generate info placeholder.""" + t_name = ".".join(p_name.split(".")[:-1]) + placeholder = [t_name, UNDEF, UNDEF, "0.0", "0", "0.0", "0.0", "0.0", "0.0", "0.0", UNDEF] + return placeholder + + +def make_toga_data(project_dir): + """Merge togaPlot, togaProt, togaInfo and togaInactFeat tables into togaData.""" + tabs_dir = os.path.join(project_dir, "tabs") + toga_info_file = os.path.join(tabs_dir, "togaInfo.tab") + toga_inact_feat_file = os.path.join(tabs_dir, "togaInactFeat.tab") + toga_plot_file = os.path.join(tabs_dir, "togaPlot.tab") + toga_prot_file = os.path.join(tabs_dir, "togaProt.tab") + out_file = os.path.join(tabs_dir, "togaData.tab") + + print("Reading tab files to merge") + trans_to_info = read_tab_file(toga_info_file) + trans_to_ifeat = read_tab_file(toga_inact_feat_file) + trans_to_plot = read_tab_file(toga_plot_file) + trans_to_prot = read_tab_file(toga_prot_file) + + transcripts = trans_to_info.keys() + print(f"Got data for {len(transcripts)} transcripts") + + f = open(out_file, "w") + for t in transcripts: + info = trans_to_info.get(t) + if info is None: + info = __gen_info_placeholder(t) + ifeat = trans_to_ifeat.get(t, IFEAT_PLACEHOLDER.copy()) + prot = trans_to_prot.get(t, PROT_PLACEHOLDER.copy()) + plot = trans_to_plot.get(t, PLOT_PLACEHOLDER.copy()) + data_lst = [t] + info + ifeat + prot + plot + tab_str = "\t".join(data_lst) + f.write(tab_str) + f.write("\n") + f.close() + # do cleanup + os.remove(toga_info_file) + os.remove(toga_inact_feat_file) + os.remove(toga_plot_file) + os.remove(toga_prot_file) + print("Done") + + +def _check_seq_of_intervals_intersect(intervals): + """Check whether any interval in the seq intersects""" + intervals_num = len(intervals) + for i in range(intervals_num - 1): + # (start, end) + curr_one = intervals[i] + next_one = intervals[i + 1] + # sorted by beginning + # if start of the next < end of the curr + # -> they intersect + if next_one[0] < curr_one[1]: + return (curr_one[0], curr_one[1]) + return None # nothing suspicious found + + +def _filter_bed(in_bed, out_bed): + """Filter out bed entries with: + - None chromosome + - Intersecting blocks.""" + _in = open(in_bed, "r") + _out = open(out_bed, "w") + for line in _in: + ld = line.rstrip().split("\t") + chrom = ld[0] + if chrom == "None": + continue + block_sizes = [int(x) for x in ld[10].split(",") if x != ""] + block_starts = [int(x) for x in ld[11].split(",") if x != ""] + block_count = len(block_starts) + if block_count == 1: + # no need to check whether exons intersect if there is only 1 exon + _out.write(line) + continue + block_ends = [block_starts[i] + block_sizes[i] for i in range(block_count)] + exon_ranges = sorted(zip(block_starts, block_ends), key=lambda x: x[0]) + ranges_intersect = _check_seq_of_intervals_intersect(exon_ranges) + if ranges_intersect: + continue + _out.write(line) + _in.close() + _out.close() + + +def 
make_sorted_bed(project_dir):
+    """For bigBed, we need a properly sorted bed file."""
+    non_sorted_bed = os.path.join(project_dir, "query_annotation.bed")
+    tabs_dir = os.path.join(project_dir, "tabs")
+    bed_no_none = os.path.join(project_dir, "query_annotation_intact_chroms.bed")
+    _filter_bed(non_sorted_bed, bed_no_none)
+    sorted_bed = os.path.join(tabs_dir, "query_annot_sorted.bed")
+    cmd = f"sort -k1,1 -k2,2n {bed_no_none} > {sorted_bed}"
+    subprocess.call(cmd, shell=True)
+    # sorted bed file, good
+
+
+def __get_ts_from_bed(bed):
+    """Get transcript IDs from bed file."""
+    f = open(bed, 'r')
+    ts = []
+    ts_to_bed_line = {}
+    for line in f:
+        ld = line.rstrip().split("\t")
+        t = ld[3]
+        ts.append(t)
+        ts_to_bed_line[t] = ld
+    f.close()
+    return ts, ts_to_bed_line
+
+
+def __get_ts_to_toga_data(toga_data_file):
+    """Get transID: togaData fields."""
+    ret = {}
+    f = open(toga_data_file, "r")
+    for line in f:
+        ld = line.rstrip().split("\t")
+        t = ld[0]
+        fields = ld[1:]
+        ret[t] = fields
+    f.close()
+    return ret
+
+
+def __get_ts_to_toga_inact(toga_inact_file):
+    """Read inact mut data."""
+    ts_to_exon_to_data = defaultdict(dict)
+    f = open(toga_inact_file, 'r')
+    for line in f:
+        line_data = line.rstrip().split("\t")
+        ts = line_data[0]
+        exon_num = int(line_data[1])
+        rest = line_data[2:]
+        ts_to_exon_to_data[ts][exon_num] = rest
+    f.close()
+    # now: TS to HTML_formatted line
+    trans_to_inact_line = {}
+    for t in ts_to_exon_to_data.keys():
+        exon_nums_unsort = ts_to_exon_to_data[t].keys()
+        exon_nums = sorted(exon_nums_unsort)
+        t_lines = []
+        for exon_num in exon_nums:
+            inact_mut_data = ts_to_exon_to_data[t][exon_num]
+            """
+            transcript varchar(50) not null,  -- unique projection ID: ${ref_transcript_ID}.${chain_ID}
+            exon_num int unsigned not null,  -- exon number
+            position int unsigned not null,  -- position where mutation happened
+            mut_class varchar(15) not null,  -- mutation class such as FS deletion
+            mutation varchar(20) not null,  -- what exactly happened
+            is_inact tinyint unsigned not null,  -- is this mutation inactivating, yes 1 or not 0
+            mut_id varchar(10) not null  -- mut identifier
+            """
+            # template:
+            """
+            struct togaInactMut *info = NULL;
+            info = togaInactMutLoad(row);
+            printf("<TR>\n");
+            printf("<TD>%s</TD>\n", info->exon_num);
+            printf("<TD>%s</TD>\n", info->position);
+            printf("<TD>%s</TD>\n", info->mut_class);
+            printf("<TD>%s</TD>\n", info->mutation);
+            if (sameWord(info->is_inact, ONE_)){
+                printf("<TD>%s</TD>\n", YES_);
+            } else {
+                printf("<TD>%s</TD>\n", NO_);
+            }
+            printf("<TD>%s</TD>\n", info->mut_id);
+            printf("</TR>\n");
+            togaInactMutFree(&info);
+            """
+            position = inact_mut_data[0]
+            mut_class = inact_mut_data[1]
+            mutation = inact_mut_data[2]
+            is_inact = "YES" if inact_mut_data[3] == "1" else "NO"
+            mut_id = inact_mut_data[4]
+            lines = []
+            lines.append("<TR>")
+            lines.append(f"<TD>{exon_num}</TD>")
+            lines.append(f"<TD>{position}</TD>")
+            lines.append(f"<TD>{mut_class}</TD>")
+            lines.append(f"<TD>{mutation}</TD>")
+            lines.append(f"<TD>{is_inact}</TD>")
+            lines.append(f"<TD>{mut_id}</TD>")
+            lines.append("</TR>")
+            inact_mut_line = "".join(lines)
+            t_lines.append(inact_mut_line)
+        t_line = "".join(t_lines)
+        trans_to_inact_line[t] = t_line
+    return trans_to_inact_line
+
+
+def __get_ts_to_toga_exons(toga_exons_file):
+    """Get transID: HTML-formatted exon data line."""
+    trans_to_exon_line = {}
+    """Table schema
+
+    transcript varchar(50) not null,  -- unique projection ID: ${ref_transcript_ID}.${chain_ID}
+    exon_num int unsigned not null,  -- exon number
+    exon_region varchar(100) not null,  -- region where exon was detected
+    pid float not null,  -- nucleotide %id
+    blosum float not null,  -- normalized blosum score
+    gaps tinyint unsigned not null,  -- are there any asm gaps near? 1 - yes 0 - no
+    ali_class varchar(4) not null,  -- alignment class: A, B, C, A+
+    exp_region varchar(50) not null,  -- where exon was expected
+    in_exp_region tinyint unsigned not null,  -- detected in expected region or not 1 yes 0 no
+    is_del_or_no char  -- D - deleted, M - missing, I - intact
+    alignment longblob not null  -- exon sequence in query
+    """
+    ts_to_exon_to_data = defaultdict(dict)
+    f = open(toga_exons_file, "r")
+    for line in f:
+        ld = line.rstrip().split("\t")
+        t = ld[0]
+        ex_num = int(ld[1])
+        datum = ld[2:]
+        ts_to_exon_to_data[t][ex_num] = datum
+    f.close()
+
+    for transcript in ts_to_exon_to_data.keys():
+        exons_unsorted = ts_to_exon_to_data[transcript].keys()
+        exon_nums = sorted(exons_unsorted)
+        transcript_lines = []
+        for exon_num in exon_nums:
+            exon_lines = []
+            datum = ts_to_exon_to_data[transcript][exon_num]
+            """To convert this:
+            struct togaNucl *info = NULL;
+            info = togaNuclLoad(row);
+            printf("<B>Exon number: %s</B><BR>\n", info->exon_num);
+            printf("Exon region: %s<BR>\n", info->exon_region);
+            printf("Nucleotide percent identity: %s | BLOSUM: %s<BR>\n", info->pid, info->blosum);
+            if (sameWord(info->gaps, ONE_)){
+                printf("Intersects assembly gaps: %s<BR>\n", YES_);
+            } else {
+                printf("Intersects assembly gaps: %s<BR>\n", NO_);
+            }
+            printf("Exon alignment class: %s<BR>\n", info->ali_class);
+            if (sameWord(info->in_exp_region, ONE_)){
+                printf("Detected within expected region (%s): %s<BR>\n", info->exp_region, YES_);
+            } else {
+                printf("Detected within expected region (%s): %s<BR>\n", info->exp_region, NO_);
+            }
+            // printf("Expected region: %s<BR>\n", info->exp_region);
+            printf("<BR>\n");
+            printf("Sequence alignment between reference and query exon:<BR>\n");
+            printf("%s<BR>\n", info->alignment);
+            togaNuclFree(&info);
+            """
+            exon_region = datum[0]
+            pid = datum[1]
+            blosum = datum[2]
+            gaps = "YES" if datum[3] == "1" else "NO"
+            ali_class = datum[4]
+            exp_reg = datum[5]
+            in_exp_region = "YES" if datum[6] == "1" else "NO"
+            is_del_or_no = datum[7]
+            ali = datum[8]
+
+            if is_del_or_no == "D":
+                exon_lines.append(f"<B>Exon number: {exon_num} - Deleted</B><BR>")
+            elif is_del_or_no == "M":
+                exon_lines.append(f"<B>Exon number: {exon_num} - Missing</B><BR>")
+            else:
+                exon_lines.append(f"<B>Exon number: {exon_num}</B><BR>")
+
+            exon_lines.append(f"Exon region: {exon_region}<BR>")
+            exon_lines.append(f"Nucleotide percent identity: {pid} | BLOSUM: {blosum}<BR>")
+            exon_lines.append(f"Intersects assembly gaps: {gaps}<BR>")
+            exon_lines.append(f"Exon alignment class: {ali_class}<BR>")
+            exon_lines.append(f"Detected within expected region ({exp_reg}): {in_exp_region}<BR>")
+            exon_lines.append("<BR>")
+            exon_lines.append("Sequence alignment between reference and query exon:<BR>")
+            exon_lines.append(f"{ali}<BR>")
+            exon_line = "".join(exon_lines)
+            transcript_lines.append(exon_line)
+        transcript_line = "".join(transcript_lines)
+        trans_to_exon_line[transcript] = transcript_line
+    return trans_to_exon_line
+
+
+def __get_ref_transcript(q_trans):
+    """Trim the chain id from a projection ID to get the reference transcript."""
+    return ".".join(q_trans.split(".")[:-1])
+
+
+def get_ref_ts_to_link(p_dir):
+    """Get ref_transcript: link dict."""
+    ret = {}
+    ref_2bit_link = os.path.join(p_dir, "t2bit.link")
+    ref_2bit_path = os.readlink(ref_2bit_link)
+    ref_2bit_basename = os.path.basename(ref_2bit_path)
+    ref_genome_gname = ".".join(ref_2bit_basename.split(".")[:-1])
+    ref_toga_dir = f"/projects/hillerlab/genome/gbdb-HL/{ref_genome_gname}/TOGA/"
+    ref_ts_to_link_path = os.path.join(ref_toga_dir, "toga.transcript2Link.txt")
+    # dirname = os.path.dirname(ref_2bit_path)
+    # chrom_sizes_path = os.path.join(dirname, "chrom.sizes")
+    if not os.path.isfile(ref_ts_to_link_path):
+        return ret, ref_genome_gname
+    f = open(ref_ts_to_link_path, "r")
+    for line in f:
+        ld = line.rstrip().split("\t")
+        ret[ld[0]] = ld[1]
+    f.close()
+    return ret, ref_genome_gname
+
+
+def get_que_chrom_sizes(p_dir):
+    """Locate the query chrom.sizes file next to the linked query 2bit."""
+    que_2bit_link = os.path.join(p_dir, "q2bit.link")
+    que_2bit_path = os.readlink(que_2bit_link)
+    dirname = os.path.dirname(que_2bit_path)
+    chrom_sizes_path = os.path.join(dirname, "chrom.sizes")
+    que_2bit_basename = os.path.basename(que_2bit_path)
+    que_genome_gname = ".".join(que_2bit_basename.split(".")[:-1])
+    return chrom_sizes_path, que_genome_gname
+
+
+def merge_all_tables(project_dir, r_ts_to_link):
+    """Merge all tab files into the ready-to-merge bed material."""
+    print("Joining tsv for bigbed")
+    tabs_dir = os.path.join(project_dir, "tabs")
+    sorted_bed = os.path.join(tabs_dir, "query_annot_sorted.bed")
+    toga_data_file = os.path.join(tabs_dir, "togaData.tab")
+    toga_inact_file = os.path.join(tabs_dir, "togaInactMut.tab")
+    toga_nucl_file = os.path.join(tabs_dir, "togaNucl.tab")
+    transcripts_ordered, t_to_bed = __get_ts_from_bed(sorted_bed)
+    ts_to_data_track = __get_ts_to_toga_data(toga_data_file)
+    ts_to_inact_data = __get_ts_to_toga_inact(toga_inact_file)
+    ts_to_exon_data = __get_ts_to_toga_exons(toga_nucl_file)
+
+    bigbed_material = os.path.join(tabs_dir, "query_annot_for_big.tsv")
+    f = open(bigbed_material, "w")
+    for transcript in transcripts_ordered:
+        ref_trans = __get_ref_transcript(transcript)
+        bed_fields = t_to_bed[transcript]
+        data_track = ts_to_data_track[transcript]
+        inact_line = ts_to_inact_data.get(transcript, "")
+        exon_line = ts_to_exon_data[transcript]
+        link = r_ts_to_link.get(ref_trans, ref_trans)  # just reference transcript if no link
+        combined_datum_lst = bed_fields + data_track + [link] + [inact_line] + [exon_line]
+        combined_datum = "\t".join(combined_datum_lst)
+        f.write(f"{combined_datum}\n")
+    f.close()
+
+
+def sort_and_make_bb(project_dir, chrom_sizes):
+    """bed12+22 as the result.
+
+    sort -k1,1 -k2,2n query_annot_for_big.tsv > query_annot_for_big__sorted.tsv
+    bedToBigBed -type=bed12+22 query_annot_for_big__sorted.tsv
+    /projects/hillerlab/genome/gbdb-HL/mm10/chrom.sizes query_annot.bb -tab
+    """
+    print("Making bigbed...")
+    tabs_dir = os.path.join(project_dir, "tabs")
+    bigbed_material_not_sorted = os.path.join(tabs_dir, "query_annot_for_big.tsv")
+    bigbed_material = os.path.join(tabs_dir, "query_annot_for_big__sorted.tsv")
+    sort_cmd = f"sort -k1,1 -k2,2n {bigbed_material_not_sorted} > {bigbed_material}"
+    rc = subprocess.call(sort_cmd, shell=True)
+    if rc != 0:
+        sys.exit(f"Error! Command:\n{sort_cmd}\ncrashed")
+    output = os.path.join(tabs_dir, "query_annotation.bb")
+    # now transform into bigbed
+    print("Calling:")
+    bb_cmd = f"bedToBigBed -type=bed12+22 {bigbed_material} {chrom_sizes} {output} -tab -extraIndex=name -as={SCHEMA_LOCATION}"
+    print(bb_cmd)
+    rc = subprocess.call(bb_cmd, shell=True)
+    if rc != 0:
+        sys.exit(f"Error! Command:\n{bb_cmd}\ncrashed")
+
+    # make ix.txt
+    query_annot = os.path.join(project_dir, "query_annotation.bed")
+    make_ix_script = os.path.join(LOCATION, "get_names_from_bed.py")
+    ix_txt = os.path.join(tabs_dir, "query_annotation.ix.txt")
+    ix_cmd = f"{make_ix_script} {query_annot} | sort -u > {ix_txt}"
+    rc = subprocess.call(ix_cmd, shell=True)
+    if rc != 0:
+        sys.exit(f"Error! Command:\n{ix_cmd}\ncrashed")
+
+    # make .ix and .ixx indexes
+    ix_path = os.path.join(tabs_dir, "query_annotation.bb.ix")
+    ixx_path = os.path.join(tabs_dir, "query_annotation.bb.ixx")
+    ixixx_cmd = f"ixIxx {ix_txt} {ix_path} {ixx_path}"
+    rc = subprocess.call(ixixx_cmd, shell=True)
+    if rc != 0:
+        sys.exit(f"Error! 
Command:\n{ixixx_cmd}\ncrashed") + + +def cleanup(project_dir, dont): + """Clean project dir up.""" + if dont: # do not cleanup + return + tabs_dir = os.path.join(project_dir, "tabs") + bigbed_material_not_sorted = os.path.join(tabs_dir, "query_annot_for_big.tsv") + bigbed_material = os.path.join(tabs_dir, "query_annot_for_big__sorted.tsv") + toga_info_file = os.path.join(tabs_dir, "togaInfo.tab") + toga_inact_feat_file = os.path.join(tabs_dir, "togaInactFeat.tab") + toga_plot_file = os.path.join(tabs_dir, "togaPlot.tab") + toga_prot_file = os.path.join(tabs_dir, "togaProt.tab") + sorted_bed = os.path.join(tabs_dir, "query_annot_sorted.bed") + toga_data_file = os.path.join(tabs_dir, "togaData.tab") + toga_inact_file = os.path.join(tabs_dir, "togaInactMut.tab") + toga_nucl_file = os.path.join(tabs_dir, "togaNucl.tab") + to_rm = [bigbed_material_not_sorted, bigbed_material, toga_info_file, + toga_inact_feat_file, toga_plot_file, toga_prot_file, + sorted_bed, toga_data_file, toga_inact_file, toga_nucl_file] + for path in to_rm: + os.remove(path) if os.path.isfile(path) else None + + +def __ssh_mkdir(uname, dirname): + cmd = f"ssh {uname}@genome mkdir -p {dirname}" + print(f"Calling {cmd}") + subprocess.call(cmd, shell=True) + + +def __ssh_ln(uname, src, dest): + """Call ln on delta""" + cmd = f"ssh {uname}@genome ln -sf {src} {dest}" + print(f"Calling {cmd}") + subprocess.call(cmd, shell=True) + + +def __get_ccase_refname(refname): + if refname == "hg38": + return "Hg38" + elif refname == "mm10": + return "Mm10" + elif refname == "mm39": + return "Mm39" + else: + return refname + + +def load_bb_track(project_dir, dont, ref, que, bb_ver): + if dont: # do not load anything + return + tabs_dir = os.path.join(project_dir, "tabs") + bb_file = os.path.join(tabs_dir, "query_annotation.bb") + uname = getpass.getuser() + # scp query_annotation.bb genome:/genome/gbdb-HL/$ref/TOGA/vs_$query/HLTOGAannotVs$refv1.bb + ref_dir__genome = f"/genome/gbdb-HL/{ref}" + toga_ref_dir__genome = f"{ref_dir__genome}/TOGA" + # make if needed + __ssh_mkdir(uname, toga_ref_dir__genome) + vs_que_dir__genome = f"{toga_ref_dir__genome}/vs_{que}" + __ssh_mkdir(uname, vs_que_dir__genome) + # transfer data + bb_file = os.path.join(tabs_dir, "query_annotation.bb") + ix_file = os.path.join(tabs_dir, "query_annotation.bb.ix") + ixx_file = os.path.join(tabs_dir, "query_annotation.bb.ixx") + + ref_camel_case = __get_ccase_refname(ref) + bb_filename__genome = f"HLTOGAannotVs{ref_camel_case}{bb_ver}.bb" + ix_filename__genome = f"HLTOGAannotVs{ref_camel_case}{bb_ver}.ix" + ixx_filename__genome = f"HLTOGAannotVs{ref_camel_case}{bb_ver}.ixx" + + bb_path_genome = f"{vs_que_dir__genome}/{bb_filename__genome}" + ix_path_genome = f"{vs_que_dir__genome}/{ix_filename__genome}" + ixx_path_genome = f"{vs_que_dir__genome}/{ixx_filename__genome}" + + rsync_cmd = f"rsync -av {bb_file} {uname}@genome:{bb_path_genome}" + subprocess.call(rsync_cmd, shell=True) + + rsync_cmd = f"rsync -av {ix_file} {uname}@genome:{ix_path_genome}" + subprocess.call(rsync_cmd, shell=True) + + rsync_cmd = f"rsync -av {ixx_file} {uname}@genome:{ixx_path_genome}" + subprocess.call(rsync_cmd, shell=True) + + # make link in /var/www + www_data__genome = "/var/www/data" + www_que__genome = f"{www_data__genome}/{que}" + __ssh_mkdir(uname, www_que__genome) + + www_bb_link_dest = f"{www_que__genome}/{bb_filename__genome}" + www_ix_link_dest = f"{www_que__genome}/{ix_filename__genome}" + www_ixx_link_dest = f"{www_que__genome}/{ixx_filename__genome}" + + __ssh_ln(uname, 
bb_path_genome, www_bb_link_dest) + __ssh_ln(uname, ix_path_genome, www_ix_link_dest) + __ssh_ln(uname, ixx_path_genome, www_ixx_link_dest) + + +def main(): + """Entry point.""" + args = parse_args() + r_ts_to_link, ref_name = get_ref_ts_to_link(args.project_dir) + chrom_sizes, que_name = get_que_chrom_sizes(args.project_dir) + # call generate_tab_files.py and make_togaPlot.py + call_gen_tab_files(args.project_dir) + call_gen_plot_files(args.project_dir, no_plots=args.no_plots) + # merge 4 transcriptID-related tables into a single one + make_toga_data(args.project_dir) + + # OK, now making bigBed + # togaNucl.tab and togaInactMut.tab + # -> merge into HTML_formatted strings + # -> add to togaData + # save to as.tab for bedToBigBed + make_sorted_bed(args.project_dir) + + merge_all_tables(args.project_dir, r_ts_to_link) + sort_and_make_bb(args.project_dir, chrom_sizes) + + cleanup(args.project_dir, args.do_not_cleanup) + + +if __name__ == "__main__": + main() diff --git a/ucsc_browser_visualisation/make_sql_data.py b/ucsc_browser_visualisation/make_sql_data.py deleted file mode 100755 index 41eba17..0000000 --- a/ucsc_browser_visualisation/make_sql_data.py +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env python3 -"""Perform all operations to create SQL tables for UCSC browser.""" -import os -import sys -import argparse -import subprocess - -GENERATE_TAB_FILES = "generate_tab_files.py" -GENERATE_PLOTS = "make_togaPlot.py" -LOCATION = os.path.dirname(__file__) - -IFEAT_PLACEHOLDER = ["0.0" for _ in range(6)] -PROT_PLACEHOLDER = ["NO_DATA"] -UNDEF = "UNDEFINED" -SVG_PLACEHOLDER = """ - - - - -""" -PLOT_PLACEHOLDER = [SVG_PLACEHOLDER, ] - - -def parse_args(): - """Parse args.""" - app = argparse.ArgumentParser() - app.add_argument("project_dir", help="Directory containing TOGA output") - if len(sys.argv) < 2: - app.print_help() - sys.exit(0) - args = app.parse_args() - return args - - -def call_gen_tab_files(project_dir): - """Call Generate tab files subprocess.""" - print(f"Calling {GENERATE_TAB_FILES}") - out_dir = os.path.join(project_dir, "tabs") - os.mkdir(out_dir) if not os.path.isdir(out_dir) else None - exe_ = os.path.join(LOCATION, GENERATE_TAB_FILES) - cmd = f"{exe_} {project_dir} {out_dir}" - rc = subprocess.call(cmd, shell=True) - if rc != 0: - raise ValueError(f"Command {cmd} died - cannot generate tab files") - print("5 of 6 tab files generated") - - -def call_gen_plot_files(project_dir): - """Generate svg plots.""" - print(f"Calling {GENERATE_PLOTS}") - out_file = os.path.join(project_dir, "tabs", "togaPlot.tab") - exe_ = os.path.join(LOCATION, GENERATE_PLOTS) - cmd = f"{exe_} {project_dir} {out_file}" - rc = subprocess.call(cmd, shell=True) - if rc != 0: - raise ValueError(f"Command {cmd} died - cannot generate plots") - print("All tab files generated") - - -def read_tab_file(tab_file): - """Make trans_id: data track.""" - trans_to_dat = {} - f = open(tab_file, "r") - for line in f: - line_data = line.rstrip().split("\t") - trans = line_data[0] - data = line_data[1:] - trans_to_dat[trans] = data - f.close() - return trans_to_dat - - - -def __gen_info_placeholder(p_name): - """Generate info placeholder.""" - t_name = ".".join(p_name.split(".")[:-1]) - placeholder = [t_name, UNDEF, UNDEF, "0.0", "0", "0.0", "0.0", "0.0", "0.0", "0.0", UNDEF] - return placeholder - - -def make_toga_data(project_dir): - """Merge togaPlot, togaProt, togaInfo and togaInactFeat tables into togaData.""" - tabs_dir = os.path.join(project_dir, "tabs") - toga_info_file = os.path.join(tabs_dir, 
"togaInfo.tab") - toga_inact_feat_file = os.path.join(tabs_dir, "togaInactFeat.tab") - toga_plot_file = os.path.join(tabs_dir, "togaPlot.tab") - toga_prot_file = os.path.join(tabs_dir, "togaProt.tab") - out_file = os.path.join(tabs_dir, "togaData.tab") - - print("Reading tab files to merge") - trans_to_info = read_tab_file(toga_info_file) - trans_to_ifeat = read_tab_file(toga_inact_feat_file) - trans_to_plot = read_tab_file(toga_plot_file) - trans_to_prot = read_tab_file(toga_prot_file) - - transcripts = trans_to_info.keys() - print(f"Got data for {len(transcripts)} transcripts") - - f = open(out_file, "w") - for t in transcripts: - info = trans_to_info.get(t) - if info is None: - info = __gen_info_placeholder(t) - ifeat = trans_to_ifeat.get(t, IFEAT_PLACEHOLDER.copy()) - prot = trans_to_prot.get(t, PROT_PLACEHOLDER.copy()) - plot = trans_to_plot.get(t, PLOT_PLACEHOLDER.copy()) - data_lst = [t] + info + ifeat + prot + plot - tab_str = "\t".join(data_lst) - f.write(tab_str) - f.write("\n") - f.close() - # do cleanup - os.remove(toga_info_file) - os.remove(toga_inact_feat_file) - os.remove(toga_plot_file) - os.remove(toga_prot_file) - print("Done") - - -def main(): - """Entry point.""" - args = parse_args() - # call generate_tab_files.py and make_togaPlot.py - call_gen_tab_files(args.project_dir) - call_gen_plot_files(args.project_dir) - # merge 4 transcriptID-related tables into a single one - make_toga_data(args.project_dir) - - -if __name__ == "__main__": - main() diff --git a/ucsc_browser_visualisation/readme.md b/ucsc_browser_visualisation/readme.md index 4241c7e..c2f53d2 100644 --- a/ucsc_browser_visualisation/readme.md +++ b/ucsc_browser_visualisation/readme.md @@ -76,13 +76,22 @@ We will add another condition to handle TOGA tracks. Put another condition as shown here: + ```c -else if (startsWith("HLTOGA", table) && hTableExists(database, "TOGAData")) -{ - doHillerLabTOGAGene(tdb, item); -} +else if (startsWith("HLTOGAannot", trackHubSkipHubName(table))) + { + doHillerLabTOGAGene(database, tdb, item, table); + } ``` + +[//]: # (```c) +[//]: # (else if (startsWith("HLTOGA", table) && hTableExists(database, "TOGAData"))) +[//]: # ({) +[//]: # ( doHillerLabTOGAGene(database, tdb, item, table);) +[//]: # (} +[//]: # (```) + Then re-build your browser. ### Loading TOGA tables @@ -92,26 +101,18 @@ Then re-build your browser. ${project_dir}: directory containing TOGA results and intermediate data. ```shell -./ucsc_browser_visualisation/make_sql_data.py ${project_dir}``` +./ucsc_browser_visualisation/make_bigbed_data_public.py ${project_dir}``` # Wait, it will take a few minutes, most likely less than an hour ``` #### Load tab files to browser database -${tab_files_dir} = ${project_dir}/tabs -${query} - annotated genome identifier. -Use *.sql files located in the ucsc_browser_visualisation directory. -Call the following commands: +Transfer created *.bb, *.ix, and *.ixx files to the machine hosting your instance of UCSC genome browser, if need be. +Create a table schema for bigBed tracks, using bigDataUrl field to specify the bigBed URL, and searchTrix field to specify the *.ix file URL (no need to specify *.ixx file separately, it just should be located in the same directory with the *.ix file.) 
-```shell -hgLoadSqlTab ${query} TOGAData togaData.sql ${tab_files_dir}/togaInfo.tab -hgLoadSqlTab ${query} TOGANucl togaNucl.sql ${tab_files_dir}/togaNucl.tab -hgLoadSqlTab ${query} TOGAInactMut togaInactMut.sql ${tab_files_dir}/togaInactMut.tab +Please also specify the following fields: ``` - -Also load bed file using hgLoadBed command. -Create a track starting with HLTOGA, for instance HLTOGAannotation. - -```shell script -hgLoadBed ${query} HLTOGAannotation ${project_dir}/query_annotation.bed +type bigBed 12 + +labelFields name +searchIndex name ``` diff --git a/ucsc_browser_visualisation/togaClick.c b/ucsc_browser_visualisation/togaClick.c index e2f10e6..e3d53c8 100644 --- a/ucsc_browser_visualisation/togaClick.c +++ b/ucsc_browser_visualisation/togaClick.c @@ -3,6 +3,44 @@ #include "hgc.h" #include "togaClick.h" #include "string.h" +#include "htmshell.h" +#include "chromAlias.h" + + +struct togaDataBB *togaDataBBLoad(char **row) +/* Load a togaData from row fetched with select * from togaData + * from database. Dispose of this with togaDataFree(). */ +{ + struct togaDataBB *ret; + AllocVar(ret); + ret->projection = cloneString(row[0]); + ret->ref_trans_id = cloneString(row[1]); + ret->ref_region = cloneString(row[2]); + ret->query_region = cloneString(row[3]); + ret->chain_score = cloneString(row[4]); + + ret->chain_synteny = cloneString(row[5]); + ret->chain_flank = cloneString(row[6]); + ret->chain_gl_cds_fract = cloneString(row[7]); + ret->chain_loc_cds_fract = cloneString(row[8]); + ret->chain_exon_cov = cloneString(row[9]); + + ret->chain_intron_cov = cloneString(row[10]); + ret->status = cloneString(row[11]); + ret->perc_intact_ign_M = cloneString(row[12]); + ret->perc_intact_int_M = cloneString(row[13]); + ret->intact_codon_prop = cloneString(row[14]); + + ret->ouf_prop = cloneString(row[15]); + ret->mid_intact = cloneString(row[16]); + ret->mid_pres = cloneString(row[17]); + ret->prot_alignment = cloneString(row[18]); + ret->svg_line = cloneString(row[19]); + ret->ref_link = cloneString(row[20]); + ret->inact_mut_html_table = cloneString(row[21]); + ret->exon_ali_html = cloneString(row[22]); + return ret; +} struct togaData *togaDataLoad(char **row) @@ -38,6 +76,43 @@ struct togaData *togaDataLoad(char **row) } +void togaDataBBFree(struct togaDataBB **pEl) +/* Free a single dynamically allocated togaDatasuch as created + * with togaDataLoad(). */ +{ + struct togaDataBB *el; + + if ((el = *pEl) == NULL) return; + freeMem(el->projection); + freeMem(el->ref_trans_id); + freeMem(el->ref_region); + freeMem(el->query_region); + freeMem(el->chain_score); + + freeMem(el->chain_synteny); + freeMem(el->chain_flank); + freeMem(el->chain_gl_cds_fract); + freeMem(el->chain_loc_cds_fract); + freeMem(el->chain_exon_cov); + + freeMem(el->chain_intron_cov); + freeMem(el->status); + freeMem(el->perc_intact_ign_M); + freeMem(el->perc_intact_int_M); + freeMem(el->intact_codon_prop); + + freeMem(el->ouf_prop); + freeMem(el->mid_intact); + freeMem(el->mid_pres); + freeMem(el->prot_alignment); + freeMem(el->svg_line); + freeMem(el->ref_link); + freeMem(el->inact_mut_html_table); + freeMem(el->exon_ali_html); + freez(pEl); +} + + void togaDataFree(struct togaData **pEl) /* Free a single dynamically allocated togaDatasuch as created * with togaDataLoad(). 
*/ @@ -168,10 +243,207 @@ Prefix must be HLTOGAannot */ } -void doHillerLabTOGAGene(struct trackDb *tdb, char *item, char *table_name) +void HLprintQueryProtSeqForAli(char *proteinAlignment) { + // take protein sequence alignment + // print only the query sequence + char *str = proteinAlignment; + int printed_char_num = 0; + while ((str = strstr(str, "que:")) != NULL) + { + str += 10; + char ch; + while ((ch = *str++) != '<') { + if (ch != '-') { + putchar(ch); + ++printed_char_num; + } + if (printed_char_num == 80) { + printed_char_num = 0; + printf("
"); + } + } + } +} + + + +void doHillerLabTOGAGeneBig(char *database, struct trackDb *tdb, char *item, char *table_name) +/* Put up TOGA Gene track info. */ +// To think about -> put into a single bigBed +// string: HTML formatted inact mut +// string: HTML formatted exon ali section +{ +int start = cartInt(cart, "o"); +int end = cartInt(cart, "t"); +char *chrom = cartString(cart, "c"); +char *fileName = bbiNameFromSettingOrTable(tdb, NULL, tdb->table); +struct bbiFile *bbi = bigBedFileOpenAlias(hReplaceGbdb(fileName), chromAliasFindAliases); +struct lm *lm = lmInit(0); +struct bigBedInterval *bbList = bigBedIntervalQuery(bbi, chrom, start, end, 0, lm); +struct bigBedInterval *bb; +char *fields[bbi->fieldCount]; +for (bb = bbList; bb != NULL; bb = bb->next) + { + if (!(bb->start == start && bb->end == end)) + continue; + + // our names are unique + char *name = cloneFirstWordByDelimiterNoSkip(bb->rest, '\t'); + boolean match = (isEmpty(name) && isEmpty(item)) || sameOk(name, item); + if (!match) + continue; + + char startBuf[16], endBuf[16]; + bigBedIntervalToRow(bb, chrom, startBuf, endBuf, fields, bbi->fieldCount); + break; + } + +printf("

Projection %s


\n", item); +struct togaDataBB *info = togaDataBBLoad(&fields[11]); // Bogdan: why 11? 0-11 are bed-like fields likely + +printf("Reference transcript: %s
", info->ref_link); +printf("Genomic locus in reference: %s
\n", info->ref_region); +printf("Genomic locus in query: %s
\n", info->query_region); + +printf("Projection classification: %s
\n", info->status); +printf("Probability that query locus is orthologous: %s
\n", info->chain_score); +// list of chain features (for orthology classification) +printf("Show features used for ortholog probability\n"); +printf("
\n"); +printf("\n"); + +printf("
\nFeature description:\n"); +printf("For each projection (one reference transcript and one overlapping chain),\n"); +printf("TOGA computes the following features by intersecting the reference coordinates of aligning\n"); +printf("blocks in the chain with different gene parts (coding exons, UTR (untranslated region) exons, introns)\n"); +printf("and the respective intergenic regions.\n
\n"); + +printf("We define the following variables:\n\n"); +printf("Using these variables, TOGA computes the following features:\n"); +printf("\n"); + + +printf("\n
\n
\n"); +htmlHorizontalLine(); + +// show inact mut plot +printf("

Visualization of inactivating mutations on exon-intron structure

\n"); +printf("%s
\n", info->svg_line); +printf("
Exons shown in grey are missing (often overlap assembly gaps).\nExons shown in");
+printf(" red or blue are deleted or do not align at all.\nRed indicates that the exon deletion ");
+printf("shifts the reading frame, while blue indicates that exon deletion(s) are frame-preserving.
\n"); + +// GLP features +printf("Show features used for transcript classification\n"); +printf("
\n"); +printf("\n
\n
\n"); + + +htmlHorizontalLine(); + +printf("

Predicted protein sequence


\n"); + +printf("Show protein sequence of query\n"); +printf("
\n"); +// printf("{protein seq of the query without dashes or other things. Should end with *}\n"); +printf(""); +HLprintQueryProtSeqForAli(info->prot_alignment); +printf("\n
\n
\n
\n"); + +// and show protein sequence +htmlHorizontalLine(); +printf("

Protein sequence alignment


\n"); +printf("Show alignment between reference and query\n"); +printf("
\n"); +printf("%s
\n", info->prot_alignment); +printf("
\n

\n"); + +// show inactivating mutations if required +printf("

List of inactivating mutations


\n"); + +printf("Show inactivating mutations\n"); +printf("
\n"); +printf("\n"); // init table +printf("\n"); +printf("\n"); +printf("%s\n", info->inact_mut_html_table); +printf("
<TR><TH>Exon number</TH><TH>Codon number</TH><TH>Mutation class</TH><TH>Mutation</TH><TH>Treated as inactivating</TH><TH>Mutation ID</TH></TR>
\n"); +printf("
\n
\n"); + +// show exons data +htmlHorizontalLine(); +printf("

Exon alignments


\n"); + +printf("Show exon sequences and features

\n"); +printf("
\n"); +// printf("%s\n", info->exon_ali_string); +printf("%s\n", info->exon_ali_html); + +htmlHorizontalLine(); + +// TODO: check whether I need this +printf("%s", hgTracksPathAndSettings()); +hPrintf(""); +hPrintf(""); +hPrintf(""); + + +printTrackHtml(tdb); // and do I need this? +} + + +void doHillerLabTOGAGene(char *database, struct trackDb *tdb, char *item, char *table_name) /* Put up TOGA Gene track info. */ { - int start = cartInt(cart, "o"); + //int start = cartInt(cart, "o"); char headerTitle[512]; char suffix[512]; strcpy(suffix, table_name); @@ -180,6 +452,13 @@ void doHillerLabTOGAGene(struct trackDb *tdb, char *item, char *table_name) genericHeader(tdb, headerTitle); printf("

TOGA gene annotation

\n"); // htmlHorizontalLine(); + + if (startsWith("bigBed", tdb->type)) + { + doHillerLabTOGAGeneBig(database, tdb, item, table_name); + return; + } + struct sqlConnection *conn = hAllocConn(database); // define TOGA table names: initate with pre-defined prefixes @@ -209,57 +488,103 @@ void doHillerLabTOGAGene(struct trackDb *tdb, char *item, char *table_name) if ((row = sqlNextRow(sr)) != NULL) { info = togaDataLoad(row); // parse sql output // fill HTML template: - printf("Projected via: %s
", + printf("Reference transcript: %s
", info->ref_trans_id, info->ref_trans_id); - printf("Region in reference: %s
\n", info->ref_region); - printf("Region in query: %s
\n", info->query_region); + printf("Genomic locus in reference: %s
\n", info->ref_region); + printf("Genomic locus in query: %s
\n", info->query_region); - printf("Projection class: %s
\n", info->status); - printf("Chain score: %s
\n", info->chain_score); + printf("Projection classification: %s
\n", info->status); + printf("Probability that query locus is orthologous: %s
\n", info->chain_score); // list of chain features (for orthology classification) - printf("Show chain features for classification\n"); + printf("Show features used for ortholog probability\n"); printf("
\n"); printf("
    \n"); - printf("
  • Synteny: %s
  • \n", info->chain_synteny); + printf("
  • Synteny (log10 value): %s
  • \n", info->chain_synteny); printf("
  • Global CDS fraction: %s
  • \n", info->chain_gl_cds_fract); printf("
  • Local CDS fraction: %s
  • \n", info->chain_loc_cds_fract); printf("
  • Local intron fraction: %s
  • \n", info->chain_intron_cov); printf("
  • Local CDS coverage: %s
  • \n", info->chain_exon_cov); printf("
  • Flank fraction: %s
  • \n", info->chain_flank); + printf("
\n"); + + printf("
\nFeature description:\n"); + printf("For each projection (one reference transcript and one overlapping chain),\n"); + printf("TOGA computes the following features by intersecting the reference coordinates of aligning\n"); + printf("blocks in the chain with different gene parts (coding exons, UTR (untranslated region) exons, introns)\n"); + printf("and the respective intergenic regions.\n
\n"); + + printf("We define the following variables:\n
    \n"); + printf("
  • c: number of reference bases in the intersection between chain blocks and coding exons of the gene under consideration.
  • \n"); + printf("
  • C: number of reference bases in the intersection between chain blocks and coding exons of all genes.
  • \n"); + printf("
  • a: number of reference bases in the intersection between chain blocks and coding exons and introns of the gene under consideration.
  • \n"); + printf("
  • A: number of reference bases in the intersection between chain blocks and coding exons and introns of all genes and the intersection\n"); + printf("between chain blocks and intergenic regions (excludes UTRs).
  • \n"); + printf("
  • f: number of reference bases in chain blocks overlapping the 10 kb flanks of the gene under consideration.\n"); + printf("Alignment blocks overlapping exons of another gene that is located in these 10 kb flanks are ignored.
  • \n"); + printf("
  • i: number of reference bases in the intersection between chain blocks and introns of the gene under consideration.
  • \n"); + printf("
  • CDS (coding sequence): length of the coding region of the gene under consideration.
  • \n"); + printf("
  • I: sum of all intron lengths of the gene under consideration.
  • \n"); + printf("
\n"); + printf("Using these variables, TOGA computes the following features:\n"); + printf("
    \n"); + printf("
  • “global CDS fraction” as C / A. Chains with a high value have alignments that largely overlap coding exons,"); + printf("which is a hallmark of paralogous or processed pseudogene chains. In contrast, chains with a low value also align many "); + printf("intronic and intergenic regions, which is a hallmark of orthologous chains.
  • \n"); + printf("
  • “local CDS fraction” as c / a. Orthologous chains tend to have a lower value, as intronic "); + printf("regions partially align. This feature is not computed for single-exon genes.
  • \n"); + printf("
  • “local intron fraction” as i / I. Orthologous chains tend to have a higher value."); + printf("This feature is not computed for single-exon genes.
  • \n"); + printf("
  • “flank fraction” as f / 20,000. Orthologous chains tend to have higher values,"); + printf("as flanking intergenic regions partially align. This feature is important to detect orthologous loci of single-exon genes.
  • \n"); + printf("
  • “synteny” as log10 of the number of genes, whose coding exons overlap by at least one base aligning"); + printf("blocks of this chain. Orthologous chains tend to cover several genes located in a conserved order, resulting in higher synteny values.
  • \n"); + printf("
  • “local CDS coverage” as c / CDS, which is only used for single-exon genes.
  • \n"); + printf("
\n"); + + printf("\n
\n
\n"); htmlHorizontalLine(); // show inact mut plot - printf("

Inactivating mutations plot

\n"); + printf("

Visualization of inactivating mutations on exon-intron structure

\n"); printf("%s
\n", info->svg_line); + printf("
Exons shown in grey are missing (often overlap assembly gaps).\nExons shown in");
+    printf(" red or blue are deleted or do not align at all.\nRed indicates that the exon deletion ");
+    printf("shifts the reading frame, while blue indicates that exon deletion(s) are frame-preserving.
\n"); // GLP features - printf("Show GLP features\n"); + printf("Show features used for transcript classification\n"); printf("
\n"); printf("
    \n"); - printf("
  • Percent intact ignoring missing seq: %s
  • \n", info->perc_intact_ign_M); - printf("
  • Percent intact (miss == intact): %s
  • \n", info->perc_intact_int_M); - printf("
  • Intact codon proportion %s
  • \n", info->intact_codon_prop); - printf("
  • Out of chain proportion: %s
  • \n", info->ouf_prop); + printf("
  • Percent intact, ignoring missing sequence: %s
  • \n", info->perc_intact_ign_M); + printf("
  • Percent intact, treating missing as intact sequence: %s
  • \n", info->perc_intact_int_M); + printf("
  • Proportion of intact codons: %s
  • \n", info->intact_codon_prop); + printf("
  • Percent of CDS not covered by this chain (0 unless the chain covers only a part of the gene): %s
  • \n", info->ouf_prop); if (sameWord(info->mid_intact, ONE_)) { - printf("
  • Middle 80 percent intact: %s
  • \n", YES_); + printf("
  • Middle 80 percent of CDS intact: %s
  • \n", YES_); } else { - printf("
  • Middle 80 percent intact: %s
  • \n", NO_); + printf("
  • Middle 80 percent of CDS intact: %s
  • \n", NO_); } if (sameWord(info->mid_pres, ONE_)) { - printf("
  • Middle 80 percent present: %s
  • \n", YES_); + printf("
  • Middle 80 percent of CDS present: %s
  • \n", YES_); } else { - printf("
  • Middle 80 percent present: %s
  • \n", NO_); + printf("
  • Middle 80 percent of CDS present: %s
  • \n", NO_); } printf("
\n
\n
\n"); + printf("

Query protein sequence


"); + + printf("Show protein sequence of query\n"); + printf("
\n"); + printf("{protein seq of the query without dashes or other things. Should end with *}\n"); + printf("
\n
\n
\n"); // and show protein sequence htmlHorizontalLine(); - printf("

Protein sequence


\n"); - printf("Show protein alignment\n"); - printf("
\n"); + printf("

Protein sequence alignment


\n"); + printf("Show alignment between reference and query\n"); + printf("
\n"); printf("%s
\n", info->prot_alignment); printf("
\n

\n"); @@ -273,7 +598,7 @@ void doHillerLabTOGAGene(struct trackDb *tdb, char *item, char *table_name) } // show inactivating mutations if required - printf("

Inactivating mutations


\n"); + printf("

List of inactivating mutations


\n"); if (hTableExists(database, togaInactMutTableName)) { @@ -285,7 +610,7 @@ void doHillerLabTOGAGene(struct trackDb *tdb, char *item, char *table_name) printf("Show inactivating mutations\n"); printf("
\n"); printf("\n"); // init table - printf("\n"); + printf("\n"); printf("\n"); while ((row = sqlNextRow(sr)) != NULL) { @@ -314,7 +639,7 @@ void doHillerLabTOGAGene(struct trackDb *tdb, char *item, char *table_name) // show exons data htmlHorizontalLine(); - printf("

Exons data


\n"); + printf("

Exon alignments


\n"); if (hTableExists(database, togaNuclTableName)) { @@ -323,8 +648,6 @@ void doHillerLabTOGAGene(struct trackDb *tdb, char *item, char *table_name) char **row; printf("Show exon sequences and features\n"); printf("
\n"); - // TODO: make sure this clause is not necessary - // order by exon_num sqlSafef(query, sizeof(query), "select * from %s where transcript='%s'", togaNuclTableName, item); sr = sqlGetResult(conn, query); @@ -342,13 +665,13 @@ void doHillerLabTOGAGene(struct trackDb *tdb, char *item, char *table_name) } printf("Exon alignment class: %s
\n", info->ali_class); if (sameWord(info->in_exp_region, ONE_)){ - printf("Detected within expected region: %s
\n", YES_); + printf("Detected within expected region (%s): %s
\n", info->exp_region, YES_); } else { - printf("Detected within expected region: %s
\n", NO_); + printf("Detected within expected region (%s): %s
\n", info->exp_region, NO_); } - printf("Expected region: %s
\n", info->exp_region); + // printf("Expected region: %s
\n", info->exp_region); printf("
\n"); - printf("Sequence alignment:
\n"); + printf("Sequence alignment between reference and query exon:
\n"); printf("%s
\n", info->alignment); togaNuclFree(&info); } diff --git a/ucsc_browser_visualisation/togaClick.h b/ucsc_browser_visualisation/togaClick.h index 00cecee..c94e80b 100644 --- a/ucsc_browser_visualisation/togaClick.h +++ b/ucsc_browser_visualisation/togaClick.h @@ -15,6 +15,35 @@ #define HLTOGA_MAXCHAR 255 +struct togaDataBB +{ + char *projection; + char *ref_trans_id; + char *ref_region; + char *query_region; + char *chain_score; + char *chain_synteny; + char *chain_flank; + char *chain_gl_cds_fract; + char *chain_loc_cds_fract; + char *chain_exon_cov; + char *chain_intron_cov; + char *status; + char *perc_intact_ign_M; + char *perc_intact_int_M; + char *intact_codon_prop; + char *ouf_prop; + char *mid_intact; + char *mid_pres; + char *prot_alignment; + char *svg_line; + char *ref_link; + char *inact_mut_html_table; + char *exon_ali_html; +}; + + + struct togaData { char *projection; @@ -67,10 +96,19 @@ struct togaInactMut }; +struct togaDataBB *togaDataBBLoad(char **row); +/* Load a togaData from row fetched with select * from togaData + * from database. Dispose of this with togaDataFree(). */ + + struct togaData *togaDataLoad(char **row); /* Load a togaData from row fetched with select * from togaData * from database. Dispose of this with togaDataFree(). */ +void togaDataBBFree(struct togaDataBB **pEl); +/* Free a single dynamically allocated togaDatasuch as created + * with togaDataLoad(). */ + void togaDataFree(struct togaData **pEl); /* Free a single dynamically allocated togaDatasuch as created * with togaDataLoad(). */ @@ -95,7 +133,8 @@ void extractHLTOGAsuffix(char *suffix); /* Extract suffix from TOGA table name. Prefix must be HLTOGAannot */ -void doHillerLabTOGAGene(struct trackDb *tdb, char *item, char *table_name); +// void doHillerLabTOGAGene(struct trackDb *tdb, char *item, char *table_name); // old +void doHillerLabTOGAGene(char *database, struct trackDb *tdb, char *item, char *table_name); /* Put up TOGA Gene track info. */ #endif // TOGACLICK_H diff --git a/ucsc_browser_visualisation/togaData.sql b/ucsc_browser_visualisation/togaData.sql deleted file mode 100644 index ab4415e..0000000 --- a/ucsc_browser_visualisation/togaData.sql +++ /dev/null @@ -1,23 +0,0 @@ -CREATE TABLE TOGAData ( - transcript varchar(50) not null, -- unique projection ID: ${ref_transcript_ID}.${chain_ID} - ref_transcript_ID varchar(50) not null, -- transcript ID in reference - ref_region varchar(100) not null, -- transcript region in reference - query_region varchar(100) not null, -- projection region in query - chain_score float not null, -- chain orthology score - chain_synteny int unsigned not null, -- chain synteny - chain_flank float not null, -- flank coverage - chain_gl_cds_fract float not null, -- global CDS fraction - chain_loc_cds_fract float not null, -- local CDS fraction - chain_exon_cov float not null, -- local CDS coverage - chain_intron_cov float not null, -- local intron coverage - status varchar(24) not null, -- projection GLP status: loss, intact, etc - perc_intact_ign_M float not null, -- %intact ignoring Missing - perc_intact_int_M float not null, -- %intact considering missing seq intact - intact_codon_prop float not null, -- % of intact codons - ouf_prop float not null, -- out of chain proportion - mid_intact tinyint unsigned not null, -- middle 80% intact? 1 - True 0 - False, else - undefined - mid_pres tinyint unsigned not null, -- middle 80% present? 
1 - True 0 - False, else - undefined - prot_sequence longblob not null, -- protein sequence - svg_plot longblob not null, -- svg string - PRIMARY KEY(transcript) -); diff --git a/ucsc_browser_visualisation/togaInactMut.sql b/ucsc_browser_visualisation/togaInactMut.sql deleted file mode 100644 index eb53d35..0000000 --- a/ucsc_browser_visualisation/togaInactMut.sql +++ /dev/null @@ -1,9 +0,0 @@ -CREATE TABLE TOGAInactMut ( - transcript varchar(50) not null, -- unique projection ID: ${ref_transcript_ID}.${chain_ID} - exon_num int unsigned not null, -- exon number - position int unsigned not null, -- possition where mutation happened - mut_class varchar(15) not null, -- mutation class such as FS deletion - mutation varchar(20) not null, -- what exactly happened - is_inact tinyint unsigned not null, -- is this mutation inactivating, yes 1 or not 0 - mut_id varchar(10) not null -- mut identifier -); diff --git a/ucsc_browser_visualisation/togaNucl.sql b/ucsc_browser_visualisation/togaNucl.sql deleted file mode 100644 index 9d2e189..0000000 --- a/ucsc_browser_visualisation/togaNucl.sql +++ /dev/null @@ -1,12 +0,0 @@ -CREATE TABLE TOGANucl ( - transcript varchar(50) not null, -- unique projection ID: ${ref_transcript_ID}.${chain_ID} - exon_num int unsigned not null, -- exon number - exon_region varchar(100) not null, -- region where exon was detected - pid float not null, -- nucleotide %id - blosum float not null, -- normalized blosum score - gaps tinyint unsigned not null, -- are there any asm gaps near? 1 - yes 0 - no - ali_class varchar(4) not null, -- alignemnt class: A, B, C, A+ - exp_region varchar(50) not null, -- where exon was expected - in_exp_region tinyint unsigned not null, -- detected in expected region or not 1 yes 0 no - alignment longblob not null -- exon sequence in query -);
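
The three SQL schemas deleted above are superseded by the extra columns of the bed12+22 bigBed that make_bigbed_data_public.py assembles. As a reference for the column order, here is a sketch naming the extra fields in the order struct togaDataBB (togaClick.h) reads them; the helper and its trailing-offset convention are illustrative, not part of the patch:

```python
from collections import namedtuple

# Extra columns appended after the standard BED fields, in the order
# struct togaDataBB consumes them (see togaClick.h above).
TOGA_BB_EXTRA_FIELDS = [
    "projection", "ref_trans_id", "ref_region", "query_region", "chain_score",
    "chain_synteny", "chain_flank", "chain_gl_cds_fract", "chain_loc_cds_fract",
    "chain_exon_cov", "chain_intron_cov", "status", "perc_intact_ign_M",
    "perc_intact_int_M", "intact_codon_prop", "ouf_prop", "mid_intact",
    "mid_pres", "prot_alignment", "svg_line", "ref_link",
    "inact_mut_html_table", "exon_ali_html",
]
TogaDataBB = namedtuple("TogaDataBB", TOGA_BB_EXTRA_FIELDS)


def parse_extra_fields(bed_plus_row):
    """Map the trailing columns of one tab-split bigBed row onto field names."""
    return TogaDataBB(*bed_plus_row[-len(TOGA_BB_EXTRA_FIELDS):])
```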