See subfolder 'ngs_pipeline'.
conda env create --file annot_env.yml
conda activate annot_env
Inputs: expression matrix and sample info from pre-processing. NOTE: adjust the settings in MIRSORT_ANNOTATION_DF.json accordingly.
python assemble_anndata.py
Use the FASTA file of sequences after pre-processing for sequence annotation. Allow 1 mismatch in the annotation pipelines (unitas and sports).
NOTE: snoDB tsv file downloaded from https://bioinfo-scottgroup.med.usherbrooke.ca/snoDB/
python snoDB2fa.py
Run unitas (https://www.smallrnagroup.uni-mainz.de/software.html)
perl unitas_1.7.7.pl -i features_detected_sequences__publication.fa -species homo_sapiens -species_miR_only -tail 2 -intmod 1 -mismatch 1 -insdel 0 -refseq snoDB.fa -dump_prefix unitas_annotation/UNITAS
Run sports (https://github.com/junchaoshi/sports1.1)
NOTE: get sports pre-compiled 'Homo_sapiens' annotation database from https://ncrnainfo-my.sharepoint.com/personal/sports_ncrna_info/_layouts/15/guestaccess.aspx?docid=0773ed3d5f6b74f35bbd643e1af221c31&authkey=AcRxf8walnGUIEhgI--8CDc
perl sports.pl -i features_detected_sequences__publication.fa -p 4 -k -M 1 -g Homo_sapiens/genome/hg38/genome -m Homo_sapiens/miRBase/21/miRBase_21-hsa -r Homo_sapiens/rRNAdb/human_rRNA -t Homo_sapiens/GtRNAdb/hg19/hg19-tRNAs -w Homo_sapiens/piRBase/piR_human -e Homo_sapiens/Ensembl/release-89/Homo_sapiens.GRCh38.ncrna -f Homo_sapiens/Rfam/12.3/Rfam-12.3-human -o sports_annotation/
cd unitas_annotation/UNITAS_dd-mm-yyyy_features_detected_sequences__publication.fa_#1
awk 'NF>=3' unitas.full_annotation_matrix.txt | awk '$3 !~ "low_complexity" {print $0}' > unitas.full_annotation_matrix_justannoseqs.txt
cd sports_annotation/1_features_detected_sequences__publication/features_detected_sequences__publication_result
awk '$6 !~ "NO_Annotation" {print $0}' features_detected_sequences__publication_output.txt > features_detected_sequences__publication_output_justannoseqs.txt
python merge_sRNAclass_annotations.py
python rRNA_position_classification.py
python generate_sRNA_sub_class_annotation_df.py
python create_seq_annotation_df.py
python ad_reduce_features.py
python ad_aggregate.py
python create_aggregated_annotation_df.py
python ad_reduce_features_further.py
python ad2csv.py
python compare_2_Juzenas.py