Skip to content

Commit

Permalink
Added macaca mulatta templates
Browse files Browse the repository at this point in the history
Signed-off-by: Douwe Schulte <d.schulte@uu.nl>
  • Loading branch information
douweschulte committed Oct 20, 2023
1 parent 56d388e commit 1bf7879
Show file tree
Hide file tree
Showing 9 changed files with 940 additions and 10 deletions.
23 changes: 13 additions & 10 deletions stitch/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ static int Main() {
new Argument<int>("live", new Option<string>("l"), new Option<int>(-1), "Prepare the HTML report for use with VS Code Live Server on the given port, -1 turns it off"),
new Argument<bool>("quiet", new Option<string>("q"), new Option<bool>(false), "Turns off any output on the command line in normal operation"),
}),
new Subcommand("download", "Download annotated templates from IMGT (cleans and annotates itself)", new List<IArgument>{
new Subcommand("download", "Download annotated templates from IMGT (cleans and annotates itself). NOTE: this is highly unstable and mainly intended for the maintainers of this program", new List<IArgument>{
new Argument<string>("species", new Option<string>(), new Option<string>(), "The latin name of the animal to download the data for (as used by IMGT: http://www.imgt.org/IMGTrepertoire/Proteins/)"),
new Argument<string>("segments", new Option<string>(), new Option<string>("IGHV IGKV,IGLV IGHJ IGKJ,IGLJ IGHC IGKC,IGLC"), "The segments to download, multiple segments combined with a , will be combined in a single file")
new Argument<string>("segments", new Option<string>(), new Option<string>("IGHV IGKV,IGLV IGHJ IGKJ,IGLJ IGHC IGKC,IGLC"), "The segments to download, multiple segments combined with a , will be combined in a single file"),
new Argument<bool>("debug", new Option<string>("d"), new Option<bool>(false), "Show debug info"),
}),
new Subcommand("refine", "Use Xle disambiguation to refine raw data (can also be done in an actual run)", new List<IArgument>{
new Argument<string>("input", new Option<string>(), new Option<string>(), "The peaks file to open and refine"),
Expand Down Expand Up @@ -72,7 +73,7 @@ static int Main() {
GenerateAnnotatedTemplate(content, string.IsNullOrEmpty(ou) ? inp : ou);
} else if (args.ContainsKey("download")) {
var sub_args = (Dictionary<string, (Type, object)>)args["download"].Item2;
DownloadSpecies((string)sub_args["species"].Item2, (string)sub_args["segments"].Item2);
DownloadSpecies((string)sub_args["species"].Item2, (string)sub_args["segments"].Item2, (bool)sub_args["debug"].Item2);
} else if (args.ContainsKey("refine")) {
var sub_args = (Dictionary<string, (Type, object)>)args["refine"].Item2;
RefineRawData((string)sub_args["input"].Item2, (string)sub_args["raw-data-dir"].Item2, (string)sub_args["output"].Item2, (string)sub_args["peaks-version"].Item2);
Expand Down Expand Up @@ -219,7 +220,7 @@ private static void SaveAndCleanFasta(string output, List<ReadFormat.General> re
("platypus", "platypus", "Pl", "Ornithorhynchus anatinus"),
("rabbit", "rabbit", "Rb", "Oryctolagus cuniculus"),
("rainbow trout", "", "", "Oncorhynchus mykiss"),
("Rhesus monkey", "", "", "Macaca mulatta"),
("Rhesus monkey", "Rhesus monkey", "", "Macaca mulatta"),
("Ring-tailed lemur", "", "", "Lemur catta"),
("river trout", "", "", "Salmo trutta"),
("sandbar shark", "", "", "Carcharhinus plumbeus"),
Expand All @@ -236,16 +237,16 @@ private static void SaveAndCleanFasta(string output, List<ReadFormat.General> re
("zebrafish", "", "", "Danio rerio"),
};

static void DownloadSpecies(string name, string segments = "IGHV IGKV,IGLV IGHJ IGKJ,IGLJ IGHC IGKC,IGLC") {
static void DownloadSpecies(string name, string segments = "IGHV IGKV,IGLV IGHJ IGKJ,IGLJ IGHC IGKC,IGLC", bool debug = false) {
foreach (var single in name.Split(',')) {
DownloadSingleSpecies(single, segments);
DownloadSingleSpecies(single, segments, debug);
}
}

/// <summary> Download a set of templates for a mammalian organism assuming the same structure as Homo sapiens. </summary>
/// <param name="name"> The name of the species, it will be matched to all predefined names. It can
/// be scientific name, common name, shorthand or short name (matched in that order). </param>
static void DownloadSingleSpecies(string name, string segments = "IGHV IGKV,IGLV IGHJ IGKJ,IGLJ IGHC IGKC,IGLC") {
static void DownloadSingleSpecies(string name, string segments = "IGHV IGKV,IGLV IGHJ IGKJ,IGLJ IGHC IGKC,IGLC", bool debug = false) {
(string CommonName, string ShortName, string ShortHand, string ScientificName) species = ("", "", "", "");
var found = false;
name = name.ToLower().Trim();
Expand All @@ -263,8 +264,8 @@ static void DownloadSingleSpecies(string name, string segments = "IGHV IGKV,IGLV
Console.WriteLine("Could not find given species");
return;
}
//var basename = $"https://www.imgt.org/IMGTrepertoire/Proteins/proteinDisplays.php?species={new CultureInfo("en-UK", false).TextInfo.ToTitleCase(species.ShortName).Replace(" ", "%20")}&latin={species.ScientificName.Replace(" ", "%20")}&group=";
var basename = $"http://www.imgt.org/3Dstructure-DB/cgi/DomainDisplay-include.cgi?species={species.ScientificName.Replace(" ", "%20")}&groups=";
//var basename = $"https://www.imgt.org/IMGTrepertoire/Proteins/proteinDisplays.php?species={species.ShortName.Replace(" ", "%20")}&latin={species.ScientificName.Replace(" ", "%20")}&group=";
var basename = $"https://www.imgt.org/3Dstructure-DB/cgi/DomainDisplay-include.cgi?species={species.ScientificName.Replace(" ", "%20")}&groups=";
HttpClient client = new();
Console.WriteLine(species.ScientificName);
foreach (var segment in segments.Split(' ')) {
Expand Down Expand Up @@ -295,11 +296,13 @@ static void DownloadSingleSpecies(string name, string segments = "IGHV IGKV,IGLV
}
} else {
var download = client.GetStringAsync(basename + segment);
if (debug) Console.WriteLine(basename + segment);
download.Wait();
GenerateAnnotatedTemplate(download.Result, species.ScientificName.Replace(' ', '_') + "_" + segment + ".fasta");
}
} catch {
} catch (Exception e) {
Console.WriteLine(" Not available");
if (debug) Console.WriteLine(e);
}
}
File.Delete("temp.html");
Expand Down
17 changes: 17 additions & 0 deletions templates/Macaca_mulatta_Heavy_Chain.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Macaca mulatta Heavy Chain->
Segment->
Path : Macaca_mulatta_IGHV.fasta
Name : IGHV
Identifier: ^(([a-zA-Z]+\d*)[\w-]*)
<-
Segment->
Path : Macaca_mulatta_IGHJ.fasta
Name : IGHJ
Identifier: ^(([a-zA-Z]+\d*)[\w-]*)
<-
Segment->
Path : Macaca_mulatta_IGHC.fasta
Name : IGHC
Identifier: ^(([a-zA-Z]+\d*)[\w-]*)
<-
<-
40 changes: 40 additions & 0 deletions templates/Macaca_mulatta_IGHC.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
>IGHA
PTKPKVFPLSLEGTQSDNVVVACLVQGFFPQKPLNVTWSKSGAGVTVINFPPRQDASGGLYTTSSQLTLPAAQCPASESVTCHVEHYTNPRQDVAVPCR
CDKPRLSLRRPALEDLLLGSEANLTCTLTGLRDPSGATFTWTPSSGKNAVQQSPEHDPCGCYSVSSVLPGCAEPWNNRVTFTCTAEHPELETQLTATISKS
GNTFRPEVHLLPPPSEELALNELVTLTCLARGFSPEDVLVRWLKGTEQLPRDKYLTWESRKEPSQGTTTFAVTSILRVAAEDWKKGDTFSCMVGHEALPLAFTQKTIDRLA
VPPPKCPSLK
>IGHD
DVFPIISACQLPKDNSPVVLACLITGYNPKSVTVTWHLGTQIQNQIMFPETEREGSYTTSSQLAPTPPLSQQHQGEYKCTVKHTPSDTSKEKTFRWP
ECPSHTQPLGVYLLPPALQDLWFQDKVTFTCFVVGSDLQDAHLSWEVAGKVPKGGMEEGPLEQHSNGSQSQHSRLALPRSLWNAGTSVTCTLNHPSLPSQKLMALREP
AAQAPVRLSLNLLASSDPPEAASWLLCEVSDFSPPNILLMWLENQREVNTSWFATTHPTPQPGSTMFWAWSVLRVPGPTSPQPATYTCVVSHEDSRTLLNASRSLEVS
ESPKAQYPSVPTVQPQAEGGLSKATRPPATNRNTGRREKEDEEEKEQQEGETKTP
>IGHE
ASIQSPFVFPLIPCCKHIASNATSVTLGCLATGYFPEPVMVTWDAGSLNRSTMTLPATTFTPSGHYATISLLTVSGAWAKETFTCHVVHTPSSADKEVNKTFG
VCSRNFTPPTVKILQSSCDDDGHFPPTIQLLCLISGYTPGAINVTWLENGQVMKVNSPTPPATQEGELASTQSEFTLAQKHWLSDRTYTCQVTYQGTTYNDSTKKCA
DSNPRGVSAYLSRPSPFDLFISKSPTITCLVVDLAPSKETVNLTWSRASGKPVPHIPATEKKQQRNGTLTVTSILPVVTQDWIEGETYQCRVTHPHLPRALVRSMTKTS
GPRAAPEVYVFATPEKLESRDKRTLACLIQNFMPEDISVQWLHSDVQLPDARHSVTQPRKTKGSGFFVFSRLEVTKAEWEQKDEFICRAVHEAASPSWIVQQAVSVNP
>IGHG1
ASTKGPSVFPLAPSSRSTSESTAALGCLVKDYFPEPVTVSWNSGSLTSGVHTFPAVLQSSGLYSLSSVVTVPSSSLGTQTYVCNVNHKPSNTKVDKRV
APELLGGPSVFLFPPKPKDTLMISRTPEVTCVVVDVSQEDPDVKFNWYVNGAEVHHAQTKPRETQYNSTYRVVSVLTVTHQDWLNGKEYTCKVSNKALPAPIQKTISKDK
GQPREPQVYTLPPSREELTKNQVSLTCLVKGFYPSDIVVEWESSGQPENTYKTTPPVLDSDGSYFLYSKLTVDKSRWQQGNVFSCSVMHEALHNHYTQKSLSVSP
EIKTCGGGSKPPTCPPCP
>IGHG2
SVFPLASCSRSTSQSTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFQAVLQSSGLYSLSSVVTVPSSSLGTQTYVCNVVHEPSNTKVDKTV
AELLGGPSVFLFPPKPKDTLMISRTPEVTCVVVDVSQEEPDVKFNWYVDGVEVHNAQTKPREEQFNSTYRVVSVLTVTHQDWLNGKEYTCKVSNKALPAPRQKTVSKTK
GQPREPQVYTLPPPREELTKNQVSLTCLVKGFYPSDIVVEWASNGQPENTYKTTPPVLDSDGSYFLYSKLTVDKSRWQQGNTFSCSVMHEA
GLPCRSTCPPCP
>IGHG3
SVFPLASCSRSTSQSTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSSLGTQTYVCNVVHEPSNTKVDKRV
APELLGGPSVFLFPPKPKDTLMISRTPEVTCVVVDVSQEDPEVQFNWYVDGAEVHHAQTKPRERQFNSTYRVVSVLTVTHQDWLNGKEYTCKVSNKGLPAPIEKTISKAK
GQPREPQVYILPPPQEELTKNQVSLTCLVTGFYPSDIAVEWESNGQPENTYKTTPPVLDSDGSYFLYSKLTVDKSRWQQGNTFSCSVMHEA
EFTPPCGDTTPPCPPCP
>IGHG4
SVFPLASSSRSTSESTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSSLGTQTYVCNVVHEPSNTKVDKRV
APELLGGPSVFLFPPKPKDTLMISRTPEVTCVVVDVSQEDPEVQFNWYVDGVEVHNAQTKPRERQFNSTYRVVSVLTVTHQDWLNGKEYTCKVSNKGLPAPIEKTISKAK
GQPREPQVYILPPPQEELTKNQVSLTCLVTGFYPSDIAVEWESNGQPENTYKTTPPVLDSDGSYLLYSKLTVNKSRWQPGNIFTCSVMHEA
EFTPPCPPCP
>IGHM
GSASAPTLFPLVSCENAPLDTNEVAVGCLAQDFLPDSITFSWKFKNNSNISKGVWGFPSVLRGGKYAATSQVLLASKDVMQGTDEHVVCKVQHPNGNKEQNVPLP
VLAERPPNVSVFVPPRDGFVGNPRESKLICQATGFSPRQIEVSWLREGKQVGSGITTDRVEAEAKESGPTTFKVTSTLTVSERDWLSQSVFTCRVDHRGLTFQKNVSSVCGP
NPDTAIRVFAIPPSFASIFLTKSTKLTCLVTDLATYDSVTITWTRQNGEALKTHTNISESHPNGTFSAVGEASICEDDWNSGERFRCTVTHTDLPSPLKQTISRPK
GVAMHRPDVYLLPPAREQLNLRESATITCLVTGFSPADIFVQWMQRGQPLSPEKYVTSAPMPEPQAPGRYFAHSILTVSEEDWNTGETYTCVVAHEALPNRVTERTVDKST
14 changes: 14 additions & 0 deletions templates/Macaca_mulatta_IGHJ.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
>IGHJ1
AEYFEFWGQGALVTVSS
>IGHJ2
YWYFDLWGPGTPITISS
>IGHJ3
DAFDFWGQGLRVTVSS
>IGHJ4
YFDYWGQGVLVTVSS
>IGHJ5-1
NRFDVWGPGVLVTVSS
>IGHJ5-2
NSLDVWGQGVLVTVSS
>IGHJ6
YYGLDSWGQGVVVTVSS
Loading

0 comments on commit 1bf7879

Please sign in to comment.