Skip to content

Commit

Permalink
more info from illumina, relaxing patterns, better tests
Browse files Browse the repository at this point in the history
  • Loading branch information
nickp60 committed Mar 1, 2024
1 parent 4de1e48 commit 4d11c47
Show file tree
Hide file tree
Showing 2 changed files with 151 additions and 48 deletions.
149 changes: 102 additions & 47 deletions fcid/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,56 +3,111 @@


# Ordered lookup table: [pattern, [[candidate instrument models]]].
# get_tech_type() returns the value of the first pattern that matches the
# whole instrument ID, so the catch-all ".*" entry must stay last.
InstrumentIDs = [
    [r"HWI-M[0-9]{4}", [["MiSeq"]]],
    # Genome Analyzer IDs carry a suffix after the prefix (e.g. HWUSI-EAS100R),
    # so the prefix alone cannot satisfy a whole-string match.
    [r"HWUSI.*", [["Genome Analyzer IIx"]]],
    [r"M[0-9]{5}", [["MiSeq"]]],
    [r"HWI-C[0-9]{5}", [["HiSeq 1500"]]],
    [r"C[0-9]{5}", [["HiSeq 1500"]]],
    [r"HWI-D[0-9]{5}", [["HiSeq 2500"]]],
    [r"D[0-9]{5}", [["HiSeq 2500"]]],
    [r"J[0-9]{5}", [["HiSeq 3000"]]],
    [r"K[0-9]{5}", [["HiSeq 3000", "HiSeq 4000"]]],
    [r"E[0-9]{5}", [["HiSeq X"]]],
    [r"NB[0-9]{6}", [["NextSeq"]]],
    [r"NS[0-9]{6}", [["NextSeq"]]],
    [r"MN[0-9]{5}", [["MiniSeq"]]],
    [r"A[0-9]{5}", [["NovaSeq"]]],
    [r"NA[0-9]{5}", [["NovaSeq"]]],
    [r"SN[0-9]{3}", [["HiSeq2000", "HiSeq2500"]]],
    # Fallback for IDs that match none of the known layouts.
    [r".*", [["Unknown"]]],
]
# Below are the relevant texts from the 3 emails with tech support
# ------------------------------------
# xxxxxBCxx: HiSeq 2500 rapid v2
# xxxxxACxx: HiSeq 2500 TruSeq v3
# xxxxxANxx: HiSeq 2500 High Output v4
# xxxxxBBxx: HiSEQ 3000/4000
# xxxxxALxx; xxxxxCCxx: HiSeqX
#
# xxxxxDRxx: NovaSeq SP,S1
# xxxxxDMxx: NovaSeq S2
# xxxxxDSxx: NovaSeq S4
#
# xxxxxAFxx: NextSeq 500/550 Mid Output
# xxxxxBGxx; xxxxxAGxx: NextSeq 500/550 High Output
#--------------------------------------
# I believe the format for 10B NovaSeq X cells will be "XXXXXXLT3". The 1.5B and 25B cells will likely use a different format
#---------------------------------------
# iSeq 100 BNTxxxxx-xxxx (BRB/BPC/BPG/BPA/BPL/BNT/BTR)
# MiniSeq Mid Output 000Hxxxxx
# MiniSeq High Output 000Hxxxxx
# MiSeq Nano Dxxxx
# MiSeq Micro Gxxxx
# MiSeq Standard Bxxxx; Cxxxx; Jxxxx; Kxxxx; Lxxxx
# NextSeq 500/550 Mid Output xxxxxAFxx
# NextSeq 500/550 High Output xxxxxBGxx; xxxxxAGxx
# NextSeq 1000/2000 P1 xxxxxxxM5
# NextSeq 1000/2000 P2 xxxxxxxM5
# NextSeq 2000 P3 xxxxxxxHV
# HiSeq 2500 Rapid v2 xxxxxBCxx
# HiSeq 2500 TruSeq v3 xxxxxACxx
# HiSeq 2500 High Output v4 xxxxxANxx
# HiSeq 3000/4000 xxxxxBBxx
# HiSeq X xxxxxALxx; xxxxxCCxx
# NovaSeq 6000 SP and S1 xxxxxDRxx
# NovaSeq 6000 S2 xxxxxDMxx
# NovaSeq 6000 S4 xxxxxDSxx
# NovaSeq X/X Plus 10B xxxxxxLTx


# Three emails: 12/7/2022, 9/19/2023, and 2/29/2024
# Ordered lookup table:
#   [pattern, [[candidate instrument models], flow cell description]].
# get_tech_type() returns the value of the first pattern that matches the
# whole flow cell ID, so more specific layouts come first and the catch-all
# ".*" entry must stay last.
FCIDs = [
    # iSeq 100: three-letter prefix, five characters, a dash, four characters.
    # The prefixes are alternatives of literal text, not character classes.
    [r"(?:BNT|BRB|BPC|BPG|BPA|BPL|BTR)[A-Z0-9]{5}-[A-Z0-9]{4}", [["iSeq 100"], "Standard Output flow cell"]],
    [r"000H[A-Z0-9]{5}", [["MiniSeq"], "Mid or High Output flow cell"]],
    # MiSeq: one type letter followed by four characters.
    [r"D[A-Z0-9]{4}", [["MiSeq"], "MiSeq Nano flow cell"]],
    [r"G[A-Z0-9]{4}", [["MiSeq"], "MiSeq Micro flow cell"]],
    [r"A[A-Z0-9]{4}", [["MiSeq"], "MiSeq Standard v2 flow cell"]],
    [r"B[A-Z0-9]{4}", [["MiSeq"], "MiSeq Standard flow cell"]],
    [r"C[A-Z0-9]{4}", [["MiSeq"], "MiSeq Standard flow cell"]],
    [r"J[A-Z0-9]{4}", [["MiSeq"], "MiSeq Standard flow cell"]],
    [r"K[A-Z0-9]{4}", [["MiSeq"], "MiSeq Standard flow cell"]],
    [r"L[A-Z0-9]{4}", [["MiSeq"], "MiSeq Standard flow cell"]],
    # Nine-character IDs: the two-letter code at positions 6-7 (or the
    # trailing code for NextSeq 1000/2000 and NovaSeq X) selects the type.
    [r"[A-Z0-9]{5}AF[A-Z0-9]{2}", [["NextSeq 500", "NextSeq 550"], "Mid Output flow cell"]],
    [r"[A-Z0-9]{5}AG[A-Z0-9]{2}", [["NextSeq 500", "NextSeq 550"], "High Output flow cell"]],
    [r"[A-Z0-9]{5}BG[A-Z0-9]{2}", [["NextSeq 500", "NextSeq 550"], "High Output flow cell"]],
    [r"[A-Z0-9]{7}M5", [["NextSeq 1000", "NextSeq 2000"], "P1 or P2 flow cell"]],
    [r"[A-Z0-9]{7}HV", [["NextSeq 1000", "NextSeq 2000"], "P3 flow cell"]],
    [r"[A-Z0-9]{5}BC[A-Z0-9]{2}", [["HiSeq 2500"], "Rapid Run (2-lane) v2 flow cell"]],
    [r"[A-Z0-9]{5}AC[A-Z0-9]{2}", [["HiSeq 2500"], "TruSeq v3 flow cell"]],
    [r"[A-Z0-9]{5}AN[A-Z0-9]{2}", [["HiSeq 2500"], "High Output v4 flow cell"]],
    [r"[A-Z0-9]{5}BB[A-Z0-9]{2}", [["HiSeq 3000", "HiSeq 4000"], "(8-lane) v1 flow cell"]],
    [r"[A-Z0-9]{5}AL[A-Z0-9]{2}", [["HiSeq X"], "(8-lane) flow cell"]],
    [r"[A-Z0-9]{5}CC[A-Z0-9]{2}", [["HiSeq X"], "(8-lane) flow cell"]],
    [r"[A-Z0-9]{5}DR[A-Z0-9]{2}", [["NovaSeq 6000"], "SP or S1 flow cell"]],
    [r"[A-Z0-9]{5}DM[A-Z0-9]{2}", [["NovaSeq 6000"], "S2 flow cell"]],
    [r"[A-Z0-9]{5}DS[A-Z0-9]{2}", [["NovaSeq 6000"], "S4 flow cell"]],
    [r"[A-Z0-9]{6}LT[A-Z0-9]", [["NovaSeq X", "NovaSeq X Plus"], "10B flow cell"]],
    # NOTE: HiSeq Rapid Run v1 IDs (H....ADXX / H....ADXY) have no entry here
    # and therefore resolve to the catch-all below.
    [r".*", [["Unknown Machine"], "Unknown flowcell"]],
]
Expand All @@ -61,7 +116,7 @@
def get_tech_type(flowcell, d):
    """Return the value of the first entry in *d* whose pattern matches.

    Args:
        flowcell: The flow cell (or instrument) ID to classify. Every
            occurrence of the "000000000-" filler is removed before
            matching, so "000000000-BLW8J" is looked up as "BLW8J".
        d: An ordered iterable of ``[pattern, value]`` pairs. Each pattern is
            a regular expression that must match the whole ID.

    Returns:
        The ``value`` paired with the first matching pattern, or ``None``
        when no pattern matches.
    """
    flowcell = flowcell.replace("000000000-", "")
    for pattern, value in d:
        # fullmatch anchors the pattern as a unit; concatenating "^" and "$"
        # would bind only to the outer branches of a top-level alternation.
        if re.fullmatch(pattern, flowcell):
            return value
    return None

Expand Down
50 changes: 49 additions & 1 deletion fcid/test_fcid.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,55 @@ def test_by_machine(capsys):
assert outerr.out == "HiSeq 3000,HiSeq 4000\n"

def test_by_flowcell():
    """A NovaSeq X 10B flow cell ID resolves to the NovaSeq X family."""
    machines = get_tech_type("22C37GLT3", FCIDs)[0]
    # Only the first candidate is pinned: the relaxed LTx pattern also lists
    # "NovaSeq X Plus", so comparing the whole list would be too strict.
    assert machines[0] == "NovaSeq X"

def test_by_flowcell_weird_zeros():
    """An ID carrying the "000000000-" filler is still recognised as MiSeq."""
    padded_id = "000000000-BLW8J"
    machines = get_tech_type(padded_id, FCIDs)[0]
    assert machines == ["MiSeq"]

def test_given_patterns():
    """Each ID template supplied by Illumina tech support maps to its machine.

    Templates use "x" as a placeholder character; they are upper-cased before
    lookup so the placeholders satisfy the ``[A-Z0-9]`` classes.
    """
    # Template -> first machine expected in the lookup result. Each template
    # appears exactly once: a dict literal keeps only the last value for a
    # repeated key, so repeated keys would silently hide expectations.
    expected_machine = {
        "xxxxxBCxx": "HiSeq 2500",      # Rapid v2
        "xxxxxACxx": "HiSeq 2500",      # TruSeq v3
        "xxxxxANxx": "HiSeq 2500",      # High Output v4
        "xxxxxBBxx": "HiSeq 3000",      # HiSeq 3000/4000
        "xxxxxALxx": "HiSeq X",
        "xxxxxCCxx": "HiSeq X",
        "xxxxxDRxx": "NovaSeq 6000",    # SP, S1
        "xxxxxDMxx": "NovaSeq 6000",    # S2
        "xxxxxDSxx": "NovaSeq 6000",    # S4
        "xxxxxAFxx": "NextSeq 500",     # 500/550 Mid Output
        "xxxxxAGxx": "NextSeq 500",     # 500/550 High Output
        "xxxxxBGxx": "NextSeq 500",     # 500/550 High Output
        "XXXXXXLT3": "NovaSeq X",
        "xxxxxxLTx": "NovaSeq X",
        "BNTxxxxx-xxxx": "iSeq 100",
        "BPLxxxxx-xxxx": "iSeq 100",
        "000Hxxxxx": "MiniSeq",
        "Dxxxx": "MiSeq",
        "Gxxxx": "MiSeq",
        "Lxxxx": "MiSeq",
        "xxxxxxxM5": "NextSeq 1000",
        "xxxxxxxHV": "NextSeq 1000",
    }
    for template, machine in expected_machine.items():
        res = get_tech_type(template.upper(), FCIDs)
        # check machine
        assert res[0][0] == machine, f"{template}: got {res!r}"
        # check chemistry
        # TODO

0 comments on commit 4d11c47

Please sign in to comment.