From f15592ab8357e25792b0a24166dd3062a286e625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Pulido?= <2949729+ijpulidos@users.noreply.github.com> Date: Mon, 28 Oct 2024 11:25:12 -0400 Subject: [PATCH] Convergence tests for protein mutation protocol (#95) * Neutral protein mutation convergence test * Charge changing protein mutation convergence test * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Using GUFE's JSON decoder * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Using forward-reverse utility function in all tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- feflow/tests/data/capped_AAs/ARG_capped.pdb | 53 +++ feflow/tests/data/capped_AAs/LYS_capped.pdb | 51 +++ .../data/capped_AAs/ala_to_arg_mapping.json | 1 + .../data/capped_AAs/ala_to_lys_mapping.json | 1 + feflow/tests/test_protein_mutation.py | 306 +++++++++++++++++- 5 files changed, 401 insertions(+), 11 deletions(-) create mode 100644 feflow/tests/data/capped_AAs/ARG_capped.pdb create mode 100644 feflow/tests/data/capped_AAs/LYS_capped.pdb create mode 100644 feflow/tests/data/capped_AAs/ala_to_arg_mapping.json create mode 100644 feflow/tests/data/capped_AAs/ala_to_lys_mapping.json diff --git a/feflow/tests/data/capped_AAs/ARG_capped.pdb b/feflow/tests/data/capped_AAs/ARG_capped.pdb new file mode 100644 index 0000000..d41e7d3 --- /dev/null +++ b/feflow/tests/data/capped_AAs/ARG_capped.pdb @@ -0,0 +1,53 @@ +REMARK 1 CREATED WITH OPENMM 8.1.1, 2024-10-22 +HETATM 1 H1 ACE A 1 1.328 2.173 0.000 1.00 0.00 H +HETATM 2 CH3 ACE A 1 2.006 1.318 0.000 1.00 0.00 C +HETATM 3 H2 ACE A 1 2.634 1.352 0.890 1.00 0.00 H +HETATM 4 H3 ACE A 1 2.634 1.352 -0.890 1.00 0.00 H +HETATM 5 C ACE A 1 1.229 0.000 0.000 1.00 0.00 C +HETATM 6 O ACE A 1 0.000 0.000 0.000 1.00 0.00 O +ATOM 7 N ARG A 2 1.954 -1.121 0.000 1.00 0.00 N +ATOM 8 H ARG A 2 3.113 -0.999 -0.240 1.00 0.00 H +ATOM 9 CA ARG A 2 1.337 -2.432 -0.001 1.00 0.00 C +ATOM 10 HA ARG A 2 0.663 -2.344 -0.981 1.00 0.00 H +ATOM 11 CB ARG A 2 2.391 -3.535 -0.001 1.00 0.00 C +ATOM 12 HB2 ARG A 2 3.180 -3.267 -0.861 1.00 0.00 H +ATOM 13 HB3 ARG A 2 3.041 -3.594 1.003 1.00 0.00 H +ATOM 14 C ARG A 2 0.461 -2.626 1.229 1.00 0.00 C +ATOM 15 O ARG A 2 -0.621 -3.201 1.139 1.00 0.00 O +ATOM 16 CG ARG A 2 2.084 -5.004 -0.344 1.00 0.00 C +ATOM 17 HG2 ARG A 2 1.598 -5.507 0.625 1.00 0.00 H +ATOM 18 HG3 ARG A 2 3.136 -5.550 -0.506 1.00 0.00 H +ATOM 19 CD ARG A 2 1.099 -5.210 -1.499 1.00 0.00 C +ATOM 20 HD2 ARG A 2 1.752 -5.185 -2.502 1.00 0.00 H +ATOM 21 HD3 ARG A 2 0.191 -4.493 -1.801 1.00 0.00 H +ATOM 22 NE ARG A 2 0.404 -6.514 -1.519 1.00 0.00 N +ATOM 23 HE ARG A 2 -0.766 -6.478 -1.303 1.00 0.00 H +ATOM 24 CZ ARG A 2 0.893 -7.771 -1.573 1.00 0.00 C +ATOM 25 NH1 ARG A 2 2.182 -8.043 -1.320 1.00 0.00 N +ATOM 26 HH11 ARG A 2 2.609 -8.561 -0.336 1.00 0.00 H +ATOM 27 HH12 ARG A 2 2.902 -8.196 -2.257 1.00 0.00 H +ATOM 28 NH2 ARG A 2 0.038 -8.772 -1.831 1.00 0.00 N +ATOM 29 HH21 ARG A 2 -0.588 -8.881 -2.837 1.00 0.00 H +ATOM 30 HH22 ARG A 2 -0.095 -9.737 -1.148 1.00 0.00 H +HETATM 31 N NME A 3 0.933 -2.143 2.381 1.00 0.00 N +HETATM 32 H NME A 3 1.829 -1.678 2.404 1.00 0.00 H +HETATM 33 C NME A 3 0.194 -2.264 3.621 1.00 0.00 C +HETATM 34 H1 NME A 3 -0.745 -2.786 3.436 1.00 0.00 H +HETATM 35 H2 NME A 3 0.786 -2.827 4.344 1.00 0.00 H +HETATM 36 H3 NME A 3 -0.014 -1.271 4.019 1.00 0.00 H +TER 37 NME A 3 +CONECT 1 2 +CONECT 2 5 1 3 4 +CONECT 3 2 +CONECT 4 2 +CONECT 5 2 6 7 +CONECT 6 5 +CONECT 7 5 +CONECT 14 31 +CONECT 31 14 33 32 +CONECT 32 31 +CONECT 33 34 35 36 31 +CONECT 34 33 +CONECT 35 33 +CONECT 36 33 +END diff --git a/feflow/tests/data/capped_AAs/LYS_capped.pdb b/feflow/tests/data/capped_AAs/LYS_capped.pdb new file mode 100644 index 0000000..1dab32d --- /dev/null +++ b/feflow/tests/data/capped_AAs/LYS_capped.pdb @@ -0,0 +1,51 @@ +REMARK 1 CREATED WITH OPENMM 8.1.1, 2024-10-22 +HETATM 1 H1 ACE A 1 1.328 2.173 0.000 1.00 0.00 H +HETATM 2 CH3 ACE A 1 2.006 1.318 0.000 1.00 0.00 C +HETATM 3 H2 ACE A 1 2.634 1.352 0.890 1.00 0.00 H +HETATM 4 H3 ACE A 1 2.634 1.352 -0.890 1.00 0.00 H +HETATM 5 C ACE A 1 1.229 0.000 0.000 1.00 0.00 C +HETATM 6 O ACE A 1 0.000 0.000 0.000 1.00 0.00 O +ATOM 7 N LYS A 2 1.954 -1.121 0.000 1.00 0.00 N +ATOM 8 H LYS A 2 3.122 -1.025 -0.195 1.00 0.00 H +ATOM 9 CA LYS A 2 1.337 -2.432 -0.001 1.00 0.00 C +ATOM 10 HA LYS A 2 0.565 -2.458 -0.915 1.00 0.00 H +ATOM 11 CB LYS A 2 2.391 -3.535 -0.001 1.00 0.00 C +ATOM 12 HB2 LYS A 2 1.702 -4.515 0.088 1.00 0.00 H +ATOM 13 HB3 LYS A 2 3.047 -3.497 0.998 1.00 0.00 H +ATOM 14 C LYS A 2 0.461 -2.626 1.229 1.00 0.00 C +ATOM 15 O LYS A 2 -0.621 -3.201 1.139 1.00 0.00 O +ATOM 16 CG LYS A 2 3.224 -3.837 -1.235 1.00 0.00 C +ATOM 17 HG2 LYS A 2 2.315 -3.880 -2.010 1.00 0.00 H +ATOM 18 HG3 LYS A 2 4.035 -3.108 -1.721 1.00 0.00 H +ATOM 19 CD LYS A 2 4.148 -4.963 -0.767 1.00 0.00 C +ATOM 20 HD2 LYS A 2 4.973 -4.351 -0.150 1.00 0.00 H +ATOM 21 HD3 LYS A 2 3.850 -5.847 -0.026 1.00 0.00 H +ATOM 22 CE LYS A 2 4.914 -5.474 -1.977 1.00 0.00 C +ATOM 23 HE2 LYS A 2 5.858 -5.939 -1.402 1.00 0.00 H +ATOM 24 HE3 LYS A 2 5.434 -4.831 -2.835 1.00 0.00 H +ATOM 25 NZ LYS A 2 4.338 -6.749 -2.436 1.00 0.00 N +ATOM 26 HZ1 LYS A 2 4.059 -6.551 -3.589 1.00 0.00 H +ATOM 27 HZ2 LYS A 2 5.104 -7.672 -2.508 1.00 0.00 H +ATOM 28 HZ3 LYS A 2 3.279 -7.099 -1.995 1.00 0.00 H +HETATM 29 N NME A 3 0.933 -2.143 2.381 1.00 0.00 N +HETATM 30 H NME A 3 1.829 -1.678 2.404 1.00 0.00 H +HETATM 31 C NME A 3 0.194 -2.264 3.621 1.00 0.00 C +HETATM 32 H1 NME A 3 -0.745 -2.786 3.436 1.00 0.00 H +HETATM 33 H2 NME A 3 0.786 -2.827 4.344 1.00 0.00 H +HETATM 34 H3 NME A 3 -0.014 -1.271 4.019 1.00 0.00 H +TER 35 NME A 3 +CONECT 1 2 +CONECT 2 5 1 3 4 +CONECT 3 2 +CONECT 4 2 +CONECT 5 2 6 7 +CONECT 6 5 +CONECT 7 5 +CONECT 14 29 +CONECT 29 14 31 30 +CONECT 30 29 +CONECT 31 32 33 34 29 +CONECT 32 31 +CONECT 33 31 +CONECT 34 31 +END diff --git a/feflow/tests/data/capped_AAs/ala_to_arg_mapping.json b/feflow/tests/data/capped_AAs/ala_to_arg_mapping.json new file mode 100644 index 0000000..8528731 --- /dev/null +++ b/feflow/tests/data/capped_AAs/ala_to_arg_mapping.json @@ -0,0 +1 @@ +{"componentA": {"atoms": [[1, "1", 1, 0, " ", "H1", "ACE", 1, "N", 0], [6, "1", 2, 0, " ", "CH3", "ACE", 1, "N", 0], [1, "1", 3, 0, " ", "H2", "ACE", 1, "N", 0], [1, "1", 4, 0, " ", "H3", "ACE", 1, "N", 0], [6, "1", 5, 0, " ", "C", "ACE", 1, "N", 0], [8, "1", 6, 0, " ", "O", "ACE", 1, "N", 0], [7, "1", 7, 0, " ", "N", "ALA", 2, "N", 0], [1, "1", 8, 0, " ", "H", "ALA", 2, "N", 0], [6, "1", 9, 0, " ", "CA", "ALA", 2, "N", 0], [1, "1", 10, 0, " ", "HA", "ALA", 2, "N", 0], [6, "1", 11, 0, " ", "CB", "ALA", 2, "N", 0], [1, "1", 12, 0, " ", "HB1", "ALA", 2, "N", 0], [1, "1", 13, 0, " ", "HB2", "ALA", 2, "N", 0], [1, "1", 14, 0, " ", "HB3", "ALA", 2, "N", 0], [6, "1", 15, 0, " ", "C", "ALA", 2, "N", 0], [8, "1", 16, 0, " ", "O", "ALA", 2, "N", 0], [7, "1", 17, 0, " ", "N", "NME", 3, "N", 0], [1, "1", 18, 0, " ", "H", "NME", 3, "N", 0], [6, "1", 19, 0, " ", "C", "NME", 3, "N", 0], [1, "1", 20, 0, " ", "H1", "NME", 3, "N", 0], [1, "1", 21, 0, " ", "H2", "NME", 3, "N", 0], [1, "1", 22, 0, " ", "H3", "NME", 3, "N", 0]], "bonds": [[4, 1, "SINGLE", "N"], [4, 5, "DOUBLE", "N"], [1, 0, "SINGLE", "N"], [1, 2, "SINGLE", "N"], [1, 3, "SINGLE", "N"], [4, 6, "SINGLE", "N"], [14, 8, "SINGLE", "N"], [14, 15, "DOUBLE", "N"], [8, 10, "SINGLE", "N"], [8, 9, "SINGLE", "N"], [8, 6, "SINGLE", "N"], [10, 11, "SINGLE", "N"], [10, 12, "SINGLE", "N"], [10, 13, "SINGLE", "N"], [7, 6, "SINGLE", "N"], [14, 16, "SINGLE", "N"], [18, 19, "SINGLE", "N"], [18, 20, "SINGLE", "N"], [18, 21, "SINGLE", "N"], [18, 16, "SINGLE", "N"], [17, 16, "SINGLE", "N"]], "name": "", "conformers": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': ' ALA -> ARG + Negative: LYS -> ALA -> LYS + + The need to do it this way is because since we are introducing counterions the energies for + each will not match, therefore to cancel this contribution we compare between both positive + and negative full cycles. + """ + # Create and execute DAGs + arg_results = self.execute_forward_reverse_dag( + arg_capped, + ala_capped, + arg_capped_system, + ala_capped_system, + arg_to_ala_mapping, + tmpdir, + ) + lys_results = self.execute_forward_reverse_dag( + lys_capped, + ala_capped, + lys_capped_system, + ala_capped_system, + lys_to_ala_mapping, + tmpdir, + ) + + # they should add up to close to zero + arg_forward_reverse_sum = arg_results["forward"][0] + arg_results["reverse"][0] + arg_forward_reverse_sum_error = ( + arg_results["forward"][1] ** 2 + arg_results["reverse"][1] ** 2 + ) + lys_forward_reverse_sum = lys_results["forward"][0] + lys_results["reverse"][0] + lys_forward_reverse_sum_error = ( + lys_results["forward"][1] ** 2 + lys_results["reverse"][1] ** 2 + ) + + # FE estimates are the first element, errors are the second element in the tuple + arg_lys_diff = abs(arg_forward_reverse_sum - lys_forward_reverse_sum) + arg_lys_diff_error = np.sqrt( + arg_forward_reverse_sum_error + lys_forward_reverse_sum_error + ) + + print( + f"DDG: {arg_lys_diff}, 6*dDDG: {6 * arg_lys_diff_error}" + ) # debug control print + assert arg_lys_diff < 6 * arg_lys_diff_error, ( + f"DDG ({arg_lys_diff}) is greater than " + f"6 * dDDG ({6 * arg_lys_diff_error})" + )