From 91806a83cc8cd4a89697808181983eb4b1c0f5da Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 9 Apr 2024 04:47:53 +0800
Subject: [PATCH] Added an example run_REXEE_log.txt and a unit test for
 get_swaps

---
 ensemble_md/analysis/analyze_traj.py     |   6 +-
 ensemble_md/tests/data/run_REXEE_log.txt | 363 +++++++++++++++++++++++
 ensemble_md/tests/test_analyze_traj.py   |  22 +-
 3 files changed, 384 insertions(+), 7 deletions(-)
 create mode 100644 ensemble_md/tests/data/run_REXEE_log.txt

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index 7db40204..996fdda1 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -105,7 +105,7 @@ def stitch_time_series(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, sav
                 traj = list(np.array(traj) + shifts[shift_idx])
             else:
                 traj = np.loadtxt(files_sorted[i][j], comments=['#', '@'])[:, col_idx]
-            
+
             if j != n_iter - 1:
                 traj = traj[:-1]
 
@@ -180,7 +180,7 @@ def stitch_time_series_for_sim(files, shifts, dhdl=True, col_idx=-1, save=True):
                 traj = traj[:-1]  # remove the last frame, which is the same as the first of the next time series.
 
             trajs[i].extend(traj)
-        
+
         # All segments for the same replica should have the same shift
         trajs[i] = list(np.array(trajs[i]) + shifts[i])
 
@@ -835,7 +835,7 @@ def plot_g_vecs(g_vecs, refs=None, refs_err=None, plot_rmse=True):
 
 def get_swaps(REXEE_log='run_REXEE_log.txt'):
     """
-    For each replica, identifies the states where exchanges were proposed and accepted.
+    For each replica, identifies the states involved in proposed and accepted exchanges.
     (Todo: We should be able to only use :code:`rep_trajs.npy` and :code:`state_trajs.npy`
     instead of parsing the REXEE log file to reach the same goal.)
 
diff --git a/ensemble_md/tests/data/run_REXEE_log.txt b/ensemble_md/tests/data/run_REXEE_log.txt
new file mode 100644
index 00000000..471996a0
--- /dev/null
+++ b/ensemble_md/tests/data/run_REXEE_log.txt
@@ -0,0 +1,363 @@
+Current time: 19/06/2023 04:08:37
+Command line: /jet/home/wehs7661/.local/python3.9/bin/run_EEXE
+
+Important parameters of EXEE
+============================
+Python version: 3.9.12 (main, Feb 28 2023, 14:59:18) 
+[GCC 10.2.0]
+GROMACS executable: /jet/home/wehs7661/pkgs/gromacs/2022.5/bin/gmx
+GROMACS version: 2022.5-dev-20230428-fdf57150ad
+ensemble_md version: 0.6.0+40.gba0d9e6.dirty
+Simulation inputs: anthracene.gro, anthracene.top, expanded.mdp
+Verbose log file: True
+Proposal scheme: exhaustive
+Acceptance scheme for swapping simulations: metropolis
+Whether to perform weight combination: False
+Histogram cutoff: -1
+Number of replicas: 4
+Number of iterations: 12500
+Number of attempted swaps in one exchange interval: N^3
+Length of each replica: 4.0 ps
+Frequency for checkpointing: 100 iterations
+Total number of states: 8
+Additional grompp arguments: None
+Additional runtime arguments: {'-nt': '16', '-ntmpi': '1'}
+Alchemical ranges of each replica in EEXE:
+  - Replica 0: States [0, 1, 2, 3, 4]
+  - Replica 1: States [1, 2, 3, 4, 5]
+  - Replica 2: States [2, 3, 4, 5, 6]
+  - Replica 3: States [3, 4, 5, 6, 7]
+
+Iteration 0:  0.0 -  4.0 ps
+===========================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 1, (coul, vdw) =                 (0.18,)
+  Simulation 1: Global state 2, (coul, vdw) =                 (0.42,)
+  Simulation 2: Global state 3, (coul, vdw) =                 (0.57,)
+  Simulation 3: Global state 4, (coul, vdw) =                 (0.68,)
+
+Parsing sim_0/iteration_0/md.log ...
+Parsing sim_1/iteration_0/md.log ...
+Parsing sim_2/iteration_0/md.log ...
+Parsing sim_3/iteration_0/md.log ...
+
+Swappable pairs: [(0, 1), (1, 2), (2, 3)]
+
+Proposed swap: (2, 3)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 1.91 kT, U^j_m - U^j_n = 6.93 kT, Total dU: 8.84 kT
+  g^i_n - g^i_m = 0.43 kT, g^j_m - g^j_n = -0.43 kT, Total dg: 0.00 kT
+  Acceptance rate: 0.000 / Random number drawn: 0.212
+  Swap rejected! 
+  Current list of configurations: [0, 1, 2, 3]
+
+Remaining swappable pairs: [(0, 1)]
+
+Proposed swap: (0, 1)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 5.12 kT, U^j_m - U^j_n = 1.53 kT, Total dU: 6.65 kT
+  g^i_n - g^i_m = 4.94 kT, g^j_m - g^j_n = -4.94 kT, Total dg: 0.00 kT
+  Acceptance rate: 0.001 / Random number drawn: 0.958
+  Swap rejected! 
+  Current list of configurations: [0, 1, 2, 3]
+
+The finally adopted swap pattern: [0, 1, 2, 3]
+The list of configurations sampled in each replica in the next iteration: [0, 1, 2, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 1:  4.0 -  8.0 ps
+===========================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 1, (coul, vdw) =                 (0.18,)
+  Simulation 1: Global state 1, (coul, vdw) =                 (0.18,)
+  Simulation 2: Global state 3, (coul, vdw) =                 (0.57,)
+  Simulation 3: Global state 6, (coul, vdw) =                 (0.86,)
+
+Parsing sim_0/iteration_1/md.log ...
+Parsing sim_1/iteration_1/md.log ...
+Parsing sim_2/iteration_1/md.log ...
+Parsing sim_3/iteration_1/md.log ...
+
+Swappable pairs: [(0, 1), (2, 3)]
+
+Proposed swap: (0, 1)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 0.00 kT, U^j_m - U^j_n = 0.00 kT, Total dU: 0.00 kT
+  g^i_n - g^i_m = 0.00 kT, g^j_m - g^j_n = 0.00 kT, Total dg: 0.00 kT
+  Acceptance rate: 1.000 / Random number drawn: 0.047
+  Swap accepted! 
+  Current list of configurations: [1, 0, 2, 3]
+
+Remaining swappable pairs: [(2, 3)]
+
+Proposed swap: (2, 3)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 8.64 kT, U^j_m - U^j_n = 41.64 kT, Total dU: 50.28 kT
+  g^i_n - g^i_m = -5.23 kT, g^j_m - g^j_n = 5.23 kT, Total dg: 0.00 kT
+  Acceptance rate: 0.000 / Random number drawn: 0.347
+  Swap rejected! 
+  Current list of configurations: [1, 0, 2, 3]
+
+The finally adopted swap pattern: [1, 0, 2, 3]
+The list of configurations sampled in each replica in the next iteration: [1, 0, 2, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 2:  8.0 -  12.0 ps
+============================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 1, (coul, vdw) =                 (0.18,)
+  Simulation 1: Global state 5, (coul, vdw) =                 (0.76,)
+  Simulation 2: Global state 4, (coul, vdw) =                 (0.68,)
+  Simulation 3: Global state 7, (coul, vdw) =                 (1.0,)
+
+Parsing sim_0/iteration_2/md.log ...
+Parsing sim_1/iteration_2/md.log ...
+Parsing sim_2/iteration_2/md.log ...
+Parsing sim_3/iteration_2/md.log ...
+
+n_ex is set back to 1 since there is only 1 swappable pair.
+Swappable pairs: [(1, 2)]
+
+Proposed swap: (1, 2)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 9.19 kT, U^j_m - U^j_n = -2.31 kT, Total dU: 6.88 kT
+  g^i_n - g^i_m = 1.45 kT, g^j_m - g^j_n = -1.45 kT, Total dg: 0.00 kT
+  Acceptance rate: 0.001 / Random number drawn: 0.303
+  Swap rejected! 
+  Current list of configurations: [1, 0, 2, 3]
+
+The finally adopted swap pattern: [0, 1, 2, 3]
+The list of configurations sampled in each replica in the next iteration: [1, 0, 2, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 3:  12.0 -  16.0 ps
+=============================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 0, (coul, vdw) =                 (0.0,)
+  Simulation 1: Global state 5, (coul, vdw) =                 (0.76,)
+  Simulation 2: Global state 6, (coul, vdw) =                 (0.86,)
+  Simulation 3: Global state 7, (coul, vdw) =                 (1.0,)
+
+Parsing sim_0/iteration_3/md.log ...
+Parsing sim_1/iteration_3/md.log ...
+Parsing sim_2/iteration_3/md.log ...
+Parsing sim_3/iteration_3/md.log ...
+
+Swappable pairs: []
+
+The finally adopted swap pattern: [0, 1, 2, 3]
+The list of configurations sampled in each replica in the next iteration: [1, 0, 2, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 4:  16.0 -  20.0 ps
+=============================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 1, (coul, vdw) =                 (0.18,)
+  Simulation 1: Global state 1, (coul, vdw) =                 (0.18,)
+  Simulation 2: Global state 3, (coul, vdw) =                 (0.57,)
+  Simulation 3: Global state 6, (coul, vdw) =                 (0.86,)
+
+Parsing sim_0/iteration_4/md.log ...
+Parsing sim_1/iteration_4/md.log ...
+Parsing sim_2/iteration_4/md.log ...
+Parsing sim_3/iteration_4/md.log ...
+
+Swappable pairs: [(0, 1), (2, 3)]
+
+Proposed swap: (0, 1)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 0.00 kT, U^j_m - U^j_n = 0.00 kT, Total dU: 0.00 kT
+  g^i_n - g^i_m = 0.00 kT, g^j_m - g^j_n = 0.00 kT, Total dg: 0.00 kT
+  Acceptance rate: 1.000 / Random number drawn: 0.601
+  Swap accepted! 
+  Current list of configurations: [0, 1, 2, 3]
+
+Remaining swappable pairs: [(2, 3)]
+
+Proposed swap: (2, 3)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 7.79 kT, U^j_m - U^j_n = 52.74 kT, Total dU: 60.53 kT
+  g^i_n - g^i_m = -5.23 kT, g^j_m - g^j_n = 5.23 kT, Total dg: 0.00 kT
+  Acceptance rate: 0.000 / Random number drawn: 0.530
+  Swap rejected! 
+  Current list of configurations: [0, 1, 2, 3]
+
+The finally adopted swap pattern: [1, 0, 2, 3]
+The list of configurations sampled in each replica in the next iteration: [0, 1, 2, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 5:  20.0 -  24.0 ps
+=============================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 2, (coul, vdw) =                 (0.42,)
+  Simulation 1: Global state 4, (coul, vdw) =                 (0.68,)
+  Simulation 2: Global state 2, (coul, vdw) =                 (0.42,)
+  Simulation 3: Global state 6, (coul, vdw) =                 (0.86,)
+
+Parsing sim_0/iteration_5/md.log ...
+Parsing sim_1/iteration_5/md.log ...
+Parsing sim_2/iteration_5/md.log ...
+Parsing sim_3/iteration_5/md.log ...
+
+Swappable pairs: [(0, 1), (0, 2), (1, 2)]
+
+Proposed swap: (0, 2)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 0.00 kT, U^j_m - U^j_n = 0.00 kT, Total dU: 0.00 kT
+  g^i_n - g^i_m = 0.00 kT, g^j_m - g^j_n = 0.00 kT, Total dg: 0.00 kT
+  Acceptance rate: 1.000 / Random number drawn: 0.883
+  Swap accepted! 
+  Current list of configurations: [2, 1, 0, 3]
+
+Remaining swappable pairs: []
+
+The finally adopted swap pattern: [2, 1, 0, 3]
+The list of configurations sampled in each replica in the next iteration: [2, 1, 0, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 6:  24.0 -  28.0 ps
+=============================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 0, (coul, vdw) =                 (0.0,)
+  Simulation 1: Global state 4, (coul, vdw) =                 (0.68,)
+  Simulation 2: Global state 2, (coul, vdw) =                 (0.42,)
+  Simulation 3: Global state 7, (coul, vdw) =                 (1.0,)
+
+Parsing sim_0/iteration_6/md.log ...
+Parsing sim_1/iteration_6/md.log ...
+Parsing sim_2/iteration_6/md.log ...
+Parsing sim_3/iteration_6/md.log ...
+
+n_ex is set back to 1 since there is only 1 swappable pair.
+Swappable pairs: [(1, 2)]
+
+Proposed swap: (1, 2)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 11.12 kT, U^j_m - U^j_n = 6.74 kT, Total dU: 17.86 kT
+  g^i_n - g^i_m = -2.63 kT, g^j_m - g^j_n = 2.63 kT, Total dg: 0.00 kT
+  Acceptance rate: 0.000 / Random number drawn: 0.041
+  Swap rejected! 
+  Current list of configurations: [2, 1, 0, 3]
+
+The finally adopted swap pattern: [0, 1, 2, 3]
+The list of configurations sampled in each replica in the next iteration: [2, 1, 0, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 7:  28.0 -  32.0 ps
+=============================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 0, (coul, vdw) =                 (0.0,)
+  Simulation 1: Global state 1, (coul, vdw) =                 (0.18,)
+  Simulation 2: Global state 4, (coul, vdw) =                 (0.68,)
+  Simulation 3: Global state 6, (coul, vdw) =                 (0.86,)
+
+Parsing sim_0/iteration_7/md.log ...
+Parsing sim_1/iteration_7/md.log ...
+Parsing sim_2/iteration_7/md.log ...
+Parsing sim_3/iteration_7/md.log ...
+
+n_ex is set back to 1 since there is only 1 swappable pair.
+Swappable pairs: [(2, 3)]
+
+Proposed swap: (2, 3)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 0.02 kT, U^j_m - U^j_n = 17.39 kT, Total dU: 17.41 kT
+  g^i_n - g^i_m = -5.66 kT, g^j_m - g^j_n = 5.66 kT, Total dg: 0.00 kT
+  Acceptance rate: 0.000 / Random number drawn: 0.494
+  Swap rejected! 
+  Current list of configurations: [2, 1, 0, 3]
+
+The finally adopted swap pattern: [0, 1, 2, 3]
+The list of configurations sampled in each replica in the next iteration: [2, 1, 0, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 8:  32.0 -  36.0 ps
+=============================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 1, (coul, vdw) =                 (0.18,)
+  Simulation 1: Global state 2, (coul, vdw) =                 (0.42,)
+  Simulation 2: Global state 2, (coul, vdw) =                 (0.42,)
+  Simulation 3: Global state 7, (coul, vdw) =                 (1.0,)
+
+Parsing sim_0/iteration_8/md.log ...
+Parsing sim_1/iteration_8/md.log ...
+Parsing sim_2/iteration_8/md.log ...
+Parsing sim_3/iteration_8/md.log ...
+
+Swappable pairs: [(0, 1), (1, 2)]
+
+Proposed swap: (1, 2)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 0.00 kT, U^j_m - U^j_n = 0.00 kT, Total dU: 0.00 kT
+  g^i_n - g^i_m = 0.00 kT, g^j_m - g^j_n = 0.00 kT, Total dg: 0.00 kT
+  Acceptance rate: 1.000 / Random number drawn: 0.254
+  Swap accepted! 
+  Current list of configurations: [2, 0, 1, 3]
+
+Remaining swappable pairs: []
+
+The finally adopted swap pattern: [0, 2, 1, 3]
+The list of configurations sampled in each replica in the next iteration: [2, 0, 1, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 9:  36.0 -  40.0 ps
+=============================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+----- Saving .npy files to checkpoint the simulation ---
+
+Summary of the simulation ensemble
+==================================
+Simulation status:
+- Rep 0: The weights were fixed throughout the simulation.
+- Rep 1: The weights were fixed throughout the simulation.
+- Rep 2: The weights were fixed throughout the simulation.
+- Rep 3: The weights were fixed throughout the simulation.
+
+Time elapsed: xx hour(s) xx minute(s) xx second(s)
\ No newline at end of file
diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index d29d040e..73b0dfb2 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -12,7 +12,6 @@
 """
 import os
 import pytest
-import shutil
 import numpy as np
 from unittest.mock import patch, MagicMock
 from ensemble_md.analysis import analyze_traj
@@ -128,7 +127,7 @@ def test_stitch_time_series_for_sim():
     trajs = analyze_traj.stitch_time_series_for_sim(files, shifts)
 
     trajs[0] == [
-        0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 
+        0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
         1, 1, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 1, 0, 1, 1,
         1, 1, 1, 0, 1, 1, 1, 0, 1, 2, 0, 2, 1, 1, 0, 0, 1, 0, 1, 0, 1
     ]
@@ -160,7 +159,7 @@ def test_stitch_time_series_for_sim():
     os.rename(f'{folder}/sim_2/iteration_1/dhdl.xvg', f'{folder}/sim_2/iteration_1/dhdl_temp.xvg')
     os.rename(f'{folder}/sim_2/iteration_1/dhdl_short.xvg', f'{folder}/sim_2/iteration_1/dhdl.xvg')
 
-    match_str = 'The first frame of iteration 2 in replica 2 is not continuous with the last frame of the previous iteration. '
+    match_str = 'The first frame of iteration 2 in replica 2 is not continuous with the last frame of the previous iteration. '  # noqa: E501
     match_str += f'Please check files {folder}/sim_2/iteration_1/dhdl.xvg and {folder}/sim_2/iteration_2/dhdl.xvg'
     with pytest.raises(ValueError, match=match_str):
         trajs = analyze_traj.stitch_time_series_for_sim(files, shifts)
@@ -788,7 +787,22 @@ def test_plot_g_vecs(mock_plt):
 
 
 def test_get_swaps():
-    pass
+    input_file = os.path.join('ensemble_md/tests/data', 'run_REXEE_log.txt')
+    proposed_swaps, accepted_swaps = analyze_traj.get_swaps(input_file)
+
+    assert proposed_swaps == [
+        {0: 0, 1: 3, 2: 1, 3: 0, 4: 0},
+        {1: 2, 2: 2, 3: 0, 4: 1, 5: 1},
+        {2: 3, 3: 3, 4: 2, 5: 0, 6: 0},
+        {3: 0, 4: 1, 5: 0, 6: 3, 7: 0},
+    ]
+
+    assert accepted_swaps == [
+        {0: 0, 1: 2, 2: 1, 3: 0, 4: 0},
+        {1: 2, 2: 1, 3: 0, 4: 0, 5: 0},
+        {2: 2, 3: 0, 4: 0, 5: 0, 6: 0},
+        {3: 0, 4: 0, 5: 0, 6: 0, 7: 0},
+    ]
 
 
 def test_plot_swaps():