extra doc

recursionpharma · Aug 22, 2023 · 597943d · 597943d
1 parent 7389e8b
commit 597943d
Show file tree

Hide file tree

Showing 2 changed files with 22 additions and 10 deletions.
diff --git a/src/gflownet/algo/config.py b/src/gflownet/algo/config.py
@@ -4,6 +4,8 @@
 
 
 class TBVariant(Enum):
+    """See algo.trajectory_balance.TrajectoryBalance for details."""
+
     TB = 0
     SubTB1 = 1
     DB = 2
@@ -21,8 +23,8 @@ class TBConfig:
         The epsilon parameter in log-flow smoothing (see paper)
     reward_loss_multiplier : float
         The multiplier for the reward loss when bootstrapping the reward. (deprecated)
-    do_subtb : bool
-        Whether to use the full N^2 subTB loss
+    variant : TBVariant
+        The loss variant. See algo.trajectory_balance.TrajectoryBalance for details.
     do_correct_idempotent : bool
         Whether to correct for idempotent actions
     do_parameterize_p_b : bool

diff --git a/src/gflownet/algo/trajectory_balance.py b/src/gflownet/algo/trajectory_balance.py
@@ -66,10 +66,23 @@ def logZ(self, cond_info: Tensor) -> Tensor:
 
 
 class TrajectoryBalance(GFNAlgorithm):
-    """TB implementation, see
-    "Trajectory Balance: Improved Credit Assignment in GFlowNets Nikolay Malkin, Moksh Jain,
-    Emmanuel Bengio, Chen Sun, Yoshua Bengio"
-    https://arxiv.org/abs/2201.13259"""
+    """Trajectory-based GFN loss implementations. Implements
+    - TB: Trajectory Balance: Improved Credit Assignment in GFlowNets, Nikolay Malkin, Moksh Jain,
+    Emmanuel Bengio, Chen Sun, Yoshua Bengio
+    https://arxiv.org/abs/2201.13259
+
+    - SubTB(1): Learning GFlowNets from partial episodes for improved convergence and stability, Kanika Madan, Jarrid
+    Rector-Brooks, Maksym Korablyov, Emmanuel Bengio, Moksh Jain, Andrei Cristian Nica, Tom Bosc, Yoshua Bengio,
+    Nikolay Malkin
+    https://arxiv.org/abs/2209.12782
+    Note: We implement the lambda=1 version of SubTB here (this choice is based on empirical results from the paper)
+
+    - DB: GFlowNet Foundations, Yoshua Bengio, Salem Lahlou, Tristan Deleu, Edward J. Hu, Mo Tiwari, Emmanuel Bengio
+    https://arxiv.org/abs/2111.09266
+    Note: This is the trajectory version of Detailed Balance (i.e. transitions are not iid, but trajectories are).
+    Empirical results in subsequent papers suggest that DB may be improved by training on iid transitions (sampled from
+    a replay buffer) instead of trajectories.
+    """
 
     def __init__(
         self,
@@ -78,10 +91,7 @@ def __init__(
         rng: np.random.RandomState,
         cfg: Config,
     ):
-        """TB implementation, see
-        "Trajectory Balance: Improved Credit Assignment in GFlowNets Nikolay Malkin, Moksh Jain,
-        Emmanuel Bengio, Chen Sun, Yoshua Bengio"
-        https://arxiv.org/abs/2201.13259
+        """Instantiate a TB algorithm.
 
         Parameters
         ----------