RandomGreedy: sample costmod and temp
jcmgray committed May 7, 2024
1 parent 183aff0 commit 4b38626
Showing 9 changed files with 146 additions and 103 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/test.yml
@@ -44,4 +44,6 @@ jobs:
        run: pytest --cov=cotengra tests/ --cov-report=xml tests

      - name: Report to codecov
-       uses: codecov/codecov-action@v3
+       uses: codecov/codecov-action@v4
+       with:
+         token: ${{ secrets.CODECOV_TOKEN }}
25 changes: 13 additions & 12 deletions cotengra/core.py
@@ -2525,6 +2525,7 @@ def compressed_reconfigure(
            self.output,
            self.size_dict,
            ssa_path=ssa_path,
+           objective=minimize,
        )
        if inplace:
            self.set_state_from(rtree)
@@ -3550,31 +3551,31 @@ def describe(self, info="normal", join=" "):
        if info == "normal":
            return join.join(
                (
-                   f"log10[FLOPs]={self.total_flops(log=10):.4g}",
-                   f"log2[SIZE]={self.max_size(log=2):.4g}",
+                   f"log10[FLOPs]={self.total_flops(log=10):.2f}",
+                   f"log2[SIZE]={self.max_size(log=2):.2f}",
                )
            )

        elif info == "full":
            s = [
-               f"log10[FLOPS]={self.total_flops(log=10):.4g}",
-               f"log10[COMBO]={self.combo_cost(log=10):.4g}",
-               f"log2[SIZE]={self.max_size(log=2):.4g}",
-               f"log2[PEAK]={self.peak_size(log=2):.4g}",
+               f"log10[FLOPS]={self.total_flops(log=10):.2f}",
+               f"log10[COMBO]={self.combo_cost(log=10):.2f}",
+               f"log2[SIZE]={self.max_size(log=2):.2f}",
+               f"log2[PEAK]={self.peak_size(log=2):.2f}",
            ]
            if self.sliced_inds:
-               s.append(f"NSLICES={self.multiplicity:.4g}")
+               s.append(f"NSLICES={self.multiplicity:.2f}")
            return join.join(s)

        elif info == "concise":
            s = [
-               f"F={self.total_flops(log=10):.4g}",
-               f"C={self.combo_cost(log=10):.4g}",
-               f"S={self.max_size(log=2):.4g}",
-               f"P={self.peak_size(log=2):.4g}",
+               f"F={self.total_flops(log=10):.2f}",
+               f"C={self.combo_cost(log=10):.2f}",
+               f"S={self.max_size(log=2):.2f}",
+               f"P={self.peak_size(log=2):.2f}",
            ]
            if self.sliced_inds:
-               s.append(f"$={self.multiplicity:.4g}")
+               s.append(f"$={self.multiplicity:.2f}")
            return join.join(s)

    def __repr__(self):
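For orientation, here is a minimal sketch of how these summaries surface downstream; the random test network comes from cotengra's own helper, but the exact printed values are illustrative:

import cotengra as ctg

# build a small random contraction and summarise its cost
inputs, output, shapes, size_dict = ctg.utils.rand_equation(10, 3, seed=42)
tree = ctg.array_contract_tree(inputs, output, size_dict=size_dict)
print(tree.describe("concise"))  # e.g. "F=4.23 C=4.71 S=6.00 P=6.58"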
4 changes: 2 additions & 2 deletions cotengra/hyperoptimizers/hyper.py
@@ -326,8 +326,8 @@ def progress_description(best, info="concise"):
        return best["tree"].describe(info=info)
    except KeyError:
        return (
-           f"log10[FLOPs]={log10(best['flops']):.4g} "
-           f"log2[SIZE]={log2(best['size']):.4g}"
+           f"log10[FLOPs]={log10(best['flops']):.2f} "
+           f"log2[SIZE]={log2(best['size']):.2f}"
        )


84 changes: 61 additions & 23 deletions cotengra/pathfinders/path_basic.py
@@ -527,26 +527,23 @@ def optimize_greedy(
        seed=None,
    ):
        """ """
        if temperature == 0.0:

            def local_score(sa, sb, sab):
-               return sab - costmod * (sa + sb)
+               return sab / costmod - (sa + sb) * costmod

        else:
            gmblgen = GumbelBatchedGenerator(seed)

            def local_score(sa, sb, sab):
-               score = sab - costmod * (sa + sb)
+               score = sab / costmod - (sa + sb) * costmod
                if score > 0:
                    return math.log(score) - temperature * gmblgen()
                elif score < 0:
                    return -math.log(-score) - temperature * gmblgen()
                else:
                    return -temperature * gmblgen()
-
-           # return sab - costmod * (sa + sb) - temperature * gmblgen()
-
        node_sizes = {}
        for i, ilegs in self.nodes.items():
            node_sizes[i] = compute_size(ilegs, self.sizes)
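The sign-preserving log transform above pairs with Gumbel noise so that repeatedly taking the minimum perturbed score samples low-cost contractions in a Boltzmann-like fashion. A self-contained sketch of the same scoring rule, substituting a plain inverse-transform Gumbel generator for GumbelBatchedGenerator:

import math
import random

def gumbel(rng):
    # standard Gumbel variate via inverse transform sampling
    return -math.log(-math.log(rng.random()))

def perturbed_score(score, temperature, rng):
    # same sign-preserving log + Gumbel perturbation as local_score
    if score > 0:
        return math.log(score) - temperature * gumbel(rng)
    elif score < 0:
        return -math.log(-score) - temperature * gumbel(rng)
    return -temperature * gumbel(rng)

rng = random.Random(0)
candidates = [3.0, 5.0, -2.0]
# higher temperature -> choices increasingly spread beyond the argmin
best = min(candidates, key=lambda s: perturbed_score(s, 0.5, rng))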
@@ -863,7 +860,7 @@ def optimize_greedy(
        When assessing local greedy scores how much to weight the size of the
        tensors removed compared to the size of the tensor added::

-           score = size_ab - costmod * (size_a + size_b)
+           score = size_ab / costmod - (size_a + size_b) * costmod

        This can be a useful hyper-parameter to tune.
    temperature : float, optional
@@ -912,8 +909,8 @@ def optimize_random_greedy_track_flops(
    output,
    size_dict,
    ntrials=1,
-   costmod=1.0,
-   temperature=0.01,
+   costmod=(0.1, 4.0),
+   temperature=(0.001, 1.0),
    seed=None,
    simplify=True,
    use_ssa=False,
@@ -932,20 +929,21 @@
        A dictionary mapping indices to their dimension.
    ntrials : int, optional
        The number of random greedy trials to perform. The default is 1.
-   costmod : float, optional
+   costmod : (float, float), optional
        When assessing local greedy scores how much to weight the size of the
        tensors removed compared to the size of the tensor added::

-           score = size_ab - costmod * (size_a + size_b)
+           score = size_ab / costmod - (size_a + size_b) * costmod

        This can be a useful hyper-parameter to tune.
+       It is sampled uniformly from the given range.
-   temperature : float, optional
+   temperature : (float, float), optional
        When assessing local greedy scores, how much to randomly perturb the
        score. This is implemented as::

            score -> sign(score) * log(|score|) - temperature * gumbel()

-       which implements Boltzmann sampling.
+       which implements Boltzmann sampling. It is sampled log-uniformly from
+       the given range.
    seed : int, optional
        The seed for the random number generator.
    simplify : bool, optional
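As a quick worked check of the new score with arbitrary sizes: for size_a = 8, size_b = 4, size_ab = 64 and costmod = 2, score = 64 / 2 - (8 + 4) * 2 = 32 - 24 = 8. Raising costmod both shrinks the penalty for the newly created tensor and amplifies the credit for the tensors removed.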
@@ -984,9 +982,38 @@ def optimize_random_greedy_track_flops(
    if simplify:
        cp0.simplify()

+   if isinstance(costmod, float):
+       # constant
+
+       def _next_costmod():
+           return costmod
+
+   else:
+       # uniformly sample
+
+       def _next_costmod():
+           return rng.uniform(*costmod)
+
+   if isinstance(temperature, float):
+       # constant
+
+       def _next_temperature():
+           return temperature
+
+   else:
+       # log-uniformly sample
+       logtempmin, logtempmax = map(math.log, temperature)
+
+       def _next_temperature():
+           return math.exp(rng.uniform(logtempmin, logtempmax))
+
    for _ in range(ntrials):
        cp = cp0.copy()
-       cp.optimize_greedy(costmod=costmod, temperature=temperature, seed=rng)
+       cp.optimize_greedy(
+           costmod=_next_costmod(),
+           temperature=_next_temperature(),
+           seed=rng,
+       )
        # handle disconnected subgraphs
        cp.optimize_remaining_by_size()
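The temperature range spans three orders of magnitude, so it is drawn in log-space to weight each decade equally; a standalone sketch of the idea:

import math
import random

rng = random.Random(42)

def sample_log_uniform(lo, hi, rng):
    # uniform in log-space: each order of magnitude is equally likely
    return math.exp(rng.uniform(math.log(lo), math.log(hi)))

# e.g. temperatures drawn from (0.001, 1.0) land evenly across
# 0.001-0.01, 0.01-0.1 and 0.1-1.0 rather than clustering near 1.0
temps = [sample_log_uniform(0.001, 1.0, rng) for _ in range(8)]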

@@ -1208,20 +1235,21 @@ class RandomGreedyOptimizer(PathOptimizer):
    ----------
    max_repeats : int, optional
        The number of random greedy trials to perform.
-   costmod : float, optional
+   costmod : (float, float), optional
        When assessing local greedy scores how much to weight the size of the
        tensors removed compared to the size of the tensor added::

-           score = size_ab - costmod * (size_a + size_b)
+           score = size_ab / costmod - (size_a + size_b) * costmod

        This can be a useful hyper-parameter to tune.
+       It is sampled uniformly from the given range.
-   temperature : float, optional
+   temperature : (float, float), optional
        When assessing local greedy scores, how much to randomly perturb the
        score. This is implemented as::

            score -> sign(score) * log(|score|) - temperature * gumbel()

-       which implements Boltzmann sampling.
+       which implements Boltzmann sampling. It is sampled log-uniformly from
+       the given range.
    seed : int, optional
        The seed for the random number generator. Note that deterministic
        behavior is only guaranteed within the python or rust backend
@@ -1257,16 +1285,26 @@ class RandomGreedyOptimizer(PathOptimizer):
    def __init__(
        self,
        max_repeats=32,
-       costmod=1.0,
-       temperature=0.01,
+       costmod=(0.1, 4.0),
+       temperature=(0.001, 1.0),
        seed=None,
        simplify=True,
        accel="auto",
        parallel="auto",
    ):
        self.max_repeats = max_repeats
-       self.costmod = costmod
-       self.temperature = temperature
+
+       # for cotengrust, ensure these are always ranges
+       if isinstance(costmod, float):
+           self.costmod = (costmod, costmod)
+       else:
+           self.costmod = tuple(costmod)
+
+       if isinstance(temperature, float):
+           self.temperature = (temperature, temperature)
+       else:
+           self.temperature = tuple(temperature)
+
        self.simplify = simplify
        self.rng = get_rng(seed)
        self.best_ssa_path = None
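A minimal usage sketch of the updated interface; the random network helper is cotengra's own, though the sizes and repeat count chosen here are arbitrary:

import cotengra as ctg

inputs, output, shapes, size_dict = ctg.utils.rand_equation(20, 3, seed=42)

opt = ctg.RandomGreedyOptimizer(
    max_repeats=64,
    costmod=(0.1, 4.0),        # resampled uniformly each trial
    temperature=(0.001, 1.0),  # resampled log-uniformly each trial
    seed=42,
)
path = opt(inputs, output, size_dict)  # standard PathOptimizer call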
66 changes: 4 additions & 62 deletions cotengra/pathfinders/path_compressed_greedy.py
@@ -1,7 +1,6 @@
"""Greedy contraction tree finders.
"""
"""Greedy contraction tree finders."""

import collections
import functools
import heapq
import itertools
import math
@@ -10,68 +9,11 @@
    ContractionTree,
    ContractionTreeCompressed,
    get_hypergraph,
-   jitter_dict,
)
-from ..hyperoptimizers.hyper import register_hyper_function
from ..utils import BadTrial, GumbelBatchedGenerator, get_rng, oset
-from .path_basic import get_optimize_greedy, ssa_to_linear
-
-ssa_greedy_optimize = functools.partial(get_optimize_greedy(), use_ssa=True)
-
-# ------------------------------ GREEDY HYPER ------------------------------- #
-
-
-def trial_greedy(
-    inputs,
-    output,
-    size_dict,
-    random_strength=0.0,
-    temperature=0.0,
-    costmod=1.0,
-):
-    if random_strength != 0.0:
-        # don't supply randomized sizes to actual contraction tree
-        greedy_size_dict = jitter_dict(size_dict, random_strength)
-    else:
-        greedy_size_dict = size_dict
-
-    ssa_path = ssa_greedy_optimize(
-        inputs,
-        output,
-        greedy_size_dict,
-        temperature=temperature,
-        costmod=costmod,
-    )
-
-    return ContractionTree.from_path(
-        inputs, output, size_dict, ssa_path=ssa_path
-    )
-
-
-register_hyper_function(
-    name="greedy",
-    ssa_func=trial_greedy,
-    space={
-        "random_strength": {"type": "FLOAT_EXP", "min": 0.001, "max": 1.0},
-        "temperature": {"type": "FLOAT_EXP", "min": 0.001, "max": 1.0},
-        "costmod": {"type": "FLOAT", "min": 0.0, "max": 50.0},
-    },
-)
-
-# greedy but don't explore costmod or add index size noise
-# -> better for a small number of runs
-register_hyper_function(
-    name="random-greedy",
-    ssa_func=trial_greedy,
-    space={
-        "temperature": {"type": "FLOAT_EXP", "min": 0.001, "max": 0.01},
-    },
-    constants={
-        "costmod": 1.0,
-        "random_strength": 0.0,
-    },
-)
-
+from .path_basic import ssa_to_linear
+from .path_greedy import ssa_greedy_optimize

# --------------------------------------------------------------------------- #

60 changes: 60 additions & 0 deletions cotengra/pathfinders/path_greedy.py
@@ -0,0 +1,60 @@
import functools

from ..core import ContractionTree, jitter_dict
from ..hyperoptimizers.hyper import register_hyper_function
from .path_basic import get_optimize_greedy

ssa_greedy_optimize = functools.partial(get_optimize_greedy(), use_ssa=True)

# ------------------------------ GREEDY HYPER ------------------------------- #


def trial_greedy(
    inputs,
    output,
    size_dict,
    random_strength=0.0,
    temperature=0.0,
    costmod=1.0,
):
    if random_strength != 0.0:
        # don't supply randomized sizes to actual contraction tree
        greedy_size_dict = jitter_dict(size_dict, random_strength)
    else:
        greedy_size_dict = size_dict

    ssa_path = ssa_greedy_optimize(
        inputs,
        output,
        greedy_size_dict,
        temperature=temperature,
        costmod=costmod,
    )

    return ContractionTree.from_path(
        inputs, output, size_dict, ssa_path=ssa_path
    )


register_hyper_function(
    name="greedy",
    ssa_func=trial_greedy,
    space={
        "random_strength": {"type": "FLOAT_EXP", "min": 0.001, "max": 1.0},
        "temperature": {"type": "FLOAT_EXP", "min": 0.001, "max": 1.0},
        "costmod": {"type": "FLOAT", "min": 0.1, "max": 4.0},
    },
)

# greedy but less exploratory -> better for a small number of runs
register_hyper_function(
    name="random-greedy",
    ssa_func=trial_greedy,
    space={
        "temperature": {"type": "FLOAT_EXP", "min": 0.001, "max": 0.1},
        "costmod": {"type": "FLOAT", "min": 0.5, "max": 3.0},
    },
    constants={
        "random_strength": 0.0,
    },
)
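Both registered search spaces then stay drivable through the usual hyper-optimizer entry point; a sketch, with an arbitrary repeat count:

import cotengra as ctg

# search only over the re-registered "random-greedy" space
opt = ctg.HyperOptimizer(methods=["random-greedy"], max_repeats=32)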
2 changes: 1 addition & 1 deletion cotengra/plot.py
@@ -220,7 +220,7 @@ def parse_label(z):
        ax.text(
            bestx[-1],
            besty[-1],
-           f"{besty[-1]:.4g}",
+           f"{besty[-1]:.2f}",
            ha="left",
            va="center",
            color=(0, 0.7, 0.3),