Skip to content

Commit

Permalink
add more options, upgrade hivemind, add test
Browse files Browse the repository at this point in the history
  • Loading branch information
Vectorrent committed Dec 18, 2023
1 parent e4756d3 commit 519d308
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 3 deletions.
22 changes: 22 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Image used to install the package and its test dependencies on Debian bookworm.
FROM debian:bookworm

# Keep apt from prompting during the image build.
ENV DEBIAN_FRONTEND="noninteractive"

# Install the system Python toolchain, then drop the apt package lists so they
# are not stored in the image layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        python3-full \
        python3-pip \
        python3-packaging \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Requirement files are copied and installed before the rest of the sources so
# that these layers stay cached when only source files change.
# --break-system-packages: packages are installed straight into the system
# interpreter (no virtualenv), which pip otherwise refuses on an externally
# managed Python installation.
# --no-cache-dir: do not keep pip's download cache inside the image layers.
COPY requirements.txt ./
RUN python3 -m pip install --no-cache-dir --break-system-packages -r requirements.txt

COPY tests/requirements.txt tests/requirements.txt
RUN python3 -m pip install --no-cache-dir --break-system-packages -r tests/requirements.txt

# Copy the full project and install the package itself.
COPY . ./

RUN python3 -m pip install --no-cache-dir --break-system-packages .
14 changes: 14 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
version: '3.9'

services:
  # Builds the image from the local Dockerfile and runs the pytest suite in it.
  tests:
    image: lightning-universe/lightning-hivemind:latest
    command: pytest tests/ -v
    tty: true
    stdin_open: true
    build:
      context: .
      dockerfile: Dockerfile
    volumes:
      # Bind-mount the host's src/ and tests/ directories into /app in the container.
      - ./src:/app/src
      - ./tests:/app/tests
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
lightning >=2.0.0
hivemind >=1.1.0, <=1.1.5; sys_platform == 'linux'
hivemind >=1.1.0, <=1.1.10.post2; sys_platform == 'linux'

pydantic <2.0.0 # fixme: lift when resolved
10 changes: 9 additions & 1 deletion src/lightning_hivemind/strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ class HivemindStrategy(Strategy):
bootstrap_timeout: after one of peers responds, await other peers for at most this many seconds
use_relay: enable circuit relay functionality in libp2p (see https://docs.libp2p.io/concepts/nat/circuit-relay/)
use_auto_relay: look for libp2p relays to become reachable if we are behind NAT/firewall
**optimizer_kwargs: kwargs are passed to the :class:`hivemind.Optimizer` class.
"""

Expand All @@ -139,6 +143,8 @@ def __init__(
use_ipfs: bool = False,
wait_timeout: int = 3,
bootstrap_timeout: Optional[float] = None,
use_relay: bool = True,
use_auto_relay: bool = False,
**optimizer_kwargs: Any,
):
if platform.system() != "Linux":
Expand Down Expand Up @@ -177,9 +183,11 @@ def __init__(
initial_peers=initial_peers,
host_maddrs=host_maddrs if host_maddrs is not None else ["/ip4/0.0.0.0/tcp/0", "/ip4/0.0.0.0/udp/0/quic"],
use_ipfs=use_ipfs,
ensure_bootstrap_success=bool(not use_ipfs),
wait_timeout=wait_timeout,
bootstrap_timeout=bootstrap_timeout,
ensure_bootstrap_success=bool(not use_ipfs),
use_relay=use_relay,
use_auto_relay=use_auto_relay
)

visible_addresses = [
Expand Down
24 changes: 23 additions & 1 deletion tests/test_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,28 @@ def configure_optimizers(self):
)
trainer.fit(model)

@mock.patch.dict(os.environ, {"HIVEMIND_MEMORY_SHARING_STRATEGY": "file_descriptor"}, clear=True)
def test_ipfs_integration():
    """Fit a ``BoringModel`` with ``use_ipfs``, ``use_relay`` and ``use_auto_relay`` all enabled.

    While fitting, the model asserts that the first configured LR scheduler
    is a ``HiveMindScheduler`` instance.
    """

    class TestModel(BoringModel):
        def on_before_backward(self, loss: Tensor) -> None:
            # The scheduler the trainer holds must be a HiveMindScheduler.
            first_config = self.trainer.lr_scheduler_configs[0]
            assert isinstance(first_config.scheduler, HiveMindScheduler)

        def configure_optimizers(self):
            sgd = torch.optim.SGD(self.layer.parameters(), lr=0.1)
            exp_decay = torch.optim.lr_scheduler.ExponentialLR(sgd, gamma=0.9)
            return [sgd], [exp_decay]

    model = TestModel()
    ipfs_strategy = HivemindStrategy(
        target_batch_size=1,
        use_ipfs=True,
        use_relay=True,
        use_auto_relay=True,
    )
    trainer = Trainer(strategy=ipfs_strategy, fast_dev_run=True)
    trainer.fit(model)

@mock.patch.dict(
os.environ,
Expand Down Expand Up @@ -139,7 +161,7 @@ def test_raise_exception_no_batch_size(mock__extract_batch_size):
[(True, True, True), (False, True, False)],
)
def test_warn_if_argument_passed(delay_grad_averaging, delay_state_averaging, delay_optimizer_step):
"""Ensure that valid combination of HiveMind delay arguments warn if scheduler isn't passed in as a function."""
"""Ensure that valid combination of HiveMind delay arguments warn if scheduler isn't passed in as a function."""
model = BoringModel()
trainer = Trainer(
strategy=HivemindStrategy(
Expand Down

0 comments on commit 519d308

Please sign in to comment.