From cfc6f3ed2a4dc0e5d6326b1c0b6947dc42fb90d0 Mon Sep 17 00:00:00 2001 From: Javier Date: Fri, 13 Sep 2024 08:38:37 +0200 Subject: [PATCH] break(examples) Remove `simulation-` examples (#4188) --- README.md | 1 - examples/doc/source/conf.py | 2 + examples/simulation-pytorch/README.md | 103 --- examples/simulation-pytorch/pyproject.toml | 19 - examples/simulation-pytorch/requirements.txt | 4 - examples/simulation-pytorch/sim.ipynb | 629 ------------------ examples/simulation-pytorch/sim.py | 225 ------- examples/simulation-pytorch/utils.py | 63 -- examples/simulation-tensorflow/README.md | 104 --- examples/simulation-tensorflow/pyproject.toml | 16 - .../simulation-tensorflow/requirements.txt | 4 - examples/simulation-tensorflow/sim.ipynb | 347 ---------- examples/simulation-tensorflow/sim.py | 186 ------ 13 files changed, 2 insertions(+), 1701 deletions(-) delete mode 100644 examples/simulation-pytorch/README.md delete mode 100644 examples/simulation-pytorch/pyproject.toml delete mode 100644 examples/simulation-pytorch/requirements.txt delete mode 100644 examples/simulation-pytorch/sim.ipynb delete mode 100644 examples/simulation-pytorch/sim.py delete mode 100644 examples/simulation-pytorch/utils.py delete mode 100644 examples/simulation-tensorflow/README.md delete mode 100644 examples/simulation-tensorflow/pyproject.toml delete mode 100644 examples/simulation-tensorflow/requirements.txt delete mode 100644 examples/simulation-tensorflow/sim.ipynb delete mode 100644 examples/simulation-tensorflow/sim.py diff --git a/README.md b/README.md index c36e012d5644..9f2604ad37b0 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,6 @@ Other [examples](https://github.com/adap/flower/tree/main/examples): - [Federated Finetuning of a Vision Transformer](https://github.com/adap/flower/tree/main/examples/flowertune-vit) - [Advanced Flower with TensorFlow/Keras](https://github.com/adap/flower/tree/main/examples/advanced-tensorflow) - [Advanced Flower with PyTorch](https://github.com/adap/flower/tree/main/examples/advanced-pytorch) -- Single-Machine Simulation of Federated Learning Systems ([PyTorch](https://github.com/adap/flower/tree/main/examples/simulation-pytorch)) ([Tensorflow](https://github.com/adap/flower/tree/main/examples/simulation-tensorflow)) - [Comprehensive Flower+XGBoost](https://github.com/adap/flower/tree/main/examples/xgboost-comprehensive) - [Flower through Docker Compose and with Grafana dashboard](https://github.com/adap/flower/tree/main/examples/flower-via-docker-compose) - [Flower with KaplanMeierFitter from the lifelines library](https://github.com/adap/flower/tree/main/examples/federated-kaplan-meier-fitter) diff --git a/examples/doc/source/conf.py b/examples/doc/source/conf.py index 3500d7f0b59c..04185caad0f4 100644 --- a/examples/doc/source/conf.py +++ b/examples/doc/source/conf.py @@ -68,6 +68,8 @@ "app-secure-aggregation": "flower-secure-aggregation.html", "llm-flowertune": "flowertune-llm.html", "vit-finetune": "flowertune-vit.html", + "simulation-pytorch": "quickstart-pytorch.html", + "simulation-tensorflow": "quickstart-tensorflow.html", } diff --git a/examples/simulation-pytorch/README.md b/examples/simulation-pytorch/README.md deleted file mode 100644 index 2dbfbc849ab7..000000000000 --- a/examples/simulation-pytorch/README.md +++ /dev/null @@ -1,103 +0,0 @@ ---- -tags: [basic, vision, fds, simulation] -dataset: [MNIST] -framework: [torch, torchvision] ---- - -# Flower Simulation example using PyTorch - -This introductory example uses the simulation capabilities of 
Flower to simulate a large number of clients on a single machine. Take a look at the [Documentation](https://flower.ai/docs/framework/how-to-run-simulations.html) for a deep dive into how Flower simulation works. This example uses [Flower Datasets](https://flower.ai/docs/datasets/) to download, partition and preprocess the MNIST dataset. The example uses 100 clients by default.
-
-## Running the example (via Jupyter Notebook)
-
-Run the example on Google Colab: [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/adap/flower/blob/main/examples/simulation-pytorch/sim.ipynb)
-
-Alternatively, you can run `sim.ipynb` locally or in any other Jupyter environment.
-
-## Running the example
-
-Start by cloning the code example. We prepared a single-line command that you can copy into your shell which will check out the example for you:
-
-```shell
-git clone --depth=1 https://github.com/adap/flower.git && mv flower/examples/simulation-pytorch . && rm -rf flower && cd simulation-pytorch
-```
-
-This will create a new directory called `simulation-pytorch` containing the following files:
-
-```
--- README.md <- You're reading this right now
--- sim.ipynb <- Example notebook
--- sim.py <- Example code
--- utils.py <- Auxiliary functions for this example
--- pyproject.toml <- Example dependencies
--- requirements.txt <- Example dependencies
-```
-
-### Installing Dependencies
-
-Project dependencies (such as `torch` and `flwr`) are defined in `pyproject.toml` and `requirements.txt`. We recommend [Poetry](https://python-poetry.org/docs/) to install those dependencies and manage your virtual environment ([Poetry installation](https://python-poetry.org/docs/#installation)) or [pip](https://pip.pypa.io/en/latest/development/), but feel free to use a different way of installing dependencies and managing virtual environments if you have other preferences.
-
-#### Poetry
-
-```shell
-poetry install
-poetry shell
-```
-
-Poetry will install all your dependencies in a newly created virtual environment. To verify that everything works correctly you can run the following command:
-
-```shell
-poetry run python -c "import flwr"
-```
-
-If you don't see any errors you're good to go!
-
-#### pip
-
-Write the command below in your terminal to install the dependencies according to the configuration file `requirements.txt`.
-
-```shell
-pip install -r requirements.txt
-```
-
-### Run with `start_simulation()`
-
-Ensure you have activated your environment, then:
-
-```bash
-# and then run the example
-python sim.py
-```
-
-You can adjust the CPU/GPU resources you assign to each of your virtual clients. By default, your clients will only use 1xCPU core. For example:
-
-```bash
-# Will assign 2xCPUs to each client
-python sim.py --num_cpus=2
-
-# Will assign 2xCPUs and 25% of the GPU's VRAM to each client
-# This means that you can have 4 concurrent clients on each GPU
-# (assuming you have enough CPUs)
-python sim.py --num_cpus=2 --num_gpus=0.25
-```
-
-### Run with Flower Next (preview)
-
-Ensure you have activated your environment, then execute the command below. All `ClientApp` instances will run on CPU but the `ServerApp` will run on the GPU if one is available. Note that this is the case because the `Simulation Engine` only exposes certain resources to the `ClientApp` (based on the `client_resources` in `--backend-config`).
-
-```bash
-# Run with the default backend-config.
-# `--server-app` points to the `server` object in the sim.py file in this example.
-# `--client-app` points to the `client` object in the sim.py file in this example.
-flower-simulation --client-app=sim:client --server-app=sim:server --num-supernodes=100
-```
-
-You can change the default resources assigned to each `ClientApp` by means of the `--backend-config` argument:
-
-```bash
-# Tells the VCE to reserve 2x CPUs and 25% of available VRAM for each ClientApp
-flower-simulation --client-app=sim:client --server-app=sim:server --num-supernodes=100 \
-    --backend-config='{"client_resources": {"num_cpus":2, "num_gpus":0.25}}'
-```
-
-Take a look at the [Documentation](https://flower.ai/docs/framework/how-to-run-simulations.html) for more details on how you can customise your simulation.
diff --git a/examples/simulation-pytorch/pyproject.toml b/examples/simulation-pytorch/pyproject.toml
deleted file mode 100644
index 5978c17f2c60..000000000000
--- a/examples/simulation-pytorch/pyproject.toml
+++ /dev/null
@@ -1,19 +0,0 @@
-[build-system]
-requires = ["poetry-core>=1.4.0"]
-build-backend = "poetry.core.masonry.api"
-
-[tool.poetry]
-name = "simulation-pytorch"
-version = "0.1.0"
-description = "Federated Learning Simulation with Flower and PyTorch"
-authors = ["The Flower Authors <hello@flower.ai>"]
-
-[tool.poetry.dependencies]
-python = ">=3.8,<3.11"
-flwr = { extras = ["simulation"], version = ">=1.0,<2.0" }
-flwr-datasets = { extras = ["vision"], version = ">=0.0.2,<1.0.0" }
-torch = "2.1.1"
-torchvision = "0.16.1"
-
-[tool.poetry.group.dev.dependencies]
-ipykernel = "^6.27.0"
diff --git a/examples/simulation-pytorch/requirements.txt b/examples/simulation-pytorch/requirements.txt
deleted file mode 100644
index 4dbecab3e546..000000000000
--- a/examples/simulation-pytorch/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-flwr[simulation]>=1.0, <2.0
-torch==2.1.1
-torchvision==0.16.1
-flwr-datasets[vision]>=0.0.2, <1.0.0
\ No newline at end of file
diff --git a/examples/simulation-pytorch/sim.ipynb b/examples/simulation-pytorch/sim.ipynb
deleted file mode 100644
index d225069cb444..000000000000
--- a/examples/simulation-pytorch/sim.ipynb
+++ /dev/null
@@ -1,629 +0,0 @@
-{
- "cells": [
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Environment Setup\n",
-    "\n",
-    "To start working with Flower, very little is required once you have activated your Python environment (e.g. via `conda`, `virtualenv`, `pyenv`, etc). If you are running this code on Colab, there is really nothing to do except to install Flower and other dependencies. The steps below have been verified to run in Colab.\n",
-    "\n",
-    "## Installing Flower\n",
-    "\n",
-    "You can install Flower conveniently via `pip`:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# depending on your shell, you might need to add `\\` before `[` and `]`.\n",
-    "!pip install -q flwr[simulation]\n",
-    "!pip install flwr_datasets[vision]"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We will be using the _simulation_ mode in Flower, which allows you to run a large number of clients without the overheads of manually managing devices. This is achieved via the [Virtual Client Engine](https://flower.ai/docs/framework/how-to-run-simulations.html) in Flower.
With simulation, you can dynamically scale your experiments whether you run the code on your laptop, a machine with a single GPU, a server with multiple GPUs, or even on a cluster with multiple servers. The `Virtual Client Engine` handles everything transparently and allows you to specify how many resources (e.g. CPU cores, GPU VRAM) should be assigned to each virtual client."
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "\n",
-    "Flower is agnostic to your choice of ML framework. Flower works with `PyTorch`, `TensorFlow`, `NumPy`, `🤗 Transformers`, `MXNet`, `JAX`, `scikit-learn`, `fastai`, `Pandas`. Flower also supports all major platforms: `iOS`, `Android` and plain `C++`. You can find a _quickstart-_ example for each of the above in the [Flower Repository](https://github.com/adap/flower/tree/main/examples) inside the `examples/` directory.\n",
-    "\n",
-    "In this tutorial we are going to use PyTorch; it comes pre-installed in your Colab runtime, so there is no need to install it again. If you would like to install another version, you can still do that in the same way other packages are installed via `!pip`."
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We are going to install some other dependencies you are likely familiar with. Let's install `matplotlib` to plot our results at the end."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "outputId": "58b7af77-609f-4118-bd5b-5629a4b5a296"
-   },
-   "outputs": [],
-   "source": [
-    "!pip install matplotlib"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Preparing the experiment\n",
-    "\n",
-    "This tutorial is not so much about novel architectural designs, so we keep things simple and make use of a typical CNN that is adequate for the MNIST image classification task.\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "import torch.nn as nn\n",
-    "import torch.nn.functional as F\n",
-    "from torch.utils.data import DataLoader\n",
-    "\n",
-    "\n",
-    "class Net(nn.Module):\n",
-    "    def __init__(self, num_classes: int) -> None:\n",
-    "        super(Net, self).__init__()\n",
-    "        self.conv1 = nn.Conv2d(1, 6, 5)\n",
-    "        self.pool = nn.MaxPool2d(2, 2)\n",
-    "        self.conv2 = nn.Conv2d(6, 16, 5)\n",
-    "        self.fc1 = nn.Linear(16 * 4 * 4, 120)\n",
-    "        self.fc2 = nn.Linear(120, 84)\n",
-    "        self.fc3 = nn.Linear(84, num_classes)\n",
-    "\n",
-    "    def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
-    "        x = self.pool(F.relu(self.conv1(x)))\n",
-    "        x = self.pool(F.relu(self.conv2(x)))\n",
-    "        x = x.view(-1, 16 * 4 * 4)\n",
-    "        x = F.relu(self.fc1(x))\n",
-    "        x = F.relu(self.fc2(x))\n",
-    "        x = self.fc3(x)\n",
-    "        return x"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We'll be training the model in a federated setting. In order to do that, we need to define two functions:\n",
-    "\n",
-    "* `train()` that will train the model given a dataloader.\n",
-    "* `test()` that will be used to evaluate the performance of the model on held-out data, e.g., a validation set."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def train(net, trainloader, optim, epochs, device: str):\n",
-    "    \"\"\"Train the network on the training set.\"\"\"\n",
-    "    criterion = torch.nn.CrossEntropyLoss()\n",
-    "    net.train()\n",
-    "    for _ in range(epochs):\n",
-    "        for batch in trainloader:\n",
-    "            images, labels = batch[\"image\"].to(device), batch[\"label\"].to(device)\n",
-    "            optim.zero_grad()\n",
-    "            loss = criterion(net(images), labels)\n",
-    "            loss.backward()\n",
-    "            optim.step()\n",
-    "\n",
-    "\n",
-    "def test(net, testloader, device: str):\n",
-    "    \"\"\"Validate the network on the entire test set.\"\"\"\n",
-    "    criterion = torch.nn.CrossEntropyLoss()\n",
-    "    correct, loss = 0, 0.0\n",
-    "    net.eval()\n",
-    "    with torch.no_grad():\n",
-    "        for data in testloader:\n",
-    "            images, labels = data[\"image\"].to(device), data[\"label\"].to(device)\n",
-    "            outputs = net(images)\n",
-    "            loss += criterion(outputs, labels).item()\n",
-    "            _, predicted = torch.max(outputs.data, 1)\n",
-    "            correct += (predicted == labels).sum().item()\n",
-    "    accuracy = correct / len(testloader.dataset)\n",
-    "    return loss, accuracy"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The code we have written so far is not specific to Federated Learning. Then, what are the key differences between Federated Learning and centralised training? If you could only pick two, you'd probably say:\n",
-    "* Federated Learning is distributed -- the model is trained on-device by the participating clients.\n",
-    "* Data remains private and is owned by a specific _client_ -- the data is never sent to the central server.\n",
-    "\n",
-    "There are several more differences. But the above two are the main ones to always consider, and they are common to all flavours of Federated Learning (e.g. _cross-device_ or _cross-silo_). The remainder of this tutorial focuses on transforming code written for the centralised setting into a Federated Learning pipeline using Flower and PyTorch.\n",
-    "\n",
-    "Let's begin! 🚀"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## One Client, One Data Partition\n",
-    "\n",
-    "To start designing a Federated Learning pipeline we need to meet one of the key properties in FL: each client has its own data partition. To accomplish this with the MNIST dataset, we are going to generate N random partitions, where N is the total number of clients in our FL system.\n",
-    "\n",
-    "We can use [Flower Datasets](https://flower.ai/docs/datasets/) to effortlessly obtain an off-the-shelf partitioned dataset or partition one that isn't pre-partitioned. Let's choose MNIST.\n",
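-    "\n",
-    "Passing an integer as the partitioner (as done in the next cell) creates IID partitions. As a sketch of an alternative, assuming your installed version of `flwr-datasets` provides the `DirichletPartitioner` used below, you could create label-skewed (non-IID) partitions instead:\n",
-    "\n",
-    "```python\n",
-    "from flwr_datasets.partitioner import DirichletPartitioner\n",
-    "\n",
-    "# Lower `alpha` -> more heterogeneous label distributions across clients\n",
-    "partitioner = DirichletPartitioner(num_partitions=100, partition_by=\"label\", alpha=0.5)\n",
-    "fds = FederatedDataset(dataset=\"mnist\", partitioners={\"train\": partitioner})\n",
-    "```"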
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from datasets import Dataset\n",
-    "from flwr_datasets import FederatedDataset\n",
-    "from datasets.utils.logging import disable_progress_bar\n",
-    "\n",
-    "# Let's set up a simulation involving a total of 100 clients\n",
-    "NUM_CLIENTS = 100\n",
-    "\n",
-    "# Download the MNIST dataset and partition its \"train\" split (so one partition can be assigned to each client)\n",
-    "mnist_fds = FederatedDataset(dataset=\"mnist\", partitioners={\"train\": NUM_CLIENTS})\n",
-    "# Let's keep the test set as is, and use it to evaluate the global model on the server\n",
-    "centralized_testset = mnist_fds.load_split(\"test\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's create a function that returns a set of transforms to apply to our images."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from torchvision.transforms import ToTensor, Normalize, Compose\n",
-    "\n",
-    "\n",
-    "def apply_transforms(batch):\n",
-    "    \"\"\"Get transformation for MNIST dataset.\"\"\"\n",
-    "\n",
-    "    # transformation to convert images to tensors and apply normalization\n",
-    "    transforms = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])\n",
-    "    batch[\"image\"] = [transforms(img) for img in batch[\"image\"]]\n",
-    "    return batch"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's next define how our FL clients will behave.\n",
-    "\n",
-    "## Defining a Flower Client\n",
-    "\n",
-    "You can think of a client in FL as an entity that owns some data and trains a model using this data. The caveat is that the model is being trained _collaboratively_ in federation by multiple clients (sometimes up to hundreds of thousands) and, in most instances of FL, is sent by a central server.\n",
-    "\n",
-    "A Flower Client is a simple Python class with four distinct methods:\n",
-    "\n",
-    "* `fit()`: With this method, the client does on-device training for a number of epochs using its own data. At the end, the resulting model is sent back to the server for aggregation.\n",
-    "\n",
-    "* `evaluate()`: With this method, the server can evaluate the performance of the global model on the local validation set of a client. This can be used, for instance, when there is no centralised dataset on the server for validation/testing. This method can also be used to assess the degree of personalisation of the model being federated.\n",
-    "\n",
-    "* `set_parameters()`: This method takes the parameters sent by the server and uses them to initialise the parameters of the local model, which is ML framework-specific (e.g. TF, PyTorch, etc).\n",
-    "\n",
-    "* `get_parameters()`: It extracts the parameters from the local model and transforms them into a list of NumPy arrays. This ML framework-agnostic representation of the model will be sent to the server.\n",
-    "\n",
-    "Let's start by importing Flower!"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import flwr as fl"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Now let's define our Flower Client class:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from collections import OrderedDict\n",
-    "from typing import Dict, List, Tuple\n",
-    "\n",
-    "from flwr.common import NDArrays, Scalar\n",
-    "\n",
-    "\n",
-    "class FlowerClient(fl.client.NumPyClient):\n",
-    "    def __init__(self, trainloader, valloader) -> None:\n",
-    "        super().__init__()\n",
-    "\n",
-    "        self.trainloader = trainloader\n",
-    "        self.valloader = valloader\n",
-    "        self.model = Net(num_classes=10)\n",
-    "        # Determine device\n",
-    "        self.device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
-    "        self.model.to(self.device)  # send model to device\n",
-    "\n",
-    "    def set_parameters(self, parameters):\n",
-    "        \"\"\"With the model parameters received from the server,\n",
-    "        overwrite the uninitialised model in this class with them.\"\"\"\n",
-    "\n",
-    "        params_dict = zip(self.model.state_dict().keys(), parameters)\n",
-    "        state_dict = OrderedDict({k: torch.Tensor(v) for k, v in params_dict})\n",
-    "        # now replace the parameters\n",
-    "        self.model.load_state_dict(state_dict, strict=True)\n",
-    "\n",
-    "    def get_parameters(self, config: Dict[str, Scalar]):\n",
-    "        \"\"\"Extract all model parameters and convert them to a list of\n",
-    "        NumPy arrays. The server doesn't work with PyTorch/TF/etc.\"\"\"\n",
-    "        return [val.cpu().numpy() for _, val in self.model.state_dict().items()]\n",
-    "\n",
-    "    def fit(self, parameters, config):\n",
-    "        \"\"\"This method trains the model using the parameters sent by the\n",
-    "        server on the dataset of this client. At the end, the parameters\n",
-    "        of the locally trained model are communicated back to the server.\"\"\"\n",
-    "\n",
-    "        # copy parameters sent by the server into client's local model\n",
-    "        self.set_parameters(parameters)\n",
-    "\n",
-    "        # read from config\n",
-    "        lr, epochs = config[\"lr\"], config[\"epochs\"]\n",
-    "\n",
-    "        # Define the optimizer\n",
-    "        optim = torch.optim.SGD(self.model.parameters(), lr=lr, momentum=0.9)\n",
-    "\n",
-    "        # do local training\n",
-    "        train(self.model, self.trainloader, optim, epochs=epochs, device=self.device)\n",
-    "\n",
-    "        # return the model parameters to the server as well as extra info (number of training examples in this case)\n",
-    "        return self.get_parameters({}), len(self.trainloader), {}\n",
-    "\n",
-    "    def evaluate(self, parameters: NDArrays, config: Dict[str, Scalar]):\n",
-    "        \"\"\"Evaluate the model sent by the server on this client's\n",
-    "        local validation set. Then return performance metrics.\"\"\"\n",
-    "\n",
-    "        self.set_parameters(parameters)\n",
-    "        loss, accuracy = test(self.model, self.valloader, device=self.device)\n",
-    "        # send statistics back to the server\n",
-    "        return float(loss), len(self.valloader), {\"accuracy\": accuracy}"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Spend a few minutes to inspect the `FlowerClient` class above.
Please ask questions if there is something unclear!\n",
-    "\n",
-    "The keen-eyed among you might have realised that if we were to fuse the client's `fit()` and `evaluate()` methods, we'd end up with essentially the same logic as in a standard centralised training loop. And it is true! In Federated Learning, the way clients perform local training makes use of the same principles as a more traditional centralised setup. The key difference is that the dataset is now much smaller and is never _\"seen\"_ by the entity running the FL workload (i.e. the central server).\n",
-    "\n",
-    "\n",
-    "Talking about the central server... we should define what strategy we want to use so that the updated models sent from the clients back to the server at the end of the `fit()` method are aggregated.\n",
-    "\n",
-    "\n",
-    "## Choosing a Flower Strategy\n",
-    "\n",
-    "\n",
-    "A strategy sits at the core of the Federated Learning experiment. It is involved in all stages of an FL pipeline: sampling clients; sending the _global model_ to the clients so they can do `fit()`; receiving the updated models from the clients and **aggregating** these to construct a new _global model_; defining and executing global or federated evaluation; and more.\n",
-    "\n",
-    "Flower comes with [many strategies built-in](https://github.com/adap/flower/tree/main/src/py/flwr/server/strategy), with more being added in each release. For this tutorial, let's use what is arguably the most popular strategy out there: `FedAvg`.\n",
-    "\n",
-    "The way `FedAvg` works is simple, but it performs surprisingly well in practice. It is therefore a good strategy to start your experimentation with. `FedAvg`, as its name implies, derives a new version of the _global model_ by taking the average of all the models sent by the clients participating in the round. You can read all the details [in the paper](https://arxiv.org/abs/1602.05629).\n",
-    "\n",
-    "Let's see how we can define `FedAvg` using Flower. We use one of the callbacks, called `evaluate_fn`, so we can easily evaluate the state of the global model using a small centralised testset. Note this functionality is user-defined since it requires a choice of ML framework (if you recall, Flower is framework-agnostic).\n",
-    "\n",
-    "> This being said, centralised evaluation of the global model is only possible if there exists a centralised dataset that somewhat follows a similar distribution as the data that's spread across clients. In some cases having such a centralised dataset for validation is not possible, so the only solution is to federate the evaluation of the _global model_. This is the default behaviour in Flower. If you don't specify the `evaluate_fn` argument in your strategy, then centralised global evaluation won't be performed."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def get_evaluate_fn(centralized_testset: Dataset):\n",
-    "    \"\"\"This is a function that returns a function. The returned\n",
-    "    function (i.e.
`evaluate_fn`) will be executed by the strategy\n",
-    "    at the end of each round to evaluate the state of the global\n",
-    "    model.\"\"\"\n",
-    "\n",
-    "    def evaluate_fn(server_round: int, parameters, config):\n",
-    "        \"\"\"This function is executed by the strategy; it will instantiate\n",
-    "        a model and replace its parameters with those of the global model.\n",
-    "        Then, the model will be evaluated on the test set (recall this is the\n",
-    "        whole MNIST test set).\"\"\"\n",
-    "\n",
-    "        model = Net(num_classes=10)\n",
-    "\n",
-    "        # Determine device\n",
-    "        device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
-    "        model.to(device)  # send model to device\n",
-    "\n",
-    "        # set parameters to the model\n",
-    "        params_dict = zip(model.state_dict().keys(), parameters)\n",
-    "        state_dict = OrderedDict({k: torch.Tensor(v) for k, v in params_dict})\n",
-    "        model.load_state_dict(state_dict, strict=True)\n",
-    "\n",
-    "        # Apply transform to dataset\n",
-    "        testset = centralized_testset.with_transform(apply_transforms)\n",
-    "\n",
-    "        testloader = DataLoader(testset, batch_size=50)\n",
-    "        # call test\n",
-    "        loss, accuracy = test(model, testloader, device)\n",
-    "        return loss, {\"accuracy\": accuracy}\n",
-    "\n",
-    "    return evaluate_fn"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We could now define a strategy with just the `evaluate_fn` callback shown above. Instead, let's see how additional (but entirely optional) functionality can easily be added to our strategy. We are going to define two additional auxiliary functions to: (1) configure how clients do local training; and (2) aggregate the metrics that clients return after running their `evaluate` methods:\n",
-    "\n",
-    "1. `fit_config()`: This is a function that will be executed inside the strategy when configuring a new `fit` round. It is relatively simple and only requires as input argument the round the FL experiment is at. In this example we simply return a Python dictionary specifying the number of epochs and the learning rate each client should use inside their `fit()` methods. A more versatile implementation would adjust these hyperparameters as the FL process advances (e.g. reducing the learning rate in later FL rounds).\n",
-    "2. `weighted_average()`: This is an optional function to pass to the strategy. It will be executed after an evaluation round (i.e. when clients run `evaluate()`) and will aggregate the metrics clients return. In this example, we use this function to compute the weighted average accuracy of clients doing `evaluate()`."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from flwr.common import Metrics\n",
-    "\n",
-    "\n",
-    "def fit_config(server_round: int) -> Dict[str, Scalar]:\n",
-    "    \"\"\"Return a configuration with static batch size and (local) epochs.\"\"\"\n",
-    "    config = {\n",
-    "        \"epochs\": 1,  # Number of local epochs done by clients\n",
-    "        \"lr\": 0.01,  # Learning rate to use by clients during fit()\n",
-    "    }\n",
-    "    return config\n",
-    "\n",
-    "\n",
-    "def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:\n",
-    "    \"\"\"Aggregation function for (federated) evaluation metrics, i.e.
those returned by\n",
-    "    the client's evaluate() method.\"\"\"\n",
-    "    # Multiply accuracy of each client by number of examples used\n",
-    "    accuracies = [num_examples * m[\"accuracy\"] for num_examples, m in metrics]\n",
-    "    examples = [num_examples for num_examples, _ in metrics]\n",
-    "\n",
-    "    # Aggregate and return custom metric (weighted average)\n",
-    "    return {\"accuracy\": sum(accuracies) / sum(examples)}"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Now we can define our strategy:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "strategy = fl.server.strategy.FedAvg(\n",
-    "    fraction_fit=0.1,  # Sample 10% of available clients for training\n",
-    "    fraction_evaluate=0.05,  # Sample 5% of available clients for evaluation\n",
-    "    on_fit_config_fn=fit_config,\n",
-    "    evaluate_metrics_aggregation_fn=weighted_average,  # aggregates federated metrics\n",
-    "    evaluate_fn=get_evaluate_fn(centralized_testset),  # global evaluation function\n",
-    ")"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "So far we have:\n",
-    "* created the dataset partitions (one for each client)\n",
-    "* defined the client class\n",
-    "* decided on a strategy to use\n",
-    "\n",
-    "Now we just need to launch the Flower FL experiment... not so fast! Just one final function: let's create another callback that the Simulation Engine will use to spawn virtual clients. As you can see, this is really simple: construct a FlowerClient object, assigning each its own data partition."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from torch.utils.data import DataLoader\n",
-    "\n",
-    "\n",
-    "def get_client_fn(dataset: FederatedDataset):\n",
-    "    \"\"\"Return a function to construct a client.\n",
-    "\n",
-    "    The VirtualClientEngine will execute this function whenever a client is sampled by\n",
-    "    the strategy to participate.\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def client_fn(cid: str) -> fl.client.Client:\n",
-    "        \"\"\"Construct a FlowerClient with its own dataset partition.\"\"\"\n",
-    "\n",
-    "        # Let's get the partition corresponding to the i-th client\n",
-    "        client_dataset = dataset.load_partition(int(cid), \"train\")\n",
-    "\n",
-    "        # Now let's split it into train (90%) and validation (10%)\n",
-    "        client_dataset_splits = client_dataset.train_test_split(test_size=0.1, seed=42)\n",
-    "\n",
-    "        trainset = client_dataset_splits[\"train\"]\n",
-    "        valset = client_dataset_splits[\"test\"]\n",
-    "\n",
-    "        # Now we apply the transform to each batch.\n",
-    "        trainloader = DataLoader(\n",
-    "            trainset.with_transform(apply_transforms), batch_size=32, shuffle=True\n",
-    "        )\n",
-    "        valloader = DataLoader(valset.with_transform(apply_transforms), batch_size=32)\n",
-    "\n",
-    "        # Create and return client\n",
-    "        return FlowerClient(trainloader, valloader).to_client()\n",
-    "\n",
-    "    return client_fn\n",
-    "\n",
-    "\n",
-    "client_fn_callback = get_client_fn(mnist_fds)"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Now we are ready to launch the FL experiment using Flower simulation:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "outputId": "9ad8dcea-8004-4c6e-a025-e168da636c88"
-   },
-   "outputs": [],
-   "source": [
-    "# With a dictionary, you tell Flower's VirtualClientEngine that
each\n",
-    "# client needs exclusive access to this many resources in order to run\n",
-    "client_resources = {\"num_cpus\": 1, \"num_gpus\": 0.0}\n",
-    "\n",
-    "# Let's disable tqdm progress bar in the main thread (used by the server)\n",
-    "disable_progress_bar()\n",
-    "\n",
-    "history = fl.simulation.start_simulation(\n",
-    "    client_fn=client_fn_callback,  # a callback to construct a client\n",
-    "    num_clients=NUM_CLIENTS,  # total number of clients in the experiment\n",
-    "    config=fl.server.ServerConfig(num_rounds=10),  # let's run for 10 rounds\n",
-    "    strategy=strategy,  # the strategy that will orchestrate the whole FL pipeline\n",
-    "    client_resources=client_resources,\n",
-    "    actor_kwargs={\n",
-    "        \"on_actor_init_fn\": disable_progress_bar  # disable tqdm on each actor/process spawning virtual clients\n",
-    "    },\n",
-    ")"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Doing 10 rounds should take less than 2 minutes on a CPU-only Colab instance. Flower Simulation is fast! 🚀\n",
-    "\n",
-    "You can then use the returned `History` object to either save the results to disk or do some visualisation (or both of course, or neither if you like chaos). Below you can see how to plot the centralised accuracy obtained at the end of each round (including at the very beginning of the experiment) for the _global model_. This is what the function `evaluate_fn()` that we passed to the strategy reports."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 508
-    },
-    "outputId": "d8eab106-cee9-4266-9082-0944882cdba8"
-   },
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "\n",
-    "print(f\"{history.metrics_centralized = }\")\n",
-    "\n",
-    "global_accuracy_centralised = history.metrics_centralized[\"accuracy\"]\n",
-    "round = [data[0] for data in global_accuracy_centralised]\n",
-    "acc = [100.0 * data[1] for data in global_accuracy_centralised]\n",
-    "plt.plot(round, acc)\n",
-    "plt.grid()\n",
-    "plt.ylabel(\"Accuracy (%)\")\n",
-    "plt.xlabel(\"Round\")\n",
-    "plt.title(\"MNIST - IID - 100 clients with 10 clients per round\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Congratulations!
With that, you built a Flower client, customized its instantiation through the `client_fn`, customized the server-side execution through a `FedAvg` strategy configured for this workload, and started a simulation with 100 clients (each holding their own individual partition of the MNIST dataset).\n",
-    "\n",
-    "Next, you can continue to explore more advanced Flower topics:\n",
-    "\n",
-    "- Deploy server and clients on different machines using `start_server` and `start_client`\n",
-    "- Customize the server-side execution through custom strategies\n",
-    "- Customize the client-side execution through `config` dictionaries\n",
-    "\n",
-    "Get all the resources you need!\n",
-    "\n",
-    "* **[DOCS]** Our complete documentation: https://flower.ai/docs/\n",
-    "* **[Examples]** All Flower examples: https://flower.ai/docs/examples/\n",
-    "* **[VIDEO]** Our YouTube channel: https://www.youtube.com/@flowerlabs\n",
-    "\n",
-    "Don't forget to join our Slack channel: https://flower.ai/join-slack/\n"
-   ]
-  }
- ],
- "metadata": {
-  "colab": {
-   "provenance": [],
-   "toc_visible": true
-  },
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/examples/simulation-pytorch/sim.py b/examples/simulation-pytorch/sim.py
deleted file mode 100644
index a435db6d7724..000000000000
--- a/examples/simulation-pytorch/sim.py
+++ /dev/null
@@ -1,225 +0,0 @@
-import argparse
-from collections import OrderedDict
-from typing import Dict, List, Tuple
-
-import flwr as fl
-import torch
-from datasets import Dataset
-from datasets.utils.logging import disable_progress_bar
-from flwr.common import Metrics
-from flwr.common.typing import Scalar
-from flwr_datasets import FederatedDataset
-from torch.utils.data import DataLoader
-
-from utils import Net, apply_transforms, test, train
-
-parser = argparse.ArgumentParser(description="Flower Simulation with PyTorch")
-
-parser.add_argument(
-    "--num_cpus",
-    type=int,
-    default=1,
-    help="Number of CPUs to assign to a virtual client",
-)
-parser.add_argument(
-    "--num_gpus",
-    type=float,
-    default=0.0,
-    help="Ratio of GPU memory to assign to a virtual client",
-)
-
-NUM_CLIENTS = 100
-NUM_ROUNDS = 10
-
-
-# Flower client, adapted from the PyTorch quickstart example
-class FlowerClient(fl.client.NumPyClient):
-    def __init__(self, trainset, valset):
-        self.trainset = trainset
-        self.valset = valset
-
-        # Instantiate model
-        self.model = Net()
-
-        # Determine device
-        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-        self.model.to(self.device)  # send model to device
-
-    def get_parameters(self, config):
-        return [val.cpu().numpy() for _, val in self.model.state_dict().items()]
-
-    def fit(self, parameters, config):
-        set_params(self.model, parameters)
-
-        # Read from config
-        batch, epochs = config["batch_size"], config["epochs"]
-
-        # Construct dataloader
-        trainloader = DataLoader(self.trainset, batch_size=batch, shuffle=True)
-
-        # Define optimizer
-        optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9)
-        # Train
-        train(self.model, trainloader, optimizer, epochs=epochs, device=self.device)
-
-        # Return local model and statistics
-        return self.get_parameters({}), len(trainloader.dataset), {}
-
-    def evaluate(self, parameters, config):
-        set_params(self.model, parameters)
-
-        # Construct dataloader
-        valloader = DataLoader(self.valset, batch_size=64)
-
-        # Evaluate
-        loss, accuracy = test(self.model, valloader, device=self.device)
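-        # Note: `test` returns the loss summed over batches (not averaged),
-        # while accuracy is computed over this client's entire validation split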
-
-        # Return statistics
-        return float(loss), len(valloader.dataset), {"accuracy": float(accuracy)}
-
-
-def get_client_fn(dataset: FederatedDataset):
-    """Return a function to construct a client.
-
-    The VirtualClientEngine will execute this function whenever a client is sampled by
-    the strategy to participate.
-    """
-
-    def client_fn(context) -> fl.client.Client:
-        """Construct a FlowerClient with its own dataset partition."""
-
-        # Let's get the partition corresponding to the i-th client
-        client_dataset = dataset.load_partition(
-            int(context.node_config["partition-id"]), "train"
-        )
-
-        # Now let's split it into train (90%) and validation (10%)
-        client_dataset_splits = client_dataset.train_test_split(test_size=0.1, seed=42)
-
-        trainset = client_dataset_splits["train"]
-        valset = client_dataset_splits["test"]
-
-        # Now we apply the transform to each batch.
-        trainset = trainset.with_transform(apply_transforms)
-        valset = valset.with_transform(apply_transforms)
-
-        # Create and return client
-        return FlowerClient(trainset, valset).to_client()
-
-    return client_fn
-
-
-def fit_config(server_round: int) -> Dict[str, Scalar]:
-    """Return a configuration with static batch size and (local) epochs."""
-    config = {
-        "epochs": 1,  # Number of local epochs done by clients
-        "batch_size": 32,  # Batch size to use by clients during fit()
-    }
-    return config
-
-
-def set_params(model: torch.nn.Module, params: fl.common.NDArrays):
-    """Set model weights from a list of NumPy ndarrays."""
-    params_dict = zip(model.state_dict().keys(), params)
-    state_dict = OrderedDict({k: torch.Tensor(v) for k, v in params_dict})
-    model.load_state_dict(state_dict, strict=True)
-
-
-def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:
-    """Aggregation function for (federated) evaluation metrics, i.e.
those returned by
-    the client's evaluate() method."""
-    # Multiply accuracy of each client by number of examples used
-    accuracies = [num_examples * m["accuracy"] for num_examples, m in metrics]
-    examples = [num_examples for num_examples, _ in metrics]
-
-    # Aggregate and return custom metric (weighted average)
-    return {"accuracy": sum(accuracies) / sum(examples)}
-
-
-def get_evaluate_fn(
-    centralized_testset: Dataset,
-):
-    """Return an evaluation function for centralized evaluation."""
-
-    def evaluate(
-        server_round: int, parameters: fl.common.NDArrays, config: Dict[str, Scalar]
-    ):
-        """Use the entire MNIST test set for evaluation."""
-
-        # Determine device
-        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-
-        model = Net()
-        set_params(model, parameters)
-        model.to(device)
-
-        # Apply transform to dataset
-        testset = centralized_testset.with_transform(apply_transforms)
-
-        # Disable tqdm for dataset preprocessing
-        disable_progress_bar()
-
-        testloader = DataLoader(testset, batch_size=50)
-        loss, accuracy = test(model, testloader, device=device)
-
-        return loss, {"accuracy": accuracy}
-
-    return evaluate
-
-
-# Download MNIST dataset and partition it
-mnist_fds = FederatedDataset(dataset="mnist", partitioners={"train": NUM_CLIENTS})
-centralized_testset = mnist_fds.load_split("test")
-
-from flwr.server import ServerAppComponents
-
-
-def make_strategy():
-    """Create the FedAvg strategy used by both `server_fn` and `main()`."""
-    return fl.server.strategy.FedAvg(
-        fraction_fit=0.1,  # Sample 10% of available clients for training
-        fraction_evaluate=0.05,  # Sample 5% of available clients for evaluation
-        min_available_clients=10,
-        on_fit_config_fn=fit_config,
-        evaluate_metrics_aggregation_fn=weighted_average,  # Aggregate federated metrics
-        evaluate_fn=get_evaluate_fn(centralized_testset),  # Global evaluation function
-    )
-
-
-def server_fn(context):
-    # Configure the strategy
-    return ServerAppComponents(
-        strategy=make_strategy(), config=fl.server.ServerConfig(num_rounds=NUM_ROUNDS)
-    )
-
-
-# ClientApp for Flower-Next
-client = fl.client.ClientApp(
-    client_fn=get_client_fn(mnist_fds),
-)
-
-# ServerApp for Flower-Next
-server = fl.server.ServerApp(server_fn=server_fn)
-
-
-def main():
-    # Parse input arguments
-    args = parser.parse_args()
-
-    # Resources to be assigned to each virtual client
-    client_resources = {
-        "num_cpus": args.num_cpus,
-        "num_gpus": args.num_gpus,
-    }
-
-    # Start simulation
-    fl.simulation.start_simulation(
-        client_fn=get_client_fn(mnist_fds),
-        num_clients=NUM_CLIENTS,
-        client_resources=client_resources,
-        config=fl.server.ServerConfig(num_rounds=NUM_ROUNDS),
-        strategy=make_strategy(),
-        actor_kwargs={
-            "on_actor_init_fn": disable_progress_bar  # disable tqdm on each actor/process spawning virtual clients
-        },
-    )
-
-
-if __name__ == "__main__":
-    main()
diff --git a/examples/simulation-pytorch/utils.py b/examples/simulation-pytorch/utils.py
deleted file mode 100644
index 702e9886615e..000000000000
--- a/examples/simulation-pytorch/utils.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from torchvision.transforms import Compose, Normalize, ToTensor
-
-
-# transformation to convert images to tensors and apply normalization
-def apply_transforms(batch):
-    transforms = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])
-    batch["image"] = [transforms(img) for img in batch["image"]]
-    return batch
-
-
-# Model (simple CNN adapted from 'PyTorch: A 60 Minute Blitz')
-class Net(nn.Module):
-    def __init__(self, num_classes: int = 10) -> None:
-        super(Net, self).__init__()
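-        # LeNet-style CNN: two conv+pool blocks feed three fully-connected
-        # layers; for 1x28x28 MNIST inputs the flattened feature map is 16 * 4 * 4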
-        self.conv1 = nn.Conv2d(1, 6, 5)
-        self.pool = nn.MaxPool2d(2, 2)
-        self.conv2 = nn.Conv2d(6, 16, 5)
-        self.fc1 = nn.Linear(16 * 4 * 4, 120)
-        self.fc2 = nn.Linear(120, 84)
-        self.fc3 = nn.Linear(84, num_classes)
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        x = self.pool(F.relu(self.conv1(x)))
-        x = self.pool(F.relu(self.conv2(x)))
-        x = x.view(-1, 16 * 4 * 4)
-        x = F.relu(self.fc1(x))
-        x = F.relu(self.fc2(x))
-        x = self.fc3(x)
-        return x
-
-
-# borrowed from the PyTorch quickstart example
-def train(net, trainloader, optim, epochs, device: str):
-    """Train the network on the training set."""
-    criterion = torch.nn.CrossEntropyLoss()
-    net.train()
-    for _ in range(epochs):
-        for batch in trainloader:
-            images, labels = batch["image"].to(device), batch["label"].to(device)
-            optim.zero_grad()
-            loss = criterion(net(images), labels)
-            loss.backward()
-            optim.step()
-
-
-# borrowed from the PyTorch quickstart example
-def test(net, testloader, device: str):
-    """Validate the network on the entire test set."""
-    criterion = torch.nn.CrossEntropyLoss()
-    correct, loss = 0, 0.0
-    net.eval()
-    with torch.no_grad():
-        for data in testloader:
-            images, labels = data["image"].to(device), data["label"].to(device)
-            outputs = net(images)
-            loss += criterion(outputs, labels).item()
-            _, predicted = torch.max(outputs.data, 1)
-            correct += (predicted == labels).sum().item()
-    accuracy = correct / len(testloader.dataset)
-    return loss, accuracy
diff --git a/examples/simulation-tensorflow/README.md b/examples/simulation-tensorflow/README.md
deleted file mode 100644
index 047cb4379659..000000000000
--- a/examples/simulation-tensorflow/README.md
+++ /dev/null
@@ -1,104 +0,0 @@
----
-tags: [basic, vision, fds, simulation]
-dataset: [MNIST]
-framework: [tensorflow, Keras]
----
-
-# Flower Simulation example using TensorFlow/Keras
-
-This introductory example uses the simulation capabilities of Flower to simulate a large number of clients on a single machine. Take a look at the [Documentation](https://flower.ai/docs/framework/how-to-run-simulations.html) for a deep dive into how Flower simulation works. This example uses [Flower Datasets](https://flower.ai/docs/datasets/) to download, partition and preprocess the MNIST dataset. The example uses 100 clients by default.
-
-## Running the example (via Jupyter Notebook)
-
-Run the example on Google Colab: [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/adap/flower/blob/main/examples/simulation-tensorflow/sim.ipynb)
-
-Alternatively, you can run `sim.ipynb` locally or in any other Jupyter environment.
-
-## Running the example
-
-Start by cloning the code example. We prepared a single-line command that you can copy into your shell which will check out the example for you:
-
-```shell
-git clone --depth=1 https://github.com/adap/flower.git && mv flower/examples/simulation-tensorflow . && rm -rf flower && cd simulation-tensorflow
-```
-
-This will create a new directory called `simulation-tensorflow` containing the following files:
-
-```
--- README.md <- You're reading this right now
--- sim.ipynb <- Example notebook
--- sim.py <- Example code
--- pyproject.toml <- Example dependencies
--- requirements.txt <- Example dependencies
-```
-
-### Installing Dependencies
-
-Project dependencies (such as `tensorflow` and `flwr`) are defined in `pyproject.toml` and `requirements.txt`.
We recommend [Poetry](https://python-poetry.org/docs/) to install those dependencies and manage your virtual environment ([Poetry installation](https://python-poetry.org/docs/#installation)) or [pip](https://pip.pypa.io/en/latest/development/), but feel free to use a different way of installing dependencies and managing virtual environments if you have other preferences.
-
-#### Poetry
-
-```shell
-poetry install
-poetry shell
-```
-
-Poetry will install all your dependencies in a newly created virtual environment. To verify that everything works correctly you can run the following command:
-
-```shell
-poetry run python -c "import flwr"
-```
-
-If you don't see any errors you're good to go!
-
-#### pip
-
-Write the command below in your terminal to install the dependencies according to the configuration file `requirements.txt`.
-
-```shell
-pip install -r requirements.txt
-```
-
-### Run with `start_simulation()`
-
-Ensure you have activated your environment, then:
-
-```bash
-# and then run the example
-python sim.py
-```
-
-You can adjust the CPU/GPU resources you assign to each of your virtual clients. By default, your clients will only use 2xCPU cores. For example:
-
-```bash
-# Will assign 2xCPUs to each client
-python sim.py --num_cpus=2
-
-# Will assign 2xCPUs and 25% of the GPU's VRAM to each client
-# This means that you can have 4 concurrent clients on each GPU
-# (assuming you have enough CPUs)
-python sim.py --num_cpus=2 --num_gpus=0.25
-```
-
-Because TensorFlow by default maps all the available VRAM, we need to [enable GPU memory growth](https://www.tensorflow.org/guide/gpu#limiting_gpu_memory_growth). See how it is done in the example (`sim.py`), both for the "main" process (where the server/strategy runs) and for the clients (using the `actor_kwargs`).
-
-### Run with Flower Next (preview)
-
-Ensure you have activated your environment, then execute the command below. All `ClientApp` instances will run on CPU but the `ServerApp` will run on the GPU if one is available. Note that this is the case because the `Simulation Engine` only exposes certain resources to the `ClientApp` (based on the `client_resources` in `--backend-config`). For TensorFlow simulations, it is desirable to make use of TF's [memory growth](https://www.tensorflow.org/api_docs/python/tf/config/experimental/set_memory_growth) feature. You can enable it easily with the `--enable-tf-gpu-growth` flag.
-
-```bash
-# Run with the default backend-config.
-# `--server-app` points to the `server` object in the sim.py file in this example.
-# `--client-app` points to the `client` object in the sim.py file in this example.
-flower-simulation --client-app=sim:client --server-app=sim:server --num-supernodes=100 --enable-tf-gpu-growth
-```
-
-You can change the default resources assigned to each `ClientApp` using the `--backend-config` argument.
-
-```bash
-# Tells the VCE to reserve 2x CPUs and 25% of available VRAM for each ClientApp
-flower-simulation --client-app=sim:client --server-app=sim:server --num-supernodes=100 \
-    --backend-config='{"client_resources": {"num_cpus":2, "num_gpus":0.25}}' --enable-tf-gpu-growth
-```
-
-Take a look at the [Documentation](https://flower.ai/docs/framework/how-to-run-simulations.html) for more details on how you can customise your simulation.
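-
-For reference, the sketch below shows how GPU memory growth is enabled programmatically when calling `start_simulation()` directly. It mirrors what `sim.py` in this example does; the hypothetical `client_fn` and `strategy` stand in for the ones defined there:
-
-```python
-import flwr as fl
-from flwr.simulation.ray_transport.utils import enable_tf_gpu_growth
-
-# Enable memory growth in the main process (where the server/strategy runs)
-enable_tf_gpu_growth()
-
-# ...and in every Ray actor that hosts virtual clients
-history = fl.simulation.start_simulation(
-    client_fn=client_fn,
-    num_clients=100,
-    config=fl.server.ServerConfig(num_rounds=10),
-    strategy=strategy,
-    actor_kwargs={"on_actor_init_fn": enable_tf_gpu_growth},
-)
-```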
diff --git a/examples/simulation-tensorflow/pyproject.toml b/examples/simulation-tensorflow/pyproject.toml
deleted file mode 100644
index ad8cc2032b2d..000000000000
--- a/examples/simulation-tensorflow/pyproject.toml
+++ /dev/null
@@ -1,16 +0,0 @@
-[build-system]
-requires = ["poetry-core>=1.4.0"]
-build-backend = "poetry.core.masonry.api"
-
-[tool.poetry]
-name = "simulation-tensorflow"
-version = "0.1.0"
-description = "Federated Learning Simulation with Flower and TensorFlow"
-authors = ["The Flower Authors <hello@flower.ai>"]
-
-[tool.poetry.dependencies]
-python = ">=3.8,<3.11"
-flwr = { extras = ["simulation"], version = ">=1.0,<2.0" }
-flwr-datasets = { extras = ["vision"], version = ">=0.0.2,<1.0.0" }
-tensorflow = { version = "^2.9.1, !=2.11.1", markers = "platform_machine == 'x86_64'" }
-tensorflow-macos = { version = "^2.9.1, !=2.11.1", markers = "sys_platform == 'darwin' and platform_machine == 'arm64'" }
diff --git a/examples/simulation-tensorflow/requirements.txt b/examples/simulation-tensorflow/requirements.txt
deleted file mode 100644
index bb69a87be1b4..000000000000
--- a/examples/simulation-tensorflow/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-flwr[simulation]>=1.0, <2.0
-flwr-datasets[vision]>=0.0.2, <1.0.0
-tensorflow-macos>=2.9.1, != 2.11.1 ; sys_platform == "darwin" and platform_machine == "arm64"
-tensorflow-cpu>=2.9.1, != 2.11.1 ; platform_machine == "x86_64"
diff --git a/examples/simulation-tensorflow/sim.ipynb b/examples/simulation-tensorflow/sim.ipynb
deleted file mode 100644
index 26b7260b5f1c..000000000000
--- a/examples/simulation-tensorflow/sim.ipynb
+++ /dev/null
@@ -1,347 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Flower Quickstart (Simulation with TensorFlow/Keras)\n",
-    "\n",
-    "Welcome to Flower, a friendly federated learning framework!\n",
-    "\n",
-    "In this notebook, we'll simulate a federated learning system with 100 clients. The clients will use TensorFlow/Keras to define model training and evaluation. Let's start by installing Flower (published as `flwr` on PyPI) with the `simulation` extra:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!pip install -q flwr[\"simulation\"] tensorflow\n",
-    "!pip install -q flwr_datasets[\"vision\"]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's also install Matplotlib so we can make some plots once the simulation is completed."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!pip install matplotlib"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Next, we import the required dependencies. The most important imports are Flower (`flwr`) and TensorFlow:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from typing import Dict, List, Tuple\n",
-    "\n",
-    "import tensorflow as tf\n",
-    "\n",
-    "import flwr as fl\n",
-    "from flwr.common import Metrics\n",
-    "from flwr.simulation.ray_transport.utils import enable_tf_gpu_growth\n",
-    "\n",
-    "from datasets import Dataset\n",
-    "from flwr_datasets import FederatedDataset\n",
-    "\n",
-    "VERBOSE = 0\n",
-    "NUM_CLIENTS = 100"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's start by defining the model we want to federate. Since we will be working with MNIST, using a fully connected model is sufficient. You can of course customize this model.",
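-    "\n",
-    "As a quick sanity check of the model defined next: the `Dense(128)` layer holds 28*28*128 + 128 = 100,480 parameters and the `Dense(10)` output layer another 128*10 + 10 = 1,290, so the model has roughly 100k trainable parameters in total. That small footprint matters when up to 100 clients share the resources of a single machine."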
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def get_model():\n",
-    "    \"\"\"Constructs a simple model architecture suitable for MNIST.\"\"\"\n",
-    "    model = tf.keras.models.Sequential(\n",
-    "        [\n",
-    "            tf.keras.layers.Flatten(input_shape=(28, 28)),\n",
-    "            tf.keras.layers.Dense(128, activation=\"relu\"),\n",
-    "            tf.keras.layers.Dropout(0.2),\n",
-    "            tf.keras.layers.Dense(10, activation=\"softmax\"),\n",
-    "        ]\n",
-    "    )\n",
-    "    model.compile(\"adam\", \"sparse_categorical_crossentropy\", metrics=[\"accuracy\"])\n",
-    "    return model"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "With that out of the way, let's move on to the interesting bits. Federated learning systems consist of a server and multiple clients. In Flower, we create clients by implementing subclasses of `flwr.client.Client` or `flwr.client.NumPyClient`. We use `NumPyClient` in this tutorial because it is easier to implement and requires us to write less boilerplate.\n",
-    "\n",
-    "To implement the Flower client, we create a subclass of `flwr.client.NumPyClient` and implement the three methods `get_parameters`, `fit`, and `evaluate`:\n",
-    "\n",
-    "- `get_parameters`: Return the current local model parameters\n",
-    "- `fit`: Receive model parameters from the server, train the model parameters on the local data, and return the (updated) model parameters to the server\n",
-    "- `evaluate`: Receive model parameters from the server, evaluate the model parameters on the local data, and return the evaluation result to the server\n",
-    "\n",
-    "We mentioned that our clients will use TensorFlow/Keras for the model training and evaluation. Keras models provide methods that make the implementation straightforward: we can update the local model with server-provided parameters through `model.set_weights`, we can train/evaluate the model through `fit`/`evaluate`, and we can get the updated model parameters through `model.get_weights`.\n",
-    "\n",
-    "Let's see a simple implementation:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class FlowerClient(fl.client.NumPyClient):\n",
-    "    def __init__(self, trainset, valset) -> None:\n",
-    "        # Create model\n",
-    "        self.model = get_model()\n",
-    "        self.trainset = trainset\n",
-    "        self.valset = valset\n",
-    "\n",
-    "    def get_parameters(self, config):\n",
-    "        return self.model.get_weights()\n",
-    "\n",
-    "    def fit(self, parameters, config):\n",
-    "        self.model.set_weights(parameters)\n",
-    "        self.model.fit(self.trainset, epochs=1, verbose=VERBOSE)\n",
-    "        return self.model.get_weights(), len(self.trainset), {}\n",
-    "\n",
-    "    def evaluate(self, parameters, config):\n",
-    "        self.model.set_weights(parameters)\n",
-    "        loss, acc = self.model.evaluate(self.valset, verbose=VERBOSE)\n",
-    "        return loss, len(self.valset), {\"accuracy\": acc}"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Our class `FlowerClient` defines how local training/evaluation will be performed and allows Flower to call the local training/evaluation through `fit` and `evaluate`. Each instance of `FlowerClient` represents a *single client* in our federated learning system. Federated learning systems have multiple clients (otherwise, there's not much to federate, is there?), so each client will be represented by its own instance of `FlowerClient`. If we have, for example, three clients in our workload, we'd have three instances of `FlowerClient`.
Flower calls `FlowerClient.fit` on the respective instance when the server selects a particular client for training (and `FlowerClient.evaluate` for evaluation).\n", - "\n", - "In this notebook, we want to simulate a federated learning system with 100 clients on a single machine. This means that the server and all 100 clients will live on a single machine and share resources such as CPU, GPU, and memory. Having 100 clients would mean having 100 instances of `FlowerClient` in memory. Doing this on a single machine can quickly exhaust the available memory resources, even if only a subset of these clients participates in a single round of federated learning.\n", - "\n", - "In addition to the regular capabilities where server and clients run on multiple machines, Flower, therefore, provides special simulation capabilities that create `FlowerClient` instances only when they are actually necessary for training or evaluation. To enable the Flower framework to create clients when necessary, we need to implement a function called `client_fn` that creates a `FlowerClient` instance on demand. Flower calls `client_fn` whenever it needs an instance of one particular client to call `fit` or `evaluate` (those instances are usually discarded after use). Clients are identified by a client ID, or short `cid`. The `cid` can be used, for example, to load different local data partitions for each client" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We now define four auxiliary functions for this example (note the last two are entirely optional):\n", - "* `get_client_fn()`: Is a function that returns another function. The returned `client_fn` will be executed by Flower's VirtualClientEngine each time a new _virtual_ client (i.e. a client that is simulated in a Python process) needs to be spawn. When are virtual clients spawned? Each time the strategy samples them to do either `fit()` (i.e. train the global model on the local data of a particular client) or `evaluate()` (i.e. evaluate the global model on the validation set of a given client).\n", - "\n", - "* `weighted_average()`: This is an optional function to pass to the strategy. It will be executed after an evaluation round (i.e. when client run `evaluate()`) and will aggregate the metrics clients return. In this example, we use this function to compute the weighted average accuracy of clients doing `evaluate()`.\n", - "\n", - "* `get_evaluate_fn()`: This is again a function that returns another function. The returned function will be executed by the strategy at the end of a `fit()` round and after a new global model has been obtained after aggregation. This is an optional argument for Flower strategies. In this example, we use the whole MNIST test set to perform this server-side evaluation." 
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def get_client_fn(dataset: FederatedDataset):\n",
-    "    \"\"\"Return a function to construct a client.\n",
-    "\n",
-    "    The VirtualClientEngine will execute this function whenever a client is sampled by\n",
-    "    the strategy to participate.\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def client_fn(cid: str) -> fl.client.Client:\n",
-    "        \"\"\"Construct a FlowerClient with its own dataset partition.\"\"\"\n",
-    "\n",
-    "        # Extract partition for client with id = cid\n",
-    "        client_dataset = dataset.load_partition(int(cid), \"train\")\n",
-    "\n",
-    "        # Now let's split it into train (90%) and validation (10%)\n",
-    "        client_dataset_splits = client_dataset.train_test_split(test_size=0.1, seed=42)\n",
-    "\n",
-    "        trainset = client_dataset_splits[\"train\"].to_tf_dataset(\n",
-    "            columns=\"image\", label_cols=\"label\", batch_size=32\n",
-    "        )\n",
-    "        valset = client_dataset_splits[\"test\"].to_tf_dataset(\n",
-    "            columns=\"image\", label_cols=\"label\", batch_size=64\n",
-    "        )\n",
-    "\n",
-    "        # Create and return client\n",
-    "        return FlowerClient(trainset, valset).to_client()\n",
-    "\n",
-    "    return client_fn\n",
-    "\n",
-    "\n",
-    "def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:\n",
-    "    \"\"\"Aggregation function for (federated) evaluation metrics, i.e. those returned by\n",
-    "    the client's evaluate() method.\"\"\"\n",
-    "    # Multiply accuracy of each client by number of examples used\n",
-    "    accuracies = [num_examples * m[\"accuracy\"] for num_examples, m in metrics]\n",
-    "    examples = [num_examples for num_examples, _ in metrics]\n",
-    "\n",
-    "    # Aggregate and return custom metric (weighted average)\n",
-    "    return {\"accuracy\": sum(accuracies) / sum(examples)}\n",
-    "\n",
-    "\n",
-    "def get_evaluate_fn(testset: Dataset):\n",
-    "    \"\"\"Return an evaluation function for server-side (i.e. centralised) evaluation.\"\"\"\n",
-    "\n",
-    "    # The `evaluate` function will be called after every round by the strategy\n",
-    "    def evaluate(\n",
-    "        server_round: int,\n",
-    "        parameters: fl.common.NDArrays,\n",
-    "        config: Dict[str, fl.common.Scalar],\n",
-    "    ):\n",
-    "        model = get_model()  # Construct the model\n",
-    "        model.set_weights(parameters)  # Update model with the latest parameters\n",
-    "        loss, accuracy = model.evaluate(testset, verbose=VERBOSE)\n",
-    "        return loss, {\"accuracy\": accuracy}\n",
-    "\n",
-    "    return evaluate"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We now have `FlowerClient`, which defines client-side training and evaluation, and `client_fn`, which allows Flower to create `FlowerClient` instances whenever it needs to call `fit` or `evaluate` on one particular client. The last step is to start the actual simulation using `flwr.simulation.start_simulation`.\n",
-    "\n",
-    "The function `start_simulation` accepts a number of arguments, amongst them the `client_fn` used to create `FlowerClient` instances, the number of clients to simulate (`num_clients`), the number of rounds (`num_rounds`), and the strategy. The strategy encapsulates the federated learning approach/algorithm, for example, *Federated Averaging* (FedAvg).\n",
-    "\n",
-    "Flower comes with a number of built-in strategies, but we can also use our own strategy implementations to customize nearly all aspects of the federated learning approach. For this example, we use the built-in `FedAvg` implementation and customize it using a few basic parameters. The last step is the actual call to `start_simulation` which - you guessed it - actually starts the simulation.\n",
-    "\n",
-    "We can use [Flower Datasets](https://flower.ai/docs/datasets/) to effortlessly obtain an off-the-shelf partitioned dataset or partition one that isn't pre-partitioned. Let's choose MNIST."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Enable GPU growth in your main process\n",
-    "enable_tf_gpu_growth()\n",
-    "\n",
-    "# Download MNIST dataset and partition it\n",
-    "mnist_fds = FederatedDataset(dataset=\"mnist\", partitioners={\"train\": NUM_CLIENTS})\n",
-    "# Get the whole test set for centralised evaluation\n",
-    "centralized_testset = mnist_fds.load_split(\"test\").to_tf_dataset(\n",
-    "    columns=\"image\", label_cols=\"label\", batch_size=64\n",
-    ")\n",
-    "\n",
-    "\n",
-    "# Create FedAvg strategy\n",
-    "strategy = fl.server.strategy.FedAvg(\n",
-    "    fraction_fit=0.1,  # Sample 10% of available clients for training\n",
-    "    fraction_evaluate=0.05,  # Sample 5% of available clients for evaluation\n",
-    "    min_fit_clients=10,  # Never sample less than 10 clients for training\n",
-    "    min_evaluate_clients=5,  # Never sample less than 5 clients for evaluation\n",
-    "    min_available_clients=int(\n",
-    "        NUM_CLIENTS * 0.75\n",
-    "    ),  # Wait until at least 75 clients are available\n",
-    "    evaluate_metrics_aggregation_fn=weighted_average,  # aggregates federated metrics\n",
-    "    evaluate_fn=get_evaluate_fn(centralized_testset),  # global evaluation function\n",
-    ")\n",
-    "\n",
-    "# With a dictionary, you tell Flower's VirtualClientEngine that each\n",
-    "# client needs exclusive access to this many resources in order to run\n",
-    "client_resources = {\"num_cpus\": 1, \"num_gpus\": 0.0}\n",
-    "\n",
-    "# Start simulation\n",
-    "history = fl.simulation.start_simulation(\n",
-    "    client_fn=get_client_fn(mnist_fds),\n",
-    "    num_clients=NUM_CLIENTS,\n",
-    "    config=fl.server.ServerConfig(num_rounds=10),\n",
-    "    strategy=strategy,\n",
-    "    client_resources=client_resources,\n",
-    "    actor_kwargs={\n",
-    "        \"on_actor_init_fn\": enable_tf_gpu_growth  # Enable GPU growth upon actor init.\n",
-    "    },\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "You can then use the returned `History` object to either save the results to disk or do some visualisation (or both, of course, or neither if you like chaos). Below you can see how to plot the centralised accuracy obtained at the end of each round (including at the very beginning of the experiment) for the global model. This is what the `evaluate_fn()` function that we passed to the strategy reports."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "\n",
-    "print(f\"{history.metrics_centralized = }\")\n",
-    "\n",
-    "global_accuracy_centralised = history.metrics_centralized[\"accuracy\"]\n",
-    "round = [data[0] for data in global_accuracy_centralised]\n",
-    "acc = [100.0 * data[1] for data in global_accuracy_centralised]\n",
-    "plt.plot(round, acc)\n",
-    "plt.grid()\n",
-    "plt.ylabel(\"Accuracy (%)\")\n",
-    "plt.xlabel(\"Round\")\n",
-    "plt.title(\"MNIST - IID - 100 clients with 10 clients per round\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Congratulations! With that, you built a Flower client, customized its instantiation through the `client_fn`, customized the server-side execution through a `FedAvg` strategy configured for this workload, and started a simulation with 100 clients (each holding their own individual partition of the MNIST dataset).\n",
-    "\n",
-    "Next, you can continue to explore more advanced Flower topics:\n",
-    "\n",
-    "- Deploy server and clients on different machines using `start_server` and `start_client`\n",
-    "- Customize the server-side execution through custom strategies\n",
-    "- Customize the client-side execution through `config` dictionaries\n",
-    "\n",
-    "Get all resources you need!\n",
-    "\n",
-    "* **[DOCS]** Our complete documentation: https://flower.ai/docs/\n",
-    "* **[Examples]** All Flower examples: https://flower.ai/docs/examples/\n",
-    "* **[VIDEO]** Our YouTube channel: https://www.youtube.com/@flowerlabs\n",
-    "\n",
-    "Don't forget to join our Slack channel: https://flower.ai/join-slack/"
-   ]
-  }
- ],
- "metadata": {
-  "colab": {
-   "name": "flower.ipynb",
-   "provenance": []
-  },
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/examples/simulation-tensorflow/sim.py b/examples/simulation-tensorflow/sim.py
deleted file mode 100644
index 1ae2db41ab4b..000000000000
--- a/examples/simulation-tensorflow/sim.py
+++ /dev/null
@@ -1,186 +0,0 @@
-import argparse
-import os
-from typing import Dict, List, Tuple
-
-import flwr as fl
-import tensorflow as tf
-from datasets import Dataset
-from flwr.common import Metrics
-from flwr.simulation.ray_transport.utils import enable_tf_gpu_growth
-from flwr_datasets import FederatedDataset
-
-# Make TensorFlow logs less verbose
-os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
-
-parser = argparse.ArgumentParser(description="Flower Simulation with TensorFlow/Keras")
-
-parser.add_argument(
-    "--num_cpus",
-    type=int,
-    default=1,
-    help="Number of CPUs to assign to a virtual client",
-)
-parser.add_argument(
-    "--num_gpus",
-    type=float,
-    default=0.0,
-    help="Ratio of GPU memory to assign to a virtual client",
-)
-
-NUM_CLIENTS = 100
-NUM_ROUNDS = 10
-VERBOSE = 0
-
-
-class FlowerClient(fl.client.NumPyClient):
-    def __init__(self, trainset, valset) -> None:
-        # Create model
-        self.model = get_model()
-        self.trainset = trainset
-        self.valset = valset
-
-    def get_parameters(self, config):
-        return self.model.get_weights()
-
-    def fit(self, parameters, config):
-        self.model.set_weights(parameters)
-        self.model.fit(self.trainset, epochs=1, verbose=VERBOSE)
-        return self.model.get_weights(), len(self.trainset), {}
-
-    def evaluate(self, parameters, config):
-        self.model.set_weights(parameters)
-        loss, acc = self.model.evaluate(self.valset, verbose=VERBOSE)
-        return loss, len(self.valset), {"accuracy": acc}
-
-
-def get_model():
-    """Constructs a simple model architecture suitable for MNIST."""
-    model = tf.keras.models.Sequential(
-        [
-            tf.keras.layers.Flatten(input_shape=(28, 28)),
-            tf.keras.layers.Dense(128, activation="relu"),
-            tf.keras.layers.Dropout(0.2),
-            tf.keras.layers.Dense(10, activation="softmax"),
-        ]
-    )
-    model.compile("adam", "sparse_categorical_crossentropy", metrics=["accuracy"])
-    return model
-
-
-def get_client_fn(dataset: FederatedDataset):
-    """Return a function to construct a client.
-
-    The VirtualClientEngine will execute this function whenever a client is sampled by
-    the strategy to participate.
-    """
-
-    def client_fn(cid: str) -> fl.client.Client:
-        """Construct a FlowerClient with its own dataset partition."""
-
-        # Extract partition for client with id = cid
-        client_dataset = dataset.load_partition(int(cid), "train")
-
-        # Now let's split it into train (90%) and validation (10%)
-        client_dataset_splits = client_dataset.train_test_split(test_size=0.1, seed=42)
-
-        trainset = client_dataset_splits["train"].to_tf_dataset(
-            columns="image", label_cols="label", batch_size=32
-        )
-        valset = client_dataset_splits["test"].to_tf_dataset(
-            columns="image", label_cols="label", batch_size=64
-        )
-
-        # Create and return client
-        return FlowerClient(trainset, valset).to_client()
-
-    return client_fn
-
-
-def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:
-    """Aggregation function for (federated) evaluation metrics.
-
-    It will aggregate those metrics returned by the client's evaluate() method.
-    """
-    # Multiply accuracy of each client by number of examples used
-    accuracies = [num_examples * m["accuracy"] for num_examples, m in metrics]
-    examples = [num_examples for num_examples, _ in metrics]
-
-    # Aggregate and return custom metric (weighted average)
-    return {"accuracy": sum(accuracies) / sum(examples)}
-
-
-def get_evaluate_fn(testset: Dataset):
-    """Return an evaluation function for server-side (i.e. centralised) evaluation."""
-
-    # The `evaluate` function will be called after every round by the strategy
-    def evaluate(
-        server_round: int,
-        parameters: fl.common.NDArrays,
-        config: Dict[str, fl.common.Scalar],
-    ):
-        model = get_model()  # Construct the model
-        model.set_weights(parameters)  # Update model with the latest parameters
-        loss, accuracy = model.evaluate(testset, verbose=VERBOSE)
-        return loss, {"accuracy": accuracy}
-
-    return evaluate
-
-
-# Download MNIST dataset and partition it
-mnist_fds = FederatedDataset(dataset="mnist", partitioners={"train": NUM_CLIENTS})
-# Get the whole test set for centralised evaluation
-centralized_testset = mnist_fds.load_split("test").to_tf_dataset(
-    columns="image", label_cols="label", batch_size=64
-)
-
-# Create FedAvg strategy
-strategy = fl.server.strategy.FedAvg(
-    fraction_fit=0.1,  # Sample 10% of available clients for training
-    fraction_evaluate=0.05,  # Sample 5% of available clients for evaluation
-    min_fit_clients=10,  # Never sample less than 10 clients for training
-    evaluate_metrics_aggregation_fn=weighted_average,  # aggregates federated metrics
-    evaluate_fn=get_evaluate_fn(centralized_testset),  # global evaluation function
-)
-
-
-# ClientApp for Flower-Next
-client = fl.client.ClientApp(
-    client_fn=get_client_fn(mnist_fds),
-)
-
-# ServerApp for Flower-Next
-server = fl.server.ServerApp(
-    config=fl.server.ServerConfig(num_rounds=NUM_ROUNDS),
-    strategy=strategy,
-)
-
-
-def main() -> None:
-    # Parse input arguments
-    args = parser.parse_args()
-
-    # With a dictionary, you tell Flower's VirtualClientEngine that each
-    # client needs exclusive access to this many resources in order to run
-    client_resources = {
-        "num_cpus": args.num_cpus,
-        "num_gpus": args.num_gpus,
-    }
-
-    # Start simulation
-    fl.simulation.start_simulation(
-        client_fn=get_client_fn(mnist_fds),
-        num_clients=NUM_CLIENTS,
-        config=fl.server.ServerConfig(NUM_ROUNDS),
-        strategy=strategy,
-        client_resources=client_resources,
-        actor_kwargs={
-            "on_actor_init_fn": enable_tf_gpu_growth  # Enable GPU growth upon actor init
-            # does nothing if `num_gpus` in client_resources is 0.0
-        },
-    )
-
-
-if __name__ == "__main__":
-    # Enable GPU growth in your main process
-    enable_tf_gpu_growth()
-    main()
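
For anyone following the redirects added in `examples/doc/source/conf.py`, the weighted-average metric aggregation that both removed examples relied on is plain Python and carries over unchanged to the quickstart examples. The sketch below is a minimal, standalone version of that computation; the three client results are hypothetical values chosen only to illustrate the weighting, and this `weighted_average` mirrors (but does not import) the deleted helper of the same name.

```python
from typing import Dict, List, Tuple


def weighted_average(metrics: List[Tuple[int, Dict[str, float]]]) -> Dict[str, float]:
    """Weight each client's accuracy by the number of examples it evaluated on."""
    accuracies = [num_examples * m["accuracy"] for num_examples, m in metrics]
    examples = [num_examples for num_examples, _ in metrics]
    return {"accuracy": sum(accuracies) / sum(examples)}


# Hypothetical (num_examples, metrics) pairs, as returned by NumPyClient.evaluate()
results = [(100, {"accuracy": 0.90}), (50, {"accuracy": 0.80}), (25, {"accuracy": 0.60})]
print(weighted_average(results))  # accuracy = 145/175, roughly 0.8286
```

Clients with more local examples dominate the average, which is why the strategy hands the aggregation function `(num_examples, metrics)` tuples rather than bare metric dictionaries.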