diff --git a/cmd/backup/main.go b/cmd/backup/main.go index eda6019c..0aead03a 100644 --- a/cmd/backup/main.go +++ b/cmd/backup/main.go @@ -44,12 +44,12 @@ func main() { }() s.must(s.withLabeledCommands(lifecyclePhaseArchive, func() error { - restartContainers, err := s.stopContainers() + restartContainersOrServices, err := s.stopContainersOrServices() // The mechanism for restarting containers is not using hooks as it // should happen as soon as possible (i.e. before uploading backups or // similar). defer func() { - s.must(restartContainers()) + s.must(restartContainersOrServices()) }() if err != nil { return err diff --git a/cmd/backup/script.go b/cmd/backup/script.go index 29d8d730..18a829ab 100644 --- a/cmd/backup/script.go +++ b/cmd/backup/script.go @@ -30,10 +30,10 @@ import ( "github.com/ProtonMail/go-crypto/openpgp" "github.com/containrrr/shoutrrr" "github.com/containrrr/shoutrrr/pkg/router" + "github.com/docker/cli/cli/command/service/progress" "github.com/docker/docker/api/types" ctr "github.com/docker/docker/api/types/container" "github.com/docker/docker/api/types/filters" - "github.com/docker/docker/api/types/swarm" "github.com/docker/docker/client" "github.com/leekchan/timeutil" "github.com/offen/envconfig" @@ -318,14 +318,107 @@ func newScript() (*script, error) { return s, nil } -// stopContainers stops all Docker containers that are marked as to being -// stopped during the backup and returns a function that can be called to -// restart everything that has been stopped. 
-func (s *script) stopContainers() (func() error, error) {
+// noopWriteCloser turns an io.Writer into an io.WriteCloser whose Close
+// method does nothing.
+type noopWriteCloser struct {
+	io.Writer
+}
+
+// Close always returns nil.
+func (noopWriteCloser) Close() error {
+	return nil
+}
+
+// stopContainersOrServices scales down labeled services when the daemon's
+// swarm node state is anything but "inactive" and stops labeled containers
+// otherwise. The returned function restarts whatever has been stopped.
+func (s *script) stopContainersOrServices() (func() error, error) {
 	if s.cli == nil {
 		return noop, nil
 	}
 
+	dockerInfo, err := s.cli.Info(context.Background())
+	if err != nil {
+		return noop, fmt.Errorf("stopContainersOrServices: error getting docker info: %w", err)
+	}
+	if dockerInfo.Swarm.LocalNodeState != "inactive" {
+		return s.stopServices()
+	}
+	return s.stopContainers()
+}
+
+// scaleService sets the replica count of the replicated service with the
+// given ID and waits for the service to converge.
+func (s *script) scaleService(ctx context.Context, id string, replicas uint64) error {
+	// Read the service right before updating it so the update carries the
+	// current version instead of one that an earlier update made stale.
+	service, _, err := s.cli.ServiceInspectWithRaw(ctx, id, types.ServiceInspectOptions{})
+	if err != nil {
+		return fmt.Errorf("error inspecting service %s: %w", id, err)
+	}
+	if service.Spec.Mode.Replicated == nil {
+		return fmt.Errorf("service %s is not a replicated service", id)
+	}
+	service.Spec.Mode.Replicated.Replicas = &replicas
+	if _, err := s.cli.ServiceUpdate(ctx, id, service.Version, service.Spec, types.ServiceUpdateOptions{}); err != nil {
+		return fmt.Errorf("error updating service %s: %w", id, err)
+	}
+	if err := progress.ServiceProgress(ctx, s.cli, id, &noopWriteCloser{io.Discard}); err != nil {
+		return fmt.Errorf("error converging service %s: %w", id, err)
+	}
+	return nil
+}
+
+// stopServices scales all labeled services down to zero replicas. The
+// returned function restores the previous replica counts; it covers every
+// service touched so far even when an error is returned alongside it.
+func (s *script) stopServices() (func() error, error) {
+	ctx := context.Background()
+	serviceLabel := fmt.Sprintf(
+		"docker-volume-backup.stop-during-backup=%s",
+		s.c.BackupStopContainerLabel,
+	)
+	allServices, err := s.cli.ServiceList(ctx, types.ServiceListOptions{})
+	if err != nil {
+		return noop, fmt.Errorf("stopServices: error querying services: %w", err)
+	}
+	matchingServices, err := s.cli.ServiceList(ctx, types.ServiceListOptions{
+		Filters: filters.NewArgs(filters.KeyValuePair{Key: "label", Value: serviceLabel}),
+	})
+	if err != nil {
+		return noop, fmt.Errorf("stopServices: error querying services: %w", err)
+	}
+	// Reject services without a replica count before anything is scaled down.
+	for _, service := range matchingServices {
+		if m := service.Spec.Mode.Replicated; m == nil || m.Replicas == nil {
+			return noop, fmt.Errorf("stopServices: service %s is not a replicated service", service.Spec.Name)
+		}
+	}
+	s.logger.Info(
+		fmt.Sprintf(
+			"Scaling down %d service(s) labeled `%s` out of %d running service(s).",
+			len(matchingServices),
+			serviceLabel,
+			len(allServices),
+		),
+	)
+
+	// prevReplicas[i] is the replica count to restore for matchingServices[i].
+	// It only holds entries for services whose scale down has been attempted.
+	prevReplicas := make([]uint64, 0, len(matchingServices))
+	restart := func() error {
+		var firstErr error
+		failed := 0
+		// Keep going on errors so one service cannot keep the others down.
+		for idx, replicas := range prevReplicas {
+			if err := s.scaleService(ctx, matchingServices[idx].ID, replicas); err != nil {
+				if firstErr == nil {
+					firstErr = err
+				}
+				failed++
+			}
+		}
+		if firstErr != nil {
+			return fmt.Errorf("stopServices: %d error(s) scaling up services: %w", failed, firstErr)
+		}
+		s.logger.Info(fmt.Sprintf("Restarted %d service(s).", len(prevReplicas)))
+		return nil
+	}
+	for _, service := range matchingServices {
+		// Record the count before the update so that a failed update or a
+		// failed convergence is still undone by restart.
+		prevReplicas = append(prevReplicas, *service.Spec.Mode.Replicated.Replicas)
+		if err := s.scaleService(ctx, service.ID, 0); err != nil {
+			return restart, fmt.Errorf("stopServices: error scaling down services: %w", err)
+		}
+	}
+
+	s.stats.Containers = ContainersStats{
+		All:     uint(len(allServices)),
+		ToStop:  uint(len(matchingServices)),
+		Stopped: uint(len(prevReplicas)),
+	}
+	return restart, nil
+}
+
+// stopContainers stops all Docker containers that are marked to be stopped
+// during the backup and returns a function that can be called to
+// restart everything that has been stopped.
+func (s *script) stopContainers() (func() error, error) { allContainers, err := s.cli.ContainerList(context.Background(), types.ContainerListOptions{}) if err != nil { return noop, fmt.Errorf("stopContainers: error querying for containers: %w", err) @@ -385,42 +478,13 @@ func (s *script) stopContainers() (func() error, error) { } return func() error { - servicesRequiringUpdate := map[string]struct{}{} - var restartErrors []error for _, container := range stoppedContainers { - if swarmServiceName, ok := container.Labels["com.docker.swarm.service.name"]; ok { - servicesRequiringUpdate[swarmServiceName] = struct{}{} - continue - } if err := s.cli.ContainerStart(context.Background(), container.ID, types.ContainerStartOptions{}); err != nil { restartErrors = append(restartErrors, err) } } - if len(servicesRequiringUpdate) != 0 { - services, _ := s.cli.ServiceList(context.Background(), types.ServiceListOptions{}) - for serviceName := range servicesRequiringUpdate { - var serviceMatch swarm.Service - for _, service := range services { - if service.Spec.Name == serviceName { - serviceMatch = service - break - } - } - if serviceMatch.ID == "" { - return fmt.Errorf("stopContainers: couldn't find service with name %s", serviceName) - } - serviceMatch.Spec.TaskTemplate.ForceUpdate += 1 - if _, err := s.cli.ServiceUpdate( - context.Background(), serviceMatch.ID, - serviceMatch.Version, serviceMatch.Spec, types.ServiceUpdateOptions{}, - ); err != nil { - restartErrors = append(restartErrors, err) - } - } - } - if len(restartErrors) != 0 { return fmt.Errorf( "stopContainers: %d error(s) restarting containers and services: %w", @@ -430,7 +494,7 @@ func (s *script) stopContainers() (func() error, error) { } s.logger.Info( fmt.Sprintf( - "Restarted %d container(s) and the matching service(s).", + "Restarted %d container(s).", len(stoppedContainers), ), ) diff --git a/go.mod b/go.mod index 47814b4b..14ba1b08 100644 --- a/go.mod +++ b/go.mod @@ -22,9 +22,11 @@ require ( ) require ( + 
github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect github.com/cloudflare/circl v1.3.7 // indirect github.com/golang-jwt/jwt/v5 v5.0.0 // indirect github.com/golang/protobuf v1.5.3 // indirect + golang.org/x/time v0.0.0-20220609170525-579cf78fd858 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/protobuf v1.31.0 // indirect ) @@ -35,6 +37,7 @@ require ( github.com/AzureAD/microsoft-authentication-library-for-go v1.1.1 // indirect github.com/Microsoft/go-winio v0.5.2 // indirect github.com/ProtonMail/go-crypto v0.0.0-20230717121422-5aa5874ade95 + github.com/docker/cli v24.0.7+incompatible github.com/docker/distribution v2.8.2+incompatible // indirect github.com/docker/go-connections v0.4.0 // indirect github.com/docker/go-units v0.4.0 // indirect diff --git a/go.sum b/go.sum index 6f675755..e1708c94 100644 --- a/go.sum +++ b/go.sum @@ -255,6 +255,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= +github.com/docker/cli v24.0.7+incompatible h1:wa/nIwYFW7BVTGa7SWPVyyXU9lgORqUb1xfI36MSkFg= +github.com/docker/cli v24.0.7+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= github.com/docker/distribution v2.8.2+incompatible h1:T3de5rq0dB1j30rp0sA2rER+m322EBzniBPB6ZIzuh8= github.com/docker/distribution v2.8.2+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/docker/docker v24.0.7+incompatible h1:Wo6l37AuwP3JaMnZa226lzVXGA3F9Ig1seQen0cKYlM= @@ -1259,6 +1261,7 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 
h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk= gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0= diff --git a/test/swarm/docker-compose.yml b/test/swarm/docker-compose.yml index 7b04be3c..018b7d56 100644 --- a/test/swarm/docker-compose.yml +++ b/test/swarm/docker-compose.yml @@ -41,11 +41,11 @@ services: offen: image: offen/offen:latest - labels: - - docker-volume-backup.stop-during-backup=true healthcheck: disable: true deploy: + labels: + - docker-volume-backup.stop-during-backup=true replicas: 2 restart_policy: condition: on-failure @@ -54,11 +54,11 @@ services: image: postgres:14-alpine environment: POSTGRES_PASSWORD: example - labels: - - docker-volume-backup.stop-during-backup=true volumes: - pg_data:/var/lib/postgresql/data deploy: + labels: + - docker-volume-backup.stop-during-backup=true restart_policy: condition: on-failure