Skip to content

Commit

Permalink
🌱 Hot Reload for secrets (#49)
Browse files Browse the repository at this point in the history
  • Loading branch information
guettli authored Dec 17, 2024
1 parent 62cb91e commit aa5bc33
Show file tree
Hide file tree
Showing 16 changed files with 624 additions and 81 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ permissions:
jobs:
manager-image:
name: Build and push manager image
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
steps:
- name: Checkout code
uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ permissions:
jobs:
manager-image:
name: Build and push manager image
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
steps:
- name: Checkout code
uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
Expand Down Expand Up @@ -123,7 +123,7 @@ jobs:
release:
name: Create draft release
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
needs:
- manager-image
steps:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ deploy/gen/
hack/.*
/*.kubeconfig
/etc
/*.yaml
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ Additional PRs we should create in upstream, so that we can use upstream instead

PRs which are **not** needed in upstream, because upstream has this feature:

* [PR hotreload credentials, when mounted secret changed](https://github.com/syself/hetzner-cloud-controller-manager/pull/49)
* [PR getInstanceTypeOfRobotServer: convert invalid characters to dashes](https://github.com/syself/hetzner-cloud-controller-manager/pull/40)
* [Make robot client optional for lb client](https://github.com/syself/hetzner-cloud-controller-manager/pull/37): upstream uses ROBOT_ENABLED. We need to set that env var.
* [Fix InstanceExists for baremetal servers, check node name](https://github.com/syself/hetzner-cloud-controller-manager/pull/32)
Expand Down Expand Up @@ -70,6 +71,30 @@ helm upgrade --install ccm syself/ccm-hetzner --version X.Y.Z \

See [CAPH docs](https://syself.com/docs/caph/topics/baremetal/creating-workload-cluster#deploying-the-hetzner-cloud-controller-manager) for more details.

## Usage

We recommend mounting the secret `hetzner` as a volume and making it available to the container at `/etc/hetzner-secret`.
The credentials are then reloaded automatically when the secret changes.
You can see an example in the [ccm helm chart](https://github.com/syself/charts/tree/main/charts/ccm-hetzner).

## Env Variables

ROBOT_DEBUG: When set to `true`, API calls to the Hetzner Robot API are logged.

CACHE_TIMEOUT: Timeout of the Robot API Cache. See [ParseDuration](https://pkg.go.dev/time#ParseDuration) for supported syntax.

HCLOUD_ENDPOINT: Defaults to `https://api.hetzner.cloud/v1`

Additional Env Variables are defined at the top of [cloud.go](https://github.com/syself/hetzner-cloud-controller-manager/blob/master/hcloud/cloud.go)

Deprecated (use mounted secret instead):

```
HCLOUD_TOKEN
ROBOT_USER_NAME
ROBOT_PASSWORD
```

---

End of "About the fork"
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/syself/hetzner-cloud-controller-manager
go 1.23.0

require (
github.com/fsnotify/fsnotify v1.8.0
github.com/hetznercloud/hcloud-go/v2 v2.17.0
github.com/prometheus/client_golang v1.20.5
github.com/spf13/pflag v1.0.5
Expand Down Expand Up @@ -32,7 +33,6 @@ require (
github.com/emicklei/go-restful/v3 v3.12.1 // indirect
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.8.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect
Expand Down
115 changes: 57 additions & 58 deletions hcloud/cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,14 @@ import (
"runtime/debug"
"strconv"
"strings"
"time"

"github.com/hetznercloud/hcloud-go/v2/hcloud"
"github.com/hetznercloud/hcloud-go/v2/hcloud/metadata"
"github.com/syself/hetzner-cloud-controller-manager/internal/credentials"
"github.com/syself/hetzner-cloud-controller-manager/internal/hcops"
"github.com/syself/hetzner-cloud-controller-manager/internal/metrics"
robotclient "github.com/syself/hetzner-cloud-controller-manager/internal/robot/client"
"github.com/syself/hetzner-cloud-controller-manager/internal/robot/client/cache"
"github.com/syself/hetzner-cloud-controller-manager/internal/util"
hrobot "github.com/syself/hrobot-go"
corev1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/record"
cloudprovider "k8s.io/cloud-provider"
Expand All @@ -51,16 +49,10 @@ const (
hcloudDebugENVVar = "HCLOUD_DEBUG"
robotDebugENVVar = "ROBOT_DEBUG"

robotUserNameENVVar = "ROBOT_USER_NAME"
robotPasswordENVVar = "ROBOT_PASSWORD"

// Only as reference - is used in hcops package.
// Default is 5 minutes.
RateLimitWaitTimeRobot = "RATE_LIMIT_WAIT_TIME_ROBOT"

// default is 5 minutes.
CacheTimeout = "CACHE_TIMEOUT"

// Disable the "master/server is attached to the network" check against the metadata service.
hcloudNetworkDisableAttachedCheckENVVar = "HCLOUD_NETWORK_DISABLE_ATTACHED_CHECK"
hcloudNetworkRoutesEnabledENVVar = "HCLOUD_NETWORK_ROUTES_ENABLED"
Expand All @@ -73,7 +65,6 @@ const (
hcloudLoadBalancersDisableIPv6 = "HCLOUD_LOAD_BALANCERS_DISABLE_IPV6"
hcloudMetricsEnabledENVVar = "HCLOUD_METRICS_ENABLED"
hcloudMetricsAddress = ":8233"
nodeNameENVVar = "NODE_NAME"
providerName = "hcloud"
hostNamePrefixRobot = "bm-"
)
Expand All @@ -84,7 +75,7 @@ var errMissingRobotCredentials = errors.New("missing robot credentials - cannot
var providerVersion = "unknown"

type cloud struct {
client *hcloud.Client
hcloudClient *hcloud.Client
robotClient robotclient.Client
instances *instances
routes *routes
Expand All @@ -111,22 +102,21 @@ func (lt *LoggingTransport) RoundTrip(req *http.Request) (resp *http.Response, e
return resp, nil
}

func newCloud(_ io.Reader) (cloudprovider.Interface, error) {
const op = "hcloud/newCloud"
metrics.OperationCalled.WithLabelValues(op).Inc()

token := os.Getenv(hcloudTokenENVVar)
if token == "" {
return nil, fmt.Errorf("environment variable %q is required", hcloudTokenENVVar)
func newHcloudClient(rootDir string) (*hcloud.Client, error) {
credentialsDir := credentials.GetDirectory(rootDir)
token, err := credentials.GetInitialHcloudCredentialsFromDirectory(credentialsDir)
if err != nil {
klog.V(1).Infof("reading Hetzner Cloud token from directory failed. Will try env var: %s", err.Error())
token = os.Getenv(hcloudTokenENVVar)
if token == "" {
return nil, fmt.Errorf("Either token from directory %q or environment variable %q is required", credentialsDir, hcloudTokenENVVar)
}
} else {
klog.V(1).Infof("reading Hetzner Cloud token from %q. The controller will reload the credentials, when the file changes", credentialsDir)
}
if len(token) != 64 {
return nil, fmt.Errorf("entered token is invalid (must be exactly 64 characters long)")
}
nodeName := os.Getenv(nodeNameENVVar)
if nodeName == "" {
return nil, fmt.Errorf("environment variable %q is required", nodeNameENVVar)
}

opts := []hcloud.ClientOption{
hcloud.WithToken(token),
hcloud.WithApplication("hetzner-cloud-controller", providerVersion),
Expand All @@ -146,43 +136,43 @@ func newCloud(_ io.Reader) (cloudprovider.Interface, error) {
opts = append(opts, hcloud.WithEndpoint(endpoint))
}
client := hcloud.NewClient(opts...)
metadataClient := metadata.NewClient()
return client, nil
}

robotUserName := os.Getenv(robotUserNameENVVar)
robotPassword := os.Getenv(robotPasswordENVVar)
func newCloud(_ io.Reader) (cloudprovider.Interface, error) {
const op = "hcloud/newCloud"
metrics.OperationCalled.WithLabelValues(op).Inc()

cacheTimeout, err := util.GetEnvDuration(CacheTimeout)
rootDir, err := os.Getwd()
if err != nil {
return nil, fmt.Errorf("%s: %w", op, err)
}

if cacheTimeout == 0 {
cacheTimeout = 5 * time.Minute
hcloudClient, err := newHcloudClient(rootDir)
if err != nil {
return nil, fmt.Errorf("%s: %w", op, err)
}
metadataClient := metadata.NewClient()

var robotClient robotclient.Client
if robotUserName != "" && robotPassword != "" {
var c hrobot.RobotClient
if os.Getenv(robotDebugENVVar) == "true" {
client := &http.Client{
Transport: &LoggingTransport{
roundTripper: http.DefaultTransport,
},
}
c = hrobot.NewBasicAuthClientWithCustomHttpClient(robotUserName, robotPassword, client)
klog.Info("Enabled robot API debugging")
} else {
c = hrobot.NewBasicAuthClient(robotUserName, robotPassword)
klog.Infof("Not enabling robot API debugging. Set env var %s=true to enable it.", robotDebugENVVar)
var httpClient *http.Client
if os.Getenv(robotDebugENVVar) == "true" {
httpClient = &http.Client{
Transport: &LoggingTransport{
roundTripper: http.DefaultTransport,
},
}
robotClient = cache.NewClient(c, cacheTimeout)
} else {
klog.Infof("Hetzner robot is not support because of insufficient credentials. Robot user name specified: %v. Robot password specified: %v", robotUserName != "", robotPassword != "")
}
robotClient, err := cache.NewCachedRobotClient(rootDir, httpClient, "")
if err != nil {
return nil, fmt.Errorf("%s: %w", op, err)
}

if robotClient == nil {
klog.Info("Robot client is nil, will not be able to manage bare metal servers.")
}

var networkID int64
if v, ok := os.LookupEnv(hcloudNetworkENVVar); ok {
n, _, err := client.Network.Get(context.Background(), v)
n, _, err := hcloudClient.Network.Get(context.Background(), v)
if err != nil {
return nil, fmt.Errorf("%s: %w", op, err)
}
Expand Down Expand Up @@ -210,7 +200,7 @@ func newCloud(_ io.Reader) (cloudprovider.Interface, error) {
}

// Validate that the provided token works, and we have network connectivity to the Hetzner Cloud API
_, _, err = client.Server.List(context.Background(), hcloud.ServerListOpts{})
_, _, err = hcloudClient.Server.List(context.Background(), hcloud.ServerListOpts{})
if err != nil {
return nil, fmt.Errorf("%s: %w", op, err)
}
Expand All @@ -228,30 +218,39 @@ func newCloud(_ io.Reader) (cloudprovider.Interface, error) {
lbRecorder := eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "hetzner-ccm-loadbalancer"})

lbOps := &hcops.LoadBalancerOps{
LBClient: &client.LoadBalancer,
CertOps: &hcops.CertificateOps{CertClient: &client.Certificate},
ActionClient: &client.Action,
NetworkClient: &client.Network,
LBClient: &hcloudClient.LoadBalancer,
CertOps: &hcops.CertificateOps{CertClient: &hcloudClient.Certificate},
ActionClient: &hcloudClient.Action,
NetworkClient: &hcloudClient.Network,
RobotClient: robotClient,
NetworkID: networkID,
Recorder: lbRecorder,
Defaults: lbOpsDefaults,
}

loadBalancers := newLoadBalancers(lbOps, &client.Action, lbDisablePrivateIngress, lbDisableIPv6)
loadBalancers := newLoadBalancers(lbOps, &hcloudClient.Action, lbDisablePrivateIngress, lbDisableIPv6)
if os.Getenv(hcloudLoadBalancersEnabledENVVar) == "false" {
loadBalancers = nil
}

instancesAddressFamily, err := addressFamilyFromEnv()
if err != nil {
return nil, fmt.Errorf("%s: %w", op, err)
}

credentialsDir := credentials.GetDirectory(rootDir)
_, err = os.Stat(credentialsDir)
if err == nil {
// Watch for changes in the secrets directory
err := credentials.Watch(credentialsDir, hcloudClient, robotClient)
if err != nil {
return nil, fmt.Errorf("%s: %w", op, err)
}
}

return &cloud{
client: client,
hcloudClient: hcloudClient,
robotClient: robotClient,
instances: newInstances(client, robotClient, instancesAddressFamily, networkID),
instances: newInstances(hcloudClient, robotClient, instancesAddressFamily, networkID),
loadBalancer: loadBalancers,
routes: nil,
networkID: networkID,
Expand Down Expand Up @@ -288,7 +287,7 @@ func (c *cloud) Clusters() (cloudprovider.Clusters, bool) {

func (c *cloud) Routes() (cloudprovider.Routes, bool) {
if c.networkID > 0 && os.Getenv(hcloudNetworkRoutesEnabledENVVar) != "false" {
r, err := newRoutes(c.client, c.networkID)
r, err := newRoutes(c.hcloudClient, c.networkID)
if err != nil {
klog.ErrorS(err, "create routes provider", "networkID", c.networkID)
return nil, false
Expand Down
Loading

0 comments on commit aa5bc33

Please sign in to comment.