Commit
feat: trying to migrate to cilium without fucking up
kashalls committed Jun 9, 2023
1 parent 29dc203 commit 66c3cd9
Showing 41 changed files with 400 additions and 374 deletions.
2 changes: 1 addition & 1 deletion .github/renovate.json
@@ -40,7 +40,7 @@
"kubernetes/.+\\.ya?ml$"
],
"matchStrings": [
"datasource=(?<datasource>\\S+) depName=(?<depName>\\S+)( versioning=(?<versioning>\\S+))?\n.*?\"(?<currentValue>.*)\"\n"
"datasource=(?<datasource>\\S+) depName=(?<depName>\\S+)( registryUrl=(?<registryUrl>\\S+))?\n.*?\"(?<currentValue>.*)\"\n"
],
"datasourceTemplate": "{{#if datasource}}{{{datasource}}}{{else}}github-releases{{/if}}",
"versioningTemplate": "{{#if versioning}}{{{versioning}}}{{else}}semver{{/if}}"
19 changes: 5 additions & 14 deletions .github/renovate/groups.json
@@ -12,20 +12,11 @@
"separateMinorPatch": true
},
{
"description": "Immich images",
"groupName": "Immich",
"matchPackagePatterns": ["immich"],
"matchDatasources": ["docker"],
"group": {
"commitMessageTopic": "{{{groupName}}} group"
},
"separateMinorPatch": true
},
{
"description": "Rook-Ceph image and chart",
"groupName": "Rook-Ceph",
"matchPackagePatterns": ["rook.ceph"],
"matchDatasources": ["docker", "helm"],
"description": "Flux Group",
"groupName": "Flux",
"matchPackagePatterns": ["flux"],
"matchDatasources": ["docker", "github-tags"],
"versioning": "semver",
"group": {
"commitMessageTopic": "{{{groupName}}} group"
},
3 changes: 1 addition & 2 deletions .taskfiles/ClusterTasks.yml
@@ -26,8 +26,7 @@ tasks:
reconcile:
desc: Force update Flux to pull in changes from your Git repository
cmds:
- flux reconcile -n flux-system source git home-kubernetes
- flux reconcile -n flux-system kustomization cluster
- flux reconcile -n flux-system kustomization cluster --with-source
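# --with-source reconciles the referenced GitRepository first, replacing the separate "flux reconcile source git" command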

hr-restart:
desc: Restart all failed Helm Releases
15 changes: 8 additions & 7 deletions ansible/inventory/group_vars/kubernetes/k3s.yml
@@ -28,12 +28,9 @@ k3s_registration_address: "{{ kubevip_address }}"

# (list) A list of URLs to deploy on the primary control plane. Read notes below.
k3s_server_manifests_urls:
# Kube-vip
# Kube-vip RBAC
- url: https://raw.githubusercontent.com/kube-vip/kube-vip/main/docs/manifests/rbac.yaml
filename: custom-kube-vip-rbac.yaml
# Tigera Operator
- url: https://raw.githubusercontent.com/projectcalico/calico/v3.25.1/manifests/tigera-operator.yaml
filename: custom-calico-tigera-operator.yaml
# Prometheus Operator
- url: https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.65.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
filename: custom-prometheus-alertmanagerconfigs.yaml
@@ -56,8 +53,12 @@ k3s_server_manifests_urls:
- url: https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.65.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml
filename: custom-prometheus-prometheusagents.yaml

# (list) A flat list of templates to deploy on the primary control plane
# (list) A flat list of templates to deploy on the primary control plane nodes
# /var/lib/rancher/k3s/server/manifests
k3s_server_manifests_templates:
- custom-calico-installation.yaml.j2
- custom-kube-vip-daemonset.yaml.j2
- cilium-helmchart.yaml.j2

# (list) A flat list of templates to deploy as static pods on all the control plane nodes
# /var/lib/rancher/k3s/agent/pod-manifests
k3s_server_pod_manifests_templates:
- kube-vip-static-pod.yaml.j2
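
The cilium-helmchart.yaml.j2 template referenced above is not shown in this diff; a minimal sketch of what it likely contains follows, assuming the chart comes from https://helm.cilium.io and reuses the inventory variables defined in this commit — the chart version and values are illustrative, not taken from the repository. The metadata name has to be "cilium" in kube-system for the helm-install-cilium Job and the HelmChart waits in cluster-installation.yml to line up.

apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
  name: cilium
  namespace: kube-system
spec:
  repo: https://helm.cilium.io
  chart: cilium
  version: 1.13.3                     # assumed version
  targetNamespace: kube-system
  bootstrap: true
  valuesContent: |-
    # kube-proxy is disabled in master/k3s.yml, so Cilium handles service traffic
    kubeProxyReplacement: strict
    k8sServiceHost: "{{ kubevip_address }}"
    k8sServicePort: 6443
    ipam:
      mode: cluster-pool
      operator:
        clusterPoolIPv4PodCIDRList: ["{{ cluster_cidr }}"]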
3 changes: 0 additions & 3 deletions ansible/inventory/group_vars/kubernetes/kube-vip.yml

This file was deleted.

17 changes: 0 additions & 17 deletions ansible/inventory/group_vars/kubernetes/os.yml
@@ -1,25 +1,8 @@
---
# (string) Timezone for the servers
# timezone: "America/New_York"

# (list) Additional ssh public keys to add to the nodes
# ssh_authorized_keys:

fedora:
packages:
- dnf-plugin-system-upgrade
- dnf-utils
- hdparm
- htop
- ipvsadm
- lm_sensors
- nano
- nvme-cli
- socat
- python3-kubernetes
- python3-libselinux
- python3-pyyaml

ubuntu:
packages:
- hdparm
5 changes: 5 additions & 0 deletions ansible/inventory/group_vars/kubernetes/supplemental.yml
@@ -0,0 +1,5 @@
---
timezone: "America/Los_Angeles"
kubevip_address: "10.69.69.2"
cluster_cidr: "10.98.0.0/16"
service_cidr: "10.99.0.0/16"
4 changes: 1 addition & 3 deletions ansible/inventory/group_vars/master/k3s.yml
@@ -26,6 +26,7 @@ k3s_server:
- traefik
disable-network-policy: true
disable-cloud-controller: true
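# kube-proxy is dropped here; service handling is presumably taken over by Cilium's kube-proxy replacement (see cilium-helmchart.yaml.j2)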
disable-kube-proxy: true
write-kubeconfig-mode: "644"
# Network CIDR to use for pod IPs
cluster-cidr: "10.42.0.0/16"
@@ -34,9 +35,6 @@
kube-controller-manager-arg:
# Required to monitor kube-controller-manager with kube-prometheus-stack
- "bind-address=0.0.0.0"
kube-proxy-arg:
# Required to monitor kube-proxy with kube-prometheus-stack
- "metrics-bind-address=0.0.0.0"
kube-scheduler-arg:
# Required to monitor kube-scheduler with kube-prometheus-stack
- "bind-address=0.0.0.0"
92 changes: 53 additions & 39 deletions ansible/playbooks/cluster-installation.yml
@@ -45,53 +45,67 @@
regexp: "https://127.0.0.1:6443"
replace: "https://{{ k3s_registration_address }}:6443"

- name: Resource Readiness Check
# Unmanaging and removing the Cilium HelmChart is required for
# flux to take over managing the lifecycle of Cilium

- name: Post installation of custom manifests tasks
run_once: true
kubernetes.core.k8s_info:
kubeconfig: /etc/rancher/k3s/k3s.yaml
kind: "{{ item.kind }}"
name: "{{ item.name }}"
namespace: "{{ item.namespace | default('') }}"
wait: true
wait_sleep: 10
wait_timeout: 360
loop:
- kind: Deployment
name: tigera-operator
namespace: tigera-operator
- kind: DaemonSet
name: kube-vip
namespace: kube-system
- kind: Installation
name: default
- kind: CustomResourceDefinition
name: alertmanagerconfigs.monitoring.coreos.com
- kind: CustomResourceDefinition
name: alertmanagers.monitoring.coreos.com
- kind: CustomResourceDefinition
name: podmonitors.monitoring.coreos.com
- kind: CustomResourceDefinition
name: probes.monitoring.coreos.com
- kind: CustomResourceDefinition
name: prometheuses.monitoring.coreos.com
- kind: CustomResourceDefinition
name: prometheusrules.monitoring.coreos.com
- kind: CustomResourceDefinition
name: servicemonitors.monitoring.coreos.com
- kind: CustomResourceDefinition
name: thanosrulers.monitoring.coreos.com
- kind: CustomResourceDefinition
name: scrapeconfigs.monitoring.coreos.com
- kind: CustomResourceDefinition
name: prometheusagents.monitoring.coreos.com
when:
- k3s_server_manifests_templates | length > 0
or k3s_server_manifests_urls | length > 0
- k3s_control_node is defined
- k3s_control_node
block:
- name: Wait for custom manifests to rollout
kubernetes.core.k8s_info:
kubeconfig: /etc/rancher/k3s/k3s.yaml
kind: "{{ item.kind }}"
name: "{{ item.name }}"
namespace: "{{ item.namespace | default('') }}"
wait: true
wait_sleep: 10
wait_timeout: 360
loop:
- name: cilium
kind: HelmChart
namespace: kube-system
- name: podmonitors.monitoring.coreos.com
kind: CustomResourceDefinition
- name: prometheusrules.monitoring.coreos.com
kind: CustomResourceDefinition
- name: servicemonitors.monitoring.coreos.com
kind: CustomResourceDefinition
- name: Wait for Cilium to rollout
kubernetes.core.k8s_info:
kubeconfig: /etc/rancher/k3s/k3s.yaml
kind: Job
name: helm-install-cilium
namespace: kube-system
wait: true
wait_condition:
type: Complete
status: true
wait_timeout: 360
- name: Patch the Cilium HelmChart to unmanage it
kubernetes.core.k8s_json_patch:
kubeconfig: /etc/rancher/k3s/k3s.yaml
name: cilium
kind: HelmChart
namespace: kube-system
patch:
- op: add
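# "~1" is the JSON Pointer escape for "/", so this path adds the helmcharts.helm.cattle.io/unmanaged annotation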
path: /metadata/annotations/helmcharts.helm.cattle.io~1unmanaged
value: "true"
- name: Remove the Cilium HelmChart CR
kubernetes.core.k8s:
kubeconfig: /etc/rancher/k3s/k3s.yaml
name: cilium
kind: HelmChart
namespace: kube-system
state: absent
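
Not part of this commit, but since cluster-prepare.yml now installs the Cilium CLI on the control nodes, a follow-up health check could slot in here; a rough sketch (task name and placement are assumptions):

- name: Verify Cilium is healthy
  ansible.builtin.command: cilium status --wait
  environment:
    KUBECONFIG: /etc/rancher/k3s/k3s.yaml
  changed_when: false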

# NOTE
# Cleaning up the manifests from the /var/lib/rancher/k3s/server/manifests directory
# Cleaning up certain manifests from the /var/lib/rancher/k3s/server/manifests directory
# is needed because k3s has an awesome "feature" to always re-deploy them when the k3s
# service is restarted. Removing them does not uninstall the manifests from your cluster.

21 changes: 20 additions & 1 deletion ansible/playbooks/cluster-nuke.yml
@@ -9,7 +9,7 @@
- name: nuke
prompt: |-
Are you sure you want to nuke this cluster?
Type YES I WANT TO DESTROY THIS CLUSTER to proceed
Type 'YES I WANT TO DESTROY THIS CLUSTER' to proceed
default: "n"
private: false
pre_tasks:
@@ -22,6 +22,25 @@
ansible.builtin.pause:
seconds: 5
tasks:
- name: Uninstall Cilium
when:
- k3s_control_node is defined
- k3s_control_node
ansible.builtin.shell: |
cilium uninstall --wait
environment:
KUBECONFIG: /etc/rancher/k3s/k3s.yaml

- name: Prevent k3s from starting on reboot
ansible.builtin.systemd:
name: k3s
enabled: false

- name: Reboot
ansible.builtin.reboot:
msg: Rebooting nodes
reboot_timeout: 3600

- name: Uninstall k3s
ansible.builtin.include_role:
name: xanmanning.k3s
54 changes: 16 additions & 38 deletions ansible/playbooks/cluster-prepare.yml
@@ -21,25 +21,13 @@
- name: Networking | Set hostname to inventory hostname
ansible.builtin.hostname:
name: "{{ inventory_hostname }}"
- name: Networking | Update /etc/hosts to include inventory hostname
- name: Networking | Update hosts file to include inventory hostname
ansible.builtin.blockinfile:
path: /etc/hosts
block: |
127.0.1.1 {{ inventory_hostname }}
- name: Packages | Fedora
block:
- name: Packages | Install required packages
ansible.builtin.dnf:
name: "{{ fedora.packages | default([]) }}"
state: present
update_cache: true
- name: Packages | Remove leaf packages
ansible.builtin.dnf:
autoremove: true
when: ansible_facts['distribution'] == 'Fedora'

- name: Packages | Ubuntu
- name: Packages
block:
- name: Packages | Install required packages
ansible.builtin.apt:
@@ -55,7 +43,17 @@
install_recommends: false
notify: Reboot
when: "'raspi' in ansible_kernel"
when: ansible_facts['distribution'] == 'Ubuntu'

- name: Packages | Cilium CLI
ansible.builtin.include_role:
name: githubixx.cilium_cli
public: true
vars:
cilium_cli_arch: "{{ 'amd64' if ansible_architecture == 'x86_64' else 'arm64' }}"
cilium_cli_tmp_directory: /tmp
when:
- k3s_control_node is defined
- k3s_control_node

- name: User Configuration
block:
@@ -67,20 +65,12 @@

- name: System Configuration (1)
block:
- name: System Configuration (1) | Disable firewalld | Fedora
ansible.builtin.systemd:
service: firewalld.service
enabled: false
masked: true
state: stopped
when: ansible_facts['distribution'] == 'Fedora'
- name: System Configuration (1) | Disable ufw | Ubuntu
- name: System Configuration (1) | Disable ufw
ansible.builtin.systemd:
service: ufw.service
enabled: false
masked: true
state: stopped
when: ansible_facts['distribution'] == 'Ubuntu'
- name: System Configuration (1) | Enable fstrim
ansible.builtin.systemd:
service: fstrim.timer
@@ -117,28 +107,16 @@
net.bridge.bridge-nf-call-ip6tables: 1
fs.inotify.max_user_watches: 524288
fs.inotify.max_user_instances: 512
- name: System Configuration (2) | Disable swap | Fedora
ansible.builtin.dnf:
name: zram-generator-defaults
state: absent
when: ansible_facts['distribution'] == 'Fedora'
- name: System Configuration (2) | Disable swap at runtime | Ubuntu
- name: System Configuration (2) | Disable swap at runtime
ansible.builtin.command: swapoff -a
when:
- ansible_facts['distribution'] == 'Ubuntu'
- ansible_swaptotal_mb > 0
- name: System Configuration (2) | Disable swap at boot | Ubuntu
- name: System Configuration (2) | Disable swap at boot
ansible.posix.mount:
name: "{{ item }}"
fstype: swap
state: absent
loop: ["none", "swap"]
when: ansible_facts['distribution'] == 'Ubuntu'
- name: System Configuration (2) | Permissive SELinux | Fedora
ansible.posix.selinux:
state: permissive
policy: targeted
when: ansible_facts['distribution'] == 'Fedora'
notify: Reboot

handlers:
27 changes: 27 additions & 0 deletions ansible/playbooks/cluster-upgrade.yml
@@ -0,0 +1,27 @@
---
- hosts:
- master
- worker
become: true
gather_facts: true
any_errors_fatal: true
pre_tasks:
- name: Pausing for 5 seconds...
ansible.builtin.pause:
seconds: 5
tasks:
- name: Ensure Kubernetes is running
ansible.builtin.include_role:
name: xanmanning.k3s
public: true
vars:
k3s_state: started

- name: Upgrade kube-vip
when:
- k3s_control_node is defined
- k3s_control_node
ansible.builtin.template:
src: templates/kube-vip-static-pod.yaml.j2
dest: "{{ k3s_server_pod_manifests_dir }}/kube-vip-static-pod.yaml"
mode: preserve