feat: Bump rook-ceph-cluster to v1.14.7 #2358

Status: Closed · wants to merge 7 commits
@@ -699,7 +699,7 @@ spec:
- --metrics-bind-address=127.0.0.1:8080
- --leader-elect
- --namespace=${NAMESPACE:=git-operator-system}
- image: docker.io/mesosphere/git-operator:v0.8.2@sha256:7dde5a0836d212d8fe31f65c42bec32c91962858f6cb7da5526a0b60727141fb
+ image: docker.io/mesosphere/git-operator:v0.8.3@sha256:3a4c42890549394bbe3842d99c7bf143334c19dc6e8d13413cd796c219aead8e
livenessProbe:
httpGet:
path: /healthz
services/rook-ceph-cluster/1.14.7/defaults/cm.yaml (278 additions, 0 deletions)
@@ -0,0 +1,278 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: rook-ceph-cluster-1.14.7-d2iq-defaults
namespace: ${releaseNamespace}
data:
values.yaml: |
---
operatorNamespace: ${releaseNamespace}
clusterName: dkp-ceph-cluster
toolbox:
# If needed, enable the toolbox for debugging (it creates a pod with the ceph CLI).
# The name is hardcoded, so if more than one `rook-ceph-cluster` is deployed, this flag needs to be set to false.
# This is enabled by default to work around D2IQ-96634.
enabled: true
priorityClassName: dkp-high-priority
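# Usage sketch: assuming the chart's default toolbox deployment name (rook-ceph-tools)
# and substituting the namespace this release is installed into, the toolbox can be
# queried with, for example:
#   kubectl -n <release-namespace> exec -it deploy/rook-ceph-tools -- ceph status
#   kubectl -n <release-namespace> exec -it deploy/rook-ceph-tools -- ceph osd tree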

# All values below are taken from the CephCluster CRD
cephClusterSpec:
labels:
monitoring:
prometheus.kommander.d2iq.io/select: "true"
dataDirHostPath: /var/lib/rook

cephVersion:
# This image was patched to fix CVEs. To build a new version of the image,
# see https://github.com/mesosphere/dkp-container-images
image: ghcr.io/mesosphere/dkp-container-images/ceph/ceph:v18.2.2

resources:
mgr-sidecar:
limits:
cpu: "1"
memory: 500Mi
requests:
cpu: "100m"
memory: "40Mi"

mon:
count: 3
# Allow multiple mons on the same node only in test environments where data loss is acceptable.
allowMultiplePerNode: false
volumeClaimTemplate:
spec:
# Use the default storage class configured in the cluster:
# storageClass is intentionally unset so the environment default applies.
volumeMode: Filesystem
resources:
requests:
storage: 10Gi

mgr:
count: 2
allowMultiplePerNode: false

dashboard:
enabled: true
urlPrefix: ""
port: 8443
ssl: false

# priority classes to apply to ceph resources
priorityClassNames:
crashcollector: system-cluster-critical

storage:
storageClassDeviceSets:
- name: rook-ceph-osd-set1
count: 4
# Set this to false if your StorageClass does not support moving PVs from one node to another (e.g. ebs-sc supports this, but the local provisioner does not).
portable: true
encrypted: false
placement:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: topology.kubernetes.io/zone # The nodes in the same rack have the same topology.kubernetes.io/zone label.
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchExpressions:
- key: app
operator: In
values:
- rook-ceph-osd
- rook-ceph-osd-prepare
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchExpressions:
- key: app
operator: In
values:
- rook-ceph-osd
- rook-ceph-osd-prepare
volumeClaimTemplates:
# If there are both faster and slower devices, it is more efficient to use separate
# metadata, wal, and data devices; a sketch of that layout follows the data template below.
# Refer to https://rook.io/docs/rook/v1.10/CRDs/Cluster/pvc-cluster/#dedicated-metadata-and-wal-device-for-osd-on-pvc
- metadata:
name: data
spec:
resources:
requests:
storage: 40Gi
# Use the default storage class configured in the cluster:
# storageClass is intentionally unset so the environment default applies.
# OSDs require Block storage.
volumeMode: Block
accessModes:
- ReadWriteOnce
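# A hedged sketch of the dedicated metadata/wal layout referenced above: the template
# names (metadata/wal) follow the Rook PVC-cluster docs, while the storage class name
# and sizes below are illustrative placeholders only.
#   - metadata:
#       name: metadata            # RocksDB metadata, ideally on a faster device class
#     spec:
#       storageClassName: <fast-storage-class>
#       resources:
#         requests:
#           storage: 5Gi
#       volumeMode: Block
#       accessModes:
#         - ReadWriteOnce
#   - metadata:
#       name: wal                 # BlueStore write-ahead log, ideally on a faster device class
#     spec:
#       storageClassName: <fast-storage-class>
#       resources:
#         requests:
#           storage: 5Gi
#       volumeMode: Block
#       accessModes:
#         - ReadWriteOnce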
onlyApplyOSDPlacement: false
useAllDevices: false
useAllNodes: false

ingress:
dashboard:
enabled: true
annotations:
kubernetes.io/ingress.class: kommander-traefik
ingress.kubernetes.io/auth-response-headers: X-Forwarded-User
traefik.ingress.kubernetes.io/router.tls: "true"
traefik.ingress.kubernetes.io/router.middlewares: "${releaseNamespace}-stripprefixes@kubernetescrd,${releaseNamespace}-forwardauth@kubernetescrd"
host:
name: ""
path: "/dkp/kommander/ceph-dashboard"
tls: []
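# The stripprefixes/forwardauth middlewares referenced in the annotations above are
# provided by Kommander and defined outside this chart. As an assumed illustration of
# how the dashboard path is rewritten before reaching the dashboard (which serves at
# urlPrefix ""), a StripPrefix middleware of that kind would look roughly like:
#   apiVersion: traefik.io/v1alpha1   # older Traefik releases use traefik.containo.us/v1alpha1
#   kind: Middleware
#   metadata:
#     name: stripprefixes
#   spec:
#     stripPrefix:
#       prefixes:
#         - /dkp/kommander/ceph-dashboard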

logLevel: DEBUG

cephBlockPools: []
cephFileSystems: []
cephObjectStores:
- name: dkp-object-store
# see https://github.com/rook/rook/blob/master/Documentation/CRDs/Object-Storage/ceph-object-store-crd.md#object-store-settings for available configuration
spec:
metadataPool:
failureDomain: osd
# Must use replicated pool ONLY. Erasure coding is not supported.
replicated:
size: 3
dataPool:
# The failure domain: osd/host/(region or zone if available); technically, any type in the CRUSH map can be used.
failureDomain: osd
# The data pool can use either replication OR erasure coding. Consider the following example scenarios:
# - Replication:
#   - The smallest possible replica count for HA is 2; a size of 3 is more commonly used.
#   - With replication of size: 2, we store two copies of the data, which tolerates the loss of one copy.
#   - This gives 50% storage efficiency with a fault tolerance of 1 out of 4 nodes (storage node hosts or OSDs).
# - ErasureCoded:
#   - Slices an object into k data fragments and computes m parity fragments.
#   - The k + m = n fragments are spread across n storage nodes to provide data protection.
#   - At least k of the n fragments (data or parity) are needed to reconstruct the data, so we can afford to lose at most m fragments.
#   - The smallest possible configuration is k = 2, m = 1, i.e. n = k + m = 3; it works only if there are at least n = 3 storage nodes (hosts or OSDs).
#   - Storage overhead is m/k (as a percentage).
#   - Example configurations (the NKP docs recommend a cluster of at least 4 nodes):
#     - With 3 data chunks and 1 parity chunk (what shipped in 2.3.x with MinIO), we can afford to lose up to 1 chunk: 1/3 ≈ 33% storage overhead and fault tolerance of 1 out of 4 nodes.
#     - With 6 data chunks and 2 parity chunks the ratio is the same as above, but it needs 8 OSDs (the NKP cluster need not have 8 nodes; each node can have more than one OSD).
#
erasureCoded:
dataChunks: 3
codingChunks: 1
preservePoolsOnDelete: false
gateway:
port: 80
# securePort: 443
instances: 2
priorityClassName: system-cluster-critical
resources:
limits:
cpu: "750m"
memory: "1Gi"
requests:
cpu: "250m"
memory: "500Mi"
storageClass:
enabled: true
name: dkp-object-store # Defined once per namespace
reclaimPolicy: Delete

monitoring:
enabled: false
# Set this to true to deploy PrometheusRules if Prometheus is already installed.
# If you do not have an existing installation of Prometheus, attempting to deploy
# PrometheusRules may result in a failed installation of this chart.
createPrometheusRules: false

resources:
mgr:
limits:
cpu: "250m"
memory: "1Gi"
requests:
cpu: "100m"
memory: "512Mi"
mon:
limits:
cpu: "250m"
memory: "1Gi"
requests:
cpu: "100m"
memory: "512Mi"
osd:
limits:
cpu: "750m"
memory: "2Gi"
requests:
cpu: "250m"
memory: "1Gi"
prepareosd:
# limits: It is not recommended to set limits on the OSD prepare job since it's a one-time burst for memory
# that must be allowed to complete without an OOM kill
requests:
cpu: "500m"
memory: "50Mi"
mgr-sidecar:
limits:
cpu: "500m"
memory: "100Mi"
requests:
cpu: "100m"
memory: "40Mi"
crashcollector:
limits:
cpu: "250m"
memory: "60Mi"
requests:
cpu: "100m"
memory: "60Mi"
logcollector:
limits:
cpu: "500m"
memory: "1Gi"
requests:
cpu: "100m"
memory: "100Mi"
cleanup:
limits:
cpu: "500m"
memory: "1Gi"
requests:
cpu: "500m"
memory: "100Mi"

# This kubectl image is used in the object-bucket-claims overrides.
kubectlImage: bitnami/kubectl:1.29.6

#################################################################
## BEGIN NKP specific config overrides ##
## This is added as a workaround to use the same configmap for ##
## both rook-ceph-cluster & object-bucket-claims helmreleases. ##
#################################################################
dkp:
# The name of the bucket is important here, as Rook creates a
# ConfigMap and a Secret with the same name as the bucket.
# Velero/Loki refer to the Secret by name, so changing the bucket name
# here would affect the Velero/Loki configuration.
velero:
enabled: true
bucketName: dkp-velero
storageClassName: dkp-object-store
enableOBCHealthCheck: true
priorityClassName: system-cluster-critical
ttlSecondsAfterFinished: 100
additionalConfig:
maxSize: "20G"
grafana-loki:
enabled: true
bucketName: dkp-loki
storageClassName: dkp-object-store
enableOBCHealthCheck: true
priorityClassName: system-cluster-critical
ttlSecondsAfterFinished: 100
additionalConfig:
# maxObjects:
maxSize: "80G"
#################################################################
## END of dkp specific config overrides ##
#################################################################
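For reference, the velero block above is presumably rendered by the object-bucket-claims HelmRelease into an ObjectBucketClaim along the lines of the sketch below. The field names follow the Rook/lib-bucket-provisioner ObjectBucketClaim CRD; the exact metadata and naming produced by the chart may differ.

apiVersion: objectbucket.io/v1alpha1
kind: ObjectBucketClaim
metadata:
  name: dkp-velero                  # Rook creates a ConfigMap and a Secret with this same name
  namespace: <release-namespace>
spec:
  bucketName: dkp-velero            # fixed name (rather than generateBucketName) so Velero can reference it
  storageClassName: dkp-object-store
  additionalConfig:
    maxSize: "20G"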
services/rook-ceph-cluster/1.14.7/defaults/kustomization.yaml (4 additions, 0 deletions)
@@ -0,0 +1,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- cm.yaml
@@ -0,0 +1,13 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: ${releaseNamespace}
configMapGenerator:
- name: grafana-dashboards-rook-ceph-cluster
namespace: ${releaseNamespace}
files:
- rook-ceph-cluster.json
- rook-ceph-osd.json
- rook-ceph-pools.json
generatorOptions:
labels:
grafana_dashboard: "1"