Skip to content

Commit

Permalink
allow user-specified git-sync config settings (#335)
Browse files Browse the repository at this point in the history
* concatenate user-supplied configs to internal value

* updated changelog

* added explanation

* Update rust/crd/src/lib.rs

Co-authored-by: Sebastian Bernauer <sebastian.bernauer@stackable.de>

* refined test to check internal config settings

* corrected changelog

* replace lcoal variable with constant

* review feedback

* move git-sync struct/impl/tests into separate class

---------

Co-authored-by: Sebastian Bernauer <sebastian.bernauer@stackable.de>
  • Loading branch information
adwk67 and sbernauer authored Oct 13, 2023
1 parent 8ee3628 commit 3f55278
Show file tree
Hide file tree
Showing 6 changed files with 241 additions and 141 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,15 @@
### Fixed

- BREAKING: Rename Service port name from `airflow` to `http` for consistency reasons. This change should normally not be breaking, as we only change the name, not the port. However, there might be some e.g. Ingresses that rely on the port name and need to be updated ([#316]).
- Fix user-supplied gitsync git-config settings ([#335]).

[#303]: https://github.com/stackabletech/airflow-operator/pull/303
[#308]: https://github.com/stackabletech/airflow-operator/pull/308
[#311]: https://github.com/stackabletech/airflow-operator/pull/311
[#312]: https://github.com/stackabletech/airflow-operator/pull/312
[#316]: https://github.com/stackabletech/airflow-operator/pull/316
[#330]: https://github.com/stackabletech/airflow-operator/pull/330
[#335]: https://github.com/stackabletech/airflow-operator/pull/335

## [23.7.0] - 2023-07-14

Expand Down
1 change: 1 addition & 0 deletions docs/modules/airflow/examples/example-airflow-gitsync.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@ spec:
--rev: HEAD # <10>
# --rev: git-sync-tag # N.B. tag must be covered by "depth" (the number of commits to clone)
# --rev: 39ee3598bd9946a1d958a448c9f7d3774d7a8043 # N.B. commit must be covered by "depth"
--git-config: http.sslCAInfo:/tmp/ca-cert/ca.crt # <11>
webservers:
...
1 change: 1 addition & 0 deletions docs/modules/airflow/pages/usage-guide/mounting-dags.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ include::example$example-airflow-gitsync.yaml[]
<8> The name of the `Secret` used to access the repository if it is not public. This should include two fields: `user` and `password` (which can be either a password - which is not recommended - or a github token, as described https://github.com/kubernetes/git-sync/tree/v3.6.4#flags-which-configure-authentication[here])
<9> A map of optional configuration settings that are listed in https://github.com/kubernetes/git-sync/tree/v3.6.4#primary-flags[this] configuration section (and the ones that follow on that link)
<10> An example showing how to specify a target revision (the default is HEAD). The revision can also a be tag or a commit, though this assumes that the target hash is contained within the number of commits specified by `depth`. If a tag or commit hash is specified, then git-sync will recognise that and not perform further cloning.
<11> Git-sync settings can be provided inline, although some of these (`--dest`, `--root`) are specified internally in the operator and will be ignored if provided by the user. Git-config settings can also be specified, although a warning will be logged if `safe.directory` is specified as this is defined internally, and should not be defined by the user.


IMPORTANT: The example above shows a _*list*_ of git-sync definitions, with a single element. This is to avoid breaking-changes in future releases. Currently, only one such git-sync definition is considered and processed.
231 changes: 231 additions & 0 deletions rust/crd/src/git_sync.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
use crate::{GIT_LINK, GIT_ROOT, GIT_SAFE_DIR, GIT_SYNC_DEPTH, GIT_SYNC_WAIT};
use serde::{Deserialize, Serialize};
use stackable_operator::schemars::{self, JsonSchema};
use std::collections::BTreeMap;

#[derive(Clone, Debug, Default, Deserialize, JsonSchema, PartialEq, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct GitSync {
pub repo: String,
pub branch: Option<String>,
pub git_folder: Option<String>,
pub depth: Option<u8>,
pub wait: Option<u16>,
pub credentials_secret: Option<String>,
pub git_sync_conf: Option<BTreeMap<String, String>>,
}
impl GitSync {
pub fn get_args(&self) -> Vec<String> {
let mut args: Vec<String> = vec![];
let mut git_config = format!("{GIT_SAFE_DIR}:{GIT_ROOT}");

args.extend(vec![
"/stackable/git-sync".to_string(),
format!("--repo={}", self.repo.clone()),
format!(
"--branch={}",
self.branch.clone().unwrap_or_else(|| "main".to_string())
),
format!("--depth={}", self.depth.unwrap_or(GIT_SYNC_DEPTH)),
format!("--wait={}", self.wait.unwrap_or(GIT_SYNC_WAIT)),
format!("--dest={GIT_LINK}"),
format!("--root={GIT_ROOT}"),
]);
if let Some(git_sync_conf) = self.git_sync_conf.as_ref() {
for (key, value) in git_sync_conf {
// config options that are internal details have
// constant values and will be ignored here
if key.eq_ignore_ascii_case("--dest") || key.eq_ignore_ascii_case("--root") {
tracing::warn!("Config option {:?} will be ignored...", key);
} else {
// both "-git-config" and "--gitconfig" are recognized by gitsync
if key.to_lowercase().ends_with("-git-config") {
if value.to_lowercase().contains(GIT_SAFE_DIR) {
tracing::warn!("Config option {value:?} contains a value for {GIT_SAFE_DIR} that overrides
the value of this operator. Git-sync functionality will probably not work as expected!");
}
git_config = format!("{git_config},{value}");
} else {
args.push(format!("{key}={value}"));
}
}
}
args.push(format!("--git-config='{git_config}'"));
}
args
}
}

#[cfg(test)]
mod tests {
use crate::AirflowCluster;
use rstest::rstest;

#[test]
fn test_git_sync() {
let cluster = "
apiVersion: airflow.stackable.tech/v1alpha1
kind: AirflowCluster
metadata:
name: airflow
spec:
image:
productVersion: 2.6.1
clusterConfig:
loadExamples: false
exposeConfig: false
credentialsSecret: simple-airflow-credentials
dagsGitSync:
- name: git-sync
repo: https://github.com/stackabletech/airflow-operator
branch: feat/git-sync
wait: 20
gitSyncConf: {}
gitFolder: tests/templates/kuttl/mount-dags-gitsync/dags
webservers:
roleGroups:
default:
config: {}
celeryExecutors:
roleGroups:
default:
config: {}
schedulers:
roleGroups:
default:
config: {}
";

let deserializer = serde_yaml::Deserializer::from_str(cluster);
let cluster: AirflowCluster =
serde_yaml::with::singleton_map_recursive::deserialize(deserializer).unwrap();

assert!(cluster.git_sync().is_some(), "git_sync was not Some!");
assert_eq!(
Some("tests/templates/kuttl/mount-dags-gitsync/dags".to_string()),
cluster.git_sync().unwrap().git_folder
);
}

#[test]
fn test_git_sync_config() {
let cluster = "
apiVersion: airflow.stackable.tech/v1alpha1
kind: AirflowCluster
metadata:
name: airflow
spec:
image:
productVersion: 2.6.1
clusterConfig:
loadExamples: false
exposeConfig: false
credentialsSecret: simple-airflow-credentials
dagsGitSync:
- name: git-sync
repo: https://github.com/stackabletech/airflow-operator
branch: feat/git-sync
wait: 20
gitSyncConf:
--rev: c63921857618a8c392ad757dda13090fff3d879a
gitFolder: tests/templates/kuttl/mount-dags-gitsync/dags
webservers:
roleGroups:
default:
config: {}
celeryExecutors:
roleGroups:
default:
config: {}
schedulers:
roleGroups:
default:
config: {}
";

let deserializer = serde_yaml::Deserializer::from_str(cluster);
let cluster: AirflowCluster =
serde_yaml::with::singleton_map_recursive::deserialize(deserializer).unwrap();

assert!(cluster
.git_sync()
.unwrap()
.get_args()
.iter()
.any(|c| c == "--rev=c63921857618a8c392ad757dda13090fff3d879a"));
}

#[rstest]
#[case(
"\"--git-config\": \"http.sslCAInfo:/tmp/ca-cert/ca.crt\"",
"--git-config='safe.directory:/tmp/git,http.sslCAInfo:/tmp/ca-cert/ca.crt'"
)]
#[case(
"\"-git-config\": \"http.sslCAInfo:/tmp/ca-cert/ca.crt\"",
"--git-config='safe.directory:/tmp/git,http.sslCAInfo:/tmp/ca-cert/ca.crt'"
)]
#[case(
"\"--git-config\": http.sslCAInfo:/tmp/ca-cert/ca.crt",
"--git-config='safe.directory:/tmp/git,http.sslCAInfo:/tmp/ca-cert/ca.crt'"
)]
#[case(
"--git-config: http.sslCAInfo:/tmp/ca-cert/ca.crt",
"--git-config='safe.directory:/tmp/git,http.sslCAInfo:/tmp/ca-cert/ca.crt'"
)]
#[case(
"'--git-config': 'http.sslCAInfo:/tmp/ca-cert/ca.crt'",
"--git-config='safe.directory:/tmp/git,http.sslCAInfo:/tmp/ca-cert/ca.crt'"
)]
#[case(
"--git-config: 'http.sslCAInfo:/tmp/ca-cert/ca.crt,safe.directory:/tmp/git2'",
"--git-config='safe.directory:/tmp/git,http.sslCAInfo:/tmp/ca-cert/ca.crt,safe.directory:/tmp/git2'"
)]
fn test_git_sync_git_config(#[case] input: &str, #[case] output: &str) {
let cluster = format!(
"
apiVersion: airflow.stackable.tech/v1alpha1
kind: AirflowCluster
metadata:
name: airflow
spec:
image:
productVersion: 2.6.1
clusterConfig:
loadExamples: false
exposeConfig: false
credentialsSecret: simple-airflow-credentials
dagsGitSync:
- name: git-sync
repo: https://github.com/stackabletech/airflow-operator
branch: feat/git-sync
wait: 20
gitSyncConf:
{input}
gitFolder: tests/templates/kuttl/mount-dags-gitsync/dags
webservers:
roleGroups:
default:
replicas: 1
celeryExecutors:
roleGroups:
default:
replicas: 1
schedulers:
roleGroups:
default:
replicas: 1
"
);

let deserializer = serde_yaml::Deserializer::from_str(cluster.as_str());
let cluster: AirflowCluster =
serde_yaml::with::singleton_map_recursive::deserialize(deserializer).unwrap();

assert!(cluster
.git_sync()
.unwrap()
.get_args()
.iter()
.any(|c| c == { output }));
}
}
Loading

0 comments on commit 3f55278

Please sign in to comment.