Skip to content

Commit

Permalink
WIP GHA as local cache
Browse files Browse the repository at this point in the history
  • Loading branch information
Alphare committed Apr 4, 2024
1 parent a859c3b commit 37bf416
Show file tree
Hide file tree
Showing 6 changed files with 183 additions and 13 deletions.
8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ name = "sccache"
name = "sccache-dist"
required-features = ["dist-server"]

[profile.release]
codegen-units = 1
lto = true
strip = true
# [profile.release]
# codegen-units = 1
# lto = true
# strip = true

[dependencies]
anyhow = { version = "1.0", features = ["backtrace"] }
Expand Down
59 changes: 52 additions & 7 deletions src/cache/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -591,12 +591,25 @@ pub fn storage_from_config(
return Ok(Arc::new(storage));
}
#[cfg(feature = "gha")]
CacheType::GHA(config::GHACacheConfig { ref version, .. }) => {
debug!("Init gha cache with version {version}");

let storage = GHACache::build(version)
.map_err(|err| anyhow!("create gha cache failed: {err:?}"))?;
return Ok(Arc::new(storage));
CacheType::GHA(config::GHACacheConfig {
ref version,
as_local,
..
}) => {
if *as_local {
// TODO use sccache version as cache-busting or
// a specific one?
debug!("Init gha as local cache");
let downloaded_path = GHACache::download_to_local(config)
.map_err(|err| anyhow!("download gha cache as local failed: {err:?}"))?;
let storage = disk_cache_from_config(config, pool, downloaded_path)?;
return Ok(storage);
} else {
debug!("Init gha cache with version {version}");
let storage = GHACache::build(version)
.map_err(|err| anyhow!("create gha cache failed: {err:?}"))?;
return Ok(Arc::new(storage));
}
}
#[cfg(feature = "memcached")]
CacheType::Memcached(config::MemcachedCacheConfig {
Expand Down Expand Up @@ -724,7 +737,21 @@ pub fn storage_from_config(
}
}

let (dir, size) = (&config.fallback_cache.dir, config.fallback_cache.size);
disk_cache_from_config(config, pool, None)
}

fn disk_cache_from_config(
config: &Config,
pool: &tokio::runtime::Handle,
root_override: Option<PathBuf>,
) -> Result<Arc<dyn Storage>> {
let (mut dir, size) = (
config.fallback_cache.dir.to_owned(),
config.fallback_cache.size,
);
if let Some(new_root) = root_override {
dir = dir.join(new_root);
}
let preprocessor_cache_mode_config = config.fallback_cache.preprocessor_cache_mode;
let rw_mode = config.fallback_cache.rw_mode.into();
debug!("Init disk cache with dir {:?}, size {}", dir, size);
Expand All @@ -737,6 +764,24 @@ pub fn storage_from_config(
)))
}

/// Upload the local disk cache to the GHA cache service.
///
/// # Errors
///
/// Fails when the configured cache is not GHA, when the GHA cache is
/// disabled, when it is not configured `as_local`, or when the upload
/// itself fails.
#[cfg(feature = "gha")]
pub fn upload_local_cache(config: &Config) -> Result<()> {
    // Anything other than a GHA cache configuration cannot be uploaded.
    let gha_config = match config.cache.as_ref() {
        Some(CacheType::GHA(gha_config)) => gha_config,
        _ => bail!("Uploading the local cache is only possible when using GitHub Actions"),
    };

    if !gha_config.enabled {
        bail!("GHA cache is disabled in config");
    }
    if !gha_config.as_local {
        bail!("GHA not configured `as_local`");
    }

    GHACache::upload_local_cache(config)
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
90 changes: 90 additions & 0 deletions src/cache/gha.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,20 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::path::PathBuf;

use opendal::layers::LoggingLayer;
use opendal::services::Ghac;
use opendal::Operator;

use crate::config::Config;
use crate::errors::*;
use crate::VERSION;

// TODO: key the cache on OS / architecture (and possibly other things)?
// Allow users to pass in a cache key?
/// Name used both as the key of the full-cache entry read from / written to
/// the GHA cache service and as the directory name (joined onto
/// `config.fallback_cache.dir`) of the local copy of that cache.
const FULL_GHA_CACHE_ROOT: &str = "sccache-full-ghacache";

/// A cache that stores entries in GHA Cache Services.
pub struct GHACache;

Expand All @@ -43,4 +50,87 @@ impl GHACache {
.finish();
Ok(op)
}

/// Download a copy of the entire GHA cache and return the path
/// to the root folder on the local disk.
pub fn download_to_local(config: &Config) -> Result<Option<PathBuf>> {
// TODO maybe make this a folder and get the latest one?
// How does concurrency work in GHA? What happens if multiple jobs
// write at the same time?
// Is it just the user's responsibility to namespace the cache?
let tarball_path = local_cache_tarball_path(config);
let op = Operator::new(Ghac::default())?
.layer(LoggingLayer::default())
.finish()
.blocking();
match op.reader(FULL_GHA_CACHE_ROOT) {
Ok(mut reader) => {
debug!("Found full gha cache");
let mut writer = std::fs::OpenOptions::new()
.write(true)
.open(&tarball_path)?;
std::io::copy(&mut reader, &mut writer)?;
}
Err(error) => match error.kind() {
opendal::ErrorKind::NotFound => {
debug!("Full gha cache not found");
return Ok(None);
}
// TODO when/if using a folder
// opendal::ErrorKind::IsADirectory => todo!(),
// opendal::ErrorKind::NotADirectory => todo!(),
_ => bail!(error),
},
};

let cache = local_cache_path(config);
let tarball =
std::fs::File::open(tarball_path).context("Failed to open the GHA cache tarball")?;
tar::Archive::new(tarball)
.unpack(&cache)
.context("Failed to extract the GHA cache tarball")?;

Ok(Some(cache))
}

/// Upload a tarball of the local cache
pub fn upload_local_cache(config: &Config) -> Result<()> {
// TODO same as in [`Self::download_to_local`]
let cache = local_cache_path(config);
if !cache.exists() {
bail!("Local cache does not exist: nothing to do")
}
debug!("Found local gha cache at {}", cache.display());

let mut tar_builder = tar::Builder::new(vec![]);
tar_builder
.append_dir_all(local_cache_path(config), ".")
.context("Failed to create GHA local cache tarball")?;
let source = local_cache_tarball_path(config);
std::fs::write(&source, tar_builder.into_inner()?)
.context("Failed to write the GHA local cache tarball to disk")?;

let op = Operator::new(Ghac::default())?
.layer(LoggingLayer::default())
.finish()
.blocking();
match op.writer(FULL_GHA_CACHE_ROOT) {
Ok(mut writer) => {
let mut reader = std::fs::File::open(&source)?;
std::io::copy(&mut reader, &mut writer)?;
Ok(())
}
Err(error) => bail!(error),
}
}
}

/// Path of the on-disk tarball holding the full GHA cache: a `.tar` file
/// named after [`FULL_GHA_CACHE_ROOT`], directly inside the fallback disk
/// cache directory.
fn local_cache_tarball_path(config: &Config) -> PathBuf {
    // Build the file name explicitly: `set_extension` expects the extension
    // without its leading dot, so `set_extension(".tar")` produced `..tar`.
    config
        .fallback_cache
        .dir
        .join(format!("{FULL_GHA_CACHE_ROOT}.tar"))
}

/// Directory holding the local copy of the full GHA cache: a folder named
/// after [`FULL_GHA_CACHE_ROOT`] inside the fallback disk cache directory.
fn local_cache_path(config: &Config) -> PathBuf {
    let fallback_dir = &config.fallback_cache.dir;
    fallback_dir.join(FULL_GHA_CACHE_ROOT)
}
18 changes: 18 additions & 0 deletions src/cmdline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ pub enum Command {
env_vars: Vec<(OsString, OsString)>,
},
DebugPreprocessorCacheEntries,
/// Uploads the local cache to a shared storage
#[cfg(feature = "gha")]
UploadCache,
}

fn flag_infer_long_and_short(name: &'static str) -> Arg {
Expand Down Expand Up @@ -156,6 +159,10 @@ fn get_clap_command() -> clap::Command {
.value_name("FMT")
.value_parser(clap::value_parser!(StatsFormat))
.default_value(StatsFormat::default().as_str()),
#[cfg(feature = "gha")]
flag_infer_long("upload-cache")
.help("upload the local cache to the configured shared storage")
.action(ArgAction::SetTrue),
Arg::new("CMD")
.value_parser(clap::value_parser!(OsString))
.trailing_var_arg(true)
Expand All @@ -171,6 +178,8 @@ fn get_clap_command() -> clap::Command {
"show-adv-stats",
"start-server",
"stop-server",
#[cfg(feature = "gha")]
"upload-cache",
"zero-stats",
"package-toolchain",
"CMD",
Expand Down Expand Up @@ -274,6 +283,15 @@ pub fn try_parse() -> Result<Command> {
Ok(Command::DistAuth)
} else if matches.get_flag("dist-status") {
Ok(Command::DistStatus)
} else if matches.get_flag("upload-cache") {
#[cfg(feature = "gha")]
{
Ok(Command::UploadCache)
}
#[cfg(not(feature = "gha"))]
{
unreachable!("--upload-cache is only defined for GHA")
}
} else if matches.contains_id("package-toolchain") {
let mut toolchain_values = matches
.get_many("package-toolchain")
Expand Down
4 changes: 4 additions & 0 deletions src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
// limitations under the License.

use crate::cache::storage_from_config;
#[cfg(feature = "gha")]
use crate::cache::upload_local_cache;
use crate::client::{connect_to_server, connect_with_retry, ServerConnection};
use crate::cmdline::{Command, StatsFormat};
use crate::compiler::ColorMode;
Expand Down Expand Up @@ -743,6 +745,8 @@ pub fn run_command(cmd: Command) -> Result<i32> {
}
};
}
#[cfg(feature = "gha")]
Command::UploadCache => upload_local_cache(config)?,
#[cfg(not(feature = "dist-client"))]
Command::DistAuth => bail!(
"Distributed compilation not compiled in, please rebuild with the dist-client feature"
Expand Down
17 changes: 15 additions & 2 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,11 @@ pub struct GHACacheConfig {
/// Version for gha cache is a namespace. By setting different versions,
/// we can avoid mixed caches.
pub version: String,
/// Download the entire cache to be used like a local cache, then upload
/// it back if anything changed.
/// This is useful in CI contexts to reduce the number of requests,
/// hence avoiding rate limiting and improving overall cache speed.
pub as_local: bool,
}

/// Memcached's default value of expiration is 10800s (3 hours), which is too
Expand Down Expand Up @@ -784,24 +789,30 @@ fn config_from_env() -> Result<EnvConfig> {
});

// ======= GHA =======
let gha = if let Ok(version) = env::var("SCCACHE_GHA_VERSION") {
let mut gha = if let Ok(version) = env::var("SCCACHE_GHA_VERSION") {
// If SCCACHE_GHA_VERSION has been set, we don't need to check
// SCCACHE_GHA_ENABLED's value anymore.
Some(GHACacheConfig {
enabled: true,
version,
as_local: false,
})
} else if bool_from_env_var("SCCACHE_GHA_ENABLED")?.unwrap_or(false) {
// If only SCCACHE_GHA_ENABLED has been set to the true value, enable with
// default version.
Some(GHACacheConfig {
enabled: true,
version: "".to_string(),
as_local: false,
})
} else {
None
};

if let Some(gha) = &mut gha {
gha.as_local = bool_from_env_var("SCCACHE_GHA_AS_LOCAL")?.unwrap_or(false);
}

// ======= Azure =======
let azure = if let (Ok(connection_string), Ok(container)) = (
env::var("SCCACHE_AZURE_CONNECTION_STRING"),
Expand Down Expand Up @@ -1453,6 +1464,7 @@ service_account = "example_service_account"
[cache.gha]
enabled = true
version = "sccache"
as_local = false
[cache.memcached]
# Deprecated alias for `endpoint`
Expand Down Expand Up @@ -1519,7 +1531,8 @@ no_credentials = true
}),
gha: Some(GHACacheConfig {
enabled: true,
version: "sccache".to_string()
version: "sccache".to_string(),
as_local: false,
}),
redis: Some(RedisCacheConfig {
url: Some("redis://user:passwd@1.2.3.4:6379/?db=1".to_owned()),
Expand Down

0 comments on commit 37bf416

Please sign in to comment.