From 14f05e38cad037a6c15f33ff96ac0186a06c5db1 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sun, 14 Apr 2024 21:05:19 -0700 Subject: [PATCH 01/77] Moving pkg_mgmt feature into its own subdirectory in the repo --- lib/Cargo.toml | 1 + lib/src/metta/runner/environment.rs | 2 +- lib/src/metta/runner/mod.rs | 9 ++++++++- lib/src/metta/runner/modules/mod.rs | 8 +------- lib/src/metta/runner/{modules => pkg_mgmt}/catalog.rs | 7 +++++-- 5 files changed, 16 insertions(+), 11 deletions(-) rename lib/src/metta/runner/{modules => pkg_mgmt}/catalog.rs (99%) diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 794307579..0f552b062 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -11,6 +11,7 @@ directories = "5.0.1" # For Environment to find platform-specific config locatio smallvec = "1.10.0" im = "15.1.0" xxhash-rust = {version="0.8.7", features=["xxh3"], optional=true} +git2 = {version="0.18.3", optional=true} rand = "0.8.5" bitset = "0.1.2" dyn-fmt = "0.4.0" diff --git a/lib/src/metta/runner/environment.rs b/lib/src/metta/runner/environment.rs index eef79ca0e..7f55959ac 100644 --- a/lib/src/metta/runner/environment.rs +++ b/lib/src/metta/runner/environment.rs @@ -5,7 +5,7 @@ use std::fs; use std::sync::Arc; #[cfg(feature = "pkg_mgmt")] -use crate::metta::runner::modules::catalog::{ModuleCatalog, DirCatalog, FsModuleFormat, SingleFileModuleFmt, DirModuleFmt}; +use crate::metta::runner::pkg_mgmt::{ModuleCatalog, DirCatalog, FsModuleFormat, SingleFileModuleFmt, DirModuleFmt}; use directories::ProjectDirs; diff --git a/lib/src/metta/runner/mod.rs b/lib/src/metta/runner/mod.rs index 1834227c5..a4ca46725 100644 --- a/lib/src/metta/runner/mod.rs +++ b/lib/src/metta/runner/mod.rs @@ -66,8 +66,15 @@ use super::types::validate_atom; pub mod modules; use modules::{MettaMod, ModNameNode, ModuleLoader, ResourceKey, TOP_MOD_NAME, ModNameNodeDisplayWrapper}; + +//GOAT +// #[cfg(feature = "pkg_mgmt")] +// use modules::catalog::{ModuleDescriptor, loader_for_module_at_path}; + +#[cfg(feature = "pkg_mgmt")] +pub mod pkg_mgmt; #[cfg(feature = "pkg_mgmt")] -use modules::catalog::{ModuleDescriptor, loader_for_module_at_path}; +use pkg_mgmt::*; use std::rc::Rc; use std::path::PathBuf; diff --git a/lib/src/metta/runner/modules/mod.rs b/lib/src/metta/runner/modules/mod.rs index 6add4a503..3567746b7 100644 --- a/lib/src/metta/runner/modules/mod.rs +++ b/lib/src/metta/runner/modules/mod.rs @@ -1,6 +1,5 @@ -use std::path::{Path, PathBuf}; -use std::collections::HashMap; +use std::path::Path; use crate::metta::*; use crate::metta::runner::*; @@ -15,11 +14,6 @@ use super::interpreter_minimal::interpret; #[cfg(feature = "minimal")] use super::stdlib_minimal::*; -#[cfg(feature = "pkg_mgmt")] -pub mod catalog; -#[cfg(feature = "pkg_mgmt")] -use catalog::*; - mod mod_names; pub(crate) use mod_names::{ModNameNode, mod_name_from_path, mod_name_relative_path, module_name_is_legal, ModNameNodeDisplayWrapper}; pub use mod_names::{TOP_MOD_NAME, SELF_MOD_NAME, MOD_NAME_SEPARATOR}; diff --git a/lib/src/metta/runner/modules/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs similarity index 99% rename from lib/src/metta/runner/modules/catalog.rs rename to lib/src/metta/runner/pkg_mgmt/catalog.rs index 540c81326..00eb0bc22 100644 --- a/lib/src/metta/runner/modules/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -70,6 +70,7 @@ // // I think my personal preference is for #2. 
+use std::path::Path; use std::collections::hash_map::DefaultHasher; use std::hash::Hasher; use std::ffi::{OsStr, OsString}; @@ -139,9 +140,11 @@ pub struct DepEntry { /// If the fs_path is specified, the other pkg_info attributes will be ignored. //QUESTION: We need a MeTTa "style guide" for these field names, since they are effective going // to be part of the API, because a PkgInfo will be deserialized from atoms - pub fs_path: Option + pub fs_path: Option, + + /// Indicates that the dependency module should be fetched from the specified `git` URI + pub git_uri: Option, - //TODO: field for fetching from a specific git repo //TODO: field to indicate acceptable version range for dependency } From e8137b9ec3b06f9ddcc90239c0e755ec7e82f8e6 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 15 Apr 2024 15:01:18 -0700 Subject: [PATCH 02/77] Adding support for automatic git fetching and local caching for remote modules specified in PkgInfo --- docs/modules_dev.md | 2 +- lib/Cargo.toml | 2 +- lib/src/metta/runner/modules/mod.rs | 8 +- lib/src/metta/runner/pkg_mgmt/catalog.rs | 64 ++++++++- lib/src/metta/runner/pkg_mgmt/git_cache.rs | 147 +++++++++++++++++++++ python/tests/test_environment.py | 4 +- 6 files changed, 216 insertions(+), 11 deletions(-) create mode 100644 lib/src/metta/runner/pkg_mgmt/git_cache.rs diff --git a/docs/modules_dev.md b/docs/modules_dev.md index 696a98fbb..8852ee495 100644 --- a/docs/modules_dev.md +++ b/docs/modules_dev.md @@ -96,7 +96,7 @@ More information on the individual module file formats is available in the MeTTa ## The PkgInfo Structure -Each module has an associated [PkgInfo] structure, which provides the module author a place to specify meta-data about the module and express requirements for the module's dependencies. Additionally a [PkgInfo] can provide explicit loading instructions such as file system paths or github URIs for dependent modules. The [PkgInfo] structure is the same concept as the Cargo.toml file used in Cargo/Rust. +Each module has an associated [PkgInfo] structure, which provides the module author a place to specify meta-data about the module and express requirements for the module's dependencies. Additionally a [PkgInfo] can provide explicit loading instructions such as file system paths or github URLs for dependent modules. The [PkgInfo] structure is the same concept as the Cargo.toml file used in Cargo/Rust. The [PkgInfo] should be initialized inside the module's loader function. If it is not initialized then default values will be used. 
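To make the new [PkgInfo] fields concrete, here is a minimal sketch of a module loader that declares a git-hosted dependency. It mirrors the `TestLoader` used in the tests added later in this patch series; the module name, dependency name, and repository URL below are hypothetical placeholders, not part of the actual change.

```rust
// Illustrative sketch only; the names and URL are hypothetical placeholders.
#[derive(Debug)]
struct MyModLoader;

impl ModuleLoader for MyModLoader {
    fn load(&self, context: &mut RunContext) -> Result<(), String> {
        // Create the module's space and register the module with the runner
        let space = DynSpace::new(GroundingSpace::new());
        context.init_self_module(space, None);

        // Initialize the PkgInfo, declaring a dependency that should be
        // fetched from git and cached locally by the runner
        let pkg_info = context.module_mut().unwrap().pkg_info_mut();
        pkg_info.name = "my-mod".to_string();
        pkg_info.deps.insert("some-dep".to_string(), DepEntry {
            fs_path: None,
            git_url: Some("https://github.com/example/some-dep.git".to_string()),
            git_branch: None, // None means the repo's default branch
        });
        Ok(())
    }
}
```

When `my-mod` later runs `!(import! &self some-dep)`, the resolver consults this [PkgInfo] entry before falling back to the catalog search.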
diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 0f552b062..291a5442e 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -28,4 +28,4 @@ default = ["pkg_mgmt"] minimal = [] # enables minimal MeTTa interpreter variable_operation = [] # enables evaluation of the expressions which have # a variable on the first position -pkg_mgmt = ["xxhash-rust"] +pkg_mgmt = ["xxhash-rust", "git2"] diff --git a/lib/src/metta/runner/modules/mod.rs b/lib/src/metta/runner/modules/mod.rs index e2e48afc9..42cd60dcd 100644 --- a/lib/src/metta/runner/modules/mod.rs +++ b/lib/src/metta/runner/modules/mod.rs @@ -1,6 +1,5 @@ -use std::path::{Path, PathBuf}; -use std::collections::HashMap; +use std::path::Path; use std::cell::RefCell; use crate::metta::*; @@ -306,6 +305,11 @@ impl MettaMod { &self.pkg_info } + #[cfg(feature = "pkg_mgmt")] + pub fn pkg_info_mut(&mut self) -> &mut PkgInfo { + &mut self.pkg_info + } + pub fn space(&self) -> &DynSpace { &self.space } diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index 00eb0bc22..ce2c51268 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -76,8 +76,8 @@ use std::hash::Hasher; use std::ffi::{OsStr, OsString}; use crate::metta::text::OwnedSExprParser; -use crate::metta::runner::*; use crate::metta::runner::modules::*; +use crate::metta::runner::{*, git_cache::*}; use xxhash_rust::xxh3::xxh3_64; @@ -142,8 +142,12 @@ pub struct DepEntry { // to be part of the API, because a PkgInfo will be deserialized from atoms pub fs_path: Option, - /// Indicates that the dependency module should be fetched from the specified `git` URI - pub git_uri: Option, + /// Indicates that the dependency module should be fetched from the specified `git` URL + pub git_url: Option, + + /// A `git`` branch to fetch. Will be ignored if `git_url` is `None`. Uses the repo's + /// default branch if left unspecified + pub git_branch: Option, //TODO: field to indicate acceptable version range for dependency } @@ -168,8 +172,13 @@ impl PkgInfo { return loader_for_module_at_path(&context.metta, path, Some(mod_name), context.module().resource_dir()); } - //TODO, if git URI is specified in the dep entry, clone the repo to a location in the environment - // dir with a unique path (based on a random uuid), and resolve it within that directory's catalog + //If a git URL is specified in the dep entry, see if we have it in the git-cache and + // clone it locally if we don't + if let Some(url) = &entry.git_url { + let cached_mod = CachedModule::new(context.metta.environment(), None, mod_name, url, url, entry.git_branch.as_ref().map(|s| s.as_str()))?; + cached_mod.update(UpdateMode::PullIfMissing)?; + return loader_for_module_at_path(&context.metta, cached_mod.local_path(), Some(mod_name), context.module().resource_dir()); + } //TODO, If a version range is specified in the dep entry, then use that version range to specify // modules discovered in the catalogs @@ -603,3 +612,48 @@ fn recursive_submodule_import_test() { //LP-TODO-NEXT, Add a test for loading a module from a DirCatalog by passing a name with an extension (ie. 
`my_mod.metta`) to `resolve`, // and make sure the loaded module that comes back doesn't have the extension +#[derive(Debug)] +struct TestLoader; + +impl ModuleLoader for TestLoader { + fn load(&self, context: &mut RunContext) -> Result<(), String> { + let space = DynSpace::new(GroundingSpace::new()); + context.init_self_module(space, None); + + //Set up the module [PkgInfo] so it knows to load a sub-module from git + let pkg_info = context.module_mut().unwrap().pkg_info_mut(); + pkg_info.name = "test-mod".to_string(); + pkg_info.deps.insert("metta-morph".to_string(), DepEntry{ + fs_path: None, + //TODO: We probably want a smaller test repo + git_url: Some("https://github.com/trueagi-io/metta-morph/".to_string()), + git_branch: None, //Some("Hyperpose".to_string()), + }); + + Ok(()) + } +} + +/// Tests that a module can be fetched from git and loaded, when the git URL is specified in +/// the module's PkgInfo. This test requires a network connection +/// +/// NOTE. Ignored because we may not want it fetching from the internet when running the +/// test suite. Invoke `cargo test git_pkginfo_fetch_test -- --ignored` to run it. +#[ignore] +#[test] +fn git_pkginfo_fetch_test() { + + //Make a new runner, with the working dir in `/tmp/hyperon-test/` + let runner = Metta::new(Some(EnvBuilder::test_env().set_working_dir(Some(Path::new("/tmp/hyperon-test/"))))); + let _mod_id = runner.load_module_direct(Box::new(TestLoader), "test-mod").unwrap(); + + let result = runner.run(SExprParser::new("!(import! &self test-mod:metta-morph:mettamorph)")); + assert_eq!(result, Ok(vec![vec![expr!()]])); + + //Test that we can use a function imported from the module + let result = runner.run(SExprParser::new("!(sequential (A B))")); + assert_eq!(result, Ok(vec![vec![sym!("A"), sym!("B")]])); + + runner.display_loaded_modules(); +} + diff --git a/lib/src/metta/runner/pkg_mgmt/git_cache.rs b/lib/src/metta/runner/pkg_mgmt/git_cache.rs new file mode 100644 index 000000000..33df567cd --- /dev/null +++ b/lib/src/metta/runner/pkg_mgmt/git_cache.rs @@ -0,0 +1,147 @@ + +//! Manages a local cache of modules cloned from git +//! +//! Currently all network activity is synchronous. At some point it makes sense to move +//! to async in order to parallelize downloads, etc. When that time comes I would like +//! to look at the `asyncgit` crate. `https://crates.io/crates/asyncgit/0.26.0` +//! + +use std::path::{Path, PathBuf}; + +use xxhash_rust::xxh3::xxh3_64; +use git2::{*, build::*}; + +use crate::metta::runner::environment::Environment; + +/// Indicates the desired behavior for updating the locally-cached repo +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum UpdateMode { + PullIfMissing, + PullLatest, +} + +pub struct CachedModule { + _mod_name: String, + url: String, + branch: Option, + local_path: PathBuf, +} + +impl CachedModule { + /// Initializes a new CachedModule object + /// + /// * `cache_name` - A name to describe the cache. For the default cache for URLs specified + /// from the [PkgInfo], the cache is named `git-modules` + /// * `mod_name` - The catalog name of the module in the cache + /// * `ident_str` - An ascii string that identifies the specific module among other verions + /// of the module. 
+ /// * `url` - The URL from which to fetch the module + /// * `branch` - The branch to use, or default if None + pub fn new(env: &Environment, cache_name: Option<&str>, mod_name: &str, ident_str: &str, url: &str, branch: Option<&str>) -> Result { + let cache_name = cache_name.unwrap_or("git-modules"); + let working_dir = env.working_dir().ok_or_else(|| "Unable to clone git repository; no local working directory available".to_string())?; + let branch_str = match &branch { + Some(s) => s, + None => "" + }; + + let unique_id = xxh3_64(format!("{}{}", ident_str, branch_str).as_bytes()); + let local_filename = format!("{mod_name}.{unique_id:16x}"); + let local_path = working_dir.join(cache_name).join(local_filename); + + std::fs::create_dir_all(&local_path).map_err(|e| e.to_string())?; + + Ok(Self { + _mod_name: mod_name.to_string(), + url: url.to_string(), + branch: branch.map(|s| s.to_owned()), + local_path, + }) + } + + /// Updates a local cached repo with a remote repo, using `mode` behavior + pub fn update(&self, mode: UpdateMode) -> Result<(), String> { + match Repository::open(&self.local_path) { + + //We have an existing repo on disk + Ok(repo) => { + + //Do a `git pull` to bring it up to date + if mode == UpdateMode::PullLatest { + let mut remote = repo.find_remote("origin").map_err(|e| format!("Failed find 'origin' in git repo: {}, {}", self.url, e))?; + remote.connect(Direction::Fetch).map_err(|e| format!("Failed to connect to origin repo: {}, {}", self.url, e))?; + + let branch = self.get_branch(&remote)?; + remote.fetch(&[&branch], None, None).map_err(|e| format!("Failed fetch updates to git repo: {}, {}", self.url, e))?; + + let fetch_head = repo.find_reference("FETCH_HEAD").map_err(|e| e.to_string())?; + self.merge(&repo, &branch, &fetch_head).map_err(|e| format!("Failed to merge remote git repo: {}, {}", self.url, e))?; + } + Ok(()) + }, + Err(_) => { + + //We don't have a local repo, so clone it fresh + let mut repo_builder = RepoBuilder::new(); + match &self.branch { + Some(branch) => { + repo_builder.branch(branch); + }, + None => {} + } + match repo_builder.clone(&self.url, &self.local_path) { + Ok(_repo) => Ok(()), + Err(e) => Err(format!("Failed to clone git repo: {}, {}", self.url, e)), + } + }, + } + } + + /// Internal method to get the branch name + fn get_branch(&self, remote: &Remote) -> Result { + Ok(match &self.branch { + Some(b) => b.to_owned(), + None => remote.default_branch() + .map_err(|e| format!("Failed to resolve default branch name for git repo: {}, {}", self.url, e))? + .as_str().unwrap().to_string() + }) + } + + /// Internal method to perform a merge. Intended to approximate the `git merge` command-line behavior + fn merge(&self, repo: &Repository, branch: &str, incomming_commit_ref: &Reference) -> Result<(), git2::Error> { + let annotated_commit = repo.reference_to_annotated_commit(incomming_commit_ref)?; + let analysis = repo.merge_analysis(&[&annotated_commit])?; + + if analysis.0.is_up_to_date() { + return Ok(()); + } else if analysis.0.is_fast_forward() { + // Fast-forwarding... + let mut reference = repo.find_reference(branch)?; + + reference.set_target(annotated_commit.id(), "Fast-forward")?; + repo.checkout_head(None)?; + } else { + //NOTE: the below code appears to work, but it isn't needed at the moment + unreachable!(); + // // Normal merge... 
+ // let head_commit = repo.head()?.peel_to_commit()?; + // let incomming_commit = Reference::peel_to_commit(incomming_commit_ref)?; + // let mut idx = repo.merge_commits(&head_commit, &incomming_commit, None)?; + + // if idx.has_conflicts() { + // return Err(git2::Error::from_str("Merge conflicts detected")); + // } + + // let result_tree = repo.find_tree(idx.write_tree_to(&repo)?)?; + // let signature = repo.signature()?; + // repo.commit(Some("HEAD"), &signature, &signature, "Merge commit", &result_tree, &[&head_commit, &incomming_commit])?; + } + + Ok(()) + } + + /// Returns the file system path for the locally cloned repository + pub fn local_path(&self) -> &Path { + &self.local_path + } +} diff --git a/python/tests/test_environment.py b/python/tests/test_environment.py index b86cd4ec3..14b10c0ab 100644 --- a/python/tests/test_environment.py +++ b/python/tests/test_environment.py @@ -8,7 +8,7 @@ def __init__(self, methodName): super().__init__(methodName) def testEnvironment(self): - self.assertTrue(Environment.init_common_env(config_dir = "/tmp/test_dir", create_config = True)) - self.assertEqual(Environment.config_dir(), "/tmp/test_dir") + self.assertTrue(Environment.init_common_env(config_dir = "/tmp/hyperon-test", create_config = True)) + self.assertEqual(Environment.config_dir(), "/tmp/hyperon-test") self.assertFalse(Environment.init_common_env(disable_config = True)) From 069b3faf57feb2a632e7d0e5a2c5951fabd4984a Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 15 Apr 2024 17:14:10 -0700 Subject: [PATCH 03/77] Adding `git-module!` op to explicitly load modules from `git` without a catalog Adding `TryPullLatest` `UpdateMode` which will attempt to update the module if an internet connection is available, but won't fail if the connection isn't --- lib/src/metta/runner/modules/mod.rs | 2 +- lib/src/metta/runner/modules/mod_names.rs | 17 +++++- lib/src/metta/runner/pkg_mgmt/git_cache.rs | 25 ++++++++- lib/src/metta/runner/stdlib.rs | 62 ++++++++++++++++++++++ lib/src/metta/runner/stdlib_minimal.rs | 2 + 5 files changed, 104 insertions(+), 4 deletions(-) diff --git a/lib/src/metta/runner/modules/mod.rs b/lib/src/metta/runner/modules/mod.rs index 42cd60dcd..e4fa0e1a0 100644 --- a/lib/src/metta/runner/modules/mod.rs +++ b/lib/src/metta/runner/modules/mod.rs @@ -16,7 +16,7 @@ use super::interpreter_minimal::interpret; use super::stdlib_minimal::*; mod mod_names; -pub(crate) use mod_names::{ModNameNode, mod_name_from_path, normalize_relative_module_name, module_name_is_legal, mod_name_remove_prefix, decompose_name_path, compose_name_path, ModNameNodeDisplayWrapper}; +pub(crate) use mod_names::{ModNameNode, mod_name_from_path, normalize_relative_module_name, module_name_is_legal, module_name_make_legal, mod_name_remove_prefix, decompose_name_path, compose_name_path, ModNameNodeDisplayWrapper}; pub use mod_names::{TOP_MOD_NAME, SELF_MOD_NAME, MOD_NAME_SEPARATOR}; /// A reference to a [MettaMod] that is loaded into a [Metta] runner diff --git a/lib/src/metta/runner/modules/mod_names.rs b/lib/src/metta/runner/modules/mod_names.rs index 13289dd88..673b757f9 100644 --- a/lib/src/metta/runner/modules/mod_names.rs +++ b/lib/src/metta/runner/modules/mod_names.rs @@ -568,7 +568,7 @@ impl std::fmt::Display for ModNameNode { /// Returns `true` if a str is a legal name for a module /// /// A module name must be an ascii string, containing only alpha-numeric characters plus [`_`, `-`] -pub(crate) fn module_name_is_legal(name: &str) -> bool { +pub fn module_name_is_legal(name: &str) -> bool { for 
the_char in name.chars() { if !the_char.is_ascii() { return false; @@ -582,6 +582,21 @@ pub(crate) fn module_name_is_legal(name: &str) -> bool { return true; } +/// Returns a legal module name composed from the supplied string, by removing or substituting +/// all illegal characters. Returns None if that isn't possible +pub fn module_name_make_legal(name: &str) -> Option<String> { + let new_name: String = name.chars().filter(|&the_char| { + the_char.is_ascii_alphanumeric() || + the_char == '-' || + the_char == '_' + }).collect(); + if new_name.len() > 0 { + Some(new_name) + } else { + None + } +} + /// This test is narrowly focussed on the module namespace path parsing behavior implemented in /// [ModNameNode], but it does not test any module operations #[test] diff --git a/lib/src/metta/runner/pkg_mgmt/git_cache.rs b/lib/src/metta/runner/pkg_mgmt/git_cache.rs index 33df567cd..c61a07756 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_cache.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_cache.rs @@ -12,12 +12,14 @@ use xxhash_rust::xxh3::xxh3_64; use git2::{*, build::*}; use crate::metta::runner::environment::Environment; +use crate::metta::runner::modules::module_name_make_legal; /// Indicates the desired behavior for updating the locally-cached repo #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum UpdateMode { PullIfMissing, PullLatest, + TryPullLatest, } pub struct CachedModule { @@ -67,9 +69,16 @@ impl CachedModule { Ok(repo) => { //Do a `git pull` to bring it up to date - if mode == UpdateMode::PullLatest { + if mode == UpdateMode::PullLatest || mode == UpdateMode::TryPullLatest { let mut remote = repo.find_remote("origin").map_err(|e| format!("Failed find 'origin' in git repo: {}, {}", self.url, e))?; - remote.connect(Direction::Fetch).map_err(|e| format!("Failed to connect to origin repo: {}, {}", self.url, e))?; + match remote.connect(Direction::Fetch) { + Ok(_) => {}, + Err(e) => { + if mode == UpdateMode::PullLatest { + return Err(format!("Failed to connect to origin repo: {}, {}", self.url, e)) + } + } + } let branch = self.get_branch(&remote)?; remote.fetch(&[&branch], None, None).map_err(|e| format!("Failed fetch updates to git repo: {}, {}", self.url, e))?; @@ -145,3 +154,15 @@ impl CachedModule { &self.local_path } } + +/// Extracts the module name from a `.git` URL +/// +/// For example, `https://github.com/trueagi-io/hyperon-experimental.git` would be parsed +/// into "hyperon-experimental".
Returns None if the form of the URL isn't recognized +pub fn mod_name_from_url(url: &str) -> Option { + let without_ending = url.trim_end_matches(".git") + .trim_end_matches("/"); + let without_mod_name = without_ending.trim_end_matches(|c| c != '/'); + let mod_name = &without_ending[without_mod_name.len()..]; + module_name_make_legal(mod_name) +} \ No newline at end of file diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index 0d1e64ce1..4758c3d49 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -5,6 +5,7 @@ use crate::metta::*; use crate::metta::text::Tokenizer; use crate::metta::text::SExprParser; use crate::metta::runner::{Metta, RunContext, ModuleLoader, ResourceKey}; +use crate::metta::runner::git_cache::{CachedModule, UpdateMode, mod_name_from_url}; use crate::metta::types::{get_atom_types, get_meta_type}; use crate::common::shared::Shared; use crate::common::CachingMapper; @@ -321,6 +322,65 @@ impl Grounded for RegisterModuleOp { } } +/// Provides access to module in a remote git repo, from within MeTTa code +/// Similar to `register-module!`, this op will bypass the catalog search +#[derive(Clone, Debug)] +pub struct GitModuleOp { + metta: Metta +} + +impl PartialEq for GitModuleOp { + fn eq(&self, _other: &Self) -> bool { true } +} + +impl GitModuleOp { + pub fn new(metta: Metta) -> Self { + Self{ metta } + } +} + +impl Display for GitModuleOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "git-module!") + } +} + +impl Grounded for GitModuleOp { + fn type_(&self) -> Atom { + Atom::expr([ARROW_SYMBOL, ATOM_TYPE_ATOM, UNIT_TYPE()]) + } + + fn execute(&self, args: &[Atom]) -> Result, ExecError> { + let arg_error = "git-module! expects a URL; use quotes if needed"; + let url_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; + // TODO: When we figure out how to address varargs, it will be nice to take an optional branch name + + // TODO: replace Symbol by grounded String? + let url = match url_arg_atom { + Atom::Symbol(url_arg) => url_arg.name(), + _ => return Err(arg_error.into()) + }; + let url = strip_quotes(url); + + // TODO: Depending on what we do with `register-module!`, we might want to let the + // caller provide an optional mod_name here too, rather than extracting it from the url + let mod_name = match mod_name_from_url(url) { + Some(mod_name) => mod_name, + None => return Err(ExecError::from("git-module! 
error extracting module name from URL")) + }; + + let cached_mod = CachedModule::new(self.metta.environment(), None, &mod_name, url, url, None)?; + cached_mod.update(UpdateMode::TryPullLatest)?; + self.metta.load_module_at_path(cached_mod.local_path(), Some(&mod_name)).map_err(|e| ExecError::from(e))?; + + unit_result() + } + + fn match_(&self, other: &Atom) -> MatchResultIter { + match_by_equality(self, other) + } +} + /// This operation prints the modules loaded from the top of the runner /// /// NOTE: This is a temporary stop-gap to help MeTTa users inspect which modules they have loaded and @@ -1584,6 +1644,8 @@ mod non_minimal_only_stdlib { tref.register_token(regex(r"get-metatype"), move |_| { get_meta_type_op.clone() }); let register_module_op = Atom::gnd(RegisterModuleOp::new(metta.clone())); tref.register_token(regex(r"register-module!"), move |_| { register_module_op.clone() }); + let git_module_op = Atom::gnd(GitModuleOp::new(metta.clone())); + tref.register_token(regex(r"git-module!"), move |_| { git_module_op.clone() }); let mod_space_op = Atom::gnd(ModSpaceOp::new(metta.clone())); tref.register_token(regex(r"mod-space!"), move |_| { mod_space_op.clone() }); let print_mods_op = Atom::gnd(PrintModsOp::new(metta.clone())); diff --git a/lib/src/metta/runner/stdlib_minimal.rs b/lib/src/metta/runner/stdlib_minimal.rs index c64af4637..bdf2d3daf 100644 --- a/lib/src/metta/runner/stdlib_minimal.rs +++ b/lib/src/metta/runner/stdlib_minimal.rs @@ -461,6 +461,8 @@ pub fn register_common_tokens(tref: &mut Tokenizer, _tokenizer: Shared Date: Mon, 15 Apr 2024 21:02:20 -0700 Subject: [PATCH 04/77] Adding time-stamps and `TryPullIfOlderThan` `UpdateMode` so that I can periodically re-fectch without incurring unneeded network traffic --- lib/src/metta/runner/environment.rs | 19 ++++- lib/src/metta/runner/pkg_mgmt/catalog.rs | 22 +++++- lib/src/metta/runner/pkg_mgmt/git_cache.rs | 92 +++++++++++++++------- lib/src/metta/runner/stdlib.rs | 6 +- 4 files changed, 100 insertions(+), 39 deletions(-) diff --git a/lib/src/metta/runner/environment.rs b/lib/src/metta/runner/environment.rs index 7f55959ac..d9668036d 100644 --- a/lib/src/metta/runner/environment.rs +++ b/lib/src/metta/runner/environment.rs @@ -16,6 +16,7 @@ use directories::ProjectDirs; #[derive(Debug)] pub struct Environment { config_dir: Option, + caches_dir: Option, init_metta_path: Option, working_dir: Option, is_test: bool, @@ -41,12 +42,20 @@ impl Environment { COMMON_ENV.get_or_init(|| Arc::new(EnvBuilder::new().build())).clone() } - /// Returns the Path to the config dir, in an OS-specific location + /// Returns the [Path] to the config dir, in an OS-specific location pub fn config_dir(&self) -> Option<&Path> { self.config_dir.as_deref() } - /// Returns the Path to the environment's working_dir + /// Returns the [Path] to a directory where the MeTTa runner can put persistent caches + /// + /// NOTE: Currently the `caches_dir` dir is within `cfg_dir`, but there may be a reason + /// to move it in the future. 
+ pub fn caches_dir(&self) -> Option<&Path> { + self.caches_dir.as_deref() + } + + /// Returns the [Path] to the environment's working_dir /// /// NOTE: The Environment's working_dir is not the same as the process working directory, and /// changing the process's working directory will not affect the environment @@ -75,6 +84,7 @@ impl Environment { fn new() -> Self { Self { config_dir: None, + caches_dir: None, init_metta_path: None, working_dir: std::env::current_dir().ok(), is_test: false, @@ -260,7 +270,10 @@ impl EnvBuilder { if env.config_dir.is_none() { match ProjectDirs::from("io", "TrueAGI", "metta") { Some(proj_dirs) => { - env.config_dir = Some(proj_dirs.config_dir().into()); + let cfg_dir: PathBuf = proj_dirs.config_dir().into(); + let caches_dir = cfg_dir.join("caches"); + env.config_dir = Some(cfg_dir); + env.caches_dir = Some(caches_dir); }, None => { eprint!("Failed to initialize config with OS config directory!"); diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index ce2c51268..1c46b23c6 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -175,7 +175,7 @@ impl PkgInfo { //If a git URL is specified in the dep entry, see if we have it in the git-cache and // clone it locally if we don't if let Some(url) = &entry.git_url { - let cached_mod = CachedModule::new(context.metta.environment(), None, mod_name, url, url, entry.git_branch.as_ref().map(|s| s.as_str()))?; + let cached_mod = CachedRepo::new(context.metta.environment(), None, mod_name, url, url, entry.git_branch.as_ref().map(|s| s.as_str()))?; cached_mod.update(UpdateMode::PullIfMissing)?; return loader_for_module_at_path(&context.metta, cached_mod.local_path(), Some(mod_name), context.module().resource_dir()); } @@ -557,6 +557,22 @@ impl ModuleDescriptor { } } +/// Extracts the module name from a `.git` URL +/// +/// For example, `https://github.com/trueagi-io/hyperon-experimental.git` would be parsed +/// into "hyperon-experimental". Returns None if the form of the URL isn't recognized +pub fn mod_name_from_url(url: &str) -> Option { + let without_ending = url.trim_end_matches("/") + .trim_end_matches(".git"); + let without_mod_name = without_ending.trim_end_matches(|c| c != '/'); + let mod_name = &without_ending[without_mod_name.len()..]; + module_name_make_legal(mod_name) +} + +//-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+- +// TESTS +//-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+- + /// Bogus test catalog that returns a fake module in response to any query with a single capital letter /// used by `recursive_submodule_import_test` #[derive(Debug)] @@ -643,8 +659,8 @@ impl ModuleLoader for TestLoader { #[test] fn git_pkginfo_fetch_test() { - //Make a new runner, with the working dir in `/tmp/hyperon-test/` - let runner = Metta::new(Some(EnvBuilder::test_env().set_working_dir(Some(Path::new("/tmp/hyperon-test/"))))); + //Make a new runner, with the config dir in `/tmp/hyperon-test/` + let runner = Metta::new(Some(EnvBuilder::test_env().set_config_dir(Path::new("/tmp/hyperon-test/")))); let _mod_id = runner.load_module_direct(Box::new(TestLoader), "test-mod").unwrap(); let result = runner.run(SExprParser::new("!(import! 
&self test-mod:metta-morph:mettamorph)")); diff --git a/lib/src/metta/runner/pkg_mgmt/git_cache.rs b/lib/src/metta/runner/pkg_mgmt/git_cache.rs index c61a07756..a79bc0294 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_cache.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_cache.rs @@ -1,5 +1,5 @@ -//! Manages a local cache of modules cloned from git +//! Manages a local cache of cloned git repos //! //! Currently all network activity is synchronous. At some point it makes sense to move //! to async in order to parallelize downloads, etc. When that time comes I would like @@ -7,54 +7,65 @@ //! use std::path::{Path, PathBuf}; +use std::time::{SystemTime, Duration, UNIX_EPOCH}; +use std::fs::{File, read_to_string}; +use std::io::prelude::*; use xxhash_rust::xxh3::xxh3_64; use git2::{*, build::*}; use crate::metta::runner::environment::Environment; -use crate::metta::runner::modules::module_name_make_legal; + +const TIMESTAMP_FILENAME: &'static str = "_timestamp_"; /// Indicates the desired behavior for updating the locally-cached repo #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum UpdateMode { + /// Clones the repo if it doesn't exist, otherwise leaves it alone PullIfMissing, + /// Pulls the latest from the remote repo. Fails if the remote is unavailable PullLatest, + /// Attempts to pull from the remote repo. Continues with the existing repo if + /// the remote is unavailable TryPullLatest, + /// Attempts to pull from the remote repo is the local cache is older than the + /// specified number of seconds. Otherwise continues with the repo on the disk + TryPullIfOlderThan(u64) } -pub struct CachedModule { - _mod_name: String, +pub struct CachedRepo { + _name: String, url: String, branch: Option, local_path: PathBuf, } -impl CachedModule { - /// Initializes a new CachedModule object +impl CachedRepo { + /// Initializes a new CachedRepo object /// /// * `cache_name` - A name to describe the cache. For the default cache for URLs specified /// from the [PkgInfo], the cache is named `git-modules` - /// * `mod_name` - The catalog name of the module in the cache - /// * `ident_str` - An ascii string that identifies the specific module among other verions - /// of the module. - /// * `url` - The URL from which to fetch the module + /// * `name` - The name of this repo within in the cache. Often equal to the catalog name of a module + /// * `ident_str` - An ascii string that identifies the specific repo among other verions. + /// For example this could be a version, for a MeTTa module catalog cache. 
+ /// * `url` - The remote URL from which to fetch the repo /// * `branch` - The branch to use, or default if None - pub fn new(env: &Environment, cache_name: Option<&str>, mod_name: &str, ident_str: &str, url: &str, branch: Option<&str>) -> Result { + pub fn new(env: &Environment, cache_name: Option<&str>, name: &str, ident_str: &str, url: &str, branch: Option<&str>) -> Result { let cache_name = cache_name.unwrap_or("git-modules"); - let working_dir = env.working_dir().ok_or_else(|| "Unable to clone git repository; no local working directory available".to_string())?; + let caches_dir = env.caches_dir().ok_or_else(|| "Unable to clone git repository; no local \"caches\" directory available".to_string())?; let branch_str = match &branch { Some(s) => s, None => "" }; let unique_id = xxh3_64(format!("{}{}", ident_str, branch_str).as_bytes()); - let local_filename = format!("{mod_name}.{unique_id:16x}"); - let local_path = working_dir.join(cache_name).join(local_filename); + let local_filename = format!("{name}.{unique_id:016x}"); + let local_path = caches_dir.join(cache_name).join(local_filename); std::fs::create_dir_all(&local_path).map_err(|e| e.to_string())?; Ok(Self { - _mod_name: mod_name.to_string(), + _name: name.to_string(), url: url.to_string(), branch: branch.map(|s| s.to_owned()), local_path, @@ -69,7 +80,7 @@ impl CachedModule { Ok(repo) => { //Do a `git pull` to bring it up to date - if mode == UpdateMode::PullLatest || mode == UpdateMode::TryPullLatest { + if mode == UpdateMode::PullLatest || mode == UpdateMode::TryPullLatest || self.check_timestamp(mode) { let mut remote = repo.find_remote("origin").map_err(|e| format!("Failed find 'origin' in git repo: {}, {}", self.url, e))?; match remote.connect(Direction::Fetch) { Ok(_) => {}, @@ -85,6 +96,7 @@ impl CachedModule { let fetch_head = repo.find_reference("FETCH_HEAD").map_err(|e| e.to_string())?; self.merge(&repo, &branch, &fetch_head).map_err(|e| format!("Failed to merge remote git repo: {}, {}", self.url, e))?; + self.write_timestamp_file()?; } Ok(()) }, @@ -99,7 +111,10 @@ impl CachedModule { None => {} } match repo_builder.clone(&self.url, &self.local_path) { - Ok(_repo) => Ok(()), + Ok(_repo) => { + self.write_timestamp_file()?; + Ok(()) + }, Err(e) => Err(format!("Failed to clone git repo: {}, {}", self.url, e)), } }, @@ -130,8 +145,9 @@ impl CachedModule { reference.set_target(annotated_commit.id(), "Fast-forward")?; repo.checkout_head(None)?; } else { + panic!("Fatal Error: cached git repository at \"{}\" appears to be corrupt", self.local_path.display()); //NOTE: the below code appears to work, but it isn't needed at the moment - unreachable!(); + // // // Normal merge... 
// let head_commit = repo.head()?.peel_to_commit()?; // let incomming_commit = Reference::peel_to_commit(incomming_commit_ref)?; @@ -149,20 +165,36 @@ impl CachedModule { Ok(()) } + /// Internal function to write the timestamp file, with the value of "now" + fn write_timestamp_file(&self) -> Result<(), String> { + let duration_since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap(); + let file_path = self.local_path.join(TIMESTAMP_FILENAME); + let mut file = File::create(&file_path).map_err(|e| format!("Error creating timestamp file at {}, {e}", file_path.display()))?; + file.write_all(&format!("{:016x}", duration_since_epoch.as_secs()).into_bytes()) + .map_err(|e| format!("Error writing file: {}, {e}", file_path.display())) + } + + /// Returns `true` if `mode == TryPullIfOlderThan`, and the timestamp file indicates + /// that amount of time has elapsed. Otherwise returns `false` + fn check_timestamp(&self, mode: UpdateMode) -> bool { + match mode { + UpdateMode::TryPullIfOlderThan(secs) => { + let file_path = self.local_path.join(TIMESTAMP_FILENAME); + match read_to_string(&file_path) { + Ok(file_contents) => { + let val = u64::from_str_radix(&file_contents, 16).unwrap(); + let timestamp_time = UNIX_EPOCH.checked_add(Duration::from_secs(val)).unwrap(); + timestamp_time.elapsed().unwrap().as_secs() > secs + }, + _ => true //No timestamp file means we should pull + } + }, + _ => false, + } + } + /// Returns the file system path for the locally cloned repository pub fn local_path(&self) -> &Path { &self.local_path } } - -/// Extracts the module name from a `.git` URL -/// -/// For example, `https://github.com/trueagi-io/hyperon-experimental.git` would be parsed -/// into "hyperon-experimental". Returns None if the form of the URL isn't recognized -pub fn mod_name_from_url(url: &str) -> Option { - let without_ending = url.trim_end_matches(".git") - .trim_end_matches("/"); - let without_mod_name = without_ending.trim_end_matches(|c| c != '/'); - let mod_name = &without_ending[without_mod_name.len()..]; - module_name_make_legal(mod_name) -} \ No newline at end of file diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index 4758c3d49..64efd9d4e 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -4,8 +4,8 @@ use crate::space::*; use crate::metta::*; use crate::metta::text::Tokenizer; use crate::metta::text::SExprParser; -use crate::metta::runner::{Metta, RunContext, ModuleLoader, ResourceKey}; -use crate::metta::runner::git_cache::{CachedModule, UpdateMode, mod_name_from_url}; +use crate::metta::runner::{Metta, RunContext, ModuleLoader, ResourceKey, mod_name_from_url}; +use crate::metta::runner::git_cache::{CachedRepo, UpdateMode}; use crate::metta::types::{get_atom_types, get_meta_type}; use crate::common::shared::Shared; use crate::common::CachingMapper; @@ -369,7 +369,7 @@ impl Grounded for GitModuleOp { None => return Err(ExecError::from("git-module! 
error extracting module name from URL")) }; - let cached_mod = CachedModule::new(self.metta.environment(), None, &mod_name, url, url, None)?; + let cached_mod = CachedRepo::new(self.metta.environment(), None, &mod_name, url, url, None)?; cached_mod.update(UpdateMode::TryPullLatest)?; self.metta.load_module_at_path(cached_mod.local_path(), Some(&mod_name)).map_err(|e| ExecError::from(e))?; From 2a6232566f078f0976fd8ab8b42ac5af458ffeb7 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Tue, 16 Apr 2024 19:15:39 -0700 Subject: [PATCH 05/77] Updating CMake files to link downstream dependencies when using the static hyperonc lib --- c/CMakeLists.txt | 6 ++++++ c/tests/CMakeLists.txt | 10 +++++----- lib/Cargo.toml | 7 +++++-- lib/src/metta/runner/pkg_mgmt/mod.rs | 7 +++++++ python/CMakeLists.txt | 6 +++++- 5 files changed, 28 insertions(+), 8 deletions(-) create mode 100644 lib/src/metta/runner/pkg_mgmt/mod.rs diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index 0e94587c9..adf7f77d9 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -61,6 +61,12 @@ set_target_properties(hyperonc-static PROPERTIES ) add_dependencies(hyperonc-static build-hyperonc) +find_library(GIT2_LIBRARY NAMES git2) +find_library(ICONV_LIBRARY NAMES iconv) +find_library(Z_LIBRARY NAMES z) +add_library(hyperonc-static-interface INTERFACE) +target_link_libraries(hyperonc-static-interface INTERFACE hyperonc-static ${GIT2_LIBRARY} ${ICONV_LIBRARY} ${Z_LIBRARY}) + add_subdirectory(tests) set(BINARY_INSTALL_PATH "lib/hyperonc") diff --git a/c/tests/CMakeLists.txt b/c/tests/CMakeLists.txt index 35601fad1..230f5bddd 100644 --- a/c/tests/CMakeLists.txt +++ b/c/tests/CMakeLists.txt @@ -5,21 +5,21 @@ set(TEST_SOURCES util.c ) add_executable(check_atom check_atom.c ${TEST_SOURCES}) -target_link_libraries(check_atom hyperonc-static CONAN_PKG::libcheck) +target_link_libraries(check_atom hyperonc-static-interface CONAN_PKG::libcheck) add_test(NAME check_atom COMMAND check_atom) add_executable(check_space check_space.c ${TEST_SOURCES}) -target_link_libraries(check_space hyperonc-static CONAN_PKG::libcheck) +target_link_libraries(check_space hyperonc-static-interface CONAN_PKG::libcheck) add_test(NAME check_space COMMAND check_space) add_executable(check_sexpr_parser check_sexpr_parser.c ${TEST_SOURCES}) -target_link_libraries(check_sexpr_parser hyperonc-static CONAN_PKG::libcheck) +target_link_libraries(check_sexpr_parser hyperonc-static-interface CONAN_PKG::libcheck) add_test(NAME check_sexpr_parser COMMAND check_sexpr_parser) add_executable(check_types check_types.c ${TEST_SOURCES}) -target_link_libraries(check_types hyperonc-static CONAN_PKG::libcheck) +target_link_libraries(check_types hyperonc-static-interface CONAN_PKG::libcheck) add_test(NAME check_types COMMAND check_types) add_executable(check_runner check_runner.c ${TEST_SOURCES}) -target_link_libraries(check_runner hyperonc-static CONAN_PKG::libcheck) +target_link_libraries(check_runner hyperonc-static-interface CONAN_PKG::libcheck) add_test(NAME check_runner COMMAND check_runner) diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 291a5442e..0738db290 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -10,12 +10,15 @@ env_logger = { workspace = true } directories = "5.0.1" # For Environment to find platform-specific config location smallvec = "1.10.0" im = "15.1.0" -xxhash-rust = {version="0.8.7", features=["xxh3"], optional=true} -git2 = {version="0.18.3", optional=true} rand = "0.8.5" bitset = "0.1.2" dyn-fmt = "0.4.0" +# pkg_mgmt deps +xxhash-rust = {version="0.8.7", 
features=["xxh3"], optional=true} +#NOTE: we'll need to link libssl and/or libcrypto in cmake if we re-enable ssl or https for git2 +git2 = {version="0.18.3", default-features=false, optional=true} + [lib] name = "hyperon" path = "src/lib.rs" diff --git a/lib/src/metta/runner/pkg_mgmt/mod.rs b/lib/src/metta/runner/pkg_mgmt/mod.rs new file mode 100644 index 000000000..94689a242 --- /dev/null +++ b/lib/src/metta/runner/pkg_mgmt/mod.rs @@ -0,0 +1,7 @@ + +mod catalog; +pub use catalog::*; + +pub(crate) mod git_cache; + +// pub(crate) mod git_catalog; \ No newline at end of file diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index ba2003d1a..19e2d23f0 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -52,8 +52,12 @@ include_directories(${nonstd_INCLUDE_DIRS}) find_package(hyperonc REQUIRED HINTS ${HYPERONC_INSTALL_PREFIX}) include_directories(${hyperonc_INCLUDE_DIRS}) +find_library(GIT2_LIBRARY NAMES git2) +find_library(ICONV_LIBRARY NAMES iconv) +find_library(Z_LIBRARY NAMES z) + pybind11_add_module(hyperonpy MODULE ./hyperonpy.cpp) -target_link_libraries(hyperonpy PRIVATE "${hyperonc_STATIC_LIBRARY}") +target_link_libraries(hyperonpy PRIVATE "${hyperonc_STATIC_LIBRARY}" ${GIT2_LIBRARY} ${ICONV_LIBRARY} ${Z_LIBRARY}) set(PYTHONPATH "${CMAKE_CURRENT_SOURCE_DIR}") add_subdirectory(tests) From 66a8d20378ca7a9204d353d6810eeef95eb26bb4 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Tue, 16 Apr 2024 21:44:09 -0700 Subject: [PATCH 06/77] Linking static binary with openssl and libcrypto --- c/CMakeLists.txt | 5 ++++- lib/Cargo.toml | 5 +++-- python/CMakeLists.txt | 4 +++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index adf7f77d9..1d80827e8 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -61,11 +61,14 @@ set_target_properties(hyperonc-static PROPERTIES ) add_dependencies(hyperonc-static build-hyperonc) +#NOTE: All these dependencies are needed to statically link the git2 functionality find_library(GIT2_LIBRARY NAMES git2) find_library(ICONV_LIBRARY NAMES iconv) find_library(Z_LIBRARY NAMES z) +find_library(CRYPTO_LIBRARY NAMES crypto) +find_library(SSL_LIBRARY NAMES ssl) add_library(hyperonc-static-interface INTERFACE) -target_link_libraries(hyperonc-static-interface INTERFACE hyperonc-static ${GIT2_LIBRARY} ${ICONV_LIBRARY} ${Z_LIBRARY}) +target_link_libraries(hyperonc-static-interface INTERFACE hyperonc-static ${GIT2_LIBRARY} ${ICONV_LIBRARY} ${Z_LIBRARY} ${SSL_LIBRARY} ${CRYPTO_LIBRARY}) add_subdirectory(tests) diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 0738db290..b9b3593c1 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -16,8 +16,9 @@ dyn-fmt = "0.4.0" # pkg_mgmt deps xxhash-rust = {version="0.8.7", features=["xxh3"], optional=true} -#NOTE: we'll need to link libssl and/or libcrypto in cmake if we re-enable ssl or https for git2 -git2 = {version="0.18.3", default-features=false, optional=true} +#TODO: vendored-openssl is a lot slower to build, but static linking throws warnings +#git2 = {version="0.18.3", default-features=false, features=["vendored-openssl"], optional=true} +git2 = {version="0.18.3", optional=true} [lib] name = "hyperon" diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 19e2d23f0..8c71e1498 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -55,9 +55,11 @@ include_directories(${hyperonc_INCLUDE_DIRS}) find_library(GIT2_LIBRARY NAMES git2) find_library(ICONV_LIBRARY NAMES iconv) find_library(Z_LIBRARY NAMES z) +find_library(CRYPTO_LIBRARY NAMES 
crypto) +find_library(SSL_LIBRARY NAMES ssl) pybind11_add_module(hyperonpy MODULE ./hyperonpy.cpp) -target_link_libraries(hyperonpy PRIVATE "${hyperonc_STATIC_LIBRARY}" ${GIT2_LIBRARY} ${ICONV_LIBRARY} ${Z_LIBRARY}) +target_link_libraries(hyperonpy PRIVATE "${hyperonc_STATIC_LIBRARY}" ${GIT2_LIBRARY} ${ICONV_LIBRARY} ${Z_LIBRARY} ${SSL_LIBRARY} ${CRYPTO_LIBRARY}) set(PYTHONPATH "${CMAKE_CURRENT_SOURCE_DIR}") add_subdirectory(tests) From 86996059c3050221cac6427580903e75d9abc2f5 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Wed, 17 Apr 2024 10:36:19 -0700 Subject: [PATCH 07/77] Adding GitCatalog and fixing a handful of bugs surrounding git_cache and the Environment --- lib/Cargo.toml | 6 +- lib/src/metta/runner/environment.rs | 17 +++- lib/src/metta/runner/mod.rs | 2 +- lib/src/metta/runner/pkg_mgmt/catalog.rs | 11 ++- lib/src/metta/runner/pkg_mgmt/git_cache.rs | 48 ++++++---- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 94 ++++++++++++++++++++ lib/src/metta/runner/pkg_mgmt/mod.rs | 2 +- 7 files changed, 151 insertions(+), 29 deletions(-) create mode 100644 lib/src/metta/runner/pkg_mgmt/git_catalog.rs diff --git a/lib/Cargo.toml b/lib/Cargo.toml index b9b3593c1..5cfcfbdf5 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -18,7 +18,9 @@ dyn-fmt = "0.4.0" xxhash-rust = {version="0.8.7", features=["xxh3"], optional=true} #TODO: vendored-openssl is a lot slower to build, but static linking throws warnings #git2 = {version="0.18.3", default-features=false, features=["vendored-openssl"], optional=true} -git2 = {version="0.18.3", optional=true} +git2 = { version="0.18.3", optional=true} +serde = { version="1.0.198", features = ["derive"], optional=true } +serde_json = { version="1.0.116", optional=true } [lib] name = "hyperon" @@ -32,4 +34,4 @@ default = ["pkg_mgmt"] minimal = [] # enables minimal MeTTa interpreter variable_operation = [] # enables evaluation of the expressions which have # a variable on the first position -pkg_mgmt = ["xxhash-rust", "git2"] +pkg_mgmt = ["xxhash-rust", "git2", "serde", "serde_json"] diff --git a/lib/src/metta/runner/environment.rs b/lib/src/metta/runner/environment.rs index d9668036d..36ceb48ae 100644 --- a/lib/src/metta/runner/environment.rs +++ b/lib/src/metta/runner/environment.rs @@ -5,7 +5,7 @@ use std::fs; use std::sync::Arc; #[cfg(feature = "pkg_mgmt")] -use crate::metta::runner::pkg_mgmt::{ModuleCatalog, DirCatalog, FsModuleFormat, SingleFileModuleFmt, DirModuleFmt}; +use crate::metta::runner::pkg_mgmt::{ModuleCatalog, DirCatalog, FsModuleFormat, SingleFileModuleFmt, DirModuleFmt, git_catalog::*}; use directories::ProjectDirs; @@ -271,9 +271,7 @@ impl EnvBuilder { match ProjectDirs::from("io", "TrueAGI", "metta") { Some(proj_dirs) => { let cfg_dir: PathBuf = proj_dirs.config_dir().into(); - let caches_dir = cfg_dir.join("caches"); env.config_dir = Some(cfg_dir); - env.caches_dir = Some(caches_dir); }, None => { eprint!("Failed to initialize config with OS config directory!"); @@ -281,6 +279,8 @@ impl EnvBuilder { } } } + // Set the caches dir within the config dir. 
We may want to move it elsewhere in the future + env.caches_dir = env.config_dir.as_ref().map(|cfg_dir| cfg_dir.join("caches")); if let Some(config_dir) = &env.config_dir { @@ -310,7 +310,7 @@ impl EnvBuilder { env.config_dir = None; } - //Push the "modules" dir, as the last place to search after the other paths that were specified + //Push the "modules" dir, to search after the other paths that were specified //TODO: the config.metta file should be able to append / modify the catalogs, and can choose not to // include the "modules" dir in the future. #[cfg(feature = "pkg_mgmt")] @@ -318,6 +318,15 @@ impl EnvBuilder { proto_catalogs.push(ProtoCatalog::Path(modules_dir)); } + //Search the remote git-based catalog + #[cfg(feature = "pkg_mgmt")] + { + //TODO: Catalog should be moved to trueagi, and catalog settings should come from config + //let refresh_time = 259200; //3 days = 3 days * 24 hrs * 60 minutes * 60 seconds + let refresh_time = 60; //GOAT + proto_catalogs.push(ProtoCatalog::Other(Box::new(GitCatalog::new(&env, "luketpeterson-catalog", "https://github.com/luketpeterson/metta-mod-catalog.git", refresh_time).unwrap()))); + } + if init_metta_path.exists() { env.init_metta_path = Some(init_metta_path); } diff --git a/lib/src/metta/runner/mod.rs b/lib/src/metta/runner/mod.rs index 7da3f8975..f6d56901d 100644 --- a/lib/src/metta/runner/mod.rs +++ b/lib/src/metta/runner/mod.rs @@ -113,7 +113,7 @@ impl PartialEq for Metta { } #[derive(Debug)] -pub struct MettaContents { +pub(crate) struct MettaContents { /// All the runner's loaded modules modules: Mutex>>, /// A tree to locate loaded mods by name diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index 1c46b23c6..e5627771f 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -3,7 +3,7 @@ //! //! ## Behavior of Module Resolution //! -//! ```ignore +//! ```text //! ┌────────────────────┐ ⎽⎼⎻⎺ ⎺⎺⎺ ⎺⎻⎼⎽ ⎽⎼⎻⎺ ⎺⎺⎺ ⎺⎻⎼⎽ //! ╱ ╱ ⎽⎼⎻⎺ pkg-info in ⎺⎻⎼⎽ Yes ⎽⎼⎻⎺pkg-info entry ⎺⎻⎼⎽ No //! ╱ (import!) ╱─────►< &self has entry for >─────►< has fs_path attrib? 
>───┐ @@ -534,6 +534,10 @@ impl ModuleDescriptor { pub fn new_with_uid(name: String, uid: u64) -> Self { Self { name, uid: Some(uid) } } + pub fn new_with_ident_bytes_and_fmt_id(name: String, ident: &[u8], fmt_id: u64) -> Self { + let uid = xxh3_64(ident) ^ fmt_id; + ModuleDescriptor::new_with_uid(name, uid) + } /// Create a new ModuleDescriptor using a file system path and another unique id /// /// The descriptor's uid is based on a stable-hash of the path, because a module loaded by @@ -542,8 +546,7 @@ impl ModuleDescriptor { /// The purpose of the `fmt_id` is to ensure two different formats or catalogs don't generate /// the same ModuleDescriptor, but you can pass 0 if it doesn't matter pub fn new_with_path_and_fmt_id(name: String, path: &Path, fmt_id: u64) -> Self { - let uid = xxh3_64(path.as_os_str().as_encoded_bytes()) ^ fmt_id; - ModuleDescriptor::new_with_uid(name, uid) + Self::new_with_ident_bytes_and_fmt_id(name, path.as_os_str().as_encoded_bytes(), fmt_id) } /// Returns the name of the module represented by the ModuleDescriptor pub fn name(&self) -> &str { @@ -660,7 +663,7 @@ impl ModuleLoader for TestLoader { fn git_pkginfo_fetch_test() { //Make a new runner, with the config dir in `/tmp/hyperon-test/` - let runner = Metta::new(Some(EnvBuilder::test_env().set_config_dir(Path::new("/tmp/hyperon-test/")))); + let runner = Metta::new(Some(EnvBuilder::new().set_config_dir(Path::new("/tmp/hyperon-test/")))); let _mod_id = runner.load_module_direct(Box::new(TestLoader), "test-mod").unwrap(); let result = runner.run(SExprParser::new("!(import! &self test-mod:metta-morph:mettamorph)")); diff --git a/lib/src/metta/runner/pkg_mgmt/git_cache.rs b/lib/src/metta/runner/pkg_mgmt/git_cache.rs index a79bc0294..2c6afad54 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_cache.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_cache.rs @@ -1,5 +1,5 @@ -//! Manages a local cache of cloned git repos +//! Manages local caches of cloned git repos //! //! Currently all network activity is synchronous. At some point it makes sense to move //! to async in order to parallelize downloads, etc. 
When that time comes I would like @@ -33,8 +33,9 @@ pub enum UpdateMode { TryPullIfOlderThan(u64) } +#[derive(Debug)] pub struct CachedRepo { - _name: String, + name: String, url: String, branch: Option, local_path: PathBuf, @@ -53,27 +54,32 @@ impl CachedRepo { pub fn new(env: &Environment, cache_name: Option<&str>, name: &str, ident_str: &str, url: &str, branch: Option<&str>) -> Result { let cache_name = cache_name.unwrap_or("git-modules"); let caches_dir = env.caches_dir().ok_or_else(|| "Unable to clone git repository; no local \"caches\" directory available".to_string())?; - let branch_str = match &branch { - Some(s) => s, - None => "" - }; - let unique_id = xxh3_64(format!("{}{}", ident_str, branch_str).as_bytes()); - let local_filename = format!("{name}.{unique_id:016x}"); + let local_filename = if branch.is_some() || ident_str.len() > 0 { + let branch_str = match &branch { + Some(s) => s, + None => "" + }; + let unique_id = xxh3_64(format!("{}{}", ident_str, branch_str).as_bytes()); + format!("{name}.{unique_id:016x}") + } else { + name.to_string() + }; let local_path = caches_dir.join(cache_name).join(local_filename); std::fs::create_dir_all(&local_path).map_err(|e| e.to_string())?; Ok(Self { - _name: name.to_string(), + name: name.to_string(), url: url.to_string(), branch: branch.map(|s| s.to_owned()), local_path, }) } - /// Updates a local cached repo with a remote repo, using `mode` behavior - pub fn update(&self, mode: UpdateMode) -> Result<(), String> { + /// Updates a local cached repo with a remote repo, using `mode` behavior. Returns `true` if the + /// repo was updated, and `false` if the repo was left unchanged + pub fn update(&self, mode: UpdateMode) -> Result { match Repository::open(&self.local_path) { //We have an existing repo on disk @@ -87,6 +93,9 @@ impl CachedRepo { Err(e) => { if mode == UpdateMode::PullLatest { return Err(format!("Failed to connect to origin repo: {}, {}", self.url, e)) + } else { + // We couldn't connect, but the UpdateMode allows soft failure + return Ok(false) } } } @@ -97,12 +106,16 @@ impl CachedRepo { let fetch_head = repo.find_reference("FETCH_HEAD").map_err(|e| e.to_string())?; self.merge(&repo, &branch, &fetch_head).map_err(|e| format!("Failed to merge remote git repo: {}, {}", self.url, e))?; self.write_timestamp_file()?; + Ok(true) + } else { + // The UpdateMode is set such that we don't need to check + Ok(false) } - Ok(()) }, Err(_) => { //We don't have a local repo, so clone it fresh + log::info!("cloning remote git repo: {}", self.name); let mut repo_builder = RepoBuilder::new(); match &self.branch { Some(branch) => { @@ -113,7 +126,7 @@ impl CachedRepo { match repo_builder.clone(&self.url, &self.local_path) { Ok(_repo) => { self.write_timestamp_file()?; - Ok(()) + Ok(true) }, Err(e) => Err(format!("Failed to clone git repo: {}, {}", self.url, e)), } @@ -140,10 +153,11 @@ impl CachedRepo { return Ok(()); } else if analysis.0.is_fast_forward() { // Fast-forwarding... 
- let mut reference = repo.find_reference(branch)?; - - reference.set_target(annotated_commit.id(), "Fast-forward")?; - repo.checkout_head(None)?; + log::info!("fetching update from remote git repo: {}", self.name); + let mut branch_ref = repo.find_reference(branch)?; + branch_ref.set_target(annotated_commit.id(), "Fast-forward")?; + repo.checkout_tree(&repo.find_object(annotated_commit.id(), Some(ObjectType::Commit))?, Some(CheckoutBuilder::default().force()))?; + repo.set_head(branch_ref.name().unwrap())?; } else { panic!("Fatal Error: cached git repository at \"{}\" appears to be corrupt", self.local_path.display()); //NOTE: the below code appears to work, but it isn't needed at the moment diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs new file mode 100644 index 000000000..753ba42c0 --- /dev/null +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -0,0 +1,94 @@ +//! Implements a [ModuleCatalog] serving remotely hosted modules via git +//! + +use std::fs::read_to_string; +use std::sync::Mutex; + +use serde::Deserialize; + +use crate::metta::runner::modules::*; +use crate::metta::runner::pkg_mgmt::{*, git_cache::*}; +use crate::metta::runner::environment::Environment; + +/// Struct that matches the catalog.json file fetched from the `catalog.repo` +#[derive(Deserialize, Debug)] +struct CatalogFileFormat { + modules: Vec +} + +/// A single module in a catalog.json file +#[derive(Deserialize, Debug)] +struct CatalogFileMod { + name: String, + remote_url: String, +} + +#[derive(Debug)] +pub struct GitCatalog { + name: String, + refresh_time: u64, + catalog_repo: CachedRepo, + catalog: Mutex>, +} + +impl GitCatalog { + /// Creates a new GitCatalog with the name and url specified. `refresh_time` is the time, in + /// seconds, between refreshes of the catalog file + pub fn new(env: &Environment, name: &str, url: &str, refresh_time: u64) -> Result { + let catalog_repo = CachedRepo::new(env, Some(&name), "catalog.repo", "", url, None)?; + Ok(Self { + name: name.to_string(), + refresh_time, + catalog_repo, + catalog: Mutex::new(None), + }) + } +} + +impl ModuleCatalog for GitCatalog { + fn lookup(&self, name: &str) -> Vec { + + //Get the catalog from the git cache + let did_update = match self.catalog_repo.update(UpdateMode::TryPullIfOlderThan(self.refresh_time)) { + Ok(did_update) => did_update, + Err(e) => { + log::warn!("Warning: error encountered attempting to fetch remote catalog: {}, {e}", self.name); + return vec![]; + } + }; + + //Parse the catalog JSON file + if did_update { + let catalog_file_path = self.catalog_repo.local_path().join("catalog.json"); + match read_to_string(&catalog_file_path) { + Ok(file_contents) => { + let mut catalog = self.catalog.lock().unwrap(); + *catalog = Some(serde_json::from_str(&file_contents).unwrap()); + }, + Err(e) => { + log::warn!("Warning: Error reading catalog file. 
remote catalog appears to be corrupt: {}, {e}", self.name); + return vec![]; + } + } + } + + //Find the modules that match in the catalog + let cat_lock = self.catalog.lock().unwrap(); + let catalog = cat_lock.as_ref().unwrap(); + let mut results = vec![]; + for cat_mod in catalog.modules.iter() { + if cat_mod.name == name { + let descriptor = ModuleDescriptor::new_with_ident_bytes_and_fmt_id(name.to_string(), cat_mod.remote_url.as_bytes(), 0); + results.push(descriptor); + } + } + + results + } + fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result, String> { + + //TODO-NOW: Make a ModuleLoader object that contains the CachedRepo for the URL for the module being loaded + + Err("TODO-NOW".to_string()) + } +} \ No newline at end of file diff --git a/lib/src/metta/runner/pkg_mgmt/mod.rs b/lib/src/metta/runner/pkg_mgmt/mod.rs index 94689a242..a3c1a07a6 100644 --- a/lib/src/metta/runner/pkg_mgmt/mod.rs +++ b/lib/src/metta/runner/pkg_mgmt/mod.rs @@ -4,4 +4,4 @@ pub use catalog::*; pub(crate) mod git_cache; -// pub(crate) mod git_catalog; \ No newline at end of file +pub(crate) mod git_catalog; \ No newline at end of file From 613dbdc6a24fb0a68deb5154a376f5b79cd84b8a Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 18 Apr 2024 20:56:19 -0700 Subject: [PATCH 08/77] Rounding out functionality in git_catalog, and fixing several bugs --- lib/src/metta/runner/environment.rs | 21 +++-- lib/src/metta/runner/mod.rs | 2 +- lib/src/metta/runner/pkg_mgmt/catalog.rs | 16 ++-- lib/src/metta/runner/pkg_mgmt/git_cache.rs | 10 +-- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 89 ++++++++++++++------ lib/src/metta/runner/stdlib.rs | 3 +- 6 files changed, 90 insertions(+), 51 deletions(-) diff --git a/lib/src/metta/runner/environment.rs b/lib/src/metta/runner/environment.rs index 36ceb48ae..059f68f1a 100644 --- a/lib/src/metta/runner/environment.rs +++ b/lib/src/metta/runner/environment.rs @@ -279,8 +279,6 @@ impl EnvBuilder { } } } - // Set the caches dir within the config dir. We may want to move it elsewhere in the future - env.caches_dir = env.config_dir.as_ref().map(|cfg_dir| cfg_dir.join("caches")); if let Some(config_dir) = &env.config_dir { @@ -310,6 +308,9 @@ impl EnvBuilder { env.config_dir = None; } + // Set the caches dir within the config dir. We may want to move it elsewhere in the future + env.caches_dir = env.config_dir.as_ref().map(|cfg_dir| cfg_dir.join("caches")); + //Push the "modules" dir, to search after the other paths that were specified //TODO: the config.metta file should be able to append / modify the catalogs, and can choose not to // include the "modules" dir in the future. 
@@ -318,15 +319,6 @@ impl EnvBuilder { proto_catalogs.push(ProtoCatalog::Path(modules_dir)); } - //Search the remote git-based catalog - #[cfg(feature = "pkg_mgmt")] - { - //TODO: Catalog should be moved to trueagi, and catalog settings should come from config - //let refresh_time = 259200; //3 days = 3 days * 24 hrs * 60 minutes * 60 seconds - let refresh_time = 60; //GOAT - proto_catalogs.push(ProtoCatalog::Other(Box::new(GitCatalog::new(&env, "luketpeterson-catalog", "https://github.com/luketpeterson/metta-mod-catalog.git", refresh_time).unwrap()))); - } - if init_metta_path.exists() { env.init_metta_path = Some(init_metta_path); } @@ -353,6 +345,13 @@ impl EnvBuilder { } } } + + //Search the remote git-based catalog, if we have a caches dir to store the modules + if let Some(caches_dir) = &env.caches_dir { + //TODO: Catalog should be moved to trueagi github account, and catalog settings should come from config + let refresh_time = 259200; //3 days = 3 days * 24 hrs * 60 minutes * 60 seconds + env.catalogs.push(Box::new(GitCatalog::new(caches_dir, env.fs_mod_formats.clone(), "luketpeterson-catalog", "https://github.com/luketpeterson/metta-mod-catalog.git", refresh_time).unwrap())); + } } env diff --git a/lib/src/metta/runner/mod.rs b/lib/src/metta/runner/mod.rs index f6d56901d..955414148 100644 --- a/lib/src/metta/runner/mod.rs +++ b/lib/src/metta/runner/mod.rs @@ -782,7 +782,7 @@ impl<'input> RunContext<'_, '_, 'input> { }; // Get the loader and descriptor by trying the module formats - let (loader, descriptor) = match loader_for_module_at_path(&self.metta, &path, absolute_mod_name.as_deref(), self.module().resource_dir())? { + let (loader, descriptor) = match loader_for_module_at_path(self.metta.environment().fs_mod_formats(), &path, absolute_mod_name.as_deref(), self.module().resource_dir())? 
{ Some((loader, descriptor)) => (loader, descriptor), None => return Err(format!("Failed to resolve module at path: {}", path.as_ref().display())) }; diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index e5627771f..4eeb9439f 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -169,15 +169,16 @@ impl PkgInfo { //If path is explicitly specified in the dep entry, then we must load the module at the // specified path, and cannot search anywhere else if let Some(path) = &entry.fs_path { - return loader_for_module_at_path(&context.metta, path, Some(mod_name), context.module().resource_dir()); + return loader_for_module_at_path(context.metta.environment().fs_mod_formats(), path, Some(mod_name), context.module().resource_dir()); } //If a git URL is specified in the dep entry, see if we have it in the git-cache and // clone it locally if we don't if let Some(url) = &entry.git_url { - let cached_mod = CachedRepo::new(context.metta.environment(), None, mod_name, url, url, entry.git_branch.as_ref().map(|s| s.as_str()))?; + let caches_dir = context.metta.environment().caches_dir().ok_or_else(|| "Unable to clone git repository; no local \"caches\" directory available".to_string())?; + let cached_mod = CachedRepo::new(caches_dir, None, mod_name, url, url, entry.git_branch.as_ref().map(|s| s.as_str()))?; cached_mod.update(UpdateMode::PullIfMissing)?; - return loader_for_module_at_path(&context.metta, cached_mod.local_path(), Some(mod_name), context.module().resource_dir()); + return loader_for_module_at_path(context.metta.environment().fs_mod_formats(), cached_mod.local_path(), Some(mod_name), context.module().resource_dir()); } //TODO, If a version range is specified in the dep entry, then use that version range to specify @@ -222,7 +223,7 @@ impl PkgInfo { } /// Internal function to get a loader for a module at a specific file system path, by trying each FsModuleFormat in order -pub(crate) fn loader_for_module_at_path>(metta: &Metta, path: P, name: Option<&str>, search_dir: Option<&Path>) -> Result, ModuleDescriptor)>, String> { +pub(crate) fn loader_for_module_at_path<'a, P: AsRef, FmtIter: Iterator>(fmts: FmtIter, path: P, name: Option<&str>, search_dir: Option<&Path>) -> Result, ModuleDescriptor)>, String> { //If the path is not an absolute path, assume it's relative to the running search_dir let path = if path.as_ref().is_absolute() { @@ -239,7 +240,7 @@ pub(crate) fn loader_for_module_at_path>(metta: &Metta, path: P, }; //Check all module formats, to try and load the module at the path - for fmt in metta.environment().fs_mod_formats() { + for fmt in fmts { if let Some((loader, descriptor)) = fmt.try_path(&path, name) { return Ok(Some((loader, descriptor))) } @@ -552,6 +553,11 @@ impl ModuleDescriptor { pub fn name(&self) -> &str { &self.name } + /// Returns `true` if the `ident_bytes` and `fmt_id` match what was used to create the descriptor + pub fn ident_bytes_and_fmt_id_matches(&self, ident: &[u8], fmt_id: u64) -> bool { + let uid = xxh3_64(ident) ^ fmt_id; + self.uid == Some(uid) + } /// Internal. 
Use the Hash trait to get a uid for the whole ModuleDescriptor pub fn hash(&self) -> u64 { let mut hasher = DefaultHasher::new(); diff --git a/lib/src/metta/runner/pkg_mgmt/git_cache.rs b/lib/src/metta/runner/pkg_mgmt/git_cache.rs index 2c6afad54..af07633e2 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_cache.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_cache.rs @@ -14,8 +14,6 @@ use std::io::prelude::*; use xxhash_rust::xxh3::xxh3_64; use git2::{*, build::*}; -use crate::metta::runner::environment::Environment; - const TIMESTAMP_FILENAME: &'static str = "_timestamp_"; /// Indicates the desired behavior for updating the locally-cached repo @@ -51,9 +49,8 @@ impl CachedRepo { /// For example this could be a version, for a MeTTa module catalog cache. /// * `url` - The remote URL from which to fetch the repo /// * `branch` - The branch to use, or default if None - pub fn new(env: &Environment, cache_name: Option<&str>, name: &str, ident_str: &str, url: &str, branch: Option<&str>) -> Result { + pub fn new(caches_dir: &Path, cache_name: Option<&str>, name: &str, ident_str: &str, url: &str, branch: Option<&str>) -> Result { let cache_name = cache_name.unwrap_or("git-modules"); - let caches_dir = env.caches_dir().ok_or_else(|| "Unable to clone git repository; no local \"caches\" directory available".to_string())?; let local_filename = if branch.is_some() || ident_str.len() > 0 { let branch_str = match &branch { @@ -198,7 +195,10 @@ impl CachedRepo { Ok(file_contents) => { let val = u64::from_str_radix(&file_contents, 16).unwrap(); let timestamp_time = UNIX_EPOCH.checked_add(Duration::from_secs(val)).unwrap(); - timestamp_time.elapsed().unwrap().as_secs() > secs + match timestamp_time.elapsed() { + Ok(duration_since_timestamp) => duration_since_timestamp.as_secs() > secs, + Err(_e) => false, //NOTE: for some reason the test harness overrides the time API to return time since boot, which wreaks havoc + } }, _ => true //No timestamp file means we should pull } diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index 753ba42c0..391225ba0 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -1,14 +1,14 @@ //! Implements a [ModuleCatalog] serving remotely hosted modules via git //! +use std::path::{Path, PathBuf}; use std::fs::read_to_string; use std::sync::Mutex; use serde::Deserialize; -use crate::metta::runner::modules::*; +use crate::metta::runner::*; use crate::metta::runner::pkg_mgmt::{*, git_cache::*}; -use crate::metta::runner::environment::Environment; /// Struct that matches the catalog.json file fetched from the `catalog.repo` #[derive(Deserialize, Debug)] @@ -21,11 +21,15 @@ struct CatalogFileFormat { struct CatalogFileMod { name: String, remote_url: String, + #[serde(default)] + branch: Option, } #[derive(Debug)] pub struct GitCatalog { name: String, + caches_dir: PathBuf, + fmts: Arc>>, refresh_time: u64, catalog_repo: CachedRepo, catalog: Mutex>, @@ -34,15 +38,41 @@ pub struct GitCatalog { impl GitCatalog { /// Creates a new GitCatalog with the name and url specified. 
`refresh_time` is the time, in /// seconds, between refreshes of the catalog file - pub fn new(env: &Environment, name: &str, url: &str, refresh_time: u64) -> Result { - let catalog_repo = CachedRepo::new(env, Some(&name), "catalog.repo", "", url, None)?; + pub fn new(caches_dir: &Path, fmts: Arc>>, name: &str, url: &str, refresh_time: u64) -> Result { + let catalog_repo = CachedRepo::new(caches_dir, Some(&name), "catalog.repo", "", url, None)?; Ok(Self { name: name.to_string(), + fmts, refresh_time, + caches_dir: caches_dir.to_owned(), catalog_repo, catalog: Mutex::new(None), }) } + fn find_mods_with_name(&self, name: &str) -> Vec { + let cat_lock = self.catalog.lock().unwrap(); + let catalog = cat_lock.as_ref().unwrap(); + let mut results = vec![]; + for cat_mod in catalog.modules.iter() { + if cat_mod.name == name { + //TODO: incorporate the name into the descriptor + let descriptor = ModuleDescriptor::new_with_ident_bytes_and_fmt_id(name.to_string(), cat_mod.remote_url.as_bytes(), 0); + results.push(descriptor); + } + } + results + } + fn find_mod_idx_with_descriptor(&self, descriptor: &ModuleDescriptor) -> Option { + let cat_lock = self.catalog.lock().unwrap(); + let catalog = cat_lock.as_ref().unwrap(); + for (mod_idx, cat_mod) in catalog.modules.iter().enumerate() { + //TODO: Also check version here + if cat_mod.name == descriptor.name() && descriptor.ident_bytes_and_fmt_id_matches(cat_mod.remote_url.as_bytes(), 0) { + return Some(mod_idx); + } + } + None + } } impl ModuleCatalog for GitCatalog { @@ -57,38 +87,41 @@ impl ModuleCatalog for GitCatalog { } }; - //Parse the catalog JSON file - if did_update { - let catalog_file_path = self.catalog_repo.local_path().join("catalog.json"); - match read_to_string(&catalog_file_path) { - Ok(file_contents) => { - let mut catalog = self.catalog.lock().unwrap(); - *catalog = Some(serde_json::from_str(&file_contents).unwrap()); - }, - Err(e) => { - log::warn!("Warning: Error reading catalog file. remote catalog appears to be corrupt: {}, {e}", self.name); - return vec![]; + //Parse the catalog JSON file if we need to + { + let mut catalog = self.catalog.lock().unwrap(); + if did_update || catalog.is_none() { + let catalog_file_path = self.catalog_repo.local_path().join("catalog.json"); + match read_to_string(&catalog_file_path) { + Ok(file_contents) => { + *catalog = Some(serde_json::from_str(&file_contents).unwrap()); + }, + Err(e) => { + log::warn!("Warning: Error reading catalog file. 
remote catalog appears to be corrupt: {}, {e}", self.name); + return vec![]; + } } } } //Find the modules that match in the catalog - let cat_lock = self.catalog.lock().unwrap(); - let catalog = cat_lock.as_ref().unwrap(); - let mut results = vec![]; - for cat_mod in catalog.modules.iter() { - if cat_mod.name == name { - let descriptor = ModuleDescriptor::new_with_ident_bytes_and_fmt_id(name.to_string(), cat_mod.remote_url.as_bytes(), 0); - results.push(descriptor); - } - } - - results + self.find_mods_with_name(name) } fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result, String> { + let mod_idx = self.find_mod_idx_with_descriptor(descriptor).unwrap(); + let version_str = ""; //TODO, get the version from the descriptor - //TODO-NOW: Make a ModuleLoader object that contains the CachedRepo for the URL for the module being loaded + let cat_lock = self.catalog.lock().unwrap(); + let catalog = cat_lock.as_ref().unwrap(); + let module = catalog.modules.get(mod_idx).unwrap(); + + let mod_repo = CachedRepo::new(&self.caches_dir, Some(&self.name), descriptor.name(), version_str, &module.remote_url, module.branch.as_ref().map(|s| s.as_str()))?; + let _ = mod_repo.update(UpdateMode::PullIfMissing)?; + let loader = match loader_for_module_at_path(self.fmts.iter().map(|f| &**f), mod_repo.local_path(), Some(descriptor.name()), None)? { + Some((loader, _)) => loader, + None => unreachable!() + }; - Err("TODO-NOW".to_string()) + Ok(loader) } } \ No newline at end of file diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index 64efd9d4e..1a53dfa63 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -369,7 +369,8 @@ impl Grounded for GitModuleOp { None => return Err(ExecError::from("git-module! error extracting module name from URL")) }; - let cached_mod = CachedRepo::new(self.metta.environment(), None, &mod_name, url, url, None)?; + let caches_dir = self.metta.environment().caches_dir().ok_or_else(|| "Unable to clone git repository; no local \"caches\" directory available".to_string())?; + let cached_mod = CachedRepo::new(caches_dir, None, &mod_name, url, url, None)?; cached_mod.update(UpdateMode::TryPullLatest)?; self.metta.load_module_at_path(cached_mod.local_path(), Some(&mod_name)).map_err(|e| ExecError::from(e))?; From 302add8709b79b4415ea048e2c43b985f34d438f Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 22 Apr 2024 12:41:11 +0900 Subject: [PATCH 09/77] Using ssl vendored through git2 crate and adding zlib-ng crate. Adding platform-specific includes for where the libssl depends on the underlying OS. 
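
More detail on the approach: the Rust git2 dependency now enables its vendored-openssl feature, so openssl-sys builds and statically links its own OpenSSL rather than expecting a system libssl, and zlib-ng-compat together with libz-ng-sys does the same for compression. What remains on the CMake side is a per-platform PLATFORM_LIBRARIES list (iconv and the CoreFoundation/Security frameworks on macOS, Ws2_32/Userenv/Crypt32 on Windows) covering the symbols the statically linked code still pulls from the OS. A quick sanity check that no shared libssl/libcrypto dependency sneaks back in, with illustrative paths only (where the built hyperonc library lands depends on the local build tree):

    ldd target/release/libhyperonc.so | grep -Ei 'ssl|crypto'           # Linux: expect no matches
    otool -L target/release/libhyperonc.dylib | grep -Ei 'ssl|crypto'   # macOS: expect no matches
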
--- c/CMakeLists.txt | 24 ++++++++++++++++++------ c/conanfile.txt | 8 ++++++++ c/tests/CMakeLists.txt | 10 +++++----- lib/Cargo.toml | 9 ++++----- python/CMakeLists.txt | 8 +------- 5 files changed, 36 insertions(+), 23 deletions(-) diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index 1d80827e8..e488b0946 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -62,13 +62,25 @@ set_target_properties(hyperonc-static PROPERTIES add_dependencies(hyperonc-static build-hyperonc) #NOTE: All these dependencies are needed to statically link the git2 functionality -find_library(GIT2_LIBRARY NAMES git2) +# find_library(GIT2_LIBRARY NAMES git2) find_library(ICONV_LIBRARY NAMES iconv) -find_library(Z_LIBRARY NAMES z) -find_library(CRYPTO_LIBRARY NAMES crypto) -find_library(SSL_LIBRARY NAMES ssl) -add_library(hyperonc-static-interface INTERFACE) -target_link_libraries(hyperonc-static-interface INTERFACE hyperonc-static ${GIT2_LIBRARY} ${ICONV_LIBRARY} ${Z_LIBRARY} ${SSL_LIBRARY} ${CRYPTO_LIBRARY}) +# find_library(Z_LIBRARY NAMES z) +# find_library(CRYPTO_LIBRARY NAMES crypto) +# find_library(SSL_LIBRARY NAMES ssl) + +set(PLATFORM_LIBRARIES ${ICONV_LIBRARY} CONAN_PKG::libgit2) + +if(APPLE) + # macOS specific libraries + list(APPEND PLATFORM_LIBRARIES "-framework CoreFoundation" "-framework Security") +elseif(UNIX AND NOT APPLE) + # Linux specific libraries + # list(APPEND PLATFORM_LIBRARIES ssl crypto pthread dl) +elseif(WIN32) + # Windows specific libraries + list(APPEND PLATFORM_LIBRARIES Ws2_32 Userenv Crypt32) +endif() + add_subdirectory(tests) diff --git a/c/conanfile.txt b/c/conanfile.txt index f6e0d3b24..d7ea66181 100644 --- a/c/conanfile.txt +++ b/c/conanfile.txt @@ -1,5 +1,13 @@ [requires] libcheck/0.15.2 +libgit2/1.5.0 + +#NOTE: I am using the Rust git2 crate's "vendored-openssl" feature because I couldn't find a version +# of the conan package with all needed symbols implemented +#openssl/3.0.12 + +#NOTE: This package doesn't seem to export anything all +#libiconv/1.17 [generators] cmake diff --git a/c/tests/CMakeLists.txt b/c/tests/CMakeLists.txt index 230f5bddd..fcc2151d5 100644 --- a/c/tests/CMakeLists.txt +++ b/c/tests/CMakeLists.txt @@ -5,21 +5,21 @@ set(TEST_SOURCES util.c ) add_executable(check_atom check_atom.c ${TEST_SOURCES}) -target_link_libraries(check_atom hyperonc-static-interface CONAN_PKG::libcheck) +target_link_libraries(check_atom hyperonc-static CONAN_PKG::libcheck ${PLATFORM_LIBRARIES}) add_test(NAME check_atom COMMAND check_atom) add_executable(check_space check_space.c ${TEST_SOURCES}) -target_link_libraries(check_space hyperonc-static-interface CONAN_PKG::libcheck) +target_link_libraries(check_space hyperonc-static CONAN_PKG::libcheck ${PLATFORM_LIBRARIES}) add_test(NAME check_space COMMAND check_space) add_executable(check_sexpr_parser check_sexpr_parser.c ${TEST_SOURCES}) -target_link_libraries(check_sexpr_parser hyperonc-static-interface CONAN_PKG::libcheck) +target_link_libraries(check_sexpr_parser hyperonc-static CONAN_PKG::libcheck ${PLATFORM_LIBRARIES}) add_test(NAME check_sexpr_parser COMMAND check_sexpr_parser) add_executable(check_types check_types.c ${TEST_SOURCES}) -target_link_libraries(check_types hyperonc-static-interface CONAN_PKG::libcheck) +target_link_libraries(check_types hyperonc-static CONAN_PKG::libcheck ${PLATFORM_LIBRARIES}) add_test(NAME check_types COMMAND check_types) add_executable(check_runner check_runner.c ${TEST_SOURCES}) -target_link_libraries(check_runner hyperonc-static-interface CONAN_PKG::libcheck) 
+target_link_libraries(check_runner hyperonc-static CONAN_PKG::libcheck ${PLATFORM_LIBRARIES}) add_test(NAME check_runner COMMAND check_runner) diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 5cfcfbdf5..c40e71933 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -15,10 +15,9 @@ bitset = "0.1.2" dyn-fmt = "0.4.0" # pkg_mgmt deps -xxhash-rust = {version="0.8.7", features=["xxh3"], optional=true} -#TODO: vendored-openssl is a lot slower to build, but static linking throws warnings -#git2 = {version="0.18.3", default-features=false, features=["vendored-openssl"], optional=true} -git2 = { version="0.18.3", optional=true} +xxhash-rust = {version="0.8.7", features=["xxh3"], optional=true } +git2 = { version="0.15.0", features=["vendored-openssl", "zlib-ng-compat"], optional=true } +libz-ng-sys = {version="1.1.15", optional=true } serde = { version="1.0.198", features = ["derive"], optional=true } serde_json = { version="1.0.116", optional=true } @@ -34,4 +33,4 @@ default = ["pkg_mgmt"] minimal = [] # enables minimal MeTTa interpreter variable_operation = [] # enables evaluation of the expressions which have # a variable on the first position -pkg_mgmt = ["xxhash-rust", "git2", "serde", "serde_json"] +pkg_mgmt = ["xxhash-rust", "git2", "serde", "serde_json", "libz-ng-sys"] diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 8c71e1498..ba2003d1a 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -52,14 +52,8 @@ include_directories(${nonstd_INCLUDE_DIRS}) find_package(hyperonc REQUIRED HINTS ${HYPERONC_INSTALL_PREFIX}) include_directories(${hyperonc_INCLUDE_DIRS}) -find_library(GIT2_LIBRARY NAMES git2) -find_library(ICONV_LIBRARY NAMES iconv) -find_library(Z_LIBRARY NAMES z) -find_library(CRYPTO_LIBRARY NAMES crypto) -find_library(SSL_LIBRARY NAMES ssl) - pybind11_add_module(hyperonpy MODULE ./hyperonpy.cpp) -target_link_libraries(hyperonpy PRIVATE "${hyperonc_STATIC_LIBRARY}" ${GIT2_LIBRARY} ${ICONV_LIBRARY} ${Z_LIBRARY} ${SSL_LIBRARY} ${CRYPTO_LIBRARY}) +target_link_libraries(hyperonpy PRIVATE "${hyperonc_STATIC_LIBRARY}") set(PYTHONPATH "${CMAKE_CURRENT_SOURCE_DIR}") add_subdirectory(tests) From ea7ca63a41ae7a233b8b4c7224f72a7ff12079c0 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 22 Apr 2024 12:56:15 +0900 Subject: [PATCH 10/77] Removing dependency on libiconv --- c/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index e488b0946..eba71053c 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -63,12 +63,13 @@ add_dependencies(hyperonc-static build-hyperonc) #NOTE: All these dependencies are needed to statically link the git2 functionality # find_library(GIT2_LIBRARY NAMES git2) -find_library(ICONV_LIBRARY NAMES iconv) +# find_library(ICONV_LIBRARY NAMES iconv) # find_library(Z_LIBRARY NAMES z) # find_library(CRYPTO_LIBRARY NAMES crypto) # find_library(SSL_LIBRARY NAMES ssl) -set(PLATFORM_LIBRARIES ${ICONV_LIBRARY} CONAN_PKG::libgit2) +# set(PLATFORM_LIBRARIES ${ICONV_LIBRARY} CONAN_PKG::libgit2) +set(PLATFORM_LIBRARIES CONAN_PKG::libgit2) if(APPLE) # macOS specific libraries From e62a08a704ddec9e0afc0caeda8b411c360ddb35 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 22 Apr 2024 13:41:30 +0900 Subject: [PATCH 11/77] Switching to vendored libgit, because it's 2 years newer than conan package and builds faster --- c/CMakeLists.txt | 22 +++++++--------------- c/conanfile.txt | 8 ++++---- lib/Cargo.toml | 2 +- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git 
a/c/CMakeLists.txt b/c/CMakeLists.txt index eba71053c..37f542e8f 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -61,24 +61,16 @@ set_target_properties(hyperonc-static PROPERTIES ) add_dependencies(hyperonc-static build-hyperonc) -#NOTE: All these dependencies are needed to statically link the git2 functionality -# find_library(GIT2_LIBRARY NAMES git2) -# find_library(ICONV_LIBRARY NAMES iconv) -# find_library(Z_LIBRARY NAMES z) -# find_library(CRYPTO_LIBRARY NAMES crypto) -# find_library(SSL_LIBRARY NAMES ssl) - -# set(PLATFORM_LIBRARIES ${ICONV_LIBRARY} CONAN_PKG::libgit2) -set(PLATFORM_LIBRARIES CONAN_PKG::libgit2) - +#NOTE/TODO These are all downstream dependencies needed by libgit2, and specifically the way we +# statically link libgit2 into the python module. Therefore I think we can revisit this and clean +# it up after we update the python module to depend directly on Rust. https://github.com/trueagi-io/hyperon-experimental/issues/283 +set(PLATFORM_LIBRARIES) if(APPLE) - # macOS specific libraries - list(APPEND PLATFORM_LIBRARIES "-framework CoreFoundation" "-framework Security") + find_library(ICONV_LIBRARY NAMES iconv) + list(APPEND PLATFORM_LIBRARIES ${ICONV_LIBRARY} "-framework CoreFoundation" "-framework Security") elseif(UNIX AND NOT APPLE) - # Linux specific libraries - # list(APPEND PLATFORM_LIBRARIES ssl crypto pthread dl) + # Nothing needed specifically for Linux right now elseif(WIN32) - # Windows specific libraries list(APPEND PLATFORM_LIBRARIES Ws2_32 Userenv Crypt32) endif() diff --git a/c/conanfile.txt b/c/conanfile.txt index d7ea66181..d2c55a8e8 100644 --- a/c/conanfile.txt +++ b/c/conanfile.txt @@ -1,13 +1,13 @@ [requires] libcheck/0.15.2 -libgit2/1.5.0 + +#NOTE: Switching to "vendored-libgit2" because the conan package is almost 2 years old and takes a lot +# longer to build +#libgit2/1.5.0 #NOTE: I am using the Rust git2 crate's "vendored-openssl" feature because I couldn't find a version # of the conan package with all needed symbols implemented #openssl/3.0.12 -#NOTE: This package doesn't seem to export anything all -#libiconv/1.17 - [generators] cmake diff --git a/lib/Cargo.toml b/lib/Cargo.toml index c40e71933..0ee3b14f6 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -16,7 +16,7 @@ dyn-fmt = "0.4.0" # pkg_mgmt deps xxhash-rust = {version="0.8.7", features=["xxh3"], optional=true } -git2 = { version="0.15.0", features=["vendored-openssl", "zlib-ng-compat"], optional=true } +git2 = { version="0.15.0", features=["vendored-libgit2", "vendored-openssl", "zlib-ng-compat"], optional=true } libz-ng-sys = {version="1.1.15", optional=true } serde = { version="1.0.198", features = ["derive"], optional=true } serde_json = { version="1.0.116", optional=true } From 5c5b6be73ec2538527deb65ae4c9bc26474c4c6e Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 22 Apr 2024 13:51:26 +0900 Subject: [PATCH 12/77] Using shared hyperon library for C tests and cleaning up platform-specific build experiments from using static lib --- c/CMakeLists.txt | 14 -------------- c/conanfile.txt | 8 -------- c/tests/CMakeLists.txt | 10 +++++----- 3 files changed, 5 insertions(+), 27 deletions(-) diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index 37f542e8f..0e94587c9 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -61,20 +61,6 @@ set_target_properties(hyperonc-static PROPERTIES ) add_dependencies(hyperonc-static build-hyperonc) -#NOTE/TODO These are all downstream dependencies needed by libgit2, and specifically the way we -# statically link libgit2 into the python 
module. Therefore I think we can revisit this and clean -# it up after we update the python module to depend directly on Rust. https://github.com/trueagi-io/hyperon-experimental/issues/283 -set(PLATFORM_LIBRARIES) -if(APPLE) - find_library(ICONV_LIBRARY NAMES iconv) - list(APPEND PLATFORM_LIBRARIES ${ICONV_LIBRARY} "-framework CoreFoundation" "-framework Security") -elseif(UNIX AND NOT APPLE) - # Nothing needed specifically for Linux right now -elseif(WIN32) - list(APPEND PLATFORM_LIBRARIES Ws2_32 Userenv Crypt32) -endif() - - add_subdirectory(tests) set(BINARY_INSTALL_PATH "lib/hyperonc") diff --git a/c/conanfile.txt b/c/conanfile.txt index d2c55a8e8..f6e0d3b24 100644 --- a/c/conanfile.txt +++ b/c/conanfile.txt @@ -1,13 +1,5 @@ [requires] libcheck/0.15.2 -#NOTE: Switching to "vendored-libgit2" because the conan package is almost 2 years old and takes a lot -# longer to build -#libgit2/1.5.0 - -#NOTE: I am using the Rust git2 crate's "vendored-openssl" feature because I couldn't find a version -# of the conan package with all needed symbols implemented -#openssl/3.0.12 - [generators] cmake diff --git a/c/tests/CMakeLists.txt b/c/tests/CMakeLists.txt index fcc2151d5..08ff526fb 100644 --- a/c/tests/CMakeLists.txt +++ b/c/tests/CMakeLists.txt @@ -5,21 +5,21 @@ set(TEST_SOURCES util.c ) add_executable(check_atom check_atom.c ${TEST_SOURCES}) -target_link_libraries(check_atom hyperonc-static CONAN_PKG::libcheck ${PLATFORM_LIBRARIES}) +target_link_libraries(check_atom hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_atom COMMAND check_atom) add_executable(check_space check_space.c ${TEST_SOURCES}) -target_link_libraries(check_space hyperonc-static CONAN_PKG::libcheck ${PLATFORM_LIBRARIES}) +target_link_libraries(check_space hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_space COMMAND check_space) add_executable(check_sexpr_parser check_sexpr_parser.c ${TEST_SOURCES}) -target_link_libraries(check_sexpr_parser hyperonc-static CONAN_PKG::libcheck ${PLATFORM_LIBRARIES}) +target_link_libraries(check_sexpr_parser hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_sexpr_parser COMMAND check_sexpr_parser) add_executable(check_types check_types.c ${TEST_SOURCES}) -target_link_libraries(check_types hyperonc-static CONAN_PKG::libcheck ${PLATFORM_LIBRARIES}) +target_link_libraries(check_types hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_types COMMAND check_types) add_executable(check_runner check_runner.c ${TEST_SOURCES}) -target_link_libraries(check_runner hyperonc-static CONAN_PKG::libcheck ${PLATFORM_LIBRARIES}) +target_link_libraries(check_runner hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_runner COMMAND check_runner) From de002bc5626d41305160c56bc27353ec03b03e2b Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 22 Apr 2024 18:44:07 +0900 Subject: [PATCH 13/77] Unifying logic to specify git modules in pkginfo and git modules in the git-catalog --- lib/src/metta/runner/pkg_mgmt/catalog.rs | 40 +++++---- lib/src/metta/runner/pkg_mgmt/git_cache.rs | 35 +++++--- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 86 +++++++++++++++++--- lib/src/metta/runner/stdlib.rs | 6 +- 4 files changed, 122 insertions(+), 45 deletions(-) diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index 4eeb9439f..713da164e 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -77,9 +77,12 @@ use std::ffi::{OsStr, OsString}; use crate::metta::text::OwnedSExprParser; use 
crate::metta::runner::modules::*; -use crate::metta::runner::{*, git_cache::*}; +use crate::metta::runner::{*, git_catalog::*}; use xxhash_rust::xxh3::xxh3_64; +use serde::Deserialize; + +pub(crate) const EXPLICIT_GIT_MOD_CACHE_DIR: &'static str = "git-modules"; /// Implemented for types capable of locating MeTTa modules /// @@ -133,21 +136,16 @@ pub struct PkgInfo { } /// A single entry in a [PkgInfo]'s dependencies, specifying the properties of a module that will satisfy a dependency -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug, Default, Deserialize)] pub struct DepEntry { /// Indicates that the dependency module should be loaded from a specific FS path /// /// If the fs_path is specified, the other pkg_info attributes will be ignored. - //QUESTION: We need a MeTTa "style guide" for these field names, since they are effective going - // to be part of the API, because a PkgInfo will be deserialized from atoms + #[serde(default)] pub fs_path: Option, - /// Indicates that the dependency module should be fetched from the specified `git` URL - pub git_url: Option, - - /// A `git`` branch to fetch. Will be ignored if `git_url` is `None`. Uses the repo's - /// default branch if left unspecified - pub git_branch: Option, + #[serde(flatten)] + git_location: ModuleGitLocation, //TODO: field to indicate acceptable version range for dependency } @@ -172,13 +170,9 @@ impl PkgInfo { return loader_for_module_at_path(context.metta.environment().fs_mod_formats(), path, Some(mod_name), context.module().resource_dir()); } - //If a git URL is specified in the dep entry, see if we have it in the git-cache and - // clone it locally if we don't - if let Some(url) = &entry.git_url { - let caches_dir = context.metta.environment().caches_dir().ok_or_else(|| "Unable to clone git repository; no local \"caches\" directory available".to_string())?; - let cached_mod = CachedRepo::new(caches_dir, None, mod_name, url, url, entry.git_branch.as_ref().map(|s| s.as_str()))?; - cached_mod.update(UpdateMode::PullIfMissing)?; - return loader_for_module_at_path(context.metta.environment().fs_mod_formats(), cached_mod.local_path(), Some(mod_name), context.module().resource_dir()); + //Get the module if it's specified with git keys + if let Some(pair) = entry.git_location.get_loader(context.metta.environment().fs_mod_formats(), context.metta.environment().caches_dir(), EXPLICIT_GIT_MOD_CACHE_DIR, mod_name, None)? { + return Ok(Some(pair)); } //TODO, If a version range is specified in the dep entry, then use that version range to specify @@ -650,9 +644,13 @@ impl ModuleLoader for TestLoader { pkg_info.name = "test-mod".to_string(); pkg_info.deps.insert("metta-morph".to_string(), DepEntry{ fs_path: None, - //TODO: We probably want a smaller test repo - git_url: Some("https://github.com/trueagi-io/metta-morph/".to_string()), - git_branch: None, //Some("Hyperpose".to_string()), + git_location: ModuleGitLocation { + //TODO: We probably want a smaller test repo + git_url: Some("https://github.com/trueagi-io/metta-morph/".to_string()), + git_branch: None, //Some("Hyperpose".to_string()), + git_subdir: None, + git_main_file: Some(PathBuf::from("mettamorph.metta")), + } }); Ok(()) @@ -672,7 +670,7 @@ fn git_pkginfo_fetch_test() { let runner = Metta::new(Some(EnvBuilder::new().set_config_dir(Path::new("/tmp/hyperon-test/")))); let _mod_id = runner.load_module_direct(Box::new(TestLoader), "test-mod").unwrap(); - let result = runner.run(SExprParser::new("!(import! 
&self test-mod:metta-morph:mettamorph)")); + let result = runner.run(SExprParser::new("!(import! &self test-mod:metta-morph)")); assert_eq!(result, Ok(vec![vec![expr!()]])); //Test that we can use a function imported from the module diff --git a/lib/src/metta/runner/pkg_mgmt/git_cache.rs b/lib/src/metta/runner/pkg_mgmt/git_cache.rs index af07633e2..040770386 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_cache.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_cache.rs @@ -36,7 +36,9 @@ pub struct CachedRepo { name: String, url: String, branch: Option, + repo_local_path: PathBuf, local_path: PathBuf, + _subdir: Option, } impl CachedRepo { @@ -49,8 +51,7 @@ impl CachedRepo { /// For example this could be a version, for a MeTTa module catalog cache. /// * `url` - The remote URL from which to fetch the repo /// * `branch` - The branch to use, or default if None - pub fn new(caches_dir: &Path, cache_name: Option<&str>, name: &str, ident_str: &str, url: &str, branch: Option<&str>) -> Result { - let cache_name = cache_name.unwrap_or("git-modules"); + pub fn new(caches_dir: &Path, cache_name: &str, name: &str, ident_str: &str, url: &str, branch: Option<&str>, subdir: Option<&Path>) -> Result { let local_filename = if branch.is_some() || ident_str.len() > 0 { let branch_str = match &branch { @@ -62,22 +63,30 @@ impl CachedRepo { } else { name.to_string() }; - let local_path = caches_dir.join(cache_name).join(local_filename); + let repo_local_path = caches_dir.join(cache_name).join(local_filename); + let local_path = match subdir { + Some(subdir) => repo_local_path.join(subdir), + None => repo_local_path.clone() + }; - std::fs::create_dir_all(&local_path).map_err(|e| e.to_string())?; + std::fs::create_dir_all(&repo_local_path).map_err(|e| e.to_string())?; Ok(Self { name: name.to_string(), url: url.to_string(), branch: branch.map(|s| s.to_owned()), local_path, + repo_local_path, + _subdir: subdir.map(|s| s.to_owned()) }) } /// Updates a local cached repo with a remote repo, using `mode` behavior. Returns `true` if the /// repo was updated, and `false` if the repo was left unchanged pub fn update(&self, mode: UpdateMode) -> Result { - match Repository::open(&self.local_path) { + + //TODO: If there is a subdir then we can perform a sparse checkout and avoid cloning unnecessary data + match Repository::open(self.repo_local_path()) { //We have an existing repo on disk Ok(repo) => { @@ -120,7 +129,7 @@ impl CachedRepo { }, None => {} } - match repo_builder.clone(&self.url, &self.local_path) { + match repo_builder.clone(&self.url, self.repo_local_path()) { Ok(_repo) => { self.write_timestamp_file()?; Ok(true) @@ -156,7 +165,7 @@ impl CachedRepo { repo.checkout_tree(&repo.find_object(annotated_commit.id(), Some(ObjectType::Commit))?, Some(CheckoutBuilder::default().force()))?; repo.set_head(branch_ref.name().unwrap())?; } else { - panic!("Fatal Error: cached git repository at \"{}\" appears to be corrupt", self.local_path.display()); + panic!("Fatal Error: cached git repository at \"{}\" appears to be corrupt", self.repo_local_path().display()); //NOTE: the below code appears to work, but it isn't needed at the moment // // // Normal merge... 
@@ -179,7 +188,7 @@ impl CachedRepo { /// Internal function to write the timestamp file, with the value of "now" fn write_timestamp_file(&self) -> Result<(), String> { let duration_since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap(); - let file_path = self.local_path.join(TIMESTAMP_FILENAME); + let file_path = self.repo_local_path().join(TIMESTAMP_FILENAME); let mut file = File::create(&file_path).map_err(|e| format!("Error creating timestamp file at {}, {e}", file_path.display()))?; file.write_all(&format!("{:016x}", duration_since_epoch.as_secs()).into_bytes()) .map_err(|e| format!("Error writing file: {}, {e}", file_path.display())) @@ -190,7 +199,7 @@ impl CachedRepo { fn check_timestamp(&self, mode: UpdateMode) -> bool { match mode { UpdateMode::TryPullIfOlderThan(secs) => { - let file_path = self.local_path.join(TIMESTAMP_FILENAME); + let file_path = self.repo_local_path().join(TIMESTAMP_FILENAME); match read_to_string(&file_path) { Ok(file_contents) => { let val = u64::from_str_radix(&file_contents, 16).unwrap(); @@ -207,8 +216,14 @@ impl CachedRepo { } } - /// Returns the file system path for the locally cloned repository + /// Returns the file system path for the locally cloned data, respecting `subdir` if there is one pub fn local_path(&self) -> &Path { &self.local_path } + + /// Returns the file system path for the top of the locally cloned repository + pub fn repo_local_path(&self) -> &Path { + &self.repo_local_path + } + } diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index 391225ba0..c3ecea9b8 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -10,6 +10,73 @@ use serde::Deserialize; use crate::metta::runner::*; use crate::metta::runner::pkg_mgmt::{*, git_cache::*}; +//TODO: +// * Need a function to clean up local repos that have been removed from the catalog file +// * Need a function to delete a whole catalog cache. Both of these interfaces should probably +// be added to the catalog trait as optional methods. + +/// A set of keys describing how to access a module via git. Deserialized from within a [PkgInfo] +/// or a catalog file [CatalogFileFormat] +#[derive(Clone, Debug, Default, Deserialize)] +pub struct ModuleGitLocation { + /// Indicates that the dependency module should be fetched from the specified `git` URL + #[serde(default)] + pub git_url: Option, + + /// A `git`` branch to fetch. Will be ignored if `git_url` is `None`. Uses the repo's + /// default branch if left unspecified + #[serde(default)] + pub git_branch: Option, + + /// A subdirectory within the git repo to use as the module, effectively ignoring the rest + /// of the repo contents. The subdir must be a relative path within the repo. + #[serde(default)] + pub git_subdir: Option, + + /// A file within the git repo to use as the module. The file path must be a relative path + /// within the repo or `git_subdir` directory if provided. 
+ #[serde(default)] + pub git_main_file: Option, +} + +impl ModuleGitLocation { + pub(crate) fn get_loader<'a, FmtIter: Iterator>(&self, fmts: FmtIter, caches_dir: Option<&Path>, cache_name: &str, mod_name: &str, ident_str: Option<&str>) -> Result, ModuleDescriptor)>, String> { + + //If a git URL is specified in the entry, see if we have it in the git-cache and + // clone it locally if we don't + if self.git_url.is_some() { + let cached_repo = self.get_cache(caches_dir, cache_name, mod_name, ident_str)?; + cached_repo.update(UpdateMode::PullIfMissing)?; + + let mod_path = match &self.git_main_file { + Some(main_file) => cached_repo.local_path().join(main_file), + None => cached_repo.local_path().to_owned(), + }; + return loader_for_module_at_path(fmts, &mod_path, Some(mod_name), None); + } + + Ok(None) + } + pub(crate) fn get_cache(&self, caches_dir: Option<&Path>, cache_name: &str, mod_name: &str, ident_str: Option<&str>) -> Result { + let caches_dir = caches_dir.ok_or_else(|| "Unable to clone git repository; no local \"caches\" directory available".to_string())?; + let url = self.git_url.as_ref().unwrap(); + let ident_str = match ident_str { + Some(ident_str) => ident_str, + None => url, + }; + CachedRepo::new(caches_dir, cache_name, mod_name, ident_str, url, self.git_branch.as_ref().map(|s| s.as_str()), self.git_subdir.as_ref().map(|p| p.as_path())) + } + /// Returns a new ModuleGitLocation. This is a convenience; the usual interface involves deserializing this struct + pub(crate) fn new(url: String) -> Self { + let mut new_self = Self::default(); + new_self.git_url = Some(url); + new_self + } + pub(crate) fn get_url(&self) -> Option<&str> { + self.git_url.as_ref().map(|s| s.as_str()) + } +} + /// Struct that matches the catalog.json file fetched from the `catalog.repo` #[derive(Deserialize, Debug)] struct CatalogFileFormat { @@ -20,9 +87,8 @@ struct CatalogFileFormat { #[derive(Deserialize, Debug)] struct CatalogFileMod { name: String, - remote_url: String, - #[serde(default)] - branch: Option, + #[serde(flatten)] + git_location: ModuleGitLocation } #[derive(Debug)] @@ -39,7 +105,7 @@ impl GitCatalog { /// Creates a new GitCatalog with the name and url specified. 
`refresh_time` is the time, in /// seconds, between refreshes of the catalog file pub fn new(caches_dir: &Path, fmts: Arc>>, name: &str, url: &str, refresh_time: u64) -> Result { - let catalog_repo = CachedRepo::new(caches_dir, Some(&name), "catalog.repo", "", url, None)?; + let catalog_repo = CachedRepo::new(caches_dir, &name, "catalog.repo", "", url, None, None)?; Ok(Self { name: name.to_string(), fmts, @@ -55,8 +121,8 @@ impl GitCatalog { let mut results = vec![]; for cat_mod in catalog.modules.iter() { if cat_mod.name == name { - //TODO: incorporate the name into the descriptor - let descriptor = ModuleDescriptor::new_with_ident_bytes_and_fmt_id(name.to_string(), cat_mod.remote_url.as_bytes(), 0); + //TODO: incorporate the name into the descriptor's ident bytes + let descriptor = ModuleDescriptor::new_with_ident_bytes_and_fmt_id(name.to_string(), cat_mod.git_location.get_url().unwrap().as_bytes(), 0); results.push(descriptor); } } @@ -67,7 +133,7 @@ impl GitCatalog { let catalog = cat_lock.as_ref().unwrap(); for (mod_idx, cat_mod) in catalog.modules.iter().enumerate() { //TODO: Also check version here - if cat_mod.name == descriptor.name() && descriptor.ident_bytes_and_fmt_id_matches(cat_mod.remote_url.as_bytes(), 0) { + if cat_mod.name == descriptor.name() && descriptor.ident_bytes_and_fmt_id_matches(cat_mod.git_location.get_url().unwrap().as_bytes(), 0) { return Some(mod_idx); } } @@ -115,11 +181,9 @@ impl ModuleCatalog for GitCatalog { let catalog = cat_lock.as_ref().unwrap(); let module = catalog.modules.get(mod_idx).unwrap(); - let mod_repo = CachedRepo::new(&self.caches_dir, Some(&self.name), descriptor.name(), version_str, &module.remote_url, module.branch.as_ref().map(|s| s.as_str()))?; - let _ = mod_repo.update(UpdateMode::PullIfMissing)?; - let loader = match loader_for_module_at_path(self.fmts.iter().map(|f| &**f), mod_repo.local_path(), Some(descriptor.name()), None)? { + let loader = match module.git_location.get_loader(self.fmts.iter().map(|f| &**f), Some(&self.caches_dir), &self.name, descriptor.name(), Some(&version_str))? { Some((loader, _)) => loader, - None => unreachable!() + None => unreachable!(), }; Ok(loader) diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index 1a53dfa63..92aa62e75 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -5,7 +5,7 @@ use crate::metta::*; use crate::metta::text::Tokenizer; use crate::metta::text::SExprParser; use crate::metta::runner::{Metta, RunContext, ModuleLoader, ResourceKey, mod_name_from_url}; -use crate::metta::runner::git_cache::{CachedRepo, UpdateMode}; +use crate::metta::runner::{EXPLICIT_GIT_MOD_CACHE_DIR, git_catalog::ModuleGitLocation, git_cache::UpdateMode}; use crate::metta::types::{get_atom_types, get_meta_type}; use crate::common::shared::Shared; use crate::common::CachingMapper; @@ -369,8 +369,8 @@ impl Grounded for GitModuleOp { None => return Err(ExecError::from("git-module! 
error extracting module name from URL")) }; - let caches_dir = self.metta.environment().caches_dir().ok_or_else(|| "Unable to clone git repository; no local \"caches\" directory available".to_string())?; - let cached_mod = CachedRepo::new(caches_dir, None, &mod_name, url, url, None)?; + let git_mod_location = ModuleGitLocation::new(url.to_string()); + let cached_mod = git_mod_location.get_cache(self.metta.environment().caches_dir(), EXPLICIT_GIT_MOD_CACHE_DIR, &mod_name, None)?; cached_mod.update(UpdateMode::TryPullLatest)?; self.metta.load_module_at_path(cached_mod.local_path(), Some(&mod_name)).map_err(|e| ExecError::from(e))?; From f8ff55e45df9c64dd25086b41d21722fd36bb59a Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Tue, 23 Apr 2024 10:24:05 +0900 Subject: [PATCH 14/77] Comments update --- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index c3ecea9b8..4df89ffec 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -14,6 +14,9 @@ use crate::metta::runner::pkg_mgmt::{*, git_cache::*}; // * Need a function to clean up local repos that have been removed from the catalog file // * Need a function to delete a whole catalog cache. Both of these interfaces should probably // be added to the catalog trait as optional methods. +// * Funtion to trigger explicit updates. Accessible from metta ops +// - Update specific module, update to a specific version, latest, or latest stable +// - update all modules, to latest or latest stable /// A set of keys describing how to access a module via git. Deserialized from within a [PkgInfo] /// or a catalog file [CatalogFileFormat] From 4f23d1a1fd38d7141d14b9015ce8033384dbd92e Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Tue, 23 Apr 2024 15:38:00 +0900 Subject: [PATCH 15/77] Removing vendored OpenSSL from Cargo.toml, and trying to use Conan to fetch and build OpenSSL. Currently broken. 
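
The intent is to stop vendoring OpenSSL inside the git2 crate and instead resolve it through Conan (openssl/3.2.1 in the conanfile change below), force-loading the resulting static libssl.a/libcrypto.a into the hyperonpy extension. Roughly, the sequence this drives is the following, assuming the usual out-of-tree build directory for the Python bindings (the directory name here is an assumption):

    cd python/build
    conan install --build -- ..     # fetches and builds openssl/3.2.1 per conanfile.txt
    cmake .. && cmake --build .     # links hyperonpy against the Conan-provided libssl.a/libcrypto.a

As the subject says, this does not yet link cleanly; the next commit backs the Conan OpenSSL requirement out again and links a dynamic OpenSSL instead.
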
--- c/conanfile.txt | 6 ++++++ lib/Cargo.toml | 6 +++--- python/CMakeLists.txt | 16 +++++++++++++++- python/conanfile.txt | 7 +++++++ 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/c/conanfile.txt b/c/conanfile.txt index f6e0d3b24..9abcd5a4f 100644 --- a/c/conanfile.txt +++ b/c/conanfile.txt @@ -1,5 +1,11 @@ [requires] libcheck/0.15.2 +openssl/3.2.1 [generators] cmake +CMakeDeps +CMakeToolchain + +[layout] +cmake_layout \ No newline at end of file diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 0ee3b14f6..b13a9724f 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -16,8 +16,8 @@ dyn-fmt = "0.4.0" # pkg_mgmt deps xxhash-rust = {version="0.8.7", features=["xxh3"], optional=true } -git2 = { version="0.15.0", features=["vendored-libgit2", "vendored-openssl", "zlib-ng-compat"], optional=true } -libz-ng-sys = {version="1.1.15", optional=true } +git2 = { version="0.15.0", features=["vendored-libgit2"], optional=true } +libz-sys = { version="1.1.16", optional=true } serde = { version="1.0.198", features = ["derive"], optional=true } serde_json = { version="1.0.116", optional=true } @@ -33,4 +33,4 @@ default = ["pkg_mgmt"] minimal = [] # enables minimal MeTTa interpreter variable_operation = [] # enables evaluation of the expressions which have # a variable on the first position -pkg_mgmt = ["xxhash-rust", "git2", "serde", "serde_json", "libz-ng-sys"] +pkg_mgmt = ["xxhash-rust", "git2", "serde", "serde_json", "libz-sys"] diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index ba2003d1a..39e492f6f 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -45,6 +45,8 @@ execute_process( COMMAND conan install --build -- ${CMAKE_CURRENT_SOURCE_DIR} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) include(${CMAKE_CURRENT_BINARY_DIR}/conan_paths.cmake) +include(${CMAKE_CURRENT_BINARY_DIR}/conanbuildinfo.cmake) +conan_basic_setup(TARGETS) find_package(pybind11 REQUIRED) find_package(optional-lite REQUIRED) @@ -52,8 +54,20 @@ include_directories(${nonstd_INCLUDE_DIRS}) find_package(hyperonc REQUIRED HINTS ${HYPERONC_INSTALL_PREFIX}) include_directories(${hyperonc_INCLUDE_DIRS}) +get_target_property(openssl_libs CONAN_PKG::openssl INTERFACE_LINK_LIBRARIES) + +set(OpenSSL_ROOT_DIR ${CONAN_OPENSSL_ROOT} ${CONAN_LIB_DIRS_OPENSSL}) +set(OPENSSL_CRYPTO_LIBRARY ${CONAN_LIBS_OPENSSL_CRYPTO}) +set(OPENSSL_SSL_LIBRARY ${CONAN_LIBS_OPENSSL_SSL}) +find_package(OpenSSL REQUIRED) + +set(FULL_OPENSSL_SSL_LIBRARY "${CONAN_LIB_DIRS_OPENSSL}/libssl.a") +set(FULL_OPENSSL_CRYPTO_LIBRARY "${CONAN_LIB_DIRS_OPENSSL}/libcrypto.a") + +link_directories(${CONAN_LIB_DIRS_OPENSSL}) pybind11_add_module(hyperonpy MODULE ./hyperonpy.cpp) -target_link_libraries(hyperonpy PRIVATE "${hyperonc_STATIC_LIBRARY}") +set_target_properties(hyperonpy PROPERTIES LINK_FLAGS "-Wl,-force_load,${FULL_OPENSSL_SSL_LIBRARY} -Wl,-force_load,${FULL_OPENSSL_CRYPTO_LIBRARY}") +target_link_libraries(hyperonpy PRIVATE "${hyperonc_STATIC_LIBRARY}" ssl crypto) set(PYTHONPATH "${CMAKE_CURRENT_SOURCE_DIR}") add_subdirectory(tests) diff --git a/python/conanfile.txt b/python/conanfile.txt index aba4d1f13..5acaa67c7 100644 --- a/python/conanfile.txt +++ b/python/conanfile.txt @@ -1,7 +1,14 @@ [requires] pybind11/2.10.1 optional-lite/3.5.0 +openssl/3.2.1 [generators] +cmake +CMakeDeps +CMakeToolchain cmake_paths cmake_find_package + +[layout] +cmake_layout \ No newline at end of file From 2da4c5ea4994cbd0f4a34c555d281f66ae6f8488 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 25 Apr 2024 09:07:11 +0900 Subject: [PATCH 16/77] 
Removing conan openssl dependency, and linking dynamic openssl --- c/conanfile.txt | 6 ------ python/CMakeLists.txt | 19 ++++--------------- python/conanfile.txt | 7 ------- 3 files changed, 4 insertions(+), 28 deletions(-) diff --git a/c/conanfile.txt b/c/conanfile.txt index 9abcd5a4f..f6e0d3b24 100644 --- a/c/conanfile.txt +++ b/c/conanfile.txt @@ -1,11 +1,5 @@ [requires] libcheck/0.15.2 -openssl/3.2.1 [generators] cmake -CMakeDeps -CMakeToolchain - -[layout] -cmake_layout \ No newline at end of file diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 39e492f6f..337b737cb 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -45,8 +45,8 @@ execute_process( COMMAND conan install --build -- ${CMAKE_CURRENT_SOURCE_DIR} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) include(${CMAKE_CURRENT_BINARY_DIR}/conan_paths.cmake) -include(${CMAKE_CURRENT_BINARY_DIR}/conanbuildinfo.cmake) -conan_basic_setup(TARGETS) + +find_package(OpenSSL REQUIRED) find_package(pybind11 REQUIRED) find_package(optional-lite REQUIRED) @@ -54,20 +54,9 @@ include_directories(${nonstd_INCLUDE_DIRS}) find_package(hyperonc REQUIRED HINTS ${HYPERONC_INSTALL_PREFIX}) include_directories(${hyperonc_INCLUDE_DIRS}) -get_target_property(openssl_libs CONAN_PKG::openssl INTERFACE_LINK_LIBRARIES) - -set(OpenSSL_ROOT_DIR ${CONAN_OPENSSL_ROOT} ${CONAN_LIB_DIRS_OPENSSL}) -set(OPENSSL_CRYPTO_LIBRARY ${CONAN_LIBS_OPENSSL_CRYPTO}) -set(OPENSSL_SSL_LIBRARY ${CONAN_LIBS_OPENSSL_SSL}) -find_package(OpenSSL REQUIRED) - -set(FULL_OPENSSL_SSL_LIBRARY "${CONAN_LIB_DIRS_OPENSSL}/libssl.a") -set(FULL_OPENSSL_CRYPTO_LIBRARY "${CONAN_LIB_DIRS_OPENSSL}/libcrypto.a") - -link_directories(${CONAN_LIB_DIRS_OPENSSL}) pybind11_add_module(hyperonpy MODULE ./hyperonpy.cpp) -set_target_properties(hyperonpy PROPERTIES LINK_FLAGS "-Wl,-force_load,${FULL_OPENSSL_SSL_LIBRARY} -Wl,-force_load,${FULL_OPENSSL_CRYPTO_LIBRARY}") -target_link_libraries(hyperonpy PRIVATE "${hyperonc_STATIC_LIBRARY}" ssl crypto) +target_link_libraries(hyperonpy PRIVATE "${hyperonc_STATIC_LIBRARY}") +target_link_libraries(hyperonpy PUBLIC OpenSSL::SSL OpenSSL::Crypto) set(PYTHONPATH "${CMAKE_CURRENT_SOURCE_DIR}") add_subdirectory(tests) diff --git a/python/conanfile.txt b/python/conanfile.txt index 5acaa67c7..aba4d1f13 100644 --- a/python/conanfile.txt +++ b/python/conanfile.txt @@ -1,14 +1,7 @@ [requires] pybind11/2.10.1 optional-lite/3.5.0 -openssl/3.2.1 [generators] -cmake -CMakeDeps -CMakeToolchain cmake_paths cmake_find_package - -[layout] -cmake_layout \ No newline at end of file From 5e089c1073cba485c6d8c570de9fc12063df9b55 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 25 Apr 2024 10:24:29 +0900 Subject: [PATCH 17/77] Adding openssl dependency to build environment for Python wheel --- python/install-hyperonc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/install-hyperonc.sh b/python/install-hyperonc.sh index 26f0ff03a..7aef02b6c 100755 --- a/python/install-hyperonc.sh +++ b/python/install-hyperonc.sh @@ -22,7 +22,7 @@ echo "hyperonc revision $HYPERONC_REV" # This is to build subunit from Conan on CentOS based manylinux images. 
if test "$AUDITWHEEL_POLICY" = "manylinux2014"; then - yum install -y perl-devel + yum install -y perl-devel openssl-devel fi curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > /tmp/rustup.sh From 8565468b90c39f4f10991a37fb28dd26d82b62f5 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 25 Apr 2024 10:38:39 +0900 Subject: [PATCH 18/77] Also adding pkgconfig to build image --- python/install-hyperonc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/install-hyperonc.sh b/python/install-hyperonc.sh index 7aef02b6c..850bc8d16 100755 --- a/python/install-hyperonc.sh +++ b/python/install-hyperonc.sh @@ -22,7 +22,7 @@ echo "hyperonc revision $HYPERONC_REV" # This is to build subunit from Conan on CentOS based manylinux images. if test "$AUDITWHEEL_POLICY" = "manylinux2014"; then - yum install -y perl-devel openssl-devel + yum install -y pkgconfig perl-devel openssl-devel fi curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > /tmp/rustup.sh From 25b603c9ad72b87dd8f7cec054b5105106e14038 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 25 Apr 2024 10:56:09 +0900 Subject: [PATCH 19/77] Trying installation of openssl-devel on all images --- python/install-hyperonc.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/install-hyperonc.sh b/python/install-hyperonc.sh index 850bc8d16..5d51b36fa 100755 --- a/python/install-hyperonc.sh +++ b/python/install-hyperonc.sh @@ -22,9 +22,11 @@ echo "hyperonc revision $HYPERONC_REV" # This is to build subunit from Conan on CentOS based manylinux images. if test "$AUDITWHEEL_POLICY" = "manylinux2014"; then - yum install -y pkgconfig perl-devel openssl-devel + yum install -y perl-devel fi +yum install -y openssl-devel + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > /tmp/rustup.sh sh /tmp/rustup.sh -y && rm /tmp/rustup.sh export PATH="${PATH}:${HOME}/.cargo/bin" From 6401db747ce3b992eff5aad16a55ad7f603ce317 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 25 Apr 2024 11:09:17 +0900 Subject: [PATCH 20/77] Installing both openssl and openssl-devel pkg through yum on python wheel build script --- python/install-hyperonc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/install-hyperonc.sh b/python/install-hyperonc.sh index 5d51b36fa..63b7f97b4 100755 --- a/python/install-hyperonc.sh +++ b/python/install-hyperonc.sh @@ -25,7 +25,7 @@ if test "$AUDITWHEEL_POLICY" = "manylinux2014"; then yum install -y perl-devel fi -yum install -y openssl-devel +yum install -y openssl openssl-devel curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > /tmp/rustup.sh sh /tmp/rustup.sh -y && rm /tmp/rustup.sh From 6b18f84b9d304fa833519463fd4ce2e750e82dfd Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 25 Apr 2024 11:51:59 +0900 Subject: [PATCH 21/77] Removing zlib dependency in Cargo.toml because it's implied Explicitly linking hyperonc shared lib because CI is failing --- c/CMakeLists.txt | 3 +++ lib/Cargo.toml | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index 0e94587c9..8fe43ccd1 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -71,6 +71,8 @@ set(STATIC_LIBRARY_INSTALL_PATH "${BINARY_INSTALL_PATH}/${HYPERONC_STATIC_LIB_FI include(CMakePackageConfigHelpers) if(BUILD_SHARED_LIBS) + find_package(OpenSSL REQUIRED) + set(HYPERONC_SHARED_LIB_FILE ${CMAKE_SHARED_LIBRARY_PREFIX}hyperonc${CMAKE_SHARED_LIBRARY_SUFFIX}) set(HYPERONC_SHARED_LIB_PATH 
${HYPERONC_TARGET_DIR}/${HYPERONC_SHARED_LIB_FILE}) set(SHARED_LIBRARY_INSTALL_PATH "${BINARY_INSTALL_PATH}/${HYPERONC_SHARED_LIB_FILE}") @@ -90,6 +92,7 @@ if(BUILD_SHARED_LIBS) # required to import hyperonc-shared by name not by relative path IMPORTED_NO_SONAME TRUE ) + target_link_libraries(hyperonc-shared INTERFACE OpenSSL::SSL OpenSSL::Crypto) add_dependencies(hyperonc-shared copy-hyperonc-shared build-hyperonc) install(FILES "${HYPERONC_SHARED_LIB_PATH}" diff --git a/lib/Cargo.toml b/lib/Cargo.toml index b13a9724f..ff309f24b 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -17,7 +17,6 @@ dyn-fmt = "0.4.0" # pkg_mgmt deps xxhash-rust = {version="0.8.7", features=["xxh3"], optional=true } git2 = { version="0.15.0", features=["vendored-libgit2"], optional=true } -libz-sys = { version="1.1.16", optional=true } serde = { version="1.0.198", features = ["derive"], optional=true } serde_json = { version="1.0.116", optional=true } @@ -33,4 +32,4 @@ default = ["pkg_mgmt"] minimal = [] # enables minimal MeTTa interpreter variable_operation = [] # enables evaluation of the expressions which have # a variable on the first position -pkg_mgmt = ["xxhash-rust", "git2", "serde", "serde_json", "libz-sys"] +pkg_mgmt = ["xxhash-rust", "git2", "serde", "serde_json"] From 347655ddb3251c286e5294a1c38f16e89577654e Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 25 Apr 2024 12:08:35 +0900 Subject: [PATCH 22/77] Adding zlib dependency --- c/CMakeLists.txt | 3 ++- python/CMakeLists.txt | 3 ++- python/install-hyperonc.sh | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index 8fe43ccd1..7b9877a15 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -72,6 +72,7 @@ include(CMakePackageConfigHelpers) if(BUILD_SHARED_LIBS) find_package(OpenSSL REQUIRED) + find_package(ZLIB REQUIRED) set(HYPERONC_SHARED_LIB_FILE ${CMAKE_SHARED_LIBRARY_PREFIX}hyperonc${CMAKE_SHARED_LIBRARY_SUFFIX}) set(HYPERONC_SHARED_LIB_PATH ${HYPERONC_TARGET_DIR}/${HYPERONC_SHARED_LIB_FILE}) @@ -92,7 +93,7 @@ if(BUILD_SHARED_LIBS) # required to import hyperonc-shared by name not by relative path IMPORTED_NO_SONAME TRUE ) - target_link_libraries(hyperonc-shared INTERFACE OpenSSL::SSL OpenSSL::Crypto) + target_link_libraries(hyperonc-shared INTERFACE OpenSSL::SSL OpenSSL::Crypto ZLIB::ZLIB) add_dependencies(hyperonc-shared copy-hyperonc-shared build-hyperonc) install(FILES "${HYPERONC_SHARED_LIB_PATH}" diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 337b737cb..bf3347eaa 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -47,6 +47,7 @@ execute_process( include(${CMAKE_CURRENT_BINARY_DIR}/conan_paths.cmake) find_package(OpenSSL REQUIRED) +find_package(ZLIB REQUIRED) find_package(pybind11 REQUIRED) find_package(optional-lite REQUIRED) @@ -56,7 +57,7 @@ include_directories(${hyperonc_INCLUDE_DIRS}) pybind11_add_module(hyperonpy MODULE ./hyperonpy.cpp) target_link_libraries(hyperonpy PRIVATE "${hyperonc_STATIC_LIBRARY}") -target_link_libraries(hyperonpy PUBLIC OpenSSL::SSL OpenSSL::Crypto) +target_link_libraries(hyperonpy PUBLIC OpenSSL::SSL OpenSSL::Crypto ZLIB::ZLIB) set(PYTHONPATH "${CMAKE_CURRENT_SOURCE_DIR}") add_subdirectory(tests) diff --git a/python/install-hyperonc.sh b/python/install-hyperonc.sh index 63b7f97b4..2a631ac3e 100755 --- a/python/install-hyperonc.sh +++ b/python/install-hyperonc.sh @@ -25,7 +25,7 @@ if test "$AUDITWHEEL_POLICY" = "manylinux2014"; then yum install -y perl-devel fi -yum install -y openssl openssl-devel +yum install -y 
openssl-devel zlib-devel curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > /tmp/rustup.sh sh /tmp/rustup.sh -y && rm /tmp/rustup.sh From 546e6cd43ef8fceb0a05476299e1d5056c949d23 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 25 Apr 2024 12:30:12 +0900 Subject: [PATCH 23/77] Adding explicit header include paths for hyperonc headers when building C tests --- c/tests/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/c/tests/CMakeLists.txt b/c/tests/CMakeLists.txt index 08ff526fb..1cb441123 100644 --- a/c/tests/CMakeLists.txt +++ b/c/tests/CMakeLists.txt @@ -4,22 +4,28 @@ set(TEST_SOURCES c_space.c util.c ) + add_executable(check_atom check_atom.c ${TEST_SOURCES}) +include_directories(${HYPERONC_INCLUDE_DIR}) target_link_libraries(check_atom hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_atom COMMAND check_atom) add_executable(check_space check_space.c ${TEST_SOURCES}) +include_directories(${HYPERONC_INCLUDE_DIR}) target_link_libraries(check_space hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_space COMMAND check_space) add_executable(check_sexpr_parser check_sexpr_parser.c ${TEST_SOURCES}) +include_directories(${HYPERONC_INCLUDE_DIR}) target_link_libraries(check_sexpr_parser hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_sexpr_parser COMMAND check_sexpr_parser) add_executable(check_types check_types.c ${TEST_SOURCES}) +include_directories(${HYPERONC_INCLUDE_DIR}) target_link_libraries(check_types hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_types COMMAND check_types) add_executable(check_runner check_runner.c ${TEST_SOURCES}) +include_directories(${HYPERONC_INCLUDE_DIR}) target_link_libraries(check_runner hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_runner COMMAND check_runner) From 24fce25fa5643a92b373a8012b62350b47a577ce Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 25 Apr 2024 12:40:07 +0900 Subject: [PATCH 24/77] Changing include pathing in C tests --- c/tests/CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/c/tests/CMakeLists.txt b/c/tests/CMakeLists.txt index 1cb441123..2fe11ebda 100644 --- a/c/tests/CMakeLists.txt +++ b/c/tests/CMakeLists.txt @@ -6,26 +6,26 @@ set(TEST_SOURCES ) add_executable(check_atom check_atom.c ${TEST_SOURCES}) -include_directories(${HYPERONC_INCLUDE_DIR}) +include_directories(${HYPERONC_TARGET_DIR}) target_link_libraries(check_atom hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_atom COMMAND check_atom) add_executable(check_space check_space.c ${TEST_SOURCES}) -include_directories(${HYPERONC_INCLUDE_DIR}) +include_directories(${HYPERONC_TARGET_DIR}) target_link_libraries(check_space hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_space COMMAND check_space) add_executable(check_sexpr_parser check_sexpr_parser.c ${TEST_SOURCES}) -include_directories(${HYPERONC_INCLUDE_DIR}) +include_directories(${HYPERONC_TARGET_DIR}) target_link_libraries(check_sexpr_parser hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_sexpr_parser COMMAND check_sexpr_parser) add_executable(check_types check_types.c ${TEST_SOURCES}) -include_directories(${HYPERONC_INCLUDE_DIR}) +include_directories(${HYPERONC_TARGET_DIR}) target_link_libraries(check_types hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_types COMMAND check_types) add_executable(check_runner check_runner.c ${TEST_SOURCES}) -include_directories(${HYPERONC_INCLUDE_DIR}) +include_directories(${HYPERONC_TARGET_DIR}) target_link_libraries(check_runner 
hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_runner COMMAND check_runner) From 1671c56d10f3f5288e9c89491cd23ba5bc399c1b Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 25 Apr 2024 12:49:37 +0900 Subject: [PATCH 25/77] Linking C tests by library path --- c/tests/CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/c/tests/CMakeLists.txt b/c/tests/CMakeLists.txt index 2fe11ebda..d8ecbc2b8 100644 --- a/c/tests/CMakeLists.txt +++ b/c/tests/CMakeLists.txt @@ -7,25 +7,25 @@ set(TEST_SOURCES add_executable(check_atom check_atom.c ${TEST_SOURCES}) include_directories(${HYPERONC_TARGET_DIR}) -target_link_libraries(check_atom hyperonc-shared CONAN_PKG::libcheck) +target_link_libraries(check_atom ${HYPERONC_SHARED_LIB_PATH} CONAN_PKG::libcheck) add_test(NAME check_atom COMMAND check_atom) add_executable(check_space check_space.c ${TEST_SOURCES}) include_directories(${HYPERONC_TARGET_DIR}) -target_link_libraries(check_space hyperonc-shared CONAN_PKG::libcheck) +target_link_libraries(check_space ${HYPERONC_SHARED_LIB_PATH} CONAN_PKG::libcheck) add_test(NAME check_space COMMAND check_space) add_executable(check_sexpr_parser check_sexpr_parser.c ${TEST_SOURCES}) include_directories(${HYPERONC_TARGET_DIR}) -target_link_libraries(check_sexpr_parser hyperonc-shared CONAN_PKG::libcheck) +target_link_libraries(check_sexpr_parser ${HYPERONC_SHARED_LIB_PATH} CONAN_PKG::libcheck) add_test(NAME check_sexpr_parser COMMAND check_sexpr_parser) add_executable(check_types check_types.c ${TEST_SOURCES}) include_directories(${HYPERONC_TARGET_DIR}) -target_link_libraries(check_types hyperonc-shared CONAN_PKG::libcheck) +target_link_libraries(check_types ${HYPERONC_SHARED_LIB_PATH} CONAN_PKG::libcheck) add_test(NAME check_types COMMAND check_types) add_executable(check_runner check_runner.c ${TEST_SOURCES}) include_directories(${HYPERONC_TARGET_DIR}) -target_link_libraries(check_runner hyperonc-shared CONAN_PKG::libcheck) +target_link_libraries(check_runner ${HYPERONC_SHARED_LIB_PATH} CONAN_PKG::libcheck) add_test(NAME check_runner COMMAND check_runner) From f13c053654fd0e74a2ecd47773721dad611dbab9 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 25 Apr 2024 13:08:36 +0900 Subject: [PATCH 26/77] Reverting test cmake to use target rather than variable, and disabling BUILD_SHARED_LIBS under python wheel --- c/tests/CMakeLists.txt | 10 +++++----- python/install-hyperonc.sh | 4 +++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/c/tests/CMakeLists.txt b/c/tests/CMakeLists.txt index d8ecbc2b8..2fe11ebda 100644 --- a/c/tests/CMakeLists.txt +++ b/c/tests/CMakeLists.txt @@ -7,25 +7,25 @@ set(TEST_SOURCES add_executable(check_atom check_atom.c ${TEST_SOURCES}) include_directories(${HYPERONC_TARGET_DIR}) -target_link_libraries(check_atom ${HYPERONC_SHARED_LIB_PATH} CONAN_PKG::libcheck) +target_link_libraries(check_atom hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_atom COMMAND check_atom) add_executable(check_space check_space.c ${TEST_SOURCES}) include_directories(${HYPERONC_TARGET_DIR}) -target_link_libraries(check_space ${HYPERONC_SHARED_LIB_PATH} CONAN_PKG::libcheck) +target_link_libraries(check_space hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_space COMMAND check_space) add_executable(check_sexpr_parser check_sexpr_parser.c ${TEST_SOURCES}) include_directories(${HYPERONC_TARGET_DIR}) -target_link_libraries(check_sexpr_parser ${HYPERONC_SHARED_LIB_PATH} CONAN_PKG::libcheck) +target_link_libraries(check_sexpr_parser 
hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_sexpr_parser COMMAND check_sexpr_parser) add_executable(check_types check_types.c ${TEST_SOURCES}) include_directories(${HYPERONC_TARGET_DIR}) -target_link_libraries(check_types ${HYPERONC_SHARED_LIB_PATH} CONAN_PKG::libcheck) +target_link_libraries(check_types hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_types COMMAND check_types) add_executable(check_runner check_runner.c ${TEST_SOURCES}) include_directories(${HYPERONC_TARGET_DIR}) -target_link_libraries(check_runner ${HYPERONC_SHARED_LIB_PATH} CONAN_PKG::libcheck) +target_link_libraries(check_runner hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_runner COMMAND check_runner) diff --git a/python/install-hyperonc.sh b/python/install-hyperonc.sh index 2a631ac3e..c7c485393 100755 --- a/python/install-hyperonc.sh +++ b/python/install-hyperonc.sh @@ -46,7 +46,9 @@ git reset --hard FETCH_HEAD mkdir -p ${HOME}/hyperonc/c/build cd ${HOME}/hyperonc/c/build # Rust doesn't support building shared libraries under musllinux environment -cmake -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release .. +# cmake -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release .. +# NOTE: Need to discsuss with Vitaly, because C tests now use shared library +cmake -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release .. make make check make install From aa253503a5b2ff2ad4c989263c0e59885e5f84b5 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 25 Apr 2024 14:00:59 +0900 Subject: [PATCH 27/77] Installing brew, zlib, and openssl on Mac --- python/install-hyperonc.sh | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/python/install-hyperonc.sh b/python/install-hyperonc.sh index c7c485393..52d513037 100755 --- a/python/install-hyperonc.sh +++ b/python/install-hyperonc.sh @@ -20,12 +20,23 @@ done echo "hyperonc repository URL $HYPERONC_URL" echo "hyperonc revision $HYPERONC_REV" +os_type=$(uname) + # This is to build subunit from Conan on CentOS based manylinux images. if test "$AUDITWHEEL_POLICY" = "manylinux2014"; then - yum install -y perl-devel + yum install -y perl-devel openssl-devel zlib-devel fi -yum install -y openssl-devel zlib-devel +# Install OpenSSL and Zlib on a Mac +if [ "$os_type" = "Darwin" ]; then + # Install Homebrew if it's not already present + if ! command -v brew >/dev/null 2>&1; then + /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" + fi + + brew install zlib + brew install openssl +fi curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > /tmp/rustup.sh sh /tmp/rustup.sh -y && rm /tmp/rustup.sh @@ -47,7 +58,8 @@ mkdir -p ${HOME}/hyperonc/c/build cd ${HOME}/hyperonc/c/build # Rust doesn't support building shared libraries under musllinux environment # cmake -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release .. -# NOTE: Need to discsuss with Vitaly, because C tests now use shared library +# QUESTION: Need to discsuss with Vitaly, because C tests now uses shared library, but +# the "Build wheels" step succeeds, presumably because only manylinux flavors are included cmake -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release .. 
make make check From ff3c0074c1bcb24e37ffcc2c373168de7ce54e4d Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 25 Apr 2024 15:23:28 +0900 Subject: [PATCH 28/77] Installing Dependency Libs in release-python github action --- .github/workflows/release-python.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/release-python.yml b/.github/workflows/release-python.yml index 32e82e4cb..bbbad3a2c 100644 --- a/.github/workflows/release-python.yml +++ b/.github/workflows/release-python.yml @@ -32,6 +32,18 @@ jobs: with: python-version: "3.8" + - name: Install Dependency Libs (macOS) + if: startsWith(matrix.os, 'macos') + run: | + brew install openssl + run: brew install zlib + + - name: Install Dependency Libs (Ubuntu) + if: startsWith(matrix.os, 'ubuntu') + run: | + sudo apt-get update && sudo apt-get install -y openssl libssl-dev + sudo apt-get install -y zlib1g-dev + - run: | echo "REF_NAME=${{github.ref_name}}" | tee -a $GITHUB_ENV echo "EVENT_NAME=${{github.event_name}}" | tee -a $GITHUB_ENV From e7e5188ac99898ddc727403c1c2cb7328923bca6 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 25 Apr 2024 15:29:18 +0900 Subject: [PATCH 29/77] Fixing typo in githib CI action --- .github/workflows/release-python.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release-python.yml b/.github/workflows/release-python.yml index bbbad3a2c..9d4f6b32d 100644 --- a/.github/workflows/release-python.yml +++ b/.github/workflows/release-python.yml @@ -35,14 +35,14 @@ jobs: - name: Install Dependency Libs (macOS) if: startsWith(matrix.os, 'macos') run: | - brew install openssl - run: brew install zlib + brew install zlib + brew install openssl@3 - name: Install Dependency Libs (Ubuntu) if: startsWith(matrix.os, 'ubuntu') run: | - sudo apt-get update && sudo apt-get install -y openssl libssl-dev sudo apt-get install -y zlib1g-dev + sudo apt-get update && sudo apt-get install -y openssl libssl-dev - run: | echo "REF_NAME=${{github.ref_name}}" | tee -a $GITHUB_ENV From ae7f838d3b84687f775bdbd8d61f5d59c7f6aff2 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Fri, 26 Apr 2024 12:03:53 +0900 Subject: [PATCH 30/77] Adding platform-specific libraries for Mac and cleaning up experiments fetching OpenSSL explicitly --- .github/workflows/release-python.yml | 12 ------------ python/CMakeLists.txt | 7 ++++++- python/install-hyperonc.sh | 11 ----------- 3 files changed, 6 insertions(+), 24 deletions(-) diff --git a/.github/workflows/release-python.yml b/.github/workflows/release-python.yml index 9d4f6b32d..32e82e4cb 100644 --- a/.github/workflows/release-python.yml +++ b/.github/workflows/release-python.yml @@ -32,18 +32,6 @@ jobs: with: python-version: "3.8" - - name: Install Dependency Libs (macOS) - if: startsWith(matrix.os, 'macos') - run: | - brew install zlib - brew install openssl@3 - - - name: Install Dependency Libs (Ubuntu) - if: startsWith(matrix.os, 'ubuntu') - run: | - sudo apt-get install -y zlib1g-dev - sudo apt-get update && sudo apt-get install -y openssl libssl-dev - - run: | echo "REF_NAME=${{github.ref_name}}" | tee -a $GITHUB_ENV echo "EVENT_NAME=${{github.event_name}}" | tee -a $GITHUB_ENV diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index bf3347eaa..862480822 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -55,8 +55,13 @@ include_directories(${nonstd_INCLUDE_DIRS}) find_package(hyperonc REQUIRED HINTS ${HYPERONC_INSTALL_PREFIX}) include_directories(${hyperonc_INCLUDE_DIRS}) 
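# (Editor's note, not part of the original patch.) The Apple frameworks added just
# below are presumably required on macOS by the statically linked hyperonc code
# (vendored libgit2/OpenSSL). Note that `set(APPEND PLATFORM_LIBRARIES ...)` does not
# append to PLATFORM_LIBRARIES; it defines a variable literally named APPEND, which is
# why the follow-up "Oops. Typo in Cmake file." patch rewrites it as a plain
# `set(PLATFORM_LIBRARIES ...)`.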
+if(APPLE) + # macOS specific libraries + set(APPEND PLATFORM_LIBRARIES "-framework CoreFoundation" "-framework Security") +endif() + pybind11_add_module(hyperonpy MODULE ./hyperonpy.cpp) -target_link_libraries(hyperonpy PRIVATE "${hyperonc_STATIC_LIBRARY}") +target_link_libraries(hyperonpy PRIVATE "${hyperonc_STATIC_LIBRARY}" ${PLATFORM_LIBRARIES}) target_link_libraries(hyperonpy PUBLIC OpenSSL::SSL OpenSSL::Crypto ZLIB::ZLIB) set(PYTHONPATH "${CMAKE_CURRENT_SOURCE_DIR}") diff --git a/python/install-hyperonc.sh b/python/install-hyperonc.sh index 52d513037..f606cf5a2 100755 --- a/python/install-hyperonc.sh +++ b/python/install-hyperonc.sh @@ -27,17 +27,6 @@ if test "$AUDITWHEEL_POLICY" = "manylinux2014"; then yum install -y perl-devel openssl-devel zlib-devel fi -# Install OpenSSL and Zlib on a Mac -if [ "$os_type" = "Darwin" ]; then - # Install Homebrew if it's not already present - if ! command -v brew >/dev/null 2>&1; then - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" - fi - - brew install zlib - brew install openssl -fi - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > /tmp/rustup.sh sh /tmp/rustup.sh -y && rm /tmp/rustup.sh export PATH="${PATH}:${HOME}/.cargo/bin" From 90a0a7178e92be835e413583c6e79b5fddc2033c Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Fri, 26 Apr 2024 12:31:53 +0900 Subject: [PATCH 31/77] Oops. Typo in Cmake file. --- python/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 862480822..596e2dd27 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -57,12 +57,12 @@ include_directories(${hyperonc_INCLUDE_DIRS}) if(APPLE) # macOS specific libraries - set(APPEND PLATFORM_LIBRARIES "-framework CoreFoundation" "-framework Security") + set(PLATFORM_LIBRARIES "-framework CoreFoundation" "-framework Security") endif() pybind11_add_module(hyperonpy MODULE ./hyperonpy.cpp) -target_link_libraries(hyperonpy PRIVATE "${hyperonc_STATIC_LIBRARY}" ${PLATFORM_LIBRARIES}) -target_link_libraries(hyperonpy PUBLIC OpenSSL::SSL OpenSSL::Crypto ZLIB::ZLIB) +target_link_libraries(hyperonpy PRIVATE "${hyperonc_STATIC_LIBRARY}") +target_link_libraries(hyperonpy PUBLIC ${PLATFORM_LIBRARIES} OpenSSL::SSL OpenSSL::Crypto ZLIB::ZLIB) set(PYTHONPATH "${CMAKE_CURRENT_SOURCE_DIR}") add_subdirectory(tests) From 4e20ae997262ec1ac7c653da1e1e16fbb2f9cf98 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Fri, 26 Apr 2024 14:31:11 +0900 Subject: [PATCH 32/77] Adding compile-time switch to enable git support, disabled by default --- lib/Cargo.toml | 6 ++-- lib/src/metta/runner/pkg_mgmt/git_cache.rs | 42 ++++++++++++++++++++-- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/lib/Cargo.toml b/lib/Cargo.toml index b13a9724f..c34d89b3c 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -16,10 +16,9 @@ dyn-fmt = "0.4.0" # pkg_mgmt deps xxhash-rust = {version="0.8.7", features=["xxh3"], optional=true } -git2 = { version="0.15.0", features=["vendored-libgit2"], optional=true } -libz-sys = { version="1.1.16", optional=true } serde = { version="1.0.198", features = ["derive"], optional=true } serde_json = { version="1.0.116", optional=true } +git2 = { version="0.15.0", features=["vendored-libgit2"], optional=true } [lib] name = "hyperon" @@ -33,4 +32,5 @@ default = ["pkg_mgmt"] minimal = [] # enables minimal MeTTa interpreter variable_operation = [] # enables evaluation of the expressions which have # a variable on the first 
position -pkg_mgmt = ["xxhash-rust", "git2", "serde", "serde_json", "libz-sys"] +git = ["git2", "pkg_mgmt"] +pkg_mgmt = ["xxhash-rust", "serde", "serde_json"] diff --git a/lib/src/metta/runner/pkg_mgmt/git_cache.rs b/lib/src/metta/runner/pkg_mgmt/git_cache.rs index 040770386..a1189334b 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_cache.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_cache.rs @@ -7,13 +7,19 @@ //! use std::path::{Path, PathBuf}; +#[cfg(feature = "git")] use std::time::{SystemTime, Duration, UNIX_EPOCH}; +#[cfg(feature = "git")] use std::fs::{File, read_to_string}; +#[cfg(feature = "git")] use std::io::prelude::*; use xxhash_rust::xxh3::xxh3_64; + +#[cfg(feature = "git")] use git2::{*, build::*}; +#[cfg(feature = "git")] const TIMESTAMP_FILENAME: &'static str = "_timestamp_"; /// Indicates the desired behavior for updating the locally-cached repo @@ -22,6 +28,7 @@ pub enum UpdateMode { /// Clones the repo if it doesn't exist, otherwise leaves it alone PullIfMissing, /// Pulls the latest from the remote repo. Fails if the remote is unavailable + #[allow(dead_code)] PullLatest, /// Attempts to pull from the remote repo. Continues with the existing repo if /// the remote is unavailable @@ -34,7 +41,9 @@ pub enum UpdateMode { #[derive(Debug)] pub struct CachedRepo { name: String, + #[allow(dead_code)] url: String, + #[allow(dead_code)] branch: Option, repo_local_path: PathBuf, local_path: PathBuf, @@ -63,13 +72,14 @@ impl CachedRepo { } else { name.to_string() }; - let repo_local_path = caches_dir.join(cache_name).join(local_filename); + let this_cache_dir = caches_dir.join(cache_name); + let repo_local_path = this_cache_dir.join(local_filename); let local_path = match subdir { Some(subdir) => repo_local_path.join(subdir), None => repo_local_path.clone() }; - std::fs::create_dir_all(&repo_local_path).map_err(|e| e.to_string())?; + std::fs::create_dir_all(&this_cache_dir).map_err(|e| e.to_string())?; Ok(Self { name: name.to_string(), @@ -86,6 +96,7 @@ impl CachedRepo { pub fn update(&self, mode: UpdateMode) -> Result { //TODO: If there is a subdir then we can perform a sparse checkout and avoid cloning unnecessary data + #[cfg(feature = "git")] match Repository::open(self.repo_local_path()) { //We have an existing repo on disk @@ -138,9 +149,33 @@ impl CachedRepo { } }, } + + #[cfg(not(feature = "git"))] + self.update_repo_no_git_support(mode) + } + + //Internal function to provide appropriate status / errors if we don't have git support enabled + #[cfg(not(feature = "git"))] + fn update_repo_no_git_support(&self, mode: UpdateMode) -> Result { + let err_msg = || format!("Cannot update repo: {}; hyperon built without git support", self.name); + match mode { + UpdateMode::PullLatest => { + return Err(err_msg()); + } + UpdateMode::TryPullLatest => { + log::warn!("{}", err_msg()); + }, + _ => {} + } + if self.repo_local_path().exists() { + Ok(false) + } else { + Err(err_msg()) + } } /// Internal method to get the branch name + #[cfg(feature = "git")] fn get_branch(&self, remote: &Remote) -> Result { Ok(match &self.branch { Some(b) => b.to_owned(), @@ -151,6 +186,7 @@ impl CachedRepo { } /// Internal method to perform a merge. 
Intended to approximate the `git merge` command-line behavior + #[cfg(feature = "git")] fn merge(&self, repo: &Repository, branch: &str, incomming_commit_ref: &Reference) -> Result<(), git2::Error> { let annotated_commit = repo.reference_to_annotated_commit(incomming_commit_ref)?; let analysis = repo.merge_analysis(&[&annotated_commit])?; @@ -186,6 +222,7 @@ impl CachedRepo { } /// Internal function to write the timestamp file, with the value of "now" + #[cfg(feature = "git")] fn write_timestamp_file(&self) -> Result<(), String> { let duration_since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap(); let file_path = self.repo_local_path().join(TIMESTAMP_FILENAME); @@ -196,6 +233,7 @@ impl CachedRepo { /// Returns `true` if `mode == TryPullIfOlderThan`, and the timestamp file indicates /// that amount of time has elapsed. Otherwise returns `false` + #[cfg(feature = "git")] fn check_timestamp(&self, mode: UpdateMode) -> bool { match mode { UpdateMode::TryPullIfOlderThan(secs) => { From c147b089efc1e1a8db0254a6693e4b0229a576d7 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Fri, 26 Apr 2024 14:48:55 +0900 Subject: [PATCH 33/77] Bumping git2 crate to version 0.18.3. I only had it at 15 to align with version served by Conan --- lib/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Cargo.toml b/lib/Cargo.toml index c34d89b3c..fe1b34b9b 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -18,7 +18,7 @@ dyn-fmt = "0.4.0" xxhash-rust = {version="0.8.7", features=["xxh3"], optional=true } serde = { version="1.0.198", features = ["derive"], optional=true } serde_json = { version="1.0.116", optional=true } -git2 = { version="0.15.0", features=["vendored-libgit2"], optional=true } +git2 = { version="0.18.3", features=["vendored-libgit2"], optional=true } [lib] name = "hyperon" From fff9c25489ef979b00d7a34938429742e6f6533d Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 27 Apr 2024 11:47:33 +0900 Subject: [PATCH 34/77] Adding versioning support for packages. Still haven't merged pre-fetch & sub-module version requirements --- c/src/metta.rs | 15 +- lib/Cargo.toml | 3 +- lib/src/metta/runner/environment.rs | 21 +- lib/src/metta/runner/modules/mod.rs | 27 +- lib/src/metta/runner/pkg_mgmt/catalog.rs | 297 +++++++++++-------- lib/src/metta/runner/pkg_mgmt/git_cache.rs | 2 +- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 44 ++- lib/src/metta/runner/stdlib.rs | 4 +- python/install-hyperonc.sh | 4 +- 9 files changed, 255 insertions(+), 162 deletions(-) diff --git a/c/src/metta.rs b/c/src/metta.rs index fb3edbdf5..acc02d369 100644 --- a/c/src/metta.rs +++ b/c/src/metta.rs @@ -1566,15 +1566,16 @@ pub extern "C" fn env_builder_set_config_dir(builder: *mut env_builder_t, path: *builder_arg_ref = builder.into(); } -/// @brief Configures the environment to create the config dir if it doesn't already exist +/// @brief Sets whether the config dir should be created if it doesn't already exist /// @ingroup environment_group /// @param[in] builder A pointer to the in-process environment builder state +/// @param[in] should_create Whether the directory will be created. 
Defaults to `true` /// #[no_mangle] -pub extern "C" fn env_builder_create_config_dir(builder: *mut env_builder_t) { +pub extern "C" fn env_builder_create_config_dir(builder: *mut env_builder_t, should_create: bool) { let builder_arg_ref = unsafe{ &mut *builder }; let builder = core::mem::replace(builder_arg_ref, env_builder_t::null()).into_inner(); - let builder = builder.create_config_dir(); + let builder = builder.set_create_config_dir(should_create); *builder_arg_ref = builder.into(); } @@ -1738,7 +1739,8 @@ pub extern "C" fn module_id_is_valid(mod_id: *const module_id_t) -> bool { /// #[no_mangle] pub extern "C" fn module_descriptor_new(name: *const c_char) -> module_descriptor_t { - ModuleDescriptor::new(cstr_as_str(name).to_string()).into() + //TODO-NEXT: We should probably take a version string, and parse it into a semver version + ModuleDescriptor::new(cstr_as_str(name).to_string(), None).into() } /// @brief Creates a new module_descriptor_t that represents the error attempting to interpret a module @@ -1906,7 +1908,10 @@ impl FsModuleFormat for CFsModFmtLoader { let result_context = (api.try_path)(self.payload, path_c_string.as_ptr(), mod_name_c_string.as_ptr()); if !result_context.is_null() { - let descriptor = ModuleDescriptor::new_with_path_and_fmt_id(mod_name.to_string(), path, self.fmt_id); + //TODO-NEXT. We want to provide a way for the loader to support loading a PkgInfo, and also pass + // the version from that PkgInfo when the new descriptor is created + + let descriptor = ModuleDescriptor::new_with_path_and_fmt_id(mod_name.to_string(), None, path, self.fmt_id); let mut new_loader = self.clone(); new_loader.callback_context = result_context; diff --git a/lib/Cargo.toml b/lib/Cargo.toml index fe1b34b9b..a64912a9c 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -18,6 +18,7 @@ dyn-fmt = "0.4.0" xxhash-rust = {version="0.8.7", features=["xxh3"], optional=true } serde = { version="1.0.198", features = ["derive"], optional=true } serde_json = { version="1.0.116", optional=true } +semver = { version="1.0", features = ["serde"], optional=true } git2 = { version="0.18.3", features=["vendored-libgit2"], optional=true } [lib] @@ -33,4 +34,4 @@ minimal = [] # enables minimal MeTTa interpreter variable_operation = [] # enables evaluation of the expressions which have # a variable on the first position git = ["git2", "pkg_mgmt"] -pkg_mgmt = ["xxhash-rust", "serde", "serde_json"] +pkg_mgmt = ["xxhash-rust", "serde", "serde_json", "semver"] diff --git a/lib/src/metta/runner/environment.rs b/lib/src/metta/runner/environment.rs index 059f68f1a..91595ab29 100644 --- a/lib/src/metta/runner/environment.rs +++ b/lib/src/metta/runner/environment.rs @@ -136,7 +136,7 @@ impl EnvBuilder { Self { env: Environment::new(), no_cfg_dir: false, - create_cfg_dir: false, + create_cfg_dir: true, #[cfg(feature = "pkg_mgmt")] proto_catalogs: vec![], #[cfg(feature = "pkg_mgmt")] @@ -158,8 +158,7 @@ impl EnvBuilder { self } - /// Sets the `config_dir` that the environment will load. 
A directory at the specified path will - /// be created its contents populated with default values, if one does not already exist + /// Sets the `config_dir` that the environment will load pub fn set_config_dir(mut self, config_dir: &Path) -> Self { self.env.config_dir = Some(config_dir.into()); if self.no_cfg_dir { @@ -168,13 +167,13 @@ impl EnvBuilder { self } - /// Configures the environment to create a config directory with default config files, if no directory is found + /// Sets whether or not a config directory with default config files will be created, if no directory is found /// - /// NOTE: If the config directory exists but some config files are missing, default files will not be created. - pub fn create_config_dir(mut self) -> Self { - self.create_cfg_dir = true; - if self.no_cfg_dir { - panic!("Fatal Error: create_config_dir is incompatible with set_no_config_dir"); + /// NOTE: If the config directory exists but some config files are missing, default files will *not* be created. + pub fn set_create_config_dir(mut self, should_create: bool) -> Self { + self.create_cfg_dir = should_create; + if self.no_cfg_dir && should_create { + panic!("Fatal Error: set_create_config_dir(true) is incompatible with set_no_config_dir"); } self } @@ -182,9 +181,7 @@ impl EnvBuilder { /// Configures the Environment not to load nor create any config files pub fn set_no_config_dir(mut self) -> Self { self.no_cfg_dir = true; - if self.create_cfg_dir { - panic!("Fatal Error: set_no_config_dir is incompatible with create_config_dir"); - } + self.create_cfg_dir = false; if self.env.config_dir.is_some() { panic!("Fatal Error: set_config_dir is incompatible with set_no_config_dir"); } diff --git a/lib/src/metta/runner/modules/mod.rs b/lib/src/metta/runner/modules/mod.rs index 500424fb4..6722bf9f2 100644 --- a/lib/src/metta/runner/modules/mod.rs +++ b/lib/src/metta/runner/modules/mod.rs @@ -51,6 +51,8 @@ impl ModId { } } +pub(crate) static DEFAULT_PKG_INFO: OnceLock = OnceLock::new(); + /// Contains state associated with a loaded MeTTa module #[derive(Debug)] pub struct MettaMod { @@ -60,8 +62,6 @@ pub struct MettaMod { tokenizer: Shared, imported_deps: Mutex>, loader: Option>, - #[cfg(feature = "pkg_mgmt")] - pkg_info: PkgInfo, } impl MettaMod { @@ -85,8 +85,6 @@ impl MettaMod { imported_deps: Mutex::new(HashMap::new()), resource_dir, loader: None, - #[cfg(feature = "pkg_mgmt")] - pkg_info: PkgInfo::default(), }; // Load the base tokens for the module's new Tokenizer @@ -302,12 +300,14 @@ impl MettaMod { #[cfg(feature = "pkg_mgmt")] pub fn pkg_info(&self) -> &PkgInfo { - &self.pkg_info - } - - #[cfg(feature = "pkg_mgmt")] - pub fn pkg_info_mut(&mut self) -> &mut PkgInfo { - &mut self.pkg_info + let default_pkg_info = DEFAULT_PKG_INFO.get_or_init(|| PkgInfo::default()); + match &self.loader { + Some(loader) => match loader.pkg_info() { + Some(pkg_info) => pkg_info, + _ => default_pkg_info + }, + None => default_pkg_info + } } pub fn space(&self) -> &DynSpace { @@ -602,6 +602,11 @@ pub trait ModuleLoader: std::fmt::Debug + Send + Sync { /// [RunContext::load_module], or any other method that leads to the loading of modules fn load(&self, context: &mut RunContext) -> Result<(), String>; + /// A function to access the [PkgInfo] struct of meta-data associated with a module + fn pkg_info(&self) -> Option<&PkgInfo> { + None + } + /// Returns a data blob containing a given named resource belonging to a module fn get_resource(&self, _res_key: ResourceKey) -> Result, String> { Err("resource not found".to_string()) 
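// (Editor's note, not part of the original patch.) A minimal sketch of a loader using
// the two hooks introduced here: `pkg_info()` exposing the module's PkgInfo, and
// `get_resource()` overridden to serve the new `ResourceKey::Version` entry as
// semver-formatted bytes via PkgInfo::version_bytes(). The name `ExampleLoader` and the
// hard-coded version are illustrative only; the stripped generic return type is assumed
// to be Vec<u8>, and the sketch relies on the crate-local imports already in scope in
// this module.
#[derive(Debug)]
struct ExampleLoader {
    pkg_info: PkgInfo,
}

impl ExampleLoader {
    fn new() -> Self {
        let mut pkg_info = PkgInfo::default();
        pkg_info.name = Some("example-mod".to_string());
        pkg_info.version = Some(semver::Version::new(0, 1, 0));
        Self { pkg_info }
    }
}

impl ModuleLoader for ExampleLoader {
    fn load(&self, context: &mut RunContext) -> Result<(), String> {
        // An empty module space is enough for illustration
        let space = DynSpace::new(GroundingSpace::new());
        context.init_self_module(space, None);
        Ok(())
    }
    fn pkg_info(&self) -> Option<&PkgInfo> {
        Some(&self.pkg_info)
    }
    fn get_resource(&self, res_key: ResourceKey) -> Result<Vec<u8>, String> {
        match res_key {
            // Returns "0.1.0" as bytes for this example module
            ResourceKey::Version => self.pkg_info.version_bytes(),
            _ => Err("resource not found".to_string()),
        }
    }
}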
@@ -618,6 +623,8 @@ pub enum ResourceKey<'a> { /// NOTE: there is no guarantee the code in the `module.metta` resource will work outside /// the module's context. This use case must be supported by each module individually. MainMettaSrc, + /// A [semver compliant](https://semver.org) version string + Version, /// A list of people or organizations responsible for the module **TODO** Authors, /// A short description of the module **TODO** diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index 713da164e..9ce227a57 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -82,8 +82,6 @@ use crate::metta::runner::{*, git_catalog::*}; use xxhash_rust::xxh3::xxh3_64; use serde::Deserialize; -pub(crate) const EXPLICIT_GIT_MOD_CACHE_DIR: &'static str = "git-modules"; - /// Implemented for types capable of locating MeTTa modules /// /// For example, `ModuleCatalog` would be an interface to a module respository, analogous to `PyPI` or @@ -108,6 +106,9 @@ pub trait ModuleCatalog: std::fmt::Debug + Send + Sync { // fn lookup_newest_within_version_range(name: &str, version_range: ) -> Option; //TODO: provide default implementation + //TODO-NEXT, add "prepare" function that consumes loader, and returns another one. This will + // allow us to pre-fetch modules + /// Returns a [ModuleLoader] for the specified module from the `ModuleCatalog` fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result, String>; } @@ -115,23 +116,26 @@ pub trait ModuleCatalog: std::fmt::Debug + Send + Sync { /// The object responsible for locating and selecting dependency modules for each [MettaMod] /// /// This structure is conceptually analogous to the a `Cargo.toml` file for a given module. -#[derive(Clone, Debug, Default)] -//TODO: Use serde to deserialize a PkgInfo from an expression atom +#[derive(Clone, Debug, Default, Deserialize)] pub struct PkgInfo { /// The public name of the module. Should be composed of alpha-numeric characters with '-' and '_' /// characters allowed. Must not contain any other punctuation. - pub name: String, + pub name: Option, - //TODO: version field, to indicate the version of this module + // The version of this module + #[serde(default)] + pub version: Option, /// If `strict == true` then a dependency must be declared in the `PkgInfo`, otherwise a permissive /// version requirement will be assumed for any modules that are not explicitly declared + #[serde(default)] pub strict: bool, /// Entries mapping module names to requirements for each dependency sub-module /// /// A Duplicate entry for a given sub-module in the deps list is an error. + #[serde(default)] pub deps: HashMap, } @@ -147,7 +151,10 @@ pub struct DepEntry { #[serde(flatten)] git_location: ModuleGitLocation, - //TODO: field to indicate acceptable version range for dependency + /// An acceptable version of version bounds to satisfy the dependency. None means any version + /// acceptable + #[serde(default)] + pub version_req: Option } impl PkgInfo { @@ -171,7 +178,7 @@ impl PkgInfo { } //Get the module if it's specified with git keys - if let Some(pair) = entry.git_location.get_loader(context.metta.environment().fs_mod_formats(), context.metta.environment().caches_dir(), EXPLICIT_GIT_MOD_CACHE_DIR, mod_name, None)? { + if let Some(pair) = entry.git_location.get_loader(context.metta.environment().fs_mod_formats(), context.metta.environment().caches_dir(), EXPLICIT_GIT_MOD_CACHE, mod_name, None, None)? 
{ return Ok(Some(pair)); } @@ -214,6 +221,17 @@ impl PkgInfo { Ok(None) } + /// Returns the version of the package + pub fn version(&self) -> Option<&semver::Version> { + self.version.as_ref() + } + /// Returns the version of the package as a [semver compliant](https://semver.org) string of bytes + pub fn version_bytes(&self) -> Result, String> { + match self.version() { + Some(ver) => Ok(format!("{ver}").into_bytes()), + None => Err("no version available".to_string()) + } + } } /// Internal function to get a loader for a module at a specific file system path, by trying each FsModuleFormat in order @@ -247,11 +265,12 @@ pub(crate) fn loader_for_module_at_path<'a, P: AsRef, FmtIter: Iterator Self { - Self {path: path.into() } + fn new(path: &Path, pkg_info: PkgInfo) -> Self { + Self {path: path.into(), pkg_info } } fn read_contents(&self) -> Result, String> { std::fs::read(&self.path) @@ -277,7 +296,8 @@ impl ModuleLoader for SingleFileModule { fn get_resource(&self, res_key: ResourceKey) -> Result, String> { match res_key { ResourceKey::MainMettaSrc => self.read_contents(), - _ => Err("unsupported resoruce key".to_string()) + ResourceKey::Version => self.pkg_info.version_bytes(), + _ => Err("unsupported resource key".to_string()) } } } @@ -291,11 +311,12 @@ impl ModuleLoader for SingleFileModule { #[derive(Debug)] pub(crate) struct DirModule { path: PathBuf, + pkg_info: PkgInfo, } impl DirModule { - fn new(path: &Path) -> Self { - Self {path: path.into() } + fn new(path: &Path, pkg_info: PkgInfo) -> Self { + Self { path: path.into(), pkg_info } } fn read_module_metta(&self) -> Option> { let module_metta_path = self.path.join("module.metta"); @@ -324,7 +345,8 @@ impl ModuleLoader for DirModule { fn get_resource(&self, res_key: ResourceKey) -> Result, String> { match res_key { ResourceKey::MainMettaSrc => self.read_module_metta().ok_or_else(|| format!("no module.metta file found in {} dir module", self.path.display())), - _ => Err("unsupported resoruce key".to_string()) + ResourceKey::Version => self.pkg_info.version_bytes(), + _ => Err("unsupported resource key".to_string()) } } } @@ -373,13 +395,14 @@ impl FsModuleFormat for SingleFileModuleFmt { None => path.file_stem().unwrap().to_str().unwrap(), //LP-TODO-NEXT: Unify the code to extract the mod-name from the file name between here and DirModuleFmt::try_path }; - //TODO: Add accessor for the module version here + //TODO: parse out the module version here, and pass it to new_with_path_and_fmt_id below //In a single-file module, the discriptor information will be embedded within the MeTTa code // Therefore, we need to parse the whole text of the module looking for a `_pkg-info` atom, // that we can then convert into a PkgInfo structure + let pkg_info = PkgInfo::default(); - let descriptor = ModuleDescriptor::new_with_path_and_fmt_id(mod_name.to_string(), path, SINGLE_FILE_MOD_FMT_ID); - let loader = Box::new(SingleFileModule::new(path)); + let descriptor = ModuleDescriptor::new_with_path_and_fmt_id(mod_name.to_string(), None, path, SINGLE_FILE_MOD_FMT_ID); + let loader = Box::new(SingleFileModule::new(path, pkg_info)); Some((loader, descriptor)) } else { None @@ -398,23 +421,41 @@ impl FsModuleFormat for DirModuleFmt { } fn try_path(&self, path: &Path, mod_name: Option<&str>) -> Option<(Box, ModuleDescriptor)> { if path.is_dir() { + + //First see if we can extract a [PkgInfo] from a `pkg-info.json` file + let mut pkg_info: Option = None; + let pkginfo_json_path = path.join("pkg-info.json"); + if pkginfo_json_path.exists() { + let 
file_contents = std::fs::read_to_string(&pkginfo_json_path).unwrap(); + pkg_info = Some(serde_json::from_str(&file_contents).unwrap()); + } + + //TODO: Also check for a `pkg-info.metta` file, as soon as I have implemented Atom-Serde + // Also try and parse a `_pkg-info` atom from the `module.metta` file if it's not in a dedicated file + + let pkg_info = pkg_info.unwrap_or_else(|| PkgInfo::default()); + + //Get the module name, first use the name provided. If none, then use the name from the + // pkg-info, and if that's also none, construct a module name from the file name let full_path; let mod_name = match mod_name { Some(mod_name) => mod_name, None => { - //LP-TODO-Next: I need to gracefully create a legal module name from the file name - // if the file name happens to contain characters that are illegal in a module name - full_path = path.canonicalize().unwrap(); - full_path.file_stem().unwrap().to_str().unwrap() + match &pkg_info.name { + Some(name) => name, + None => { + //LP-TODO-Next: I need to gracefully create a legal module name from the file name + // if the file name happens to contain characters that are illegal in a module name + full_path = path.canonicalize().unwrap(); + full_path.file_stem().unwrap().to_str().unwrap() + } + } }, }; - //LP-TODO-Next: Try and read the module version here - //If there is a `pkg-info.metta` file, information from that file will take precedence. - // Otherwise, try and parse a `_pkg-info` atom from the `module.metta` file - - let descriptor = ModuleDescriptor::new_with_path_and_fmt_id(mod_name.to_string(), path, DIR_MOD_FMT_ID); - let loader = Box::new(DirModule::new(path)); + let version = pkg_info.version.clone(); + let descriptor = ModuleDescriptor::new_with_path_and_fmt_id(mod_name.to_string(), version, path, DIR_MOD_FMT_ID); + let loader = Box::new(DirModule::new(path, pkg_info)); return Some((loader, descriptor)); } None @@ -517,21 +558,21 @@ fn visit_modules_in_dir_using_mod_formats(fmts: &[Box], dir_ pub struct ModuleDescriptor { name: String, uid: Option, - //TODO: version + version: Option, } impl ModuleDescriptor { /// Create a new ModuleDescriptor - pub fn new(name: String) -> Self { - Self { name, uid: None } + pub fn new(name: String, version: Option) -> Self { + Self { name, uid: None, version } } /// Create a new ModuleDescriptor - pub fn new_with_uid(name: String, uid: u64) -> Self { - Self { name, uid: Some(uid) } + pub fn new_with_uid(name: String, version: Option, uid: u64) -> Self { + Self { name, uid: Some(uid), version } } - pub fn new_with_ident_bytes_and_fmt_id(name: String, ident: &[u8], fmt_id: u64) -> Self { + pub fn new_with_ident_bytes_and_fmt_id(name: String, version: Option, ident: &[u8], fmt_id: u64) -> Self { let uid = xxh3_64(ident) ^ fmt_id; - ModuleDescriptor::new_with_uid(name, uid) + ModuleDescriptor::new_with_uid(name, version, uid) } /// Create a new ModuleDescriptor using a file system path and another unique id /// @@ -540,13 +581,17 @@ impl ModuleDescriptor { /// /// The purpose of the `fmt_id` is to ensure two different formats or catalogs don't generate /// the same ModuleDescriptor, but you can pass 0 if it doesn't matter - pub fn new_with_path_and_fmt_id(name: String, path: &Path, fmt_id: u64) -> Self { - Self::new_with_ident_bytes_and_fmt_id(name, path.as_os_str().as_encoded_bytes(), fmt_id) + pub fn new_with_path_and_fmt_id(name: String, version: Option, path: &Path, fmt_id: u64) -> Self { + Self::new_with_ident_bytes_and_fmt_id(name, version, path.as_os_str().as_encoded_bytes(), fmt_id) } /// 
Returns the name of the module represented by the ModuleDescriptor pub fn name(&self) -> &str { &self.name } + /// Returns the version of the module represented by the ModuleDescriptor + pub fn version(&self) -> Option<&semver::Version> { + self.version.as_ref() + } /// Returns `true` if the `ident_bytes` and `fmt_id` match what was used to create the descriptor pub fn ident_bytes_and_fmt_id_matches(&self, ident: &[u8], fmt_id: u64) -> bool { let uid = xxh3_64(ident) ^ fmt_id; @@ -576,107 +621,123 @@ pub fn mod_name_from_url(url: &str) -> Option { // TESTS //-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+- -/// Bogus test catalog that returns a fake module in response to any query with a single capital letter -/// used by `recursive_submodule_import_test` -#[derive(Debug)] -struct TestCatalog; +#[cfg(test)] +mod tests { + use super::*; -impl ModuleCatalog for TestCatalog { - fn lookup(&self, name: &str) -> Vec { - if name.len() == 1 && name.chars().last().unwrap().is_uppercase() { - vec![ModuleDescriptor::new(name.to_string())] - } else { - vec![] + /// Bogus test catalog that returns a fake module in response to any query with a single capital letter + /// used by `recursive_submodule_import_test` + #[derive(Debug)] + struct TestCatalog; + + impl ModuleCatalog for TestCatalog { + fn lookup(&self, name: &str) -> Vec { + if name.len() == 1 && name.chars().last().unwrap().is_uppercase() { + vec![ModuleDescriptor::new(name.to_string(), None)] + } else { + vec![] + } + } + fn get_loader(&self, _descriptor: &ModuleDescriptor) -> Result, String> { + Ok(Box::new(TestCatalog)) } } - fn get_loader(&self, _descriptor: &ModuleDescriptor) -> Result, String> { - Ok(Box::new(TestCatalog)) - } -} -impl ModuleLoader for TestCatalog { - fn load(&self, context: &mut RunContext) -> Result<(), String> { - let space = DynSpace::new(GroundingSpace::new()); - context.init_self_module(space, None); - Ok(()) + impl ModuleLoader for TestCatalog { + fn load(&self, context: &mut RunContext) -> Result<(), String> { + let space = DynSpace::new(GroundingSpace::new()); + context.init_self_module(space, None); + Ok(()) + } } -} -/// This tests the core recursive sub-module loading code -#[test] -fn recursive_submodule_import_test() { + /// This tests the core recursive sub-module loading code + #[test] + fn recursive_submodule_import_test() { - //Make a new runner with the TestCatalog - let runner = Metta::new(Some(EnvBuilder::test_env().push_module_catalog(TestCatalog))); + //Make a new runner with the TestCatalog + let runner = Metta::new(Some(EnvBuilder::test_env().push_module_catalog(TestCatalog))); - //Now try loading an inner-module, and make sure it can recursively load all the needed parents - let result = runner.run(SExprParser::new("!(import! &self A:B:C)")); - assert_eq!(result, Ok(vec![vec![expr!()]])); + //Now try loading an inner-module, and make sure it can recursively load all the needed parents + let result = runner.run(SExprParser::new("!(import! 
&self A:B:C)")); + assert_eq!(result, Ok(vec![vec![expr!()]])); - //Test that each parent sub-module is indeed loaded - assert!(runner.get_module_by_name("A").is_ok()); - assert!(runner.get_module_by_name("A:B").is_ok()); - assert!(runner.get_module_by_name("A:B:C").is_ok()); + //Test that each parent sub-module is indeed loaded + assert!(runner.get_module_by_name("A").is_ok()); + assert!(runner.get_module_by_name("A:B").is_ok()); + assert!(runner.get_module_by_name("A:B:C").is_ok()); - //Test that we fail to load a module with an invalid parent, even if the module itself resolves - let _result = runner.run(SExprParser::new("!(import! &self a:B)")); - assert!(runner.get_module_by_name("a:B").is_err()); -} + //Test that we fail to load a module with an invalid parent, even if the module itself resolves + let _result = runner.run(SExprParser::new("!(import! &self a:B)")); + assert!(runner.get_module_by_name("a:B").is_err()); + } -// -//LP-TODO-NEXT, Next make sure the catalogs are able to do the recursive loading from the file system, -// using their working dirs. Maybe make this second test a C API test to get better coverage -// + // + //LP-TODO-NEXT, Next make sure the catalogs are able to do the recursive loading from the file system, + // using their working dirs. Maybe make this second test a C API test to get better coverage + // -//LP-TODO-NEXT, Add a test for loading a module from a DirCatalog by passing a name with an extension (ie. `my_mod.metta`) to `resolve`, -// and make sure the loaded module that comes back doesn't have the extension + //LP-TODO-NEXT, Add a test for loading a module from a DirCatalog by passing a name with an extension (ie. `my_mod.metta`) to `resolve`, + // and make sure the loaded module that comes back doesn't have the extension -#[derive(Debug)] -struct TestLoader; + #[derive(Debug)] + struct TestLoader { + pkg_info: PkgInfo, + } -impl ModuleLoader for TestLoader { - fn load(&self, context: &mut RunContext) -> Result<(), String> { - let space = DynSpace::new(GroundingSpace::new()); - context.init_self_module(space, None); - - //Set up the module [PkgInfo] so it knows to load a sub-module from git - let pkg_info = context.module_mut().unwrap().pkg_info_mut(); - pkg_info.name = "test-mod".to_string(); - pkg_info.deps.insert("metta-morph".to_string(), DepEntry{ - fs_path: None, - git_location: ModuleGitLocation { - //TODO: We probably want a smaller test repo - git_url: Some("https://github.com/trueagi-io/metta-morph/".to_string()), - git_branch: None, //Some("Hyperpose".to_string()), - git_subdir: None, - git_main_file: Some(PathBuf::from("mettamorph.metta")), - } - }); + impl TestLoader { + fn new() -> Self { + let mut pkg_info = PkgInfo::default(); + + //Set up the module [PkgInfo] so it knows to load a sub-module from git + pkg_info.name = Some("test-mod".to_string()); + pkg_info.deps.insert("metta-morph".to_string(), DepEntry{ + fs_path: None, + git_location: ModuleGitLocation { + //TODO: We probably want a smaller test repo + git_url: Some("https://github.com/trueagi-io/metta-morph/".to_string()), + git_branch: None, //Some("Hyperpose".to_string()), + git_subdir: None, + git_main_file: Some(PathBuf::from("mettamorph.metta")), + }, + version_req: None, + }); + Self { pkg_info } + } + } - Ok(()) + impl ModuleLoader for TestLoader { + fn load(&self, context: &mut RunContext) -> Result<(), String> { + let space = DynSpace::new(GroundingSpace::new()); + context.init_self_module(space, None); + + Ok(()) + } + fn pkg_info(&self) -> Option<&PkgInfo> { + 
Some(&self.pkg_info) + } } -} -/// Tests that a module can be fetched from git and loaded, when the git URL is specified in -/// the module's PkgInfo. This test requires a network connection -/// -/// NOTE. Ignored because we may not want it fetching from the internet when running the -/// test suite. Invoke `cargo test git_pkginfo_fetch_test -- --ignored` to run it. -#[ignore] -#[test] -fn git_pkginfo_fetch_test() { + /// Tests that a module can be fetched from git and loaded, when the git URL is specified in + /// the module's PkgInfo. This test requires a network connection + /// + /// NOTE. Ignored because we may not want it fetching from the internet when running the + /// test suite. Invoke `cargo test git_pkginfo_fetch_test -- --ignored` to run it. + #[ignore] + #[test] + fn git_pkginfo_fetch_test() { - //Make a new runner, with the config dir in `/tmp/hyperon-test/` - let runner = Metta::new(Some(EnvBuilder::new().set_config_dir(Path::new("/tmp/hyperon-test/")))); - let _mod_id = runner.load_module_direct(Box::new(TestLoader), "test-mod").unwrap(); + //Make a new runner, with the config dir in `/tmp/hyperon-test/` + let runner = Metta::new(Some(EnvBuilder::new().set_config_dir(Path::new("/tmp/hyperon-test/")))); + let _mod_id = runner.load_module_direct(Box::new(TestLoader::new()), "test-mod").unwrap(); - let result = runner.run(SExprParser::new("!(import! &self test-mod:metta-morph)")); - assert_eq!(result, Ok(vec![vec![expr!()]])); + let result = runner.run(SExprParser::new("!(import! &self test-mod:metta-morph)")); + assert_eq!(result, Ok(vec![vec![expr!()]])); - //Test that we can use a function imported from the module - let result = runner.run(SExprParser::new("!(sequential (A B))")); - assert_eq!(result, Ok(vec![vec![sym!("A"), sym!("B")]])); + //Test that we can use a function imported from the module + let result = runner.run(SExprParser::new("!(sequential (A B))")); + assert_eq!(result, Ok(vec![vec![sym!("A"), sym!("B")]])); - runner.display_loaded_modules(); + runner.display_loaded_modules(); + } } - diff --git a/lib/src/metta/runner/pkg_mgmt/git_cache.rs b/lib/src/metta/runner/pkg_mgmt/git_cache.rs index a1189334b..86b7af82a 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_cache.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_cache.rs @@ -95,7 +95,7 @@ impl CachedRepo { /// repo was updated, and `false` if the repo was left unchanged pub fn update(&self, mode: UpdateMode) -> Result { - //TODO: If there is a subdir then we can perform a sparse checkout and avoid cloning unnecessary data + //TODO-FUTURE: If there is a subdir field on &self then we can perform a sparse checkout and avoid cloning unnecessary data #[cfg(feature = "git")] match Repository::open(self.repo_local_path()) { diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index 4df89ffec..9f47909f2 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -17,6 +17,10 @@ use crate::metta::runner::pkg_mgmt::{*, git_cache::*}; // * Funtion to trigger explicit updates. Accessible from metta ops // - Update specific module, update to a specific version, latest, or latest stable // - update all modules, to latest or latest stable +// - implemented in a way that also works on the EXPLICIT_GIT_MOD_CACHE (e.g. 
by cache dir) + +/// The name of the cache for modules loaded explicitly by git URL +pub(crate) const EXPLICIT_GIT_MOD_CACHE: &'static str = "git-modules"; /// A set of keys describing how to access a module via git. Deserialized from within a [PkgInfo] /// or a catalog file [CatalogFileFormat] @@ -43,12 +47,20 @@ pub struct ModuleGitLocation { } impl ModuleGitLocation { - pub(crate) fn get_loader<'a, FmtIter: Iterator>(&self, fmts: FmtIter, caches_dir: Option<&Path>, cache_name: &str, mod_name: &str, ident_str: Option<&str>) -> Result, ModuleDescriptor)>, String> { + /// Returns a ModuleLoader & ModuleDescriptor pair for a module hosted at a specific git location. + /// Checks the cache to avoid unnecessaryily fetching the module if we have it locally + pub(crate) fn get_loader<'a, FmtIter: Iterator>(&self, fmts: FmtIter, caches_dir: Option<&Path>, cache_name: &str, mod_name: &str, version: Option<&semver::Version>, ident_str: Option<&str>) -> Result, ModuleDescriptor)>, String> { //If a git URL is specified in the entry, see if we have it in the git-cache and // clone it locally if we don't if self.git_url.is_some() { - let cached_repo = self.get_cache(caches_dir, cache_name, mod_name, ident_str)?; + let cached_repo = self.get_cache(caches_dir, cache_name, mod_name, version, ident_str)?; + //TODO We want "version-locking" behavior from this cache. ie. don't update once + // we pulled a version, but we need a way to cause it to pull the latest. + //The tricky part is how the user will specify the modules. Doing by url sounds tedious. + // but doing it by mod_name might update modules in unwanted ways. + //At the very least, we need to offer a "update everything" command that can run across + // an entire cache cached_repo.update(UpdateMode::PullIfMissing)?; let mod_path = match &self.git_main_file { @@ -60,14 +72,22 @@ impl ModuleGitLocation { Ok(None) } - pub(crate) fn get_cache(&self, caches_dir: Option<&Path>, cache_name: &str, mod_name: &str, ident_str: Option<&str>) -> Result { + pub(crate) fn get_cache(&self, caches_dir: Option<&Path>, cache_name: &str, mod_name: &str, version: Option<&semver::Version>, ident_str: Option<&str>) -> Result { let caches_dir = caches_dir.ok_or_else(|| "Unable to clone git repository; no local \"caches\" directory available".to_string())?; let url = self.git_url.as_ref().unwrap(); let ident_str = match ident_str { Some(ident_str) => ident_str, None => url, }; - CachedRepo::new(caches_dir, cache_name, mod_name, ident_str, url, self.git_branch.as_ref().map(|s| s.as_str()), self.git_subdir.as_ref().map(|p| p.as_path())) + let repo_name_string; + let mod_repo_name = match version { + Some(version) => { + repo_name_string = format!("{mod_name}-{version}"); + &repo_name_string + }, + None => mod_name + }; + CachedRepo::new(caches_dir, cache_name, mod_repo_name, ident_str, url, self.git_branch.as_ref().map(|s| s.as_str()), self.git_subdir.as_ref().map(|p| p.as_path())) } /// Returns a new ModuleGitLocation. 
This is a convenience; the usual interface involves deserializing this struct pub(crate) fn new(url: String) -> Self { @@ -90,8 +110,9 @@ struct CatalogFileFormat { #[derive(Deserialize, Debug)] struct CatalogFileMod { name: String, + version: Option, #[serde(flatten)] - git_location: ModuleGitLocation + git_location: ModuleGitLocation, } #[derive(Debug)] @@ -118,25 +139,27 @@ impl GitCatalog { catalog: Mutex::new(None), }) } + /// Scans the catalog and finds all the modules with a given name fn find_mods_with_name(&self, name: &str) -> Vec { let cat_lock = self.catalog.lock().unwrap(); let catalog = cat_lock.as_ref().unwrap(); let mut results = vec![]; for cat_mod in catalog.modules.iter() { if cat_mod.name == name { - //TODO: incorporate the name into the descriptor's ident bytes - let descriptor = ModuleDescriptor::new_with_ident_bytes_and_fmt_id(name.to_string(), cat_mod.git_location.get_url().unwrap().as_bytes(), 0); + let descriptor = ModuleDescriptor::new_with_ident_bytes_and_fmt_id(name.to_string(), cat_mod.version.clone(), cat_mod.git_location.get_url().unwrap().as_bytes(), 0); results.push(descriptor); } } results } + /// Scans the catalog looking for a single module that matches the provided descriptor fn find_mod_idx_with_descriptor(&self, descriptor: &ModuleDescriptor) -> Option { let cat_lock = self.catalog.lock().unwrap(); let catalog = cat_lock.as_ref().unwrap(); for (mod_idx, cat_mod) in catalog.modules.iter().enumerate() { - //TODO: Also check version here - if cat_mod.name == descriptor.name() && descriptor.ident_bytes_and_fmt_id_matches(cat_mod.git_location.get_url().unwrap().as_bytes(), 0) { + if cat_mod.name == descriptor.name() && + cat_mod.version.as_ref() == descriptor.version() && + descriptor.ident_bytes_and_fmt_id_matches(cat_mod.git_location.get_url().unwrap().as_bytes(), 0) { return Some(mod_idx); } } @@ -178,13 +201,12 @@ impl ModuleCatalog for GitCatalog { } fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result, String> { let mod_idx = self.find_mod_idx_with_descriptor(descriptor).unwrap(); - let version_str = ""; //TODO, get the version from the descriptor let cat_lock = self.catalog.lock().unwrap(); let catalog = cat_lock.as_ref().unwrap(); let module = catalog.modules.get(mod_idx).unwrap(); - let loader = match module.git_location.get_loader(self.fmts.iter().map(|f| &**f), Some(&self.caches_dir), &self.name, descriptor.name(), Some(&version_str))? { + let loader = match module.git_location.get_loader(self.fmts.iter().map(|f| &**f), Some(&self.caches_dir), &self.name, descriptor.name(), descriptor.version(), None)? 
{ Some((loader, _)) => loader, None => unreachable!(), }; diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index 2bc2cf2c9..8a2ff5aac 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -5,7 +5,7 @@ use crate::metta::*; use crate::metta::text::Tokenizer; use crate::metta::text::SExprParser; use crate::metta::runner::{Metta, RunContext, ModuleLoader, ResourceKey, mod_name_from_url}; -use crate::metta::runner::{EXPLICIT_GIT_MOD_CACHE_DIR, git_catalog::ModuleGitLocation, git_cache::UpdateMode}; +use crate::metta::runner::{git_catalog::EXPLICIT_GIT_MOD_CACHE, git_catalog::ModuleGitLocation, git_cache::UpdateMode}; use crate::metta::types::{get_atom_types, get_meta_type}; use crate::common::shared::Shared; use crate::common::CachingMapper; @@ -370,7 +370,7 @@ impl Grounded for GitModuleOp { }; let git_mod_location = ModuleGitLocation::new(url.to_string()); - let cached_mod = git_mod_location.get_cache(self.metta.environment().caches_dir(), EXPLICIT_GIT_MOD_CACHE_DIR, &mod_name, None)?; + let cached_mod = git_mod_location.get_cache(self.metta.environment().caches_dir(), EXPLICIT_GIT_MOD_CACHE, &mod_name, None, None)?; cached_mod.update(UpdateMode::TryPullLatest)?; self.metta.load_module_at_path(cached_mod.local_path(), Some(&mod_name)).map_err(|e| ExecError::from(e))?; diff --git a/python/install-hyperonc.sh b/python/install-hyperonc.sh index 65d2ed921..3880b4ade 100755 --- a/python/install-hyperonc.sh +++ b/python/install-hyperonc.sh @@ -45,8 +45,8 @@ git reset --hard FETCH_HEAD mkdir -p ${HOME}/hyperonc/c/build cd ${HOME}/hyperonc/c/build -# Rust doesn't support building shared libraries under musllinux environment. -CMAKE_ARGS="$CMAKE_ARGS -DBUILD_SHARED_LIBS=OFF" + +CMAKE_ARGS="$CMAKE_ARGS -DBUILD_SHARED_LIBS=ON" # Local prefix is used to support MacOSX Apple Silicon GitHub actions environment. CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${HOME}/.local" CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release" From f57c9a6a8323e3f2982c6e0a180611d4b6327c8f Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 27 Apr 2024 14:06:31 +0900 Subject: [PATCH 35/77] Adding version_req to both catalog and PkgInfo APIs, so it's possible to express sub-module version requirements --- lib/src/metta/runner/modules/mod.rs | 3 + lib/src/metta/runner/pkg_mgmt/catalog.rs | 90 ++++++++++++++++++------ 2 files changed, 70 insertions(+), 23 deletions(-) diff --git a/lib/src/metta/runner/modules/mod.rs b/lib/src/metta/runner/modules/mod.rs index 6722bf9f2..9927f2b15 100644 --- a/lib/src/metta/runner/modules/mod.rs +++ b/lib/src/metta/runner/modules/mod.rs @@ -607,6 +607,9 @@ pub trait ModuleLoader: std::fmt::Debug + Send + Sync { None } + //TODO-NEXT, add "prepare" function that consumes loader, and returns another one. 
This will + // allow us to pre-fetch modules + /// Returns a data blob containing a given named resource belonging to a module fn get_resource(&self, _res_key: ResourceKey) -> Result, String> { Err("resource not found".to_string()) diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index 9ce227a57..073cc44a4 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -94,20 +94,64 @@ pub trait ModuleCatalog: std::fmt::Debug + Send + Sync { /// Returns the [ModuleDescriptor] for every module in the `ModuleCatalog` with the specified name fn lookup(&self, name: &str) -> Vec; - //TODO: Add this function when I add module versioning - // /// Returns the [ModuleDescriptor] for every module in the `ModuleCatalog` with the specified name - // /// matching the version requirements - // fn lookup_within_version_range(name: &str, version_range: ) -> Vec; - //TODO: provide default implementation - - //TODO: Add this function when I add module versioning - // /// Returns the [ModuleDescriptor] for the newest module in the `ModuleCatalog`, that falls within - // /// the specified version range, or `None` if no module exists - // fn lookup_newest_within_version_range(name: &str, version_range: ) -> Option; - //TODO: provide default implementation + /// Returns the [ModuleDescriptor] for every module in the `ModuleCatalog` with the specified name + /// matching the version requirements + /// + /// NOTE: Unversioned modules will never match any version_req, so this method should never return + /// any un-versioned ModuleDescriptors if `version_req.is_some()` + fn lookup_with_version_req(&self, name: &str, version_req: Option<&semver::VersionReq>) -> Vec { + let all_named_descs = self.lookup(name); + match version_req { + Some(req) => all_named_descs.into_iter().filter(|desc| { + match desc.version() { + Some(ver) => req.matches(ver), + None => false + } + }).collect(), + None => all_named_descs + } + } - //TODO-NEXT, add "prepare" function that consumes loader, and returns another one. 
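// (Editorial sketch, not part of the patch.) A caller can now constrain a catalog search with a
// semver requirement; the module name and requirement string below are hypothetical.
fn newest_compatible(catalog: &dyn ModuleCatalog) -> Option<ModuleDescriptor> {
    // Any requirement syntax accepted by the `semver` crate works here
    let req = semver::VersionReq::parse(">=0.1.0, <0.2.0").ok()?;
    // Per the notes above, un-versioned modules never match an explicit requirement
    catalog.lookup_newest_with_version_req("example-mod", Some(&req))
}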
This will - // allow us to pre-fetch modules + /// Returns the [ModuleDescriptor] for the newest module in the `ModuleCatalog`, that matches the + /// specified version requirement, or `None` if no module exists + /// + /// If `version_req == None`, this method should return the newest module available in the catalog + /// + /// NOTE: unversioned modules are considered to have the lowest possible version, and thus this method + /// should only return an unversioned module if no matching modules are available + /// NOTE: Unversioned modules will never match any version_req, so this method should never return + /// any un-versioned ModuleDescriptors if `version_req.is_some()` + fn lookup_newest_with_version_req(&self, name: &str, version_req: Option<&semver::VersionReq>) -> Option { + let mut highest_version: Option = None; + let mut ret_desc = None; + for desc in self.lookup_with_version_req(name, version_req).into_iter() { + match desc.version().cloned() { + Some(ver) => { + match &mut highest_version { + Some(highest_ver) => { + if ver > *highest_ver { + *highest_ver = ver; + ret_desc = Some(desc); + } + }, + None => { + ret_desc = Some(desc); + highest_version = Some(ver) + } + } + }, + None => { + if highest_version.is_none() { + if ret_desc.is_some() { + log::warn!("Multiple un-versioned {name} modules in catalog; impossible to select newest"); + } + ret_desc = Some(desc) + } + } + } + } + ret_desc + } /// Returns a [ModuleLoader] for the specified module from the `ModuleCatalog` fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result, String>; @@ -169,6 +213,7 @@ impl PkgInfo { } //See if we have a pkg_info dep entry for the module + let mut version_req = None; if let Some(entry) = self.deps.get(mod_name) { //If path is explicitly specified in the dep entry, then we must load the module at the @@ -182,9 +227,8 @@ impl PkgInfo { return Ok(Some(pair)); } - //TODO, If a version range is specified in the dep entry, then use that version range to specify - // modules discovered in the catalogs - + //If `version_req` is specified in the dep entry, then use it to constrain the catalog search + version_req = entry.version_req.as_ref(); } else { //If the PkgInfo doesn't have an entry for the module, it's an error if the PkgInfo is flagged as "strict" if self.strict { @@ -209,13 +253,13 @@ impl PkgInfo { //Search the catalogs, starting with the resource dir, and continuing to the runner's Environment for catalog in local_catalogs.into_iter().chain(context.metta.environment().catalogs()) { log::trace!("Looking for module: \"{mod_name}\" inside {catalog:?}"); - //TODO: use lookup_newest_within_version_range, as soon as I add module versioning - let results = catalog.lookup(mod_name); - if results.len() > 0 { - log::info!("Found module: \"{mod_name}\" inside {catalog:?}"); - let descriptor = results.into_iter().next().unwrap(); - log::info!("Preparing to load module: \'{}\' as \'{}\'", descriptor.name, name_path); - return Ok(Some((catalog.get_loader(&descriptor)?, descriptor))) + match catalog.lookup_newest_with_version_req(mod_name, version_req) { + Some(descriptor) => { + log::info!("Found module: \"{mod_name}\" inside {catalog:?}"); + log::info!("Preparing to load module: \'{}\' as \'{}\'", descriptor.name, name_path); + return Ok(Some((catalog.get_loader(&descriptor)?, descriptor))) + }, + None => {} } } From 2d01a0f4b81d685b6d4c49833ead271f98d00272 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 27 Apr 2024 16:06:39 +0900 Subject: [PATCH 36/77] Clarifying my thoughts on a git 
cache-management interface, prior to coding it. --- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 32 ++++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index 9f47909f2..a8c08da82 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -11,13 +11,39 @@ use crate::metta::runner::*; use crate::metta::runner::pkg_mgmt::{*, git_cache::*}; //TODO: -// * Need a function to clean up local repos that have been removed from the catalog file -// * Need a function to delete a whole catalog cache. Both of these interfaces should probably -// be added to the catalog trait as optional methods. // * Funtion to trigger explicit updates. Accessible from metta ops // - Update specific module, update to a specific version, latest, or latest stable // - update all modules, to latest or latest stable // - implemented in a way that also works on the EXPLICIT_GIT_MOD_CACHE (e.g. by cache dir) +// +//Current thinking: +// * Implement the "prepare" method on ModuleLoader +// * Implement an "all" method on Catalog, and possibly "all_mod_names" which lists sorted mod names +// +//Less sure about this but... I think that we want two objects both implementing Catalog, and +// both sharing the same on-disk backing. One includes the remote fetching, while the other +// allows for explicit manipulation. +// +// * Implement a "ManagedCatalog" trait with methods: +// * origin_catalog ???? +// * local_catalog (accessor) ???? +// * clear_all +// * remove_by_name(mod_name) ????? (probably not) +// * remove_by_desc(descriptor) +// * fetch(descriptor) +// * upgrade(descriptor) (performs lookup_newest, then if newer is found, removes existing, and fetches) +// * upgrade_all() + +//QUESTION: I'm really not sure about whether the explicit git cache is a catalog. +// The No arguments: +// not queryable +// +// The Yes arguments: +// packages should be upgradable +// +//I think the way to square this circle is to make catalog query functions that work a descriptor uid +// + /// The name of the cache for modules loaded explicitly by git URL pub(crate) const EXPLICIT_GIT_MOD_CACHE: &'static str = "git-modules"; From 9b101b3ab431ca564e22d4c516c85008cc55aa86 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 27 Apr 2024 16:14:03 +0900 Subject: [PATCH 37/77] Enabling "git" feature in default set --- lib/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Cargo.toml b/lib/Cargo.toml index a64912a9c..9c56c35f4 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -27,7 +27,7 @@ path = "src/lib.rs" crate-type = ["lib"] [features] -default = ["pkg_mgmt"] +default = ["pkg_mgmt", "git"] # Add one of the features below into default list to enable. 
# See https://doc.rust-lang.org/cargo/reference/features.html#the-features-section minimal = [] # enables minimal MeTTa interpreter From 69e2d6f217823b961ac3270cf0da156669dca829 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 27 Apr 2024 16:33:19 +0900 Subject: [PATCH 38/77] Updating pythong API with config dir create param change --- python/hyperon/runner.py | 8 ++++---- python/hyperonpy.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/hyperon/runner.py b/python/hyperon/runner.py index 2ec9a2209..33a37dda7 100644 --- a/python/hyperon/runner.py +++ b/python/hyperon/runner.py @@ -218,7 +218,7 @@ def config_dir(): else: return None - def init_common_env(working_dir = None, config_dir = None, create_config = False, disable_config = False, is_test = False, include_paths = []): + def init_common_env(working_dir = None, config_dir = None, create_config = None, disable_config = False, is_test = False, include_paths = []): """Initialize the common environment with the supplied args""" builder = Environment.custom_env(working_dir, config_dir, create_config, disable_config, is_test, include_paths) return hp.env_builder_init_common_env(builder) @@ -227,15 +227,15 @@ def test_env(): """Returns an EnvBuilder object specifying a unit-test environment, that can be used to init a MeTTa runner""" return hp.env_builder_use_test_env() - def custom_env(working_dir = None, config_dir = None, create_config = False, disable_config = False, is_test = False, include_paths = []): + def custom_env(working_dir = None, config_dir = None, create_config = None, disable_config = False, is_test = False, include_paths = []): """Returns an EnvBuilder object that can be used to init a MeTTa runner, if you need multiple environments to coexist in the same process""" builder = hp.env_builder_start() if (working_dir is not None): hp.env_builder_set_working_dir(builder, working_dir) if (config_dir is not None): hp.env_builder_set_config_dir(builder, config_dir) - if (create_config): - hp.env_builder_create_config_dir(builder) + if (create_config is not None): + hp.env_builder_create_config_dir(builder, create_config) #Pass True for "create if missing" behavior (default), and False to never create a new dir if (disable_config): hp.env_builder_disable_config_dir(builder) if (is_test): diff --git a/python/hyperonpy.cpp b/python/hyperonpy.cpp index 9429c3d50..8d6c3dcdd 100644 --- a/python/hyperonpy.cpp +++ b/python/hyperonpy.cpp @@ -1049,7 +1049,7 @@ PYBIND11_MODULE(hyperonpy, m) { m.def("env_builder_init_common_env", [](EnvBuilder builder) { return env_builder_init_common_env(builder.obj); }, "Finish initialization of the common environment"); m.def("env_builder_set_working_dir", [](EnvBuilder& builder, std::string path) { env_builder_set_working_dir(builder.ptr(), path.c_str()); }, "Sets the working dir in the environment"); m.def("env_builder_set_config_dir", [](EnvBuilder& builder, std::string path) { env_builder_set_config_dir(builder.ptr(), path.c_str()); }, "Sets the config dir in the environment"); - m.def("env_builder_create_config_dir", [](EnvBuilder& builder) { env_builder_create_config_dir(builder.ptr()); }, "Creates the config dir if it doesn't exist"); + m.def("env_builder_create_config_dir", [](EnvBuilder& builder, bool should_create) { env_builder_create_config_dir(builder.ptr(), should_create); }, "Creates the config dir if it doesn't exist"); m.def("env_builder_disable_config_dir", [](EnvBuilder& builder) { env_builder_disable_config_dir(builder.ptr()); }, "Disables the config 
dir in the environment"); m.def("env_builder_set_is_test", [](EnvBuilder& builder, bool is_test) { env_builder_set_is_test(builder.ptr(), is_test); }, "Disables the config dir in the environment"); m.def("env_builder_push_include_path", [](EnvBuilder& builder, std::string path) { env_builder_push_include_path(builder.ptr(), path.c_str()); }, "Adds an include path to the environment"); From 2c74e36ab48ed3317c4399503372db29316d8fcc Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 27 Apr 2024 21:58:53 +0900 Subject: [PATCH 39/77] Fixing logic bug in FSModuleFormat visitation order --- lib/src/metta/runner/pkg_mgmt/catalog.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index 073cc44a4..40d11f67c 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -545,7 +545,7 @@ impl ModuleCatalog for DirCatalog { //Inspect the directory using each FsModuleFormat, in order visit_modules_in_dir_using_mod_formats(&self.fmts, &self.path, name, |_loader, descriptor| { found_modules.push(descriptor); - false + true }); found_modules From 750f68a79e063bd44ac7b6fdd26d1db47c053838 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Wed, 1 May 2024 13:49:19 +0900 Subject: [PATCH 40/77] Adding ManagedCatalog trait --- lib/src/metta/runner/pkg_mgmt/catalog.rs | 133 +++++++++++++----- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 9 +- .../metta/runner/pkg_mgmt/managed_catalog.rs | 70 +++++++++ lib/src/metta/runner/pkg_mgmt/mod.rs | 4 +- repl/src/metta_shim.rs | 2 +- 5 files changed, 179 insertions(+), 39 deletions(-) create mode 100644 lib/src/metta/runner/pkg_mgmt/managed_catalog.rs diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index 40d11f67c..0d98fcff0 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -74,6 +74,7 @@ use std::path::Path; use std::collections::hash_map::DefaultHasher; use std::hash::Hasher; use std::ffi::{OsStr, OsString}; +use std::collections::HashSet; use crate::metta::text::OwnedSExprParser; use crate::metta::runner::modules::*; @@ -94,22 +95,19 @@ pub trait ModuleCatalog: std::fmt::Debug + Send + Sync { /// Returns the [ModuleDescriptor] for every module in the `ModuleCatalog` with the specified name fn lookup(&self, name: &str) -> Vec; + /// Returns the [ModuleDescriptor] for every module in the `ModuleCatalog` with the specified name, + /// and uid match + fn lookup_with_uid(&self, name: &str, uid: Option) -> Vec { + self.lookup(name).into_iter().filter(|desc| desc.uid == uid).collect() + } + /// Returns the [ModuleDescriptor] for every module in the `ModuleCatalog` with the specified name /// matching the version requirements /// /// NOTE: Unversioned modules will never match any version_req, so this method should never return /// any un-versioned ModuleDescriptors if `version_req.is_some()` fn lookup_with_version_req(&self, name: &str, version_req: Option<&semver::VersionReq>) -> Vec { - let all_named_descs = self.lookup(name); - match version_req { - Some(req) => all_named_descs.into_iter().filter(|desc| { - match desc.version() { - Some(ver) => req.matches(ver), - None => false - } - }).collect(), - None => all_named_descs - } + filter_by_version_req(self.lookup(name).into_iter(), version_req).collect() } /// Returns the [ModuleDescriptor] for the newest module in the `ModuleCatalog`, that matches the @@ -122,39 +120,100 @@ pub 
trait ModuleCatalog: std::fmt::Debug + Send + Sync { /// NOTE: Unversioned modules will never match any version_req, so this method should never return /// any un-versioned ModuleDescriptors if `version_req.is_some()` fn lookup_newest_with_version_req(&self, name: &str, version_req: Option<&semver::VersionReq>) -> Option { - let mut highest_version: Option = None; - let mut ret_desc = None; - for desc in self.lookup_with_version_req(name, version_req).into_iter() { - match desc.version().cloned() { - Some(ver) => { - match &mut highest_version { - Some(highest_ver) => { - if ver > *highest_ver { - *highest_ver = ver; - ret_desc = Some(desc); - } - }, - None => { + find_newest_module(self.lookup_with_version_req(name, version_req).into_iter()) + } + + /// Returns the [ModuleDescriptor] for the newest module in the `ModuleCatalog`, that matches the + /// specified name, uid, and version requirement, or `None` if no module exists + /// + /// See [ModuleCatalog::lookup_newest_with_version_req] for more details + fn lookup_newest_with_uid_and_version_req(&self, name: &str, uid: Option, version_req: Option<&semver::VersionReq>) -> Option { + let result_iter = self.lookup_with_uid(name, uid).into_iter(); + find_newest_module(filter_by_version_req(result_iter, version_req)) + } + + /// Returns a [ModuleLoader] for the specified module from the `ModuleCatalog` + fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result, String>; + + /// Returns an iterator over every module available in the catalog. May not be supported + /// by all catalog implementations + fn list<'a>(&'a self) -> Option + 'a>> { + None + } + + /// Returns an iterator over every unique module name in the catalog. May not be supported + /// by all catalog implementations + fn list_names<'a>(&'a self) -> Option + 'a>> { + self.list().map(|desc_iter| { + let mut names = HashSet::new(); + for desc in desc_iter { + if !names.contains(desc.name()) { + names.insert(desc.name().to_string()); + } + } + Box::new(names.into_iter()) as Box> + }) + } + + /// Returns an iterator over every unique (module name, uid) pair in the catalog. May not + /// be supported by all catalog implementations + fn list_name_uid_pairs<'a>(&'a self) -> Option)> + 'a>> { + self.list().map(|desc_iter| { + let mut results = HashSet::new(); + for desc in desc_iter { + results.insert((desc.name().to_string(), desc.uid())); + } + Box::new(results.into_iter()) as Box)>> + }) + } +} + +/// Internal function to filter a set of [ModuleDescriptor]s by a [semver::VersionReq]. See +/// [ModuleCatalog::lookup_with_version_req] for an explanation of behavior +fn filter_by_version_req<'a>(mods_iter: impl Iterator + 'a, version_req: Option<&'a semver::VersionReq>) -> Box + 'a> { + match version_req { + Some(req) => Box::new(mods_iter.filter(|desc| { + match desc.version() { + Some(ver) => req.matches(ver), + None => false + } + })), + None => Box::new(mods_iter) + } +} + +/// Internal function to find the newest module in a set. 
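// (Editorial sketch, not part of the patch.) Any catalog that implements `list()` inherits the
// name listings above for free; a caller can probe for that capability like this.
fn print_available_modules(catalog: &dyn ModuleCatalog) {
    match catalog.list_names() {
        Some(names) => names.for_each(|name| println!("{name}")),
        None => println!("{catalog:?} does not support listing its contents"),
    }
}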
See [ModuleCatalog::lookup_newest_with_version_req] +/// for an explanation of behavior +fn find_newest_module(mods_iter: impl Iterator) -> Option { + let mut highest_version: Option = None; + let mut ret_desc = None; + for desc in mods_iter { + match desc.version().cloned() { + Some(ver) => { + match &mut highest_version { + Some(highest_ver) => { + if ver > *highest_ver { + *highest_ver = ver; ret_desc = Some(desc); - highest_version = Some(ver) } + }, + None => { + ret_desc = Some(desc); + highest_version = Some(ver) } - }, - None => { - if highest_version.is_none() { - if ret_desc.is_some() { - log::warn!("Multiple un-versioned {name} modules in catalog; impossible to select newest"); - } - ret_desc = Some(desc) + } + }, + None => { + if highest_version.is_none() { + if let Some(ret_desc) = ret_desc { + log::warn!("Multiple un-versioned {} modules in catalog; impossible to select newest", ret_desc.name()); } + ret_desc = Some(desc) } } } - ret_desc } - - /// Returns a [ModuleLoader] for the specified module from the `ModuleCatalog` - fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result, String>; + ret_desc } /// The object responsible for locating and selecting dependency modules for each [MettaMod] @@ -632,6 +691,10 @@ impl ModuleDescriptor { pub fn name(&self) -> &str { &self.name } + /// Returns the uid associated with the ModuleDescriptor + pub fn uid(&self) -> Option { + self.uid + } /// Returns the version of the module represented by the ModuleDescriptor pub fn version(&self) -> Option<&semver::Version> { self.version.as_ref() diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index a8c08da82..0370cac32 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -10,7 +10,7 @@ use serde::Deserialize; use crate::metta::runner::*; use crate::metta::runner::pkg_mgmt::{*, git_cache::*}; -//TODO: +//TODO: TODO-NEXT. This is almost implemented. But keeping these notes until complete // * Funtion to trigger explicit updates. Accessible from metta ops // - Update specific module, update to a specific version, latest, or latest stable // - update all modules, to latest or latest stable @@ -44,6 +44,10 @@ use crate::metta::runner::pkg_mgmt::{*, git_cache::*}; //I think the way to square this circle is to make catalog query functions that work a descriptor uid // +//UPDATE: Need to implement ManagedCatalog for an object that shares the same back-end with +// GitCatalog, +// - also add the `prepare` interface to the module loader + /// The name of the cache for modules loaded explicitly by git URL pub(crate) const EXPLICIT_GIT_MOD_CACHE: &'static str = "git-modules"; @@ -239,4 +243,5 @@ impl ModuleCatalog for GitCatalog { Ok(loader) } -} \ No newline at end of file +} + diff --git a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs new file mode 100644 index 000000000..8e01ca366 --- /dev/null +++ b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs @@ -0,0 +1,70 @@ + +use crate::metta::runner::pkg_mgmt::*; + +/// An interface to facilitate explicit management of a catalog, usually as a local mirror +/// of one or more remote catalogs used by a user to insulate them from upstream changes +/// +/// NOTE: ModuleDescriptors used by the catalog should be identical to those used by the +/// UpstreamCatalog(s) +// +//NOTE FOR THE FUTURE: There are two major problems with this `fetch_newest_for_all` +// interface. +// 1. 
There is no way to know which modules may be deleted from the catalog and which must +// be kept. Therefore it is impossible to simply "upgrade" a module - ie. pulling a +// new version and removing the old. +// +// This is because an older version of the module may satisfy a dependency that is not +// satisfied by the newer version. And this object does not have enough visibility to +// know. +// +// 2. Relatedly, there is no way to automatically fetch the latest module for a given +// dependency. For example, if the catalog has v0.1.3 of a mod, and the upstream +// catalog contains v0.2.0 and v0.1.5, there is no way to know which is needed between +// those two, in the context of the importer's requirements. +// +//PROPOSAL: Requirement API. A ManagedCatalog would need to track which requirements are +// satisfied by each module, so that if a requirement were better satisfied by another +// module then the old module could be removed. +// +// There are a number of unanswered questions however: +// - How should the managed catalog interact with modules from other catalogs? Should +// the managed catalog track dependencies outside the upstream catalog? A module from +// any catalog can theoretically satisfy a dependency so what happens if a local dir +// catalog mod satisfies a sub-dependency, but a newer version of the mod exists on the +// remote catalog? +// - How will the managed catalog logic work with regard to the sat-set solving? +// See "QUESTION on shared base dependencies". In other words, the best dependency mod +// in isolation might not be the best when considered holistically. The Requirement API +// needs to take that into account. +// +pub trait ManagedCatalog: ModuleCatalog { + + /// Clears all locally stored modules, resetting the local catalog to an empty state + fn clear_all(&self) -> Result<(), String>; + + /// Fetch a specific module from the UpstreamCatalog. Returns `Ok(())`` if the module + /// already exists in the catalog + /// + /// NOTE: This method will likely become async in the future + fn fetch(&self, descriptor: &ModuleDescriptor) -> Result<(), String>; + + /// Remove a specific module from the catalog + fn remove(&self, descriptor: &ModuleDescriptor) -> Result<(), String>; + + /// AKA "upgrade". Fetches the newest version for each module that currently exists in + /// the catalog + /// + /// NOTE: This API will likely change in the future. 
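// (Editorial sketch, not part of the patch.) A caller working against this trait might mirror one
// module locally and later refresh the whole mirror; the module name below is hypothetical.
fn mirror_module(catalog: &dyn ManagedCatalog, mod_name: &str) -> Result<(), String> {
    // Resolve the newest matching descriptor (no uid or version constraint in this sketch)
    let desc = catalog.lookup_newest_with_uid_and_version_req(mod_name, None, None)
        .ok_or_else(|| format!("no module named \"{mod_name}\" in upstream catalogs"))?;
    // Fetch it into the local store; this is a no-op if it is already present
    catalog.fetch(&desc)?;
    // Later, the whole mirror can be refreshed in one call
    catalog.fetch_newest_for_all()
}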
See "NOTE FOR THE FUTURE" in comments + /// for `ManagedCatalog` + fn fetch_newest_for_all(&self) -> Result<(), String> { + let iter = self.list_name_uid_pairs() + .ok_or_else(|| "managed catalog must support `list` method".to_string())?; + for (name, uid) in iter { + if let Some(desc) = self.lookup_newest_with_uid_and_version_req(&name, uid, None) { + self.fetch(&desc)?; + } + } + Ok(()) + } +} + diff --git a/lib/src/metta/runner/pkg_mgmt/mod.rs b/lib/src/metta/runner/pkg_mgmt/mod.rs index a3c1a07a6..db55c09b3 100644 --- a/lib/src/metta/runner/pkg_mgmt/mod.rs +++ b/lib/src/metta/runner/pkg_mgmt/mod.rs @@ -2,6 +2,8 @@ mod catalog; pub use catalog::*; -pub(crate) mod git_cache; +mod managed_catalog; +pub use managed_catalog::*; +pub(crate) mod git_cache; pub(crate) mod git_catalog; \ No newline at end of file diff --git a/repl/src/metta_shim.rs b/repl/src/metta_shim.rs index 255ca5ecc..ecdfef859 100644 --- a/repl/src/metta_shim.rs +++ b/repl/src/metta_shim.rs @@ -340,7 +340,7 @@ pub mod metta_interface_mod { pub fn init_common_env(working_dir: PathBuf, include_paths: Vec) -> Result { let mut builder = EnvBuilder::new() .set_working_dir(Some(&working_dir)) - .create_config_dir(); + .set_create_config_dir(true); for path in include_paths.into_iter().rev() { builder = builder.push_include_path(path); From b3e50acdcc2905c4c91da510d029e76daeb3e231 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Fri, 10 May 2024 20:39:52 +0900 Subject: [PATCH 41/77] Adding LocalCatalog, which is a managed catalog that sits in front of the removely managed GitCatalog and the explicitly managed cache --- c/src/metta.rs | 2 +- lib/src/metta/runner/environment.rs | 26 +- lib/src/metta/runner/mod.rs | 2 +- lib/src/metta/runner/modules/mod.rs | 31 +- lib/src/metta/runner/pkg_mgmt/catalog.rs | 70 +++-- lib/src/metta/runner/pkg_mgmt/git_cache.rs | 28 +- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 276 +++++++++++++----- .../metta/runner/pkg_mgmt/managed_catalog.rs | 266 ++++++++++++++++- lib/src/metta/runner/stdlib.rs | 21 +- 9 files changed, 591 insertions(+), 131 deletions(-) diff --git a/c/src/metta.rs b/c/src/metta.rs index acc02d369..0382c834f 100644 --- a/c/src/metta.rs +++ b/c/src/metta.rs @@ -1740,7 +1740,7 @@ pub extern "C" fn module_id_is_valid(mod_id: *const module_id_t) -> bool { #[no_mangle] pub extern "C" fn module_descriptor_new(name: *const c_char) -> module_descriptor_t { //TODO-NEXT: We should probably take a version string, and parse it into a semver version - ModuleDescriptor::new(cstr_as_str(name).to_string(), None).into() + ModuleDescriptor::new(cstr_as_str(name).to_string(), None, None).into() } /// @brief Creates a new module_descriptor_t that represents the error attempting to interpret a module diff --git a/lib/src/metta/runner/environment.rs b/lib/src/metta/runner/environment.rs index 91595ab29..003e22756 100644 --- a/lib/src/metta/runner/environment.rs +++ b/lib/src/metta/runner/environment.rs @@ -5,7 +5,7 @@ use std::fs; use std::sync::Arc; #[cfg(feature = "pkg_mgmt")] -use crate::metta::runner::pkg_mgmt::{ModuleCatalog, DirCatalog, FsModuleFormat, SingleFileModuleFmt, DirModuleFmt, git_catalog::*}; +use crate::metta::runner::pkg_mgmt::{ModuleCatalog, DirCatalog, LocalCatalog, FsModuleFormat, SingleFileModuleFmt, DirModuleFmt, git_catalog::*}; use directories::ProjectDirs; @@ -24,6 +24,9 @@ pub struct Environment { catalogs: Vec>, #[cfg(feature = "pkg_mgmt")] pub(crate) fs_mod_formats: Arc>>, + /// The store for modules loaded from git by explicit URL + #[cfg(feature = "pkg_mgmt")] + 
pub(crate) explicit_git_mods: Option, } const DEFAULT_INIT_METTA: &[u8] = include_bytes!("init.default.metta"); @@ -92,6 +95,8 @@ impl Environment { catalogs: vec![], #[cfg(feature = "pkg_mgmt")] fs_mod_formats: Arc::new(vec![]), + #[cfg(feature = "pkg_mgmt")] + explicit_git_mods: None, } } } @@ -343,11 +348,24 @@ impl EnvBuilder { } } - //Search the remote git-based catalog, if we have a caches dir to store the modules + //If we have a caches dir to cache modules locally then register remote catalogs if let Some(caches_dir) = &env.caches_dir { - //TODO: Catalog should be moved to trueagi github account, and catalog settings should come from config + + //Setup the explicit_git_mods managed catalog to hold mods fetched by explicit URL + let mut explicit_git_mods = LocalCatalog::new(caches_dir, "git-modules").unwrap(); + let git_mod_catalog = GitCatalog::new_without_source_repo(env.fs_mod_formats.clone(), "git-modules").unwrap(); + explicit_git_mods.push_upstream_catalog(Box::new(git_mod_catalog)); + env.explicit_git_mods = Some(explicit_git_mods); + + //Add the remote git-based catalog to the end of the catalog priority search list + //TODO-NOW: Catalog should be moved to trueagi github account, and catalog settings should come from config + let catalog_name = "luketpeterson-catalog"; + let catalog_url = "https://github.com/luketpeterson/metta-mod-catalog.git"; let refresh_time = 259200; //3 days = 3 days * 24 hrs * 60 minutes * 60 seconds - env.catalogs.push(Box::new(GitCatalog::new(caches_dir, env.fs_mod_formats.clone(), "luketpeterson-catalog", "https://github.com/luketpeterson/metta-mod-catalog.git", refresh_time).unwrap())); + let mut managed_remote_catalog = LocalCatalog::new(caches_dir, catalog_name).unwrap(); + let remote_catalog = GitCatalog::new(caches_dir, env.fs_mod_formats.clone(), catalog_name, catalog_url, refresh_time).unwrap(); + managed_remote_catalog.push_upstream_catalog(Box::new(remote_catalog)); + env.catalogs.push(Box::new(managed_remote_catalog)); } } diff --git a/lib/src/metta/runner/mod.rs b/lib/src/metta/runner/mod.rs index efec03020..d8d83799e 100644 --- a/lib/src/metta/runner/mod.rs +++ b/lib/src/metta/runner/mod.rs @@ -920,7 +920,7 @@ impl<'input> RunContext<'_, '_, 'input> { /// * If `descriptor` matches an existing loaded module, alias in the module name-space will be created, /// and the module's ModId will be returned, otherwise, /// * The `loader` will be used to initialize a new module, and the new ModId will be returned - fn get_or_init_module_with_descriptor(&mut self, mod_name: &str, descriptor: ModuleDescriptor, loader: Box) -> Result { + pub(crate) fn get_or_init_module_with_descriptor(&mut self, mod_name: &str, descriptor: ModuleDescriptor, loader: Box) -> Result { match self.init_state.get_module_with_descriptor(&self.metta, &descriptor) { Some(mod_id) => { self.load_module_alias(mod_name, mod_id) diff --git a/lib/src/metta/runner/modules/mod.rs b/lib/src/metta/runner/modules/mod.rs index 9927f2b15..94702d2a2 100644 --- a/lib/src/metta/runner/modules/mod.rs +++ b/lib/src/metta/runner/modules/mod.rs @@ -483,6 +483,12 @@ impl ModuleInitState { /// The init function will then call `context.init_self_module()` along with any other initialization code pub fn init_module(&mut self, runner: &Metta, mod_name: &str, loader: Box) -> Result { + //Give the prepare function a chance to run, in case it hasn't yet + let loader = match loader.prepare(None, false)? 
{ + Some(new_loader) => new_loader, + None => loader + }; + //Create a new RunnerState in order to initialize the new module, and push the init function // to run within the new RunnerState. The init function will then call `context.init_self_module()` let mut runner_state = RunnerState::new_for_loading(runner, mod_name, self); @@ -607,8 +613,29 @@ pub trait ModuleLoader: std::fmt::Debug + Send + Sync { None } - //TODO-NEXT, add "prepare" function that consumes loader, and returns another one. This will - // allow us to pre-fetch modules + //TODO-NOW Delete this: I changed my mind about this interface - I now think the design should commit to an + // injective mapping between ModuleDescriptors and directory names + // + // /// Suggests a name that can be used by the implementation for locally cached module files + // /// + // /// The returned name should be deterministic, but unique to the module and its version, etc. + // /// For example, a git branch or a remote server URL may be encoded into the name. The name + // /// must be composed of only legal file name characters, and must not contain the '/' char. + // fn cache_dir_name(&self) -> Option { + // None + // } + + /// Prepares a module for loading. This method is responsible for fetching resources + /// from the network, performing build or pre-computation steps, or any other operations + /// that only need to be performed once and then may be cached locally + /// + /// If this method returns `Ok(Some(_))` then the loader will be dropped and the returned + /// loader will replace it. + /// + /// NOTE: This method may become async in the future + fn prepare(&self, _local_dir: Option<&Path>, _should_refresh: bool) -> Result>, String> { + Ok(None) + } /// Returns a data blob containing a given named resource belonging to a module fn get_resource(&self, _res_key: ResourceKey) -> Result, String> { diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index 0d98fcff0..87ecd5aad 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -70,6 +70,7 @@ // // I think my personal preference is for #2. +use core::any::Any; use std::path::Path; use std::collections::hash_map::DefaultHasher; use std::hash::Hasher; @@ -81,7 +82,7 @@ use crate::metta::runner::modules::*; use crate::metta::runner::{*, git_catalog::*}; use xxhash_rust::xxh3::xxh3_64; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; /// Implemented for types capable of locating MeTTa modules /// @@ -166,6 +167,18 @@ pub trait ModuleCatalog: std::fmt::Debug + Send + Sync { Box::new(results.into_iter()) as Box)>> }) } + + /// Returns the catalog as an [Any] in order to get back to the underlying object + fn as_any(&self) -> Option<&dyn Any> { + None + } +} + +impl dyn ModuleCatalog { + /// Returns the catalog as as an underlying type, if it's supported by the catalog format + pub fn downcast(&self) -> Option<&T> { + self.as_any()?.downcast_ref() + } } /// Internal function to filter a set of [ModuleDescriptor]s by a [semver::VersionReq]. See @@ -282,7 +295,7 @@ impl PkgInfo { } //Get the module if it's specified with git keys - if let Some(pair) = entry.git_location.get_loader(context.metta.environment().fs_mod_formats(), context.metta.environment().caches_dir(), EXPLICIT_GIT_MOD_CACHE, mod_name, None, None)? { + if let Some(pair) = entry.git_location.get_loader_in_explicit_catalog(mod_name, false, context.metta.environment())? 
{ return Ok(Some(pair)); } @@ -657,7 +670,7 @@ fn visit_modules_in_dir_using_mod_formats(fmts: &[Box], dir_ /// /// NOTE: It is possible for a module to have both a version and a uid. Module version uniqueness is /// enforced by the catalog(s), and two catalogs may disagree -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)] pub struct ModuleDescriptor { name: String, uid: Option, @@ -666,16 +679,13 @@ pub struct ModuleDescriptor { impl ModuleDescriptor { /// Create a new ModuleDescriptor - pub fn new(name: String, version: Option) -> Self { - Self { name, uid: None, version } - } - /// Create a new ModuleDescriptor - pub fn new_with_uid(name: String, version: Option, uid: u64) -> Self { - Self { name, uid: Some(uid), version } + pub fn new(name: String, version: Option, uid: Option) -> Self { + Self { name, uid, version } } + /// Returns a new ModuleDescriptor by computing a stable hash of the `ident` bytes, and using the `fmt_id` pub fn new_with_ident_bytes_and_fmt_id(name: String, version: Option, ident: &[u8], fmt_id: u64) -> Self { - let uid = xxh3_64(ident) ^ fmt_id; - ModuleDescriptor::new_with_uid(name, version, uid) + let uid = Self::uid_from_ident_bytes_and_fmt_id(ident, fmt_id); + ModuleDescriptor::new(name, version, Some(uid)) } /// Create a new ModuleDescriptor using a file system path and another unique id /// @@ -699,17 +709,16 @@ impl ModuleDescriptor { pub fn version(&self) -> Option<&semver::Version> { self.version.as_ref() } - /// Returns `true` if the `ident_bytes` and `fmt_id` match what was used to create the descriptor - pub fn ident_bytes_and_fmt_id_matches(&self, ident: &[u8], fmt_id: u64) -> bool { - let uid = xxh3_64(ident) ^ fmt_id; - self.uid == Some(uid) - } /// Internal. Use the Hash trait to get a uid for the whole ModuleDescriptor pub fn hash(&self) -> u64 { let mut hasher = DefaultHasher::new(); std::hash::Hash::hash(self, &mut hasher); hasher.finish() } + /// Returns a uid based on a stable hash of `the ident` bytes, and the fmt_id + pub fn uid_from_ident_bytes_and_fmt_id(ident: &[u8], fmt_id: u64) -> u64 { + xxh3_64(ident) ^ fmt_id + } } /// Extracts the module name from a `.git` URL @@ -740,7 +749,7 @@ mod tests { impl ModuleCatalog for TestCatalog { fn lookup(&self, name: &str) -> Vec { if name.len() == 1 && name.chars().last().unwrap().is_uppercase() { - vec![ModuleDescriptor::new(name.to_string(), None)] + vec![ModuleDescriptor::new(name.to_string(), None, None)] } else { vec![] } @@ -798,7 +807,7 @@ mod tests { //Set up the module [PkgInfo] so it knows to load a sub-module from git pkg_info.name = Some("test-mod".to_string()); - pkg_info.deps.insert("metta-morph".to_string(), DepEntry{ + pkg_info.deps.insert("metta-morph-test".to_string(), DepEntry{ fs_path: None, git_location: ModuleGitLocation { //TODO: We probably want a smaller test repo @@ -829,7 +838,7 @@ mod tests { /// the module's PkgInfo. This test requires a network connection /// /// NOTE. Ignored because we may not want it fetching from the internet when running the - /// test suite. Invoke `cargo test git_pkginfo_fetch_test -- --ignored` to run it. + /// test suite. Invoke `cargo test --features git git_pkginfo_fetch_test -- --ignored --nocapture` to run it. 
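// (Editorial sketch, not part of the patch.) With the reworked constructor, a descriptor for a
// git-hosted module can carry a uid derived from its URL; the name, version, and URL below are
// hypothetical placeholders.
fn example_descriptor() -> ModuleDescriptor {
    let url = "https://github.com/example-org/example-mod.git";
    let uid = ModuleDescriptor::uid_from_ident_bytes_and_fmt_id(url.as_bytes(), 0);
    ModuleDescriptor::new("example-mod".to_string(), Some(semver::Version::new(0, 1, 0)), Some(uid))
}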
#[ignore] #[test] fn git_pkginfo_fetch_test() { @@ -838,7 +847,28 @@ mod tests { let runner = Metta::new(Some(EnvBuilder::new().set_config_dir(Path::new("/tmp/hyperon-test/")))); let _mod_id = runner.load_module_direct(Box::new(TestLoader::new()), "test-mod").unwrap(); - let result = runner.run(SExprParser::new("!(import! &self test-mod:metta-morph)")); + let result = runner.run(SExprParser::new("!(import! &self test-mod:metta-morph-test)")); + assert_eq!(result, Ok(vec![vec![expr!()]])); + + //Test that we can use a function imported from the module + let result = runner.run(SExprParser::new("!(sequential (A B))")); + assert_eq!(result, Ok(vec![vec![sym!("A"), sym!("B")]])); + + runner.display_loaded_modules(); + } + + /// Tests that a module can be resolved in a remote cataloc, fetched from git and then + /// loaded. This test requires a network connection + /// + /// NOTE. Ignored because we may not want it fetching from the internet when running the + /// test suite. Invoke `cargo test --features git git_remote_catalog_fetch_test -- --ignored --nocapture` to run it. + #[ignore] + #[test] + fn git_remote_catalog_fetch_test() { + + //Make a new runner, with the config dir in `/tmp/hyperon-test/` + let runner = Metta::new(Some(EnvBuilder::new().set_config_dir(Path::new("/tmp/hyperon-test/")))); + let result = runner.run(SExprParser::new("!(import! &self metta-morph)")); assert_eq!(result, Ok(vec![vec![expr!()]])); //Test that we can use a function imported from the module diff --git a/lib/src/metta/runner/pkg_mgmt/git_cache.rs b/lib/src/metta/runner/pkg_mgmt/git_cache.rs index 86b7af82a..571ceb2ee 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_cache.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_cache.rs @@ -14,8 +14,6 @@ use std::fs::{File, read_to_string}; #[cfg(feature = "git")] use std::io::prelude::*; -use xxhash_rust::xxh3::xxh3_64; - #[cfg(feature = "git")] use git2::{*, build::*}; @@ -28,7 +26,6 @@ pub enum UpdateMode { /// Clones the repo if it doesn't exist, otherwise leaves it alone PullIfMissing, /// Pulls the latest from the remote repo. Fails if the remote is unavailable - #[allow(dead_code)] PullLatest, /// Attempts to pull from the remote repo. Continues with the existing repo if /// the remote is unavailable @@ -52,34 +49,15 @@ pub struct CachedRepo { impl CachedRepo { /// Initializes a new CachedRepo object - /// - /// * `cache_name` - A name to describe the cache. For the default cache for URLs specified - /// from the [PkgInfo], the cache is named `git-modules` - /// * `name` - The name of this repo within in the cache. Often equal to the catalog name of a module - /// * `ident_str` - An ascii string that identifies the specific repo among other verions. - /// For example this could be a version, for a MeTTa module catalog cache. 
- /// * `url` - The remote URL from which to fetch the repo - /// * `branch` - The branch to use, or default if None - pub fn new(caches_dir: &Path, cache_name: &str, name: &str, ident_str: &str, url: &str, branch: Option<&str>, subdir: Option<&Path>) -> Result { + pub fn new(name: &str, repo_local_path: PathBuf, url: &str, branch: Option<&str>, subdir: Option<&Path>) -> Result { - let local_filename = if branch.is_some() || ident_str.len() > 0 { - let branch_str = match &branch { - Some(s) => s, - None => "" - }; - let unique_id = xxh3_64(format!("{}{}", ident_str, branch_str).as_bytes()); - format!("{name}.{unique_id:016x}") - } else { - name.to_string() - }; - let this_cache_dir = caches_dir.join(cache_name); - let repo_local_path = this_cache_dir.join(local_filename); let local_path = match subdir { Some(subdir) => repo_local_path.join(subdir), None => repo_local_path.clone() }; - std::fs::create_dir_all(&this_cache_dir).map_err(|e| e.to_string())?; + let parent_dir = repo_local_path.parent().unwrap(); + std::fs::create_dir_all(parent_dir).map_err(|e| e.to_string())?; Ok(Self { name: name.to_string(), diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index 0370cac32..df619d41d 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -1,6 +1,7 @@ //! Implements a [ModuleCatalog] serving remotely hosted modules via git //! +use core::any::Any; use std::path::{Path, PathBuf}; use std::fs::read_to_string; use std::sync::Mutex; @@ -10,7 +11,7 @@ use serde::Deserialize; use crate::metta::runner::*; use crate::metta::runner::pkg_mgmt::{*, git_cache::*}; -//TODO: TODO-NEXT. This is almost implemented. But keeping these notes until complete +//TODO: TODO-NOW. This is almost implemented. But keeping these notes until complete // * Funtion to trigger explicit updates. Accessible from metta ops // - Update specific module, update to a specific version, latest, or latest stable // - update all modules, to latest or latest stable @@ -48,10 +49,6 @@ use crate::metta::runner::pkg_mgmt::{*, git_cache::*}; // GitCatalog, // - also add the `prepare` interface to the module loader - -/// The name of the cache for modules loaded explicitly by git URL -pub(crate) const EXPLICIT_GIT_MOD_CACHE: &'static str = "git-modules"; - /// A set of keys describing how to access a module via git. Deserialized from within a [PkgInfo] /// or a catalog file [CatalogFileFormat] #[derive(Clone, Debug, Default, Deserialize)] @@ -77,21 +74,15 @@ pub struct ModuleGitLocation { } impl ModuleGitLocation { - /// Returns a ModuleLoader & ModuleDescriptor pair for a module hosted at a specific git location. 
- /// Checks the cache to avoid unnecessaryily fetching the module if we have it locally - pub(crate) fn get_loader<'a, FmtIter: Iterator>(&self, fmts: FmtIter, caches_dir: Option<&Path>, cache_name: &str, mod_name: &str, version: Option<&semver::Version>, ident_str: Option<&str>) -> Result, ModuleDescriptor)>, String> { + /// Fetches the module from git if it doesn't exist in `local_cache_dir`, and then returns + /// a ModuleLoader & ModuleDescriptor pair for the module + pub(crate) fn fetch_and_get_loader<'a, FmtIter: Iterator>(&self, fmts: FmtIter, mod_name: &str, local_cache_dir: PathBuf, update_mode: UpdateMode) -> Result, ModuleDescriptor)>, String> { //If a git URL is specified in the entry, see if we have it in the git-cache and // clone it locally if we don't if self.git_url.is_some() { - let cached_repo = self.get_cache(caches_dir, cache_name, mod_name, version, ident_str)?; - //TODO We want "version-locking" behavior from this cache. ie. don't update once - // we pulled a version, but we need a way to cause it to pull the latest. - //The tricky part is how the user will specify the modules. Doing by url sounds tedious. - // but doing it by mod_name might update modules in unwanted ways. - //At the very least, we need to offer a "update everything" command that can run across - // an entire cache - cached_repo.update(UpdateMode::PullIfMissing)?; + let cached_repo = self.get_cache(mod_name, local_cache_dir)?; + cached_repo.update(update_mode)?; let mod_path = match &self.git_main_file { Some(main_file) => cached_repo.local_path().join(main_file), @@ -102,23 +93,57 @@ impl ModuleGitLocation { Ok(None) } - pub(crate) fn get_cache(&self, caches_dir: Option<&Path>, cache_name: &str, mod_name: &str, version: Option<&semver::Version>, ident_str: Option<&str>) -> Result { - let caches_dir = caches_dir.ok_or_else(|| "Unable to clone git repository; no local \"caches\" directory available".to_string())?; + /// Gets a loader for a module identified by a ModuleGitLocation, using the [Environment]'s managed `explicit_git_mods` catalog + pub(crate) fn get_loader_in_explicit_catalog(&self, mod_name: &str, should_refresh: bool, env: &Environment) -> Result, ModuleDescriptor)>, String> { + if self.get_url().is_some() { + if let Some(explicit_git_catalog) = env.explicit_git_mods.as_ref() { + let descriptor = explicit_git_catalog.upstream_catalogs().first().unwrap().downcast::().unwrap().register_mod(mod_name, None, self)?; + let loader = explicit_git_catalog.get_loader_with_explicit_refresh(&descriptor, should_refresh)?; + Ok(Some((loader, descriptor))) + } else { + Err(format!("Unable to pull module \"{mod_name}\" from git; no local \"caches\" directory available")) + } + } else { + Ok(None) + } + } + pub(crate) fn get_cache(&self, mod_name: &str, local_cache_dir: PathBuf) -> Result { let url = self.git_url.as_ref().unwrap(); - let ident_str = match ident_str { - Some(ident_str) => ident_str, - None => url, - }; - let repo_name_string; - let mod_repo_name = match version { - Some(version) => { - repo_name_string = format!("{mod_name}-{version}"); - &repo_name_string - }, - None => mod_name - }; - CachedRepo::new(caches_dir, cache_name, mod_repo_name, ident_str, url, self.git_branch.as_ref().map(|s| s.as_str()), self.git_subdir.as_ref().map(|p| p.as_path())) + let branch = self.git_branch.as_ref().map(|s| s.as_str()); + let subdir = self.git_subdir.as_ref().map(|p| p.as_path()); + CachedRepo::new(mod_name, local_cache_dir, url, branch, subdir) + } + pub(crate) fn uid(&self) -> u64 { + let 
subdir_string; + let subdir_str = if let Some(p) = &self.git_subdir { + subdir_string = format!("{p:?}"); + subdir_string.as_str() + } else {""}; + let main_file_string; + let main_file_str = if let Some(p) = &self.git_main_file { + main_file_string = format!("{p:?}"); + main_file_string.as_str() + } else {""}; + let unique_string = format!("{}-{}-{subdir_str}-{main_file_str}", + self.git_url.as_ref().map(|s| s.as_str()).unwrap_or(""), + self.git_branch.as_ref().map(|s| s.as_str()).unwrap_or(""), + ); + ModuleDescriptor::uid_from_ident_bytes_and_fmt_id(unique_string.as_bytes(), 0) } + //TODO-NOW: Now, delete this. Unnecessary + // pub(crate) fn cache_dir_name(&self, mod_name: &str, version: Option<&semver::Version>) -> String { + // let uid = self.uid(); + + // let repo_name_string; + // let mod_repo_name = match version { + // Some(version) => { + // repo_name_string = format!("{mod_name}-{version}"); + // &repo_name_string + // }, + // None => mod_name + // }; + // format!("{mod_repo_name}.{uid:016x}") + // } /// Returns a new ModuleGitLocation. This is a convenience; the usual interface involves deserializing this struct pub(crate) fn new(url: String) -> Self { let mut new_self = Self::default(); @@ -131,13 +156,46 @@ impl ModuleGitLocation { } /// Struct that matches the catalog.json file fetched from the `catalog.repo` -#[derive(Deserialize, Debug)] +#[derive(Deserialize, Debug, Default)] struct CatalogFileFormat { + //TODO-NOW. Upon reflection, I see no good reason not to use a HashMap here instead of a Vec modules: Vec } +impl CatalogFileFormat { + fn find_mods_with_name(&self, name: &str) -> Vec { + let mut results = vec![]; + for cat_mod in self.modules.iter() { + if cat_mod.name == name { + let uid = cat_mod.git_location.uid(); + let descriptor = ModuleDescriptor::new(name.to_string(), cat_mod.version.clone(), Some(uid)); + results.push(descriptor); + } + } + results + } + fn find_mod_idx_with_descriptor(&self, descriptor: &ModuleDescriptor) -> Option { + for (mod_idx, cat_mod) in self.modules.iter().enumerate() { + if cat_mod.name == descriptor.name() && cat_mod.version.as_ref() == descriptor.version() { + if Some(cat_mod.git_location.uid()) == descriptor.uid() { + return Some(mod_idx); + } + } + } + None + } + fn add(&mut self, new_mod: CatalogFileMod) -> Result { + let uid = new_mod.git_location.uid(); + let descriptor = ModuleDescriptor::new(new_mod.name.clone(), new_mod.version.clone(), Some(uid)); + if self.find_mod_idx_with_descriptor(&descriptor).is_none() { + self.modules.push(new_mod); + } + Ok(descriptor) + } +} + /// A single module in a catalog.json file -#[derive(Deserialize, Debug)] +#[derive(Clone, Deserialize, Debug)] struct CatalogFileMod { name: String, version: Option, @@ -145,13 +203,19 @@ struct CatalogFileMod { git_location: ModuleGitLocation, } +impl CatalogFileMod { + fn new(name: String, version: Option, git_location: ModuleGitLocation) -> Self { + Self {name, version, git_location} + } +} + +/// Provides an interface to a git repo hosting a table of available modules #[derive(Debug)] pub struct GitCatalog { name: String, - caches_dir: PathBuf, fmts: Arc>>, refresh_time: u64, - catalog_repo: CachedRepo, + catalog_repo: Option, catalog: Mutex>, } @@ -159,61 +223,66 @@ impl GitCatalog { /// Creates a new GitCatalog with the name and url specified. 
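// (Editorial sketch, not part of the patch.) This mirrors how environment.rs wires a remote
// GitCatalog behind a LocalCatalog; the catalog name and URL below are placeholders, and the
// error type is assumed to be String as elsewhere in this module.
fn example_remote_catalog(caches_dir: &Path, fmts: Arc<Vec<Box<dyn FsModuleFormat>>>) -> Result<LocalCatalog, String> {
    let refresh_time = 259200; // seconds between refreshes of the catalog file
    let remote = GitCatalog::new(caches_dir, fmts, "example-catalog",
        "https://github.com/example-org/example-catalog.git", refresh_time)?;
    let mut local = LocalCatalog::new(caches_dir, "example-catalog")?;
    // Modules resolved through the remote catalog get cached in the LocalCatalog's storage dir
    local.push_upstream_catalog(Box::new(remote));
    Ok(local)
}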
`refresh_time` is the time, in /// seconds, between refreshes of the catalog file pub fn new(caches_dir: &Path, fmts: Arc>>, name: &str, url: &str, refresh_time: u64) -> Result { - let catalog_repo = CachedRepo::new(caches_dir, &name, "catalog.repo", "", url, None, None)?; + let catalog_repo_dir = caches_dir.join(name).join("catalog.repo"); + let catalog_repo_name = format!("{name}-catalog.repo"); + let catalog_repo = CachedRepo::new(&catalog_repo_name, catalog_repo_dir, url, None, None)?; + let mut new_self = Self::new_without_source_repo(fmts, name)?; + new_self.refresh_time = refresh_time; + new_self.catalog_repo = Some(catalog_repo); + Ok(new_self) + } + /// Used for a git-based catalog that isn't synced to a remote source + pub fn new_without_source_repo(fmts: Arc>>, name: &str) -> Result { Ok(Self { name: name.to_string(), fmts, - refresh_time, - caches_dir: caches_dir.to_owned(), - catalog_repo, + refresh_time: 0, + catalog_repo: None, catalog: Mutex::new(None), }) } + /// Registers a new module in the catalog with a specified remote location, and returns the [ModuleDescriptor] to refer to that module + /// + /// NOTE: explicitly setting a module may + pub(crate) fn register_mod(&self, mod_name: &str, version: Option<&semver::Version>, git_location: &ModuleGitLocation) -> Result { + let mut catalog_ref = self.catalog.lock().unwrap(); + if catalog_ref.is_none() { + *catalog_ref = Some(CatalogFileFormat::default()); + } + let descriptor = catalog_ref.as_mut().unwrap().add(CatalogFileMod::new(mod_name.to_string(), version.cloned(), git_location.clone()))?; + Ok(descriptor) + } /// Scans the catalog and finds all the modules with a given name fn find_mods_with_name(&self, name: &str) -> Vec { let cat_lock = self.catalog.lock().unwrap(); let catalog = cat_lock.as_ref().unwrap(); - let mut results = vec![]; - for cat_mod in catalog.modules.iter() { - if cat_mod.name == name { - let descriptor = ModuleDescriptor::new_with_ident_bytes_and_fmt_id(name.to_string(), cat_mod.version.clone(), cat_mod.git_location.get_url().unwrap().as_bytes(), 0); - results.push(descriptor); - } - } - results + catalog.find_mods_with_name(name) } /// Scans the catalog looking for a single module that matches the provided descriptor fn find_mod_idx_with_descriptor(&self, descriptor: &ModuleDescriptor) -> Option { let cat_lock = self.catalog.lock().unwrap(); let catalog = cat_lock.as_ref().unwrap(); - for (mod_idx, cat_mod) in catalog.modules.iter().enumerate() { - if cat_mod.name == descriptor.name() && - cat_mod.version.as_ref() == descriptor.version() && - descriptor.ident_bytes_and_fmt_id_matches(cat_mod.git_location.get_url().unwrap().as_bytes(), 0) { - return Some(mod_idx); - } - } - None + catalog.find_mod_idx_with_descriptor(descriptor) } } impl ModuleCatalog for GitCatalog { fn lookup(&self, name: &str) -> Vec { - //Get the catalog from the git cache - let did_update = match self.catalog_repo.update(UpdateMode::TryPullIfOlderThan(self.refresh_time)) { - Ok(did_update) => did_update, - Err(e) => { - log::warn!("Warning: error encountered attempting to fetch remote catalog: {}, {e}", self.name); - return vec![]; - } - }; + if let Some(catalog_repo) = &self.catalog_repo { + //Get the catalog from the git cache + let did_update = match catalog_repo.update(UpdateMode::TryPullIfOlderThan(self.refresh_time)) { + Ok(did_update) => did_update, + Err(e) => { + log::warn!("Warning: error encountered attempting to fetch remote catalog: {}, {e}", self.name); + return vec![]; + } + }; - //Parse the catalog JSON file if 
we need to - { + //Parse the catalog JSON file if we need to let mut catalog = self.catalog.lock().unwrap(); if did_update || catalog.is_none() { - let catalog_file_path = self.catalog_repo.local_path().join("catalog.json"); + let catalog_file_path = catalog_repo.local_path().join("catalog.json"); match read_to_string(&catalog_file_path) { Ok(file_contents) => { *catalog = Some(serde_json::from_str(&file_contents).unwrap()); @@ -236,12 +305,81 @@ impl ModuleCatalog for GitCatalog { let catalog = cat_lock.as_ref().unwrap(); let module = catalog.modules.get(mod_idx).unwrap(); - let loader = match module.git_location.get_loader(self.fmts.iter().map(|f| &**f), Some(&self.caches_dir), &self.name, descriptor.name(), descriptor.version(), None)? { + Ok(Box::new(GitModLoader{ + module: module.clone(), + fmts: self.fmts.clone(), + })) + } + fn as_any(&self) -> Option<&dyn Any> { + Some(self as &dyn Any) + } +} + +//TODO-NOW: I don't think we need this. We can just use an instance of LocalCatalog +// /// Provides an interface to access, inspect, and upgrade the modules fetched from git using +// /// a specific URL +// #[derive(Debug)] +// pub struct ExplicitGitCatalog; + +// impl ExplicitGitCatalog { +// pub(crate) fn get_explicit_loader(env: &Environment, name: String, version: Option, git_location: ModuleGitLocation) -> Result, ModuleDescriptor)>, String> { +// let module = CatalogFileMod { +// name, +// version, +// git_location, +// }; +// let descriptor = module.get_descriptor(); +// let loader = Box::new(GitModLoader{ +// module: module, +// fmts: env.fs_mod_formats.clone(), +// }); +// Ok(Some((loader, descriptor))) +// } +// } + +// impl ModuleCatalog for ExplicitGitCatalog { +// fn lookup(&self, _name: &str) -> Vec { +// unreachable!() //Nobody should be searching the ExplicitGitCatalog +// } +// fn get_loader(&self, _descriptor: &ModuleDescriptor) -> Result, String> { +// //The ExplicitGitCatalog object exists only for management of the cache. Use `get_explicit_loader` +// unreachable!() +// } +// } + +#[derive(Debug)] +pub struct GitModLoader { + module: CatalogFileMod, + fmts: Arc>>, +} + +impl ModuleLoader for GitModLoader { + //TODO-NOW: Delete this + // fn cache_dir_name(&self) -> Option { + // Some(self.module.git_location.cache_dir_name(&self.module.name, self.module.version.as_ref())) + // } + fn prepare(&self, local_dir: Option<&Path>, should_refresh: bool) -> Result>, String> { + let update_mode = match should_refresh { + true => UpdateMode::TryPullLatest, + false => UpdateMode::PullIfMissing + }; + let local_dir = match local_dir { + Some(local_dir) => local_dir, + None => return Err("GitCatalog: Cannot prepare git-based module without local cache directory".to_string()) + }; + let loader = match self.module.git_location.fetch_and_get_loader(self.fmts.iter().map(|f| &**f), &self.module.name, local_dir.to_owned(), update_mode)? 
{ Some((loader, _)) => loader, None => unreachable!(), }; - - Ok(loader) + Ok(Some(loader)) + } + fn load(&self, _context: &mut RunContext) -> Result<(), String> { + unreachable!() } } + +//TODO-NOW Add some status output when modules are fetched from GIT +//TODO-NOW implement list methods on the local catalog +//TODO-NOW implement the managed catalog trait on the local catalog +//TODO-NOW implement ops to manage the catalog \ No newline at end of file diff --git a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs index 8e01ca366..9cd4d3684 100644 --- a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs @@ -1,11 +1,12 @@ +use std::path::{Path, PathBuf}; +use std::sync::Mutex; + +use crate::metta::runner::*; use crate::metta::runner::pkg_mgmt::*; /// An interface to facilitate explicit management of a catalog, usually as a local mirror /// of one or more remote catalogs used by a user to insulate them from upstream changes -/// -/// NOTE: ModuleDescriptors used by the catalog should be identical to those used by the -/// UpstreamCatalog(s) // //NOTE FOR THE FUTURE: There are two major problems with this `fetch_newest_for_all` // interface. @@ -68,3 +69,262 @@ pub trait ManagedCatalog: ModuleCatalog { } } +#[derive(Debug)] +pub struct LocalCatalog { + _name: String, + upstream_catalogs: Vec>, + storage_dir: PathBuf, + local_toc: Mutex, +} + +impl LocalCatalog { + pub fn new(caches_dir: &Path, name: &str) -> Result { + let storage_dir = caches_dir.join(name); + let local_toc = LocalCatalogTOC::build_from_dir(&storage_dir)?; + + Ok(Self { + _name: name.to_string(), + upstream_catalogs: vec![], + storage_dir, + local_toc: Mutex::new(local_toc), + }) + } + pub fn push_upstream_catalog(&mut self, catalog: Box) { + self.upstream_catalogs.push(catalog); + } + pub fn upstream_catalogs(&self) -> &[Box] { + &self.upstream_catalogs[..] + } + fn lookup_by_name_in_toc(&self, name: &str) -> Option> { + let local_toc = self.local_toc.lock().unwrap(); + local_toc.lookup_by_name(name) + } + //TODO-NOW, Delete this, unneeded, I think + // fn lookup_by_descriptor_in_index(&self, desc: &ModuleDescriptor) -> Result, String> { + // let local_index = self.local_index.lock().unwrap(); + // if let Some(descriptors) = local_index.mods.get(desc.name()) { + // for index_desc in mods { + // if index_desc == desc { + // return Ok(Some(dir_name)); + // } + // } + // } + // Ok(None) + // } + + /// Adds the [ModuleDescriptor] to the TOC if it doesn't exist. Won't create duplicates + fn add_to_toc(&self, descriptor: ModuleDescriptor) -> Result<(), String> { + let mut local_toc = self.local_toc.lock().unwrap(); + local_toc.add_descriptor(descriptor) + } + pub(crate) fn get_loader_with_explicit_refresh(&self, descriptor: &ModuleDescriptor, should_refresh: bool) -> Result, String> { + + //Figure out which upstream catalog furnished this descriptor by trying each one + let mut upstream_loader = None; + for upstream in self.upstream_catalogs.iter() { + match upstream.get_loader(descriptor) { + Ok(loader) => { + upstream_loader = Some(loader); + break + }, + Err(_) => {} + } + } + let upstream_loader = match upstream_loader { + Some(loader) => loader, + None => { + // TODO: It would be nice to have the option here to pull a different but compatible + // mod from the upstream catalogs; however we don't have the original requirement info, + // so currently we cannot do that. See the write-up above about the "Requirement API". 
+ return Err(format!("Upstream Catalogs can no longer supply module {}", descriptor.name())); + } + }; + + //Resolve the local dir to use as the local cache + let cache_dir_name = dir_name_from_descriptor(descriptor); + let local_cache_dir = self.storage_dir.join(cache_dir_name); + + //Make sure this mod is in the TOC + self.add_to_toc(descriptor.to_owned())?; + + //Wrap the upstream loader in a loader object from this catalog + let wrapper_loader = LocalCatalogLoader {local_cache_dir, upstream_loader, should_refresh}; + Ok(Box::new(wrapper_loader)) + } +} + +//TODO-NOW, I think this is also unneeded +// fn read_index_file(file_path: &Path) -> LocalCatalogTOC { +// match read_to_string(&file_path) { +// Ok(file_contents) => { +// serde_json::from_str(&file_contents).unwrap() +// }, +// Err(_e) => { +// LocalCatalogTOC::default() +// } +// } +// } + +//TODO-NOW, I think this is also unneeded +// fn write_index_file(file_path: &Path, catalog_file_data: &LocalCatalogFile) -> Result<(), String> { +// let file = File::create(file_path).map_err(|e| e.to_string())?; +// let mut writer = BufWriter::new(file); +// serde_json::to_writer(&mut writer, catalog_file_data).map_err(|e| e.to_string())?; +// writer.flush().map_err(|e| e.to_string()) +// } + +impl ModuleCatalog for LocalCatalog { + fn lookup(&self, name: &str) -> Vec { + + //If we have some matching modules in the local cache then return them + if let Some(descriptors) = self.lookup_by_name_in_toc(name) { + return descriptors; + } + + //If we don't have anything locally, check the upstream catalogs in order until one + // of them returns some results + for upstream in self.upstream_catalogs.iter() { + let upstream_results = upstream.lookup(name); + if upstream_results.len() > 0 { + return upstream_results; + } + } + + //We didn't find any matching modules, locally or upstream + vec![] + } + fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result, String> { + self.get_loader_with_explicit_refresh(descriptor, false) + } +} + +/// A [ModuleLoader] for a [LocalCatalog] that wraps another ModuleLoader for an upstream [ModuleCatalog] +#[derive(Debug)] +struct LocalCatalogLoader { + local_cache_dir: PathBuf, + should_refresh: bool, + upstream_loader: Box +} + +impl ModuleLoader for LocalCatalogLoader { + fn prepare(&self, _local_dir: Option<&Path>, should_refresh: bool) -> Result>, String> { + self.upstream_loader.prepare(Some(&self.local_cache_dir), should_refresh | self.should_refresh) + } + fn load(&self, _context: &mut RunContext) -> Result<(), String> { + unreachable!() //We will substitute the `upstream_loader` during prepare + } +} + +/// A Table of Contents (TOC) for a LocalCatalog +#[derive(Debug)] +struct LocalCatalogTOC { + mods_by_name: HashMap> +} + +impl LocalCatalogTOC { + /// Scans a directory and builds up a TOC from the contents + fn build_from_dir(storage_dir: &Path) -> Result { + if !storage_dir.exists() { + std::fs::create_dir_all(&storage_dir).map_err(|e| e.to_string())?; + } else { + if !storage_dir.is_dir() { + return Err(format!("Found file instead of directory at {}", storage_dir.display())); + } + } + + let mut new_self = Self { + mods_by_name: HashMap::new() + }; + + for dir_item_handle in std::fs::read_dir(storage_dir).map_err(|e| e.to_string())? 
{ + let dir_entry = dir_item_handle.map_err(|e| e.to_string())?; + let file_name = dir_entry.file_name(); + let name_str = file_name.to_str() + .ok_or_else(|| format!("Invalid characters found in local cache at path: {}", dir_entry.path().display()))?; + + let descriptor = parse_descriptor_from_dir_name(name_str)?; + new_self.add_descriptor(descriptor)?; + } + + Ok(new_self) + } + fn lookup_by_name(&self, name: &str) -> Option> { + if let Some(descriptors) = self.mods_by_name.get(name) { + if descriptors.len() > 0 { + return Some(descriptors.clone()); + } + } + None + } + /// Adds a descriptor to a TOC. Won't add a duplicate + fn add_descriptor(&mut self, descriptor: ModuleDescriptor) -> Result<(), String> { + let desc_vec = self.mods_by_name.entry(descriptor.name().to_owned()).or_insert(vec![]); + if !desc_vec.contains(&descriptor) { + desc_vec.push(descriptor); + } + Ok(()) + } + +} + +/// Returns a String that can be used as a directory to cache local files associated +/// with the module, such as build artifacts and/or downloads +pub(crate) fn dir_name_from_descriptor(desc: &ModuleDescriptor) -> String { + let mod_dir_name = match desc.version() { + Some(version) => format!("{}@{version}", desc.name()), + None => desc.name().to_string() + }; + match desc.uid() { + Some(uid) => format!("{mod_dir_name}#{uid:016x}"), + None => format!("{mod_dir_name}") + } +} + +/// Performs the inverse of [dir_name_from_descriptor], deconstructing a dir_name str into a [ModuleDescriptor] +pub(crate) fn parse_descriptor_from_dir_name(dir_name: &str) -> Result { + let (name_and_vers, uid) = match dir_name.rfind('#') { + Some(pos) => (&dir_name[0..pos], Some(&dir_name[pos+1..])), + None => (dir_name, None) + }; + let (name, version) = match name_and_vers.find('@') { + Some(pos) => (&name_and_vers[0..pos], Some(&name_and_vers[pos+1..])), + None => (name_and_vers, None) + }; + let version = match version { + Some(ver_str) => Some(semver::Version::parse(ver_str).map_err(|e| e.to_string())?), + None => None + }; + let uid = match uid { + Some(uid_str) => Some(u64::from_str_radix(uid_str, 16).map_err(|e| e.to_string())?), + None => None + }; + Ok(ModuleDescriptor::new(name.to_string(), version, uid)) +} + +//TODO-NOW: This below documents my thought process, but it's probably of little value +// now that the design has been implemented fully +// +//DISCUSSION: Who is responsible for managing the on-disk modules for a ManagedCatalog, +// between the LocalCatalog object and the upstream ModuleCatalogs? +// +// The answer to that question comes down to a few sub-questions. +// +// Firstly, There are several desiderata. +// 1. We don't want the managed catalog to be limited to a specific upstream format, e.g git +// 2. But We want the local catalog to be able to delete (ie manage) modules without risking +// corrupting indices and other state kept upstream +// 3. We don't want unnecessary file moving, and certainly no duplication +// +//So... +// - If we make the LocalCatalog responsible, and create a mechanism for the local catalog +// to define a directory and instruct the upstream catalog to use that directory, e.g. +// through the "prepare" method of the loader, that feels cleanest. +// +// However, the monkey wrench comes in when we consider a module that is a subdirectory of +// a git repo. We need to hold the repo in the cache, but export the module from the +// subdirectory. Which means the local cache is aware of the quirks of the git format. 
+// +// So it's not enough to say that once the module is "prepared" the upstream catalog can +// wash its hands. The prepare method needs to take a directory as input, which returns +// a new loader, with the directory as part of the new loader's internal state. +// diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index 55b98fde5..8c6ca928c 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -5,7 +5,7 @@ use crate::metta::*; use crate::metta::text::Tokenizer; use crate::metta::text::SExprParser; use crate::metta::runner::{Metta, RunContext, ModuleLoader, ResourceKey, mod_name_from_url}; -use crate::metta::runner::{git_catalog::EXPLICIT_GIT_MOD_CACHE, git_catalog::ModuleGitLocation, git_cache::UpdateMode}; +use crate::metta::runner::git_catalog::ModuleGitLocation; use crate::metta::types::{get_atom_types, get_meta_type}; use crate::common::shared::Shared; use crate::common::CachingMapper; @@ -303,6 +303,8 @@ impl Grounded for RegisterModuleOp { let path_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; // TODO: replace Symbol by grounded String? + //TODO-NOW, investigate what goes off the rails when I run this with quotes now? Maybe I'm getting a grounded string arg now... + // in the repl, test `!(register-module! "/tmp/")` let path = match path_arg_atom { Atom::Symbol(path_arg) => path_arg.name(), _ => return Err(arg_error.into()) @@ -329,7 +331,8 @@ impl Grounded for RegisterModuleOp { /// Similar to `register-module!`, this op will bypass the catalog search #[derive(Clone, Debug)] pub struct GitModuleOp { - metta: Metta + //TODO-HACK: This is a terrible horrible ugly hack that should be fixed ASAP + context: std::sync::Arc>>>>>, } impl PartialEq for GitModuleOp { @@ -338,7 +341,7 @@ impl PartialEq for GitModuleOp { impl GitModuleOp { pub fn new(metta: Metta) -> Self { - Self{ metta } + Self{ context: metta.0.context.clone() } } } @@ -359,6 +362,8 @@ impl Grounded for GitModuleOp { // TODO: When we figure out how to address varargs, it will be nice to take an optional branch name // TODO: replace Symbol by grounded String? + //TODO-NOW, investigate what goes off the rails when I run this with quotes now? Maybe I'm getting a grounded string arg now... + // in the repl, test `!(register-module! "/tmp/")` let url = match url_arg_atom { Atom::Symbol(url_arg) => url_arg.name(), _ => return Err(arg_error.into()) @@ -372,10 +377,14 @@ impl Grounded for GitModuleOp { None => return Err(ExecError::from("git-module! error extracting module name from URL")) }; + let ctx_ref = self.context.lock().unwrap().last().unwrap().clone(); + let mut context = ctx_ref.lock().unwrap(); + let git_mod_location = ModuleGitLocation::new(url.to_string()); - let cached_mod = git_mod_location.get_cache(self.metta.environment().caches_dir(), EXPLICIT_GIT_MOD_CACHE, &mod_name, None, None)?; - cached_mod.update(UpdateMode::TryPullLatest)?; - self.metta.load_module_at_path(cached_mod.local_path(), Some(&mod_name)).map_err(|e| ExecError::from(e))?; + + if let Some((loader, descriptor)) = git_mod_location.get_loader_in_explicit_catalog(&mod_name, true, context.metta.environment()).map_err(|e| ExecError::from(e))? 
{ + context.get_or_init_module_with_descriptor(&mod_name, descriptor, loader).map_err(|e| ExecError::from(e))?; + } unit_result() } From 5b65d5e4ac294cd2e49ce5ebc3b06b3e3c9b9e20 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Fri, 10 May 2024 22:48:55 +0900 Subject: [PATCH 42/77] Changing around initialization order for LocalCatalog and GitCatalog, so that everything gets initialized properly when loading from local cache --- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 44 +++++++++++++------- repl/Cargo.toml | 1 + 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index df619d41d..8c98d7495 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -242,13 +242,9 @@ impl GitCatalog { }) } /// Registers a new module in the catalog with a specified remote location, and returns the [ModuleDescriptor] to refer to that module - /// - /// NOTE: explicitly setting a module may pub(crate) fn register_mod(&self, mod_name: &str, version: Option<&semver::Version>, git_location: &ModuleGitLocation) -> Result { + self.refresh_catalog()?; let mut catalog_ref = self.catalog.lock().unwrap(); - if catalog_ref.is_none() { - *catalog_ref = Some(CatalogFileFormat::default()); - } let descriptor = catalog_ref.as_mut().unwrap().add(CatalogFileMod::new(mod_name.to_string(), version.cloned(), git_location.clone()))?; Ok(descriptor) } @@ -261,21 +257,19 @@ impl GitCatalog { /// Scans the catalog looking for a single module that matches the provided descriptor fn find_mod_idx_with_descriptor(&self, descriptor: &ModuleDescriptor) -> Option { let cat_lock = self.catalog.lock().unwrap(); - let catalog = cat_lock.as_ref().unwrap(); - catalog.find_mod_idx_with_descriptor(descriptor) + match &*cat_lock { + Some(catalog) => catalog.find_mod_idx_with_descriptor(descriptor), + None => None + } } -} - -impl ModuleCatalog for GitCatalog { - fn lookup(&self, name: &str) -> Vec { - + fn refresh_catalog(&self) -> Result { if let Some(catalog_repo) = &self.catalog_repo { //Get the catalog from the git cache let did_update = match catalog_repo.update(UpdateMode::TryPullIfOlderThan(self.refresh_time)) { Ok(did_update) => did_update, Err(e) => { log::warn!("Warning: error encountered attempting to fetch remote catalog: {}, {e}", self.name); - return vec![]; + false } }; @@ -286,19 +280,39 @@ impl ModuleCatalog for GitCatalog { match read_to_string(&catalog_file_path) { Ok(file_contents) => { *catalog = Some(serde_json::from_str(&file_contents).unwrap()); + return Ok(true) }, Err(e) => { - log::warn!("Warning: Error reading catalog file. remote catalog appears to be corrupt: {}, {e}", self.name); - return vec![]; + return Err(format!("Error reading catalog file. 
remote catalog unavailable: {}, {e}", self.name)) } } } + } else { + let mut catalog = self.catalog.lock().unwrap(); + if catalog.is_none() { + *catalog = Some(CatalogFileFormat::default()); + } + } + Ok(false) + } +} + +impl ModuleCatalog for GitCatalog { + fn lookup(&self, name: &str) -> Vec { + match self.refresh_catalog() { + Ok(_) => {}, + Err(e) => { + log::warn!("{e}"); + return vec![] + } } //Find the modules that match in the catalog self.find_mods_with_name(name) } fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result, String> { + self.refresh_catalog()?; + let mod_idx = self.find_mod_idx_with_descriptor(descriptor).unwrap(); let cat_lock = self.catalog.lock().unwrap(); diff --git a/repl/Cargo.toml b/repl/Cargo.toml index fbf041a87..8439872c3 100644 --- a/repl/Cargo.toml +++ b/repl/Cargo.toml @@ -22,3 +22,4 @@ default = ["python"] no_python = ["hyperon"] python = ["pyo3", "pep440_rs"] minimal = ["hyperon/minimal", "no_python"] #WARNING: The interpreter belonging to the hyperon python module will be used if hyperon is run through python +git = ["hyperon/git"] \ No newline at end of file From 3471e4c69cba7bb4d3dfe54546c5d74f9ded4964 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 11 May 2024 10:44:55 +0900 Subject: [PATCH 43/77] Purging dead code and clarifying comments --- lib/src/metta/runner/modules/mod.rs | 12 --- lib/src/metta/runner/pkg_mgmt/git_cache.rs | 6 ++ lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 88 ------------------- .../metta/runner/pkg_mgmt/managed_catalog.rs | 61 ------------- 4 files changed, 6 insertions(+), 161 deletions(-) diff --git a/lib/src/metta/runner/modules/mod.rs b/lib/src/metta/runner/modules/mod.rs index 94702d2a2..2742aaa0b 100644 --- a/lib/src/metta/runner/modules/mod.rs +++ b/lib/src/metta/runner/modules/mod.rs @@ -613,18 +613,6 @@ pub trait ModuleLoader: std::fmt::Debug + Send + Sync { None } - //TODO-NOW Delete this: I changed my mind about this interface - I now think the design should commit to an - // injective mapping between ModuleDescriptors and directory names - // - // /// Suggests a name that can be used by the implementation for locally cached module files - // /// - // /// The returned name should be deterministic, but unique to the module and its version, etc. - // /// For example, a git branch or a remote server URL may be encoded into the name. The name - // /// must be composed of only legal file name characters, and must not contain the '/' char. - // fn cache_dir_name(&self) -> Option { - // None - // } - /// Prepares a module for loading. This method is responsible for fetching resources /// from the network, performing build or pre-computation steps, or any other operations /// that only need to be performed once and then may be cached locally diff --git a/lib/src/metta/runner/pkg_mgmt/git_cache.rs b/lib/src/metta/runner/pkg_mgmt/git_cache.rs index 571ceb2ee..9e77b0204 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_cache.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_cache.rs @@ -20,6 +20,12 @@ use git2::{*, build::*}; #[cfg(feature = "git")] const TIMESTAMP_FILENAME: &'static str = "_timestamp_"; +//NOTE: When the "git" feature is not enabled, we can still access CachedRepos that have +// already been pulled locally. However we cannot update them or pull new ones. This +// theoretically allows for one tool to be used to update and pull remote repos while +// other tools linking hyperon without the "git" feature may then access those repos. 
That +// said, having multiple tools that link hyperon is inviting version incompatibilities. + /// Indicates the desired behavior for updating the locally-cached repo #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum UpdateMode { diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index 8c98d7495..7369af53b 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -11,44 +11,6 @@ use serde::Deserialize; use crate::metta::runner::*; use crate::metta::runner::pkg_mgmt::{*, git_cache::*}; -//TODO: TODO-NOW. This is almost implemented. But keeping these notes until complete -// * Funtion to trigger explicit updates. Accessible from metta ops -// - Update specific module, update to a specific version, latest, or latest stable -// - update all modules, to latest or latest stable -// - implemented in a way that also works on the EXPLICIT_GIT_MOD_CACHE (e.g. by cache dir) -// -//Current thinking: -// * Implement the "prepare" method on ModuleLoader -// * Implement an "all" method on Catalog, and possibly "all_mod_names" which lists sorted mod names -// -//Less sure about this but... I think that we want two objects both implementing Catalog, and -// both sharing the same on-disk backing. One includes the remote fetching, while the other -// allows for explicit manipulation. -// -// * Implement a "ManagedCatalog" trait with methods: -// * origin_catalog ???? -// * local_catalog (accessor) ???? -// * clear_all -// * remove_by_name(mod_name) ????? (probably not) -// * remove_by_desc(descriptor) -// * fetch(descriptor) -// * upgrade(descriptor) (performs lookup_newest, then if newer is found, removes existing, and fetches) -// * upgrade_all() - -//QUESTION: I'm really not sure about whether the explicit git cache is a catalog. -// The No arguments: -// not queryable -// -// The Yes arguments: -// packages should be upgradable -// -//I think the way to square this circle is to make catalog query functions that work a descriptor uid -// - -//UPDATE: Need to implement ManagedCatalog for an object that shares the same back-end with -// GitCatalog, -// - also add the `prepare` interface to the module loader - /// A set of keys describing how to access a module via git. Deserialized from within a [PkgInfo] /// or a catalog file [CatalogFileFormat] #[derive(Clone, Debug, Default, Deserialize)] @@ -130,20 +92,6 @@ impl ModuleGitLocation { ); ModuleDescriptor::uid_from_ident_bytes_and_fmt_id(unique_string.as_bytes(), 0) } - //TODO-NOW: Now, delete this. Unnecessary - // pub(crate) fn cache_dir_name(&self, mod_name: &str, version: Option<&semver::Version>) -> String { - // let uid = self.uid(); - - // let repo_name_string; - // let mod_repo_name = match version { - // Some(version) => { - // repo_name_string = format!("{mod_name}-{version}"); - // &repo_name_string - // }, - // None => mod_name - // }; - // format!("{mod_repo_name}.{uid:016x}") - // } /// Returns a new ModuleGitLocation. This is a convenience; the usual interface involves deserializing this struct pub(crate) fn new(url: String) -> Self { let mut new_self = Self::default(); @@ -329,38 +277,6 @@ impl ModuleCatalog for GitCatalog { } } -//TODO-NOW: I don't think we need this. 
We can just use an instance of LocalCatalog -// /// Provides an interface to access, inspect, and upgrade the modules fetched from git using -// /// a specific URL -// #[derive(Debug)] -// pub struct ExplicitGitCatalog; - -// impl ExplicitGitCatalog { -// pub(crate) fn get_explicit_loader(env: &Environment, name: String, version: Option, git_location: ModuleGitLocation) -> Result, ModuleDescriptor)>, String> { -// let module = CatalogFileMod { -// name, -// version, -// git_location, -// }; -// let descriptor = module.get_descriptor(); -// let loader = Box::new(GitModLoader{ -// module: module, -// fmts: env.fs_mod_formats.clone(), -// }); -// Ok(Some((loader, descriptor))) -// } -// } - -// impl ModuleCatalog for ExplicitGitCatalog { -// fn lookup(&self, _name: &str) -> Vec { -// unreachable!() //Nobody should be searching the ExplicitGitCatalog -// } -// fn get_loader(&self, _descriptor: &ModuleDescriptor) -> Result, String> { -// //The ExplicitGitCatalog object exists only for management of the cache. Use `get_explicit_loader` -// unreachable!() -// } -// } - #[derive(Debug)] pub struct GitModLoader { module: CatalogFileMod, @@ -368,10 +284,6 @@ pub struct GitModLoader { } impl ModuleLoader for GitModLoader { - //TODO-NOW: Delete this - // fn cache_dir_name(&self) -> Option { - // Some(self.module.git_location.cache_dir_name(&self.module.name, self.module.version.as_ref())) - // } fn prepare(&self, local_dir: Option<&Path>, should_refresh: bool) -> Result>, String> { let update_mode = match should_refresh { true => UpdateMode::TryPullLatest, diff --git a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs index 9cd4d3684..8a3f3b9a8 100644 --- a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs @@ -99,19 +99,6 @@ impl LocalCatalog { let local_toc = self.local_toc.lock().unwrap(); local_toc.lookup_by_name(name) } - //TODO-NOW, Delete this, unneeded, I think - // fn lookup_by_descriptor_in_index(&self, desc: &ModuleDescriptor) -> Result, String> { - // let local_index = self.local_index.lock().unwrap(); - // if let Some(descriptors) = local_index.mods.get(desc.name()) { - // for index_desc in mods { - // if index_desc == desc { - // return Ok(Some(dir_name)); - // } - // } - // } - // Ok(None) - // } - /// Adds the [ModuleDescriptor] to the TOC if it doesn't exist. 
Won't create duplicates fn add_to_toc(&self, descriptor: ModuleDescriptor) -> Result<(), String> { let mut local_toc = self.local_toc.lock().unwrap(); @@ -153,26 +140,6 @@ impl LocalCatalog { } } -//TODO-NOW, I think this is also unneeded -// fn read_index_file(file_path: &Path) -> LocalCatalogTOC { -// match read_to_string(&file_path) { -// Ok(file_contents) => { -// serde_json::from_str(&file_contents).unwrap() -// }, -// Err(_e) => { -// LocalCatalogTOC::default() -// } -// } -// } - -//TODO-NOW, I think this is also unneeded -// fn write_index_file(file_path: &Path, catalog_file_data: &LocalCatalogFile) -> Result<(), String> { -// let file = File::create(file_path).map_err(|e| e.to_string())?; -// let mut writer = BufWriter::new(file); -// serde_json::to_writer(&mut writer, catalog_file_data).map_err(|e| e.to_string())?; -// writer.flush().map_err(|e| e.to_string()) -// } - impl ModuleCatalog for LocalCatalog { fn lookup(&self, name: &str) -> Vec { @@ -300,31 +267,3 @@ pub(crate) fn parse_descriptor_from_dir_name(dir_name: &str) -> Result Date: Sat, 11 May 2024 12:17:10 +0900 Subject: [PATCH 44/77] Adding list support to ManagedCatalog and catalog-list op atom --- lib/src/metta/runner/pkg_mgmt/catalog.rs | 21 +++++++ lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 10 +++- .../metta/runner/pkg_mgmt/managed_catalog.rs | 33 ++++++++--- lib/src/metta/runner/stdlib.rs | 58 +++++++++++++++++++ lib/src/metta/runner/stdlib_minimal.rs | 2 + 5 files changed, 115 insertions(+), 9 deletions(-) diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index 87ecd5aad..6458a474b 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -93,6 +93,11 @@ use serde::{Deserialize, Serialize}; /// `ModuleCatalog` types are closely connected with [ModuleLoader] types because the `ModuleCatalog` must /// recognize the module in whatever media it exists, and supply the `ModuleLoader` to load that module pub trait ModuleCatalog: std::fmt::Debug + Send + Sync { + /// The name of the catalog, to be displayed to the user + fn display_name(&self) -> String { + std::any::type_name::().to_string() + } + /// Returns the [ModuleDescriptor] for every module in the `ModuleCatalog` with the specified name fn lookup(&self, name: &str) -> Vec; @@ -594,6 +599,9 @@ impl DirCatalog { } impl ModuleCatalog for DirCatalog { + fn display_name(&self) -> String { + format!("Dir \"{}\"", self.path.display()) + } fn lookup(&self, name: &str) -> Vec { //QUESTION: How should we handle modules with an internal "package-name" that differs from their @@ -677,6 +685,19 @@ pub struct ModuleDescriptor { version: Option, } +impl core::fmt::Display for ModuleDescriptor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.name)?; + if let Some(version) = &self.version { + write!(f, " @{version}")?; + } + if let Some(uid) = self.uid { + write!(f, " #{uid:016x}")?; + } + Ok(()) + } +} + impl ModuleDescriptor { /// Create a new ModuleDescriptor pub fn new(name: String, version: Option, uid: Option) -> Self { diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index 7369af53b..2efda747d 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -246,6 +246,9 @@ impl GitCatalog { } impl ModuleCatalog for GitCatalog { + fn display_name(&self) -> String { + self.name.clone() + } fn lookup(&self, name: &str) -> Vec { match 
self.refresh_catalog() { Ok(_) => {}, @@ -306,6 +309,9 @@ impl ModuleLoader for GitModLoader { //TODO-NOW Add some status output when modules are fetched from GIT -//TODO-NOW implement list methods on the local catalog //TODO-NOW implement the managed catalog trait on the local catalog -//TODO-NOW implement ops to manage the catalog \ No newline at end of file +//TODO-NOW implement ops to manage the catalog +//TODO-NOW Implement a MeTTaMod that separates apart the catalog management functions +//TODO-NOW Implement a builtin-catalog for acccess to std mods +//TODO-NOW Fix the build without pkg_mgmt feature +//TODO-NOW Get the repl to immeditately display logs of warn!() or err!() diff --git a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs index 8a3f3b9a8..209927184 100644 --- a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs @@ -1,5 +1,6 @@ use std::path::{Path, PathBuf}; +use std::collections::BTreeMap; use std::sync::Mutex; use crate::metta::runner::*; @@ -71,7 +72,7 @@ pub trait ManagedCatalog: ModuleCatalog { #[derive(Debug)] pub struct LocalCatalog { - _name: String, + name: String, upstream_catalogs: Vec>, storage_dir: PathBuf, local_toc: Mutex, @@ -83,7 +84,7 @@ impl LocalCatalog { let local_toc = LocalCatalogTOC::build_from_dir(&storage_dir)?; Ok(Self { - _name: name.to_string(), + name: name.to_string(), upstream_catalogs: vec![], storage_dir, local_toc: Mutex::new(local_toc), @@ -104,6 +105,10 @@ impl LocalCatalog { let mut local_toc = self.local_toc.lock().unwrap(); local_toc.add_descriptor(descriptor) } + fn list_toc(&self) -> Vec { + let local_toc = self.local_toc.lock().unwrap(); + local_toc.all_sorted_descriptors() + } pub(crate) fn get_loader_with_explicit_refresh(&self, descriptor: &ModuleDescriptor, should_refresh: bool) -> Result, String> { //Figure out which upstream catalog furnished this descriptor by trying each one @@ -141,6 +146,9 @@ impl LocalCatalog { } impl ModuleCatalog for LocalCatalog { + fn display_name(&self) -> String { + self.name.clone() + } fn lookup(&self, name: &str) -> Vec { //If we have some matching modules in the local cache then return them @@ -163,6 +171,9 @@ impl ModuleCatalog for LocalCatalog { fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result, String> { self.get_loader_with_explicit_refresh(descriptor, false) } + fn list<'a>(&'a self) -> Option + 'a>> { + Some(Box::new(self.list_toc().into_iter())) + } } /// A [ModuleLoader] for a [LocalCatalog] that wraps another ModuleLoader for an upstream [ModuleCatalog] @@ -185,7 +196,7 @@ impl ModuleLoader for LocalCatalogLoader { /// A Table of Contents (TOC) for a LocalCatalog #[derive(Debug)] struct LocalCatalogTOC { - mods_by_name: HashMap> + mods_by_name: BTreeMap> } impl LocalCatalogTOC { @@ -200,7 +211,7 @@ impl LocalCatalogTOC { } let mut new_self = Self { - mods_by_name: HashMap::new() + mods_by_name: BTreeMap::new() }; for dir_item_handle in std::fs::read_dir(storage_dir).map_err(|e| e.to_string())? { @@ -209,8 +220,12 @@ impl LocalCatalogTOC { let name_str = file_name.to_str() .ok_or_else(|| format!("Invalid characters found in local cache at path: {}", dir_entry.path().display()))?; - let descriptor = parse_descriptor_from_dir_name(name_str)?; - new_self.add_descriptor(descriptor)?; + // Name reserved by GitCatalog. 
We may generalize this "reserved" mechanism when + // we support additional upstream catalog types + if name_str != "catalog.repo" { + let descriptor = parse_descriptor_from_dir_name(name_str)?; + new_self.add_descriptor(descriptor)?; + } } Ok(new_self) @@ -223,15 +238,19 @@ impl LocalCatalogTOC { } None } + /// Returns a Vec containing all ModuleDescriptors in the TOC, sorted by name + fn all_sorted_descriptors(&self) -> Vec { + self.mods_by_name.iter().flat_map(|(_name, desc_vec)| desc_vec).cloned().collect() + } /// Adds a descriptor to a TOC. Won't add a duplicate fn add_descriptor(&mut self, descriptor: ModuleDescriptor) -> Result<(), String> { let desc_vec = self.mods_by_name.entry(descriptor.name().to_owned()).or_insert(vec![]); if !desc_vec.contains(&descriptor) { desc_vec.push(descriptor); + desc_vec.sort_by(|a, b| a.version().cmp(&b.version())); } Ok(()) } - } /// Returns a String that can be used as a directory to cache local files associated diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index 8c6ca928c..bcb7528c2 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -394,6 +394,62 @@ impl Grounded for GitModuleOp { } } +/// Lists contents of all Catalogs that support the "list" method +#[derive(Clone, Debug)] +pub struct CatalogListOp { + metta: Metta +} + +impl PartialEq for CatalogListOp { + fn eq(&self, _other: &Self) -> bool { true } +} + +impl CatalogListOp { + pub fn new(metta: Metta) -> Self { + Self{ metta } + } +} + +impl Display for CatalogListOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "catalog-list") + } +} + +impl Grounded for CatalogListOp { + fn type_(&self) -> Atom { + //TODO-FUTURE, when we decide on a friendly standard for var-args, it would be nice to + // allow an optional arg to list a specific catalog. 
For now we list all of them + //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout + Atom::expr([ARROW_SYMBOL, UNIT_TYPE()]) + } + + fn execute(&self, _args: &[Atom]) -> Result, ExecError> { + + fn list_catalog(cat: &dyn crate::metta::runner::ModuleCatalog) { + if let Some(cat_iter) = cat.list() { + println!("{}:", cat.display_name()); + for desc in cat_iter { + println!("\t{desc}"); + } + } + } + + if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { + list_catalog(explicit_git_catalog); + } + for cat in self.metta.environment().catalogs() { + list_catalog(cat); + } + + unit_result() + } + + fn match_(&self, other: &Atom) -> MatchResultIter { + match_by_equality(self, other) + } +} + /// This operation prints the modules loaded from the top of the runner /// /// NOTE: This is a temporary stop-gap to help MeTTa users inspect which modules they have loaded and @@ -1657,6 +1713,8 @@ mod non_minimal_only_stdlib { tref.register_token(regex(r"get-metatype"), move |_| { get_meta_type_op.clone() }); let register_module_op = Atom::gnd(RegisterModuleOp::new(metta.clone())); tref.register_token(regex(r"register-module!"), move |_| { register_module_op.clone() }); + let catalog_list_op = Atom::gnd(CatalogListOp::new(metta.clone())); + tref.register_token(regex(r"catalog-list"), move |_| { catalog_list_op.clone() }); let git_module_op = Atom::gnd(GitModuleOp::new(metta.clone())); tref.register_token(regex(r"git-module!"), move |_| { git_module_op.clone() }); let mod_space_op = Atom::gnd(ModSpaceOp::new(metta.clone())); diff --git a/lib/src/metta/runner/stdlib_minimal.rs b/lib/src/metta/runner/stdlib_minimal.rs index bdf2d3daf..94d40a37b 100644 --- a/lib/src/metta/runner/stdlib_minimal.rs +++ b/lib/src/metta/runner/stdlib_minimal.rs @@ -461,6 +461,8 @@ pub fn register_common_tokens(tref: &mut Tokenizer, _tokenizer: Shared Date: Sat, 11 May 2024 17:41:52 +0900 Subject: [PATCH 45/77] Adding catalog-update op atom Implementing ManagedCatalog for LocalCatalog --- lib/src/metta/runner/environment.rs | 2 +- lib/src/metta/runner/modules/mod.rs | 4 +- lib/src/metta/runner/pkg_mgmt/catalog.rs | 13 +- lib/src/metta/runner/pkg_mgmt/git_cache.rs | 29 +--- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 141 +++++++++++------- .../metta/runner/pkg_mgmt/managed_catalog.rs | 106 +++++++++++-- lib/src/metta/runner/stdlib.rs | 56 ++++++- lib/src/metta/runner/stdlib_minimal.rs | 2 + 8 files changed, 266 insertions(+), 87 deletions(-) diff --git a/lib/src/metta/runner/environment.rs b/lib/src/metta/runner/environment.rs index 003e22756..a027b4382 100644 --- a/lib/src/metta/runner/environment.rs +++ b/lib/src/metta/runner/environment.rs @@ -353,7 +353,7 @@ impl EnvBuilder { //Setup the explicit_git_mods managed catalog to hold mods fetched by explicit URL let mut explicit_git_mods = LocalCatalog::new(caches_dir, "git-modules").unwrap(); - let git_mod_catalog = GitCatalog::new_without_source_repo(env.fs_mod_formats.clone(), "git-modules").unwrap(); + let git_mod_catalog = GitCatalog::new_without_source_repo(caches_dir, env.fs_mod_formats.clone(), "git-modules").unwrap(); explicit_git_mods.push_upstream_catalog(Box::new(git_mod_catalog)); env.explicit_git_mods = Some(explicit_git_mods); diff --git a/lib/src/metta/runner/modules/mod.rs b/lib/src/metta/runner/modules/mod.rs index 2742aaa0b..04f0c1cf5 100644 --- a/lib/src/metta/runner/modules/mod.rs +++ b/lib/src/metta/runner/modules/mod.rs @@ -484,7 +484,7 @@ impl ModuleInitState { pub fn init_module(&mut self, 
runner: &Metta, mod_name: &str, loader: Box) -> Result { //Give the prepare function a chance to run, in case it hasn't yet - let loader = match loader.prepare(None, false)? { + let loader = match loader.prepare(None, UpdateMode::FetchIfMissing)? { Some(new_loader) => new_loader, None => loader }; @@ -621,7 +621,7 @@ pub trait ModuleLoader: std::fmt::Debug + Send + Sync { /// loader will replace it. /// /// NOTE: This method may become async in the future - fn prepare(&self, _local_dir: Option<&Path>, _should_refresh: bool) -> Result>, String> { + fn prepare(&self, _local_dir: Option<&Path>, _update_mode: UpdateMode) -> Result>, String> { Ok(None) } diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index 6458a474b..7bec5f379 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -177,6 +177,17 @@ pub trait ModuleCatalog: std::fmt::Debug + Send + Sync { fn as_any(&self) -> Option<&dyn Any> { None } + + /// Synchronize the catalog's internal tables, so fresh upstream info is reflected + /// locally. Does not fetch any modules + fn sync_toc(&self, _update_mode: UpdateMode) -> Result<(), String> { + Ok(()) + } + + /// Returns the catalog as a [ManagedCatalog] if the catalog supports active management + fn as_managed(&self) -> Option<&dyn ManagedCatalog> { + None + } } impl dyn ModuleCatalog { @@ -300,7 +311,7 @@ impl PkgInfo { } //Get the module if it's specified with git keys - if let Some(pair) = entry.git_location.get_loader_in_explicit_catalog(mod_name, false, context.metta.environment())? { + if let Some(pair) = entry.git_location.get_loader_in_explicit_catalog(mod_name, UpdateMode::FetchIfMissing, context.metta.environment())? { return Ok(Some(pair)); } diff --git a/lib/src/metta/runner/pkg_mgmt/git_cache.rs b/lib/src/metta/runner/pkg_mgmt/git_cache.rs index 9e77b0204..420484a44 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_cache.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_cache.rs @@ -20,27 +20,14 @@ use git2::{*, build::*}; #[cfg(feature = "git")] const TIMESTAMP_FILENAME: &'static str = "_timestamp_"; +use crate::metta::runner::pkg_mgmt::managed_catalog::UpdateMode; + //NOTE: When the "git" feature is not enabled, we can still access CachedRepos that have // already been pulled locally. However we cannot update them or pull new ones. This // theoretically allows for one tool to be used to update and pull remote repos while // other tools linking hyperon without the "git" feature may then access those repos. That // said, having multiple tools that link hyperon is inviting version incompatibilities. -/// Indicates the desired behavior for updating the locally-cached repo -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum UpdateMode { - /// Clones the repo if it doesn't exist, otherwise leaves it alone - PullIfMissing, - /// Pulls the latest from the remote repo. Fails if the remote is unavailable - PullLatest, - /// Attempts to pull from the remote repo. Continues with the existing repo if - /// the remote is unavailable - TryPullLatest, - /// Attempts to pull from the remote repo is the local cache is older than the - /// specified number of seconds. 
Otherwise continues with the repo on the disk - TryPullIfOlderThan(u64) -} - #[derive(Debug)] pub struct CachedRepo { name: String, @@ -87,12 +74,12 @@ impl CachedRepo { Ok(repo) => { //Do a `git pull` to bring it up to date - if mode == UpdateMode::PullLatest || mode == UpdateMode::TryPullLatest || self.check_timestamp(mode) { + if mode == UpdateMode::FetchLatest || mode == UpdateMode::TryFetchLatest || self.check_timestamp(mode) { let mut remote = repo.find_remote("origin").map_err(|e| format!("Failed find 'origin' in git repo: {}, {}", self.url, e))?; match remote.connect(Direction::Fetch) { Ok(_) => {}, Err(e) => { - if mode == UpdateMode::PullLatest { + if mode == UpdateMode::FetchLatest { return Err(format!("Failed to connect to origin repo: {}, {}", self.url, e)) } else { // We couldn't connect, but the UpdateMode allows soft failure @@ -143,10 +130,10 @@ impl CachedRepo { fn update_repo_no_git_support(&self, mode: UpdateMode) -> Result { let err_msg = || format!("Cannot update repo: {}; hyperon built without git support", self.name); match mode { - UpdateMode::PullLatest => { + UpdateMode::FetchLatest => { return Err(err_msg()); } - UpdateMode::TryPullLatest => { + UpdateMode::TryFetchLatest => { log::warn!("{}", err_msg()); }, _ => {} @@ -215,12 +202,12 @@ impl CachedRepo { .map_err(|e| format!("Error writing file: {}, {e}", file_path.display())) } - /// Returns `true` if `mode == TryPullIfOlderThan`, and the timestamp file indicates + /// Returns `true` if `mode == TryFetchIfOlderThan`, and the timestamp file indicates /// that amount of time has elapsed. Otherwise returns `false` #[cfg(feature = "git")] fn check_timestamp(&self, mode: UpdateMode) -> bool { match mode { - UpdateMode::TryPullIfOlderThan(secs) => { + UpdateMode::TryFetchIfOlderThan(secs) => { let file_path = self.repo_local_path().join(TIMESTAMP_FILENAME); match read_to_string(&file_path) { Ok(file_contents) => { diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index 2efda747d..4fb4253c7 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -3,17 +3,18 @@ use core::any::Any; use std::path::{Path, PathBuf}; -use std::fs::read_to_string; +use std::fs::File; +use std::io::{BufReader, BufWriter}; use std::sync::Mutex; -use serde::Deserialize; +use serde::{Serialize, Deserialize}; use crate::metta::runner::*; use crate::metta::runner::pkg_mgmt::{*, git_cache::*}; /// A set of keys describing how to access a module via git. 
Deserialized from within a [PkgInfo] /// or a catalog file [CatalogFileFormat] -#[derive(Clone, Debug, Default, Deserialize)] +#[derive(Clone, Debug, Default, Serialize, Deserialize)] pub struct ModuleGitLocation { /// Indicates that the dependency module should be fetched from the specified `git` URL #[serde(default)] @@ -56,11 +57,11 @@ impl ModuleGitLocation { Ok(None) } /// Gets a loader for a module identified by a ModuleGitLocation, using the [Environment]'s managed `explicit_git_mods` catalog - pub(crate) fn get_loader_in_explicit_catalog(&self, mod_name: &str, should_refresh: bool, env: &Environment) -> Result, ModuleDescriptor)>, String> { + pub(crate) fn get_loader_in_explicit_catalog(&self, mod_name: &str, update_mode: UpdateMode, env: &Environment) -> Result, ModuleDescriptor)>, String> { if self.get_url().is_some() { if let Some(explicit_git_catalog) = env.explicit_git_mods.as_ref() { let descriptor = explicit_git_catalog.upstream_catalogs().first().unwrap().downcast::().unwrap().register_mod(mod_name, None, self)?; - let loader = explicit_git_catalog.get_loader_with_explicit_refresh(&descriptor, should_refresh)?; + let loader = explicit_git_catalog.get_loader_with_explicit_refresh(&descriptor, update_mode)?; Ok(Some((loader, descriptor))) } else { Err(format!("Unable to pull module \"{mod_name}\" from git; no local \"caches\" directory available")) @@ -103,8 +104,8 @@ impl ModuleGitLocation { } } -/// Struct that matches the catalog.json file fetched from the `catalog.repo` -#[derive(Deserialize, Debug, Default)] +/// Struct that matches the catalog.json file fetched from the `_catalog.repo` +#[derive(Serialize, Deserialize, Debug, Default)] struct CatalogFileFormat { //TODO-NOW. Upon reflection, I see no good reason not to use a HashMap here instead of a Vec modules: Vec @@ -143,7 +144,7 @@ impl CatalogFileFormat { } /// A single module in a catalog.json file -#[derive(Clone, Deserialize, Debug)] +#[derive(Clone, Debug, Serialize, Deserialize)] struct CatalogFileMod { name: String, version: Option, @@ -164,36 +165,54 @@ pub struct GitCatalog { fmts: Arc>>, refresh_time: u64, catalog_repo: Option, + catalog_file_path: PathBuf, catalog: Mutex>, } impl GitCatalog { + fn new_internal(fmts: Arc>>, name: &str, catalog_file_path: PathBuf, catalog: Option) -> Self { + Self { + name: name.to_string(), + fmts, + refresh_time: 0, + catalog_repo: None, + catalog_file_path, + catalog: Mutex::new(catalog), + } + } /// Creates a new GitCatalog with the name and url specified. 
`refresh_time` is the time, in /// seconds, between refreshes of the catalog file pub fn new(caches_dir: &Path, fmts: Arc>>, name: &str, url: &str, refresh_time: u64) -> Result { - let catalog_repo_dir = caches_dir.join(name).join("catalog.repo"); + let catalog_repo_dir = caches_dir.join(name).join("_catalog.repo"); let catalog_repo_name = format!("{name}-catalog.repo"); let catalog_repo = CachedRepo::new(&catalog_repo_name, catalog_repo_dir, url, None, None)?; - let mut new_self = Self::new_without_source_repo(fmts, name)?; + let mut new_self = Self::new_internal(fmts, name, catalog_repo.local_path().join("catalog.json"), None); new_self.refresh_time = refresh_time; new_self.catalog_repo = Some(catalog_repo); Ok(new_self) } /// Used for a git-based catalog that isn't synced to a remote source - pub fn new_without_source_repo(fmts: Arc>>, name: &str) -> Result { - Ok(Self { - name: name.to_string(), - fmts, - refresh_time: 0, - catalog_repo: None, - catalog: Mutex::new(None), - }) + pub fn new_without_source_repo(caches_dir: &Path, fmts: Arc>>, name: &str) -> Result { + let catalog_file_path = caches_dir.join(name).join("_catalog.json"); + let new_self = if !catalog_file_path.exists() { + let new_self = Self::new_internal(fmts, name, catalog_file_path, Some(CatalogFileFormat::default())); + new_self.write_catalog()?; + new_self + } else { + Self::new_internal(fmts, name, catalog_file_path, None) + }; + Ok(new_self) } /// Registers a new module in the catalog with a specified remote location, and returns the [ModuleDescriptor] to refer to that module + /// + /// WARNING: if a catalog is synced to an upstream source, the upstream source will + /// eventually overwrite anything you register with this method pub(crate) fn register_mod(&self, mod_name: &str, version: Option<&semver::Version>, git_location: &ModuleGitLocation) -> Result { - self.refresh_catalog()?; - let mut catalog_ref = self.catalog.lock().unwrap(); - let descriptor = catalog_ref.as_mut().unwrap().add(CatalogFileMod::new(mod_name.to_string(), version.cloned(), git_location.clone()))?; + let descriptor = { + let mut catalog_ref = self.catalog.lock().unwrap(); + catalog_ref.as_mut().unwrap().add(CatalogFileMod::new(mod_name.to_string(), version.cloned(), git_location.clone()))? + }; + self.write_catalog()?; Ok(descriptor) } /// Scans the catalog and finds all the modules with a given name @@ -210,38 +229,59 @@ impl GitCatalog { None => None } } - fn refresh_catalog(&self) -> Result { - if let Some(catalog_repo) = &self.catalog_repo { + fn refresh_catalog(&self, update_mode: UpdateMode) -> Result { + let did_update = if let Some(catalog_repo) = &self.catalog_repo { //Get the catalog from the git cache - let did_update = match catalog_repo.update(UpdateMode::TryPullIfOlderThan(self.refresh_time)) { + match catalog_repo.update(update_mode) { Ok(did_update) => did_update, Err(e) => { log::warn!("Warning: error encountered attempting to fetch remote catalog: {}, {e}", self.name); false } - }; - - //Parse the catalog JSON file if we need to - let mut catalog = self.catalog.lock().unwrap(); - if did_update || catalog.is_none() { - let catalog_file_path = catalog_repo.local_path().join("catalog.json"); - match read_to_string(&catalog_file_path) { - Ok(file_contents) => { - *catalog = Some(serde_json::from_str(&file_contents).unwrap()); - return Ok(true) - }, - Err(e) => { - return Err(format!("Error reading catalog file. 
remote catalog unavailable: {}, {e}", self.name))
+            }
+        }
+    }
+    /// Writes the catalog to a file, overwriting the file that is currently on disk
+    fn write_catalog(&self) -> Result<(), String> {
+        let cat_lock = self.catalog.lock().unwrap();
+        let catalog = cat_lock.as_ref().unwrap();
+        let file = File::create(&self.catalog_file_path).map_err(|e| e.to_string())?;
+        let writer = BufWriter::new(file);
+        serde_json::to_writer(writer, catalog).map_err(|e| e.to_string())?;
+        Ok(())
+    }
 }
@@ -250,7 +290,7 @@ impl ModuleCatalog for GitCatalog {
         self.name.clone()
     }
     fn lookup(&self, name: &str) -> Vec {
-        match self.refresh_catalog() {
+        match self.refresh_catalog(UpdateMode::TryFetchIfOlderThan(self.refresh_time)) {
             Ok(_) => {},
             Err(e) => {
                 log::warn!("{e}");
@@ -262,9 +302,10 @@ impl ModuleCatalog for GitCatalog {
         self.find_mods_with_name(name)
     }
     fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result, String> {
-        self.refresh_catalog()?;
+        self.refresh_catalog(UpdateMode::TryFetchIfOlderThan(self.refresh_time))?;
-        let mod_idx = self.find_mod_idx_with_descriptor(descriptor).unwrap();
+        let mod_idx = self.find_mod_idx_with_descriptor(descriptor)
+            .ok_or_else(|| format!("Error: module {descriptor} no longer exists in catalog {}", self.display_name()))?;
         let cat_lock = self.catalog.lock().unwrap();
         let catalog = cat_lock.as_ref().unwrap();
@@ -275,6 +316,10 @@ impl ModuleCatalog for GitCatalog {
             fmts: self.fmts.clone(),
         }))
     }
+    fn sync_toc(&self, update_mode: UpdateMode) -> Result<(), String> {
+        self.refresh_catalog(update_mode)?;
+        Ok(())
+    }
     fn as_any(&self) -> Option<&dyn Any> {
         Some(self as &dyn Any)
     }
@@ -287,11 +332,7 @@ pub struct GitModLoader {
 }
 impl ModuleLoader for GitModLoader {
-    fn prepare(&self, local_dir: Option<&Path>, should_refresh: bool) -> Result>, String> {
-        let update_mode = match should_refresh {
-            true => UpdateMode::TryPullLatest,
-            false => UpdateMode::PullIfMissing
-        };
+    fn prepare(&self, local_dir: Option<&Path>, update_mode: UpdateMode) -> Result>, String> {
         let local_dir = match local_dir {
             Some(local_dir) => local_dir,
             None => return Err("GitCatalog: Cannot prepare git-based module without local cache directory".to_string())
@@ -309,8 +350,8 @@ impl ModuleLoader for GitModLoader {
 //TODO-NOW Add some status output when modules are fetched from GIT
-//TODO-NOW implement the managed catalog trait on the local catalog
-//TODO-NOW implement ops to manage the catalog
+//TODO-NOW implement catalog clear op
+//TODO-NOW migrate remote catalog file to a BTreeMap, and test auto-upgrading to new version
 //TODO-NOW Implement a MeTTaMod that separates apart the catalog management functions
 //TODO-NOW Implement a builtin-catalog for access to std mods
 //TODO-NOW Fix the build without pkg_mgmt feature
 //TODO-NOW Get the repl to immediately display logs of warn!() or err!()
diff --git a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs
index 209927184..1ac441a67 100644
--- a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs
+++ b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs
@@ -39,6 +39,37 @@ use crate::metta::runner::pkg_mgmt::*;
 // in isolation might not be the best when considered holistically. The Requirement API
 // needs to take that into account.
 //
+
+/// Indicates the desired behavior for updating the locally-cached module
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum UpdateMode {
+    /// Fetches the module if it doesn't exist, otherwise leaves it alone
+    FetchIfMissing,
+    /// Attempts to fetch from the remote catalog if the local cached version is older
+    /// than the specified number of seconds. Otherwise continues with the cached mod
+    TryFetchIfOlderThan(u64),
+    /// Attempts to fetch from the remote catalog. Continues with the existing module
+    /// if the remote is unavailable
+    TryFetchLatest,
+    /// Fetches the latest from the remote catalog. Fails if the remote is unavailable
+    FetchLatest,
+}
+
+impl UpdateMode {
+    /// Returns the more aggressive (more likely to fetch) of the two modes
+    pub fn promote_to(self, other: Self) -> Self {
+        match (&self, &other) {
+            (Self::FetchIfMissing, _) => other,
+            (Self::TryFetchIfOlderThan(_), Self::FetchIfMissing) => self,
+            (Self::TryFetchIfOlderThan(t_s), Self::TryFetchIfOlderThan(t_o)) => Self::TryFetchIfOlderThan((*t_s).min(*t_o)),
+            (Self::TryFetchIfOlderThan(_), _) => other,
+            (Self::TryFetchLatest, Self::FetchLatest) => Self::FetchLatest,
+            (Self::TryFetchLatest, _) => Self::TryFetchLatest,
+            _ => Self::FetchLatest
+        }
+    }
+}
+
 pub trait ManagedCatalog: ModuleCatalog {
     /// Clears all locally stored modules, resetting the local catalog to an empty state
@@ -48,7 +79,7 @@ pub trait ManagedCatalog: ModuleCatalog {
     /// already exists in the catalog
     ///
     /// NOTE: This method will likely become async in the future
-    fn fetch(&self, descriptor: &ModuleDescriptor) -> Result<(), String>;
+    fn fetch(&self, descriptor: &ModuleDescriptor, update_mode: UpdateMode) -> Result<(), String>;
     /// Remove a specific module from the catalog
     fn remove(&self, descriptor: &ModuleDescriptor) -> Result<(), String>;
@@ -58,12 +89,13 @@ pub trait ManagedCatalog: ModuleCatalog {
     ///
     /// NOTE: This API will likely change in the future.
See "NOTE FOR THE FUTURE" in comments /// for `ManagedCatalog` - fn fetch_newest_for_all(&self) -> Result<(), String> { + fn fetch_newest_for_all(&self, update_mode: UpdateMode) -> Result<(), String> { + self.sync_toc(update_mode)?; let iter = self.list_name_uid_pairs() .ok_or_else(|| "managed catalog must support `list` method".to_string())?; for (name, uid) in iter { if let Some(desc) = self.lookup_newest_with_uid_and_version_req(&name, uid, None) { - self.fetch(&desc)?; + self.fetch(&desc, update_mode)?; } } Ok(()) @@ -109,7 +141,7 @@ impl LocalCatalog { let local_toc = self.local_toc.lock().unwrap(); local_toc.all_sorted_descriptors() } - pub(crate) fn get_loader_with_explicit_refresh(&self, descriptor: &ModuleDescriptor, should_refresh: bool) -> Result, String> { + pub(crate) fn get_loader_with_explicit_refresh(&self, descriptor: &ModuleDescriptor, update_mode: UpdateMode) -> Result, String> { //Figure out which upstream catalog furnished this descriptor by trying each one let mut upstream_loader = None; @@ -128,7 +160,7 @@ impl LocalCatalog { // TODO: It would be nice to have the option here to pull a different but compatible // mod from the upstream catalogs; however we don't have the original requirement info, // so currently we cannot do that. See the write-up above about the "Requirement API". - return Err(format!("Upstream Catalogs can no longer supply module {}", descriptor.name())); + return Err(format!("Upstream Catalogs can no longer supply module \"{descriptor}\"")); } }; @@ -140,7 +172,7 @@ impl LocalCatalog { self.add_to_toc(descriptor.to_owned())?; //Wrap the upstream loader in a loader object from this catalog - let wrapper_loader = LocalCatalogLoader {local_cache_dir, upstream_loader, should_refresh}; + let wrapper_loader = LocalCatalogLoader {local_cache_dir, upstream_loader, update_mode}; Ok(Box::new(wrapper_loader)) } } @@ -169,30 +201,67 @@ impl ModuleCatalog for LocalCatalog { vec![] } fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result, String> { - self.get_loader_with_explicit_refresh(descriptor, false) + self.get_loader_with_explicit_refresh(descriptor, UpdateMode::FetchIfMissing) } fn list<'a>(&'a self) -> Option + 'a>> { Some(Box::new(self.list_toc().into_iter())) } + fn sync_toc(&self, update_mode: UpdateMode) -> Result<(), String> { + for upstream in self.upstream_catalogs.iter() { + upstream.sync_toc(update_mode)?; + } + Ok(()) + } + fn as_managed(&self) -> Option<&dyn ManagedCatalog> { + Some(self) + } } /// A [ModuleLoader] for a [LocalCatalog] that wraps another ModuleLoader for an upstream [ModuleCatalog] #[derive(Debug)] struct LocalCatalogLoader { local_cache_dir: PathBuf, - should_refresh: bool, + update_mode: UpdateMode, upstream_loader: Box } impl ModuleLoader for LocalCatalogLoader { - fn prepare(&self, _local_dir: Option<&Path>, should_refresh: bool) -> Result>, String> { - self.upstream_loader.prepare(Some(&self.local_cache_dir), should_refresh | self.should_refresh) + fn prepare(&self, _local_dir: Option<&Path>, update_mode: UpdateMode) -> Result>, String> { + let update_mode = self.update_mode.promote_to(update_mode); + self.upstream_loader.prepare(Some(&self.local_cache_dir), update_mode) } fn load(&self, _context: &mut RunContext) -> Result<(), String> { unreachable!() //We will substitute the `upstream_loader` during prepare } } +impl ManagedCatalog for LocalCatalog { + fn clear_all(&self) -> Result<(), String> { + if self.storage_dir.is_dir() { + std::fs::remove_dir_all(&self.storage_dir).map_err(|e| e.to_string())?; + } + let 
mut local_toc = self.local_toc.lock().unwrap(); + *local_toc = LocalCatalogTOC::build_from_dir(&self.storage_dir)?; + Ok(()) + } + fn fetch(&self, descriptor: &ModuleDescriptor, update_mode: UpdateMode) -> Result<(), String> { + let loader = self.get_loader_with_explicit_refresh(descriptor, update_mode)?; + let _ = loader.prepare(None, update_mode)?; + Ok(()) + } + fn remove(&self, descriptor: &ModuleDescriptor) -> Result<(), String> { + let cache_dir_name = dir_name_from_descriptor(descriptor); + let mod_cache_dir = self.storage_dir.join(cache_dir_name); + if mod_cache_dir.is_dir() { + std::fs::remove_dir_all(mod_cache_dir).map_err(|e| e.to_string())?; + let mut local_toc = self.local_toc.lock().unwrap(); + local_toc.remove_descriptor(descriptor) + } else { + Err("No such module in catalog".to_string()) + } + } +} + /// A Table of Contents (TOC) for a LocalCatalog #[derive(Debug)] struct LocalCatalogTOC { @@ -222,7 +291,7 @@ impl LocalCatalogTOC { // Name reserved by GitCatalog. We may generalize this "reserved" mechanism when // we support additional upstream catalog types - if name_str != "catalog.repo" { + if name_str != "_catalog.repo" && name_str != "_catalog.json" { let descriptor = parse_descriptor_from_dir_name(name_str)?; new_self.add_descriptor(descriptor)?; } @@ -251,6 +320,21 @@ impl LocalCatalogTOC { } Ok(()) } + fn remove_descriptor(&mut self, descriptor: &ModuleDescriptor) -> Result<(), String> { + fn ret_err() -> Result<(), String> { Err("No such module in catalog".to_string()) } + match self.mods_by_name.get_mut(descriptor.name()) { + Some(desc_vec) => { + match desc_vec.iter().position(|vec_desc| vec_desc==descriptor) { + Some(idx) => { + desc_vec.remove(idx); + Ok(()) + }, + None => ret_err() + } + }, + None => ret_err() + } + } } /// Returns a String that can be used as a directory to cache local files associated diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index bcb7528c2..470e30a4f 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -6,6 +6,7 @@ use crate::metta::text::Tokenizer; use crate::metta::text::SExprParser; use crate::metta::runner::{Metta, RunContext, ModuleLoader, ResourceKey, mod_name_from_url}; use crate::metta::runner::git_catalog::ModuleGitLocation; +use crate::metta::runner::pkg_mgmt::{UpdateMode, ManagedCatalog}; use crate::metta::types::{get_atom_types, get_meta_type}; use crate::common::shared::Shared; use crate::common::CachingMapper; @@ -382,7 +383,7 @@ impl Grounded for GitModuleOp { let git_mod_location = ModuleGitLocation::new(url.to_string()); - if let Some((loader, descriptor)) = git_mod_location.get_loader_in_explicit_catalog(&mod_name, true, context.metta.environment()).map_err(|e| ExecError::from(e))? { + if let Some((loader, descriptor)) = git_mod_location.get_loader_in_explicit_catalog(&mod_name, UpdateMode::TryFetchLatest, context.metta.environment()).map_err(|e| ExecError::from(e))? 
{ context.get_or_init_module_with_descriptor(&mod_name, descriptor, loader).map_err(|e| ExecError::from(e))?; } @@ -450,6 +451,57 @@ impl Grounded for CatalogListOp { } } +/// Update all contents of all ManagedCatalogs to the latest version of all modules +#[derive(Clone, Debug)] +pub struct CatalogUpdateOp { + metta: Metta +} + +impl PartialEq for CatalogUpdateOp { + fn eq(&self, _other: &Self) -> bool { true } +} + +impl CatalogUpdateOp { + pub fn new(metta: Metta) -> Self { + Self{ metta } + } +} + +impl Display for CatalogUpdateOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "catalog-update") + } +} + +impl Grounded for CatalogUpdateOp { + fn type_(&self) -> Atom { + //TODO-FUTURE, when we decide on a friendly standard for var-args, it would be nice to + // allow an optional arg to list a specific catalog. For now we list all of them + //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout + Atom::expr([ARROW_SYMBOL, UNIT_TYPE()]) + } + + fn execute(&self, _args: &[Atom]) -> Result, ExecError> { + + if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { + explicit_git_catalog.fetch_newest_for_all(UpdateMode::FetchLatest)?; + } + + for cat in self.metta.environment().catalogs() { + match cat.as_managed() { + Some(cat) => cat.fetch_newest_for_all(UpdateMode::FetchLatest)?, + None => {} + } + } + + unit_result() + } + + fn match_(&self, other: &Atom) -> MatchResultIter { + match_by_equality(self, other) + } +} + /// This operation prints the modules loaded from the top of the runner /// /// NOTE: This is a temporary stop-gap to help MeTTa users inspect which modules they have loaded and @@ -1715,6 +1767,8 @@ mod non_minimal_only_stdlib { tref.register_token(regex(r"register-module!"), move |_| { register_module_op.clone() }); let catalog_list_op = Atom::gnd(CatalogListOp::new(metta.clone())); tref.register_token(regex(r"catalog-list"), move |_| { catalog_list_op.clone() }); + let catalog_update_op = Atom::gnd(CatalogUpdateOp::new(metta.clone())); + tref.register_token(regex(r"catalog-update"), move |_| { catalog_update_op.clone() }); let git_module_op = Atom::gnd(GitModuleOp::new(metta.clone())); tref.register_token(regex(r"git-module!"), move |_| { git_module_op.clone() }); let mod_space_op = Atom::gnd(ModSpaceOp::new(metta.clone())); diff --git a/lib/src/metta/runner/stdlib_minimal.rs b/lib/src/metta/runner/stdlib_minimal.rs index 94d40a37b..830da741e 100644 --- a/lib/src/metta/runner/stdlib_minimal.rs +++ b/lib/src/metta/runner/stdlib_minimal.rs @@ -463,6 +463,8 @@ pub fn register_common_tokens(tref: &mut Tokenizer, _tokenizer: Shared Date: Sat, 11 May 2024 18:30:17 +0900 Subject: [PATCH 46/77] Enabling log output at `info` level by default in repl Simplifying GitCatalog TOC refresh logic --- lib/src/metta/runner/modules/mod.rs | 2 +- lib/src/metta/runner/pkg_mgmt/catalog.rs | 2 +- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 43 +++++++------------- lib/src/metta/runner/stdlib.rs | 2 +- repl/Cargo.toml | 2 + repl/src/main.rs | 2 + 6 files changed, 22 insertions(+), 31 deletions(-) diff --git a/lib/src/metta/runner/modules/mod.rs b/lib/src/metta/runner/modules/mod.rs index 04f0c1cf5..d67f77c2c 100644 --- a/lib/src/metta/runner/modules/mod.rs +++ b/lib/src/metta/runner/modules/mod.rs @@ -190,7 +190,7 @@ impl MettaMod { } // Get the space associated with the dependent module - log::info!("import_all_from_dependency: importing from {} into {}", mod_ptr.path(), self.path()); + 
log::debug!("import_all_from_dependency: importing from {} into {}", mod_ptr.path(), self.path()); let (dep_space, transitive_deps) = mod_ptr.stripped_space(); // Add a new Grounded Space atom to the &self space, so we can access the dependent module diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index 7bec5f379..244c68d3c 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -343,7 +343,7 @@ impl PkgInfo { log::trace!("Looking for module: \"{mod_name}\" inside {catalog:?}"); match catalog.lookup_newest_with_version_req(mod_name, version_req) { Some(descriptor) => { - log::info!("Found module: \"{mod_name}\" inside {catalog:?}"); + log::info!("Found module: \"{mod_name}\" inside {:?}", catalog.display_name()); log::info!("Preparing to load module: \'{}\' as \'{}\'", descriptor.name, name_path); return Ok(Some((catalog.get_loader(&descriptor)?, descriptor))) }, diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index 4fb4253c7..ae12c54e0 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -194,13 +194,12 @@ impl GitCatalog { /// Used for a git-based catalog that isn't synced to a remote source pub fn new_without_source_repo(caches_dir: &Path, fmts: Arc>>, name: &str) -> Result { let catalog_file_path = caches_dir.join(name).join("_catalog.json"); - let new_self = if !catalog_file_path.exists() { - let new_self = Self::new_internal(fmts, name, catalog_file_path, Some(CatalogFileFormat::default())); - new_self.write_catalog()?; - new_self + let new_self = Self::new_internal(fmts, name, catalog_file_path, Some(CatalogFileFormat::default())); + if new_self.catalog_file_path.exists() { + new_self.parse_catalog()? 
} else { - Self::new_internal(fmts, name, catalog_file_path, None) - }; + new_self.write_catalog()?; + } Ok(new_self) } /// Registers a new module in the catalog with a specified remote location, and returns the [ModuleDescriptor] to refer to that module @@ -229,33 +228,23 @@ impl GitCatalog { None => None } } - fn refresh_catalog(&self, update_mode: UpdateMode) -> Result { - let did_update = if let Some(catalog_repo) = &self.catalog_repo { - //Get the catalog from the git cache - match catalog_repo.update(update_mode) { + fn refresh_catalog(&self, update_mode: UpdateMode) -> Result<(), String> { + if let Some(catalog_repo) = &self.catalog_repo { + //Update the catalog from the git cache + let did_update = match catalog_repo.update(update_mode) { Ok(did_update) => did_update, Err(e) => { log::warn!("Warning: error encountered attempting to fetch remote catalog: {}, {e}", self.name); false } - } - } else { - let mut catalog = self.catalog.lock().unwrap(); - if catalog.is_none() { - *catalog = Some(CatalogFileFormat::default()); - true - } else { - false - } - }; + }; - //Parse the catalog JSON file if we need to - if did_update || self.catalog_is_uninit() { - self.parse_catalog()?; - Ok(true) - } else { - Ok(false) + //Parse the catalog JSON file if we need to + if did_update || self.catalog_is_uninit() { + self.parse_catalog()?; + } } + Ok(()) } fn catalog_is_uninit(&self) -> bool { let catalog = self.catalog.lock().unwrap(); @@ -349,10 +338,8 @@ impl ModuleLoader for GitModLoader { } -//TODO-NOW Add some status output when modules are fetched from GIT //TODO-NOW implement catalog clear op //TODO-NOW migrate remote catalog file to a BTreeMap, and test auto-upgrading to new version //TODO-NOW Implement a MeTTaMod that separates apart the catalog management functions //TODO-NOW Implement a builtin-catalog for acccess to std mods //TODO-NOW Fix the build without pkg_mgmt feature -//TODO-NOW Get the repl to immeditately display logs of warn!() or err!() diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index 470e30a4f..7077d5081 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -431,7 +431,7 @@ impl Grounded for CatalogListOp { if let Some(cat_iter) = cat.list() { println!("{}:", cat.display_name()); for desc in cat_iter { - println!("\t{desc}"); + println!(" {desc}"); } } } diff --git a/repl/Cargo.toml b/repl/Cargo.toml index 8439872c3..1b4f676c5 100644 --- a/repl/Cargo.toml +++ b/repl/Cargo.toml @@ -5,6 +5,8 @@ edition.workspace = true description = "A shell to execute MeTTa" [dependencies] +log = { workspace = true } +env_logger = { workspace = true } anyhow = { version = "1.0.75", features = ["std"] } rustyline = { version = "13.0.0", features = ["derive"] } clap = { version = "4.4.0", features = ["derive"] } diff --git a/repl/src/main.rs b/repl/src/main.rs index 7330bf016..4a9b28a6c 100644 --- a/repl/src/main.rs +++ b/repl/src/main.rs @@ -36,6 +36,8 @@ struct CliArgs { fn main() -> Result<()> { let cli_args = CliArgs::parse(); + let _ = env_logger::builder().filter_level(log::LevelFilter::Info).try_init(); + //If we have a metta_file, then the working dir is the parent of that file //If we are running in interactive mode, it's the working dir at the time the repl is invoked let metta_working_dir: PathBuf = match &cli_args.file { From 3e826f27abd478445b9c70adbc5dfae85dc22224 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 11 May 2024 20:48:08 +0900 Subject: [PATCH 47/77] Changing layout of upstream git-based catalog data 
and ensuring automatic upgrading is working --- lib/src/metta/runner/pkg_mgmt/catalog.rs | 2 +- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 44 ++++++++----------- .../metta/runner/pkg_mgmt/managed_catalog.rs | 16 +++++++ 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index 244c68d3c..cadd5e952 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -213,7 +213,7 @@ fn filter_by_version_req<'a>(mods_iter: impl Iterator + ' /// Internal function to find the newest module in a set. See [ModuleCatalog::lookup_newest_with_version_req] /// for an explanation of behavior -fn find_newest_module(mods_iter: impl Iterator) -> Option { +pub(crate) fn find_newest_module(mods_iter: impl Iterator) -> Option { let mut highest_version: Option = None; let mut ret_desc = None; for desc in mods_iter { diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index ae12c54e0..291222d3d 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -6,6 +6,7 @@ use std::path::{Path, PathBuf}; use std::fs::File; use std::io::{BufReader, BufWriter}; use std::sync::Mutex; +use std::collections::BTreeMap; use serde::{Serialize, Deserialize}; @@ -107,15 +108,14 @@ impl ModuleGitLocation { /// Struct that matches the catalog.json file fetched from the `_catalog.repo` #[derive(Serialize, Deserialize, Debug, Default)] struct CatalogFileFormat { - //TODO-NOW. Upon reflection, I see no good reason not to use a HashMap here instead of a Vec - modules: Vec + modules: BTreeMap> } impl CatalogFileFormat { fn find_mods_with_name(&self, name: &str) -> Vec { let mut results = vec![]; - for cat_mod in self.modules.iter() { - if cat_mod.name == name { + if let Some(cat_mod_vec) = self.modules.get(name) { + for cat_mod in cat_mod_vec { let uid = cat_mod.git_location.uid(); let descriptor = ModuleDescriptor::new(name.to_string(), cat_mod.version.clone(), Some(uid)); results.push(descriptor); @@ -123,11 +123,15 @@ impl CatalogFileFormat { } results } - fn find_mod_idx_with_descriptor(&self, descriptor: &ModuleDescriptor) -> Option { - for (mod_idx, cat_mod) in self.modules.iter().enumerate() { - if cat_mod.name == descriptor.name() && cat_mod.version.as_ref() == descriptor.version() { - if Some(cat_mod.git_location.uid()) == descriptor.uid() { - return Some(mod_idx); + /// Scans the catalog looking for a single module that matches the provided descriptor + fn find_mod_with_descriptor(&self, descriptor: &ModuleDescriptor) -> Option<&CatalogFileMod> { + if let Some(cat_mod_vec) = self.modules.get(descriptor.name()) { + for cat_mod in cat_mod_vec.iter() { + if cat_mod.version.as_ref() == descriptor.version() { + //NOTE ModuleDescriptors from GitCatalog always have a uid + if Some(cat_mod.git_location.uid()) == descriptor.uid() { + return Some(cat_mod); + } } } } @@ -136,8 +140,9 @@ impl CatalogFileFormat { fn add(&mut self, new_mod: CatalogFileMod) -> Result { let uid = new_mod.git_location.uid(); let descriptor = ModuleDescriptor::new(new_mod.name.clone(), new_mod.version.clone(), Some(uid)); - if self.find_mod_idx_with_descriptor(&descriptor).is_none() { - self.modules.push(new_mod); + if self.find_mod_with_descriptor(&descriptor).is_none() { + let cat_mod_vec = self.modules.entry(new_mod.name.clone()).or_insert(vec![]); + cat_mod_vec.push(new_mod); } Ok(descriptor) } @@ -147,6 +152,7 @@ 
impl CatalogFileFormat { #[derive(Clone, Debug, Serialize, Deserialize)] struct CatalogFileMod { name: String, + #[serde(default)] version: Option, #[serde(flatten)] git_location: ModuleGitLocation, @@ -220,14 +226,6 @@ impl GitCatalog { let catalog = cat_lock.as_ref().unwrap(); catalog.find_mods_with_name(name) } - /// Scans the catalog looking for a single module that matches the provided descriptor - fn find_mod_idx_with_descriptor(&self, descriptor: &ModuleDescriptor) -> Option { - let cat_lock = self.catalog.lock().unwrap(); - match &*cat_lock { - Some(catalog) => catalog.find_mod_idx_with_descriptor(descriptor), - None => None - } - } fn refresh_catalog(&self, update_mode: UpdateMode) -> Result<(), String> { if let Some(catalog_repo) = &self.catalog_repo { //Update the catalog from the git cache @@ -293,12 +291,10 @@ impl ModuleCatalog for GitCatalog { fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result, String> { self.refresh_catalog(UpdateMode::TryFetchIfOlderThan(self.refresh_time))?; - let mod_idx = self.find_mod_idx_with_descriptor(descriptor) - .ok_or_else(|| format!("Error: module {descriptor} no longer exists in catalog {}", self.display_name()))?; - let cat_lock = self.catalog.lock().unwrap(); let catalog = cat_lock.as_ref().unwrap(); - let module = catalog.modules.get(mod_idx).unwrap(); + let module = catalog.find_mod_with_descriptor(descriptor) + .ok_or_else(|| format!("Error: module {descriptor} no longer exists in catalog {}", self.display_name()))?; Ok(Box::new(GitModLoader{ module: module.clone(), @@ -337,9 +333,7 @@ impl ModuleLoader for GitModLoader { } } - //TODO-NOW implement catalog clear op -//TODO-NOW migrate remote catalog file to a BTreeMap, and test auto-upgrading to new version //TODO-NOW Implement a MeTTaMod that separates apart the catalog management functions //TODO-NOW Implement a builtin-catalog for acccess to std mods //TODO-NOW Fix the build without pkg_mgmt feature diff --git a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs index 1ac441a67..f8c22e107 100644 --- a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs @@ -260,6 +260,22 @@ impl ManagedCatalog for LocalCatalog { Err("No such module in catalog".to_string()) } } + fn fetch_newest_for_all(&self, update_mode: UpdateMode) -> Result<(), String> { + self.sync_toc(update_mode)?; + let iter = self.list_name_uid_pairs() + .ok_or_else(|| "managed catalog must support `list` method".to_string())?; + for (name, uid) in iter { + + //Find the newest version of the mod in each upstream catalog + let upstream_bests: Vec = self.upstream_catalogs.iter().filter_map(|upstream| { + upstream.lookup_newest_with_uid_and_version_req(&name, uid, None) + }).collect(); + if let Some(newest_desc) = find_newest_module(upstream_bests.into_iter()) { + self.fetch(&newest_desc, update_mode)?; + } + } + Ok(()) + } } /// A Table of Contents (TOC) for a LocalCatalog From 34f05de77b06cf7e0eb08f83ececaa7d34daa050 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 11 May 2024 21:20:00 +0900 Subject: [PATCH 48/77] Fixing issue when FS path or URL arg to stdlib op atoms comes as grounded Str --- lib/src/metta/runner/stdlib.rs | 13 +++++-------- lib/src/metta/runner/string.rs | 9 +++++++++ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index 7077d5081..b2dd62aab 100644 --- a/lib/src/metta/runner/stdlib.rs +++ 
b/lib/src/metta/runner/stdlib.rs @@ -7,6 +7,7 @@ use crate::metta::text::SExprParser; use crate::metta::runner::{Metta, RunContext, ModuleLoader, ResourceKey, mod_name_from_url}; use crate::metta::runner::git_catalog::ModuleGitLocation; use crate::metta::runner::pkg_mgmt::{UpdateMode, ManagedCatalog}; +use crate::metta::runner::string::Str; use crate::metta::types::{get_atom_types, get_meta_type}; use crate::common::shared::Shared; use crate::common::CachingMapper; @@ -303,12 +304,10 @@ impl Grounded for RegisterModuleOp { let arg_error = "register-module! expects a file system path; use quotes if needed"; let path_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; - // TODO: replace Symbol by grounded String? - //TODO-NOW, investigate what goes off the rails when I run this with quotes now? Maybe I'm getting a grounded string arg now... - // in the repl, test `!(register-module! "/tmp/")` let path = match path_arg_atom { Atom::Symbol(path_arg) => path_arg.name(), - _ => return Err(arg_error.into()) + Atom::Grounded(g) => g.downcast_ref::().ok_or_else(|| ExecError::from(arg_error))?.as_str(), + _ => return Err(arg_error.into()), }; let path = strip_quotes(path); let path = std::path::PathBuf::from(path); @@ -362,12 +361,10 @@ impl Grounded for GitModuleOp { let url_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; // TODO: When we figure out how to address varargs, it will be nice to take an optional branch name - // TODO: replace Symbol by grounded String? - //TODO-NOW, investigate what goes off the rails when I run this with quotes now? Maybe I'm getting a grounded string arg now... - // in the repl, test `!(register-module! "/tmp/")` let url = match url_arg_atom { Atom::Symbol(url_arg) => url_arg.name(), - _ => return Err(arg_error.into()) + Atom::Grounded(g) => g.downcast_ref::().ok_or_else(|| ExecError::from(arg_error))?.as_str(), + _ => return Err(arg_error.into()), }; let url = strip_quotes(url); diff --git a/lib/src/metta/runner/string.rs b/lib/src/metta/runner/string.rs index 2404dd2b0..147d91e01 100644 --- a/lib/src/metta/runner/string.rs +++ b/lib/src/metta/runner/string.rs @@ -14,6 +14,15 @@ impl Str { pub fn from_string(s: String) -> Self { Str(ImmutableString::Allocated(s)) } + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +impl AsRef for Str { + fn as_ref(&self) -> &str { + self.as_str() + } } impl Grounded for Str { From 7634838482a4ccb14da925e29d55afb50584ff5a Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 11 May 2024 22:24:15 +0900 Subject: [PATCH 49/77] Adding catalog-clear op to stdlib --- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 1 - lib/src/metta/runner/stdlib.rs | 49 ++++++++++++++++++++ lib/src/metta/runner/stdlib_minimal.rs | 2 + 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index 291222d3d..4fef9b08a 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -333,7 +333,6 @@ impl ModuleLoader for GitModLoader { } } -//TODO-NOW implement catalog clear op //TODO-NOW Implement a MeTTaMod that separates apart the catalog management functions //TODO-NOW Implement a builtin-catalog for acccess to std mods //TODO-NOW Fix the build without pkg_mgmt feature diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index b2dd62aab..9a3f09c88 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -499,6 +499,53 
@@ impl Grounded for CatalogUpdateOp { } } +/// Clears the contents of all ManagedCatalogs +#[derive(Clone, Debug)] +pub struct CatalogClearOp { + metta: Metta +} + +impl PartialEq for CatalogClearOp { + fn eq(&self, _other: &Self) -> bool { true } +} + +impl CatalogClearOp { + pub fn new(metta: Metta) -> Self { + Self{ metta } + } +} + +impl Display for CatalogClearOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "catalog-clear") + } +} + +impl Grounded for CatalogClearOp { + fn type_(&self) -> Atom { + //TODO-FUTURE, when we decide on a friendly standard for var-args, it would be nice to + // allow an optional arg to list a specific catalog. For now we list all of them + //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout + Atom::expr([ARROW_SYMBOL, UNIT_TYPE()]) + } + + fn execute(&self, _args: &[Atom]) -> Result, ExecError> { + + if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { + explicit_git_catalog.clear_all()?; + } + for cat in self.metta.environment().catalogs().filter_map(|cat| cat.as_managed()) { + cat.clear_all()?; + } + + unit_result() + } + + fn match_(&self, other: &Atom) -> MatchResultIter { + match_by_equality(self, other) + } +} + /// This operation prints the modules loaded from the top of the runner /// /// NOTE: This is a temporary stop-gap to help MeTTa users inspect which modules they have loaded and @@ -1766,6 +1813,8 @@ mod non_minimal_only_stdlib { tref.register_token(regex(r"catalog-list"), move |_| { catalog_list_op.clone() }); let catalog_update_op = Atom::gnd(CatalogUpdateOp::new(metta.clone())); tref.register_token(regex(r"catalog-update"), move |_| { catalog_update_op.clone() }); + let catalog_clear_op = Atom::gnd(CatalogClearOp::new(metta.clone())); + tref.register_token(regex(r"catalog-clear"), move |_| { catalog_clear_op.clone() }); let git_module_op = Atom::gnd(GitModuleOp::new(metta.clone())); tref.register_token(regex(r"git-module!"), move |_| { git_module_op.clone() }); let mod_space_op = Atom::gnd(ModSpaceOp::new(metta.clone())); diff --git a/lib/src/metta/runner/stdlib_minimal.rs b/lib/src/metta/runner/stdlib_minimal.rs index 830da741e..76b09fd7f 100644 --- a/lib/src/metta/runner/stdlib_minimal.rs +++ b/lib/src/metta/runner/stdlib_minimal.rs @@ -465,6 +465,8 @@ pub fn register_common_tokens(tref: &mut Tokenizer, _tokenizer: Shared Date: Sun, 12 May 2024 11:35:41 +0900 Subject: [PATCH 50/77] Fixing build with "pkg_mgmt" feature disabled --- lib/src/metta/runner/mod.rs | 10 +- lib/src/metta/runner/modules/mod.rs | 31 +- lib/src/metta/runner/modules/mod_names.rs | 4 + lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 1 - lib/src/metta/runner/stdlib.rs | 592 ++++++++++--------- 5 files changed, 345 insertions(+), 293 deletions(-) diff --git a/lib/src/metta/runner/mod.rs b/lib/src/metta/runner/mod.rs index d8d83799e..e048153ea 100644 --- a/lib/src/metta/runner/mod.rs +++ b/lib/src/metta/runner/mod.rs @@ -65,7 +65,9 @@ use super::text::{Tokenizer, Parser, SExprParser}; use super::types::validate_atom; pub mod modules; -use modules::{MettaMod, ModId, ModuleInitState, ModNameNode, ModuleLoader, ResourceKey, TOP_MOD_NAME, ModNameNodeDisplayWrapper, normalize_relative_module_name, decompose_name_path, compose_name_path}; +use modules::{MettaMod, ModId, ModuleInitState, ModNameNode, ModuleLoader, ResourceKey, TOP_MOD_NAME, ModNameNodeDisplayWrapper, normalize_relative_module_name}; +#[cfg(feature = "pkg_mgmt")] +use modules::{decompose_name_path, 
compose_name_path}; #[cfg(feature = "pkg_mgmt")] pub mod pkg_mgmt; @@ -305,6 +307,7 @@ impl Metta { descriptors.get(descriptor).cloned() } + #[cfg(feature = "pkg_mgmt")] /// Internal method to add a ModuleDescriptor, ModId pair to the runner's lookup table fn add_module_descriptor(&self, descriptor: ModuleDescriptor, mod_id: ModId) { let mut descriptors = self.0.module_descriptors.lock().unwrap(); @@ -314,7 +317,10 @@ impl Metta { /// Merges all modules in a [ModuleInitState] into the runner fn merge_init_state(&self, init_state: ModuleInitState) -> Result { let mut main_mod_id = ModId::INVALID; + #[cfg(feature = "pkg_mgmt")] let (frames, descriptors) = init_state.decompose(); + #[cfg(not(feature = "pkg_mgmt"))] + let frames = init_state.decompose(); // Unpack each frame and ,erge the modules from the ModuleInitState into the // runner, and build the mapping table for ModIds @@ -347,6 +353,7 @@ impl Metta { } // Merge the [ModuleDescriptor]s into the runner's table + #[cfg(feature = "pkg_mgmt")] for (descriptor, mod_id) in descriptors.into_iter() { let mod_id = match mod_id_mapping.get(&mod_id) { Some(mapped_id) => *mapped_id, @@ -724,6 +731,7 @@ impl<'input> RunContext<'_, '_, 'input> { } /// Runs the function in the context of the mod_id + #[allow(dead_code)] //Some clients are behind feature gates fn in_mod_context Result>(&mut self, mod_id: ModId, f: F) -> Result { if mod_id == self.mod_id { f(self) diff --git a/lib/src/metta/runner/modules/mod.rs b/lib/src/metta/runner/modules/mod.rs index d67f77c2c..768d7333a 100644 --- a/lib/src/metta/runner/modules/mod.rs +++ b/lib/src/metta/runner/modules/mod.rs @@ -16,7 +16,10 @@ use super::interpreter_minimal::interpret; use super::stdlib_minimal::*; mod mod_names; -pub(crate) use mod_names::{ModNameNode, mod_name_from_path, normalize_relative_module_name, module_name_is_legal, module_name_make_legal, mod_name_remove_prefix, decompose_name_path, compose_name_path, ModNameNodeDisplayWrapper}; +pub(crate) use mod_names::{ModNameNode, mod_name_from_path, normalize_relative_module_name, mod_name_remove_prefix, ModNameNodeDisplayWrapper}; +#[cfg(feature = "pkg_mgmt")] +pub(crate) use mod_names::{module_name_is_legal, module_name_make_legal, decompose_name_path, compose_name_path}; + pub use mod_names::{TOP_MOD_NAME, SELF_MOD_NAME, MOD_NAME_SEPARATOR}; /// A reference to a [MettaMod] that is loaded into a [Metta] runner @@ -51,6 +54,7 @@ impl ModId { } } +#[cfg(feature = "pkg_mgmt")] pub(crate) static DEFAULT_PKG_INFO: OnceLock = OnceLock::new(); /// Contains state associated with a loaded MeTTa module @@ -356,6 +360,7 @@ pub(crate) enum ModuleInitState { pub(crate) struct ModuleInitStateInsides { frames: Vec, + #[cfg(feature = "pkg_mgmt")] module_descriptors: HashMap, } @@ -366,6 +371,7 @@ impl ModuleInitState { pub fn new(mod_name: String) -> (Self, ModId) { let new_insides = ModuleInitStateInsides { frames: vec![ModuleInitFrame::new_with_name(mod_name)], + #[cfg(feature = "pkg_mgmt")] module_descriptors: HashMap::new(), }; let init_state = Self::Root(Rc::new(RefCell::new(new_insides))); @@ -400,6 +406,7 @@ impl ModuleInitState { _ => false } } + #[cfg(feature = "pkg_mgmt")] pub fn decompose(self) -> (Vec, HashMap) { match self { Self::Root(cell) => { @@ -411,6 +418,17 @@ impl ModuleInitState { _ => unreachable!() } } + #[cfg(not(feature = "pkg_mgmt"))] + pub fn decompose(self) -> Vec { + match self { + Self::Root(cell) => { + let mut insides_ref = cell.borrow_mut(); + let frames = core::mem::take(&mut insides_ref.frames); + frames + }, + _ => 
unreachable!() + } + } /// Internal method to retrieve the mod_ptr to a module that's either loading in the /// InitFrame, or loaded into the runner @@ -484,6 +502,7 @@ impl ModuleInitState { pub fn init_module(&mut self, runner: &Metta, mod_name: &str, loader: Box) -> Result { //Give the prepare function a chance to run, in case it hasn't yet + #[cfg(feature = "pkg_mgmt")] let loader = match loader.prepare(None, UpdateMode::FetchIfMissing)? { Some(new_loader) => new_loader, None => loader @@ -509,6 +528,7 @@ impl ModuleInitState { Ok(mod_id) } + #[cfg(feature = "pkg_mgmt")] pub fn get_module_with_descriptor(&self, runner: &Metta, descriptor: &ModuleDescriptor) -> Option { match self { Self::Root(cell) | @@ -525,6 +545,7 @@ impl ModuleInitState { } } + #[cfg(feature = "pkg_mgmt")] pub fn add_module_descriptor(&self, runner: &Metta, descriptor: ModuleDescriptor, mod_id: ModId) { match self { Self::Root(cell) | @@ -609,6 +630,9 @@ pub trait ModuleLoader: std::fmt::Debug + Send + Sync { fn load(&self, context: &mut RunContext) -> Result<(), String>; /// A function to access the [PkgInfo] struct of meta-data associated with a module + /// + /// NOTE: Requires `pkg_mgmt` feature + #[cfg(feature = "pkg_mgmt")] fn pkg_info(&self) -> Option<&PkgInfo> { None } @@ -621,6 +645,11 @@ pub trait ModuleLoader: std::fmt::Debug + Send + Sync { /// loader will replace it. /// /// NOTE: This method may become async in the future + /// + /// FUTURE-QUESTION: Should "Fetch" and "Build" be separated? Currently they are lumped + /// together into one interface but it may make sense to split them into separate entry + /// points. I will keep them as one until the need arises. + #[cfg(feature = "pkg_mgmt")] fn prepare(&self, _local_dir: Option<&Path>, _update_mode: UpdateMode) -> Result>, String> { Ok(None) } diff --git a/lib/src/metta/runner/modules/mod_names.rs b/lib/src/metta/runner/modules/mod_names.rs index 510ac69c0..307e1b720 100644 --- a/lib/src/metta/runner/modules/mod_names.rs +++ b/lib/src/metta/runner/modules/mod_names.rs @@ -105,6 +105,7 @@ pub(crate) fn normalize_relative_module_name(base_path: &str, mod_name: &str) -> } /// Decomposes name path components into individual module names. Reverse of [compose_name_path] +#[allow(dead_code)] //Some clients are behind feature gates pub(crate) fn decompose_name_path(name: &str) -> Result, String> { let mut components: Vec<&str> = vec![]; let (_, _, last) = ModNameNode::parse_parent_generic(ModNameNode::top(), name, &OverlayMap::none(), @@ -117,6 +118,7 @@ pub(crate) fn decompose_name_path(name: &str) -> Result, String> { } /// Composes a name path from a slice of individual module names. Reverse of [decompose_name_path] +#[allow(dead_code)] //Some clients are behind feature gates pub(crate) fn compose_name_path(components: &[&str]) -> Result { let mut new_name = TOP_MOD_NAME.to_string(); for component in components { @@ -545,6 +547,7 @@ impl std::fmt::Display for ModNameNode { /// Returns `true` if a str is a legal name for a module /// /// A module name must be an ascii string, containing only alpha-numeric characters plus [`_`, `-`] +#[allow(dead_code)] //Some clients are behind feature gates pub fn module_name_is_legal(name: &str) -> bool { for the_char in name.chars() { if !the_char.is_ascii() { @@ -561,6 +564,7 @@ pub fn module_name_is_legal(name: &str) -> bool { /// Returns a legal module name composed from the supplied string, by removing or substituting /// all illlegal characters. 
Returns None if that isn't possible +#[allow(dead_code)] //Some clients are behind feature gates pub fn module_name_make_legal(name: &str) -> Option { let new_name: String = name.chars().filter(|&the_char| { the_char.is_ascii_alphanumeric() || diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index 4fef9b08a..963694657 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -335,4 +335,3 @@ impl ModuleLoader for GitModLoader { //TODO-NOW Implement a MeTTaMod that separates apart the catalog management functions //TODO-NOW Implement a builtin-catalog for acccess to std mods -//TODO-NOW Fix the build without pkg_mgmt feature diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index fbb8663be..9c26494f7 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -4,14 +4,15 @@ use crate::space::*; use crate::metta::*; use crate::metta::text::Tokenizer; use crate::metta::text::SExprParser; -use crate::metta::runner::{Metta, RunContext, ModuleLoader, ResourceKey, mod_name_from_url}; -use crate::metta::runner::git_catalog::ModuleGitLocation; -use crate::metta::runner::pkg_mgmt::{UpdateMode, ManagedCatalog}; +use crate::metta::runner::{Metta, RunContext, ModuleLoader, ResourceKey}; use crate::metta::runner::string::Str; use crate::metta::types::{get_atom_types, get_meta_type}; use crate::common::shared::Shared; use crate::common::CachingMapper; +#[cfg(feature = "pkg_mgmt")] +use crate::metta::runner::{git_catalog::ModuleGitLocation, mod_name_from_url, pkg_mgmt::{UpdateMode, ManagedCatalog}}; + use std::rc::Rc; use std::cell::RefCell; use std::fmt::Display; @@ -25,6 +26,10 @@ fn unit_result() -> Result, ExecError> { Ok(vec![UNIT_ATOM()]) } +fn regex(regex: &str) -> Regex { + Regex::new(regex).unwrap() +} + #[derive(Clone, Debug)] pub struct ImportOp { //TODO-HACK: This is a terrible horrible ugly hack that should be fixed ASAP @@ -271,279 +276,6 @@ impl Grounded for ModSpaceOp { } } -/// Provides a way to access [Metta::load_module_at_path] from within MeTTa code -#[derive(Clone, Debug)] -pub struct RegisterModuleOp { - metta: Metta -} - -impl PartialEq for RegisterModuleOp { - fn eq(&self, _other: &Self) -> bool { true } -} - -impl RegisterModuleOp { - pub fn new(metta: Metta) -> Self { - Self{ metta } - } -} - -impl Display for RegisterModuleOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "register-module!") - } -} - -impl Grounded for RegisterModuleOp { - fn type_(&self) -> Atom { - Atom::expr([ARROW_SYMBOL, ATOM_TYPE_ATOM, UNIT_TYPE()]) - } - - fn execute(&self, args: &[Atom]) -> Result, ExecError> { - let arg_error = "register-module! expects a file system path; use quotes if needed"; - let path_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; - - let path = match path_arg_atom { - Atom::Symbol(path_arg) => path_arg.name(), - Atom::Grounded(g) => g.downcast_ref::().ok_or_else(|| ExecError::from(arg_error))?.as_str(), - _ => return Err(arg_error.into()), - }; - let path = strip_quotes(path); - let path = std::path::PathBuf::from(path); - - // Load the module from the path - // QUESTION: Do we want to expose the ability to give the module a different name and/ or - // load it into a different part of the namespace hierarchy? For now I was just thinking - // it is better to keep the args simple. 
IMO this is a place for optional var-args when we - // decide on the best way to handle them language-wide. - self.metta.load_module_at_path(path, None).map_err(|e| ExecError::from(e))?; - - unit_result() - } - - fn match_(&self, other: &Atom) -> MatchResultIter { - match_by_equality(self, other) - } -} - -/// Provides access to module in a remote git repo, from within MeTTa code -/// Similar to `register-module!`, this op will bypass the catalog search -#[derive(Clone, Debug)] -pub struct GitModuleOp { - //TODO-HACK: This is a terrible horrible ugly hack that should be fixed ASAP - context: std::sync::Arc>>>>>, -} - -impl PartialEq for GitModuleOp { - fn eq(&self, _other: &Self) -> bool { true } -} - -impl GitModuleOp { - pub fn new(metta: Metta) -> Self { - Self{ context: metta.0.context.clone() } - } -} - -impl Display for GitModuleOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "git-module!") - } -} - -impl Grounded for GitModuleOp { - fn type_(&self) -> Atom { - Atom::expr([ARROW_SYMBOL, ATOM_TYPE_ATOM, UNIT_TYPE()]) - } - - fn execute(&self, args: &[Atom]) -> Result, ExecError> { - let arg_error = "git-module! expects a URL; use quotes if needed"; - let url_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; - // TODO: When we figure out how to address varargs, it will be nice to take an optional branch name - - let url = match url_arg_atom { - Atom::Symbol(url_arg) => url_arg.name(), - Atom::Grounded(g) => g.downcast_ref::().ok_or_else(|| ExecError::from(arg_error))?.as_str(), - _ => return Err(arg_error.into()), - }; - let url = strip_quotes(url); - - // TODO: Depending on what we do with `register-module!`, we might want to let the - // caller provide an optional mod_name here too, rather than extracting it from the url - let mod_name = match mod_name_from_url(url) { - Some(mod_name) => mod_name, - None => return Err(ExecError::from("git-module! error extracting module name from URL")) - }; - - let ctx_ref = self.context.lock().unwrap().last().unwrap().clone(); - let mut context = ctx_ref.lock().unwrap(); - - let git_mod_location = ModuleGitLocation::new(url.to_string()); - - if let Some((loader, descriptor)) = git_mod_location.get_loader_in_explicit_catalog(&mod_name, UpdateMode::TryFetchLatest, context.metta.environment()).map_err(|e| ExecError::from(e))? { - context.get_or_init_module_with_descriptor(&mod_name, descriptor, loader).map_err(|e| ExecError::from(e))?; - } - - unit_result() - } - - fn match_(&self, other: &Atom) -> MatchResultIter { - match_by_equality(self, other) - } -} - -/// Lists contents of all Catalogs that support the "list" method -#[derive(Clone, Debug)] -pub struct CatalogListOp { - metta: Metta -} - -impl PartialEq for CatalogListOp { - fn eq(&self, _other: &Self) -> bool { true } -} - -impl CatalogListOp { - pub fn new(metta: Metta) -> Self { - Self{ metta } - } -} - -impl Display for CatalogListOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "catalog-list") - } -} - -impl Grounded for CatalogListOp { - fn type_(&self) -> Atom { - //TODO-FUTURE, when we decide on a friendly standard for var-args, it would be nice to - // allow an optional arg to list a specific catalog. 
For now we list all of them - //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout - Atom::expr([ARROW_SYMBOL, UNIT_TYPE()]) - } - - fn execute(&self, _args: &[Atom]) -> Result, ExecError> { - - fn list_catalog(cat: &dyn crate::metta::runner::ModuleCatalog) { - if let Some(cat_iter) = cat.list() { - println!("{}:", cat.display_name()); - for desc in cat_iter { - println!(" {desc}"); - } - } - } - - if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { - list_catalog(explicit_git_catalog); - } - for cat in self.metta.environment().catalogs() { - list_catalog(cat); - } - - unit_result() - } - - fn match_(&self, other: &Atom) -> MatchResultIter { - match_by_equality(self, other) - } -} - -/// Update all contents of all ManagedCatalogs to the latest version of all modules -#[derive(Clone, Debug)] -pub struct CatalogUpdateOp { - metta: Metta -} - -impl PartialEq for CatalogUpdateOp { - fn eq(&self, _other: &Self) -> bool { true } -} - -impl CatalogUpdateOp { - pub fn new(metta: Metta) -> Self { - Self{ metta } - } -} - -impl Display for CatalogUpdateOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "catalog-update") - } -} - -impl Grounded for CatalogUpdateOp { - fn type_(&self) -> Atom { - //TODO-FUTURE, when we decide on a friendly standard for var-args, it would be nice to - // allow an optional arg to list a specific catalog. For now we list all of them - //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout - Atom::expr([ARROW_SYMBOL, UNIT_TYPE()]) - } - - fn execute(&self, _args: &[Atom]) -> Result, ExecError> { - - if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { - explicit_git_catalog.fetch_newest_for_all(UpdateMode::FetchLatest)?; - } - - for cat in self.metta.environment().catalogs() { - match cat.as_managed() { - Some(cat) => cat.fetch_newest_for_all(UpdateMode::FetchLatest)?, - None => {} - } - } - - unit_result() - } - - fn match_(&self, other: &Atom) -> MatchResultIter { - match_by_equality(self, other) - } -} - -/// Clears the contents of all ManagedCatalogs -#[derive(Clone, Debug)] -pub struct CatalogClearOp { - metta: Metta -} - -impl PartialEq for CatalogClearOp { - fn eq(&self, _other: &Self) -> bool { true } -} - -impl CatalogClearOp { - pub fn new(metta: Metta) -> Self { - Self{ metta } - } -} - -impl Display for CatalogClearOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "catalog-clear") - } -} - -impl Grounded for CatalogClearOp { - fn type_(&self) -> Atom { - //TODO-FUTURE, when we decide on a friendly standard for var-args, it would be nice to - // allow an optional arg to list a specific catalog. 
For now we list all of them - //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout - Atom::expr([ARROW_SYMBOL, UNIT_TYPE()]) - } - - fn execute(&self, _args: &[Atom]) -> Result, ExecError> { - - if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { - explicit_git_catalog.clear_all()?; - } - for cat in self.metta.environment().catalogs().filter_map(|cat| cat.as_managed()) { - cat.clear_all()?; - } - - unit_result() - } - - fn match_(&self, other: &Atom) -> MatchResultIter { - match_by_equality(self, other) - } -} - /// This operation prints the modules loaded from the top of the runner /// /// NOTE: This is a temporary stop-gap to help MeTTa users inspect which modules they have loaded and @@ -1234,6 +966,297 @@ impl Grounded for MatchOp { } } +/// The op atoms that depend on the pkg_mgmt feature +#[cfg(feature = "pkg_mgmt")] +mod pkg_mgmt_ops { + use super::*; + + /// Provides a way to access [Metta::load_module_at_path] from within MeTTa code + #[derive(Clone, Debug)] + pub struct RegisterModuleOp { + metta: Metta + } + + impl PartialEq for RegisterModuleOp { + fn eq(&self, _other: &Self) -> bool { true } + } + + impl RegisterModuleOp { + pub fn new(metta: Metta) -> Self { + Self{ metta } + } + } + + impl Display for RegisterModuleOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "register-module!") + } + } + + impl Grounded for RegisterModuleOp { + fn type_(&self) -> Atom { + Atom::expr([ARROW_SYMBOL, ATOM_TYPE_ATOM, UNIT_TYPE()]) + } + + fn execute(&self, args: &[Atom]) -> Result, ExecError> { + let arg_error = "register-module! expects a file system path; use quotes if needed"; + let path_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; + + let path = match path_arg_atom { + Atom::Symbol(path_arg) => path_arg.name(), + Atom::Grounded(g) => g.downcast_ref::().ok_or_else(|| ExecError::from(arg_error))?.as_str(), + _ => return Err(arg_error.into()), + }; + let path = strip_quotes(path); + let path = std::path::PathBuf::from(path); + + // Load the module from the path + // QUESTION: Do we want to expose the ability to give the module a different name and/ or + // load it into a different part of the namespace hierarchy? For now I was just thinking + // it is better to keep the args simple. IMO this is a place for optional var-args when we + // decide on the best way to handle them language-wide. + self.metta.load_module_at_path(path, None).map_err(|e| ExecError::from(e))?; + + unit_result() + } + + fn match_(&self, other: &Atom) -> MatchResultIter { + match_by_equality(self, other) + } + } + + /// Provides access to module in a remote git repo, from within MeTTa code + /// Similar to `register-module!`, this op will bypass the catalog search + #[derive(Clone, Debug)] + pub struct GitModuleOp { + //TODO-HACK: This is a terrible horrible ugly hack that should be fixed ASAP + context: std::sync::Arc>>>>>, + } + + impl PartialEq for GitModuleOp { + fn eq(&self, _other: &Self) -> bool { true } + } + + impl GitModuleOp { + pub fn new(metta: Metta) -> Self { + Self{ context: metta.0.context.clone() } + } + } + + impl Display for GitModuleOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "git-module!") + } + } + + impl Grounded for GitModuleOp { + fn type_(&self) -> Atom { + Atom::expr([ARROW_SYMBOL, ATOM_TYPE_ATOM, UNIT_TYPE()]) + } + + fn execute(&self, args: &[Atom]) -> Result, ExecError> { + let arg_error = "git-module! 
expects a URL; use quotes if needed"; + let url_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; + // TODO: When we figure out how to address varargs, it will be nice to take an optional branch name + + let url = match url_arg_atom { + Atom::Symbol(url_arg) => url_arg.name(), + Atom::Grounded(g) => g.downcast_ref::().ok_or_else(|| ExecError::from(arg_error))?.as_str(), + _ => return Err(arg_error.into()), + }; + let url = strip_quotes(url); + + // TODO: Depending on what we do with `register-module!`, we might want to let the + // caller provide an optional mod_name here too, rather than extracting it from the url + let mod_name = match mod_name_from_url(url) { + Some(mod_name) => mod_name, + None => return Err(ExecError::from("git-module! error extracting module name from URL")) + }; + + let ctx_ref = self.context.lock().unwrap().last().unwrap().clone(); + let mut context = ctx_ref.lock().unwrap(); + + let git_mod_location = ModuleGitLocation::new(url.to_string()); + + if let Some((loader, descriptor)) = git_mod_location.get_loader_in_explicit_catalog(&mod_name, UpdateMode::TryFetchLatest, context.metta.environment()).map_err(|e| ExecError::from(e))? { + context.get_or_init_module_with_descriptor(&mod_name, descriptor, loader).map_err(|e| ExecError::from(e))?; + } + + unit_result() + } + + fn match_(&self, other: &Atom) -> MatchResultIter { + match_by_equality(self, other) + } + } + + /// Lists contents of all Catalogs that support the "list" method + #[derive(Clone, Debug)] + pub struct CatalogListOp { + metta: Metta + } + + impl PartialEq for CatalogListOp { + fn eq(&self, _other: &Self) -> bool { true } + } + + impl CatalogListOp { + pub fn new(metta: Metta) -> Self { + Self{ metta } + } + } + + impl Display for CatalogListOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "catalog-list") + } + } + + impl Grounded for CatalogListOp { + fn type_(&self) -> Atom { + //TODO-FUTURE, when we decide on a friendly standard for var-args, it would be nice to + // allow an optional arg to list a specific catalog. 
For now we list all of them + //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout + Atom::expr([ARROW_SYMBOL, UNIT_TYPE()]) + } + + fn execute(&self, _args: &[Atom]) -> Result, ExecError> { + + fn list_catalog(cat: &dyn crate::metta::runner::ModuleCatalog) { + if let Some(cat_iter) = cat.list() { + println!("{}:", cat.display_name()); + for desc in cat_iter { + println!(" {desc}"); + } + } + } + + if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { + list_catalog(explicit_git_catalog); + } + for cat in self.metta.environment().catalogs() { + list_catalog(cat); + } + + unit_result() + } + + fn match_(&self, other: &Atom) -> MatchResultIter { + match_by_equality(self, other) + } + } + + /// Update all contents of all ManagedCatalogs to the latest version of all modules + #[derive(Clone, Debug)] + pub struct CatalogUpdateOp { + metta: Metta + } + + impl PartialEq for CatalogUpdateOp { + fn eq(&self, _other: &Self) -> bool { true } + } + + impl CatalogUpdateOp { + pub fn new(metta: Metta) -> Self { + Self{ metta } + } + } + + impl Display for CatalogUpdateOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "catalog-update") + } + } + + impl Grounded for CatalogUpdateOp { + fn type_(&self) -> Atom { + //TODO-FUTURE, when we decide on a friendly standard for var-args, it would be nice to + // allow an optional arg to list a specific catalog. For now we list all of them + //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout + Atom::expr([ARROW_SYMBOL, UNIT_TYPE()]) + } + + fn execute(&self, _args: &[Atom]) -> Result, ExecError> { + + if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { + explicit_git_catalog.fetch_newest_for_all(UpdateMode::FetchLatest)?; + } + + for cat in self.metta.environment().catalogs() { + match cat.as_managed() { + Some(cat) => cat.fetch_newest_for_all(UpdateMode::FetchLatest)?, + None => {} + } + } + + unit_result() + } + + fn match_(&self, other: &Atom) -> MatchResultIter { + match_by_equality(self, other) + } + } + + /// Clears the contents of all ManagedCatalogs + #[derive(Clone, Debug)] + pub struct CatalogClearOp { + metta: Metta + } + + impl PartialEq for CatalogClearOp { + fn eq(&self, _other: &Self) -> bool { true } + } + + impl CatalogClearOp { + pub fn new(metta: Metta) -> Self { + Self{ metta } + } + } + + impl Display for CatalogClearOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "catalog-clear") + } + } + + impl Grounded for CatalogClearOp { + fn type_(&self) -> Atom { + //TODO-FUTURE, when we decide on a friendly standard for var-args, it would be nice to + // allow an optional arg to list a specific catalog. 
For now we list all of them + //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout + Atom::expr([ARROW_SYMBOL, UNIT_TYPE()]) + } + + fn execute(&self, _args: &[Atom]) -> Result, ExecError> { + + if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { + explicit_git_catalog.clear_all()?; + } + for cat in self.metta.environment().catalogs().filter_map(|cat| cat.as_managed()) { + cat.clear_all()?; + } + + unit_result() + } + + fn match_(&self, other: &Atom) -> MatchResultIter { + match_by_equality(self, other) + } + } + + pub fn register_pkg_mgmt_tokens(tref: &mut Tokenizer, metta: &Metta) { + let register_module_op = Atom::gnd(RegisterModuleOp::new(metta.clone())); + tref.register_token(regex(r"register-module!"), move |_| { register_module_op.clone() }); + let git_module_op = Atom::gnd(GitModuleOp::new(metta.clone())); + tref.register_token(regex(r"git-module!"), move |_| { git_module_op.clone() }); + let catalog_list_op = Atom::gnd(CatalogListOp::new(metta.clone())); + tref.register_token(regex(r"catalog-list"), move |_| { catalog_list_op.clone() }); + let catalog_update_op = Atom::gnd(CatalogUpdateOp::new(metta.clone())); + tref.register_token(regex(r"catalog-update"), move |_| { catalog_update_op.clone() }); + let catalog_clear_op = Atom::gnd(CatalogClearOp::new(metta.clone())); + tref.register_token(regex(r"catalog-clear"), move |_| { catalog_clear_op.clone() }); + } +} /// The internal `non_minimal_only_stdlib` module contains code that is never used by the minimal stdlib #[cfg(not(feature = "minimal"))] @@ -1758,10 +1781,6 @@ mod non_minimal_only_stdlib { } } - fn regex(regex: &str) -> Regex { - Regex::new(regex).unwrap() - } - //TODO: The additional arguments are a temporary hack on account of the way the operation atoms store references // to the runner & module state. 
https://github.com/trueagi-io/hyperon-experimental/issues/410 #[cfg(not(feature = "minimal"))] @@ -1805,22 +1824,15 @@ mod non_minimal_only_stdlib { tref.register_token(regex(r"get-state"), move |_| { get_state_op.clone() }); let get_meta_type_op = Atom::gnd(GetMetaTypeOp{}); tref.register_token(regex(r"get-metatype"), move |_| { get_meta_type_op.clone() }); - let register_module_op = Atom::gnd(RegisterModuleOp::new(metta.clone())); - tref.register_token(regex(r"register-module!"), move |_| { register_module_op.clone() }); - let catalog_list_op = Atom::gnd(CatalogListOp::new(metta.clone())); - tref.register_token(regex(r"catalog-list"), move |_| { catalog_list_op.clone() }); - let catalog_update_op = Atom::gnd(CatalogUpdateOp::new(metta.clone())); - tref.register_token(regex(r"catalog-update"), move |_| { catalog_update_op.clone() }); - let catalog_clear_op = Atom::gnd(CatalogClearOp::new(metta.clone())); - tref.register_token(regex(r"catalog-clear"), move |_| { catalog_clear_op.clone() }); - let git_module_op = Atom::gnd(GitModuleOp::new(metta.clone())); - tref.register_token(regex(r"git-module!"), move |_| { git_module_op.clone() }); let mod_space_op = Atom::gnd(ModSpaceOp::new(metta.clone())); tref.register_token(regex(r"mod-space!"), move |_| { mod_space_op.clone() }); let print_mods_op = Atom::gnd(PrintModsOp::new(metta.clone())); tref.register_token(regex(r"print-mods!"), move |_| { print_mods_op.clone() }); let sealed_op = Atom::gnd(SealedOp{}); tref.register_token(regex(r"sealed"), move |_| { sealed_op.clone() }); + + #[cfg(feature = "pkg_mgmt")] + pkg_mgmt_ops::register_pkg_mgmt_tokens(tref, metta); } //TODO: The metta argument is a temporary hack on account of the way the operation atoms store references From 82cc6362b997ab9da11ecb6ebd346b0769d3602e Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sun, 12 May 2024 11:44:24 +0900 Subject: [PATCH 51/77] Fixing minimal build when pkg_mgmt feature is off --- lib/src/metta/runner/stdlib.rs | 4 ++-- lib/src/metta/runner/stdlib_minimal.rs | 19 ++++--------------- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index 9c26494f7..dced9b118 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -26,7 +26,7 @@ fn unit_result() -> Result, ExecError> { Ok(vec![UNIT_ATOM()]) } -fn regex(regex: &str) -> Regex { +pub(crate) fn regex(regex: &str) -> Regex { Regex::new(regex).unwrap() } @@ -968,7 +968,7 @@ impl Grounded for MatchOp { /// The op atoms that depend on the pkg_mgmt feature #[cfg(feature = "pkg_mgmt")] -mod pkg_mgmt_ops { +pub(crate) mod pkg_mgmt_ops { use super::*; /// Provides a way to access [Metta::load_module_at_path] from within MeTTa code diff --git a/lib/src/metta/runner/stdlib_minimal.rs b/lib/src/metta/runner/stdlib_minimal.rs index dc65a5b31..9b62777c7 100644 --- a/lib/src/metta/runner/stdlib_minimal.rs +++ b/lib/src/metta/runner/stdlib_minimal.rs @@ -8,9 +8,9 @@ use crate::metta::types::get_atom_types; use crate::common::assert::vec_eq_no_order; use crate::common::shared::Shared; use crate::metta::runner::stdlib; +use crate::metta::runner::stdlib::regex; use std::fmt::Display; -use regex::Regex; use std::convert::TryInto; use super::arithmetics::*; @@ -421,10 +421,6 @@ impl Grounded for CaseOp { } } -fn regex(regex: &str) -> Regex { - Regex::new(regex).unwrap() -} - //TODO: The additional arguments are a temporary hack on account of the way the operation atoms store references // to the runner & module state. 
https://github.com/trueagi-io/hyperon-experimental/issues/410 pub fn register_common_tokens(tref: &mut Tokenizer, _tokenizer: Shared, space: &DynSpace, metta: &Metta) { @@ -455,20 +451,13 @@ pub fn register_common_tokens(tref: &mut Tokenizer, _tokenizer: Shared Date: Mon, 13 May 2024 14:45:34 +0900 Subject: [PATCH 52/77] Moving some environment config behaviors from being hard-coded to the "environment.metta" file in the config dir --- .../metta/runner/environment.default.metta | 18 ++ lib/src/metta/runner/environment.rs | 166 +++++++++++++++--- lib/src/metta/runner/init.default.metta | 2 +- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 3 - lib/src/metta/runner/stdlib.rs | 23 +-- lib/src/metta/runner/string.rs | 16 ++ repl/src/metta_shim.rs | 25 ++- 7 files changed, 193 insertions(+), 60 deletions(-) create mode 100644 lib/src/metta/runner/environment.default.metta diff --git a/lib/src/metta/runner/environment.default.metta b/lib/src/metta/runner/environment.default.metta new file mode 100644 index 000000000..9b63f12e3 --- /dev/null +++ b/lib/src/metta/runner/environment.default.metta @@ -0,0 +1,18 @@ + +; Use this file to configure the environment. Settings from this file will be used in +; addition to settings configured programmatically through the EnvironmentBuilder API. +; The API will always take precedence over settings from this file if there is a conflict. + +; NOTE: stdlib operations such as string manipulation and arithmetic are not available when +; running this file + +(#includePath + "{$cfgdir}/modules/" +) + +(#gitCatalog + ; TODO, Default catalog should be moved to trueagi github account + (#name "luketpeterson-catalog") + (#url "https://github.com/luketpeterson/metta-mod-catalog.git") + (#refreshTime 259200) ; 3 days = 3 days * 24 hrs * 60 minutes * 60 seconds +) \ No newline at end of file diff --git a/lib/src/metta/runner/environment.rs b/lib/src/metta/runner/environment.rs index a027b4382..90c1f0636 100644 --- a/lib/src/metta/runner/environment.rs +++ b/lib/src/metta/runner/environment.rs @@ -1,9 +1,11 @@ use std::path::{Path, PathBuf}; -use std::io::Write; +use std::io::{Read, BufReader, Write}; use std::fs; use std::sync::Arc; +use crate::{sym, ExpressionAtom, SymbolAtom, metta::GroundingSpace}; + #[cfg(feature = "pkg_mgmt")] use crate::metta::runner::pkg_mgmt::{ModuleCatalog, DirCatalog, LocalCatalog, FsModuleFormat, SingleFileModuleFmt, DirModuleFmt, git_catalog::*}; @@ -30,6 +32,7 @@ pub struct Environment { } const DEFAULT_INIT_METTA: &[u8] = include_bytes!("init.default.metta"); +const DEFAULT_ENVIRONMENT_METTA: &[u8] = include_bytes!("environment.default.metta"); static COMMON_ENV: std::sync::OnceLock> = std::sync::OnceLock::new(); @@ -121,6 +124,9 @@ enum ProtoCatalog { Other(Box), } +#[cfg(not(feature = "pkg_mgmt"))] +type ProtoCatalog = (); + impl EnvBuilder { /// Returns a new EnvBuilder, to set the parameters for the MeTTa Environment @@ -284,8 +290,6 @@ impl EnvBuilder { if let Some(config_dir) = &env.config_dir { - #[cfg(feature = "pkg_mgmt")] - let modules_dir = config_dir.join("modules"); let init_metta_path = config_dir.join("init.metta"); //Create the default config dir and its contents, if that part of our directive @@ -293,9 +297,6 @@ impl EnvBuilder { std::fs::create_dir_all(&config_dir).unwrap(); - #[cfg(feature = "pkg_mgmt")] - std::fs::create_dir_all(&modules_dir).unwrap(); - //Create the default init.metta file let mut file = fs::OpenOptions::new() .create(true) @@ -313,19 +314,33 @@ impl EnvBuilder { // Set the caches dir within the config 
dir. We may want to move it elsewhere in the future env.caches_dir = env.config_dir.as_ref().map(|cfg_dir| cfg_dir.join("caches")); - //Push the "modules" dir, to search after the other paths that were specified - //TODO: the config.metta file should be able to append / modify the catalogs, and can choose not to - // include the "modules" dir in the future. - #[cfg(feature = "pkg_mgmt")] - if modules_dir.exists() { - proto_catalogs.push(ProtoCatalog::Path(modules_dir)); - } - if init_metta_path.exists() { env.init_metta_path = Some(init_metta_path); } } + if let Some(config_dir) = &env.config_dir { + let env_metta_path = config_dir.join("environment.metta"); + + //Create the default environment.metta file if it doesn't exist + if !env_metta_path.exists() { + let mut file = fs::OpenOptions::new() + .create(true) + .write(true) + .open(&env_metta_path) + .expect(&format!("Error creating default environment config file at {env_metta_path:?}")); + file.write_all(&DEFAULT_ENVIRONMENT_METTA).unwrap(); + } + + #[cfg(feature = "pkg_mgmt")] + let env_metta_interp_result = interpret_environment_metta(env_metta_path, &mut env, &mut proto_catalogs); + #[cfg(not(feature = "pkg_mgmt"))] + let env_metta_interp_result = interpret_environment_metta(env_metta_path, &mut env, &mut Vec::<()>::new()); + env_metta_interp_result.unwrap_or_else(|e| { + log::warn!("Error occurred interpreting environment.metta file: {e}"); + }); + } + #[cfg(feature = "pkg_mgmt")] { //Append the built-in [FSModuleFormat]s, [SingleFileModuleFmt] and [DirModuleFmt] @@ -356,16 +371,6 @@ impl EnvBuilder { let git_mod_catalog = GitCatalog::new_without_source_repo(caches_dir, env.fs_mod_formats.clone(), "git-modules").unwrap(); explicit_git_mods.push_upstream_catalog(Box::new(git_mod_catalog)); env.explicit_git_mods = Some(explicit_git_mods); - - //Add the remote git-based catalog to the end of the catalog priority search list - //TODO-NOW: Catalog should be moved to trueagi github account, and catalog settings should come from config - let catalog_name = "luketpeterson-catalog"; - let catalog_url = "https://github.com/luketpeterson/metta-mod-catalog.git"; - let refresh_time = 259200; //3 days = 3 days * 24 hrs * 60 minutes * 60 seconds - let mut managed_remote_catalog = LocalCatalog::new(caches_dir, catalog_name).unwrap(); - let remote_catalog = GitCatalog::new(caches_dir, env.fs_mod_formats.clone(), catalog_name, catalog_url, refresh_time).unwrap(); - managed_remote_catalog.push_upstream_catalog(Box::new(remote_catalog)); - env.catalogs.push(Box::new(managed_remote_catalog)); } } @@ -373,3 +378,116 @@ impl EnvBuilder { } } + +/// Interprets the file at `env_metta_path`, and modifies settings in the Environment +/// +/// NOTE: I wonder if users will get confused by the fact that the full set of runner +/// features aren't available in the environment.metta file. But there is a bootstrapping +/// problem trying to using a runner here +fn interpret_environment_metta>(env_metta_path: P, env: &mut Environment, proto_catalogs: &mut Vec) -> Result<(), String> { + let file = fs::File::open(env_metta_path).map_err(|e| e.to_string())?; + let mut buf_reader = BufReader::new(file); + let mut file_contents = String::new(); + buf_reader.read_to_string(&mut file_contents).map_err(|e| e.to_string())?; + + let space = GroundingSpace::new(); + let tokenizer = crate::metta::runner::Tokenizer::new(); + let mut parser = crate::metta::runner::SExprParser::new(&file_contents); + while let Some(atom) = parser.parse(&tokenizer)? 
{ + let atoms = crate::metta::runner::interpret(&space, &atom)?; + let atom = if atoms.len() != 1 { + return Err(format!("Error in environment.metta. Atom must evaluate into a single deterministic result. Found {atoms:?}")); + } else { + atoms.into_iter().next().unwrap() + }; + + //TODO-FUTURE: Use atom-serde here to cut down on boilerplate from interpreting these atoms + let expr = ExpressionAtom::try_from(atom)?; + match expr.children().get(0) { + Some(atom_0) if *atom_0 == sym!("#includePath") => { + #[cfg(feature = "pkg_mgmt")] + proto_catalogs.push(include_path_from_cfg_atom(&expr, env)?); + #[cfg(not(feature = "pkg_mgmt"))] + log::warn!("#includePath in environment.metta not supported without pkg_mgmt feature"); + }, + Some(atom_0) if *atom_0 == sym!("#gitCatalog") => { + #[cfg(feature = "pkg_mgmt")] + proto_catalogs.push(git_catalog_from_cfg_atom(&expr, env)?); + #[cfg(not(feature = "pkg_mgmt"))] + log::warn!("#gitCatalog in environment.metta not supported without pkg_mgmt feature"); + }, + _ => return Err(format!("Error in environment.metta. Unrecognized setting: {expr:?}")) + } + } + Ok(()) +} + +#[cfg(feature = "pkg_mgmt")] +fn git_catalog_from_cfg_atom(atom: &ExpressionAtom, env: &Environment) -> Result { + + let mut catalog_name = None; + let mut catalog_url = None; + let mut refresh_time = None; + + let mut atom_iter = atom.children().iter(); + let _ = atom_iter.next(); + for atom in atom_iter { + let expr = <&ExpressionAtom>::try_from(atom)?; + if expr.children().len() < 1 { + continue; + } + let key_atom = expr.children().get(0).unwrap(); + let val_atom = match expr.children().get(1) { + Some(atom) => atom, + None => return Err(format!("Error in environment.metta. Key without value: {key_atom}")) + }; + + match key_atom { + _ if *key_atom == sym!("#name") => catalog_name = Some(<&SymbolAtom>::try_from(val_atom)?.name()), + _ if *key_atom == sym!("#url") => catalog_url = Some(<&SymbolAtom>::try_from(val_atom)?.name()), + _ if *key_atom == sym!("#refreshTime") => refresh_time = Some(<&SymbolAtom>::try_from(val_atom)?.name()), + _ => return Err(format!("Error in environment.metta. Unknown key: {key_atom}")) + } + } + + let caches_dir = env.caches_dir.as_ref().unwrap(); + let catalog_name = catalog_name.ok_or_else(|| format!("Error in environment.metta. \"name\" property required for #gitCatalog"))?; + let catalog_url = catalog_url.ok_or_else(|| format!("Error in environment.metta. \"url\" property required for #gitCatalog"))?; + let refresh_time = refresh_time.ok_or_else(|| format!("Error in environment.metta. \"refreshTime\" property required for #gitCatalog"))? + .parse::().map_err(|e| format!("Error in environment.metta. 
Error parsing \"refreshTime\": {e}"))?; + + let catalog_name = crate::metta::runner::string::strip_quotes(catalog_name); + let catalog_url = crate::metta::runner::string::strip_quotes(catalog_url); + + let mut managed_remote_catalog = LocalCatalog::new(caches_dir, catalog_name).unwrap(); + let remote_catalog = GitCatalog::new(caches_dir, env.fs_mod_formats.clone(), catalog_name, catalog_url, refresh_time).unwrap(); + managed_remote_catalog.push_upstream_catalog(Box::new(remote_catalog)); + Ok(ProtoCatalog::Other(Box::new(managed_remote_catalog))) +} + +#[cfg(feature = "pkg_mgmt")] +fn include_path_from_cfg_atom(atom: &ExpressionAtom, env: &Environment) -> Result { + + let mut atom_iter = atom.children().iter(); + let _ = atom_iter.next(); + let path_atom = match atom_iter.next() { + Some(atom) => atom, + None => return Err(format!("Error in environment.metta. #includePath missing path value")) + }; + let path = <&SymbolAtom>::try_from(path_atom)?.name(); + let path = crate::metta::runner::string::strip_quotes(path); + + //TODO-FUTURE: In the future we may want to replace dyn-fmt with strfmt, and do something a + // little bit nicer than this + let path = match path.strip_prefix("{$cfgdir}/") { + Some(rel_path) => env.config_dir().unwrap().join(rel_path), + None => PathBuf::from(path) + }; + + if !path.exists() { + log::info!("Creating search directory for modules: \"{}\"", path.display()); + std::fs::create_dir_all(&path).map_err(|e| e.to_string())?; + } + + Ok(ProtoCatalog::Path(path)) +} diff --git a/lib/src/metta/runner/init.default.metta b/lib/src/metta/runner/init.default.metta index 04e8c3752..cf0794834 100644 --- a/lib/src/metta/runner/init.default.metta +++ b/lib/src/metta/runner/init.default.metta @@ -1,2 +1,2 @@ -; TODO: Let the "importPaths" be modifiable, but I want better string manipulation atoms +; The contents of this file are evaluated for each new runner in the runner's top context diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index 963694657..d3e22a386 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -332,6 +332,3 @@ impl ModuleLoader for GitModLoader { unreachable!() } } - -//TODO-NOW Implement a MeTTaMod that separates apart the catalog management functions -//TODO-NOW Implement a builtin-catalog for acccess to std mods diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index dced9b118..a75b238c8 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -136,24 +136,6 @@ impl Grounded for ImportOp { } } -/// A utility function to return the part of a string in between starting and ending quotes -// TODO: Roll this into a stdlib grounded string module, maybe as a test case for -// https://github.com/trueagi-io/hyperon-experimental/issues/351 -fn strip_quotes(src: &str) -> &str { - if let Some(first) = src.chars().next() { - if first == '"' { - if let Some(last) = src.chars().last() { - if last == '"' { - if src.len() > 1 { - return &src[1..src.len()-1] - } - } - } - } - } - src -} - #[derive(Clone, Debug)] pub struct IncludeOp { //TODO-HACK: This is a terrible horrible ugly hack that should be fixed ASAP @@ -971,6 +953,11 @@ impl Grounded for MatchOp { pub(crate) mod pkg_mgmt_ops { use super::*; + //QUESTION: Do we want to factor these catalog management ops and specialized loading + // ops into a separate module? 
The argument for "yes" is that the it avoids polluting + // the namespace with ops that are seldom used. The argument for "no" is that importing + // the module to use the ops is another step users must remember. + /// Provides a way to access [Metta::load_module_at_path] from within MeTTa code #[derive(Clone, Debug)] pub struct RegisterModuleOp { diff --git a/lib/src/metta/runner/string.rs b/lib/src/metta/runner/string.rs index 147d91e01..54b577974 100644 --- a/lib/src/metta/runner/string.rs +++ b/lib/src/metta/runner/string.rs @@ -44,3 +44,19 @@ impl std::fmt::Display for Str { write!(f, "\"{}\"", self.0) } } + +/// A utility function to return the part of a string in between starting and ending quotes +pub fn strip_quotes(src: &str) -> &str { + if let Some(first) = src.chars().next() { + if first == '"' { + if let Some(last) = src.chars().last() { + if last == '"' { + if src.len() > 1 { + return &src[1..src.len()-1] + } + } + } + } + } + src +} diff --git a/repl/src/metta_shim.rs b/repl/src/metta_shim.rs index ecdfef859..7fb81a191 100644 --- a/repl/src/metta_shim.rs +++ b/repl/src/metta_shim.rs @@ -232,7 +232,7 @@ pub mod metta_interface_mod { } else { match result.downcast::() { Ok(result_list) => { - Some(result_list.iter().map(|atom| strip_quotes(atom.to_string())).collect()) + Some(result_list.iter().map(|atom| strip_quotes(&atom.to_string()).to_string()).collect()) }, Err(_) => None } @@ -252,7 +252,7 @@ pub mod metta_interface_mod { Ok(if result.is_none() { None } else { - Some(strip_quotes(result.to_string())) + Some(strip_quotes(&result.to_string()).to_string()) }) }).unwrap() } @@ -432,7 +432,7 @@ pub mod metta_interface_mod { pub fn get_config_string(&mut self, config_name: &str) -> Option { let atom = self.get_config_atom(config_name)?; //TODO: We need to do atom type checking here - Some(strip_quotes(atom.to_string())) + Some(strip_quotes(&atom.to_string()).to_string()) } pub fn get_config_expr_vec(&mut self, config_name: &str) -> Option> { @@ -442,7 +442,7 @@ pub mod metta_interface_mod { .into_iter() .map(|atom| { //TODO: We need to do atom type checking here - strip_quotes(atom.to_string()) + strip_quotes(&atom.to_string()).to_string() }) .collect()) } else { @@ -457,20 +457,17 @@ pub mod metta_interface_mod { } -/// A utility function to return the part of a string in between starting and ending quotes -// TODO: Roll this into a stdlib grounded string module, maybe as a test case for -// https://github.com/trueagi-io/hyperon-experimental/issues/351 -fn strip_quotes(the_string: String) -> String { - if let Some(first) = the_string.chars().next() { +pub fn strip_quotes(src: &str) -> &str { + if let Some(first) = src.chars().next() { if first == '"' { - if let Some(last) = the_string.chars().last() { + if let Some(last) = src.chars().last() { if last == '"' { - if the_string.len() > 1 { - return String::from_utf8(the_string.as_bytes()[1..the_string.len()-1].to_vec()).unwrap(); + if src.len() > 1 { + return &src[1..src.len()-1] } } } } } - the_string -} + src +} \ No newline at end of file From a298cc07b51b1e12ffe1b6ee239fd1bf33cd0607 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 13 May 2024 15:21:15 +0900 Subject: [PATCH 53/77] Oops, broke minimal test by removing `use` statement when cleaning up warnings earlier --- lib/src/metta/runner/stdlib_minimal.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/src/metta/runner/stdlib_minimal.rs b/lib/src/metta/runner/stdlib_minimal.rs index 9b62777c7..e63459bf2 100644 --- a/lib/src/metta/runner/stdlib_minimal.rs +++ 
b/lib/src/metta/runner/stdlib_minimal.rs @@ -555,6 +555,7 @@ mod tests { use crate::common::test_utils::metta_space; use std::convert::TryFrom; + use regex::Regex; fn run_program(program: &str) -> Result>, String> { let metta = Metta::new(Some(EnvBuilder::test_env())); From 8878872fa2deaea87b53533d1dbab0a27a6d5be7 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 13 May 2024 16:20:51 +0900 Subject: [PATCH 54/77] Adding `GIT` CMake option, default to ON, for git support --- CMakeLists.txt | 2 ++ c/CMakeLists.txt | 7 +++++++ lib/Cargo.toml | 2 +- python/CMakeLists.txt | 12 +++++++++--- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 89b229101..ab27037a9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,8 @@ project(hyperon) include(ExternalProject) +option(GIT "Adds git features to hyperon library; requires OpenSSL" ON) + set(HYPERONC_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/hyperonc-prefix") message(STATUS "HYPERONC_INSTALL_PREFIX = ${HYPERONC_INSTALL_PREFIX}") diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index 7b9877a15..08a725ee6 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -2,6 +2,8 @@ cmake_minimum_required(VERSION 3.15) project(hyperonc) +option(GIT "Adds git features to hyperon library; requires OpenSSL" ON) + enable_testing() option(BUILD_SHARED_LIBS "Build shared library" ON) add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure) @@ -34,10 +36,15 @@ set(HYPERONC_STATIC_LIB_PATH ${HYPERONC_TARGET_DIR}/${HYPERONC_STATIC_LIB_FILE}) set(HYPERONC_INCLUDE_DIR ${HYPERONC_TARGET_DIR}/hyperon) separate_arguments(CARGO_ARGS_LIST NATIVE_COMMAND ${CARGO_ARGS}) +if(GIT) + set(GIT_FEATURE --features hyperon/git) +endif() + add_custom_target(build-hyperonc ALL COMMAND cargo build ${CARGO_ARGS_LIST} $<${IS_RELEASE_BUILD}:--release> + ${GIT_FEATURE} --target-dir ${HYPERONC_TARGET_DIR} COMMAND ${CMAKE_COMMAND} -E copy diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 9c56c35f4..a64912a9c 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -27,7 +27,7 @@ path = "src/lib.rs" crate-type = ["lib"] [features] -default = ["pkg_mgmt", "git"] +default = ["pkg_mgmt"] # Add one of the features below into default list to enable. 
# See https://doc.rust-lang.org/cargo/reference/features.html#the-features-section minimal = [] # enables minimal MeTTa interpreter diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 596e2dd27..aa226133f 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -2,6 +2,8 @@ cmake_minimum_required(VERSION 3.10.2) project(hyperonpy) +option(GIT "Adds git features to hyperon library; requires OpenSSL" ON) + enable_testing() add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure) @@ -46,8 +48,10 @@ execute_process( WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) include(${CMAKE_CURRENT_BINARY_DIR}/conan_paths.cmake) -find_package(OpenSSL REQUIRED) -find_package(ZLIB REQUIRED) +if(GIT) + find_package(OpenSSL REQUIRED) + find_package(ZLIB REQUIRED) +endif() find_package(pybind11 REQUIRED) find_package(optional-lite REQUIRED) @@ -62,7 +66,9 @@ endif() pybind11_add_module(hyperonpy MODULE ./hyperonpy.cpp) target_link_libraries(hyperonpy PRIVATE "${hyperonc_STATIC_LIBRARY}") -target_link_libraries(hyperonpy PUBLIC ${PLATFORM_LIBRARIES} OpenSSL::SSL OpenSSL::Crypto ZLIB::ZLIB) +if(GIT) + target_link_libraries(hyperonpy PUBLIC ${PLATFORM_LIBRARIES} OpenSSL::SSL OpenSSL::Crypto ZLIB::ZLIB) +endif() set(PYTHONPATH "${CMAKE_CURRENT_SOURCE_DIR}") add_subdirectory(tests) From eabc5182ab4372da904587c8c34a7e30a5543e9b Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 13 May 2024 16:31:11 +0900 Subject: [PATCH 55/77] Fixing bug instroduced with rearrangement of environment init for environment.metta file --- lib/src/metta/runner/environment.rs | 57 ++++++++++++++--------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/lib/src/metta/runner/environment.rs b/lib/src/metta/runner/environment.rs index 90c1f0636..aa330b9f3 100644 --- a/lib/src/metta/runner/environment.rs +++ b/lib/src/metta/runner/environment.rs @@ -319,28 +319,6 @@ impl EnvBuilder { } } - if let Some(config_dir) = &env.config_dir { - let env_metta_path = config_dir.join("environment.metta"); - - //Create the default environment.metta file if it doesn't exist - if !env_metta_path.exists() { - let mut file = fs::OpenOptions::new() - .create(true) - .write(true) - .open(&env_metta_path) - .expect(&format!("Error creating default environment config file at {env_metta_path:?}")); - file.write_all(&DEFAULT_ENVIRONMENT_METTA).unwrap(); - } - - #[cfg(feature = "pkg_mgmt")] - let env_metta_interp_result = interpret_environment_metta(env_metta_path, &mut env, &mut proto_catalogs); - #[cfg(not(feature = "pkg_mgmt"))] - let env_metta_interp_result = interpret_environment_metta(env_metta_path, &mut env, &mut Vec::<()>::new()); - env_metta_interp_result.unwrap_or_else(|e| { - log::warn!("Error occurred interpreting environment.metta file: {e}"); - }); - } - #[cfg(feature = "pkg_mgmt")] { //Append the built-in [FSModuleFormat]s, [SingleFileModuleFmt] and [DirModuleFmt] @@ -362,7 +340,28 @@ impl EnvBuilder { } } } + } + + if let Some(config_dir) = &env.config_dir { + let env_metta_path = config_dir.join("environment.metta"); + + //Create the default environment.metta file if it doesn't exist + if !env_metta_path.exists() { + let mut file = fs::OpenOptions::new() + .create(true) + .write(true) + .open(&env_metta_path) + .expect(&format!("Error creating default environment config file at {env_metta_path:?}")); + file.write_all(&DEFAULT_ENVIRONMENT_METTA).unwrap(); + } + interpret_environment_metta(env_metta_path, &mut env).unwrap_or_else(|e| { + log::warn!("Error occurred interpreting 
environment.metta file: {e}"); + }); + } + + #[cfg(feature = "pkg_mgmt")] + { //If we have a caches dir to cache modules locally then register remote catalogs if let Some(caches_dir) = &env.caches_dir { @@ -384,7 +383,7 @@ impl EnvBuilder { /// NOTE: I wonder if users will get confused by the fact that the full set of runner /// features aren't available in the environment.metta file. But there is a bootstrapping /// problem trying to using a runner here -fn interpret_environment_metta>(env_metta_path: P, env: &mut Environment, proto_catalogs: &mut Vec) -> Result<(), String> { +fn interpret_environment_metta>(env_metta_path: P, env: &mut Environment) -> Result<(), String> { let file = fs::File::open(env_metta_path).map_err(|e| e.to_string())?; let mut buf_reader = BufReader::new(file); let mut file_contents = String::new(); @@ -406,13 +405,13 @@ fn interpret_environment_metta>(env_metta_path: P, env: &mut Envi match expr.children().get(0) { Some(atom_0) if *atom_0 == sym!("#includePath") => { #[cfg(feature = "pkg_mgmt")] - proto_catalogs.push(include_path_from_cfg_atom(&expr, env)?); + env.catalogs.push(include_path_from_cfg_atom(&expr, env)?); #[cfg(not(feature = "pkg_mgmt"))] log::warn!("#includePath in environment.metta not supported without pkg_mgmt feature"); }, Some(atom_0) if *atom_0 == sym!("#gitCatalog") => { #[cfg(feature = "pkg_mgmt")] - proto_catalogs.push(git_catalog_from_cfg_atom(&expr, env)?); + env.catalogs.push(git_catalog_from_cfg_atom(&expr, env)?); #[cfg(not(feature = "pkg_mgmt"))] log::warn!("#gitCatalog in environment.metta not supported without pkg_mgmt feature"); }, @@ -423,7 +422,7 @@ fn interpret_environment_metta>(env_metta_path: P, env: &mut Envi } #[cfg(feature = "pkg_mgmt")] -fn git_catalog_from_cfg_atom(atom: &ExpressionAtom, env: &Environment) -> Result { +fn git_catalog_from_cfg_atom(atom: &ExpressionAtom, env: &Environment) -> Result, String> { let mut catalog_name = None; let mut catalog_url = None; @@ -462,11 +461,11 @@ fn git_catalog_from_cfg_atom(atom: &ExpressionAtom, env: &Environment) -> Result let mut managed_remote_catalog = LocalCatalog::new(caches_dir, catalog_name).unwrap(); let remote_catalog = GitCatalog::new(caches_dir, env.fs_mod_formats.clone(), catalog_name, catalog_url, refresh_time).unwrap(); managed_remote_catalog.push_upstream_catalog(Box::new(remote_catalog)); - Ok(ProtoCatalog::Other(Box::new(managed_remote_catalog))) + Ok(Box::new(managed_remote_catalog)) } #[cfg(feature = "pkg_mgmt")] -fn include_path_from_cfg_atom(atom: &ExpressionAtom, env: &Environment) -> Result { +fn include_path_from_cfg_atom(atom: &ExpressionAtom, env: &Environment) -> Result, String> { let mut atom_iter = atom.children().iter(); let _ = atom_iter.next(); @@ -489,5 +488,5 @@ fn include_path_from_cfg_atom(atom: &ExpressionAtom, env: &Environment) -> Resul std::fs::create_dir_all(&path).map_err(|e| e.to_string())?; } - Ok(ProtoCatalog::Path(path)) + Ok(Box::new(DirCatalog::new(path, env.fs_mod_formats.clone()))) } From d6695916ff86616eb20d3fbc878458a902bf393a Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 13 May 2024 16:37:55 +0900 Subject: [PATCH 56/77] Cleaning up unneeded change for build without pkg_mgmt --- lib/src/metta/runner/environment.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/src/metta/runner/environment.rs b/lib/src/metta/runner/environment.rs index aa330b9f3..99dc29ffc 100644 --- a/lib/src/metta/runner/environment.rs +++ b/lib/src/metta/runner/environment.rs @@ -124,9 +124,6 @@ enum ProtoCatalog { Other(Box), } 
-#[cfg(not(feature = "pkg_mgmt"))] -type ProtoCatalog = (); - impl EnvBuilder { /// Returns a new EnvBuilder, to set the parameters for the MeTTa Environment From 22d40a3eb9b6f9d65e963d9e9dbb2ae94b3b7064 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 25 May 2024 16:05:18 +0900 Subject: [PATCH 57/77] Cleaning up unused var and adding comment to install-hyperonc.sh, code review from @vsbogd --- python/install-hyperonc.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/install-hyperonc.sh b/python/install-hyperonc.sh index eaed7b614..2a7800b89 100755 --- a/python/install-hyperonc.sh +++ b/python/install-hyperonc.sh @@ -22,8 +22,6 @@ done echo "hyperonc repository URL: $HYPERONC_URL" echo "hyperonc revision: $HYPERONC_REV" -os_type=$(uname) - # This is to build subunit from Conan on CentOS based manylinux images. if test "$AUDITWHEEL_POLICY" = "manylinux2014"; then yum install -y perl-devel openssl-devel zlib-devel @@ -48,6 +46,7 @@ git reset --hard FETCH_HEAD mkdir -p ${HOME}/hyperonc/c/build cd ${HOME}/hyperonc/c/build +# Rust doesn't support building shared libraries under musllinux environment, so musllinux is currently unsupported CMAKE_ARGS="$CMAKE_ARGS -DBUILD_SHARED_LIBS=ON" # Local prefix is used to support MacOSX Apple Silicon GitHub actions environment. CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${HOME}/.local" From 03c6394c6e722e17fb2420d4dd7256715f80b155 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 25 May 2024 17:11:07 +0900 Subject: [PATCH 58/77] Changing Python Environment interface so create_config is an ordinary boolean --- python/hyperon/runner.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/hyperon/runner.py b/python/hyperon/runner.py index 33a37dda7..c2d55d82f 100644 --- a/python/hyperon/runner.py +++ b/python/hyperon/runner.py @@ -218,7 +218,7 @@ def config_dir(): else: return None - def init_common_env(working_dir = None, config_dir = None, create_config = None, disable_config = False, is_test = False, include_paths = []): + def init_common_env(working_dir = None, config_dir = None, create_config = True, disable_config = False, is_test = False, include_paths = []): """Initialize the common environment with the supplied args""" builder = Environment.custom_env(working_dir, config_dir, create_config, disable_config, is_test, include_paths) return hp.env_builder_init_common_env(builder) @@ -227,15 +227,15 @@ def test_env(): """Returns an EnvBuilder object specifying a unit-test environment, that can be used to init a MeTTa runner""" return hp.env_builder_use_test_env() - def custom_env(working_dir = None, config_dir = None, create_config = None, disable_config = False, is_test = False, include_paths = []): + def custom_env(working_dir = None, config_dir = None, create_config = True, disable_config = False, is_test = False, include_paths = []): """Returns an EnvBuilder object that can be used to init a MeTTa runner, if you need multiple environments to coexist in the same process""" builder = hp.env_builder_start() if (working_dir is not None): hp.env_builder_set_working_dir(builder, working_dir) if (config_dir is not None): hp.env_builder_set_config_dir(builder, config_dir) - if (create_config is not None): - hp.env_builder_create_config_dir(builder, create_config) #Pass True for "create if missing" behavior (default), and False to never create a new dir + if (create_config is False): + hp.env_builder_create_config_dir(builder, False) #Pass False to disable "create if missing" behavior if 
(disable_config): hp.env_builder_disable_config_dir(builder) if (is_test): From da6a627df278c815a433089725c3fc9f2346fa03 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 25 May 2024 20:46:15 +0900 Subject: [PATCH 59/77] Changing arg to catalog management ops to support specifying specific catalogs, as opposed to always applying to "all" Adding '!' char to catalog managment op names to indicate side-effects --- lib/src/metta/runner/stdlib.rs | 101 +++++++++++++++++++++++++-------- 1 file changed, 78 insertions(+), 23 deletions(-) diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index a75b238c8..b540f6337 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -1014,6 +1014,11 @@ pub(crate) mod pkg_mgmt_ops { /// Provides access to module in a remote git repo, from within MeTTa code /// Similar to `register-module!`, this op will bypass the catalog search + /// + /// NOTE: Even if Hyperon is build without git support, this operation may still be used to + /// load existing modules from a git cache. That situation may occur if modules were fetched + /// earlier or by another tool that manages the module cache. However this operation requres + /// git support to actually clone or pull from a git repository. #[derive(Clone, Debug)] pub struct GitModuleOp { //TODO-HACK: This is a terrible horrible ugly hack that should be fixed ASAP @@ -1095,19 +1100,24 @@ pub(crate) mod pkg_mgmt_ops { impl Display for CatalogListOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "catalog-list") + write!(f, "catalog-list!") } } impl Grounded for CatalogListOp { fn type_(&self) -> Atom { - //TODO-FUTURE, when we decide on a friendly standard for var-args, it would be nice to - // allow an optional arg to list a specific catalog. For now we list all of them //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout - Atom::expr([ARROW_SYMBOL, UNIT_TYPE()]) + Atom::expr([ARROW_SYMBOL, ATOM_TYPE_SYMBOL, UNIT_TYPE()]) } - fn execute(&self, _args: &[Atom]) -> Result, ExecError> { + fn execute(&self, args: &[Atom]) -> Result, ExecError> { + let arg_error = "catalog-list! 
expects a catalog name, or \"all\" to list all available"; + let cat_name_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; + let cat_name = if let Atom::Symbol(cat_name) = cat_name_arg_atom { + cat_name.name() + } else { + return Err(ExecError::from(arg_error)); + }; fn list_catalog(cat: &dyn crate::metta::runner::ModuleCatalog) { if let Some(cat_iter) = cat.list() { @@ -1118,13 +1128,23 @@ pub(crate) mod pkg_mgmt_ops { } } - if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { - list_catalog(explicit_git_catalog); + let mut found_one = false; + if cat_name == "all" || cat_name == "git-modules" { + if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { + list_catalog(explicit_git_catalog); + found_one = true; + } } for cat in self.metta.environment().catalogs() { - list_catalog(cat); + if cat_name == "all" || cat_name == cat.display_name() { + list_catalog(cat); + found_one = true; + } } + if !found_one { + return Err(ExecError::from(format!("no catalog(s) identified by \"{cat_name}\""))); + } unit_result() } @@ -1151,7 +1171,7 @@ pub(crate) mod pkg_mgmt_ops { impl Display for CatalogUpdateOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "catalog-update") + write!(f, "catalog-update!") } } @@ -1160,22 +1180,39 @@ pub(crate) mod pkg_mgmt_ops { //TODO-FUTURE, when we decide on a friendly standard for var-args, it would be nice to // allow an optional arg to list a specific catalog. For now we list all of them //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout - Atom::expr([ARROW_SYMBOL, UNIT_TYPE()]) + Atom::expr([ARROW_SYMBOL, ATOM_TYPE_SYMBOL, UNIT_TYPE()]) } - fn execute(&self, _args: &[Atom]) -> Result, ExecError> { + fn execute(&self, args: &[Atom]) -> Result, ExecError> { + let arg_error = "catalog-update! expects a catalog name, or \"all\" to update all"; + let cat_name_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; + let cat_name = if let Atom::Symbol(cat_name) = cat_name_arg_atom { + cat_name.name() + } else { + return Err(ExecError::from(arg_error)); + }; - if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { - explicit_git_catalog.fetch_newest_for_all(UpdateMode::FetchLatest)?; + let mut found_one = false; + if cat_name == "all" || cat_name == "git-modules" { + if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { + explicit_git_catalog.fetch_newest_for_all(UpdateMode::FetchLatest)?; + found_one = true; + } } for cat in self.metta.environment().catalogs() { match cat.as_managed() { - Some(cat) => cat.fetch_newest_for_all(UpdateMode::FetchLatest)?, + Some(cat) => if cat_name == "all" || cat_name == cat.display_name() { + cat.fetch_newest_for_all(UpdateMode::FetchLatest)?; + found_one = true; + }, None => {} } } + if !found_one { + return Err(ExecError::from(format!("no catalog(s) identified by \"{cat_name}\""))); + } unit_result() } @@ -1202,7 +1239,7 @@ pub(crate) mod pkg_mgmt_ops { impl Display for CatalogClearOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "catalog-clear") + write!(f, "catalog-clear!") } } @@ -1211,18 +1248,36 @@ pub(crate) mod pkg_mgmt_ops { //TODO-FUTURE, when we decide on a friendly standard for var-args, it would be nice to // allow an optional arg to list a specific catalog. 
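As a sketch of the renamed interface introduced by this patch, the ops can be invoked from MeTTa roughly as follows; "all" and "git-modules" are the names handled explicitly in the code above, other names are matched against each catalog's display_name(), and the git-module! URL is hypothetical and assumes the op takes a repository URL as its argument:

!(catalog-list! all)            ; print the contents of every registered catalog
!(catalog-update! git-modules)  ; refresh only the explicitly fetched git modules
!(catalog-clear! all)           ; clear the local caches of all managed catalogs
!(git-module! "https://github.com/example/some-mod.git") ; hypothetical URL

Passing a name that matches no catalog returns an error of the form "no catalog(s) identified by ...".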
For now we list all of them //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout - Atom::expr([ARROW_SYMBOL, UNIT_TYPE()]) + Atom::expr([ARROW_SYMBOL, ATOM_TYPE_SYMBOL, UNIT_TYPE()]) } - fn execute(&self, _args: &[Atom]) -> Result, ExecError> { + fn execute(&self, args: &[Atom]) -> Result, ExecError> { + let arg_error = "catalog-clear! expects a catalog name, or \"all\" to clear all"; + let cat_name_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; + let cat_name = if let Atom::Symbol(cat_name) = cat_name_arg_atom { + cat_name.name() + } else { + return Err(ExecError::from(arg_error)); + }; - if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { - explicit_git_catalog.clear_all()?; + let mut found_one = false; + if cat_name == "all" || cat_name == "git-modules" { + if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { + explicit_git_catalog.clear_all()?; + found_one = true; + } } + for cat in self.metta.environment().catalogs().filter_map(|cat| cat.as_managed()) { - cat.clear_all()?; + if cat_name == "all" || cat_name == cat.display_name() { + cat.clear_all()?; + found_one = true; + } } + if !found_one { + return Err(ExecError::from(format!("no catalog(s) identified by \"{cat_name}\""))); + } unit_result() } @@ -1237,11 +1292,11 @@ pub(crate) mod pkg_mgmt_ops { let git_module_op = Atom::gnd(GitModuleOp::new(metta.clone())); tref.register_token(regex(r"git-module!"), move |_| { git_module_op.clone() }); let catalog_list_op = Atom::gnd(CatalogListOp::new(metta.clone())); - tref.register_token(regex(r"catalog-list"), move |_| { catalog_list_op.clone() }); + tref.register_token(regex(r"catalog-list!"), move |_| { catalog_list_op.clone() }); let catalog_update_op = Atom::gnd(CatalogUpdateOp::new(metta.clone())); - tref.register_token(regex(r"catalog-update"), move |_| { catalog_update_op.clone() }); + tref.register_token(regex(r"catalog-update!"), move |_| { catalog_update_op.clone() }); let catalog_clear_op = Atom::gnd(CatalogClearOp::new(metta.clone())); - tref.register_token(regex(r"catalog-clear"), move |_| { catalog_clear_op.clone() }); + tref.register_token(regex(r"catalog-clear!"), move |_| { catalog_clear_op.clone() }); } } From 1a4da7615a7444be0da24c33fe7a777441b19a7e Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 25 May 2024 21:08:23 +0900 Subject: [PATCH 60/77] Moving Module tests behind test cfg gate to squish warnings deleting obsolete comments --- lib/src/metta/runner/modules/mod.rs | 177 ++++++++++++++-------------- lib/src/metta/runner/stdlib.rs | 4 - 2 files changed, 91 insertions(+), 90 deletions(-) diff --git a/lib/src/metta/runner/modules/mod.rs b/lib/src/metta/runner/modules/mod.rs index 768d7333a..1b2a1e141 100644 --- a/lib/src/metta/runner/modules/mod.rs +++ b/lib/src/metta/runner/modules/mod.rs @@ -684,115 +684,120 @@ pub enum ResourceKey<'a> { // TESTS //-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+- -#[derive(Debug)] -struct OuterLoader; +#[cfg(test)] +mod test { + use super::*; -impl ModuleLoader for OuterLoader { - fn load(&self, context: &mut RunContext) -> Result<(), String> { - let space = DynSpace::new(GroundingSpace::new()); - context.init_self_module(space, None); + #[derive(Debug)] + struct OuterLoader; - let parser = SExprParser::new("outer-module-test-atom"); - context.push_parser(Box::new(parser)); + impl ModuleLoader for OuterLoader { + fn load(&self, context: &mut RunContext) -> Result<(), 
String> { + let space = DynSpace::new(GroundingSpace::new()); + context.init_self_module(space, None); - Ok(()) + let parser = SExprParser::new("outer-module-test-atom"); + context.push_parser(Box::new(parser)); + + Ok(()) + } } -} -#[derive(Debug)] -struct InnerLoader; + #[derive(Debug)] + struct InnerLoader; -impl ModuleLoader for InnerLoader { - fn load(&self, context: &mut RunContext) -> Result<(), String> { - let space = DynSpace::new(GroundingSpace::new()); - context.init_self_module(space, None); + impl ModuleLoader for InnerLoader { + fn load(&self, context: &mut RunContext) -> Result<(), String> { + let space = DynSpace::new(GroundingSpace::new()); + context.init_self_module(space, None); - let parser = SExprParser::new("inner-module-test-atom"); - context.push_parser(Box::new(parser)); + let parser = SExprParser::new("inner-module-test-atom"); + context.push_parser(Box::new(parser)); - Ok(()) + Ok(()) + } } -} -/// This tests loading a module as a sub-module of another loaded module using a hierarchical -/// namespace path -#[test] -fn hierarchical_module_import_test() { - let runner = Metta::new(Some(EnvBuilder::test_env())); - - //Make sure we get a reasonable error, if we try to load a sub-module to a module that doesn't exist - let result = runner.load_module_direct(Box::new(InnerLoader), "outer:inner"); - assert!(result.is_err()); - - //Make sure we can load sub-modules sucessfully - let _outer_mod_id = runner.load_module_direct(Box::new(OuterLoader), "outer").unwrap(); - let _inner_mod_id = runner.load_module_direct(Box::new(InnerLoader), "outer:inner").unwrap(); - - //Make sure we load the outer module sucessfully and can match the outer module's atom, but not - // the inner module's - let result = runner.run(SExprParser::new("!(import! &self outer)")); - assert_eq!(result, Ok(vec![vec![expr!()]])); - let result = runner.run(SExprParser::new("!(match &self outer-module-test-atom found!)")); - assert_eq!(result, Ok(vec![vec![sym!("found!")]])); - let result = runner.run(SExprParser::new("!(match &self inner-module-test-atom found!)")); - assert_eq!(result, Ok(vec![vec![]])); - - //Now import the inner module by relative module namespace, and check to make sure we can match - // its atom - let result = runner.run(SExprParser::new("!(import! 
&self outer:inner)")); - assert_eq!(result, Ok(vec![vec![expr!()]])); - let result = runner.run(SExprParser::new("!(match &self inner-module-test-atom found!)")); - assert_eq!(result, Ok(vec![vec![sym!("found!")]])); -} + /// This tests loading a module as a sub-module of another loaded module using a hierarchical + /// namespace path + #[test] + fn hierarchical_module_import_test() { + let runner = Metta::new(Some(EnvBuilder::test_env())); -#[derive(Debug)] -struct RelativeOuterLoader; + //Make sure we get a reasonable error, if we try to load a sub-module to a module that doesn't exist + let result = runner.load_module_direct(Box::new(InnerLoader), "outer:inner"); + assert!(result.is_err()); + + //Make sure we can load sub-modules sucessfully + let _outer_mod_id = runner.load_module_direct(Box::new(OuterLoader), "outer").unwrap(); + let _inner_mod_id = runner.load_module_direct(Box::new(InnerLoader), "outer:inner").unwrap(); -impl ModuleLoader for RelativeOuterLoader { - fn load(&self, context: &mut RunContext) -> Result<(), String> { - let space = DynSpace::new(GroundingSpace::new()); - context.init_self_module(space, None); + //Make sure we load the outer module sucessfully and can match the outer module's atom, but not + // the inner module's + let result = runner.run(SExprParser::new("!(import! &self outer)")); + assert_eq!(result, Ok(vec![vec![expr!()]])); + let result = runner.run(SExprParser::new("!(match &self outer-module-test-atom found!)")); + assert_eq!(result, Ok(vec![vec![sym!("found!")]])); + let result = runner.run(SExprParser::new("!(match &self inner-module-test-atom found!)")); + assert_eq!(result, Ok(vec![vec![]])); - let _inner_mod_id = context.load_module_direct(Box::new(InnerLoader), "self:inner").unwrap(); + //Now import the inner module by relative module namespace, and check to make sure we can match + // its atom + let result = runner.run(SExprParser::new("!(import! 
&self outer:inner)")); + assert_eq!(result, Ok(vec![vec![expr!()]])); + let result = runner.run(SExprParser::new("!(match &self inner-module-test-atom found!)")); + assert_eq!(result, Ok(vec![vec![sym!("found!")]])); + } - let parser = SExprParser::new("outer-module-test-atom"); - context.push_parser(Box::new(parser)); + #[derive(Debug)] + struct RelativeOuterLoader; - //Test to see if I can resolve the module we just loaded, - assert!(context.get_module_by_name("self:inner").is_ok()); + impl ModuleLoader for RelativeOuterLoader { + fn load(&self, context: &mut RunContext) -> Result<(), String> { + let space = DynSpace::new(GroundingSpace::new()); + context.init_self_module(space, None); - Ok(()) + let _inner_mod_id = context.load_module_direct(Box::new(InnerLoader), "self:inner").unwrap(); + + let parser = SExprParser::new("outer-module-test-atom"); + context.push_parser(Box::new(parser)); + + //Test to see if I can resolve the module we just loaded, + assert!(context.get_module_by_name("self:inner").is_ok()); + + Ok(()) + } } -} -/// This tests loading a sub-module from another module's runner, using a relative namespace path -#[test] -fn relative_submodule_import_test() { - let runner = Metta::new(Some(EnvBuilder::test_env())); + /// This tests loading a sub-module from another module's runner, using a relative namespace path + #[test] + fn relative_submodule_import_test() { + let runner = Metta::new(Some(EnvBuilder::test_env())); - //Load the "outer" module, which will load the inner module as part of its loader - let _outer_mod_id = runner.load_module_direct(Box::new(RelativeOuterLoader), "outer").unwrap(); + //Load the "outer" module, which will load the inner module as part of its loader + let _outer_mod_id = runner.load_module_direct(Box::new(RelativeOuterLoader), "outer").unwrap(); - // runner.display_loaded_modules(); + // runner.display_loaded_modules(); - //Make sure we didn't accidentally load "inner" at the top level - assert!(runner.get_module_by_name("inner").is_err()); + //Make sure we didn't accidentally load "inner" at the top level + assert!(runner.get_module_by_name("inner").is_err()); - //Confirm we didn't end up with a module called "self" - assert!(runner.get_module_by_name("self:inner").is_err()); - assert!(runner.get_module_by_name("self").is_err()); + //Confirm we didn't end up with a module called "self" + assert!(runner.get_module_by_name("self:inner").is_err()); + assert!(runner.get_module_by_name("self").is_err()); - //Now make sure we can actually resolve the loaded sub-module - runner.get_module_by_name("outer:inner").unwrap(); + //Now make sure we can actually resolve the loaded sub-module + runner.get_module_by_name("outer:inner").unwrap(); - //LP-TODO-NEXT, test that I can add a second inner from the runner, by adding "top:outer:inner2", - // and then that I can import it directly into "outer" from within the runner's context using the "self:inner2" mod path + //LP-TODO-NEXT, test that I can add a second inner from the runner, by adding "top:outer:inner2", + // and then that I can import it directly into "outer" from within the runner's context using the "self:inner2" mod path -} + } -//LP-TODO-NEXT, Make a test for an inner-loader that throws an error, blocking the outer-loader from loading sucessfully, -// and make sure neither module is loaded into the named index -// -//Also test the case where the inner loader is sucessul, but then the outer loader throws an error. 
Also make sure neither -// module is loaded into the namespace -// + //LP-TODO-NEXT, Make a test for an inner-loader that throws an error, blocking the outer-loader from loading sucessfully, + // and make sure neither module is loaded into the named index + // + //Also test the case where the inner loader is sucessul, but then the outer loader throws an error. Also make sure neither + // module is loaded into the namespace + // +} \ No newline at end of file diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index b540f6337..2d7d6c6b6 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -1177,8 +1177,6 @@ pub(crate) mod pkg_mgmt_ops { impl Grounded for CatalogUpdateOp { fn type_(&self) -> Atom { - //TODO-FUTURE, when we decide on a friendly standard for var-args, it would be nice to - // allow an optional arg to list a specific catalog. For now we list all of them //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout Atom::expr([ARROW_SYMBOL, ATOM_TYPE_SYMBOL, UNIT_TYPE()]) } @@ -1245,8 +1243,6 @@ pub(crate) mod pkg_mgmt_ops { impl Grounded for CatalogClearOp { fn type_(&self) -> Atom { - //TODO-FUTURE, when we decide on a friendly standard for var-args, it would be nice to - // allow an optional arg to list a specific catalog. For now we list all of them //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout Atom::expr([ARROW_SYMBOL, ATOM_TYPE_SYMBOL, UNIT_TYPE()]) } From 86cf8209db30324d5df4e714acf32aaff174fa33 Mon Sep 17 00:00:00 2001 From: luketpeterson <36806965+luketpeterson@users.noreply.github.com> Date: Sat, 25 May 2024 21:26:50 +0900 Subject: [PATCH 61/77] Update python/CMakeLists.txt Co-authored-by: Vitaly Bogdanov --- python/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index aa226133f..328ceadd1 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10.2) project(hyperonpy) -option(GIT "Adds git features to hyperon library; requires OpenSSL" ON) +option(GIT "Adds git features to hyperon library; requires OpenSSL and Zlib" ON) enable_testing() add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure) From 8182306bcaa6d2662b9f932ea2d431dfb5de4804 Mon Sep 17 00:00:00 2001 From: luketpeterson <36806965+luketpeterson@users.noreply.github.com> Date: Sat, 25 May 2024 21:27:14 +0900 Subject: [PATCH 62/77] Update c/CMakeLists.txt Co-authored-by: Vitaly Bogdanov --- c/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index 08a725ee6..4c844fa4c 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.15) project(hyperonc) -option(GIT "Adds git features to hyperon library; requires OpenSSL" ON) +option(GIT "Adds git features to hyperon library; requires OpenSSL and Zlib" ON) enable_testing() option(BUILD_SHARED_LIBS "Build shared library" ON) From a4ad66f1413f6ad8532f763caa58a6fc8cc37b3f Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sun, 26 May 2024 10:55:15 +0900 Subject: [PATCH 63/77] Removing unneeded CMake explicit directory, left-over from an earlier experiment --- c/tests/CMakeLists.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/c/tests/CMakeLists.txt b/c/tests/CMakeLists.txt index 2fe11ebda..b3bcff3e8 100644 --- a/c/tests/CMakeLists.txt +++ b/c/tests/CMakeLists.txt @@ -6,26 +6,21 
@@ set(TEST_SOURCES ) add_executable(check_atom check_atom.c ${TEST_SOURCES}) -include_directories(${HYPERONC_TARGET_DIR}) target_link_libraries(check_atom hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_atom COMMAND check_atom) add_executable(check_space check_space.c ${TEST_SOURCES}) -include_directories(${HYPERONC_TARGET_DIR}) target_link_libraries(check_space hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_space COMMAND check_space) add_executable(check_sexpr_parser check_sexpr_parser.c ${TEST_SOURCES}) -include_directories(${HYPERONC_TARGET_DIR}) target_link_libraries(check_sexpr_parser hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_sexpr_parser COMMAND check_sexpr_parser) add_executable(check_types check_types.c ${TEST_SOURCES}) -include_directories(${HYPERONC_TARGET_DIR}) target_link_libraries(check_types hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_types COMMAND check_types) add_executable(check_runner check_runner.c ${TEST_SOURCES}) -include_directories(${HYPERONC_TARGET_DIR}) target_link_libraries(check_runner hyperonc-shared CONAN_PKG::libcheck) add_test(NAME check_runner COMMAND check_runner) From bb15e1cbf76093dc4d56e86ee66d18f158252e23 Mon Sep 17 00:00:00 2001 From: luketpeterson <36806965+luketpeterson@users.noreply.github.com> Date: Sun, 26 May 2024 11:01:21 +0900 Subject: [PATCH 64/77] Update CMakeLists.txt Co-authored-by: Vitaly Bogdanov --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ab27037a9..c999b0c22 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ project(hyperon) include(ExternalProject) -option(GIT "Adds git features to hyperon library; requires OpenSSL" ON) +option(GIT "Adds git features to hyperon library; requires OpenSSL and Zlib" ON) set(HYPERONC_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/hyperonc-prefix") message(STATUS "HYPERONC_INSTALL_PREFIX = ${HYPERONC_INSTALL_PREFIX}") From 62c5b6b8bf12b7b8ba919d4d0fe8eebc29b1db95 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sun, 26 May 2024 11:13:37 +0900 Subject: [PATCH 65/77] Changing default git catalog to https://github.com/trueagi-io/metta-catalog --- lib/src/metta/runner/environment.default.metta | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/src/metta/runner/environment.default.metta b/lib/src/metta/runner/environment.default.metta index 9b63f12e3..a7d1d9c0c 100644 --- a/lib/src/metta/runner/environment.default.metta +++ b/lib/src/metta/runner/environment.default.metta @@ -11,8 +11,7 @@ ) (#gitCatalog - ; TODO, Default catalog should be moved to trueagi github account - (#name "luketpeterson-catalog") - (#url "https://github.com/luketpeterson/metta-mod-catalog.git") + (#name "trueagi-catalog") + (#url "https://github.com/trueagi-io/metta-catalog.git") (#refreshTime 259200) ; 3 days = 3 days * 24 hrs * 60 minutes * 60 seconds -) \ No newline at end of file +) From 34bf21a9e6c6ea7b830000f5dc38aba55fec3737 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sun, 26 May 2024 11:45:08 +0900 Subject: [PATCH 66/77] Clarifying behavior of strict flag in comment --- lib/src/metta/runner/pkg_mgmt/catalog.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index cadd5e952..2a1fb2b52 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -318,7 +318,8 @@ impl PkgInfo { //If `version_req` is specified in the 
dep entry, then use it to constrain the catalog search version_req = entry.version_req.as_ref(); } else { - //If the PkgInfo doesn't have an entry for the module, it's an error if the PkgInfo is flagged as "strict" + //If the PkgInfo doesn't have an entry for the module and the PkgInfo is flagged as "strict" + // then we will not attempt to resolve the module any further, and the resolution will fail. if self.strict { return Ok(None); } From 23a0d30e0fb762fc5589c63931dcc0edf93c3070 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 27 May 2024 12:02:20 +0900 Subject: [PATCH 67/77] Fixing CMake files so -DGIT=OFF is correctly propagated and ensures build of both hyperonpy and hyperonc is sucessful on systems without OpenSSL --- CMakeLists.txt | 2 ++ c/CMakeLists.txt | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c999b0c22..194ba0d5d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,6 +19,7 @@ ExternalProject_Add( PREFIX "${HYPERONC_INSTALL_PREFIX}" SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/c" CMAKE_ARGS + -DGIT=${GIT} "-DCMAKE_INSTALL_PREFIX=${HYPERONC_INSTALL_PREFIX}" "-DCMAKE_BUILD_TYPE=${BUILD_CONFIGURATION}" "-DCARGO_ARGS=${CARGO_ARGS}" @@ -34,6 +35,7 @@ ExternalProject_Add( SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/python" DEPENDS hyperonc CMAKE_ARGS + -DGIT=${GIT} "-DHYPERONC_INSTALL_PREFIX=${HYPERONC_INSTALL_PREFIX}" "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${CMAKE_CURRENT_SOURCE_DIR}/python" "-DCMAKE_BUILD_TYPE=${BUILD_CONFIGURATION}" diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index 4c844fa4c..ad26930b0 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -78,8 +78,10 @@ set(STATIC_LIBRARY_INSTALL_PATH "${BINARY_INSTALL_PATH}/${HYPERONC_STATIC_LIB_FI include(CMakePackageConfigHelpers) if(BUILD_SHARED_LIBS) - find_package(OpenSSL REQUIRED) - find_package(ZLIB REQUIRED) + if(GIT) + find_package(OpenSSL REQUIRED) + find_package(ZLIB REQUIRED) + endif(GIT) set(HYPERONC_SHARED_LIB_FILE ${CMAKE_SHARED_LIBRARY_PREFIX}hyperonc${CMAKE_SHARED_LIBRARY_SUFFIX}) set(HYPERONC_SHARED_LIB_PATH ${HYPERONC_TARGET_DIR}/${HYPERONC_SHARED_LIB_FILE}) @@ -100,7 +102,9 @@ if(BUILD_SHARED_LIBS) # required to import hyperonc-shared by name not by relative path IMPORTED_NO_SONAME TRUE ) - target_link_libraries(hyperonc-shared INTERFACE OpenSSL::SSL OpenSSL::Crypto ZLIB::ZLIB) + if(GIT) + target_link_libraries(hyperonc-shared INTERFACE OpenSSL::SSL OpenSSL::Crypto ZLIB::ZLIB) + endif(GIT) add_dependencies(hyperonc-shared copy-hyperonc-shared build-hyperonc) install(FILES "${HYPERONC_SHARED_LIB_PATH}" From 2d827ec2ed3697acf020d03741628f8a5945579f Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 27 May 2024 12:28:08 +0900 Subject: [PATCH 68/77] Changing MettaMod's pkg_info API to return an option, matching the loader API. 
This meant moving resolve_module from a pkg_info module to a function, but that's probably the right thing to do anyway --- lib/src/metta/runner/mod.rs | 4 +- lib/src/metta/runner/modules/mod.rs | 15 +-- lib/src/metta/runner/pkg_mgmt/catalog.rs | 135 ++++++++++++----------- 3 files changed, 76 insertions(+), 78 deletions(-) diff --git a/lib/src/metta/runner/mod.rs b/lib/src/metta/runner/mod.rs index e048153ea..89f212ddb 100644 --- a/lib/src/metta/runner/mod.rs +++ b/lib/src/metta/runner/mod.rs @@ -879,7 +879,7 @@ impl<'input> RunContext<'_, '_, 'input> { #[cfg(feature = "pkg_mgmt")] fn load_module_internal(&mut self, mod_path: &str, parent_mod_id: ModId) -> Result { self.in_mod_context(parent_mod_id, |context| { - match context.module().pkg_info().resolve_module(context, mod_path)? { + match resolve_module(context.module().pkg_info(), context, mod_path)? { Some((loader, descriptor)) => { context.get_or_init_module_with_descriptor(mod_path, descriptor, loader) }, @@ -907,7 +907,7 @@ impl<'input> RunContext<'_, '_, 'input> { let parent_mod_id = self.load_module_parents(mod_name)?; let normalized_mod_path = self.normalize_module_name(mod_name)?; self.in_mod_context(parent_mod_id, |context| { - match context.module().pkg_info().resolve_module(context, &normalized_mod_path)? { + match resolve_module(context.module().pkg_info(), context, &normalized_mod_path)? { Some((loader, _descriptor)) => { loader.get_resource(res_key) }, diff --git a/lib/src/metta/runner/modules/mod.rs b/lib/src/metta/runner/modules/mod.rs index 1b2a1e141..97fa6e58f 100644 --- a/lib/src/metta/runner/modules/mod.rs +++ b/lib/src/metta/runner/modules/mod.rs @@ -54,9 +54,6 @@ impl ModId { } } -#[cfg(feature = "pkg_mgmt")] -pub(crate) static DEFAULT_PKG_INFO: OnceLock = OnceLock::new(); - /// Contains state associated with a loaded MeTTa module #[derive(Debug)] pub struct MettaMod { @@ -302,16 +299,10 @@ impl MettaMod { mod_name_from_path(&self.mod_path) } + /// Returns a reference to the module's [PkgInfo], if it has one #[cfg(feature = "pkg_mgmt")] - pub fn pkg_info(&self) -> &PkgInfo { - let default_pkg_info = DEFAULT_PKG_INFO.get_or_init(|| PkgInfo::default()); - match &self.loader { - Some(loader) => match loader.pkg_info() { - Some(pkg_info) => pkg_info, - _ => default_pkg_info - }, - None => default_pkg_info - } + pub fn pkg_info(&self) -> Option<&PkgInfo> { + self.loader.as_ref().and_then(|loader| loader.pkg_info()) } pub fn space(&self) -> &DynSpace { diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index 2a1fb2b52..d4b47f77a 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -251,11 +251,15 @@ pub(crate) fn find_newest_module(mods_iter: impl Iterator #[derive(Clone, Debug, Default, Deserialize)] pub struct PkgInfo { - /// The public name of the module. Should be composed of alpha-numeric characters with '-' and '_' - /// characters allowed. Must not contain any other punctuation. + /// The public name of the module + /// + /// Should be composed of alpha-numeric characters with '-' and '_' characters allowed. 
Must not + /// contain any other punctuation pub name: Option, - // The version of this module + /// The version of this module + /// + /// A `None` or missing version is considered inferior to all other versions #[serde(default)] pub version: Option, @@ -264,7 +268,7 @@ pub struct PkgInfo { #[serde(default)] pub strict: bool, - /// Entries mapping module names to requirements for each dependency sub-module + /// Requirements for each dependency sub-module /// /// A Duplicate entry for a given sub-module in the deps list is an error. #[serde(default)] @@ -290,81 +294,84 @@ pub struct DepEntry { } impl PkgInfo { - /// Resolves which module to load from which available location or catalog, and returns the [ModuleLoader] to - /// load that module - pub fn resolve_module(&self, context: &RunContext, name_path: &str) -> Result, ModuleDescriptor)>, String> { - let mod_name = mod_name_from_path(name_path); - - //Make sure the name is a legal module name - if !module_name_is_legal(mod_name) { - return Err(format!("Illegal module name: {mod_name}")); + /// Returns the version of the package + pub fn version(&self) -> Option<&semver::Version> { + self.version.as_ref() + } + /// Returns the version of the package as a [semver compliant](https://semver.org) string of bytes + pub fn version_bytes(&self) -> Result, String> { + match self.version() { + Some(ver) => Ok(format!("{ver}").into_bytes()), + None => Err("no version available".to_string()) } + } +} - //See if we have a pkg_info dep entry for the module - let mut version_req = None; - if let Some(entry) = self.deps.get(mod_name) { +/// Resolves which module to load from which available location or catalog, and returns the [ModuleLoader] to +/// load that module +pub(crate) fn resolve_module(pkg_info: Option<&PkgInfo>, context: &RunContext, name_path: &str) -> Result, ModuleDescriptor)>, String> { + let mod_name = mod_name_from_path(name_path); - //If path is explicitly specified in the dep entry, then we must load the module at the - // specified path, and cannot search anywhere else - if let Some(path) = &entry.fs_path { - return loader_for_module_at_path(context.metta.environment().fs_mod_formats(), path, Some(mod_name), context.module().resource_dir()); - } + //Make sure the name is a legal module name + if !module_name_is_legal(mod_name) { + return Err(format!("Illegal module name: {mod_name}")); + } - //Get the module if it's specified with git keys - if let Some(pair) = entry.git_location.get_loader_in_explicit_catalog(mod_name, UpdateMode::FetchIfMissing, context.metta.environment())? { - return Ok(Some(pair)); - } + //See if we have a pkg_info dep entry for the module + let mut version_req = None; + if let Some(entry) = pkg_info.as_ref().and_then(|pkg_info| pkg_info.deps.get(mod_name)) { - //If `version_req` is specified in the dep entry, then use it to constrain the catalog search - version_req = entry.version_req.as_ref(); - } else { - //If the PkgInfo doesn't have an entry for the module and the PkgInfo is flagged as "strict" - // then we will not attempt to resolve the module any further, and the resolution will fail. 
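The reworded comment above spells out the strict-mode behaviour: a strict `PkgInfo` with no dep entry for the requested module stops resolution before any catalog is searched. A tiny self-contained sketch of that rule follows; only the `strict` and `deps` fields are modelled, and the helper name is made up for illustration.

```rust
use std::collections::HashMap;

// Reduced stand-ins: only the fields relevant to the strict check are modelled.
#[derive(Default)]
struct DepEntry;

#[derive(Default)]
struct PkgInfo {
    strict: bool,
    deps: HashMap<String, DepEntry>,
}

/// With a strict PkgInfo and no dep entry, resolution fails immediately; otherwise
/// the resolver goes on to consult the resource-dir and environment catalogs.
fn may_search_catalogs(pkg_info: &PkgInfo, mod_name: &str) -> bool {
    pkg_info.deps.contains_key(mod_name) || !pkg_info.strict
}

fn main() {
    let mut strict_pkg = PkgInfo { strict: true, ..Default::default() };
    strict_pkg.deps.insert("known-mod".to_string(), DepEntry);
    assert!(may_search_catalogs(&strict_pkg, "known-mod"));
    assert!(!may_search_catalogs(&strict_pkg, "unknown-mod"));

    let lenient_pkg = PkgInfo::default();
    assert!(may_search_catalogs(&lenient_pkg, "anything"));
}
```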
- if self.strict { - return Ok(None); - } + //If path is explicitly specified in the dep entry, then we must load the module at the + // specified path, and cannot search anywhere else + if let Some(path) = &entry.fs_path { + return loader_for_module_at_path(context.metta.environment().fs_mod_formats(), path, Some(mod_name), context.module().resource_dir()); } - //Search the module's resource dir before searching the environment's catalogs - // This allows a module to import another module inside its directory or as a peer of itself for - // single-file modules, without including an explicit PkgInfo dep entry. On the other hand, If we - // want to require module authors to include a dep entry to be explicit about their dependencies, we - // can remove this catalog - let resource_dir_catalog; - let mut local_catalogs = vec![]; - if let Some(mod_resource_dir) = context.module().resource_dir() { - if context.metta.environment().working_dir() != Some(mod_resource_dir) { - resource_dir_catalog = DirCatalog::new(PathBuf::from(mod_resource_dir), context.metta().environment().fs_mod_formats.clone()); - local_catalogs.push(&resource_dir_catalog as &dyn ModuleCatalog); - } + //Get the module if it's specified with git keys + if let Some(pair) = entry.git_location.get_loader_in_explicit_catalog(mod_name, UpdateMode::FetchIfMissing, context.metta.environment())? { + return Ok(Some(pair)); } - //Search the catalogs, starting with the resource dir, and continuing to the runner's Environment - for catalog in local_catalogs.into_iter().chain(context.metta.environment().catalogs()) { - log::trace!("Looking for module: \"{mod_name}\" inside {catalog:?}"); - match catalog.lookup_newest_with_version_req(mod_name, version_req) { - Some(descriptor) => { - log::info!("Found module: \"{mod_name}\" inside {:?}", catalog.display_name()); - log::info!("Preparing to load module: \'{}\' as \'{}\'", descriptor.name, name_path); - return Ok(Some((catalog.get_loader(&descriptor)?, descriptor))) - }, - None => {} + //If `version_req` is specified in the dep entry, then use it to constrain the catalog search + version_req = entry.version_req.as_ref(); + } else { + //If the PkgInfo doesn't have an entry for the module and the PkgInfo is flagged as "strict" + // then we will not attempt to resolve the module any further, and the resolution will fail. + if let Some(pkg_info) = &pkg_info { + if pkg_info.strict { + return Ok(None); } } - - Ok(None) } - /// Returns the version of the package - pub fn version(&self) -> Option<&semver::Version> { - self.version.as_ref() + + //Search the module's resource dir before searching the environment's catalogs + // This allows a module to import another module inside its directory or as a peer of itself for + // single-file modules, without including an explicit PkgInfo dep entry. 
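The comment above (it continues on the next line) motivates searching the module's own resource directory before the environment's catalogs. Below is a rough sketch of that priority ordering with a heavily reduced catalog trait; the trait and method names are stand-ins, not the real `ModuleCatalog` API.

```rust
use std::collections::HashMap;

// A drastically reduced stand-in for a module catalog: look a module up by name.
trait Catalog {
    fn lookup(&self, mod_name: &str) -> Option<String>;
}

struct MapCatalog(HashMap<String, String>);

impl Catalog for MapCatalog {
    fn lookup(&self, mod_name: &str) -> Option<String> {
        self.0.get(mod_name).cloned()
    }
}

/// Search the local (resource-dir) catalogs first, then the environment's, returning
/// the first hit so a module shipped next to the importer shadows a global copy.
fn resolve(mod_name: &str, local: &[&dyn Catalog], env: &[&dyn Catalog]) -> Option<String> {
    local.iter().chain(env.iter()).find_map(|cat| cat.lookup(mod_name))
}

fn main() {
    let resource_dir = MapCatalog(
        [("helper".to_string(), "resource-dir/helper".to_string())].into_iter().collect(),
    );
    let environment = MapCatalog(
        [
            ("helper".to_string(), "env/helper".to_string()),
            ("stdlib".to_string(), "env/stdlib".to_string()),
        ]
        .into_iter()
        .collect(),
    );

    let local: &[&dyn Catalog] = &[&resource_dir];
    let env: &[&dyn Catalog] = &[&environment];
    assert_eq!(resolve("helper", local, env).as_deref(), Some("resource-dir/helper"));
    assert_eq!(resolve("stdlib", local, env).as_deref(), Some("env/stdlib"));
    assert_eq!(resolve("missing", local, env), None);
}
```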
On the other hand, If we + // want to require module authors to include a dep entry to be explicit about their dependencies, we + // can remove this catalog + let resource_dir_catalog; + let mut local_catalogs = vec![]; + if let Some(mod_resource_dir) = context.module().resource_dir() { + if context.metta.environment().working_dir() != Some(mod_resource_dir) { + resource_dir_catalog = DirCatalog::new(PathBuf::from(mod_resource_dir), context.metta().environment().fs_mod_formats.clone()); + local_catalogs.push(&resource_dir_catalog as &dyn ModuleCatalog); + } } - /// Returns the version of the package as a [semver compliant](https://semver.org) string of bytes - pub fn version_bytes(&self) -> Result, String> { - match self.version() { - Some(ver) => Ok(format!("{ver}").into_bytes()), - None => Err("no version available".to_string()) + + //Search the catalogs, starting with the resource dir, and continuing to the runner's Environment + for catalog in local_catalogs.into_iter().chain(context.metta.environment().catalogs()) { + log::trace!("Looking for module: \"{mod_name}\" inside {catalog:?}"); + match catalog.lookup_newest_with_version_req(mod_name, version_req) { + Some(descriptor) => { + log::info!("Found module: \"{mod_name}\" inside {:?}", catalog.display_name()); + log::info!("Preparing to load module: \'{}\' as \'{}\'", descriptor.name, name_path); + return Ok(Some((catalog.get_loader(&descriptor)?, descriptor))) + }, + None => {} } } + + Ok(None) } /// Internal function to get a loader for a module at a specific file system path, by trying each FsModuleFormat in order From 21940bf9c0d650e5f4c32e1eb25ca927bcb57cda Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 27 May 2024 12:41:40 +0900 Subject: [PATCH 69/77] Making the documentation of ModuleDescriptor a little more clear --- lib/src/metta/runner/pkg_mgmt/catalog.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index d4b47f77a..10c3a21d9 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -691,12 +691,14 @@ fn visit_modules_in_dir_using_mod_formats(fmts: &[Box], dir_ } } -/// A data structure that uniquely identifies an exact version of a module with a particular provenance +/// A data structure that uniquely identifies an exact instance of a module /// /// If two modules have the same ModuleDescriptor, they are considered to be the same module /// -/// NOTE: It is possible for a module to have both a version and a uid. Module version uniqueness is -/// enforced by the catalog(s), and two catalogs may disagree +/// The uid field encodes particulars about a module so it will never be mistaken for another copy +/// or variation of the module even if the version field is the same. For example, a module loaded +/// from the file system will use the uid to hash the path, while a module fetched from git will +/// hash the url and branch. 
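The catalog search above delegates version selection to `lookup_newest_with_version_req`, and the new `PkgInfo` docs note that a missing version is considered inferior to any concrete one. Rust's `Option` ordering (`None < Some(_)`) captures that rule directly; here is a small sketch of a newest-version selection over simplified `(name, version)` pairs, with a plain tuple standing in for the real semver type.

```rust
/// Picks the entry with the highest version. `None` sorts below every `Some`, so an
/// unversioned module only wins when nothing versioned is available.
fn find_newest<'a, I>(mods: I) -> Option<&'a (String, Option<(u64, u64, u64)>)>
where
    I: Iterator<Item = &'a (String, Option<(u64, u64, u64)>)>,
{
    mods.max_by_key(|(_, version)| *version)
}

fn main() {
    let available = vec![
        ("my-mod".to_string(), None),
        ("my-mod".to_string(), Some((0, 9, 5))),
        ("my-mod".to_string(), Some((1, 2, 0))),
    ];
    assert_eq!(find_newest(available.iter()).unwrap().1, Some((1, 2, 0)));

    let only_unversioned = vec![("my-mod".to_string(), None)];
    assert_eq!(find_newest(only_unversioned.iter()).unwrap().1, None);
}
```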
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)] pub struct ModuleDescriptor { name: String, From a4d93698aeb0aaeab7924a831389da0d15c68ea3 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 27 May 2024 13:22:03 +0900 Subject: [PATCH 70/77] Moving GitCatalog::register_mod from pub(crate) to fully private --- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index d3e22a386..87556870e 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -208,11 +208,12 @@ impl GitCatalog { } Ok(new_self) } - /// Registers a new module in the catalog with a specified remote location, and returns the [ModuleDescriptor] to refer to that module + /// Registers a new module in the catalog with a specified remote location, and returns + /// the [ModuleDescriptor] to refer to that module /// /// WARNING: if a catalog is synced to an upstream source, the upstream source will - /// eventually overwrite anything you register with this method - pub(crate) fn register_mod(&self, mod_name: &str, version: Option<&semver::Version>, git_location: &ModuleGitLocation) -> Result { + /// eventually overwrite anything you register with this method. + fn register_mod(&self, mod_name: &str, version: Option<&semver::Version>, git_location: &ModuleGitLocation) -> Result { let descriptor = { let mut catalog_ref = self.catalog.lock().unwrap(); catalog_ref.as_mut().unwrap().add(CatalogFileMod::new(mod_name.to_string(), version.cloned(), git_location.clone()))? From a276a0ac64de30e4e3152f2b35b06a87b1d3e517 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Tue, 4 Jun 2024 10:00:59 +0900 Subject: [PATCH 71/77] Slightly simplifying the amount of code that switches based on the pkg_mgmt feature. 
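The ModuleDescriptor documentation above explains that the `uid` keeps two instances of the "same" module apart when their provenance differs: a filesystem path versus a git URL and branch. A self-contained sketch of that idea using `DefaultHasher` follows; the exact hashing scheme in the crate may differ, this only illustrates the disambiguation.

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

/// Simplified descriptor: same name and version, but different provenance, must not collide.
#[derive(Debug, PartialEq, Eq)]
struct Descriptor {
    name: String,
    version: Option<String>,
    uid: u64,
}

fn uid_for_path(path: &str) -> u64 {
    let mut h = DefaultHasher::new();
    path.hash(&mut h);
    h.finish()
}

fn uid_for_git(url: &str, branch: Option<&str>) -> u64 {
    let mut h = DefaultHasher::new();
    url.hash(&mut h);
    branch.hash(&mut h);
    h.finish()
}

fn main() {
    let from_disk = Descriptor {
        name: "my-mod".into(),
        version: Some("0.1.0".into()),
        uid: uid_for_path("/home/user/mods/my-mod"),
    };
    let from_git = Descriptor {
        name: "my-mod".into(),
        version: Some("0.1.0".into()),
        uid: uid_for_git("https://example.com/my-mod.git", Some("main")),
    };
    // Identical name and version, distinct uid: treated as different module instances.
    assert_ne!(from_disk, from_git);
}
```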
Mainly eliminating dual implementations of ModuleInitState::decompose --- lib/src/metta/runner/environment.rs | 15 +++++++++------ lib/src/metta/runner/mod.rs | 8 +++++--- lib/src/metta/runner/modules/mod.rs | 14 -------------- 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/lib/src/metta/runner/environment.rs b/lib/src/metta/runner/environment.rs index 99dc29ffc..42c2a9252 100644 --- a/lib/src/metta/runner/environment.rs +++ b/lib/src/metta/runner/environment.rs @@ -4,7 +4,7 @@ use std::io::{Read, BufReader, Write}; use std::fs; use std::sync::Arc; -use crate::{sym, ExpressionAtom, SymbolAtom, metta::GroundingSpace}; +use crate::{sym, ExpressionAtom, metta::GroundingSpace}; #[cfg(feature = "pkg_mgmt")] use crate::metta::runner::pkg_mgmt::{ModuleCatalog, DirCatalog, LocalCatalog, FsModuleFormat, SingleFileModuleFmt, DirModuleFmt, git_catalog::*}; @@ -404,7 +404,10 @@ fn interpret_environment_metta>(env_metta_path: P, env: &mut Envi #[cfg(feature = "pkg_mgmt")] env.catalogs.push(include_path_from_cfg_atom(&expr, env)?); #[cfg(not(feature = "pkg_mgmt"))] - log::warn!("#includePath in environment.metta not supported without pkg_mgmt feature"); + { + let _ = &env; + log::warn!("#includePath in environment.metta not supported without pkg_mgmt feature"); + } }, Some(atom_0) if *atom_0 == sym!("#gitCatalog") => { #[cfg(feature = "pkg_mgmt")] @@ -439,9 +442,9 @@ fn git_catalog_from_cfg_atom(atom: &ExpressionAtom, env: &Environment) -> Result }; match key_atom { - _ if *key_atom == sym!("#name") => catalog_name = Some(<&SymbolAtom>::try_from(val_atom)?.name()), - _ if *key_atom == sym!("#url") => catalog_url = Some(<&SymbolAtom>::try_from(val_atom)?.name()), - _ if *key_atom == sym!("#refreshTime") => refresh_time = Some(<&SymbolAtom>::try_from(val_atom)?.name()), + _ if *key_atom == sym!("#name") => catalog_name = Some(<&crate::SymbolAtom>::try_from(val_atom)?.name()), + _ if *key_atom == sym!("#url") => catalog_url = Some(<&crate::SymbolAtom>::try_from(val_atom)?.name()), + _ if *key_atom == sym!("#refreshTime") => refresh_time = Some(<&crate::SymbolAtom>::try_from(val_atom)?.name()), _ => return Err(format!("Error in environment.metta. Unknown key: {key_atom}")) } } @@ -470,7 +473,7 @@ fn include_path_from_cfg_atom(atom: &ExpressionAtom, env: &Environment) -> Resul Some(atom) => atom, None => return Err(format!("Error in environment.metta. 
#includePath missing path value")) }; - let path = <&SymbolAtom>::try_from(path_atom)?.name(); + let path = <&crate::SymbolAtom>::try_from(path_atom)?.name(); let path = crate::metta::runner::string::strip_quotes(path); //TODO-FUTURE: In the future we may want to replace dyn-fmt with strfmt, and do something a diff --git a/lib/src/metta/runner/mod.rs b/lib/src/metta/runner/mod.rs index 89f212ddb..c435e2ace 100644 --- a/lib/src/metta/runner/mod.rs +++ b/lib/src/metta/runner/mod.rs @@ -74,6 +74,9 @@ pub mod pkg_mgmt; #[cfg(feature = "pkg_mgmt")] use pkg_mgmt::*; +#[cfg(not(feature = "pkg_mgmt"))] +pub(crate) type ModuleDescriptor = (); + use std::rc::Rc; use std::path::PathBuf; use std::collections::HashMap; @@ -317,10 +320,7 @@ impl Metta { /// Merges all modules in a [ModuleInitState] into the runner fn merge_init_state(&self, init_state: ModuleInitState) -> Result { let mut main_mod_id = ModId::INVALID; - #[cfg(feature = "pkg_mgmt")] let (frames, descriptors) = init_state.decompose(); - #[cfg(not(feature = "pkg_mgmt"))] - let frames = init_state.decompose(); // Unpack each frame and ,erge the modules from the ModuleInitState into the // runner, and build the mapping table for ModIds @@ -361,6 +361,8 @@ impl Metta { }; self.add_module_descriptor(descriptor, mod_id); } + #[cfg(not(feature = "pkg_mgmt"))] + let _ = descriptors; // Finally, re-map the module's "deps" ModIds for added_mod_id in mod_id_mapping.values() { diff --git a/lib/src/metta/runner/modules/mod.rs b/lib/src/metta/runner/modules/mod.rs index 97fa6e58f..400668e2d 100644 --- a/lib/src/metta/runner/modules/mod.rs +++ b/lib/src/metta/runner/modules/mod.rs @@ -351,7 +351,6 @@ pub(crate) enum ModuleInitState { pub(crate) struct ModuleInitStateInsides { frames: Vec, - #[cfg(feature = "pkg_mgmt")] module_descriptors: HashMap, } @@ -362,7 +361,6 @@ impl ModuleInitState { pub fn new(mod_name: String) -> (Self, ModId) { let new_insides = ModuleInitStateInsides { frames: vec![ModuleInitFrame::new_with_name(mod_name)], - #[cfg(feature = "pkg_mgmt")] module_descriptors: HashMap::new(), }; let init_state = Self::Root(Rc::new(RefCell::new(new_insides))); @@ -397,7 +395,6 @@ impl ModuleInitState { _ => false } } - #[cfg(feature = "pkg_mgmt")] pub fn decompose(self) -> (Vec, HashMap) { match self { Self::Root(cell) => { @@ -409,17 +406,6 @@ impl ModuleInitState { _ => unreachable!() } } - #[cfg(not(feature = "pkg_mgmt"))] - pub fn decompose(self) -> Vec { - match self { - Self::Root(cell) => { - let mut insides_ref = cell.borrow_mut(); - let frames = core::mem::take(&mut insides_ref.frames); - frames - }, - _ => unreachable!() - } - } /// Internal method to retrieve the mod_ptr to a module that's either loading in the /// InitFrame, or loaded into the runner From 0fb5cc29a96ff6d90f63a17c48cbb1752fa36df6 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Wed, 5 Jun 2024 19:42:56 +0900 Subject: [PATCH 72/77] Adding better documentation on the LocalCatalog vs. 
upstream catalog and the purpose of GitCatalog's data members --- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 7 +++++++ lib/src/metta/runner/pkg_mgmt/managed_catalog.rs | 13 +++++++++++++ 2 files changed, 20 insertions(+) diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index 87556870e..1f0d706fb 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -167,11 +167,18 @@ impl CatalogFileMod { /// Provides an interface to a git repo hosting a table of available modules #[derive(Debug)] pub struct GitCatalog { + /// The name of this catalog name: String, + /// The FsModuleFormats from the environment, to load the modules from their respective repositories fmts: Arc>>, + /// An interval in seconds to control the refresh of the catalog from the upstream source refresh_time: u64, + /// The git repo for the catalog info. This is the table-of-contents for the catalog, not the modules catalog_repo: Option, + /// The path to the catalog file(s), to store the metadata to connect a module to its source location + /// parameters. This path does not have any reliable connection to the on-disk location of the modules catalog_file_path: PathBuf, + /// The in-memory catalog object, mirroring what is on disk catalog: Mutex>, } diff --git a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs index f8c22e107..b1198b489 100644 --- a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs @@ -102,6 +102,19 @@ pub trait ManagedCatalog: ModuleCatalog { } } +/// A ManagedCatalog type to mediate and aggregate the contents of one or more "upstream" catalogs +/// +/// ## Division of responsibilities with upstream catalogs +/// +/// LocalCatalog: +/// - Tracks which modules are installed on-disk +/// - Manages the on-disk location of the downloaded / cached modules +/// - TODO-Future: will track requirements and module dependency trees +/// +/// upstream Catalogs: +/// - Track which modules are available from the outside world +/// - Track the remote location of each module and any parameters needed to access it +/// #[derive(Debug)] pub struct LocalCatalog { name: String, From fa92d152653ad2175f280a4925635cb2ffb7bb7e Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 6 Jun 2024 11:32:49 +0900 Subject: [PATCH 73/77] Separating catalog op atoms from the rest of the stdlib --- .../metta/runner/builtin_mods/catalog_mods.rs | 236 ++++++++++++++++++ lib/src/metta/runner/builtin_mods/mod.rs | 12 + lib/src/metta/runner/mod.rs | 6 + lib/src/metta/runner/stdlib.rs | 215 +--------------- 4 files changed, 256 insertions(+), 213 deletions(-) create mode 100644 lib/src/metta/runner/builtin_mods/catalog_mods.rs create mode 100644 lib/src/metta/runner/builtin_mods/mod.rs diff --git a/lib/src/metta/runner/builtin_mods/catalog_mods.rs b/lib/src/metta/runner/builtin_mods/catalog_mods.rs new file mode 100644 index 000000000..66ef50fc0 --- /dev/null +++ b/lib/src/metta/runner/builtin_mods/catalog_mods.rs @@ -0,0 +1,236 @@ + +use std::fmt::Display; + +use crate::atom::{Atom, Grounded, ExecError}; +use crate::space::grounding::GroundingSpace; +use crate::metta::{ARROW_SYMBOL, ATOM_TYPE_SYMBOL, UNIT_TYPE}; +use crate::metta::runner::{Metta, ModuleLoader, RunContext, DynSpace}; +use crate::metta::runner::pkg_mgmt::{UpdateMode, ManagedCatalog}; +use crate::metta::runner::stdlib::{regex, unit_result}; + +//TODO, delete these when 
merging with https://github.com/trueagi-io/hyperon-experimental/pull/706 +use crate::metta::matcher::*; +use crate::atom::match_by_equality; + +/// Loader to Initialize the "catalog" module +#[derive(Debug)] +pub(crate) struct CatalogModLoader; + +impl ModuleLoader for CatalogModLoader { + fn load(&self, context: &mut RunContext) -> Result<(), String> { + let space = DynSpace::new(GroundingSpace::new()); + context.init_self_module(space, None); + + let metta = context.metta(); + let mut tref = context.module().tokenizer().borrow_mut(); + + let catalog_list_op = Atom::gnd(CatalogListOp::new(metta.clone())); + tref.register_token(regex(r"catalog-list!"), move |_| { catalog_list_op.clone() }); + let catalog_update_op = Atom::gnd(CatalogUpdateOp::new(metta.clone())); + tref.register_token(regex(r"catalog-update!"), move |_| { catalog_update_op.clone() }); + let catalog_clear_op = Atom::gnd(CatalogClearOp::new(metta.clone())); + tref.register_token(regex(r"catalog-clear!"), move |_| { catalog_clear_op.clone() }); + + Ok(()) + } +} + +/// Lists contents of all Catalogs that support the "list" method +#[derive(Clone, Debug)] +pub struct CatalogListOp { + metta: Metta +} + +impl PartialEq for CatalogListOp { + fn eq(&self, _other: &Self) -> bool { true } +} + +impl CatalogListOp { + pub fn new(metta: Metta) -> Self { + Self{ metta } + } +} + +impl Display for CatalogListOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "catalog-list!") + } +} + +impl Grounded for CatalogListOp { + fn type_(&self) -> Atom { + //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout + Atom::expr([ARROW_SYMBOL, ATOM_TYPE_SYMBOL, UNIT_TYPE()]) + } + + fn execute(&self, args: &[Atom]) -> Result, ExecError> { + let arg_error = "catalog-list! 
expects a catalog name, or \"all\" to list all available"; + let cat_name_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; + let cat_name = if let Atom::Symbol(cat_name) = cat_name_arg_atom { + cat_name.name() + } else { + return Err(ExecError::from(arg_error)); + }; + + fn list_catalog(cat: &dyn crate::metta::runner::ModuleCatalog) { + if let Some(cat_iter) = cat.list() { + println!("{}:", cat.display_name()); + for desc in cat_iter { + println!(" {desc}"); + } + } + } + + let mut found_one = false; + if cat_name == "all" || cat_name == "git-modules" { + if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { + list_catalog(explicit_git_catalog); + found_one = true; + } + } + for cat in self.metta.environment().catalogs() { + if cat_name == "all" || cat_name == cat.display_name() { + list_catalog(cat); + found_one = true; + } + } + + if !found_one { + return Err(ExecError::from(format!("no catalog(s) identified by \"{cat_name}\""))); + } + unit_result() + } + + fn match_(&self, other: &Atom) -> MatchResultIter { + match_by_equality(self, other) + } +} + +/// Update all contents of all ManagedCatalogs to the latest version of all modules +#[derive(Clone, Debug)] +pub struct CatalogUpdateOp { + metta: Metta +} + +impl PartialEq for CatalogUpdateOp { + fn eq(&self, _other: &Self) -> bool { true } +} + +impl CatalogUpdateOp { + pub fn new(metta: Metta) -> Self { + Self{ metta } + } +} + +impl Display for CatalogUpdateOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "catalog-update!") + } +} + +impl Grounded for CatalogUpdateOp { + fn type_(&self) -> Atom { + //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout + Atom::expr([ARROW_SYMBOL, ATOM_TYPE_SYMBOL, UNIT_TYPE()]) + } + + fn execute(&self, args: &[Atom]) -> Result, ExecError> { + let arg_error = "catalog-update! 
expects a catalog name, or \"all\" to update all"; + let cat_name_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; + let cat_name = if let Atom::Symbol(cat_name) = cat_name_arg_atom { + cat_name.name() + } else { + return Err(ExecError::from(arg_error)); + }; + + let mut found_one = false; + if cat_name == "all" || cat_name == "git-modules" { + if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { + explicit_git_catalog.fetch_newest_for_all(UpdateMode::FetchLatest)?; + found_one = true; + } + } + + for cat in self.metta.environment().catalogs() { + match cat.as_managed() { + Some(cat) => if cat_name == "all" || cat_name == cat.display_name() { + cat.fetch_newest_for_all(UpdateMode::FetchLatest)?; + found_one = true; + }, + None => {} + } + } + + if !found_one { + return Err(ExecError::from(format!("no catalog(s) identified by \"{cat_name}\""))); + } + unit_result() + } + + fn match_(&self, other: &Atom) -> MatchResultIter { + match_by_equality(self, other) + } +} + +/// Clears the contents of all ManagedCatalogs +#[derive(Clone, Debug)] +pub struct CatalogClearOp { + metta: Metta +} + +impl PartialEq for CatalogClearOp { + fn eq(&self, _other: &Self) -> bool { true } +} + +impl CatalogClearOp { + pub fn new(metta: Metta) -> Self { + Self{ metta } + } +} + +impl Display for CatalogClearOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "catalog-clear!") + } +} + +impl Grounded for CatalogClearOp { + fn type_(&self) -> Atom { + //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout + Atom::expr([ARROW_SYMBOL, ATOM_TYPE_SYMBOL, UNIT_TYPE()]) + } + + fn execute(&self, args: &[Atom]) -> Result, ExecError> { + let arg_error = "catalog-clear! 
expects a catalog name, or \"all\" to clear all"; + let cat_name_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; + let cat_name = if let Atom::Symbol(cat_name) = cat_name_arg_atom { + cat_name.name() + } else { + return Err(ExecError::from(arg_error)); + }; + + let mut found_one = false; + if cat_name == "all" || cat_name == "git-modules" { + if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { + explicit_git_catalog.clear_all()?; + found_one = true; + } + } + + for cat in self.metta.environment().catalogs().filter_map(|cat| cat.as_managed()) { + if cat_name == "all" || cat_name == cat.display_name() { + cat.clear_all()?; + found_one = true; + } + } + + if !found_one { + return Err(ExecError::from(format!("no catalog(s) identified by \"{cat_name}\""))); + } + unit_result() + } + + fn match_(&self, other: &Atom) -> MatchResultIter { + match_by_equality(self, other) + } +} diff --git a/lib/src/metta/runner/builtin_mods/mod.rs b/lib/src/metta/runner/builtin_mods/mod.rs new file mode 100644 index 000000000..4a3c4158f --- /dev/null +++ b/lib/src/metta/runner/builtin_mods/mod.rs @@ -0,0 +1,12 @@ + +use crate::metta::runner::Metta; + +/// Op atoms for working with catalogs +#[cfg(feature = "pkg_mgmt")] +pub mod catalog_mods; + +pub fn load_builtin_mods(metta: &Metta) -> Result<(), String> { + let _mod_id = metta.load_module_direct(Box::new(catalog_mods::CatalogModLoader), "catalog").map_err(|e| format!("error loading builtin \"catalog\" module: {e}")); + + Ok(()) +} diff --git a/lib/src/metta/runner/mod.rs b/lib/src/metta/runner/mod.rs index c435e2ace..5a48fd23d 100644 --- a/lib/src/metta/runner/mod.rs +++ b/lib/src/metta/runner/mod.rs @@ -98,6 +98,9 @@ use stdlib_minimal::*; use stdlib::CoreLibLoader; +mod builtin_mods; +use builtin_mods::*; + pub mod arithmetics; pub mod string; @@ -176,6 +179,9 @@ impl Metta { //Set the runner's stdlib mod_id metta.0.stdlib_mod.set(stdlib_mod_id).unwrap(); + //Load the rest of the builtin mods, but don't `import` (aka "use") them + load_builtin_mods(&metta).unwrap(); + //Import the stdlib into the top module, now that it is loaded let mut runner_state = RunnerState::new(&metta); runner_state.run_in_context(|context| { diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index 971ac11c2..7e9cc6ff9 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -14,7 +14,7 @@ use crate::common::multitrie::MultiTrie; use crate::space::grounding::atom_to_trie_key; #[cfg(feature = "pkg_mgmt")] -use crate::metta::runner::{git_catalog::ModuleGitLocation, mod_name_from_url, pkg_mgmt::{UpdateMode, ManagedCatalog}}; +use crate::metta::runner::{git_catalog::ModuleGitLocation, mod_name_from_url, pkg_mgmt::UpdateMode}; use std::rc::Rc; use std::cell::RefCell; @@ -25,7 +25,7 @@ use regex::Regex; use super::arithmetics::*; use super::string::*; -fn unit_result() -> Result, ExecError> { +pub(crate) fn unit_result() -> Result, ExecError> { Ok(vec![UNIT_ATOM()]) } @@ -967,11 +967,6 @@ impl Grounded for MatchOp { pub(crate) mod pkg_mgmt_ops { use super::*; - //QUESTION: Do we want to factor these catalog management ops and specialized loading - // ops into a separate module? The argument for "yes" is that the it avoids polluting - // the namespace with ops that are seldom used. The argument for "no" is that importing - // the module to use the ops is another step users must remember. 
- /// Provides a way to access [Metta::load_module_at_path] from within MeTTa code #[derive(Clone, Debug)] pub struct RegisterModuleOp { @@ -1096,217 +1091,11 @@ pub(crate) mod pkg_mgmt_ops { } } - /// Lists contents of all Catalogs that support the "list" method - #[derive(Clone, Debug)] - pub struct CatalogListOp { - metta: Metta - } - - impl PartialEq for CatalogListOp { - fn eq(&self, _other: &Self) -> bool { true } - } - - impl CatalogListOp { - pub fn new(metta: Metta) -> Self { - Self{ metta } - } - } - - impl Display for CatalogListOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "catalog-list!") - } - } - - impl Grounded for CatalogListOp { - fn type_(&self) -> Atom { - //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout - Atom::expr([ARROW_SYMBOL, ATOM_TYPE_SYMBOL, UNIT_TYPE()]) - } - - fn execute(&self, args: &[Atom]) -> Result, ExecError> { - let arg_error = "catalog-list! expects a catalog name, or \"all\" to list all available"; - let cat_name_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; - let cat_name = if let Atom::Symbol(cat_name) = cat_name_arg_atom { - cat_name.name() - } else { - return Err(ExecError::from(arg_error)); - }; - - fn list_catalog(cat: &dyn crate::metta::runner::ModuleCatalog) { - if let Some(cat_iter) = cat.list() { - println!("{}:", cat.display_name()); - for desc in cat_iter { - println!(" {desc}"); - } - } - } - - let mut found_one = false; - if cat_name == "all" || cat_name == "git-modules" { - if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { - list_catalog(explicit_git_catalog); - found_one = true; - } - } - for cat in self.metta.environment().catalogs() { - if cat_name == "all" || cat_name == cat.display_name() { - list_catalog(cat); - found_one = true; - } - } - - if !found_one { - return Err(ExecError::from(format!("no catalog(s) identified by \"{cat_name}\""))); - } - unit_result() - } - - fn match_(&self, other: &Atom) -> MatchResultIter { - match_by_equality(self, other) - } - } - - /// Update all contents of all ManagedCatalogs to the latest version of all modules - #[derive(Clone, Debug)] - pub struct CatalogUpdateOp { - metta: Metta - } - - impl PartialEq for CatalogUpdateOp { - fn eq(&self, _other: &Self) -> bool { true } - } - - impl CatalogUpdateOp { - pub fn new(metta: Metta) -> Self { - Self{ metta } - } - } - - impl Display for CatalogUpdateOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "catalog-update!") - } - } - - impl Grounded for CatalogUpdateOp { - fn type_(&self) -> Atom { - //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout - Atom::expr([ARROW_SYMBOL, ATOM_TYPE_SYMBOL, UNIT_TYPE()]) - } - - fn execute(&self, args: &[Atom]) -> Result, ExecError> { - let arg_error = "catalog-update! 
expects a catalog name, or \"all\" to update all"; - let cat_name_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; - let cat_name = if let Atom::Symbol(cat_name) = cat_name_arg_atom { - cat_name.name() - } else { - return Err(ExecError::from(arg_error)); - }; - - let mut found_one = false; - if cat_name == "all" || cat_name == "git-modules" { - if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { - explicit_git_catalog.fetch_newest_for_all(UpdateMode::FetchLatest)?; - found_one = true; - } - } - - for cat in self.metta.environment().catalogs() { - match cat.as_managed() { - Some(cat) => if cat_name == "all" || cat_name == cat.display_name() { - cat.fetch_newest_for_all(UpdateMode::FetchLatest)?; - found_one = true; - }, - None => {} - } - } - - if !found_one { - return Err(ExecError::from(format!("no catalog(s) identified by \"{cat_name}\""))); - } - unit_result() - } - - fn match_(&self, other: &Atom) -> MatchResultIter { - match_by_equality(self, other) - } - } - - /// Clears the contents of all ManagedCatalogs - #[derive(Clone, Debug)] - pub struct CatalogClearOp { - metta: Metta - } - - impl PartialEq for CatalogClearOp { - fn eq(&self, _other: &Self) -> bool { true } - } - - impl CatalogClearOp { - pub fn new(metta: Metta) -> Self { - Self{ metta } - } - } - - impl Display for CatalogClearOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "catalog-clear!") - } - } - - impl Grounded for CatalogClearOp { - fn type_(&self) -> Atom { - //TODO-FUTURE, we may want to return the list as atoms, but now it just prints to stdout - Atom::expr([ARROW_SYMBOL, ATOM_TYPE_SYMBOL, UNIT_TYPE()]) - } - - fn execute(&self, args: &[Atom]) -> Result, ExecError> { - let arg_error = "catalog-clear! 
expects a catalog name, or \"all\" to clear all"; - let cat_name_arg_atom = args.get(0).ok_or_else(|| ExecError::from(arg_error))?; - let cat_name = if let Atom::Symbol(cat_name) = cat_name_arg_atom { - cat_name.name() - } else { - return Err(ExecError::from(arg_error)); - }; - - let mut found_one = false; - if cat_name == "all" || cat_name == "git-modules" { - if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { - explicit_git_catalog.clear_all()?; - found_one = true; - } - } - - for cat in self.metta.environment().catalogs().filter_map(|cat| cat.as_managed()) { - if cat_name == "all" || cat_name == cat.display_name() { - cat.clear_all()?; - found_one = true; - } - } - - if !found_one { - return Err(ExecError::from(format!("no catalog(s) identified by \"{cat_name}\""))); - } - unit_result() - } - - fn match_(&self, other: &Atom) -> MatchResultIter { - match_by_equality(self, other) - } - } - pub fn register_pkg_mgmt_tokens(tref: &mut Tokenizer, metta: &Metta) { let register_module_op = Atom::gnd(RegisterModuleOp::new(metta.clone())); tref.register_token(regex(r"register-module!"), move |_| { register_module_op.clone() }); let git_module_op = Atom::gnd(GitModuleOp::new(metta.clone())); tref.register_token(regex(r"git-module!"), move |_| { git_module_op.clone() }); - let catalog_list_op = Atom::gnd(CatalogListOp::new(metta.clone())); - tref.register_token(regex(r"catalog-list!"), move |_| { catalog_list_op.clone() }); - let catalog_update_op = Atom::gnd(CatalogUpdateOp::new(metta.clone())); - tref.register_token(regex(r"catalog-update!"), move |_| { catalog_update_op.clone() }); - let catalog_clear_op = Atom::gnd(CatalogClearOp::new(metta.clone())); - tref.register_token(regex(r"catalog-clear!"), move |_| { catalog_clear_op.clone() }); } } From 33b77aebfc4cfa18c7748da31791babc0683785e Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 6 Jun 2024 12:45:38 +0900 Subject: [PATCH 74/77] Ignoring invisible files in managed catalog, because they definitely don't represent installed modules --- lib/src/metta/runner/pkg_mgmt/managed_catalog.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs index b1198b489..b3b11bf77 100644 --- a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs @@ -318,9 +318,7 @@ impl LocalCatalogTOC { let name_str = file_name.to_str() .ok_or_else(|| format!("Invalid characters found in local cache at path: {}", dir_entry.path().display()))?; - // Name reserved by GitCatalog. We may generalize this "reserved" mechanism when - // we support additional upstream catalog types - if name_str != "_catalog.repo" && name_str != "_catalog.json" { + if !Self::should_ignore_dir_entry(name_str) { let descriptor = parse_descriptor_from_dir_name(name_str)?; new_self.add_descriptor(descriptor)?; } @@ -328,6 +326,18 @@ impl LocalCatalogTOC { Ok(new_self) } + /// Returns `false` if the file / directory name is not a module entry, otherwise `true` if it could be + fn should_ignore_dir_entry(dir_name: &str) -> bool { + // GitCatalog reserves the names "_catalog.repo" and "_catalog.json" + if dir_name == "_catalog.repo" || dir_name == "_catalog.json" { + return true; + } + // '.' is illegal in a module name, but lots of software creates .invisible_files, e.g. 
`.DS_Store` + if dir_name.starts_with('.') { + return true; + } + false + } fn lookup_by_name(&self, name: &str) -> Option> { if let Some(descriptors) = self.mods_by_name.get(name) { if descriptors.len() > 0 { From 6ffe2f9ffe9897f9346a95a4d782fa8cfc402746 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 6 Jun 2024 13:48:31 +0900 Subject: [PATCH 75/77] Adding discussion about what we want in programmatic catalog management (from MeTTa) --- .../metta/runner/builtin_mods/catalog_mods.rs | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/lib/src/metta/runner/builtin_mods/catalog_mods.rs b/lib/src/metta/runner/builtin_mods/catalog_mods.rs index 66ef50fc0..24f64d175 100644 --- a/lib/src/metta/runner/builtin_mods/catalog_mods.rs +++ b/lib/src/metta/runner/builtin_mods/catalog_mods.rs @@ -12,6 +12,49 @@ use crate::metta::runner::stdlib::{regex, unit_result}; use crate::metta::matcher::*; use crate::atom::match_by_equality; +//DISCUSSION: We want to expose more of the pkg_mgmt / catalog system to MeTTa through programmatic +// interfaces, but the details are unclear. Most importantly, the use cases are unclear, and those +// will inform all design decisions. +// +//## Potential wish-list for operations available directly in MeTTa: +// - Ability to work with semver version objects and requirements. ie. parse versions and requirments +// from strings, compare versions to other versions and reqs, etc. +// - Ability to resolve a module by name (and optionally with a version requirement) without loading it, +// returning a ModuleDescriptor and the Catalog in which it was found +// - Ability to query within a specific catalog, rather than searching all catalogs in priority order +// - Ability to load an exact module from a specific catalog based on its ModuleDescriptor +// - Accessor for a resource (see [ResourceKey]) of a module +// - Ability to inspect / traverse a module dependency hierarchy +// - Full control over what is "installed". (inspect, upgrade, install, remove) See below. +// +//## The concept of "installed" +// The user-level concept of an "installed" module has a imprecise mapping to concepts in the pkg_mgmt / +// catalog system. The loosest idea of "installed" is "available", but most users would not consider +// software that's freely available on the internet to be "installed". Most users probably feel like +// "installed" implies that a specific version of the module is local in their file system. +// +// Therefore, modules in remote catalogs wouldn't count as installed, while modules in local catalogs +// would. Right now, there are 2 kinds of "local" catalogs. DirCatalog, and LocalCatalog. +// +// DirCatalogs cannot be managed via MeTTa because it is a "read-only" interface over a directory. We +// wouldn't want the user to do something that deletes items from a directory the user is managing +// themselves or under the control of another piece of software like pip or apt. Additionally, there +// is no metadata available in a DirCatalog to automatically upgrade any of the contents. +// +// LocalCatalogs are designed to be managed by Hyperon, from MeTTa. A LocalCatalog mirrors one or more +// upstream module sources (remote Catalogs), so it's expected that a user will install, upgrade, remove, +// etc. modules in a LocalCatalog. +// +// Therefore, an interface for "managing" what is installed must be limited to LocalCatalogs. +// +//## Should it be supported to list the contents of a DirCatalog? 
+// A DirCatalog that contains only MeTTa modules, such as the 'modules' directory in the MeTTa config dir, +// can be listed easily enough. Things get complicated however because any directory can be loaded as +// a MeTTa module. Therefore when configuration adds a heterogeneous directory, such as `site-packages`, +// the DirCatalog will report every subdir as a module. Which is correct, but probably not useful. +// At the very least, it makes `!(catalog-list! all)` much more noisy +// + /// Loader to Initialize the "catalog" module #[derive(Debug)] pub(crate) struct CatalogModLoader; From 46f1df0f06c47ec9d4643cae29148a2e08e66adc Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 6 Jun 2024 15:26:36 +0900 Subject: [PATCH 76/77] Reworking access to the Environment's explicit_git_mods catalog, so that GitCatalog doesn't have a method that's aware of a certain field in the environment --- lib/src/metta/runner/pkg_mgmt/catalog.rs | 9 ++++++-- lib/src/metta/runner/pkg_mgmt/git_catalog.rs | 22 +++++-------------- .../metta/runner/pkg_mgmt/managed_catalog.rs | 21 ++++++++++++++++++ lib/src/metta/runner/stdlib.rs | 7 ++++-- 4 files changed, 38 insertions(+), 21 deletions(-) diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index 10c3a21d9..716fb10bb 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -328,8 +328,13 @@ pub(crate) fn resolve_module(pkg_info: Option<&PkgInfo>, context: &RunContext, n } //Get the module if it's specified with git keys - if let Some(pair) = entry.git_location.get_loader_in_explicit_catalog(mod_name, UpdateMode::FetchIfMissing, context.metta.environment())? { - return Ok(Some(pair)); + if entry.git_location.get_url().is_some() { + match context.metta.environment().explicit_git_mods.as_ref() { + Some(explicit_git_catalog) => if let Some(pair) = explicit_git_catalog.loader_for_explicit_git_module(mod_name, UpdateMode::FetchIfMissing, &entry.git_location)? 
{ + return Ok(Some(pair)); + }, + None => return Err(format!("Unable to pull module \"{mod_name}\" from git; no local \"caches\" directory available")) + } } //If `version_req` is specified in the dep entry, then use it to constrain the catalog search diff --git a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs index 1f0d706fb..0eb0f8b99 100644 --- a/lib/src/metta/runner/pkg_mgmt/git_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/git_catalog.rs @@ -57,20 +57,6 @@ impl ModuleGitLocation { Ok(None) } - /// Gets a loader for a module identified by a ModuleGitLocation, using the [Environment]'s managed `explicit_git_mods` catalog - pub(crate) fn get_loader_in_explicit_catalog(&self, mod_name: &str, update_mode: UpdateMode, env: &Environment) -> Result, ModuleDescriptor)>, String> { - if self.get_url().is_some() { - if let Some(explicit_git_catalog) = env.explicit_git_mods.as_ref() { - let descriptor = explicit_git_catalog.upstream_catalogs().first().unwrap().downcast::().unwrap().register_mod(mod_name, None, self)?; - let loader = explicit_git_catalog.get_loader_with_explicit_refresh(&descriptor, update_mode)?; - Ok(Some((loader, descriptor))) - } else { - Err(format!("Unable to pull module \"{mod_name}\" from git; no local \"caches\" directory available")) - } - } else { - Ok(None) - } - } pub(crate) fn get_cache(&self, mod_name: &str, local_cache_dir: PathBuf) -> Result { let url = self.git_url.as_ref().unwrap(); let branch = self.git_branch.as_ref().map(|s| s.as_str()); @@ -218,9 +204,11 @@ impl GitCatalog { /// Registers a new module in the catalog with a specified remote location, and returns /// the [ModuleDescriptor] to refer to that module /// - /// WARNING: if a catalog is synced to an upstream source, the upstream source will - /// eventually overwrite anything you register with this method. - fn register_mod(&self, mod_name: &str, version: Option<&semver::Version>, git_location: &ModuleGitLocation) -> Result { + /// WARNING: This method is incompatible with a catalog synced to an upstream source + pub(crate) fn register_mod(&self, mod_name: &str, version: Option<&semver::Version>, git_location: &ModuleGitLocation) -> Result { + if self.catalog_repo.is_some() { + return Err(format!("cannot explicitly register module in a catalog synced to an upstream source")); + } let descriptor = { let mut catalog_ref = self.catalog.lock().unwrap(); catalog_ref.as_mut().unwrap().add(CatalogFileMod::new(mod_name.to_string(), version.cloned(), git_location.clone()))? diff --git a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs index b3b11bf77..624d68192 100644 --- a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs @@ -3,6 +3,7 @@ use std::path::{Path, PathBuf}; use std::collections::BTreeMap; use std::sync::Mutex; +use git_catalog::{GitCatalog, ModuleGitLocation}; use crate::metta::runner::*; use crate::metta::runner::pkg_mgmt::*; @@ -141,6 +142,26 @@ impl LocalCatalog { pub fn upstream_catalogs(&self) -> &[Box] { &self.upstream_catalogs[..] 
} + /// Returns an accessor for the first upstream [GitCatalog] if the LocalCatalog has one, + /// otherwise returns None + fn first_upstream_git_catalog(&self) -> Option<&GitCatalog> { + for upstream in self.upstream_catalogs() { + if let Some(git_catalog) = upstream.downcast::() { + return Some(git_catalog) + } + } + None + } + /// Adds a specific module into the catalog based on a [ModuleGitLocation] + /// + /// Returns an error if the LocalCatalog is not capable of working with git modules + pub(crate) fn loader_for_explicit_git_module(&self, mod_name: &str, update_mode: UpdateMode, location: &ModuleGitLocation) -> Result, ModuleDescriptor)>, String> { + let descriptor = self.first_upstream_git_catalog() + .ok_or_else(|| format!("Catalog {} cannot pull modules from git", self.name))? + .register_mod(mod_name, None, location)?; + let loader = self.get_loader_with_explicit_refresh(&descriptor, update_mode)?; + Ok(Some((loader, descriptor))) + } fn lookup_by_name_in_toc(&self, name: &str) -> Option> { let local_toc = self.local_toc.lock().unwrap(); local_toc.lookup_by_name(name) diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index 7e9cc6ff9..9886ae6d5 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -1079,8 +1079,11 @@ pub(crate) mod pkg_mgmt_ops { let git_mod_location = ModuleGitLocation::new(url.to_string()); - if let Some((loader, descriptor)) = git_mod_location.get_loader_in_explicit_catalog(&mod_name, UpdateMode::TryFetchLatest, context.metta.environment()).map_err(|e| ExecError::from(e))? { - context.get_or_init_module_with_descriptor(&mod_name, descriptor, loader).map_err(|e| ExecError::from(e))?; + match context.metta.environment().explicit_git_mods.as_ref() { + Some(explicit_git_catalog) => if let Some((loader, descriptor)) = explicit_git_catalog.loader_for_explicit_git_module(&mod_name, UpdateMode::TryFetchLatest, &git_mod_location)? { + context.get_or_init_module_with_descriptor(&mod_name, descriptor, loader).map_err(|e| ExecError::from(e))?; + }, + None => return Err(ExecError::from(format!("Unable to pull module \"{mod_name}\" from git; no local \"caches\" directory available"))) } unit_result() From da74ea2a929aa6684f0eb902a367d9098a701853 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Thu, 6 Jun 2024 20:24:20 +0900 Subject: [PATCH 77/77] Renaming `explicit_git_mods` to `specified_mods` and slightly reducing the number of places I use the word "explicit"! 
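Patch 76 above routes explicitly specified git modules through `LocalCatalog::first_upstream_git_catalog`, which scans the upstream catalogs and downcasts to find a `GitCatalog`. A minimal sketch of that "first upstream of a concrete type" lookup using `std::any::Any` follows; the real code uses its own downcast helper on the catalog trait, so treat this as an approximation.

```rust
use std::any::Any;

// Reduced stand-ins: upstream module sources that a LocalCatalog mirrors locally.
trait Upstream: Any {
    fn as_any(&self) -> &dyn Any;
}

struct DirCatalog;
struct GitCatalog { name: String }

impl Upstream for DirCatalog {
    fn as_any(&self) -> &dyn Any { self }
}
impl Upstream for GitCatalog {
    fn as_any(&self) -> &dyn Any { self }
}

struct LocalCatalog { upstream: Vec<Box<dyn Upstream>> }

impl LocalCatalog {
    /// Returns the first upstream catalog that is actually a GitCatalog, if there is one.
    fn first_upstream_git_catalog(&self) -> Option<&GitCatalog> {
        self.upstream.iter().find_map(|up| up.as_any().downcast_ref::<GitCatalog>())
    }
}

fn main() {
    let local = LocalCatalog {
        upstream: vec![
            Box::new(DirCatalog) as Box<dyn Upstream>,
            Box::new(GitCatalog { name: "specified-mods".to_string() }),
        ],
    };
    let git = local.first_upstream_git_catalog().expect("one upstream is a GitCatalog");
    assert_eq!(git.name, "specified-mods");
}
```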
(I never realized how big a part of my vocab it has become) --- .../metta/runner/builtin_mods/catalog_mods.rs | 18 +++++++++--------- lib/src/metta/runner/environment.rs | 16 ++++++++-------- lib/src/metta/runner/pkg_mgmt/catalog.rs | 4 ++-- .../metta/runner/pkg_mgmt/managed_catalog.rs | 4 ++-- lib/src/metta/runner/stdlib.rs | 4 ++-- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/lib/src/metta/runner/builtin_mods/catalog_mods.rs b/lib/src/metta/runner/builtin_mods/catalog_mods.rs index 24f64d175..140859fbd 100644 --- a/lib/src/metta/runner/builtin_mods/catalog_mods.rs +++ b/lib/src/metta/runner/builtin_mods/catalog_mods.rs @@ -125,9 +125,9 @@ impl Grounded for CatalogListOp { } let mut found_one = false; - if cat_name == "all" || cat_name == "git-modules" { - if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { - list_catalog(explicit_git_catalog); + if cat_name == "all" || cat_name == "specified-mods" { + if let Some(specified_mods) = &self.metta.environment().specified_mods { + list_catalog(specified_mods); found_one = true; } } @@ -187,9 +187,9 @@ impl Grounded for CatalogUpdateOp { }; let mut found_one = false; - if cat_name == "all" || cat_name == "git-modules" { - if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { - explicit_git_catalog.fetch_newest_for_all(UpdateMode::FetchLatest)?; + if cat_name == "all" || cat_name == "specified-mods" { + if let Some(specified_mods) = &self.metta.environment().specified_mods { + specified_mods.fetch_newest_for_all(UpdateMode::FetchLatest)?; found_one = true; } } @@ -253,9 +253,9 @@ impl Grounded for CatalogClearOp { }; let mut found_one = false; - if cat_name == "all" || cat_name == "git-modules" { - if let Some(explicit_git_catalog) = &self.metta.environment().explicit_git_mods { - explicit_git_catalog.clear_all()?; + if cat_name == "all" || cat_name == "specified-mods" { + if let Some(specified_mods) = &self.metta.environment().specified_mods { + specified_mods.clear_all()?; found_one = true; } } diff --git a/lib/src/metta/runner/environment.rs b/lib/src/metta/runner/environment.rs index 42c2a9252..b276fe46e 100644 --- a/lib/src/metta/runner/environment.rs +++ b/lib/src/metta/runner/environment.rs @@ -26,9 +26,9 @@ pub struct Environment { catalogs: Vec>, #[cfg(feature = "pkg_mgmt")] pub(crate) fs_mod_formats: Arc>>, - /// The store for modules loaded from git by explicit URL + /// The store for modules cached locally after loading from a specific location, for example, via git. 
#[cfg(feature = "pkg_mgmt")] - pub(crate) explicit_git_mods: Option, + pub(crate) specified_mods: Option, } const DEFAULT_INIT_METTA: &[u8] = include_bytes!("init.default.metta"); @@ -99,7 +99,7 @@ impl Environment { #[cfg(feature = "pkg_mgmt")] fs_mod_formats: Arc::new(vec![]), #[cfg(feature = "pkg_mgmt")] - explicit_git_mods: None, + specified_mods: None, } } } @@ -362,11 +362,11 @@ impl EnvBuilder { //If we have a caches dir to cache modules locally then register remote catalogs if let Some(caches_dir) = &env.caches_dir { - //Setup the explicit_git_mods managed catalog to hold mods fetched by explicit URL - let mut explicit_git_mods = LocalCatalog::new(caches_dir, "git-modules").unwrap(); - let git_mod_catalog = GitCatalog::new_without_source_repo(caches_dir, env.fs_mod_formats.clone(), "git-modules").unwrap(); - explicit_git_mods.push_upstream_catalog(Box::new(git_mod_catalog)); - env.explicit_git_mods = Some(explicit_git_mods); + //Setup the specified_mods managed catalog to hold mods fetched by explicit means + let mut specified_mods = LocalCatalog::new(caches_dir, "specified-mods").unwrap(); + let git_mod_catalog = GitCatalog::new_without_source_repo(caches_dir, env.fs_mod_formats.clone(), "specified-mods").unwrap(); + specified_mods.push_upstream_catalog(Box::new(git_mod_catalog)); + env.specified_mods = Some(specified_mods); } } diff --git a/lib/src/metta/runner/pkg_mgmt/catalog.rs b/lib/src/metta/runner/pkg_mgmt/catalog.rs index 716fb10bb..818cd4644 100644 --- a/lib/src/metta/runner/pkg_mgmt/catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/catalog.rs @@ -329,8 +329,8 @@ pub(crate) fn resolve_module(pkg_info: Option<&PkgInfo>, context: &RunContext, n //Get the module if it's specified with git keys if entry.git_location.get_url().is_some() { - match context.metta.environment().explicit_git_mods.as_ref() { - Some(explicit_git_catalog) => if let Some(pair) = explicit_git_catalog.loader_for_explicit_git_module(mod_name, UpdateMode::FetchIfMissing, &entry.git_location)? { + match context.metta.environment().specified_mods.as_ref() { + Some(specified_mods) => if let Some(pair) = specified_mods.loader_for_explicit_git_module(mod_name, UpdateMode::FetchIfMissing, &entry.git_location)? { return Ok(Some(pair)); }, None => return Err(format!("Unable to pull module \"{mod_name}\" from git; no local \"caches\" directory available")) diff --git a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs index 624d68192..406c435b4 100644 --- a/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs +++ b/lib/src/metta/runner/pkg_mgmt/managed_catalog.rs @@ -7,8 +7,8 @@ use git_catalog::{GitCatalog, ModuleGitLocation}; use crate::metta::runner::*; use crate::metta::runner::pkg_mgmt::*; -/// An interface to facilitate explicit management of a catalog, usually as a local mirror -/// of one or more remote catalogs used by a user to insulate them from upstream changes +/// An interface to facilitate direct programatic management of a catalog, usually as a local +/// mirror of one or more remote catalogs used by a user to insulate them from upstream changes // //NOTE FOR THE FUTURE: There are two major problems with this `fetch_newest_for_all` // interface. 
diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs index 9886ae6d5..15036423d 100644 --- a/lib/src/metta/runner/stdlib.rs +++ b/lib/src/metta/runner/stdlib.rs @@ -1079,8 +1079,8 @@ pub(crate) mod pkg_mgmt_ops { let git_mod_location = ModuleGitLocation::new(url.to_string()); - match context.metta.environment().explicit_git_mods.as_ref() { - Some(explicit_git_catalog) => if let Some((loader, descriptor)) = explicit_git_catalog.loader_for_explicit_git_module(&mod_name, UpdateMode::TryFetchLatest, &git_mod_location)? { + match context.metta.environment().specified_mods.as_ref() { + Some(specified_mods) => if let Some((loader, descriptor)) = specified_mods.loader_for_explicit_git_module(&mod_name, UpdateMode::TryFetchLatest, &git_mod_location)? { context.get_or_init_module_with_descriptor(&mod_name, descriptor, loader).map_err(|e| ExecError::from(e))?; }, None => return Err(ExecError::from(format!("Unable to pull module \"{mod_name}\" from git; no local \"caches\" directory available")))
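The final hunk above reroutes `git-module!` through the environment's `specified_mods` catalog and reports an error when no local "caches" directory is available. A small sketch of that success/failure shape follows; the types and helper are illustrative stand-ins, not the real API.

```rust
// Reduced stand-ins for the git-module! path: a location to fetch and a loader to produce.
struct GitLocation { url: String }
struct Loader { fetched_from: String }

/// Without a local "caches" catalog there is nowhere to clone into, so the operation
/// reports an error instead of silently doing nothing.
fn loader_for_git_module(cache_dir: Option<&str>, mod_name: &str, location: &GitLocation) -> Result<Loader, String> {
    match cache_dir {
        Some(dir) => Ok(Loader { fetched_from: format!("{dir}/{mod_name} <- {}", location.url) }),
        None => Err(format!("Unable to pull module \"{mod_name}\" from git; no local \"caches\" directory available")),
    }
}

fn main() {
    let loc = GitLocation { url: "https://example.com/my-mod.git".to_string() };

    let loader = loader_for_git_module(Some("/tmp/caches"), "my-mod", &loc).unwrap();
    assert!(loader.fetched_from.contains("my-mod"));

    assert!(loader_for_git_module(None, "my-mod", &loc).is_err());
}
```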