Merge pull request #475 from NREL/remove_eagle
rajeee authored Nov 6, 2024
2 parents 24d6523 + 069d0f5 commit bb444bb
Showing 21 changed files with 229 additions and 302 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug_report.md
@@ -29,7 +29,7 @@ log file here
```

**Platform (please complete the following information):**
-- Simulation platform: [e.g. Kestrel, Eagle, AWS, local docker; please label with this as well]
+- Simulation platform: [e.g. Kestrel, AWS, local docker; please label with this as well]
- BuildStockBatch version, branch, or sha:
- resstock or comstock repo version, branch, or sha:
- Local Desktop OS: [e.g. Windows, Mac, Linux, especially important if running locally]
2 changes: 1 addition & 1 deletion .github/pull_request_template.md
@@ -14,5 +14,5 @@ Not all may apply
- [ ] All other unit and integration tests passing
- [ ] Update validation for project config yaml file changes
- [ ] Update existing documentation
-- [ ] Run a small batch run on Kestrel/Eagle to make sure it all works if you made changes that will affect Kestrel/Eagle
+- [ ] Run a small batch run on Kestrel to make sure it all works if you made changes that will affect Kestrel
- [ ] Add to the changelog_dev.rst file and propose migration text in the pull request
16 changes: 0 additions & 16 deletions buildstockbatch/eagle.sh

This file was deleted.

33 changes: 0 additions & 33 deletions buildstockbatch/eagle_postprocessing.sh

This file was deleted.

2 changes: 1 addition & 1 deletion buildstockbatch/gcp/gcp.py
@@ -807,7 +807,7 @@ def process_results(self, skip_combine=False, use_dask_cluster=True):
Storage. The BSB implementation tries to write both indirectly (via
`postprocessing.combine_results()`, using `get_fs()`), and directly (through
`upload_results`). Which way the results end up on S3 depends on whether the script was run
-via aws.py (indirect write), or locally or Eagle (direct upload).
+via aws.py (indirect write), or locally or Kestrel (direct upload).
Here, where writing to GCS is (currently) coupled to running on GCS, the writing
to GCS will happen indirectly (via `postprocessing.combine_results()`), and we don't need to
57 changes: 8 additions & 49 deletions buildstockbatch/hpc.py
@@ -3,7 +3,7 @@
"""
buildstockbatch.hpc
~~~~~~~~~~~~~~~
-This class contains the object & methods that allow for usage of the library with Eagle and Kestrel
+This class contains the object & methods that allow for usage of the library with Kestrel
:author: Noel Merket
:copyright: (c) 2018 by The Alliance for Sustainable Energy
@@ -732,38 +732,6 @@ def rerun_failed_jobs(self, hipri=False):
        self.queue_post_processing(job_ids, hipri=hipri)


-class EagleBatch(SlurmBatch):
-    DEFAULT_SYS_IMAGE_DIR = "/shared-projects/buildstock/singularity_images"
-    HPC_NAME = "eagle"
-    CORES_PER_NODE = 36
-    MIN_SIMS_PER_JOB = 36 * 2
-    DEFAULT_POSTPROCESSING_NODE_MEMORY_MB = 85248
-    DEFAULT_NODE_MEMORY_MB = 85248  # standard node on Eagle
-    DEFAULT_POSTPROCESSING_N_PROCS = 18
-    DEFAULT_POSTPROCESSING_N_WORKERS = 2
-
-    @classmethod
-    def validate_output_directory_eagle(cls, project_file):
-        cfg = get_project_configuration(project_file)
-        output_dir = path_rel_to_file(project_file, cfg["output_directory"])
-        if not re.match(r"/(lustre/eaglefs/)?(scratch|projects)", output_dir):
-            raise ValidationError(
-                f"`output_directory` must be in /scratch or /projects," f" `output_directory` = {output_dir}"
-            )
-
-    @classmethod
-    def validate_project(cls, project_file):
-        super(cls, cls).validate_project(project_file)
-        cls.validate_output_directory_eagle(project_file)
-        logger.info("Eagle Validation Successful")
-        return True
-
-    @staticmethod
-    def _queue_jobs_env_vars() -> dict:
-        env = {"MY_CONDA_ENV": os.environ["CONDA_PREFIX"]}
-        return env
-
-
class KestrelBatch(SlurmBatch):
    DEFAULT_SYS_IMAGE_DIR = "/kfs2/shared-projects/buildstock/apptainer_images"
    HPC_NAME = "kestrel"
@@ -824,17 +792,13 @@ def _queue_jobs_env_vars() -> dict:
        }


-def eagle_cli(argv=sys.argv[1:]):
-    user_cli(EagleBatch, argv)
-
-
def kestrel_cli(argv=sys.argv[1:]):
    user_cli(KestrelBatch, argv)


def user_cli(Batch: SlurmBatch, argv: list):
    """
-    This is the user entry point for running buildstockbatch on Eagle/Kestrel
+    This is the user entry point for running buildstockbatch on Kestrel
    """
    # set up logging, currently based on within-this-file hard-coded config
    logging.config.dictConfig(logging_config)
@@ -916,24 +880,21 @@ def main():
    - upload results to Athena (job_array_number == 0 and POSTPROCESS and UPLOADONLY)
    The context for the work is deinfed by the project_filename (project .yml file),
-    which is used to initialize an EagleBatch object.
+    which is used to initialize an KestrelBatch object.
    """

    # set up logging, currently based on within-this-file hard-coded config
    logging.config.dictConfig(logging_config)

    # only direct script argument is the project .yml file
    parser = argparse.ArgumentParser()
-    parser.add_argument("hpc_name", choices=["eagle", "kestrel"])
+    parser.add_argument("hpc_name", choices=["kestrel"])
    parser.add_argument("project_filename")
    args = parser.parse_args()

-    # initialize the EagleBatch/KestrelBatch object
-    if args.hpc_name == "eagle":
-        batch = EagleBatch(args.project_filename)
-    else:
-        assert args.hpc_name == "kestrel"
-        batch = KestrelBatch(args.project_filename)
+    # initialize the KestrelBatch object
+    assert args.hpc_name == "kestrel"
+    batch = KestrelBatch(args.project_filename)
    # other arguments/cues about which part of the process we are in are
    # encoded in slurm job environment variables
    job_array_number = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0))
@@ -966,9 +927,7 @@

if __name__ == "__main__":
    bsb_cli = os.environ.get("BUILDSTOCKBATCH_CLI")
-    if bsb_cli == "eagle":
-        eagle_cli()
-    elif bsb_cli == "kestrel":
+    if bsb_cli == "kestrel":
        kestrel_cli()
    else:
        main()
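For reference, here is a standalone sketch (not code from this commit) of how the slimmed-down argument parsing in `main()` behaves after the change; the project filename is a placeholder:

```python
import argparse

# Reproduces the trimmed-down parser from main(); not imported from buildstockbatch.
parser = argparse.ArgumentParser()
parser.add_argument("hpc_name", choices=["kestrel"])
parser.add_argument("project_filename")

args = parser.parse_args(["kestrel", "project.yml"])  # placeholder filename
print(args.hpc_name)  # -> kestrel

# parser.parse_args(["eagle", "project.yml"]) now exits with:
#   error: argument hpc_name: invalid choice: 'eagle' (choose from 'kestrel')
```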
2 changes: 1 addition & 1 deletion buildstockbatch/postprocessing.py
@@ -537,7 +537,7 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True):

    # Determine how many files should be in each partition and group the files
    parquet_memory = int(
-        cfg.get("eagle", {}).get("postprocessing", {}).get("parquet_memory_mb", MAX_PARQUET_MEMORY)
+        cfg.get("kestrel", {}).get("postprocessing", {}).get("parquet_memory_mb", MAX_PARQUET_MEMORY)
    )
    logger.info(f"Max parquet memory: {parquet_memory} MB")
    max_files_per_partition = max(1, math.floor(parquet_memory / (mean_mem / 1e6)))
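A minimal sketch of the renamed lookup above, using a hypothetical in-memory config; the fallback value is illustrative rather than the module's actual `MAX_PARQUET_MEMORY` constant:

```python
MAX_PARQUET_MEMORY = 4096  # illustrative fallback only; see postprocessing.py for the real constant

# A project config that sets parquet memory under the new `kestrel` key.
cfg = {"kestrel": {"postprocessing": {"parquet_memory_mb": 1000}}}
parquet_memory = int(
    cfg.get("kestrel", {}).get("postprocessing", {}).get("parquet_memory_mb", MAX_PARQUET_MEMORY)
)
print(parquet_memory)  # -> 1000

# A project file still keyed on `eagle:` silently falls back to the default after this change.
old_cfg = {"eagle": {"postprocessing": {"parquet_memory_mb": 1000}}}
parquet_memory = int(
    old_cfg.get("kestrel", {}).get("postprocessing", {}).get("parquet_memory_mb", MAX_PARQUET_MEMORY)
)
print(parquet_memory)  # -> 4096
```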
2 changes: 1 addition & 1 deletion buildstockbatch/schemas/v0.4.yaml
@@ -5,8 +5,8 @@ weather_files_path: str(required=False)
weather_files_url: str(required=False)
sampler: include('sampler-spec', required=True)
workflow_generator: include('workflow-generator-spec', required=True)
eagle: include('hpc-spec', required=False)
kestrel: include('hpc-spec', required=False)
eagle: include('hpc-spec', required=False)
gcp: include('gcp-spec', required=False)
aws: include('aws-spec', required=False)
output_directory: regex('^(.*\/)?[a-z][a-z0-9_]*\/?$', required=True)
168 changes: 168 additions & 0 deletions buildstockbatch/schemas/v0.5.yaml
@@ -0,0 +1,168 @@
schema_version: enum('0.5')
buildstock_directory: str()
project_directory: str(required=True)
weather_files_path: str(required=False)
weather_files_url: str(required=False)
sampler: include('sampler-spec', required=True)
workflow_generator: include('workflow-generator-spec', required=True)
kestrel: include('hpc-spec', required=False)
gcp: include('gcp-spec', required=False)
aws: include('aws-spec', required=False)
output_directory: regex('^(.*\/)?[a-z][a-z0-9_]*\/?$', required=True)
sys_image_dir: str(required=False)
baseline: include('sim-spec', required=True)
os_version: str(required=True)
os_sha: str(required=True)
max_minutes_per_sim: int(max=1440, required=False)
upgrades: list(include('upgrade-spec'), required=False)
postprocessing: include('postprocessing-spec', required=False)
references: map(required=False)
---
gcp-spec:
# The GCP job ID (for Batch and Cloud Run) pattern is `^[a-z]([a-z0-9-]{0,61}[a-z0-9])?$`.
# For postprocessing job id, we append 3 characters ("-pp"), so this can be up to 60 chars.
job_identifier: regex('^[a-z]([a-z0-9-]{0,58}[a-z0-9])?$', required=True)
project: str(required=True)
region: str(required=True)
service_account: str(required=False)
artifact_registry: include('gcp-ar-spec', required=True)
batch_array_size: num(min=1, max=10000, required=True)
parallelism: num(min=1, max=10000, required=False)
gcs: include('gcs-spec', required=True)
job_environment: include('gcp-job-environment-spec', required=False)
postprocessing_environment: include('gcp-postprocessing_environment-spec', required=False)

gcs-spec:
bucket: str(required=True)
prefix: str(required=True)
upload_chunk_size_mib: num(min=5, max=5000, required=False)

gcp-ar-spec:
repository: str(required=True)

gcp-job-environment-spec:
vcpus: int(min=1, max=224, required=False)
memory_mib: int(min=512, required=False)
boot_disk_mib: int(required=False)
machine_type: str(required=False)
use_spot: bool(required=False)
minutes_per_sim: num(min=0.05, max=480, required=False)

gcp-postprocessing_environment-spec:
# Limits documented at
# https://cloud.google.com/run/docs/configuring/services/memory-limits
# https://cloud.google.com/run/docs/configuring/services/cpu
cpus: int(min=1, max=8, required=False)
memory_mib: int(min=512, max=32768, required=False)

aws-spec:
job_identifier: regex('^[a-zA-Z]\w{,9}$', required=True)
s3: include('s3-aws-postprocessing-spec', required=True)
region: str(required=True)
use_spot: bool(required=False)
spot_bid_percent: num(min=1, max=100, required=False)
batch_array_size: num(min=1, max=10000, required=True)
notifications_email: regex('^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$', name='email', required=True)
dask: include('aws-dask-spec', required=True)
job_environment: include('aws-job-environment', required=False)
tags: map(str(), str(), required=False)

aws-job-environment:
vcpus: int(min=1, max=36, required=False)
memory: int(min=1024, required=False)

aws-dask-spec:
scheduler_cpu: enum(1024, 2048, 4096, 8192, 16384, required=False)
scheduler_memory: int(min=1024, required=False)
worker_cpu: enum(1024, 2048, 4096, 8192, 16384, required=False)
worker_memory: int(min=1024, required=False)
n_workers: int(min=1, required=True)

hpc-spec:
account: str(required=True)
minutes_per_sim: num(min=0.05, max=480, required=True)
n_jobs: int(required=False)
postprocessing: include('hpc-postprocessing-spec', required=False)
sampling: include('sampling-spec', required=False)

hpc-postprocessing-spec:
time: int(required=True)
n_workers: int(min=1, max=32, required=False)
node_memory_mb: int(min=85248, max=751616, required=False)
n_procs: int(min=1, max=36, required=False)
parquet_memory_mb: int(min=100, max=4096, required=False)


sampler-spec:
type: str(required=True)
args: map(key=regex(r'^[a-zA-Z_]\w*$', name='valid variable name'), required=False)

workflow-generator-spec:
type: enum('residential_hpxml', 'commercial_default', required=True)
version: str(required=True)
args: map(key=regex(r'^[a-zA-Z_]\w*$', name='valid variable name'), required=False)

sampling-spec:
time: int(required=True)

sim-spec:
n_buildings_represented: int(required=True)
skip_sims: bool(required=False)
custom_gems: bool(required=False)

upgrade-spec:
upgrade_name: str(required=True)
options: list(include('option-spec'), required=True)
package_apply_logic: include('apply-logic-spec', required=False)
reference_scenario: str(required=False)

option-spec:
option: include('param_option-spec', required=True)
apply_logic: include('apply-logic-spec', required=False)
costs: list(include('cost-spec'), required=False)
lifetime: num(required=False)

param_option-spec: str(exclude=':(){}[]')

apply-logic-spec: >
any(
list(
include('and-spec'),
include('or-spec'),
include('not-spec'),
include('param_option-spec'),
),
include('and-spec'),
include('or-spec'),
include('not-spec'),
include('param_option-spec')
)
or-spec:
or: list(include('apply-logic-spec'))
and-spec:
and: list(include('apply-logic-spec'))
not-spec:
not: any(include('apply-logic-spec'), list(include('apply-logic-spec')))

cost-spec:
value: num(required=True)
multiplier: str(required=True)

postprocessing-spec:
partition_columns: list(str(), required=False)
aws: include('aws-postprocessing-spec', required=False)
keep_individual_timeseries: bool(required=False)

aws-postprocessing-spec:
region_name: str(required=False)
s3: include('s3-aws-postprocessing-spec', required=True)
athena: include('athena-aws-postprocessing-spec', required=False)

s3-aws-postprocessing-spec:
bucket: str(required=True)
prefix: str(required=True)

athena-aws-postprocessing-spec:
glue_service_role: str(required=False)
database_name: regex('^[a-z][a-z0-9_]*$', required=True)
max_crawling_time: num(requried=False)
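As a rough check of the new schema, here is a sketch that validates a minimal, entirely hypothetical project configuration against `buildstockbatch/schemas/v0.5.yaml` using yamale (the validator whose syntax the schema uses); the account, paths, and version strings are placeholders, not repo defaults:

```python
import tempfile

import yamale

project_yaml = """\
schema_version: '0.5'
buildstock_directory: /projects/myproject/resstock
project_directory: project_national
output_directory: /scratch/myuser/national_baseline
sampler:
  type: residential_quota
  args:
    n_datapoints: 100
workflow_generator:
  type: residential_hpxml
  version: '2024.07.20'
baseline:
  n_buildings_represented: 110000000
os_version: '3.8.0'
os_sha: '0123456789abcdef'
kestrel:                      # replaces the old top-level `eagle:` section
  account: myallocation
  minutes_per_sim: 10
  postprocessing:
    time: 60
    n_workers: 2
"""

schema = yamale.make_schema("buildstockbatch/schemas/v0.5.yaml")
with tempfile.NamedTemporaryFile("w", suffix=".yml", delete=False) as f:
    f.write(project_yaml)
data = yamale.make_data(f.name)
yamale.validate(schema, data)  # raises YamaleError if the config does not conform
```

Under strict validation, a leftover `eagle:` section would be rejected, since v0.5 no longer defines that key.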
2 changes: 1 addition & 1 deletion buildstockbatch/test/conftest.py
@@ -11,7 +11,7 @@
def basic_residential_project_file():
with tempfile.TemporaryDirectory() as test_directory:

def _basic_residential_project_file(update_args={}, raw=False, hpc_name="eagle"):
def _basic_residential_project_file(update_args={}, raw=False, hpc_name="kestrel"):
output_dir = "simulations_job0" if raw else "simulation_output"
buildstock_directory = os.path.join(test_directory, "openstudio_buildstock")
shutil.copytree(
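A hypothetical usage sketch for the updated fixture (not part of this commit); the tuple unpacking assumes the factory's existing return of a project filename and results directory:

```python
def test_project_file_defaults_to_kestrel(basic_residential_project_file):
    # The factory now writes a project file keyed with `kestrel:` unless told otherwise;
    # tests that still need another HPC key must pass hpc_name explicitly.
    project_filename, results_dir = basic_residential_project_file()
```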