Merge pull request #475 from NREL/remove_eagle
rajeee authored Nov 6, 2024
2 parents 24d6523 + 069d0f5 commit bb444bb
Showing 21 changed files with 229 additions and 302 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug_report.md
@@ -29,7 +29,7 @@ log file here
```

**Platform (please complete the following information):**
-- Simulation platform: [e.g. Kestrel, Eagle, AWS, local docker; please label with this as well]
+- Simulation platform: [e.g. Kestrel, AWS, local docker; please label with this as well]
- BuildStockBatch version, branch, or sha:
- resstock or comstock repo version, branch, or sha:
- Local Desktop OS: [e.g. Windows, Mac, Linux, especially important if running locally]
2 changes: 1 addition & 1 deletion .github/pull_request_template.md
@@ -14,5 +14,5 @@ Not all may apply
- [ ] All other unit and integration tests passing
- [ ] Update validation for project config yaml file changes
- [ ] Update existing documentation
-- [ ] Run a small batch run on Kestrel/Eagle to make sure it all works if you made changes that will affect Kestrel/Eagle
+- [ ] Run a small batch run on Kestrel to make sure it all works if you made changes that will affect Kestrel
- [ ] Add to the changelog_dev.rst file and propose migration text in the pull request
16 changes: 0 additions & 16 deletions buildstockbatch/eagle.sh

This file was deleted.

33 changes: 0 additions & 33 deletions buildstockbatch/eagle_postprocessing.sh

This file was deleted.

2 changes: 1 addition & 1 deletion buildstockbatch/gcp/gcp.py
@@ -807,7 +807,7 @@ def process_results(self, skip_combine=False, use_dask_cluster=True):
Storage. The BSB implementation tries to write both indirectly (via
`postprocessing.combine_results()`, using `get_fs()`), and directly (through
`upload_results`). Which way the results end up on S3 depends on whether the script was run
-via aws.py (indirect write), or locally or Eagle (direct upload).
+via aws.py (indirect write), or locally or Kestrel (direct upload).
Here, where writing to GCS is (currently) coupled to running on GCS, the writing
to GCS will happen indirectly (via `postprocessing.combine_results()`), and we don't need to
57 changes: 8 additions & 49 deletions buildstockbatch/hpc.py
@@ -3,7 +3,7 @@
"""
buildstockbatch.hpc
~~~~~~~~~~~~~~~
-This class contains the object & methods that allow for usage of the library with Eagle and Kestrel
+This class contains the object & methods that allow for usage of the library with Kestrel
:author: Noel Merket
:copyright: (c) 2018 by The Alliance for Sustainable Energy
@@ -732,38 +732,6 @@ def rerun_failed_jobs(self, hipri=False):
        self.queue_post_processing(job_ids, hipri=hipri)


-class EagleBatch(SlurmBatch):
-    DEFAULT_SYS_IMAGE_DIR = "/shared-projects/buildstock/singularity_images"
-    HPC_NAME = "eagle"
-    CORES_PER_NODE = 36
-    MIN_SIMS_PER_JOB = 36 * 2
-    DEFAULT_POSTPROCESSING_NODE_MEMORY_MB = 85248
-    DEFAULT_NODE_MEMORY_MB = 85248  # standard node on Eagle
-    DEFAULT_POSTPROCESSING_N_PROCS = 18
-    DEFAULT_POSTPROCESSING_N_WORKERS = 2
-
-    @classmethod
-    def validate_output_directory_eagle(cls, project_file):
-        cfg = get_project_configuration(project_file)
-        output_dir = path_rel_to_file(project_file, cfg["output_directory"])
-        if not re.match(r"/(lustre/eaglefs/)?(scratch|projects)", output_dir):
-            raise ValidationError(
-                f"`output_directory` must be in /scratch or /projects," f" `output_directory` = {output_dir}"
-            )
-
-    @classmethod
-    def validate_project(cls, project_file):
-        super(cls, cls).validate_project(project_file)
-        cls.validate_output_directory_eagle(project_file)
-        logger.info("Eagle Validation Successful")
-        return True
-
-    @staticmethod
-    def _queue_jobs_env_vars() -> dict:
-        env = {"MY_CONDA_ENV": os.environ["CONDA_PREFIX"]}
-        return env
-
-
class KestrelBatch(SlurmBatch):
    DEFAULT_SYS_IMAGE_DIR = "/kfs2/shared-projects/buildstock/apptainer_images"
    HPC_NAME = "kestrel"
@@ -824,17 +792,13 @@ def _queue_jobs_env_vars() -> dict:
        }


-def eagle_cli(argv=sys.argv[1:]):
-    user_cli(EagleBatch, argv)
-
-
def kestrel_cli(argv=sys.argv[1:]):
    user_cli(KestrelBatch, argv)


def user_cli(Batch: SlurmBatch, argv: list):
    """
-    This is the user entry point for running buildstockbatch on Eagle/Kestrel
+    This is the user entry point for running buildstockbatch on Kestrel
    """
    # set up logging, currently based on within-this-file hard-coded config
    logging.config.dictConfig(logging_config)
@@ -916,24 +880,21 @@ def main():
    - upload results to Athena (job_array_number == 0 and POSTPROCESS and UPLOADONLY)
    The context for the work is deinfed by the project_filename (project .yml file),
-    which is used to initialize an EagleBatch object.
+    which is used to initialize an KestrelBatch object.
    """

    # set up logging, currently based on within-this-file hard-coded config
    logging.config.dictConfig(logging_config)

    # only direct script argument is the project .yml file
    parser = argparse.ArgumentParser()
-    parser.add_argument("hpc_name", choices=["eagle", "kestrel"])
+    parser.add_argument("hpc_name", choices=["kestrel"])
    parser.add_argument("project_filename")
    args = parser.parse_args()

-    # initialize the EagleBatch/KestrelBatch object
-    if args.hpc_name == "eagle":
-        batch = EagleBatch(args.project_filename)
-    else:
-        assert args.hpc_name == "kestrel"
-        batch = KestrelBatch(args.project_filename)
+    # initialize the KestrelBatch object
+    assert args.hpc_name == "kestrel"
+    batch = KestrelBatch(args.project_filename)
    # other arguments/cues about which part of the process we are in are
    # encoded in slurm job environment variables
    job_array_number = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0))
@@ -966,9 +927,7 @@

if __name__ == "__main__":
    bsb_cli = os.environ.get("BUILDSTOCKBATCH_CLI")
-    if bsb_cli == "eagle":
-        eagle_cli()
-    elif bsb_cli == "kestrel":
+    if bsb_cli == "kestrel":
        kestrel_cli()
    else:
        main()
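For reference, here is a standalone sketch (not code from this commit) of how the slimmed-down argument parsing in `main()` behaves after the change; the project filename is a placeholder:

```python
import argparse

# Reproduces the trimmed-down parser from main(); not imported from buildstockbatch.
parser = argparse.ArgumentParser()
parser.add_argument("hpc_name", choices=["kestrel"])
parser.add_argument("project_filename")

args = parser.parse_args(["kestrel", "project.yml"])  # placeholder filename
print(args.hpc_name)  # -> kestrel

# parser.parse_args(["eagle", "project.yml"]) now exits with:
#   error: argument hpc_name: invalid choice: 'eagle' (choose from 'kestrel')
```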
2 changes: 1 addition & 1 deletion buildstockbatch/postprocessing.py
@@ -537,7 +537,7 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True):

    # Determine how many files should be in each partition and group the files
    parquet_memory = int(
-        cfg.get("eagle", {}).get("postprocessing", {}).get("parquet_memory_mb", MAX_PARQUET_MEMORY)
+        cfg.get("kestrel", {}).get("postprocessing", {}).get("parquet_memory_mb", MAX_PARQUET_MEMORY)
    )
    logger.info(f"Max parquet memory: {parquet_memory} MB")
    max_files_per_partition = max(1, math.floor(parquet_memory / (mean_mem / 1e6)))
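A minimal sketch of the renamed lookup above, using a hypothetical in-memory config; the fallback value is illustrative rather than the module's actual `MAX_PARQUET_MEMORY` constant:

```python
MAX_PARQUET_MEMORY = 4096  # illustrative fallback only; see postprocessing.py for the real constant

# A project config that sets parquet memory under the new `kestrel` key.
cfg = {"kestrel": {"postprocessing": {"parquet_memory_mb": 1000}}}
parquet_memory = int(
    cfg.get("kestrel", {}).get("postprocessing", {}).get("parquet_memory_mb", MAX_PARQUET_MEMORY)
)
print(parquet_memory)  # -> 1000

# A project file still keyed on `eagle:` silently falls back to the default after this change.
old_cfg = {"eagle": {"postprocessing": {"parquet_memory_mb": 1000}}}
parquet_memory = int(
    old_cfg.get("kestrel", {}).get("postprocessing", {}).get("parquet_memory_mb", MAX_PARQUET_MEMORY)
)
print(parquet_memory)  # -> 4096
```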
2 changes: 1 addition & 1 deletion buildstockbatch/schemas/v0.4.yaml
@@ -5,8 +5,8 @@ weather_files_path: str(required=False)
weather_files_url: str(required=False)
sampler: include('sampler-spec', required=True)
workflow_generator: include('workflow-generator-spec', required=True)
eagle: include('hpc-spec', required=False)
kestrel: include('hpc-spec', required=False)
eagle: include('hpc-spec', required=False)
gcp: include('gcp-spec', required=False)
aws: include('aws-spec', required=False)
output_directory: regex('^(.*\/)?[a-z][a-z0-9_]*\/?$', required=True)
168 changes: 168 additions & 0 deletions buildstockbatch/schemas/v0.5.yaml
@@ -0,0 +1,168 @@
schema_version: enum('0.5')
buildstock_directory: str()
project_directory: str(required=True)
weather_files_path: str(required=False)
weather_files_url: str(required=False)
sampler: include('sampler-spec', required=True)
workflow_generator: include('workflow-generator-spec', required=True)
kestrel: include('hpc-spec', required=False)
gcp: include('gcp-spec', required=False)
aws: include('aws-spec', required=False)
output_directory: regex('^(.*\/)?[a-z][a-z0-9_]*\/?$', required=True)
sys_image_dir: str(required=False)
baseline: include('sim-spec', required=True)
os_version: str(required=True)
os_sha: str(required=True)
max_minutes_per_sim: int(max=1440, required=False)
upgrades: list(include('upgrade-spec'), required=False)
postprocessing: include('postprocessing-spec', required=False)
references: map(required=False)
---
gcp-spec:
# The GCP job ID (for Batch and Cloud Run) pattern is `^[a-z]([a-z0-9-]{0,61}[a-z0-9])?$`.
# For postprocessing job id, we append 3 characters ("-pp"), so this can be up to 60 chars.
job_identifier: regex('^[a-z]([a-z0-9-]{0,58}[a-z0-9])?$', required=True)
project: str(required=True)
region: str(required=True)
service_account: str(required=False)
artifact_registry: include('gcp-ar-spec', required=True)
batch_array_size: num(min=1, max=10000, required=True)
parallelism: num(min=1, max=10000, required=False)
gcs: include('gcs-spec', required=True)
job_environment: include('gcp-job-environment-spec', required=False)
postprocessing_environment: include('gcp-postprocessing_environment-spec', required=False)

gcs-spec:
bucket: str(required=True)
prefix: str(required=True)
upload_chunk_size_mib: num(min=5, max=5000, required=False)

gcp-ar-spec:
repository: str(required=True)

gcp-job-environment-spec:
vcpus: int(min=1, max=224, required=False)
memory_mib: int(min=512, required=False)
boot_disk_mib: int(required=False)
machine_type: str(required=False)
use_spot: bool(required=False)
minutes_per_sim: num(min=0.05, max=480, required=False)

gcp-postprocessing_environment-spec:
# Limits documented at
# https://cloud.google.com/run/docs/configuring/services/memory-limits
# https://cloud.google.com/run/docs/configuring/services/cpu
cpus: int(min=1, max=8, required=False)
memory_mib: int(min=512, max=32768, required=False)

aws-spec:
job_identifier: regex('^[a-zA-Z]\w{,9}$', required=True)
s3: include('s3-aws-postprocessing-spec', required=True)
region: str(required=True)
use_spot: bool(required=False)
spot_bid_percent: num(min=1, max=100, required=False)
batch_array_size: num(min=1, max=10000, required=True)
notifications_email: regex('^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$', name='email', required=True)
dask: include('aws-dask-spec', required=True)
job_environment: include('aws-job-environment', required=False)
tags: map(str(), str(), required=False)

aws-job-environment:
vcpus: int(min=1, max=36, required=False)
memory: int(min=1024, required=False)

aws-dask-spec:
scheduler_cpu: enum(1024, 2048, 4096, 8192, 16384, required=False)
scheduler_memory: int(min=1024, required=False)
worker_cpu: enum(1024, 2048, 4096, 8192, 16384, required=False)
worker_memory: int(min=1024, required=False)
n_workers: int(min=1, required=True)

hpc-spec:
account: str(required=True)
minutes_per_sim: num(min=0.05, max=480, required=True)
n_jobs: int(required=False)
postprocessing: include('hpc-postprocessing-spec', required=False)
sampling: include('sampling-spec', required=False)

hpc-postprocessing-spec:
time: int(required=True)
n_workers: int(min=1, max=32, required=False)
node_memory_mb: int(min=85248, max=751616, required=False)
n_procs: int(min=1, max=36, required=False)
parquet_memory_mb: int(min=100, max=4096, required=False)


sampler-spec:
type: str(required=True)
args: map(key=regex(r'^[a-zA-Z_]\w*$', name='valid variable name'), required=False)

workflow-generator-spec:
type: enum('residential_hpxml', 'commercial_default', required=True)
version: str(required=True)
args: map(key=regex(r'^[a-zA-Z_]\w*$', name='valid variable name'), required=False)

sampling-spec:
time: int(required=True)

sim-spec:
n_buildings_represented: int(required=True)
skip_sims: bool(required=False)
custom_gems: bool(required=False)

upgrade-spec:
upgrade_name: str(required=True)
options: list(include('option-spec'), required=True)
package_apply_logic: include('apply-logic-spec', required=False)
reference_scenario: str(required=False)

option-spec:
option: include('param_option-spec', required=True)
apply_logic: include('apply-logic-spec', required=False)
costs: list(include('cost-spec'), required=False)
lifetime: num(required=False)

param_option-spec: str(exclude=':(){}[]')

apply-logic-spec: >
any(
list(
include('and-spec'),
include('or-spec'),
include('not-spec'),
include('param_option-spec'),
),
include('and-spec'),
include('or-spec'),
include('not-spec'),
include('param_option-spec')
)
or-spec:
or: list(include('apply-logic-spec'))
and-spec:
and: list(include('apply-logic-spec'))
not-spec:
not: any(include('apply-logic-spec'), list(include('apply-logic-spec')))

cost-spec:
value: num(required=True)
multiplier: str(required=True)

postprocessing-spec:
partition_columns: list(str(), required=False)
aws: include('aws-postprocessing-spec', required=False)
keep_individual_timeseries: bool(required=False)

aws-postprocessing-spec:
region_name: str(required=False)
s3: include('s3-aws-postprocessing-spec', required=True)
athena: include('athena-aws-postprocessing-spec', required=False)

s3-aws-postprocessing-spec:
bucket: str(required=True)
prefix: str(required=True)

athena-aws-postprocessing-spec:
glue_service_role: str(required=False)
database_name: regex('^[a-z][a-z0-9_]*$', required=True)
max_crawling_time: num(requried=False)
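As a rough check of the new schema, here is a sketch that validates a minimal, entirely hypothetical project configuration against `buildstockbatch/schemas/v0.5.yaml` using yamale (the validator whose syntax the schema uses); the account, paths, and version strings are placeholders, not repo defaults:

```python
import tempfile

import yamale

project_yaml = """\
schema_version: '0.5'
buildstock_directory: /projects/myproject/resstock
project_directory: project_national
output_directory: /scratch/myuser/national_baseline
sampler:
  type: residential_quota
  args:
    n_datapoints: 100
workflow_generator:
  type: residential_hpxml
  version: '2024.07.20'
baseline:
  n_buildings_represented: 110000000
os_version: '3.8.0'
os_sha: '0123456789abcdef'
kestrel:                      # replaces the old top-level `eagle:` section
  account: myallocation
  minutes_per_sim: 10
  postprocessing:
    time: 60
    n_workers: 2
"""

schema = yamale.make_schema("buildstockbatch/schemas/v0.5.yaml")
with tempfile.NamedTemporaryFile("w", suffix=".yml", delete=False) as f:
    f.write(project_yaml)
data = yamale.make_data(f.name)
yamale.validate(schema, data)  # raises YamaleError if the config does not conform
```

Under strict validation, a leftover `eagle:` section would be rejected, since v0.5 no longer defines that key.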
2 changes: 1 addition & 1 deletion buildstockbatch/test/conftest.py
@@ -11,7 +11,7 @@
def basic_residential_project_file():
with tempfile.TemporaryDirectory() as test_directory:

def _basic_residential_project_file(update_args={}, raw=False, hpc_name="eagle"):
def _basic_residential_project_file(update_args={}, raw=False, hpc_name="kestrel"):
output_dir = "simulations_job0" if raw else "simulation_output"
buildstock_directory = os.path.join(test_directory, "openstudio_buildstock")
shutil.copytree(
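A hypothetical usage sketch for the updated fixture (not part of this commit); the tuple unpacking assumes the factory's existing return of a project filename and results directory:

```python
def test_project_file_defaults_to_kestrel(basic_residential_project_file):
    # The factory now writes a project file keyed with `kestrel:` unless told otherwise;
    # tests that still need another HPC key must pass hpc_name explicitly.
    project_filename, results_dir = basic_residential_project_file()
```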