From c67bf19eea1230258b1935ad34522589c8b9ad79 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Fri, 10 Nov 2023 10:36:27 -0700 Subject: [PATCH 1/5] adding service account credentials --- buildstockbatch/hpc.py | 4 ++++ buildstockbatch/kestrel.sh | 1 + buildstockbatch/kestrel_postprocessing.sh | 3 ++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/buildstockbatch/hpc.py b/buildstockbatch/hpc.py index 652bcf06..4427d06e 100644 --- a/buildstockbatch/hpc.py +++ b/buildstockbatch/hpc.py @@ -654,6 +654,10 @@ def queue_post_processing(self, after_jobids=[], upload_only=False, hipri=False) logger.debug("sbatch: {}".format(line)) def get_dask_client(self): + # Keep this, helpful for debugging on a bigmem node + # from dask.distributed import LocalCluster + # cluster = LocalCluster(local_directory="/tmp/scratch/dask", n_workers=90, memory_limit="16GiB") + # return Client(cluster) return Client(scheduler_file=os.path.join(self.output_dir, "dask_scheduler.json")) def process_results(self, *args, **kwargs): diff --git a/buildstockbatch/kestrel.sh b/buildstockbatch/kestrel.sh index d85858b0..5d4b9422 100644 --- a/buildstockbatch/kestrel.sh +++ b/buildstockbatch/kestrel.sh @@ -12,5 +12,6 @@ df -h module load python apptainer source "$MY_PYTHON_ENV/bin/activate" +source /kfs2/shared-projects/buildstock/aws_credentials.sh time python -u -m buildstockbatch.hpc kestrel "$PROJECTFILE" diff --git a/buildstockbatch/kestrel_postprocessing.sh b/buildstockbatch/kestrel_postprocessing.sh index d90962f9..6c86f5ef 100644 --- a/buildstockbatch/kestrel_postprocessing.sh +++ b/buildstockbatch/kestrel_postprocessing.sh @@ -11,6 +11,7 @@ df -h module load python apptainer source "$MY_PYTHON_ENV/bin/activate" +source /kfs2/shared-projects/buildstock/aws_credentials.sh export POSTPROCESS=1 @@ -29,6 +30,6 @@ pdsh -w $SLURM_JOB_NODELIST_PACK_GROUP_1 "free -h" pdsh -w $SLURM_JOB_NODELIST_PACK_GROUP_1 "df -i; df -h" $MY_PYTHON_ENV/bin/dask scheduler --scheduler-file $SCHEDULER_FILE &> 
$OUT_DIR/dask_scheduler.out & -pdsh -w $SLURM_JOB_NODELIST_PACK_GROUP_1 "$MY_PYTHON_ENV/bin/dask worker --scheduler-file $SCHEDULER_FILE --local-directory /tmp/scratch/dask --nworkers ${NPROCS} --nthreads 1 --memory-limit ${MEMORY}MB" &> $OUT_DIR/dask_workers.out & +pdsh -w $SLURM_JOB_NODELIST_PACK_GROUP_1 "source /kfs2/shared-projects/buildstock/aws_credentials.sh; $MY_PYTHON_ENV/bin/dask worker --scheduler-file $SCHEDULER_FILE --local-directory /tmp/scratch/dask --nworkers ${NPROCS} --nthreads 1 --memory-limit ${MEMORY}MB" &> $OUT_DIR/dask_workers.out & time python -u -m buildstockbatch.hpc kestrel "$PROJECTFILE" From 9e613e6cd218e6ec9f1b0d1be75ab0d2a22a996f Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Fri, 10 Nov 2023 10:44:58 -0700 Subject: [PATCH 2/5] changelog --- docs/changelog/changelog_dev.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/changelog/changelog_dev.rst b/docs/changelog/changelog_dev.rst index 32db9051..c84f3d1b 100644 --- a/docs/changelog/changelog_dev.rst +++ b/docs/changelog/changelog_dev.rst @@ -28,3 +28,11 @@ Development Changelog :tickets: 313 Add support for NREL's Kestrel supercomputer. + + .. change:: + :tags: general, postprocessing + :pullreq: 414 + :tickets: 412 + + Add support for an AWS service account on Kestrel/Eagle so the user + doesn't have to manage AWS keys. 
From d1f3eb3aa3fffef73cb9882e83da204ec6c01ffc Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Fri, 10 Nov 2023 10:49:30 -0700 Subject: [PATCH 3/5] updating docs for AWS keys --- docs/changelog/migration_dev.rst | 6 ++++++ docs/installation.rst | 28 ++-------------------------- docs/project_defn.rst | 10 +++++----- 3 files changed, 13 insertions(+), 31 deletions(-) diff --git a/docs/changelog/migration_dev.rst b/docs/changelog/migration_dev.rst index 400b350d..3fa04d07 100644 --- a/docs/changelog/migration_dev.rst +++ b/docs/changelog/migration_dev.rst @@ -59,6 +59,12 @@ Calling buildstockbatch uses the ``buildstock_kestrel`` command line interface is very similar to Eagle. A few of the optional args were renamed in this version for consistency. +AWS Keys on Kestrel and Eagle +============================= + +You no longer need to manage AWS keys on Kestrel or Eagle. A service account has +been created for each and the software knows where to find those keys. + Schema Updates ============== diff --git a/docs/installation.rst b/docs/installation.rst index dc44facb..50cf380a 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -125,7 +125,8 @@ configure your user account with your AWS credentials. This setup only needs to Kestrel ~~~~~~~ -The most common way to run buildstockbatch on Kestrel will be to use a pre-built python environment. This is done as follows: +The most common way to run buildstockbatch on Kestrel will be to use a pre-built +python environment. This is done as follows: :: @@ -193,31 +194,6 @@ You can get a list of installed environments by looking in the envs directory ls /shared-projects/buildstock/envs -.. _aws-user-config-eagle: - -AWS User Configuration -...................... - -To use the automatic upload of processed results to AWS Athena, you'll need to -configure your user account with your AWS credentials. This setup only needs to -be done once. 
- First, `ssh into Eagle`_, then -issue the following commands - -:: - - module load conda - source activate /shared-projects/buildstock/envs/awscli - aws configure - -Follow the on screen instructions to enter your AWS credentials. When you are -done: - -:: - - source deactivate - Developer installation ...................... diff --git a/docs/project_defn.rst b/docs/project_defn.rst index 4177ca52..9ff12459 100644 --- a/docs/project_defn.rst +++ b/docs/project_defn.rst @@ -276,12 +276,12 @@ Uploading to AWS Athena BuildStock results can optionally be uploaded to AWS for further analysis using Athena. This process requires appropriate access to an AWS account to be -configured on your machine. You will need to set this up wherever you use buildstockbatch. -If you don't have -keys, consult your AWS administrator to get them set up. +configured on your machine. You will need to set this up wherever you use +buildstockbatch. If you don't have keys, consult your AWS administrator to get +them set up. The appropriate keys are already installed on Eagle and Kestrel, so +no action is required. -* :ref:`Local Docker AWS setup instructions <aws-user-config-local>` -* :ref:`Eagle AWS setup instructions <aws-user-config-eagle>` +* :ref:`Local AWS setup instructions <aws-user-config-local>` * `Detailed instructions from AWS `_ .. 
_post-config-opts: From ea057c4dd73d5c56fa90f9e2e47654ed08184949 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Fri, 10 Nov 2023 12:10:40 -0700 Subject: [PATCH 4/5] adding aws_credentials to eagle --- buildstockbatch/eagle.sh | 1 + buildstockbatch/eagle_postprocessing.sh | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/buildstockbatch/eagle.sh b/buildstockbatch/eagle.sh index 2974ff40..8a5aca08 100644 --- a/buildstockbatch/eagle.sh +++ b/buildstockbatch/eagle.sh @@ -11,5 +11,6 @@ df -h module load conda singularity-container source activate "$MY_CONDA_ENV" +source /shared-projects/buildstock/aws_credentials.sh time python -u -m buildstockbatch.hpc eagle "$PROJECTFILE" diff --git a/buildstockbatch/eagle_postprocessing.sh b/buildstockbatch/eagle_postprocessing.sh index 630dc9e1..26f9b638 100644 --- a/buildstockbatch/eagle_postprocessing.sh +++ b/buildstockbatch/eagle_postprocessing.sh @@ -9,6 +9,7 @@ df -h module load conda singularity-container source activate "$MY_CONDA_ENV" +source /shared-projects/buildstock/aws_credentials.sh export POSTPROCESS=1 @@ -27,6 +28,6 @@ pdsh -w $SLURM_JOB_NODELIST_PACK_GROUP_1 "free -h" pdsh -w $SLURM_JOB_NODELIST_PACK_GROUP_1 "df -i; df -h" $MY_CONDA_ENV/bin/dask scheduler --scheduler-file $SCHEDULER_FILE &> $OUT_DIR/dask_scheduler.out & -pdsh -w $SLURM_JOB_NODELIST_PACK_GROUP_1 "$MY_CONDA_ENV/bin/dask worker --scheduler-file $SCHEDULER_FILE --local-directory /tmp/scratch/dask --nworkers ${NPROCS} --nthreads 1 --memory-limit ${MEMORY}MB" &> $OUT_DIR/dask_workers.out & +pdsh -w $SLURM_JOB_NODELIST_PACK_GROUP_1 "source /shared-projects/buildstock/aws_credentials.sh; $MY_CONDA_ENV/bin/dask worker --scheduler-file $SCHEDULER_FILE --local-directory /tmp/scratch/dask --nworkers ${NPROCS} --nthreads 1 --memory-limit ${MEMORY}MB" &> $OUT_DIR/dask_workers.out & time python -u -m buildstockbatch.hpc eagle "$PROJECTFILE" From fe6ef7a7cb14fbcd52bdf860b00ee7e1fe9e582c Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Fri, 17 
Nov 2023 16:46:50 -0700 Subject: [PATCH 5/5] fixing test --- buildstockbatch/test/test_base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py index 658d82a9..47e8725b 100644 --- a/buildstockbatch/test/test_base.py +++ b/buildstockbatch/test/test_base.py @@ -128,7 +128,9 @@ def test_upload_files(mocked_boto3, basic_residential_project_file): } } mocked_glueclient = MagicMock() - mocked_glueclient.get_crawler = MagicMock(return_value={"Crawler": {"State": "READY"}}) + mocked_glueclient.get_crawler = MagicMock( + return_value={"Crawler": {"State": "READY", "LastCrawl": {"Status": "SUCCEEDED"}}} + ) mocked_boto3.client = MagicMock(return_value=mocked_glueclient) mocked_boto3.resource().Bucket().objects.filter.side_effect = [[], ["a", "b", "c"]] project_filename, results_dir = basic_residential_project_file(upload_config)