Skip to content

Commit

Permalink
add debug
Browse files Browse the repository at this point in the history
  • Loading branch information
laszewsk committed Oct 4, 2023
1 parent 71ce9ee commit 7a9833c
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 476 deletions.
17 changes: 10 additions & 7 deletions benchmarks/cloudmask/target/greene_v0.5/a.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ submission:
platform: rivanna
accelerators_per_node: 1


experiment:
# card_name: v100
card_name: v100
Expand All @@ -57,8 +58,9 @@ experiment:
early_stoppage_patience: "25"
early_stoppage: "False"

identifier: "{experiment.card_name}-{experiment.early_stoppage}-{experiment.early_stoppage_patience}-{experiment.epoch}-{experiment.learningrate}-{experiment.repeat}"

identifier: "{experiment.card_name}-{experiment.early_stoppage}-{experiment.early_stoppage_patience}-{experiment.epoch}-{experiment.learning_rate}-{experiment.repeat}"


#system:
# host: "rivanna"
# python: "3.10.8"
Expand All @@ -84,9 +86,7 @@ train_dir: "/scratch/{os.USER}/data/one-day"
# Inference data
inference_dir: "/scratch/{os.USER}/data/ssts"

# Model file
model_file: "{os.TARGET}/outputs/slstr_cloud/cloudModel-{identifier}.h5"


# training
training_loss: binary_crossentropy
training_metrics: accuracy
Expand All @@ -95,11 +95,14 @@ training_metrics: accuracy
# Output directory
output_dir: "{os.TARGET}/outputs/slstr_cloud"

# Model file
model_file: "{output_dir}/cloudModel-{identifier}.h5"

# Log file for recording runtimes
log_file: ./cloudmask_final_1.log
log_file: "{output_dir}/cloudmask_final_{identifier}.log"

# Log file for MLCommons logging
mlperf_logfile: ./mlperf_cloudmask_final_1.log
mlperf_logfile: "{output_dir}/mlperf_cloudmask_final_{identifier}.log"

# Size of each patch to feed to the network
PATCH_SIZE: 256
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ submission:
platform: rivanna
accelerators_per_node: 1


experiment:
# card_name: v100
card_name: v100
Expand All @@ -57,8 +58,9 @@ experiment:
early_stoppage_patience: "25"
early_stoppage: "False"

identifier: "{experiment.card_name}-{experiment.early_stoppage}-{experiment.early_stoppage_patience}-{experiment.epoch}-{experiment.learningrate}-{experiment.repeat}"

identifier: "{experiment.card_name}-{experiment.early_stoppage}-{experiment.early_stoppage_patience}-{experiment.epoch}-{experiment.learning_rate}-{experiment.repeat}"


#system:
# host: "rivanna"
# python: "3.10.8"
Expand Down
18 changes: 18 additions & 0 deletions benchmarks/cloudmask/target/greene_v0.5/d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from cloudmesh.common.FlatDict import FlatDict
from cloudmesh.common.util import banner
import os
from pprint import pprint

# Debug helper: read the configuration file a.yaml through FlatDict and dump
# what the resulting object holds so it can be inspected by eye.

# expanduser() only rewrites a leading "~"; this name is passed through as is.
yaml_path = os.path.expanduser("a.yaml")
print("Config file:", yaml_path)

# The file name is handed to load() through its `content` keyword.
config = FlatDict()
config.load(content=yaml_path)

# String form of the loaded config; kept in a variable but not printed.
rendered = str(config)

# Show the concrete type, then the raw attribute dictionary of the object.
print(type(config))
pprint(config.__dict__)
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ cd $TARGET

cms gpu watch --gpu=0 --delay=0.5 --dense > outputs/gpu0.log &

python ./slstr_uva_nyu_cloud.py --config config_simple_rivanna.yaml
python ./cloudmask_v0.5.py --config config_simple_rivanna.yaml

seff $SLURM_JOB_ID
Loading

0 comments on commit 7a9833c

Please sign in to comment.