Skip to content

Commit

Permalink
Remove MLAB-heavy tasks
Browse files (browse the repository at this point in the history)
  • Loading branch information
danesherbs committed Mar 19, 2024
1 parent 64794ea commit f4bde4e
Show file tree
Hide file tree
Showing 8 changed files with 9 additions and 66 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,9 @@ fi
# Install every requirements.txt found beneath $start_directory, stopping at
# the first file that fails to install.
#
# `IFS= read -r` keeps each path exactly as `find` printed it (no trimming of
# leading/trailing whitespace, no backslash processing).
find "$start_directory" -type f -name 'requirements.txt' | while IFS= read -r file; do
  echo "Installing requirements from: $file"

  # Test pip's exit status directly rather than inspecting $? afterwards, so
  # no statement can slip in between the command and the check.
  if ! pip install -r "$file"; then
    # Diagnostics go to stderr so they are not mixed into normal output.
    echo "Error: Failed to install requirements from $file" >&2
    # NOTE: in bash the loop is the tail of a pipeline and by default runs in
    # a subshell, so this ends the loop and becomes the pipeline's exit status
    # rather than exiting the enclosing script directly.
    exit 1
  fi
done
3 changes: 0 additions & 3 deletions evals/registry/data/ml_agent_bench/babylm.jsonl

This file was deleted.

3 changes: 0 additions & 3 deletions evals/registry/data/ml_agent_bench/clrs.jsonl

This file was deleted.

3 changes: 0 additions & 3 deletions evals/registry/data/ml_agent_bench/fathomnet/fathomnet.jsonl

This file was deleted.

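3 changes: 0 additions & 3 deletions evals/registry/data/ml_agent_bench/identify_contrails/identify-contrails.jsonl

This file was deleted.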

3 changes: 0 additions & 3 deletions evals/registry/data/ml_agent_bench/llama-inference.jsonl

This file was deleted.

1 change: 0 additions & 1 deletion evals/registry/eval_sets/ml-agent-bench.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ ml-agent-bench:
- ml-agent-bench.humanoid
- ml-agent-bench.imdb
- ml-agent-bench.inverted-pendulum
- ml-agent-bench.llama-inference
- ml-agent-bench.ogbn-arxiv
- ml-agent-bench.parkinsons-disease
- ml-agent-bench.pong
Expand Down
54 changes: 4 additions & 50 deletions evals/registry/evals/ml-agent-bench.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,3 @@
# Alias entry: `id` names the versioned entry defined directly below;
# `metrics` lists the metric names associated with this eval.
ml-agent-bench.babylm:
  id: ml-agent-bench.babylm.v0
  metrics:
    [model_score, naive_baseline_score, human_baseline_score, model_score_normalized, naive_baseline_score_normalized, human_baseline_score_normalized, model_score_humanrelative]
# Versioned entry: the eval class (written as module:ClassName) and the
# samples file passed to it as an argument.
ml-agent-bench.babylm.v0:
  class: evals.elsuite.ml_agent_bench.eval:MLAgentBench
  args:
    samples_jsonl: ml_agent_bench/babylm.jsonl

ml-agent-bench.ant:
id: ml-agent-bench.ant.gpu.v0
metrics:
Expand All @@ -24,6 +15,10 @@ ml-agent-bench.cifar10:
id: ml-agent-bench.cifar10.v0
metrics:
[model_score, naive_baseline_score, human_baseline_score, model_score_normalized, naive_baseline_score_normalized, human_baseline_score_normalized, model_score_humanrelative]
# Versioned entry: the eval class (written as module:ClassName) and the
# samples file passed to it as an argument.
ml-agent-bench.cifar10.v0:
  class: evals.elsuite.ml_agent_bench.eval:MLAgentBench
  args:
    samples_jsonl: ml_agent_bench/cifar10.jsonl

ml-agent-bench.bipedal-walker:
id: ml-agent-bench.bipedal-walker.v0
Expand All @@ -43,29 +38,6 @@ ml-agent-bench.cartpole.v0:
args:
samples_jsonl: ml_agent_bench/cartpole.jsonl

# Versioned entry: the eval class (written as module:ClassName) and the
# samples file passed to it as an argument.
ml-agent-bench.cifar10.v0:
  class: evals.elsuite.ml_agent_bench.eval:MLAgentBench
  args:
    samples_jsonl: ml_agent_bench/cifar10.jsonl

# Alias entry: `id` names the versioned entry defined directly below;
# `metrics` lists the metric names associated with this eval.
ml-agent-bench.clrs:
  id: ml-agent-bench.clrs.v0
  metrics:
    [model_score, naive_baseline_score, human_baseline_score, model_score_normalized, naive_baseline_score_normalized, human_baseline_score_normalized, model_score_humanrelative]
# Versioned entry: the eval class (written as module:ClassName) and the
# samples file passed to it as an argument.
ml-agent-bench.clrs.v0:
  class: evals.elsuite.ml_agent_bench.eval:MLAgentBench
  args:
    samples_jsonl: ml_agent_bench/clrs.jsonl

# Alias entry: `id` names the versioned entry defined directly below;
# `metrics` lists the metric names associated with this eval.
ml-agent-bench.fathomnet:
  id: ml-agent-bench.fathomnet.v0
  metrics:
    [model_score, naive_baseline_score, human_baseline_score, model_score_normalized, naive_baseline_score_normalized, human_baseline_score_normalized, model_score_humanrelative]
# Versioned entry: the eval class (written as module:ClassName) and the
# samples file passed to it as an argument.
ml-agent-bench.fathomnet.v0:
  class: evals.elsuite.ml_agent_bench.eval:MLAgentBench
  args:
    samples_jsonl: ml_agent_bench/fathomnet/fathomnet.jsonl

ml-agent-bench.feedback:
id: ml-agent-bench.feedback.v0
metrics:
Expand Down Expand Up @@ -97,15 +69,6 @@ ml-agent-bench.humanoid.gpu.v0:
args:
samples_jsonl: ml_agent_bench/humanoid/gpu.jsonl

# Alias entry: `id` names the versioned entry defined directly below;
# `metrics` lists the metric names associated with this eval.
ml-agent-bench.identify-contrails:
  id: ml-agent-bench.identify-contrails.v0
  metrics:
    [model_score, naive_baseline_score, human_baseline_score, model_score_normalized, naive_baseline_score_normalized, human_baseline_score_normalized, model_score_humanrelative]
# Versioned entry: the eval class (written as module:ClassName) and the
# samples file passed to it as an argument.
ml-agent-bench.identify-contrails.v0:
  class: evals.elsuite.ml_agent_bench.eval:MLAgentBench
  args:
    samples_jsonl: ml_agent_bench/identify_contrails/identify-contrails.jsonl

ml-agent-bench.imdb:
id: ml-agent-bench.imdb.v0
metrics:
Expand Down Expand Up @@ -133,15 +96,6 @@ ml-agent-bench.parkinsons-disease.v0:
args:
samples_jsonl: ml_agent_bench/parkinsons_disease/parkinsons-disease.jsonl

# Alias entry: `id` names the versioned entry defined directly below;
# `metrics` lists the metric names associated with this eval.
ml-agent-bench.llama-inference:
  id: ml-agent-bench.llama-inference.v0
  metrics:
    [model_score, naive_baseline_score, human_baseline_score, model_score_normalized, naive_baseline_score_normalized, human_baseline_score_normalized, model_score_humanrelative]
# Versioned entry: the eval class (written as module:ClassName) and the
# samples file passed to it as an argument.
ml-agent-bench.llama-inference.v0:
  class: evals.elsuite.ml_agent_bench.eval:MLAgentBench
  args:
    samples_jsonl: ml_agent_bench/llama-inference.jsonl

ml-agent-bench.ogbn-arxiv:
id: ml-agent-bench.ogbn-arxiv.v0
metrics:
Expand Down

0 comments on commit f4bde4e

Please sign in to comment.