From c9b632cd7879481b07c788a3447014f47d47f5a6 Mon Sep 17 00:00:00 2001 From: Roman Joeres Date: Mon, 19 Aug 2024 11:53:52 +0200 Subject: [PATCH] Setup for clean run --- configs/downstream/all.yaml | 79 +++++++++++----------- configs/downstream/both.yaml | 39 +++++++++++ configs/downstream/dl.yaml | 21 ------ configs/downstream/gnngly.yaml | 32 ++------- configs/downstream/{dev.yaml => lppe.yaml} | 11 +-- configs/downstream/rwpe.yaml | 37 ++++++++++ gifflar/baselines/gnngly.py | 6 +- gifflar/train.py | 2 +- 8 files changed, 133 insertions(+), 94 deletions(-) create mode 100644 configs/downstream/both.yaml delete mode 100644 configs/downstream/dl.yaml rename configs/downstream/{dev.yaml => lppe.yaml} (88%) create mode 100644 configs/downstream/rwpe.yaml diff --git a/configs/downstream/all.yaml b/configs/downstream/all.yaml index 7916acf..98f1fbd 100644 --- a/configs/downstream/all.yaml +++ b/configs/downstream/all.yaml @@ -1,65 +1,66 @@ seed: 42 data_dir: /scratch/SCRATCH_SAS/roman/Gothenburg/GIFFLAR/ root_dir: /scratch/SCRATCH_SAS/roman/Gothenburg/GIFFLAR/data +logs_dir: logs datasets: - - name: Immunogenicity - task: classification - - name: Glycosylation - task: classification + #- name: Immunogenicity + # task: classification + #- name: Glycosylation + # task: classification - name: Taxonomy_Domain task: multilabel - - name: Taxonomy_Phylum - task: multilabel - - name: Taxonomy_Class - task: multilabel - - name: Taxonomy_Order - task: multilabel - - name: Taxonomy_Family - task: multilabel - - name: Taxonomy_Genus - task: multilabel - - name: Taxonomy_Species - task: multilabel + #- name: Taxonomy_Phylum + # task: multilabel + #- name: Taxonomy_Class + # task: multilabel + #- name: Taxonomy_Order + # task: multilabel + #- name: Taxonomy_Family + # task: multilabel + #- name: Taxonomy_Genus + # task: multilabel + #- name: Taxonomy_Species + # task: multilabel pre-transforms: model: - - name: rf - n_estimators: 500 - n_jobs: -1 - random_state: 42 - - name: svm - random_state: 42 - - name: xgb - random_state: 42 + #- name: rf + # n_estimators: 500 + # n_jobs: -1 + # random_state: 42 + #- name: svm + # random_state: 42 + #- name: xgb + # random_state: 42 - name: mlp hidden_dim: 1024 batch_size: 256 num_layers: 3 - epochs: 100 + epochs: 1 patience: 30 learning_rate: 0 optimizer: Adam - name: sweetnet - hidden_dim: 768 - batch_size: 256 - epochs: 50 + hidden_dim: 1024 + batch_size: 512 + epochs: 1 patience: 30 learning_rate: 0.001 optimizer: Adam - suffix: _768 + suffix: - name: gnngly - hidden_dim: 14 - batch_size: 256 + hidden_dim: 1024 + batch_size: 512 num_layers: 5 - epochs: 200 + epochs: 1 patience: 30 - learning_rate: 0 + learning_rate: 0.001 optimizer: Adam - suffix: _5_14 + suffix: - name: gifflar - hidden_dim: 768 - batch_size: 32 - num_layers: 6 - epochs: 100 + hidden_dim: 1024 + batch_size: 512 + num_layers: 8 + epochs: 1 learning_rate: 0.001 optimizer: Adam - suffix: _768_6 + suffix: diff --git a/configs/downstream/both.yaml b/configs/downstream/both.yaml new file mode 100644 index 0000000..b9c6c54 --- /dev/null +++ b/configs/downstream/both.yaml @@ -0,0 +1,39 @@ +seed: 42 +data_dir: /scratch/SCRATCH_SAS/roman/Gothenburg/GIFFLAR/ +root_dir: /scratch/SCRATCH_SAS/roman/Gothenburg/GIFFLAR/data +logs_dir: logs +datasets: + - name: Immunogenicity + task: classification + - name: Glycosylation + task: classification + - name: Taxonomy_Domain + task: multilabel + - name: Taxonomy_Kingdom + task: multilabel + - name: Taxonomy_Phylum + task: multilabel + - name: Taxonomy_Class + task: multilabel + - name: Taxonomy_Order + task: multilabel + - name: Taxonomy_Family + task: multilabel + - name: Taxonomy_Genus + task: multilabel + - name: Taxonomy_Species + task: multilabel +pre-transforms: + LaplacianPE: + dim: 20 + RandomWalkPE: + dim: 20 +model: + - name: gifflar + hidden_dim: 1024 + batch_size: 512 + num_layers: 8 + epochs: 100 + learning_rate: 0.001 + optimizer: Adam + suffix: _both diff --git a/configs/downstream/dl.yaml b/configs/downstream/dl.yaml deleted file mode 100644 index 0ccec0b..0000000 --- a/configs/downstream/dl.yaml +++ /dev/null @@ -1,21 +0,0 @@ -seed: 42 -data_dir: /scratch/SCRATCH_SAS/roman/Gothenburg/GIFFLAR/ -root_dir: /scratch/SCRATCH_SAS/roman/Gothenburg/GIFFLAR/data -logs_dir: logs_dl -datasets: - - name: Taxonomy_Phylum - task: multilabel -pre-transforms: - LaplacianPE: - dim: 20 - RandomWalkPE: - dim: 20 -model: - - name: gifflar - hidden_dim: 1024 - batch_size: 256 - num_layers: 8 - epochs: 1 - learning_rate: 0.001 - optimizer: Adam - suffix: _1024_8 diff --git a/configs/downstream/gnngly.yaml b/configs/downstream/gnngly.yaml index 4803b94..d381b90 100644 --- a/configs/downstream/gnngly.yaml +++ b/configs/downstream/gnngly.yaml @@ -1,35 +1,17 @@ seed: 42 data_dir: /scratch/SCRATCH_SAS/roman/Gothenburg/GIFFLAR/ root_dir: /scratch/SCRATCH_SAS/roman/Gothenburg/GIFFLAR/data +logs_dir: logs datasets: - # - name: Immunogenicity - # task: classification - # - name: Glycosylation - # task: classification - # - name: Taxonomy_Domain - # task: multilabel - # - name: Taxonomy_Kingdom - # task: multilabel - # - name: Taxonomy_Phylum - # task: multilabel - # - name: Taxonomy_Class - # task: multilabel - # - name: Taxonomy_Order - # task: multilabel - # - name: Taxonomy_Family - # task: multilabel - # - name: Taxonomy_Genus - # task: multilabel - # - name: Taxonomy_Species - # task: multilabel + - name: Taxonomy_Domain + task: multilabel pre-transforms: model: - name: gnngly - hidden_dim: 14 - batch_size: 256 + hidden_dim: 1024 + batch_size: 512 num_layers: 5 - epochs: 200 + epochs: 1 patience: 30 - learning_rate: 0 + learning_rate: 0.001 optimizer: Adam - suffix: _5_14 diff --git a/configs/downstream/dev.yaml b/configs/downstream/lppe.yaml similarity index 88% rename from configs/downstream/dev.yaml rename to configs/downstream/lppe.yaml index 360fb82..fba5f63 100644 --- a/configs/downstream/dev.yaml +++ b/configs/downstream/lppe.yaml @@ -1,7 +1,7 @@ seed: 42 data_dir: /scratch/SCRATCH_SAS/roman/Gothenburg/GIFFLAR/ root_dir: /scratch/SCRATCH_SAS/roman/Gothenburg/GIFFLAR/data -logs_dir: logs_1024_10 +logs_dir: logs datasets: - name: Immunogenicity task: classification @@ -24,13 +24,14 @@ datasets: - name: Taxonomy_Species task: multilabel pre-transforms: + LaplacianPE: + dim: 20 model: - name: gifflar hidden_dim: 1024 - batch_size: 33 - num_layers: 5 + batch_size: 512 + num_layers: 8 epochs: 100 learning_rate: 0.001 optimizer: Adam - suffix: _1024_5 - + suffix: _lp diff --git a/configs/downstream/rwpe.yaml b/configs/downstream/rwpe.yaml new file mode 100644 index 0000000..7daa5c4 --- /dev/null +++ b/configs/downstream/rwpe.yaml @@ -0,0 +1,37 @@ +seed: 42 +data_dir: /scratch/SCRATCH_SAS/roman/Gothenburg/GIFFLAR/ +root_dir: /scratch/SCRATCH_SAS/roman/Gothenburg/GIFFLAR/data +logs_dir: logs +datasets: + - name: Immunogenicity + task: classification + - name: Glycosylation + task: classification + - name: Taxonomy_Domain + task: multilabel + - name: Taxonomy_Kingdom + task: multilabel + - name: Taxonomy_Phylum + task: multilabel + - name: Taxonomy_Class + task: multilabel + - name: Taxonomy_Order + task: multilabel + - name: Taxonomy_Family + task: multilabel + - name: Taxonomy_Genus + task: multilabel + - name: Taxonomy_Species + task: multilabel +pre-transforms: + RandomWalkPE: + dim: 20 +model: + - name: gifflar + hidden_dim: 1024 + batch_size: 512 + num_layers: 8 + epochs: 100 + learning_rate: 0.001 + optimizer: Adam + suffix: _rw diff --git a/gifflar/baselines/gnngly.py b/gifflar/baselines/gnngly.py index a9f65f4..e78ef74 100644 --- a/gifflar/baselines/gnngly.py +++ b/gifflar/baselines/gnngly.py @@ -31,8 +31,8 @@ def __init__(self, output_dim, task, **kwargs): """ super().__init__(14, output_dim, task) - del self.convs - del self.head + # del self.convs + # del self.head # Five layers of plain graph convolution with a hidden dimension of 14. self.layers = [ @@ -71,7 +71,7 @@ def forward(self, batch): Dict holding the node embeddings, the graph embedding, and the final model prediction """ # Extract atomic graph from the heterogeneous graph - x = batch["gnngly_x"] + x = batch["gnngly_x"].float() batch_ids = batch["gnngly_batch"] edge_index = batch["gnngly_edge_index"] diff --git a/gifflar/train.py b/gifflar/train.py index 41675b5..2d975dc 100644 --- a/gifflar/train.py +++ b/gifflar/train.py @@ -36,7 +36,7 @@ def setup(**kwargs): pre_transform=get_pretransforms(**(kwargs["pre-transforms"] or {})), **data_config, ) data_config["num_classes"] = datamodule.train.dataset_args["num_classes"] - logger = CSVLogger(kwargs["logs_dir"], name=kwargs["model"]["name"] + kwargs["model"].get("suffix", "")) + logger = CSVLogger(kwargs["logs_dir"], name=kwargs["model"]["name"] + (kwargs["model"].get("suffix", None) or "")) kwargs["dataset"]["filepath"] = str(data_config["filepath"]) logger.log_hyperparams(kwargs) metrics = get_metrics(data_config["task"], data_config["num_classes"])