Skip to content

Commit

Permalink
Move from InMemory to OnDisk data
Browse files (browse the repository at this point in the history)
  • Loading branch information
Old-Shatterhand committed Oct 24, 2024
1 parent b2dba11 commit 663b14a
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions gifflar/data/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
import numpy as np
import pandas as pd
import torch
from torch_geometric.data import InMemoryDataset, HeteroData
from torch_geometric.data import InMemoryDataset, HeteroData, OnDiskDataset
from tqdm import tqdm

from gifflar.data.utils import GlycanStorage


class GlycanDataset(InMemoryDataset):
class GlycanDataset(OnDiskDataset):
def __init__(
self,
root: str | Path,
Expand All @@ -36,8 +36,8 @@ def __init__(
"""
self.filename = Path(filename)
self.dataset_args = dataset_args
super().__init__(root=str(Path(root) / f"{self.filename.stem}_{hash_code}"), transform=transform,
pre_transform=pre_transform)
self.pre_transform = pre_transform
super().__init__(root=str(Path(root) / f"{self.filename.stem}_{hash_code}"), transform=transform)
self.data, self.dataset_args = torch.load(self.processed_paths[path_idx])

def __len__(self) -> int:
Expand Down Expand Up @@ -68,7 +68,6 @@ def process_(self, data: list[HeteroData], path_idx: int = 0) -> None:
if self.pre_filter is not None:
data = [d for d in data if self.pre_filter(d)]
if self.pre_transform is not None:
# data = [self.pre_transform(d) for d in data]
data = self.pre_transform(data)

torch.save((data, self.dataset_args), self.processed_paths[path_idx])
Expand Down

0 comments on commit 663b14a

Please sign in to comment.