Update for backend changes #243

Merged: 13 commits, Dec 17, 2024
1 change: 1 addition & 0 deletions docs/user-guide/testing.ipynb
@@ -36,6 +36,7 @@
 "\n",
 "dataset = Dataset(\n",
 "    type=\"raw\",\n",
+"    name=\"Important data\",\n",
 "    owner_group=\"faculty\",\n",
 "    owner=\"ridcully\",\n",
 "    principal_investigator=\"Ridcully\",\n",
2 changes: 1 addition & 1 deletion src/scitacean/_base_model.py
@@ -313,7 +313,7 @@ def _model_field_name_of(cls_name: str, name: str) -> str:
 
     Converts snake_case to camelCase and strips leading underscores.
     E.g.,
-    `proposal_id` -> `proposalId`,
+    `proposal_ids` -> `proposalIds`,
     `_created_at` -> `createdAt`,
     `_History__id` -> `id`.
     """
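For reference, the conversion this docstring describes can be sketched as follows. This is an illustrative re-implementation based only on the documented examples, not the module's actual code:

    def _model_field_name_of(cls_name: str, name: str) -> str:
        # Strip the dataclass name-mangling prefix, e.g. "_History__id" -> "id".
        name = name.removeprefix(f"_{cls_name}__")
        # Strip leading underscores, e.g. "_created_at" -> "created_at".
        name = name.lstrip("_")
        # snake_case -> camelCase, e.g. "proposal_ids" -> "proposalIds".
        first, *rest = name.split("_")
        return first + "".join(part.capitalize() for part in rest)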
176 changes: 126 additions & 50 deletions src/scitacean/_dataset_fields.py

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions src/scitacean/client.py
@@ -761,15 +761,15 @@ def query_datasets(
 
         .. code-block:: python
 
-            scicat_client.query_datasets({'proposalId': 'abc.123'})
+            scicat_client.query_datasets({'proposalIds': ['abc.123']})
 
         Get all datasets that belong to proposal ``abc.123``
         **and** have name ``"ds name"``: (The name and proposal must match exactly.)
 
         .. code-block:: python
 
             scicat_client.query_datasets({
-                'proposalId': 'abc.123',
+                'proposalIds': ['abc.123'],
                 'datasetName': 'ds name'
             })
 
@@ -778,7 +778,7 @@ def query_datasets(
         .. code-block:: python
 
             scicat_client.query_datasets(
-                {'proposalId': 'bc.123'},
+                {'proposalIds': ['bc.123']},
                 limit=5,
                 order="creationTime:desc",
             )
@@ -1224,7 +1224,7 @@ def _log_in_via_users_login(
 ) -> httpx.Response:
     # Currently only used for functional accounts.
     response = httpx.post(
-        _url_concat(url, "Users/login"),
+        _url_concat(url, "auth/login"),
         json={"username": username.get_str(), "password": password.get_str()},
         timeout=timeout.seconds,
     )
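Net effect for callers: proposal, sample, and instrument ids are now list-valued in queries, and functional-account login goes through `auth/login`. A minimal usage sketch of the new query form (URL and token are placeholders):

    from scitacean import Client

    client = Client.from_token(url="https://scicat.example.com/api/v3", token="...")
    # Note the list value even when filtering on a single proposal.
    datasets = client.scicat.query_datasets(
        {"proposalIds": ["abc.123"]},
        limit=5,
        order="creationTime:desc",
    )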
4 changes: 3 additions & 1 deletion src/scitacean/dataset.py
@@ -437,7 +437,7 @@ def make_upload_model(self) -> UploadDerivedDataset | UploadRawDataset:
         )
         # Datablocks are not included here because they are handled separately
         # by make_datablock_upload_models and their own endpoints.
-        special = ("relationships", "techniques")
+        special = ("relationships", "techniques", "input_datasets", "used_software")
         return model(
             numberOfFiles=self.number_of_files,
             numberOfFilesArchived=self.number_of_files_archived,
@@ -450,6 +450,8 @@ def make_upload_model(self) -> UploadDerivedDataset | UploadRawDataset:
             relationships=convert_user_to_upload_model(  # type: ignore[arg-type]
                 self.relationships
             ),
+            inputDatasets=self.input_datasets or [],
+            usedSoftware=self.used_software or [],
             **{
                 field.scicat_name: value
                 for field in self.fields()
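The `or []` fallback matters because the upload models now declare `inputDatasets` and `usedSoftware` as required lists, while the corresponding dataset fields may still be `None`. A contrived sketch of the coercion:

    # Hypothetical values standing in for an unset and a set dataset field.
    input_datasets = None
    used_software = ["scitacean"]

    payload = {
        "inputDatasets": input_datasets or [],  # None -> []
        "usedSoftware": used_software or [],    # unchanged
    }
    assert payload["inputDatasets"] == []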
70 changes: 55 additions & 15 deletions src/scitacean/model.py
@@ -101,14 +101,14 @@
 from .thumbnail import Thumbnail
 
 
+# TODO remove extra masks after API v4
 class DownloadDataset(
-    BaseModel, masked=("attachments", "datablocks", "history", "origdatablocks")
+    BaseModel, masked=("history", "proposalId", "sampleId", "instrumentId")
 ):
     contactEmail: str | None = None
     creationLocation: str | None = None
     creationTime: datetime | None = None
     inputDatasets: list[PID] | None = None
-    investigator: str | None = None
     numberOfFilesArchived: NonNegativeInt | None = None
     owner: str | None = None
     ownerGroup: str | None = None
@@ -127,7 +127,7 @@ class DownloadDataset(
     description: str | None = None
     endTime: datetime | None = None
     instrumentGroup: str | None = None
-    instrumentId: str | None = None
+    instrumentIds: list[str] | None = None
     isPublished: bool | None = None
     jobLogData: str | None = None
     jobParameters: dict[str, Any] | None = None
@@ -141,12 +141,13 @@ class DownloadDataset(
     ownerEmail: str | None = None
     packedSize: NonNegativeInt | None = None
     pid: PID | None = None
-    proposalId: str | None = None
+    proposalIds: list[str] | None = None
     relationships: list[DownloadRelationship] | None = None
-    sampleId: str | None = None
+    sampleIds: list[str] | None = None
     sharedWith: list[str] | None = None
     size: NonNegativeInt | None = None
     sourceFolderHost: str | None = None
+    startTime: datetime | None = None
     techniques: list[DownloadTechnique] | None = None
     updatedAt: datetime | None = None
     updatedBy: str | None = None
@@ -166,6 +167,25 @@ def _validate_emails(cls, value: Any) -> Any:
     def _validate_orcids(cls, value: Any) -> Any:
         return validate_orcids(value)
 
+    # TODO remove after API v4
+    @pydantic.field_validator("sampleIds", mode="before")
+    def _validate_sample_ids(cls, value: Any) -> Any:
+        if value == [None]:
+            return []
+        return value
+
+    @pydantic.field_validator("proposalIds", mode="before")
+    def _validate_proposal_ids(cls, value: Any) -> Any:
+        if value == [None]:
+            return []
+        return value
+
+    @pydantic.field_validator("instrumentIds", mode="before")
+    def _validate_instrument_ids(cls, value: Any) -> Any:
+        if value == [None]:
+            return []
+        return value
 
 
 class UploadDerivedDataset(BaseModel):
     contactEmail: str
@@ -178,6 +198,7 @@ class UploadDerivedDataset(BaseModel):
     sourceFolder: RemotePath
     type: DatasetType
     usedSoftware: list[str]
+    datasetName: str
     accessGroups: list[str] | None = None
     classification: str | None = None
     comment: str | None = None
@@ -190,11 +211,11 @@ class UploadDerivedDataset(BaseModel):
     keywords: list[str] | None = None
     license: str | None = None
     scientificMetadata: dict[str, Any] | None = None
-    datasetName: str | None = None
     numberOfFiles: NonNegativeInt | None = None
     orcidOfOwner: str | None = None
     ownerEmail: str | None = None
     packedSize: NonNegativeInt | None = None
+    proposalId: str | None = None
     relationships: list[UploadRelationship] | None = None
     sharedWith: list[str] | None = None
     size: NonNegativeInt | None = None
@@ -219,12 +240,16 @@ class UploadRawDataset(BaseModel):
     contactEmail: str
     creationLocation: str
     creationTime: datetime
+    inputDatasets: list[PID]
     numberOfFilesArchived: NonNegativeInt
     owner: str
     ownerGroup: str
     principalInvestigator: str
     sourceFolder: RemotePath
     type: DatasetType
+    usedSoftware: list[str]
+    datasetName: str
+    investigator: str | None = None
     accessGroups: list[str] | None = None
     classification: str | None = None
     comment: str | None = None
@@ -235,10 +260,11 @@ class UploadRawDataset(BaseModel):
     instrumentGroup: str | None = None
     instrumentId: str | None = None
     isPublished: bool | None = None
+    jobLogData: str | None = None
+    jobParameters: dict[str, Any] | None = None
     keywords: list[str] | None = None
     license: str | None = None
     scientificMetadata: dict[str, Any] | None = None
-    datasetName: str | None = None
     numberOfFiles: NonNegativeInt | None = None
     orcidOfOwner: str | None = None
     ownerEmail: str | None = None
@@ -249,9 +275,23 @@ class UploadRawDataset(BaseModel):
     sharedWith: list[str] | None = None
     size: NonNegativeInt | None = None
     sourceFolderHost: str | None = None
     startTime: datetime | None = None
     techniques: list[UploadTechnique] | None = None
     validationStatus: str | None = None
 
+    @pydantic.model_validator(mode="before")
+    @classmethod
+    def _set_investigator(cls, data: Any) -> Any:
+        # The model currently has both `investigator` and `principalInvestigator`
+        # and both are mandatory. Eventually, `investigator` will be removed.
+        # So make sure we can construct the model if only one is given.
+        if isinstance(data, dict):
+            if (inv := data.get("investigator")) is not None:
+                data.setdefault("principalInvestigator", inv)
+            elif (pi := data.get("principalInvestigator")) is not None:
+                data["investigator"] = pi
+        return data
+
     @pydantic.field_validator("creationTime", "endTime", mode="before")
     def _validate_datetime(cls, value: Any) -> Any:
         return validate_datetime(value)
@@ -316,13 +356,13 @@ def download_model_type(cls) -> type[DownloadAttachment]:
 
 class DownloadOrigDatablock(BaseModel):
     dataFileList: list[DownloadDataFile] | None = None
+    datasetId: PID | None = None
     size: NonNegativeInt | None = None
     id: str | None = pydantic.Field(alias="_id", default=None)
     accessGroups: list[str] | None = None
     chkAlg: str | None = None
     createdAt: datetime | None = None
     createdBy: str | None = None
-    datasetId: PID | None = None
     instrumentGroup: str | None = None
     isPublished: bool | None = None
     ownerGroup: str | None = None


class DownloadHistory(BaseModel):
id: str | None = pydantic.Field(alias="_id", default=None)
id: str | None = None
updatedAt: datetime | None = None
updatedBy: datetime | None = None
updatedBy: str | None = None

@pydantic.field_validator("updatedAt", mode="before")
def _validate_datetime(cls, value: Any) -> Any:
@@ -764,20 +804,20 @@ def download_model_type(cls) -> type[DownloadRelationship]:
 
 @dataclass(kw_only=True, slots=True)
 class History(BaseUserModel):
-    __id: str | None = None
+    _id: str | None = None
     _updated_at: datetime | None = None
-    _updated_by: datetime | None = None
+    _updated_by: str | None = None
 
     @property
-    def _id(self) -> str | None:
-        return self.__id
+    def id(self) -> str | None:
+        return self._id
 
     @property
     def updated_at(self) -> datetime | None:
         return self._updated_at
 
     @property
-    def updated_by(self) -> datetime | None:
+    def updated_by(self) -> str | None:
         return self._updated_by
 
     @classmethod
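Two behaviors in this file are worth illustrating: `_set_investigator` mirrors whichever of `investigator` / `principalInvestigator` is given, and the `*Ids` validators collapse the backend's transitional `[None]` responses to empty lists. A sketch of the observable behavior, assuming the models as defined above (field values are placeholders):

    from scitacean.model import DownloadDataset, UploadRawDataset

    raw = UploadRawDataset(
        contactEmail="p.stibbons@uu.am",
        creationLocation="UU",
        creationTime="2024-12-17T12:00:00Z",
        inputDatasets=[],
        numberOfFilesArchived=0,
        owner="stibbons",
        ownerGroup="uu",
        principalInvestigator="Ponder Stibbons",
        sourceFolder="/hex/data",
        type="raw",
        usedSoftware=[],
        datasetName="hex run",
    )
    assert raw.investigator == "Ponder Stibbons"  # filled in by _set_investigator

    ds = DownloadDataset(proposalIds=[None])
    assert ds.proposalIds == []  # normalized by the before-validator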
6 changes: 6 additions & 0 deletions src/scitacean/testing/backend/seed.py
@@ -48,6 +48,7 @@
     ownerEmail="PLACE@HOLD.ER",
     sourceFolder=RemotePath("/hex/data/123"),
     type=DatasetType.RAW,
+    investigator="Ponder Stibbons",
     principalInvestigator="Ponder Stibbons",
     creationLocation=SITE,
     techniques=[UploadTechnique(pid="DM666", name="dark_magic")],
@@ -56,6 +57,8 @@
         "temperature": {"value": "123", "unit": "K"},
         "weight": {"value": "42", "unit": "mg"},
     },
+    usedSoftware=[],
+    inputDatasets=[],
 ),
 "derived": UploadDerivedDataset(
     ownerGroup="PLACEHOLDER",
@@ -93,9 +96,12 @@
     ownerEmail="PLACE@HOLD.ER",
     sourceFolder=RemotePath("/hex/secret/stuff"),
     type=DatasetType.RAW,
+    investigator="Mustrum Ridcully",
     principalInvestigator="Mustrum Ridcully",
     creationLocation=SITE,
     techniques=[UploadTechnique(pid="S", name="shoes")],
+    inputDatasets=[],
+    usedSoftware=["scitacean"],
 ),
 "partially-broken": model.construct(
     UploadDerivedDataset,
7 changes: 7 additions & 0 deletions src/scitacean/testing/client.py
@@ -324,6 +324,13 @@ def _process_dataset(
     )
     if "techniques" in fields:
         fields["techniques"] = list(map(_process_technique, fields["techniques"]))
+
+    # TODO remove in API v4
+    for singular in ("proposalId", "sampleId", "instrumentId"):
+        if singular in fields:
+            fields[singular + "s"] = [fields[singular]]
+    fields.pop("investigator")
+
     return model.construct(
         model.DownloadDataset,
         _strict_validation=False,
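This mirrors the real backend during the transition: singular ids come back wrapped in single-element list fields, and `investigator` disappears from responses. A quick illustration with a hypothetical field dict:

    fields = {"proposalId": "abc.123", "investigator": "Ridcully"}

    for singular in ("proposalId", "sampleId", "instrumentId"):
        if singular in fields:
            fields[singular + "s"] = [fields[singular]]
    fields.pop("investigator")

    assert fields["proposalIds"] == ["abc.123"]
    assert "investigator" not in fields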
1 change: 1 addition & 0 deletions tests/client/attachment_client_test.py
@@ -29,6 +29,7 @@ def scicat_client(client: Client) -> ScicatClient:
 @pytest.fixture
 def derived_dataset(scicat_access):
     return UploadDerivedDataset(
+        datasetName="Koelsche Lieder",
         contactEmail="black.foess@dom.koelle",
         creationTime=parse_date("1995-11-11T11:11:11.000Z"),
         owner="bfoess",
1 change: 1 addition & 0 deletions tests/client/datablock_client_test.py
@@ -26,6 +26,7 @@ def scicat_client(client: Client) -> ScicatClient:
 @pytest.fixture
 def derived_dataset(scicat_access):
     return UploadDerivedDataset(
+        datasetName="Koelsche Lieder",
        contactEmail="black.foess@dom.koelle",
         creationTime=parse_date("1995-11-11T11:11:11.000Z"),
         owner="bfoess",
9 changes: 5 additions & 4 deletions tests/client/dataset_client_test.py
@@ -26,6 +26,7 @@ def scicat_client(client: Client) -> ScicatClient:
 @pytest.fixture
 def derived_dataset(scicat_access):
     return UploadDerivedDataset(
+        datasetName="Koelsche Lieder",
         contactEmail="black.foess@dom.koelle",
         creationTime=parse_date("1995-11-11T11:11:11.000Z"),
         owner="bfoess",
@@ -43,7 +44,7 @@ def derived_dataset(scicat_access):
 @pytest.mark.parametrize("key", ["raw", "derived"])
 def test_get_dataset_model(scicat_client, key):
     dset = INITIAL_DATASETS[key]
-    downloaded = scicat_client.get_dataset_model(dset.pid)
+    downloaded = scicat_client.get_dataset_model(dset.pid, strict_validation=True)
     # The backend may update the dataset after upload.
     # We cannot easily predict when that happens.
     downloaded.updatedAt = dset.updatedAt
@@ -57,7 +58,7 @@ def test_get_dataset_model_bad_id(scicat_client):
 
 def test_create_dataset_model(scicat_client, derived_dataset):
     finalized = scicat_client.create_dataset_model(derived_dataset)
-    downloaded = scicat_client.get_dataset_model(finalized.pid)
+    downloaded = scicat_client.get_dataset_model(finalized.pid, strict_validation=True)
     for key, expected in finalized:
         # The database populates a number of fields that are None in dset.
         # But we don't want to test those here as we don't want to test the database.
@@ -75,7 +76,7 @@ def test_validate_dataset_model(real_client, require_scicat_backend, derived_dat
 def test_get_dataset(client):
     dset = INITIAL_DATASETS["raw"]
     dblock = INITIAL_ORIG_DATABLOCKS["raw"][0]
-    downloaded = client.get_dataset(dset.pid)
+    downloaded = client.get_dataset(dset.pid, strict_validation=True)
 
     assert downloaded.source_folder == dset.sourceFolder
     assert downloaded.creation_time == dset.creationTime
@@ -96,7 +97,7 @@ def test_can_get_public_dataset_without_login(require_scicat_backend, scicat_acc
 
     dset = INITIAL_DATASETS["public"]
     dblock = INITIAL_ORIG_DATABLOCKS["public"][0]
-    downloaded = client.get_dataset(dset.pid)
+    downloaded = client.get_dataset(dset.pid, strict_validation=True)
 
     assert downloaded.source_folder == dset.sourceFolder
     assert downloaded.creation_time == dset.creationTime
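The tests now opt into strict validation so that any mismatch between the updated backend responses and the download models fails loudly instead of being silently tolerated. The same flag works outside of tests; a one-line sketch with a placeholder PID:

    dset = client.get_dataset("20.500.12269/some-pid", strict_validation=True)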