From cf9ab294584880d5f4575890a45707c8f8e37931 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Wed, 30 Oct 2024 13:19:22 +0100 Subject: [PATCH 01/13] Use auth/login endpoint to get token --- src/scitacean/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scitacean/client.py b/src/scitacean/client.py index c0bdcf26..700665e7 100644 --- a/src/scitacean/client.py +++ b/src/scitacean/client.py @@ -1224,7 +1224,7 @@ def _log_in_via_users_login( ) -> httpx.Response: # Currently only used for functional accounts. response = httpx.post( - _url_concat(url, "Users/login"), + _url_concat(url, "auth/login"), json={"username": username.get_str(), "password": password.get_str()}, timeout=timeout.seconds, ) From af1212f526eb2b75fd294f8ff4bf47e462671963 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Wed, 30 Oct 2024 13:19:54 +0100 Subject: [PATCH 02/13] Experiment: Get new models from SciCat --- src/scitacean/_dataset_fields.py | 170 +++++++++++++----- src/scitacean/model.py | 35 ++-- tests/client/dataset_client_test.py | 8 +- tools/model-generation/README.md | 2 +- tools/model-generation/spec/__init__.py | 4 +- tools/model-generation/spec/masked-fields.yml | 3 - tools/model-generation/spec/schema.py | 8 +- .../model-generation/templates/model.py.jinja | 2 +- 8 files changed, 154 insertions(+), 78 deletions(-) diff --git a/src/scitacean/_dataset_fields.py b/src/scitacean/_dataset_fields.py index 265bda42..51816f7d 100644 --- a/src/scitacean/_dataset_fields.py +++ b/src/scitacean/_dataset_fields.py @@ -109,7 +109,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="api_version", - description="Version of the API used in creation of the dataset.", + description="Version of the API used when the dataset was created or last updated. API version is defined in code for each release. Managed by the system.", read_only=True, required=False, scicat_name="version", @@ -119,7 +119,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="classification", - description="ACIA information about AUthenticity,COnfidentiality,INtegrity and AVailability requirements of dataset. E.g. AV(ailabilty)=medium could trigger the creation of a two tape copies. Format 'AV=medium,CO=low'", + description="ACIA information about AUthenticity,COnfidentiality,INtegrity and AVailability requirements of dataset. E.g. AV(ailabilty)=medium could trigger the creation of a two tape copies. Format 'AV=medium,CO=low'. Please check the following post for more info: https://en.wikipedia.org/wiki/Parkerian_Hexad", read_only=False, required=False, scicat_name="classification", @@ -129,7 +129,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="comment", - description="Comment the user has about a given dataset.", + description="Short comment provided by the user about a given dataset. This is additional to the description field.", read_only=False, required=False, scicat_name="comment", @@ -149,7 +149,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="created_at", - description="Date and time when this record was created. This property is added and maintained by mongoose.", + description="Date and time when this record was created. This field is managed by mongoose with through the timestamp settings. 
The field should be a string containing a date in ISO 8601 format (2024-02-27T12:26:57.313Z)", read_only=True, required=False, scicat_name="createdAt", @@ -169,7 +169,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="creation_location", - description="Unique location identifier where data was taken, usually in the form /Site-name/facility-name/instrumentOrBeamline-name. This field is required if the dataset is a Raw dataset.", + description="Unique location identifier where data was acquired. Usually in the form /Site-name/facility-name/instrumentOrBeamline-name.", read_only=False, required=True, scicat_name="creationLocation", @@ -179,7 +179,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="creation_time", - description="Time when dataset became fully available on disk, i.e. all containing files have been written. Format according to chapter 5.6 internet date/time format in RFC 3339. Local times without timezone/offset info are automatically transformed to UTC using the timezone of the API server.", + description="Time when dataset became fully available on disk, i.e. all containing files have been written, or the dataset was created in SciCat.
It is expected to be in ISO8601 format according to specifications for internet date/time format in RFC 3339, chapter 5.6 (https://www.rfc-editor.org/rfc/rfc3339#section-5).
Local times without timezone/offset info are automatically transformed to UTC using the timezone of the API server.", read_only=False, required=True, scicat_name="creationTime", @@ -219,7 +219,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="end_time", - description="End time of data acquisition for this dataset, format according to chapter 5.6 internet date/time format in RFC 3339. Local times without timezone/offset info are automatically transformed to UTC using the timezone of the API server.", + description="End time of data acquisition for the current dataset.
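[Editor's note: a minimal stdlib-only sketch of the timestamp behaviour described in these field specs; this is not scitacean's or SciCat's actual conversion code, and the client's local timezone stands in for the API server's timezone here.]

from datetime import datetime, timezone

# A timezone-aware value carries its own offset and needs no server-side guess.
aware = datetime(1995, 8, 3, 0, 0, tzinfo=timezone.utc)

# fromisoformat handles an explicit offset on all supported Python versions;
# the trailing 'Z' form shown in the descriptions requires Python 3.11+.
parsed = datetime.fromisoformat("2024-02-27T12:26:57.313+00:00")

# A naive value has no offset; the API server would attach its own timezone
# and convert to UTC.  astimezone() approximates that with the local zone.
naive = datetime(1995, 8, 3, 2, 0)
in_utc = naive.astimezone(timezone.utc)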
It is expected to be in ISO8601 format according to specifications for internet date/time format in RFC 3339, chapter 5.6 (https://www.rfc-editor.org/rfc/rfc3339#section-5).
Local times without timezone/offset info are automatically transformed to UTC using the timezone of the API server.", read_only=False, required=False, scicat_name="endTime", @@ -229,13 +229,13 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="input_datasets", - description="Array of input dataset identifiers used in producing the derived dataset. Ideally these are the global identifier to existing datasets inside this or federated data catalogs. This field is required if the dataset is a Derived dataset.", + description="Array of input dataset identifiers used in producing the derived dataset. Ideally these are the global identifier to existing datasets inside this or federated data catalogs.", read_only=False, required=True, scicat_name="inputDatasets", type=list[PID], used_by_derived=True, - used_by_raw=False, + used_by_raw=True, ), Field( name="instrument_group", @@ -257,15 +257,25 @@ def used_by(self, dataset_type: DatasetType) -> bool: used_by_derived=False, used_by_raw=True, ), + Field( + name="instrument_ids", + description="Id of the instrument or array of IDS of the instruments where the data contained in this dataset was created/acquired.", + read_only=True, + required=False, + scicat_name="instrumentIds", + type=list[str], + used_by_derived=True, + used_by_raw=True, + ), Field( name="investigator", - description="First name and last name of the person or people pursuing the data analysis. The string may contain a list of names, which should then be separated by semicolons.", + description="", read_only=False, required=True, scicat_name="investigator", type=str, used_by_derived=True, - used_by_raw=False, + used_by_raw=True, ), Field( name="is_published", @@ -285,7 +295,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: scicat_name="jobLogData", type=str, used_by_derived=True, - used_by_raw=False, + used_by_raw=True, ), Field( name="job_parameters", @@ -295,7 +305,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: scicat_name="jobParameters", type=dict[str, Any], used_by_derived=True, - used_by_raw=False, + used_by_raw=True, ), Field( name="keywords", @@ -329,7 +339,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="name", - description="A name for the dataset, given by the creator to carry some semantic meaning. Useful for display purposes e.g. instead of displaying the pid. Will be autofilled if missing using info from sourceFolder.", + description="A name for the dataset, given by the creator to carry some semantic meaning. Useful for display purposes e.g. instead of displaying the pid.", read_only=False, required=False, scicat_name="datasetName", @@ -404,12 +414,22 @@ def used_by(self, dataset_type: DatasetType) -> bool: required=False, scicat_name="proposalId", type=str, - used_by_derived=False, + used_by_derived=True, + used_by_raw=True, + ), + Field( + name="proposal_ids", + description="The ID of the proposal to which the dataset belongs to and it has been acquired under.", + read_only=True, + required=False, + scicat_name="proposalIds", + type=list[str], + used_by_derived=True, used_by_raw=True, ), Field( name="relationships", - description="Stores the relationships with other datasets.", + description="Array of relationships with other datasets. 
It contains relationship type and destination dataset", read_only=False, required=False, scicat_name="relationships", @@ -427,9 +447,19 @@ def used_by(self, dataset_type: DatasetType) -> bool: used_by_derived=False, used_by_raw=True, ), + Field( + name="sample_ids", + description="Single ID or array of IDS of the samples used when collecting the data.", + read_only=True, + required=False, + scicat_name="sampleIds", + type=list[str], + used_by_derived=True, + used_by_raw=True, + ), Field( name="shared_with", - description="List of users that the dataset has been shared with.", + description="List of additional users that the dataset has been shared with.", read_only=False, required=False, scicat_name="sharedWith", @@ -457,9 +487,19 @@ def used_by(self, dataset_type: DatasetType) -> bool: used_by_derived=True, used_by_raw=True, ), + Field( + name="start_time", + description="Start time of data acquisition for the current dataset.
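[Editor's note: per the field specs above, the writable singular sample_id is kept for uploads while the new sample_ids is read-only and filled in by the backend. A hedged sketch of the resulting user-facing behaviour — names are taken from this spec, and it assumes the public Dataset class forwards these keywords unchanged:]

from scitacean import Dataset

ds = Dataset(type="raw", sample_id="bac.a4")
assert ds.sample_id == "bac.a4"   # writable, sent on upload
assert ds.sample_ids is None      # populated by SciCat, seen on download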
It is expected to be in ISO8601 format according to specifications for internet date/time format in RFC 3339, chapter 5.6 (https://www.rfc-editor.org/rfc/rfc3339#section-5).
Local times without timezone/offset info are automatically transformed to UTC using the timezone of the API server.", + read_only=False, + required=False, + scicat_name="startTime", + type=datetime, + used_by_derived=False, + used_by_raw=True, + ), Field( name="techniques", - description="Stores the metadata information for techniques.", + description="Array of techniques information, with technique name and pid.", read_only=False, required=False, scicat_name="techniques", @@ -469,7 +509,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="updated_at", - description="Date and time when this record was updated last. This property is added and maintained by mongoose.", + description="Date and time when this record was updated last. This field is managed by mongoose with through the timestamp settings. The field should be a string containing a date in ISO 8601 format (2024-02-27T12:26:57.313Z)", read_only=True, required=False, scicat_name="updatedAt", @@ -489,13 +529,13 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="used_software", - description="A list of links to software repositories which uniquely identifies the pieces of software, including versions, used for yielding the derived data. This field is required if the dataset is a Derived dataset.", + description="A list of links to software repositories which uniquely identifies the pieces of software, including versions, used for yielding the derived data.", read_only=False, required=True, scicat_name="usedSoftware", type=list[str], used_by_derived=True, - used_by_raw=False, + used_by_raw=True, ), Field( name="validation_status", @@ -526,6 +566,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: "_input_datasets", "_instrument_group", "_instrument_id", + "_instrument_ids", "_investigator", "_is_published", "_job_log_data", @@ -541,11 +582,14 @@ def used_by(self, dataset_type: DatasetType) -> bool: "_pid", "_principal_investigator", "_proposal_id", + "_proposal_ids", "_relationships", "_sample_id", + "_sample_ids", "_shared_with", "_source_folder", "_source_folder_host", + "_start_time", "_techniques", "_updated_at", "_updated_by", @@ -592,6 +636,7 @@ def __init__( shared_with: list[str] | None = None, source_folder: RemotePath | str | None = None, source_folder_host: str | None = None, + start_time: datetime | None = None, techniques: list[Technique] | None = None, used_software: list[str] | None = None, validation_status: str | None = None, @@ -630,14 +675,18 @@ def __init__( self._shared_with = shared_with self._source_folder = _parse_remote_path(source_folder) self._source_folder_host = source_folder_host + self._start_time = start_time self._techniques = techniques self._used_software = used_software self._validation_status = validation_status self._api_version = None self._created_at = None self._created_by = None + self._instrument_ids = None self._lifecycle = None self._pid = None + self._proposal_ids = None + self._sample_ids = None self._updated_at = None self._updated_by = None self._meta = meta or {} @@ -659,27 +708,27 @@ def access_groups(self, access_groups: list[str] | None) -> None: @property def api_version(self) -> str | None: - """Version of the API used in creation of the dataset.""" + """Version of the API used when the dataset was created or last updated. API version is defined in code for each release. 
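[Editor's note: a sketch of constructing a dataset with the arguments this patch adds to __init__ (start_time; input_datasets and used_software now also applying to raw datasets). It assumes the public Dataset class forwards these keywords unchanged; all values are illustrative.]

from datetime import datetime, timezone
from scitacean import Dataset

ds = Dataset(
    type="raw",
    creation_time=datetime(1995, 8, 3, tzinfo=timezone.utc),
    start_time=datetime(1995, 8, 2, 22, 0, tzinfo=timezone.utc),  # new field
    input_datasets=[],            # now used by raw datasets as well
    used_software=["scitacean"],  # likewise
    owner_group="faculty",
)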
Managed by the system.""" return self._api_version @property def classification(self) -> str | None: - """ACIA information about AUthenticity,COnfidentiality,INtegrity and AVailability requirements of dataset. E.g. AV(ailabilty)=medium could trigger the creation of a two tape copies. Format 'AV=medium,CO=low'""" + """ACIA information about AUthenticity,COnfidentiality,INtegrity and AVailability requirements of dataset. E.g. AV(ailabilty)=medium could trigger the creation of a two tape copies. Format 'AV=medium,CO=low'. Please check the following post for more info: https://en.wikipedia.org/wiki/Parkerian_Hexad""" return self._classification @classification.setter def classification(self, classification: str | None) -> None: - """ACIA information about AUthenticity,COnfidentiality,INtegrity and AVailability requirements of dataset. E.g. AV(ailabilty)=medium could trigger the creation of a two tape copies. Format 'AV=medium,CO=low'""" + """ACIA information about AUthenticity,COnfidentiality,INtegrity and AVailability requirements of dataset. E.g. AV(ailabilty)=medium could trigger the creation of a two tape copies. Format 'AV=medium,CO=low'. Please check the following post for more info: https://en.wikipedia.org/wiki/Parkerian_Hexad""" self._classification = classification @property def comment(self) -> str | None: - """Comment the user has about a given dataset.""" + """Short comment provided by the user about a given dataset. This is additional to the description field.""" return self._comment @comment.setter def comment(self, comment: str | None) -> None: - """Comment the user has about a given dataset.""" + """Short comment provided by the user about a given dataset. This is additional to the description field.""" self._comment = comment @property @@ -694,7 +743,7 @@ def contact_email(self, contact_email: str | None) -> None: @property def created_at(self) -> datetime | None: - """Date and time when this record was created. This property is added and maintained by mongoose.""" + """Date and time when this record was created. This field is managed by mongoose with through the timestamp settings. The field should be a string containing a date in ISO 8601 format (2024-02-27T12:26:57.313Z)""" return self._created_at @property @@ -704,22 +753,22 @@ def created_by(self) -> str | None: @property def creation_location(self) -> str | None: - """Unique location identifier where data was taken, usually in the form /Site-name/facility-name/instrumentOrBeamline-name. This field is required if the dataset is a Raw dataset.""" + """Unique location identifier where data was acquired. Usually in the form /Site-name/facility-name/instrumentOrBeamline-name.""" return self._creation_location @creation_location.setter def creation_location(self, creation_location: str | None) -> None: - """Unique location identifier where data was taken, usually in the form /Site-name/facility-name/instrumentOrBeamline-name. This field is required if the dataset is a Raw dataset.""" + """Unique location identifier where data was acquired. Usually in the form /Site-name/facility-name/instrumentOrBeamline-name.""" self._creation_location = creation_location @property def creation_time(self) -> datetime | None: - """Time when dataset became fully available on disk, i.e. all containing files have been written. Format according to chapter 5.6 internet date/time format in RFC 3339. 
Local times without timezone/offset info are automatically transformed to UTC using the timezone of the API server.""" + """Time when dataset became fully available on disk, i.e. all containing files have been written, or the dataset was created in SciCat.
It is expected to be in ISO8601 format according to specifications for internet date/time format in RFC 3339, chapter 5.6 (https://www.rfc-editor.org/rfc/rfc3339#section-5).
Local times without timezone/offset info are automatically transformed to UTC using the timezone of the API server.""" return self._creation_time @creation_time.setter def creation_time(self, creation_time: str | datetime | None) -> None: - """Time when dataset became fully available on disk, i.e. all containing files have been written. Format according to chapter 5.6 internet date/time format in RFC 3339. Local times without timezone/offset info are automatically transformed to UTC using the timezone of the API server.""" + """Time when dataset became fully available on disk, i.e. all containing files have been written, or the dataset was created in SciCat.
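[Editor's note: the creation_time setter is typed str | datetime | None and routes both forms through _parse_datetime; a short usage sketch with illustrative values, assuming a Dataset instance ds:]

from datetime import datetime, timezone

ds.creation_time = "2024-02-27T12:26:57+00:00"  # parsed into a datetime
ds.creation_time = datetime(2024, 2, 27, 12, 26, 57, tzinfo=timezone.utc)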
It is expected to be in ISO8601 format according to specifications for internet date/time format in RFC 3339, chapter 5.6 (https://www.rfc-editor.org/rfc/rfc3339#section-5).
Local times without timezone/offset info are automatically transformed to UTC using the timezone of the API server.""" self._creation_time = _parse_datetime(creation_time) @property @@ -754,22 +803,22 @@ def description(self, description: str | None) -> None: @property def end_time(self) -> datetime | None: - """End time of data acquisition for this dataset, format according to chapter 5.6 internet date/time format in RFC 3339. Local times without timezone/offset info are automatically transformed to UTC using the timezone of the API server.""" + """End time of data acquisition for the current dataset.
It is expected to be in ISO8601 format according to specifications for internet date/time format in RFC 3339, chapter 5.6 (https://www.rfc-editor.org/rfc/rfc3339#section-5).
Local times without timezone/offset info are automatically transformed to UTC using the timezone of the API server.""" return self._end_time @end_time.setter def end_time(self, end_time: datetime | None) -> None: - """End time of data acquisition for this dataset, format according to chapter 5.6 internet date/time format in RFC 3339. Local times without timezone/offset info are automatically transformed to UTC using the timezone of the API server.""" + """End time of data acquisition for the current dataset.
It is expected to be in ISO8601 format according to specifications for internet date/time format in RFC 3339, chapter 5.6 (https://www.rfc-editor.org/rfc/rfc3339#section-5).
Local times without timezone/offset info are automatically transformed to UTC using the timezone of the API server.""" self._end_time = end_time @property def input_datasets(self) -> list[PID] | None: - """Array of input dataset identifiers used in producing the derived dataset. Ideally these are the global identifier to existing datasets inside this or federated data catalogs. This field is required if the dataset is a Derived dataset.""" + """Array of input dataset identifiers used in producing the derived dataset. Ideally these are the global identifier to existing datasets inside this or federated data catalogs.""" return self._input_datasets @input_datasets.setter def input_datasets(self, input_datasets: list[PID] | None) -> None: - """Array of input dataset identifiers used in producing the derived dataset. Ideally these are the global identifier to existing datasets inside this or federated data catalogs. This field is required if the dataset is a Derived dataset.""" + """Array of input dataset identifiers used in producing the derived dataset. Ideally these are the global identifier to existing datasets inside this or federated data catalogs.""" self._input_datasets = input_datasets @property @@ -792,14 +841,19 @@ def instrument_id(self, instrument_id: str | None) -> None: """ID of the instrument where the data was created.""" self._instrument_id = instrument_id + @property + def instrument_ids(self) -> list[str] | None: + """Id of the instrument or array of IDS of the instruments where the data contained in this dataset was created/acquired.""" + return self._instrument_ids + @property def investigator(self) -> str | None: - """First name and last name of the person or people pursuing the data analysis. The string may contain a list of names, which should then be separated by semicolons.""" + """""" return self._investigator @investigator.setter def investigator(self, investigator: str | None) -> None: - """First name and last name of the person or people pursuing the data analysis. The string may contain a list of names, which should then be separated by semicolons.""" + """""" self._investigator = investigator @property @@ -859,12 +913,12 @@ def lifecycle(self) -> Lifecycle | None: @property def name(self) -> str | None: - """A name for the dataset, given by the creator to carry some semantic meaning. Useful for display purposes e.g. instead of displaying the pid. Will be autofilled if missing using info from sourceFolder.""" + """A name for the dataset, given by the creator to carry some semantic meaning. Useful for display purposes e.g. instead of displaying the pid.""" return self._name @name.setter def name(self, name: str | None) -> None: - """A name for the dataset, given by the creator to carry some semantic meaning. Useful for display purposes e.g. instead of displaying the pid. Will be autofilled if missing using info from sourceFolder.""" + """A name for the dataset, given by the creator to carry some semantic meaning. Useful for display purposes e.g. 
instead of displaying the pid.""" self._name = name @property @@ -932,14 +986,19 @@ def proposal_id(self, proposal_id: str | None) -> None: """The ID of the proposal to which the dataset belongs.""" self._proposal_id = proposal_id + @property + def proposal_ids(self) -> list[str] | None: + """The ID of the proposal to which the dataset belongs to and it has been acquired under.""" + return self._proposal_ids + @property def relationships(self) -> list[Relationship] | None: - """Stores the relationships with other datasets.""" + """Array of relationships with other datasets. It contains relationship type and destination dataset""" return self._relationships @relationships.setter def relationships(self, relationships: list[Relationship] | None) -> None: - """Stores the relationships with other datasets.""" + """Array of relationships with other datasets. It contains relationship type and destination dataset""" self._relationships = relationships @property @@ -952,14 +1011,19 @@ def sample_id(self, sample_id: str | None) -> None: """ID of the sample used when collecting the data.""" self._sample_id = sample_id + @property + def sample_ids(self) -> list[str] | None: + """Single ID or array of IDS of the samples used when collecting the data.""" + return self._sample_ids + @property def shared_with(self) -> list[str] | None: - """List of users that the dataset has been shared with.""" + """List of additional users that the dataset has been shared with.""" return self._shared_with @shared_with.setter def shared_with(self, shared_with: list[str] | None) -> None: - """List of users that the dataset has been shared with.""" + """List of additional users that the dataset has been shared with.""" self._shared_with = shared_with @property @@ -982,19 +1046,29 @@ def source_folder_host(self, source_folder_host: str | None) -> None: """DNS host name of file server hosting sourceFolder, optionally including a protocol e.g. [protocol://]fileserver1.example.com""" self._source_folder_host = source_folder_host + @property + def start_time(self) -> datetime | None: + """Start time of data acquisition for the current dataset.
It is expected to be in ISO8601 format according to specifications for internet date/time format in RFC 3339, chapter 5.6 (https://www.rfc-editor.org/rfc/rfc3339#section-5).
Local times without timezone/offset info are automatically transformed to UTC using the timezone of the API server.""" + return self._start_time + + @start_time.setter + def start_time(self, start_time: datetime | None) -> None: + """Start time of data acquisition for the current dataset.
It is expected to be in ISO8601 format according to specifications for internet date/time format in RFC 3339, chapter 5.6 (https://www.rfc-editor.org/rfc/rfc3339#section-5).
Local times without timezone/offset info are automatically transformed to UTC using the timezone of the API server.""" + self._start_time = start_time + @property def techniques(self) -> list[Technique] | None: - """Stores the metadata information for techniques.""" + """Array of techniques information, with technique name and pid.""" return self._techniques @techniques.setter def techniques(self, techniques: list[Technique] | None) -> None: - """Stores the metadata information for techniques.""" + """Array of techniques information, with technique name and pid.""" self._techniques = techniques @property def updated_at(self) -> datetime | None: - """Date and time when this record was updated last. This property is added and maintained by mongoose.""" + """Date and time when this record was updated last. This field is managed by mongoose with through the timestamp settings. The field should be a string containing a date in ISO 8601 format (2024-02-27T12:26:57.313Z)""" return self._updated_at @property @@ -1004,12 +1078,12 @@ def updated_by(self) -> str | None: @property def used_software(self) -> list[str] | None: - """A list of links to software repositories which uniquely identifies the pieces of software, including versions, used for yielding the derived data. This field is required if the dataset is a Derived dataset.""" + """A list of links to software repositories which uniquely identifies the pieces of software, including versions, used for yielding the derived data.""" return self._used_software @used_software.setter def used_software(self, used_software: list[str] | None) -> None: - """A list of links to software repositories which uniquely identifies the pieces of software, including versions, used for yielding the derived data. This field is required if the dataset is a Derived dataset.""" + """A list of links to software repositories which uniquely identifies the pieces of software, including versions, used for yielding the derived data.""" self._used_software = used_software @property diff --git a/src/scitacean/model.py b/src/scitacean/model.py index cdc88510..de38c145 100644 --- a/src/scitacean/model.py +++ b/src/scitacean/model.py @@ -101,14 +101,11 @@ from .thumbnail import Thumbnail -class DownloadDataset( - BaseModel, masked=("attachments", "datablocks", "history", "origdatablocks") -): +class DownloadDataset(BaseModel, masked=("history",)): contactEmail: str | None = None creationLocation: str | None = None creationTime: datetime | None = None inputDatasets: list[PID] | None = None - investigator: str | None = None numberOfFilesArchived: NonNegativeInt | None = None owner: str | None = None ownerGroup: str | None = None @@ -127,7 +124,7 @@ class DownloadDataset( description: str | None = None endTime: datetime | None = None instrumentGroup: str | None = None - instrumentId: str | None = None + instrumentIds: list[str] | None = None isPublished: bool | None = None jobLogData: str | None = None jobParameters: dict[str, Any] | None = None @@ -141,12 +138,13 @@ class DownloadDataset( ownerEmail: str | None = None packedSize: NonNegativeInt | None = None pid: PID | None = None - proposalId: str | None = None + proposalIds: list[str] | None = None relationships: list[DownloadRelationship] | None = None - sampleId: str | None = None + sampleIds: list[str] | None = None sharedWith: list[str] | None = None size: NonNegativeInt | None = None sourceFolderHost: str | None = None + startTime: datetime | None = None techniques: list[DownloadTechnique] | None = None updatedAt: datetime 
| None = None updatedBy: str | None = None @@ -195,6 +193,7 @@ class UploadDerivedDataset(BaseModel): orcidOfOwner: str | None = None ownerEmail: str | None = None packedSize: NonNegativeInt | None = None + proposalId: str | None = None relationships: list[UploadRelationship] | None = None sharedWith: list[str] | None = None size: NonNegativeInt | None = None @@ -219,12 +218,15 @@ class UploadRawDataset(BaseModel): contactEmail: str creationLocation: str creationTime: datetime + inputDatasets: list[PID] + investigator: str numberOfFilesArchived: NonNegativeInt owner: str ownerGroup: str principalInvestigator: str sourceFolder: RemotePath type: DatasetType + usedSoftware: list[str] accessGroups: list[str] | None = None classification: str | None = None comment: str | None = None @@ -235,6 +237,8 @@ class UploadRawDataset(BaseModel): instrumentGroup: str | None = None instrumentId: str | None = None isPublished: bool | None = None + jobLogData: str | None = None + jobParameters: dict[str, Any] | None = None keywords: list[str] | None = None license: str | None = None scientificMetadata: dict[str, Any] | None = None @@ -249,6 +253,7 @@ class UploadRawDataset(BaseModel): sharedWith: list[str] | None = None size: NonNegativeInt | None = None sourceFolderHost: str | None = None + startTime: datetime | None = None techniques: list[UploadTechnique] | None = None validationStatus: str | None = None @@ -316,13 +321,13 @@ def download_model_type(cls) -> type[DownloadAttachment]: class DownloadOrigDatablock(BaseModel): dataFileList: list[DownloadDataFile] | None = None - datasetId: PID | None = None size: NonNegativeInt | None = None id: str | None = pydantic.Field(alias="_id", default=None) accessGroups: list[str] | None = None chkAlg: str | None = None createdAt: datetime | None = None createdBy: str | None = None + datasetId: PID | None = None instrumentGroup: str | None = None isPublished: bool | None = None ownerGroup: str | None = None @@ -472,9 +477,9 @@ def download_model_type(cls) -> type[DownloadRelationship]: class DownloadHistory(BaseModel): - id: str | None = pydantic.Field(alias="_id", default=None) + id: str | None = None updatedAt: datetime | None = None - updatedBy: datetime | None = None + updatedBy: str | None = None @pydantic.field_validator("updatedAt", mode="before") def _validate_datetime(cls, value: Any) -> Any: @@ -764,20 +769,20 @@ def download_model_type(cls) -> type[DownloadRelationship]: @dataclass(kw_only=True, slots=True) class History(BaseUserModel): - __id: str | None = None + _id: str | None = None _updated_at: datetime | None = None - _updated_by: datetime | None = None + _updated_by: str | None = None @property - def _id(self) -> str | None: - return self.__id + def id(self) -> str | None: + return self._id @property def updated_at(self) -> datetime | None: return self._updated_at @property - def updated_by(self) -> datetime | None: + def updated_by(self) -> str | None: return self._updated_by @classmethod diff --git a/tests/client/dataset_client_test.py b/tests/client/dataset_client_test.py index 73f2ab21..d8fb4287 100644 --- a/tests/client/dataset_client_test.py +++ b/tests/client/dataset_client_test.py @@ -43,7 +43,7 @@ def derived_dataset(scicat_access): @pytest.mark.parametrize("key", ["raw", "derived"]) def test_get_dataset_model(scicat_client, key): dset = INITIAL_DATASETS[key] - downloaded = scicat_client.get_dataset_model(dset.pid) + downloaded = scicat_client.get_dataset_model(dset.pid, strict_validation=True) # The backend may update the dataset after 
upload. # We cannot easily predict when that happens. downloaded.updatedAt = dset.updatedAt @@ -57,7 +57,7 @@ def test_get_dataset_model_bad_id(scicat_client): def test_create_dataset_model(scicat_client, derived_dataset): finalized = scicat_client.create_dataset_model(derived_dataset) - downloaded = scicat_client.get_dataset_model(finalized.pid) + downloaded = scicat_client.get_dataset_model(finalized.pid, strict_validation=True) for key, expected in finalized: # The database populates a number of fields that are None in dset. # But we don't want to test those here as we don't want to test the database. @@ -75,7 +75,7 @@ def test_validate_dataset_model(real_client, require_scicat_backend, derived_dat def test_get_dataset(client): dset = INITIAL_DATASETS["raw"] dblock = INITIAL_ORIG_DATABLOCKS["raw"][0] - downloaded = client.get_dataset(dset.pid) + downloaded = client.get_dataset(dset.pid, strict_validation=True) assert downloaded.source_folder == dset.sourceFolder assert downloaded.creation_time == dset.creationTime @@ -96,7 +96,7 @@ def test_can_get_public_dataset_without_login(require_scicat_backend, scicat_acc dset = INITIAL_DATASETS["public"] dblock = INITIAL_ORIG_DATABLOCKS["public"][0] - downloaded = client.get_dataset(dset.pid) + downloaded = client.get_dataset(dset.pid, strict_validation=True) assert downloaded.source_folder == dset.sourceFolder assert downloaded.creation_time == dset.creationTime diff --git a/tools/model-generation/README.md b/tools/model-generation/README.md index b59d4adb..4a2ecd87 100644 --- a/tools/model-generation/README.md +++ b/tools/model-generation/README.md @@ -24,7 +24,7 @@ python generate_models.py --launch-scicat ``` This overwrites the relevant files in the source directory. -If will clean up the docker resources afterwards. +It will clean up the docker resources afterward. See `generate_models.py` for options to configure the schema URL and output file paths. diff --git a/tools/model-generation/spec/__init__.py b/tools/model-generation/spec/__init__.py index 43b53ac9..e1b53779 100644 --- a/tools/model-generation/spec/__init__.py +++ b/tools/model-generation/spec/__init__.py @@ -160,8 +160,8 @@ def _collect_schemas( ) -> dict[str, _UpDownSchemas | _DatasetSchemas]: return { "Dataset": _DatasetSchemas( - upload_derived=schemas["CreateDerivedDatasetDto"], - upload_raw=schemas["CreateRawDatasetDto"], + upload_derived=schemas["CreateDerivedDatasetObsoleteDto"], + upload_raw=schemas["CreateRawDatasetObsoleteDto"], download=schemas["DatasetClass"], ), **{ diff --git a/tools/model-generation/spec/masked-fields.yml b/tools/model-generation/spec/masked-fields.yml index 1af45ac9..a58a3c11 100644 --- a/tools/model-generation/spec/masked-fields.yml +++ b/tools/model-generation/spec/masked-fields.yml @@ -4,8 +4,5 @@ # what model to mask it in. # Field names must be SciCat names (camelCase). 
Dataset: - - attachments - - datablocks - history # because history is dropped (see field-validations.yml) - - origdatablocks - datasetlifecycle: upload diff --git a/tools/model-generation/spec/schema.py b/tools/model-generation/spec/schema.py index 0905556a..43aea246 100644 --- a/tools/model-generation/spec/schema.py +++ b/tools/model-generation/spec/schema.py @@ -30,10 +30,10 @@ def parse_field_type(spec: dict[str, Any]): return parse_field_type(spec["allOf"][0]) if "$ref" in spec: return spec["$ref"].rsplit("/", 1)[1] - if "enum" in spec: - if spec["type"] != "string": - raise ValueError(f"Enum fields must have type 'string', got: {spec}") - return "Enum[" + ", ".join(spec["enum"]) + "]" + # if "enum" in spec: + # if spec["type"] != "string": + # raise ValueError(f"Enum fields must have type 'string', got: {spec}") + # return "Enum[" + ", ".join(spec["enum"]) + "]" if spec["type"] == "number": return "int" if spec["type"] == "string": diff --git a/tools/model-generation/templates/model.py.jinja b/tools/model-generation/templates/model.py.jinja index fc247ad9..c03414af 100644 --- a/tools/model-generation/templates/model.py.jinja +++ b/tools/model-generation/templates/model.py.jinja @@ -12,7 +12,7 @@ {% macro mask_keyword(spec, kind) %} {% if kind == "download" and spec.masked_fields_download %} -, masked=({{ spec.masked_fields_download|map("quote")|join(", ") }}) +, masked=({{ spec.masked_fields_download|map("quote")|join(", ") }},) {% endif %} {% endmacro %} From 70cd90cf32021a6449d3fd45cbb8476c9d3a14f6 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Mon, 11 Nov 2024 15:02:44 +0100 Subject: [PATCH 03/13] Attempt 2 --- src/scitacean/_base_model.py | 2 +- src/scitacean/_dataset_fields.py | 120 +++++++++++++------------ src/scitacean/client.py | 6 +- src/scitacean/dataset.py | 4 +- src/scitacean/model.py | 12 +-- src/scitacean/testing/backend/seed.py | 4 + tests/client/attachment_client_test.py | 2 +- tests/client/query_client_test.py | 28 +++--- tests/dataset_fields_test.py | 3 + tests/dataset_test.py | 51 +++-------- tests/html_repr/html_repr_test.py | 2 +- tests/model_test.py | 6 +- tools/model-generation/spec/schema.py | 4 - 13 files changed, 119 insertions(+), 125 deletions(-) diff --git a/src/scitacean/_base_model.py b/src/scitacean/_base_model.py index b2d20a1f..6b3f0575 100644 --- a/src/scitacean/_base_model.py +++ b/src/scitacean/_base_model.py @@ -313,7 +313,7 @@ def _model_field_name_of(cls_name: str, name: str) -> str: Converts snake_case to camelCase and strips leading underscores. E.g., - `proposal_id` -> `proposalId`, + `proposal_ids` -> `proposalIds`, `_created_at` -> `createdAt`, `_History__id` -> `id`. """ diff --git a/src/scitacean/_dataset_fields.py b/src/scitacean/_dataset_fields.py index 51816f7d..40fdf74d 100644 --- a/src/scitacean/_dataset_fields.py +++ b/src/scitacean/_dataset_fields.py @@ -99,7 +99,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="access_groups", - description="Optional additional groups which have read access to the data. Users which are members in one of the groups listed here are allowed to access this data. 
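[Editor's note: the model.py.jinja change above appends a trailing comma inside masked=(...). In Python, a parenthesized single element without the comma is just that element, not a tuple, which would otherwise break single-entry masks such as the lone "history" field:]

masked = ("history")   # a plain str; the parentheses are grouping only
masked = ("history",)  # a 1-tuple, matching what the masked= keyword expects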
The special group 'public' makes data available to all users.", + description="List of groups which have access to this item.", read_only=False, required=False, scicat_name="accessGroups", @@ -199,7 +199,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="data_quality_metrics", - description="Data Quality Metrics given by the user to rate the dataset.", + description="Data Quality Metrics is a number given by the user to rate the dataset.", read_only=False, required=False, scicat_name="dataQualityMetrics", @@ -239,7 +239,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="instrument_group", - description="Optional additional groups which have read and write access to the data. Users which are members in one of the groups listed here are allowed to access this data.", + description="Group of the instrument which this item was acquired on.", read_only=False, required=False, scicat_name="instrumentGroup", @@ -248,12 +248,12 @@ def used_by(self, dataset_type: DatasetType) -> bool: used_by_raw=True, ), Field( - name="instrument_id", + name="instrument_ids", description="ID of the instrument where the data was created.", read_only=False, required=False, - scicat_name="instrumentId", - type=str, + scicat_name="instrumentIds", + type=list[str], used_by_derived=False, used_by_raw=True, ), @@ -379,7 +379,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="owner_group", - description="Defines the group which owns the data, and therefore has unrestricted access to this data. Usually a pgroup like p12151", + description="Name of the group owning this item.", read_only=False, required=True, scicat_name="ownerGroup", @@ -389,7 +389,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="pid", - description="Persistent Identifier for datasets derived from UUIDv4 and prepended automatically by site specific PID prefix like 20.500.12345/", + description="Persistent identifier of the dataset.", read_only=True, required=False, scicat_name="pid", @@ -408,20 +408,10 @@ def used_by(self, dataset_type: DatasetType) -> bool: used_by_raw=True, ), Field( - name="proposal_id", + name="proposal_ids", description="The ID of the proposal to which the dataset belongs.", read_only=False, required=False, - scicat_name="proposalId", - type=str, - used_by_derived=True, - used_by_raw=True, - ), - Field( - name="proposal_ids", - description="The ID of the proposal to which the dataset belongs to and it has been acquired under.", - read_only=True, - required=False, scicat_name="proposalIds", type=list[str], used_by_derived=True, @@ -438,12 +428,22 @@ def used_by(self, dataset_type: DatasetType) -> bool: used_by_raw=True, ), Field( - name="sample_id", - description="ID of the sample used when collecting the data.", + name="run_number", + description="Run number assigned by the system to the data acquisition for the current dataset.", read_only=False, required=False, - scicat_name="sampleId", + scicat_name="runNumber", type=str, + used_by_derived=True, + used_by_raw=True, + ), + Field( + name="sample_ids", + description="ID of the sample used when collecting the data.", + read_only=False, + required=False, + scicat_name="sampleIds", + type=list[str], used_by_derived=False, used_by_raw=True, ), @@ -565,7 +565,6 @@ def used_by(self, dataset_type: DatasetType) -> bool: "_end_time", "_input_datasets", "_instrument_group", - "_instrument_id", "_instrument_ids", "_investigator", "_is_published", @@ -581,10 +580,9 @@ def used_by(self, dataset_type: 
DatasetType) -> bool: "_owner_group", "_pid", "_principal_investigator", - "_proposal_id", "_proposal_ids", "_relationships", - "_sample_id", + "_run_number", "_sample_ids", "_shared_with", "_source_folder", @@ -617,7 +615,7 @@ def __init__( end_time: datetime | None = None, input_datasets: list[PID] | None = None, instrument_group: str | None = None, - instrument_id: str | None = None, + instrument_ids: list[str] | None = None, investigator: str | None = None, is_published: bool | None = None, job_log_data: str | None = None, @@ -630,9 +628,10 @@ def __init__( owner_email: str | None = None, owner_group: str | None = None, principal_investigator: str | None = None, - proposal_id: str | None = None, + proposal_ids: list[str] | None = None, relationships: list[Relationship] | None = None, - sample_id: str | None = None, + run_number: str | None = None, + sample_ids: list[str] | None = None, shared_with: list[str] | None = None, source_folder: RemotePath | str | None = None, source_folder_host: str | None = None, @@ -656,7 +655,7 @@ def __init__( self._end_time = end_time self._input_datasets = input_datasets self._instrument_group = instrument_group - self._instrument_id = instrument_id + self._instrument_ids = instrument_ids self._investigator = investigator self._is_published = is_published self._job_log_data = job_log_data @@ -669,9 +668,10 @@ def __init__( self._owner_email = owner_email self._owner_group = owner_group self._principal_investigator = principal_investigator - self._proposal_id = proposal_id + self._proposal_ids = proposal_ids self._relationships = relationships - self._sample_id = sample_id + self._run_number = run_number + self._sample_ids = sample_ids self._shared_with = shared_with self._source_folder = _parse_remote_path(source_folder) self._source_folder_host = source_folder_host @@ -698,12 +698,12 @@ def __init__( @property def access_groups(self) -> list[str] | None: - """Optional additional groups which have read access to the data. Users which are members in one of the groups listed here are allowed to access this data. The special group 'public' makes data available to all users.""" + """List of groups which have access to this item.""" return self._access_groups @access_groups.setter def access_groups(self, access_groups: list[str] | None) -> None: - """Optional additional groups which have read access to the data. Users which are members in one of the groups listed here are allowed to access this data. The special group 'public' makes data available to all users.""" + """List of groups which have access to this item.""" self._access_groups = access_groups @property @@ -783,12 +783,12 @@ def data_format(self, data_format: str | None) -> None: @property def data_quality_metrics(self) -> int | None: - """Data Quality Metrics given by the user to rate the dataset.""" + """Data Quality Metrics is a number given by the user to rate the dataset.""" return self._data_quality_metrics @data_quality_metrics.setter def data_quality_metrics(self, data_quality_metrics: int | None) -> None: - """Data Quality Metrics given by the user to rate the dataset.""" + """Data Quality Metrics is a number given by the user to rate the dataset.""" self._data_quality_metrics = data_quality_metrics @property @@ -823,23 +823,23 @@ def input_datasets(self, input_datasets: list[PID] | None) -> None: @property def instrument_group(self) -> str | None: - """Optional additional groups which have read and write access to the data. 
Users which are members in one of the groups listed here are allowed to access this data.""" + """Group of the instrument which this item was acquired on.""" return self._instrument_group @instrument_group.setter def instrument_group(self, instrument_group: str | None) -> None: - """Optional additional groups which have read and write access to the data. Users which are members in one of the groups listed here are allowed to access this data.""" + """Group of the instrument which this item was acquired on.""" self._instrument_group = instrument_group @property - def instrument_id(self) -> str | None: + def instrument_ids(self) -> list[str] | None: """ID of the instrument where the data was created.""" - return self._instrument_id + return self._instrument_ids - @instrument_id.setter - def instrument_id(self, instrument_id: str | None) -> None: + @instrument_ids.setter + def instrument_ids(self, instrument_ids: list[str] | None) -> None: """ID of the instrument where the data was created.""" - self._instrument_id = instrument_id + self._instrument_ids = instrument_ids @property def instrument_ids(self) -> list[str] | None: @@ -953,17 +953,17 @@ def owner_email(self, owner_email: str | None) -> None: @property def owner_group(self) -> str | None: - """Defines the group which owns the data, and therefore has unrestricted access to this data. Usually a pgroup like p12151""" + """Name of the group owning this item.""" return self._owner_group @owner_group.setter def owner_group(self, owner_group: str | None) -> None: - """Defines the group which owns the data, and therefore has unrestricted access to this data. Usually a pgroup like p12151""" + """Name of the group owning this item.""" self._owner_group = owner_group @property def pid(self) -> PID | None: - """Persistent Identifier for datasets derived from UUIDv4 and prepended automatically by site specific PID prefix like 20.500.12345/""" + """Persistent identifier of the dataset.""" return self._pid @property @@ -977,14 +977,14 @@ def principal_investigator(self, principal_investigator: str | None) -> None: self._principal_investigator = principal_investigator @property - def proposal_id(self) -> str | None: + def proposal_ids(self) -> list[str] | None: """The ID of the proposal to which the dataset belongs.""" - return self._proposal_id + return self._proposal_ids - @proposal_id.setter - def proposal_id(self, proposal_id: str | None) -> None: + @proposal_ids.setter + def proposal_ids(self, proposal_ids: list[str] | None) -> None: """The ID of the proposal to which the dataset belongs.""" - self._proposal_id = proposal_id + self._proposal_ids = proposal_ids @property def proposal_ids(self) -> list[str] | None: @@ -1002,14 +1002,24 @@ def relationships(self, relationships: list[Relationship] | None) -> None: self._relationships = relationships @property - def sample_id(self) -> str | None: + def run_number(self) -> str | None: + """Run number assigned by the system to the data acquisition for the current dataset.""" + return self._run_number + + @run_number.setter + def run_number(self, run_number: str | None) -> None: + """Run number assigned by the system to the data acquisition for the current dataset.""" + self._run_number = run_number + + @property + def sample_ids(self) -> list[str] | None: """ID of the sample used when collecting the data.""" - return self._sample_id + return self._sample_ids - @sample_id.setter - def sample_id(self, sample_id: str | None) -> None: + @sample_ids.setter + def sample_ids(self, sample_ids: str | None) -> 
None: """ID of the sample used when collecting the data.""" - self._sample_id = sample_id + self._sample_ids = sample_ids @property def sample_ids(self) -> list[str] | None: diff --git a/src/scitacean/client.py b/src/scitacean/client.py index 700665e7..06f96e74 100644 --- a/src/scitacean/client.py +++ b/src/scitacean/client.py @@ -761,7 +761,7 @@ def query_datasets( .. code-block:: python - scicat_client.query_datasets({'proposalId': 'abc.123'}) + scicat_client.query_datasets({'proposalIds': ['abc.123']}) Get all datasets that belong to proposal ``abc.123`` **and** have name ``"ds name"``: (The name and proposal must match exactly.) @@ -769,7 +769,7 @@ def query_datasets( .. code-block:: python scicat_client.query_datasets({ - 'proposalId': 'abc.123', + 'proposalIds': ['abc.123'], 'datasetName': 'ds name' }) @@ -778,7 +778,7 @@ def query_datasets( .. code-block:: python scicat_client.query_datasets( - {'proposalId': 'bc.123'}, + {'proposalIds': ['bc.123']}, limit=5, order="creationTime:desc", ) diff --git a/src/scitacean/dataset.py b/src/scitacean/dataset.py index 36430ba6..95bc6026 100644 --- a/src/scitacean/dataset.py +++ b/src/scitacean/dataset.py @@ -437,7 +437,7 @@ def make_upload_model(self) -> UploadDerivedDataset | UploadRawDataset: ) # Datablocks are not included here because they are handled separately # by make_datablock_upload_models and their own endpoints. - special = ("relationships", "techniques") + special = ("relationships", "techniques", "input_datasets", "used_software") return model( numberOfFiles=self.number_of_files, numberOfFilesArchived=self.number_of_files_archived, @@ -450,6 +450,8 @@ def make_upload_model(self) -> UploadDerivedDataset | UploadRawDataset: relationships=convert_user_to_upload_model( # type: ignore[arg-type] self.relationships ), + inputDatasets=self.input_datasets or [], + usedSoftware=self.used_software or [], **{ field.scicat_name: value for field in self.fields() diff --git a/src/scitacean/model.py b/src/scitacean/model.py index de38c145..57286018 100644 --- a/src/scitacean/model.py +++ b/src/scitacean/model.py @@ -140,6 +140,7 @@ class DownloadDataset(BaseModel, masked=("history",)): pid: PID | None = None proposalIds: list[str] | None = None relationships: list[DownloadRelationship] | None = None + runNumber: str | None = None sampleIds: list[str] | None = None sharedWith: list[str] | None = None size: NonNegativeInt | None = None @@ -193,8 +194,9 @@ class UploadDerivedDataset(BaseModel): orcidOfOwner: str | None = None ownerEmail: str | None = None packedSize: NonNegativeInt | None = None - proposalId: str | None = None + proposalIds: list[str] | None = None relationships: list[UploadRelationship] | None = None + runNumber: str | None = None sharedWith: list[str] | None = None size: NonNegativeInt | None = None sourceFolderHost: str | None = None @@ -219,7 +221,6 @@ class UploadRawDataset(BaseModel): creationLocation: str creationTime: datetime inputDatasets: list[PID] - investigator: str numberOfFilesArchived: NonNegativeInt owner: str ownerGroup: str @@ -235,7 +236,7 @@ class UploadRawDataset(BaseModel): description: str | None = None endTime: datetime | None = None instrumentGroup: str | None = None - instrumentId: str | None = None + instrumentIds: list[str] | None = None isPublished: bool | None = None jobLogData: str | None = None jobParameters: dict[str, Any] | None = None @@ -247,9 +248,10 @@ class UploadRawDataset(BaseModel): orcidOfOwner: str | None = None ownerEmail: str | None = None packedSize: NonNegativeInt | None = None - 
proposalId: str | None = None + proposalIds: list[str] | None = None relationships: list[UploadRelationship] | None = None - sampleId: str | None = None + runNumber: str | None = None + sampleIds: list[str] | None = None sharedWith: list[str] | None = None size: NonNegativeInt | None = None sourceFolderHost: str | None = None diff --git a/src/scitacean/testing/backend/seed.py b/src/scitacean/testing/backend/seed.py index 4d8f188e..eed7ceed 100644 --- a/src/scitacean/testing/backend/seed.py +++ b/src/scitacean/testing/backend/seed.py @@ -56,6 +56,8 @@ "temperature": {"value": "123", "unit": "K"}, "weight": {"value": "42", "unit": "mg"}, }, + usedSoftware=[], + inputDatasets=[], ), "derived": UploadDerivedDataset( ownerGroup="PLACEHOLDER", @@ -96,6 +98,8 @@ principalInvestigator="Mustrum Ridcully", creationLocation=SITE, techniques=[UploadTechnique(pid="S", name="shoes")], + inputDatasets=[], + usedSoftware=["scitacean"], ), "partially-broken": model.construct( UploadDerivedDataset, diff --git a/tests/client/attachment_client_test.py b/tests/client/attachment_client_test.py index c627c0d1..99f575e7 100644 --- a/tests/client/attachment_client_test.py +++ b/tests/client/attachment_client_test.py @@ -120,7 +120,7 @@ def test_create_attachment_for_dataset_for_dataset_populates_ids( assert finalized.id is not None assert finalized.datasetId is not None assert finalized.sampleId is None - assert finalized.proposalId is None + assert finalized.proposalIds is None def test_get_attachments_for_dataset(scicat_client): diff --git a/tests/client/query_client_test.py b/tests/client/query_client_test.py index 243b25e0..1a478351 100644 --- a/tests/client/query_client_test.py +++ b/tests/client/query_client_test.py @@ -22,7 +22,9 @@ type=DatasetType.RAW, principalInvestigator="investigator 1", creationLocation="UU", - proposalId="p0124", + proposalIds=["p0124"], + inputDatasets=[], + usedSoftware=["scitacean"], ), "raw2": model.UploadRawDataset( ownerGroup="PLACEHOLDER", @@ -37,7 +39,9 @@ type=DatasetType.RAW, principalInvestigator="investigator 2", creationLocation="UU", - proposalId="p0124", + proposalIds=["p0124"], + inputDatasets=[], + usedSoftware=[], ), "raw3": model.UploadRawDataset( ownerGroup="PLACEHOLDER", @@ -52,7 +56,9 @@ type=DatasetType.RAW, principalInvestigator="investigator 1", creationLocation="UU", - proposalId="p0124", + proposalIds=["p0124"], + inputDatasets=[], + usedSoftware=["scitacean"], ), "raw4": model.UploadRawDataset( ownerGroup="PLACEHOLDER", @@ -67,6 +73,8 @@ type=DatasetType.RAW, principalInvestigator="investigator X", creationLocation="UU", + inputDatasets=[], + usedSoftware=[], ), "derived1": model.UploadDerivedDataset( ownerGroup="PLACEHOLDER", @@ -118,7 +126,7 @@ def _seed_database(request: pytest.FixtureRequest, scicat_access: SciCatAccess) @pytest.mark.usefixtures("_seed_database") def test_query_dataset_multiple_by_single_field(real_client): - datasets = real_client.scicat.query_datasets({"proposalId": "p0124"}) + datasets = real_client.scicat.query_datasets({"proposalIds": ["p0124"]}) actual = {ds.pid: ds for ds in datasets} expected = {SEED[key].pid: SEED[key] for key in ("raw1", "raw2", "raw3")} assert actual == expected @@ -133,7 +141,7 @@ def test_query_dataset_no_match(real_client): @pytest.mark.usefixtures("_seed_database") def test_query_dataset_multiple_by_multiple_fields(real_client): datasets = real_client.scicat.query_datasets( - {"proposalId": "p0124", "principalInvestigator": "investigator 1"}, + {"proposalIds": ["p0124"], "principalInvestigator": 
"investigator 1"}, ) actual = {ds.pid: ds for ds in datasets} expected = {SEED[key].pid: SEED[key] for key in ("raw1", "raw3")} @@ -153,7 +161,7 @@ def test_query_dataset_multiple_by_derived_field(real_client): @pytest.mark.usefixtures("_seed_database") def test_query_dataset_uses_conjunction_of_fields(real_client): datasets = real_client.scicat.query_datasets( - {"proposalId": "p0124", "investigator": "investigator X"}, + {"proposalIds": ["p0124"], "investigator": "investigator X"}, ) assert not datasets @@ -170,7 +178,7 @@ def test_query_dataset_can_use_custom_type(real_client): @pytest.mark.usefixtures("_seed_database") def test_query_dataset_set_order(real_client): datasets = real_client.scicat.query_datasets( - {"proposalId": "p0124"}, + {"proposalIds": ["p0124"]}, order="creationTime:desc", ) # This test uses a list to check the order @@ -181,7 +189,7 @@ def test_query_dataset_set_order(real_client): @pytest.mark.usefixtures("_seed_database") def test_query_dataset_limit_ascending_creation_time(real_client): datasets = real_client.scicat.query_datasets( - {"proposalId": "p0124"}, + {"proposalIds": "p0124"}, limit=2, order="creationTime:asc", ) @@ -193,7 +201,7 @@ def test_query_dataset_limit_ascending_creation_time(real_client): @pytest.mark.usefixtures("_seed_database") def test_query_dataset_limit_descending_creation_time(real_client): datasets = real_client.scicat.query_datasets( - {"proposalId": "p0124"}, + {"proposalIds": ["p0124"]}, limit=2, order="creationTime:desc", ) @@ -206,7 +214,7 @@ def test_query_dataset_limit_descending_creation_time(real_client): def test_query_dataset_limit_needs_order(real_client): with pytest.raises(ValueError, match="limit"): real_client.scicat.query_datasets( - {"proposalId": "p0124"}, + {"proposalIds": ["p0124"]}, limit=2, ) diff --git a/tests/dataset_fields_test.py b/tests/dataset_fields_test.py index 84e090bc..9a9b245b 100644 --- a/tests/dataset_fields_test.py +++ b/tests/dataset_fields_test.py @@ -344,6 +344,7 @@ def test_make_raw_model(): source_folder=RemotePath("/hex/source62"), creation_location="ANK/UU", shared_with=["librarian", "hicks"], + used_software=["scitacean"], ) expected = UploadRawDataset( contactEmail="p.stibbons@uu.am", @@ -360,6 +361,8 @@ def test_make_raw_model(): numberOfFilesArchived=0, packedSize=0, size=0, + inputDatasets=[], + usedSoftware=["scitacean"], ) assert dset.make_upload_model() == expected diff --git a/tests/dataset_test.py b/tests/dataset_test.py index bb649419..2a37f67c 100644 --- a/tests/dataset_test.py +++ b/tests/dataset_test.py @@ -43,7 +43,7 @@ def raw_download_model(): description="Some shady data", endTime=parse_datetime("1995-08-03T00:00:00Z"), instrumentGroup="professors", - instrumentId="0000-aa", + instrumentIds=["0000-aa"], isPublished=True, jobLogData=None, jobParameters=None, @@ -55,8 +55,8 @@ def raw_download_model(): ownerEmail="m.ridcully@uu.am", packedSize=0, pid=PID.parse("123.cc/948.f7.2a"), - proposalId="33.dc", - sampleId="bac.a4", + proposalIds=["33.dc"], + sampleIds=["bac.a4"], sharedWith=["librarian"], size=400, sourceFolderHost="ftp://uu.am/data", @@ -112,7 +112,7 @@ def derived_download_model(): description="Dubiously analyzed data", endTime=None, instrumentGroup="professors", - instrumentId=None, + instrumentIds=None, isPublished=True, jobLogData="process interrupted", jobParameters={"nodes": 4}, @@ -124,8 +124,8 @@ def derived_download_model(): ownerEmail="m.ridcully@uu.am", packedSize=0, pid=PID.parse("123.cc/948.f7.2a"), - proposalId=None, - sampleId=None, + proposalIds=None, 
+        sampleIds=None,
         sharedWith=["librarian"],
         size=400,
         sourceFolderHost="ftp://uu.am/data",
@@ -797,13 +797,8 @@ def test_derive_removes_attachments(initial, attachments):
     assert derived.attachments == []
 
 
-def invalid_field_example(my_type):
-    if my_type == DatasetType.DERIVED:
-        return "data_format", "sth_not_None"
-    elif my_type == DatasetType.RAW:
-        return "job_log_data", "sth_not_None"
-    else:
-        raise ValueError(my_type, " is not valid DatasetType.")
+def invalid_field_example() -> tuple[str, str]:
+    return "not_a_field", "sth_not_None"
 
 
 @given(initial=sst.datasets(for_upload=True))
 @settings(max_examples=10)
@@ -815,22 +810,6 @@ def test_dataset_dict_like_keys_per_type(initial: Dataset) -> None:
     assert set(initial.keys()) == my_names
 
 
-@given(initial=sst.datasets(for_upload=True))
-@settings(max_examples=10)
-def test_dataset_dict_like_keys_including_invalid_field(initial):
-    invalid_name, invalid_value = invalid_field_example(initial.type)
-
-    my_names = {
-        field.name for field in Dataset._FIELD_SPEC if field.used_by(initial.type)
-    }
-    assert invalid_name not in my_names
-    my_names.add(invalid_name)
-
-    setattr(initial, invalid_name, invalid_value)
-
-    assert set(initial.keys()) == my_names
-
-
 @given(initial=sst.datasets(for_upload=True))
 @settings(max_examples=10)
 def test_dataset_dict_like_values(initial: Dataset) -> None:
@@ -841,7 +820,7 @@ def test_dataset_dict_like_values(initial: Dataset) -> None:
 @given(initial=sst.datasets(for_upload=True))
 @settings(max_examples=10)
 def test_dataset_dict_like_values_with_invalid_field(initial: Dataset) -> None:
-    setattr(initial, *invalid_field_example(initial.type))
+    setattr(initial, *invalid_field_example())
     for key, value in zip(initial.keys(), initial.values(), strict=True):
         assert value == getattr(initial, key)
 
@@ -849,7 +828,7 @@ def test_dataset_dict_like_values_with_invalid_field(initial: Dataset) -> None:
 @given(initial=sst.datasets(for_upload=True))
 @settings(max_examples=10)
 def test_dataset_dict_like_items_with_invalid_field(initial: Dataset) -> None:
-    setattr(initial, *invalid_field_example(initial.type))
+    setattr(initial, *invalid_field_example())
     for key, value in initial.items():
         assert value == getattr(initial, key)
 
@@ -884,16 +863,6 @@ def test_dataset_dict_like_setitem(initial: Dataset) -> None:
     assert initial["comment"] == sample_comment
 
 
-@given(initial=sst.datasets(for_upload=True))
-@settings(max_examples=10)
-def test_dataset_dict_like_setitem_invalid_field(initial: Dataset) -> None:
-    # ``__setitem__`` doesn't check if the item is invalid for the current type or not.
-    invalid_field, invalid_value = invalid_field_example(initial.type)
-    assert initial[invalid_field] is None
-    initial[invalid_field] = invalid_value
-    assert initial[invalid_field] == invalid_value
-
-
 @pytest.mark.parametrize(
     ("is_attr", "wrong_field", "wrong_value"),
     [(True, "size", 10), (False, "OBVIOUSLYWRONGNAME", "OBVIOUSLYWRONGVALUE")],
diff --git a/tests/html_repr/html_repr_test.py b/tests/html_repr/html_repr_test.py
index da172ffc..433baa61 100644
--- a/tests/html_repr/html_repr_test.py
+++ b/tests/html_repr/html_repr_test.py
@@ -13,7 +13,7 @@ def test_dataset_html_repr():
         name="My dataset",
         contact_email="devsci.cat",
         owner="The People",
-        instrument_id="the-peoples-neutron-gun",
+        instrument_ids=["the-peoples-neutron-gun"],
         used_software=["scitacean"],
         source_folder=RemotePath("/remote/dir/"),
         meta={
diff --git a/tests/model_test.py b/tests/model_test.py
index 59b3903b..ee33206c 100644
--- a/tests/model_test.py
+++ b/tests/model_test.py
@@ -200,12 +200,12 @@ def test_raw_dataset_default_values(real_client, require_scicat_backend, scicat_
     assert finalized.dataFormat is None
     assert finalized.description is None
     assert finalized.endTime is None
-    assert finalized.instrumentId is None
+    assert finalized.instrumentIds is None
     assert finalized.license is None
     assert finalized.orcidOfOwner is None
    assert finalized.ownerEmail is None
-    assert finalized.proposalId is None
-    assert finalized.sampleId is None
+    assert finalized.proposalIds is None
+    assert finalized.sampleIds is None
     assert finalized.sourceFolderHost is None
     assert finalized.validationStatus is None
     assert finalized.version is None
diff --git a/tools/model-generation/spec/schema.py b/tools/model-generation/spec/schema.py
index 43aea246..9632c0a0 100644
--- a/tools/model-generation/spec/schema.py
+++ b/tools/model-generation/spec/schema.py
@@ -30,10 +30,6 @@ def parse_field_type(spec: dict[str, Any]):
         return parse_field_type(spec["allOf"][0])
     if "$ref" in spec:
         return spec["$ref"].rsplit("/", 1)[1]
-    # if "enum" in spec:
-    #     if spec["type"] != "string":
-    #         raise ValueError(f"Enum fields must have type 'string', got: {spec}")
-    #     return "Enum[" + ", ".join(spec["enum"]) + "]"
     if spec["type"] == "number":
         return "int"
     if spec["type"] == "string":

From 361f034cd57f8927c9200196ff90dd531bee937d Mon Sep 17 00:00:00 2001
From: Jan-Lukas Wynen
Date: Mon, 16 Dec 2024 11:38:46 +0100
Subject: [PATCH 04/13] Make investigator optional

---
 src/scitacean/model.py | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/scitacean/model.py b/src/scitacean/model.py
index 57286018..152b2862 100644
--- a/src/scitacean/model.py
+++ b/src/scitacean/model.py
@@ -140,7 +140,6 @@ class DownloadDataset(BaseModel, masked=("history",)):
     pid: PID | None = None
     proposalIds: list[str] | None = None
     relationships: list[DownloadRelationship] | None = None
-    runNumber: str | None = None
     sampleIds: list[str] | None = None
     sharedWith: list[str] | None = None
     size: NonNegativeInt | None = None
@@ -194,9 +193,8 @@ class UploadDerivedDataset(BaseModel):
     orcidOfOwner: str | None = None
     ownerEmail: str | None = None
     packedSize: NonNegativeInt | None = None
-    proposalIds: list[str] | None = None
+    proposalId: str | None = None
     relationships: list[UploadRelationship] | None = None
-    runNumber: str | None = None
     sharedWith: list[str] | None = None
     size: NonNegativeInt | None = None
     sourceFolderHost: str | None = None
@@ -221,6 +219,7 @@ class UploadRawDataset(BaseModel):
     creationLocation: str
     creationTime: datetime
     inputDatasets: list[PID]
+    investigator: str | None = None
     numberOfFilesArchived: NonNegativeInt
     owner: str
     ownerGroup: str
@@ -236,7 +235,7 @@ class UploadRawDataset(BaseModel):
     description: str | None = None
     endTime: datetime | None = None
     instrumentGroup: str | None = None
-    instrumentIds: list[str] | None = None
+    instrumentId: str | None = None
     isPublished: bool | None = None
     jobLogData: str | None = None
     jobParameters: dict[str, Any] | None = None
@@ -248,10 +247,9 @@ class UploadRawDataset(BaseModel):
     orcidOfOwner: str | None = None
     ownerEmail: str | None = None
     packedSize: NonNegativeInt | None = None
-    proposalIds: list[str] | None = None
+    proposalId: str | None = None
     relationships: list[UploadRelationship] | None = None
-    runNumber: str | None = None
-    sampleIds: list[str] | None = None
+    sampleId: str | None = None
     sharedWith: list[str] | None = None
     size: NonNegativeInt | None = None
     sourceFolderHost: str | None = None
@@ -259,6 +257,19 @@ class UploadRawDataset(BaseModel):
     techniques: list[UploadTechnique] | None = None
     validationStatus: str | None = None
 
+    @pydantic.model_validator(mode="before")
+    @classmethod
+    def _set_investigator(cls, data):
+        # The model currently has both `investigator` and `principalInvestigator`
+        # and both are mandatory. Eventually, `investigator` will be removed.
+        # So make sure we can construct the model if only one is given.
+        if isinstance(data, dict):
+            if (inv := data.get("investigator")) is not None:
+                data.setdefault("principalInvestigator", inv)
+            elif (pi := data.get("principalInvestigator")) is not None:
+                data["investigator"] = pi
+        return data
+
     @pydantic.field_validator("creationTime", "endTime", mode="before")
     def _validate_datetime(cls, value: Any) -> Any:
         return validate_datetime(value)

From 61efd9e0285457b0697b798081578bfbf98b73e2 Mon Sep 17 00:00:00 2001
From: Jan-Lukas Wynen
Date: Mon, 16 Dec 2024 14:11:21 +0100
Subject: [PATCH 05/13] Work around schema inconsistencies

---
 src/scitacean/_dataset_fields.py              | 124 ++++++++----------
 src/scitacean/model.py                        |   2 +-
 src/scitacean/testing/backend/seed.py         |   2 +
 src/scitacean/testing/client.py               |   4 +
 tests/client/attachment_client_test.py        |   2 +-
 tests/client/dataset_client_test.py           |   1 +
 tests/client/query_client_test.py             |   6 +-
 tests/dataset_test.py                         |  18 ++-
 tests/html_repr/html_repr_test.py             |   2 +-
 tests/model_test.py                           |   1 -
 .../templates/dataset_fields.py.jinja         |   4 +-
 11 files changed, 89 insertions(+), 77 deletions(-)

diff --git a/src/scitacean/_dataset_fields.py b/src/scitacean/_dataset_fields.py
index 40fdf74d..6b994aa4 100644
--- a/src/scitacean/_dataset_fields.py
+++ b/src/scitacean/_dataset_fields.py
@@ -99,7 +99,7 @@ def used_by(self, dataset_type: DatasetType) -> bool:
     ),
     Field(
         name="access_groups",
-        description="List of groups which have access to this item.",
+        description="Optional additional groups which have read access to the data. Users which are members in one of the groups listed here are allowed to access this data. The special group 'public' makes data available to all users.",
         read_only=False,
         required=False,
         scicat_name="accessGroups",
@@ -199,7 +199,7 @@ def used_by(self, dataset_type: DatasetType) -> bool:
     ),
     Field(
         name="data_quality_metrics",
-        description="Data Quality Metrics is a number given by the user to rate the dataset.",
+        description="Data Quality Metrics given by the user to rate the dataset.",
         read_only=False,
         required=False,
         scicat_name="dataQualityMetrics",
@@ -239,7 +239,7 @@ def used_by(self, dataset_type: DatasetType) -> bool:
     ),
     Field(
         name="instrument_group",
-        description="Group of the instrument which this item was acquired on.",
+        description="Optional additional groups which have read and write access to the data. Users which are members in one of the groups listed here are allowed to access this data.",
         read_only=False,
         required=False,
         scicat_name="instrumentGroup",
@@ -248,12 +248,12 @@ def used_by(self, dataset_type: DatasetType) -> bool:
         used_by_raw=True,
     ),
     Field(
-        name="instrument_ids",
+        name="instrument_id",
         description="ID of the instrument where the data was created.",
         read_only=False,
         required=False,
-        scicat_name="instrumentIds",
-        type=list[str],
+        scicat_name="instrumentId",
+        type=str,
         used_by_derived=False,
         used_by_raw=True,
     ),
@@ -379,7 +379,7 @@ def used_by(self, dataset_type: DatasetType) -> bool:
     ),
     Field(
         name="owner_group",
-        description="Name of the group owning this item.",
+        description="Defines the group which owns the data, and therefore has unrestricted access to this data. Usually a pgroup like p12151",
         read_only=False,
         required=True,
         scicat_name="ownerGroup",
@@ -389,7 +389,7 @@ def used_by(self, dataset_type: DatasetType) -> bool:
     ),
     Field(
         name="pid",
-        description="Persistent identifier of the dataset.",
+        description="Persistent Identifier for datasets derived from UUIDv4 and prepended automatically by site specific PID prefix like 20.500.12345/",
         read_only=True,
         required=False,
         scicat_name="pid",
@@ -408,10 +408,20 @@ def used_by(self, dataset_type: DatasetType) -> bool:
         used_by_raw=True,
     ),
     Field(
-        name="proposal_ids",
+        name="proposal_id",
         description="The ID of the proposal to which the dataset belongs.",
         read_only=False,
         required=False,
+        scicat_name="proposalId",
+        type=str,
+        used_by_derived=True,
+        used_by_raw=True,
+    ),
+    Field(
+        name="proposal_ids",
+        description="The ID of the proposal to which the dataset belongs to and it has been acquired under.",
+        read_only=True,
+        required=False,
         scicat_name="proposalIds",
         type=list[str],
         used_by_derived=True,
         used_by_raw=True,
     ),
@@ -428,22 +438,12 @@ def used_by(self, dataset_type: DatasetType) -> bool:
         used_by_raw=True,
     ),
     Field(
-        name="run_number",
-        description="Run number assigned by the system to the data acquisition for the current dataset.",
-        read_only=False,
-        required=False,
-        scicat_name="runNumber",
-        type=str,
-        used_by_derived=True,
-        used_by_raw=True,
-    ),
-    Field(
-        name="sample_ids",
+        name="sample_id",
         description="ID of the sample used when collecting the data.",
         read_only=False,
         required=False,
-        scicat_name="sampleIds",
-        type=list[str],
+        scicat_name="sampleId",
+        type=str,
         used_by_derived=False,
         used_by_raw=True,
     ),
@@ -565,24 +565,26 @@ class DatasetBase:
         "_end_time",
         "_input_datasets",
         "_instrument_group",
+        "_instrument_id",
         "_instrument_ids",
         "_investigator",
         "_is_published",
         "_job_log_data",
         "_job_parameters",
         "_keywords",
         "_license",
         "_lifecycle",
         "_name",
         "_orcid_of_owner",
         "_owner",
         "_owner_email",
         "_owner_group",
         "_pid",
         "_principal_investigator",
+        "_proposal_id",
         "_proposal_ids",
"_relationships", - "_run_number", + "_sample_id", "_sample_ids", "_shared_with", "_source_folder", @@ -615,7 +617,7 @@ def __init__( end_time: datetime | None = None, input_datasets: list[PID] | None = None, instrument_group: str | None = None, - instrument_ids: list[str] | None = None, + instrument_id: str | None = None, investigator: str | None = None, is_published: bool | None = None, job_log_data: str | None = None, @@ -628,10 +630,9 @@ def __init__( owner_email: str | None = None, owner_group: str | None = None, principal_investigator: str | None = None, - proposal_ids: list[str] | None = None, + proposal_id: str | None = None, relationships: list[Relationship] | None = None, - run_number: str | None = None, - sample_ids: list[str] | None = None, + sample_id: str | None = None, shared_with: list[str] | None = None, source_folder: RemotePath | str | None = None, source_folder_host: str | None = None, @@ -655,7 +656,7 @@ def __init__( self._end_time = end_time self._input_datasets = input_datasets self._instrument_group = instrument_group - self._instrument_ids = instrument_ids + self._instrument_id = instrument_id self._investigator = investigator self._is_published = is_published self._job_log_data = job_log_data @@ -668,10 +669,9 @@ def __init__( self._owner_email = owner_email self._owner_group = owner_group self._principal_investigator = principal_investigator - self._proposal_ids = proposal_ids + self._proposal_id = proposal_id self._relationships = relationships - self._run_number = run_number - self._sample_ids = sample_ids + self._sample_id = sample_id self._shared_with = shared_with self._source_folder = _parse_remote_path(source_folder) self._source_folder_host = source_folder_host @@ -698,12 +698,12 @@ def __init__( @property def access_groups(self) -> list[str] | None: - """List of groups which have access to this item.""" + """Optional additional groups which have read access to the data. Users which are members in one of the groups listed here are allowed to access this data. The special group 'public' makes data available to all users.""" return self._access_groups @access_groups.setter def access_groups(self, access_groups: list[str] | None) -> None: - """List of groups which have access to this item.""" + """Optional additional groups which have read access to the data. Users which are members in one of the groups listed here are allowed to access this data. The special group 'public' makes data available to all users.""" self._access_groups = access_groups @property @@ -783,12 +783,12 @@ def data_format(self, data_format: str | None) -> None: @property def data_quality_metrics(self) -> int | None: - """Data Quality Metrics is a number given by the user to rate the dataset.""" + """Data Quality Metrics given by the user to rate the dataset.""" return self._data_quality_metrics @data_quality_metrics.setter def data_quality_metrics(self, data_quality_metrics: int | None) -> None: - """Data Quality Metrics is a number given by the user to rate the dataset.""" + """Data Quality Metrics given by the user to rate the dataset.""" self._data_quality_metrics = data_quality_metrics @property @@ -823,23 +823,23 @@ def input_datasets(self, input_datasets: list[PID] | None) -> None: @property def instrument_group(self) -> str | None: - """Group of the instrument which this item was acquired on.""" + """Optional additional groups which have read and write access to the data. 
Users which are members in one of the groups listed here are allowed to access this data.""" return self._instrument_group @instrument_group.setter def instrument_group(self, instrument_group: str | None) -> None: - """Group of the instrument which this item was acquired on.""" + """Optional additional groups which have read and write access to the data. Users which are members in one of the groups listed here are allowed to access this data.""" self._instrument_group = instrument_group @property - def instrument_ids(self) -> list[str] | None: + def instrument_id(self) -> str | None: """ID of the instrument where the data was created.""" - return self._instrument_ids + return self._instrument_id - @instrument_ids.setter - def instrument_ids(self, instrument_ids: list[str] | None) -> None: + @instrument_id.setter + def instrument_id(self, instrument_id: str | None) -> None: """ID of the instrument where the data was created.""" - self._instrument_ids = instrument_ids + self._instrument_id = instrument_id @property def instrument_ids(self) -> list[str] | None: @@ -953,17 +953,17 @@ def owner_email(self, owner_email: str | None) -> None: @property def owner_group(self) -> str | None: - """Name of the group owning this item.""" + """Defines the group which owns the data, and therefore has unrestricted access to this data. Usually a pgroup like p12151""" return self._owner_group @owner_group.setter def owner_group(self, owner_group: str | None) -> None: - """Name of the group owning this item.""" + """Defines the group which owns the data, and therefore has unrestricted access to this data. Usually a pgroup like p12151""" self._owner_group = owner_group @property def pid(self) -> PID | None: - """Persistent identifier of the dataset.""" + """Persistent Identifier for datasets derived from UUIDv4 and prepended automatically by site specific PID prefix like 20.500.12345/""" return self._pid @property @@ -977,14 +977,14 @@ def principal_investigator(self, principal_investigator: str | None) -> None: self._principal_investigator = principal_investigator @property - def proposal_ids(self) -> list[str] | None: + def proposal_id(self) -> str | None: """The ID of the proposal to which the dataset belongs.""" - return self._proposal_ids + return self._proposal_id - @proposal_ids.setter - def proposal_ids(self, proposal_ids: list[str] | None) -> None: + @proposal_id.setter + def proposal_id(self, proposal_id: str | None) -> None: """The ID of the proposal to which the dataset belongs.""" - self._proposal_ids = proposal_ids + self._proposal_id = proposal_id @property def proposal_ids(self) -> list[str] | None: @@ -1002,24 +1002,14 @@ def relationships(self, relationships: list[Relationship] | None) -> None: self._relationships = relationships @property - def run_number(self) -> str | None: - """Run number assigned by the system to the data acquisition for the current dataset.""" - return self._run_number - - @run_number.setter - def run_number(self, run_number: str | None) -> None: - """Run number assigned by the system to the data acquisition for the current dataset.""" - self._run_number = run_number - - @property - def sample_ids(self) -> list[str] | None: + def sample_id(self) -> str | None: """ID of the sample used when collecting the data.""" - return self._sample_ids + return self._sample_id - @sample_ids.setter - def sample_ids(self, sample_ids: str | None) -> None: + @sample_id.setter + def sample_id(self, sample_id: str | None) -> None: """ID of the sample used when collecting the data.""" - 
self._sample_ids = sample_ids + self._sample_id = sample_id @property def sample_ids(self) -> list[str] | None: @@ -1130,7 +1120,9 @@ def _prepare_fields_from_download( for field in DatasetBase._FIELD_SPEC: if field.read_only: read_only["_" + field.name] = getattr(download_model, field.scicat_name) - else: + elif hasattr( + download_model, field.scicat_name + ): # TODO remove condition in API v4 init_args[field.name] = getattr(download_model, field.scicat_name) init_args["meta"] = download_model.scientificMetadata diff --git a/src/scitacean/model.py b/src/scitacean/model.py index 152b2862..99f66384 100644 --- a/src/scitacean/model.py +++ b/src/scitacean/model.py @@ -219,7 +219,6 @@ class UploadRawDataset(BaseModel): creationLocation: str creationTime: datetime inputDatasets: list[PID] - investigator: str | None = None numberOfFilesArchived: NonNegativeInt owner: str ownerGroup: str @@ -227,6 +226,7 @@ class UploadRawDataset(BaseModel): sourceFolder: RemotePath type: DatasetType usedSoftware: list[str] + investigator: str | None = None accessGroups: list[str] | None = None classification: str | None = None comment: str | None = None diff --git a/src/scitacean/testing/backend/seed.py b/src/scitacean/testing/backend/seed.py index eed7ceed..ff8c2bf0 100644 --- a/src/scitacean/testing/backend/seed.py +++ b/src/scitacean/testing/backend/seed.py @@ -48,6 +48,7 @@ ownerEmail="PLACE@HOLD.ER", sourceFolder=RemotePath("/hex/data/123"), type=DatasetType.RAW, + investigator="Ponder Stibbons", principalInvestigator="Ponder Stibbons", creationLocation=SITE, techniques=[UploadTechnique(pid="DM666", name="dark_magic")], @@ -95,6 +96,7 @@ ownerEmail="PLACE@HOLD.ER", sourceFolder=RemotePath("/hex/secret/stuff"), type=DatasetType.RAW, + investigator="Mustrum Ridcully", principalInvestigator="Mustrum Ridcully", creationLocation=SITE, techniques=[UploadTechnique(pid="S", name="shoes")], diff --git a/src/scitacean/testing/client.py b/src/scitacean/testing/client.py index 55e8bddb..ea084650 100644 --- a/src/scitacean/testing/client.py +++ b/src/scitacean/testing/client.py @@ -324,6 +324,10 @@ def _process_dataset( ) if "techniques" in fields: fields["techniques"] = list(map(_process_technique, fields["techniques"])) + # TODO remove in API v4 + for singular in ("proposalId", "sampleId", "instrumentId"): + if singular in fields: + fields[singular + "s"] = [fields[singular]] return model.construct( model.DownloadDataset, _strict_validation=False, diff --git a/tests/client/attachment_client_test.py b/tests/client/attachment_client_test.py index 99f575e7..c627c0d1 100644 --- a/tests/client/attachment_client_test.py +++ b/tests/client/attachment_client_test.py @@ -120,7 +120,7 @@ def test_create_attachment_for_dataset_for_dataset_populates_ids( assert finalized.id is not None assert finalized.datasetId is not None assert finalized.sampleId is None - assert finalized.proposalIds is None + assert finalized.proposalId is None def test_get_attachments_for_dataset(scicat_client): diff --git a/tests/client/dataset_client_test.py b/tests/client/dataset_client_test.py index d8fb4287..d39eea22 100644 --- a/tests/client/dataset_client_test.py +++ b/tests/client/dataset_client_test.py @@ -76,6 +76,7 @@ def test_get_dataset(client): dset = INITIAL_DATASETS["raw"] dblock = INITIAL_ORIG_DATABLOCKS["raw"][0] downloaded = client.get_dataset(dset.pid, strict_validation=True) + print(downloaded.source_folder) assert downloaded.source_folder == dset.sourceFolder assert downloaded.creation_time == dset.creationTime diff --git 
a/tests/client/query_client_test.py b/tests/client/query_client_test.py index 1a478351..4081ea42 100644 --- a/tests/client/query_client_test.py +++ b/tests/client/query_client_test.py @@ -22,7 +22,7 @@ type=DatasetType.RAW, principalInvestigator="investigator 1", creationLocation="UU", - proposalIds=["p0124"], + proposalId="p0124", inputDatasets=[], usedSoftware=["scitacean"], ), @@ -39,7 +39,7 @@ type=DatasetType.RAW, principalInvestigator="investigator 2", creationLocation="UU", - proposalIds=["p0124"], + proposalId="p0124", inputDatasets=[], usedSoftware=[], ), @@ -56,7 +56,7 @@ type=DatasetType.RAW, principalInvestigator="investigator 1", creationLocation="UU", - proposalIds=["p0124"], + proposalId="p0124", inputDatasets=[], usedSoftware=["scitacean"], ), diff --git a/tests/dataset_test.py b/tests/dataset_test.py index 2a37f67c..3804c2a3 100644 --- a/tests/dataset_test.py +++ b/tests/dataset_test.py @@ -24,7 +24,6 @@ def raw_download_model(): creationLocation="UnseenUniversity", creationTime=parse_datetime("1995-08-06T14:14:14Z"), inputDatasets=None, - investigator=None, numberOfFilesArchived=None, owner="pstibbons", ownerGroup="faculty", @@ -93,11 +92,10 @@ def derived_download_model(): creationLocation=None, creationTime=parse_datetime("1995-08-06T14:14:14Z"), inputDatasets=[PID.parse("123.cc/948.f7.2a")], - investigator="Ponder Stibbons", numberOfFilesArchived=None, owner="pstibbons", ownerGroup="faculty", - principalInvestigator=None, + principalInvestigator="Ponder Stibbons", sourceFolder=RemotePath("/uu/hex"), type=DatasetType.DERIVED, usedSoftware=["scitacean"], @@ -173,6 +171,8 @@ def get_model_field(name): dset = Dataset.from_download_models(dataset_download_model, []) for field in dset.fields(): + if field.name in ("instrument_id", "sample_id", "proposal_id", "investigator"): + continue # TODO remove when API v4 is released if field.used_by(dataset_download_model.type): assert getattr(dset, field.name) == get_model_field(field.scicat_name) @@ -180,6 +180,8 @@ def get_model_field(name): def test_from_download_models_does_not_initialize_wrong_fields(dataset_download_model): dset = Dataset.from_download_models(dataset_download_model, []) for field in dset.fields(): + if field.name == "principal_investigator": + continue # TODO remove when API v4 is released if not field.used_by(dataset_download_model.type): assert getattr(dset, field.name) is None @@ -318,6 +320,16 @@ def test_dataset_models_roundtrip(initial): orig_datablock_models=dblock_models, attachment_models=attachment_models, ) + + # TODO remove in API v4 + rebuilt.investigator = initial.investigator + rebuilt.proposal_id = initial.proposal_id + initial._proposal_ids = rebuilt.proposal_ids + rebuilt.sample_id = initial.sample_id + initial._sample_ids = rebuilt.sample_ids + rebuilt.instrument_id = initial.instrument_id + initial._instrument_ids = rebuilt.instrument_ids + assert initial == rebuilt diff --git a/tests/html_repr/html_repr_test.py b/tests/html_repr/html_repr_test.py index 433baa61..da172ffc 100644 --- a/tests/html_repr/html_repr_test.py +++ b/tests/html_repr/html_repr_test.py @@ -13,7 +13,7 @@ def test_dataset_html_repr(): name="My dataset", contact_email="devsci.cat", owner="The People", - instrument_ids=["the-peoples-neutron-gun"], + instrument_id="the-peoples-neutron-gun", used_software=["scitacean"], source_folder=RemotePath("/remote/dir/"), meta={ diff --git a/tests/model_test.py b/tests/model_test.py index ee33206c..24386e2c 100644 --- a/tests/model_test.py +++ b/tests/model_test.py @@ -226,7 +226,6 @@ 
def test_default_masked_fields_are_dropped(): def test_custom_masked_fields_are_dropped(): mod = DownloadDataset( # type: ignore[call-arg] - attachments=[{"id": "abc"}], id="abc", _id="def", _v="123", diff --git a/tools/model-generation/templates/dataset_fields.py.jinja b/tools/model-generation/templates/dataset_fields.py.jinja index 2e8d5a3b..a5eb10d5 100644 --- a/tools/model-generation/templates/dataset_fields.py.jinja +++ b/tools/model-generation/templates/dataset_fields.py.jinja @@ -202,7 +202,9 @@ class DatasetBase: for field in DatasetBase._FIELD_SPEC: if field.read_only: read_only["_"+field.name] = getattr(download_model, field.scicat_name) - else: + elif hasattr( + download_model, field.scicat_name + ): # TODO remove condition in API v4 init_args[field.name] = getattr(download_model, field.scicat_name) init_args["meta"] = download_model.scientificMetadata From 8e538fbb9fb49f20476c47c5d5ccf69a08644deb Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Tue, 17 Dec 2024 09:58:24 +0100 Subject: [PATCH 06/13] Make datasetName mandatory --- src/scitacean/_dataset_fields.py | 2 +- src/scitacean/model.py | 4 ++-- tests/client/attachment_client_test.py | 1 + tests/client/datablock_client_test.py | 1 + tests/client/dataset_client_test.py | 1 + tests/dataset_fields_test.py | 5 +++++ 6 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/scitacean/_dataset_fields.py b/src/scitacean/_dataset_fields.py index 6b994aa4..7496ca0a 100644 --- a/src/scitacean/_dataset_fields.py +++ b/src/scitacean/_dataset_fields.py @@ -341,7 +341,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: name="name", description="A name for the dataset, given by the creator to carry some semantic meaning. Useful for display purposes e.g. instead of displaying the pid.", read_only=False, - required=False, + required=True, scicat_name="datasetName", type=str, used_by_derived=True, diff --git a/src/scitacean/model.py b/src/scitacean/model.py index 99f66384..1f51a120 100644 --- a/src/scitacean/model.py +++ b/src/scitacean/model.py @@ -176,6 +176,7 @@ class UploadDerivedDataset(BaseModel): sourceFolder: RemotePath type: DatasetType usedSoftware: list[str] + datasetName: str accessGroups: list[str] | None = None classification: str | None = None comment: str | None = None @@ -188,7 +189,6 @@ class UploadDerivedDataset(BaseModel): keywords: list[str] | None = None license: str | None = None scientificMetadata: dict[str, Any] | None = None - datasetName: str | None = None numberOfFiles: NonNegativeInt | None = None orcidOfOwner: str | None = None ownerEmail: str | None = None @@ -226,6 +226,7 @@ class UploadRawDataset(BaseModel): sourceFolder: RemotePath type: DatasetType usedSoftware: list[str] + datasetName: str investigator: str | None = None accessGroups: list[str] | None = None classification: str | None = None @@ -242,7 +243,6 @@ class UploadRawDataset(BaseModel): keywords: list[str] | None = None license: str | None = None scientificMetadata: dict[str, Any] | None = None - datasetName: str | None = None numberOfFiles: NonNegativeInt | None = None orcidOfOwner: str | None = None ownerEmail: str | None = None diff --git a/tests/client/attachment_client_test.py b/tests/client/attachment_client_test.py index c627c0d1..89d90615 100644 --- a/tests/client/attachment_client_test.py +++ b/tests/client/attachment_client_test.py @@ -29,6 +29,7 @@ def scicat_client(client: Client) -> ScicatClient: @pytest.fixture def derived_dataset(scicat_access): return UploadDerivedDataset( + datasetName="Koelsche Lieder", 
contactEmail="black.foess@dom.koelle", creationTime=parse_date("1995-11-11T11:11:11.000Z"), owner="bfoess", diff --git a/tests/client/datablock_client_test.py b/tests/client/datablock_client_test.py index 27f5f07f..bcfd753b 100644 --- a/tests/client/datablock_client_test.py +++ b/tests/client/datablock_client_test.py @@ -26,6 +26,7 @@ def scicat_client(client: Client) -> ScicatClient: @pytest.fixture def derived_dataset(scicat_access): return UploadDerivedDataset( + datasetName="Koelsche Lieder", contactEmail="black.foess@dom.koelle", creationTime=parse_date("1995-11-11T11:11:11.000Z"), owner="bfoess", diff --git a/tests/client/dataset_client_test.py b/tests/client/dataset_client_test.py index d39eea22..77d8195f 100644 --- a/tests/client/dataset_client_test.py +++ b/tests/client/dataset_client_test.py @@ -26,6 +26,7 @@ def scicat_client(client: Client) -> ScicatClient: @pytest.fixture def derived_dataset(scicat_access): return UploadDerivedDataset( + datasetName="Koelsche Lieder", contactEmail="black.foess@dom.koelle", creationTime=parse_date("1995-11-11T11:11:11.000Z"), owner="bfoess", diff --git a/tests/dataset_fields_test.py b/tests/dataset_fields_test.py index 9a9b245b..126fe6ff 100644 --- a/tests/dataset_fields_test.py +++ b/tests/dataset_fields_test.py @@ -335,6 +335,7 @@ def test_fields_read_only__and_type_filter(): def test_make_raw_model(): dset = Dataset( + name="raw-dataset-62", type="raw", contact_email="p.stibbons@uu.am", creation_time="2142-04-02T16:44:56", @@ -349,6 +350,7 @@ def test_make_raw_model(): expected = UploadRawDataset( contactEmail="p.stibbons@uu.am", creationTime=dateutil.parser.parse("2142-04-02T16:44:56"), + datasetName="raw-dataset-62", owner="Ponder Stibbons;Mustrum Ridcully", ownerGroup="faculty", principalInvestigator="my principal investigator", @@ -370,6 +372,7 @@ def test_make_raw_model(): def test_make_derived_model(): dset = Dataset( type="derived", + name="derived-dataset", contact_email="p.stibbons@uu.am;m.ridcully@uu.am", creation_time="2142-04-02T16:44:56", owner="Ponder Stibbons;Mustrum Ridcully", @@ -381,6 +384,7 @@ def test_make_derived_model(): used_software=["scitacean", "magick"], ) expected = UploadDerivedDataset( + datasetName="derived-dataset", contactEmail="p.stibbons@uu.am;m.ridcully@uu.am", creationTime=dateutil.parser.parse("2142-04-02T16:44:56"), owner="Ponder Stibbons;Mustrum Ridcully", @@ -484,6 +488,7 @@ def test_email_validation(field): def test_orcid_validation_valid(good_orcid): dset = Dataset( type="raw", + name="test ORCID", contact_email="jan-lukas.wynen@ess.eu", creation_location="scitacean/tests", creation_time="2142-04-02T16:44:56", From d6cdf36e984da0ef9d8a7b24e96f588d817c9a86 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Tue, 17 Dec 2024 10:16:47 +0100 Subject: [PATCH 07/13] Be lenient about plural fields --- src/scitacean/model.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/scitacean/model.py b/src/scitacean/model.py index 1f51a120..9d3465d1 100644 --- a/src/scitacean/model.py +++ b/src/scitacean/model.py @@ -101,7 +101,10 @@ from .thumbnail import Thumbnail -class DownloadDataset(BaseModel, masked=("history",)): +# TODO remove extra masks after API v4 +class DownloadDataset( + BaseModel, masked=("history", "proposalId", "sampleId", "instrumentId") +): contactEmail: str | None = None creationLocation: str | None = None creationTime: datetime | None = None @@ -164,6 +167,25 @@ def _validate_emails(cls, value: Any) -> Any: def _validate_orcids(cls, value: Any) -> 
Any: return validate_orcids(value) + # TODO remove after API v4 + @pydantic.field_validator("sampleIds", mode="before") + def _validate_sample_ids(cls, value: Any) -> Any: + if value == [None]: + return [] + return value + + @pydantic.field_validator("proposalIds", mode="before") + def _validate_proposal_ids(cls, value: Any) -> Any: + if value == [None]: + return [] + return value + + @pydantic.field_validator("instrumentIds", mode="before") + def _validate_instrument_ids(cls, value: Any) -> Any: + if value == [None]: + return [] + return value + class UploadDerivedDataset(BaseModel): contactEmail: str From 81dc0c1b0df2642ccf644c0c187c2ca35680acd4 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Tue, 17 Dec 2024 10:25:46 +0100 Subject: [PATCH 08/13] Fix more tests --- tests/model_test.py | 22 ++++++++++++++-------- tests/transfer/sftp_test.py | 1 + 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/tests/model_test.py b/tests/model_test.py index 24386e2c..841f7214 100644 --- a/tests/model_test.py +++ b/tests/model_test.py @@ -101,6 +101,7 @@ def test_derived_dataset_default_values( accessGroups=["access1"], contactEmail="contact@email.com", creationTime=parse_date("2000-01-01T01:01:01.000Z"), + datasetName="Test derived dataset", inputDatasets=[PID(prefix="PID.prefix.a0b1", pid="abcd")], investigator="inv@esti.gator", numberOfFilesArchived=0, @@ -117,8 +118,9 @@ def test_derived_dataset_default_values( assert finalized.accessGroups == ["access1"] assert finalized.contactEmail == "contact@email.com" assert finalized.creationTime == parse_date("2000-01-01T01:01:01.000Z") + assert finalized.datasetName == "Test derived dataset" assert finalized.inputDatasets == [PID(prefix="PID.prefix.a0b1", pid="abcd")] - assert finalized.investigator == "inv@esti.gator" + assert finalized.principalInvestigator == "inv@esti.gator" assert finalized.owner == scicat_access.user.username assert finalized.ownerGroup == scicat_access.user.group assert finalized.sourceFolder == "/source/folder" @@ -128,7 +130,6 @@ def test_derived_dataset_default_values( assert finalized.createdAt # some non-empty str assert finalized.createdBy # some non-empty str assert finalized.classification # some non-empty str - assert finalized.datasetName # some non-empty str assert finalized.isPublished is False assert finalized.keywords == [] assert finalized.numberOfFiles == 0 @@ -140,6 +141,7 @@ def test_derived_dataset_default_values( assert finalized.size == 0 assert finalized.techniques == [] assert finalized.updatedAt # some non-empty str + assert finalized.version == "v3" # Left empty assert finalized.description is None is None @@ -150,7 +152,6 @@ def test_derived_dataset_default_values( assert finalized.ownerEmail is None assert finalized.sourceFolderHost is None assert finalized.validationStatus is None - assert finalized.version is None def test_raw_dataset_default_values(real_client, require_scicat_backend, scicat_access): @@ -159,56 +160,61 @@ def test_raw_dataset_default_values(real_client, require_scicat_backend, scicat_ contactEmail="contact@email.com", creationTime=parse_date("2000-01-01T01:01:01.000Z"), creationLocation="site", + datasetName="Test raw dataset", + inputDatasets=[], numberOfFilesArchived=0, owner=scicat_access.user.username, ownerGroup=scicat_access.user.group, principalInvestigator="inv@esti.gator", sourceFolder=RemotePath("/source/folder"), type=DatasetType.RAW, + usedSoftware=["software1"], ) pid = real_client.scicat.create_dataset_model(dset).pid finalized = 
real_client.scicat.get_dataset_model(pid) # Inputs + assert finalized.datasetName == "Test raw dataset" assert finalized.accessGroups == ["access1"] assert finalized.contactEmail == "contact@email.com" assert finalized.creationLocation == "site" assert finalized.creationTime == parse_date("2000-01-01T01:01:01.000Z") + assert finalized.inputDatasets == [] assert finalized.owner == scicat_access.user.username assert finalized.ownerGroup == scicat_access.user.group assert finalized.principalInvestigator == "inv@esti.gator" assert finalized.sourceFolder == "/source/folder" + assert finalized.usedSoftware == ["software1"] # Default values assert finalized.createdAt # some non-empty str assert finalized.createdBy # some non-empty str assert finalized.classification # some non-empty str - assert finalized.datasetName # some non-empty str + assert finalized.instrumentIds == [] assert finalized.isPublished is False assert finalized.keywords == [] assert finalized.numberOfFiles == 0 assert finalized.numberOfFilesArchived == 0 assert finalized.packedSize == 0 assert finalized.pid # some non-empty str + assert finalized.proposalIds == [] + assert finalized.sampleIds == [] assert finalized.scientificMetadata == {} assert finalized.sharedWith == [] assert finalized.size == 0 assert finalized.techniques == [] assert finalized.updatedAt # some non-empty str + assert finalized.version == "v3" # Left empty assert finalized.dataFormat is None assert finalized.description is None assert finalized.endTime is None - assert finalized.instrumentIds is None assert finalized.license is None assert finalized.orcidOfOwner is None assert finalized.ownerEmail is None - assert finalized.proposalIds is None - assert finalized.sampleIds is None assert finalized.sourceFolderHost is None assert finalized.validationStatus is None - assert finalized.version is None def test_default_masked_fields_are_dropped(): diff --git a/tests/transfer/sftp_test.py b/tests/transfer/sftp_test.py index 8c461798..d6676630 100644 --- a/tests/transfer/sftp_test.py +++ b/tests/transfer/sftp_test.py @@ -402,6 +402,7 @@ def test_client_with_sftp( contact_email="p.stibbons@uu.am", creation_location="UU", creation_time=datetime(2023, 6, 23, 10, 0, 0, tzinfo=timezone.utc), + name="Secret Thaum Storage", owner="PonderStibbons", owner_group="uu", principal_investigator="MustrumRidcully", From 470afce5b3286d5093eb6912b4b4ffad9d4f7660 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Tue, 17 Dec 2024 11:50:45 +0100 Subject: [PATCH 09/13] Remove print --- tests/client/dataset_client_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/client/dataset_client_test.py b/tests/client/dataset_client_test.py index 77d8195f..e882865c 100644 --- a/tests/client/dataset_client_test.py +++ b/tests/client/dataset_client_test.py @@ -77,7 +77,6 @@ def test_get_dataset(client): dset = INITIAL_DATASETS["raw"] dblock = INITIAL_ORIG_DATABLOCKS["raw"][0] downloaded = client.get_dataset(dset.pid, strict_validation=True) - print(downloaded.source_folder) assert downloaded.source_folder == dset.sourceFolder assert downloaded.creation_time == dset.creationTime From e381c530fb0b1c0957e180ccfa80b1d55bb6a8d5 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Tue, 17 Dec 2024 14:55:30 +0100 Subject: [PATCH 10/13] Fix query --- tests/client/query_client_test.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/client/query_client_test.py b/tests/client/query_client_test.py index 4081ea42..2ab5db4f 100644 --- 
a/tests/client/query_client_test.py +++ b/tests/client/query_client_test.py @@ -151,17 +151,19 @@ def test_query_dataset_multiple_by_multiple_fields(real_client): @pytest.mark.usefixtures("_seed_database") def test_query_dataset_multiple_by_derived_field(real_client): datasets = real_client.scicat.query_datasets( - {"investigator": "investigator 1"}, + {"principalInvestigator": "investigator 1"} ) actual = {ds.pid: ds for ds in datasets} - expected = {SEED[key].pid: SEED[key] for key in ("derived1", "derived2")} + expected = { + SEED[key].pid: SEED[key] for key in ("derived1", "derived2", "raw1", "raw3") + } assert actual == expected @pytest.mark.usefixtures("_seed_database") def test_query_dataset_uses_conjunction_of_fields(real_client): datasets = real_client.scicat.query_datasets( - {"proposalIds": ["p0124"], "investigator": "investigator X"}, + {"proposalIds": ["p0124"], "principalInvestigator": "investigator X"}, ) assert not datasets From 5419c9367ca8fe8786e7720e1c2c020a2bc15773 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Tue, 17 Dec 2024 14:59:26 +0100 Subject: [PATCH 11/13] Appease mypy --- src/scitacean/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scitacean/model.py b/src/scitacean/model.py index 9d3465d1..43f9715b 100644 --- a/src/scitacean/model.py +++ b/src/scitacean/model.py @@ -281,7 +281,7 @@ class UploadRawDataset(BaseModel): @pydantic.model_validator(mode="before") @classmethod - def _set_investigator(cls, data): + def _set_investigator(cls, data: Any) -> Any: # The model currently has both `investigator` and `principalInvestigator` # and both are mandatory. Eventually, `investigator` will be removed. # So make sure we can construct the model if only one is given. From aab22d3b53bd0c29c069c420ef94a1a826b4a1fd Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Tue, 17 Dec 2024 15:49:14 +0100 Subject: [PATCH 12/13] Remove investigator in fake upload --- src/scitacean/testing/client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/scitacean/testing/client.py b/src/scitacean/testing/client.py index ea084650..97ca5ce6 100644 --- a/src/scitacean/testing/client.py +++ b/src/scitacean/testing/client.py @@ -324,10 +324,13 @@ def _process_dataset( ) if "techniques" in fields: fields["techniques"] = list(map(_process_technique, fields["techniques"])) + # TODO remove in API v4 for singular in ("proposalId", "sampleId", "instrumentId"): if singular in fields: fields[singular + "s"] = [fields[singular]] + fields.pop("investigator") + return model.construct( model.DownloadDataset, _strict_validation=False, From 00aa09d7990333d55c9024d3a0f39c9be06ccc21 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Tue, 17 Dec 2024 15:50:31 +0100 Subject: [PATCH 13/13] Add missing dataset name in docs --- docs/user-guide/testing.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/user-guide/testing.ipynb b/docs/user-guide/testing.ipynb index 278d019b..79e1e273 100644 --- a/docs/user-guide/testing.ipynb +++ b/docs/user-guide/testing.ipynb @@ -36,6 +36,7 @@ "\n", "dataset = Dataset(\n", " type=\"raw\",\n", + " name=\"Important data\",\n", " owner_group=\"faculty\",\n", " owner=\"ridcully\",\n", " principal_investigator=\"Ridcully\",\n",
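
--
Usage sketch (illustrative only, not part of the patch series): the net effect of patches 04 and 06 for code that builds upload models directly is that datasetName must now be given, while only one of the two investigator fields is needed because the _set_investigator validator mirrors it into the other. The field values below are placeholders taken from the tests above; the import paths are assumptions based on how those tests import these names.

    from datetime import datetime, timezone

    from scitacean import RemotePath
    from scitacean.model import DatasetType, UploadRawDataset

    dset = UploadRawDataset(
        contactEmail="contact@email.com",
        creationLocation="site",
        creationTime=datetime(2000, 1, 1, 1, 1, 1, tzinfo=timezone.utc),
        datasetName="Test raw dataset",  # mandatory as of PATCH 06
        inputDatasets=[],
        numberOfFilesArchived=0,
        owner="owner",
        ownerGroup="group",
        principalInvestigator="inv@esti.gator",
        sourceFolder=RemotePath("/source/folder"),
        type=DatasetType.RAW,
        usedSoftware=["software1"],
    )
    # _set_investigator (PATCH 04) copies principalInvestigator into the
    # legacy investigator field when only one of the two is provided:
    assert dset.investigator == dset.principalInvestigator

Queries change accordingly (patches 03 and 10): filters use the plural key with a list value, e.g. real_client.scicat.query_datasets({"proposalIds": ["p0124"]}, limit=2, order="creationTime:asc"), and "principalInvestigator" replaces the old "investigator" key.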