[pre-commit.ci] pre-commit autoupdate #1329

Merged · 3 commits · Oct 14, 2024
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
@@ -7,28 +7,28 @@ files: |
   )/.*\.py$
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.14
+    rev: v0.6.9
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix, --no-cache]
       - id: ruff-format
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.8.0
+    rev: v1.11.2
     hooks:
       - id: mypy
         additional_dependencies:
           - types-requests
           - types-python-dateutil
   - repo: https://github.com/python-jsonschema/check-jsonschema
-    rev: 0.27.3
+    rev: 0.29.4
     hooks:
       - id: check-github-workflows
         files: '^github/workflows/.*\.ya?ml$'
         types: ["yaml"]
       - id: check-dependabot
         files: '^\.github/dependabot\.ya?ml$'
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v5.0.0
     hooks:
       - id: check-added-large-files
         files: ".*"
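These are the routine revision bumps that pre-commit.ci opens automatically; the same diff can be reproduced locally. A minimal sketch, assuming the `pre-commit` package is installed and the repository root is the current working directory:

```python
# Minimal sketch: reproduce pre-commit.ci's revision bumps locally.
# Assumes `pre-commit` is installed and we run from the repo root.
import subprocess

# Rewrite every `rev:` in .pre-commit-config.yaml to the latest tag.
subprocess.run(["pre-commit", "autoupdate"], check=True)

# Re-run all hooks over the whole tree to surface any new lint errors.
subprocess.run(["pre-commit", "run", "--all-files"], check=True)
```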
19 changes: 8 additions & 11 deletions openml/_api_calls.py
@@ -351,7 +351,7 @@ def __is_checksum_equal(downloaded_file_binary: bytes, md5_checksum: str | None
     return md5_checksum == md5_checksum_download


-def _send_request(  # noqa: C901
+def _send_request(  # noqa: C901, PLR0912
     request_method: str,
     url: str,
     data: DATA_TYPE,
@@ -387,18 +387,15 @@ def _send_request(  # noqa: C901
             # -- Check if encoding is not UTF-8 perhaps
             if __is_checksum_equal(response.content, md5_checksum):
                 raise OpenMLHashException(
-                    "Checksum of downloaded file is unequal to the expected checksum {}"
-                    "because the text encoding is not UTF-8 when downloading {}. "
-                    "There might be a sever-sided issue with the file, "
-                    "see: https://github.com/openml/openml-python/issues/1180.".format(
-                        md5_checksum,
-                        url,
-                    ),
+                    f"Checksum of downloaded file is unequal to the expected checksum"
+                    f"{md5_checksum} because the text encoding is not UTF-8 when "
+                    f"downloading {url}. There might be a sever-sided issue with the file, "
+                    "see: https://github.com/openml/openml-python/issues/1180.",
                 )

             raise OpenMLHashException(
-                "Checksum of downloaded file is unequal to the expected checksum {} "
-                "when downloading {}.".format(md5_checksum, url),
+                f"Checksum of downloaded file is unequal to the expected checksum "
+                f"{md5_checksum} when downloading {url}.",
             )

     return response
@@ -464,7 +461,7 @@ def __parse_server_exception(
         server_exception = xmltodict.parse(response.text)
     except xml.parsers.expat.ExpatError as e:
         raise e
-    except Exception as e:  # noqa: BLE001
+    except Exception as e:
         # OpenML has a sophisticated error system
         # where information about failures is provided. try to parse this
         raise OpenMLServerError(
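For context, the messages rewritten in this file are raised when the MD5 of a downloaded payload disagrees with the checksum the server advertised. A rough sketch of the comparison behind `__is_checksum_equal` (the signature mirrors the hunk above; the real helper in `openml/_api_calls.py` may differ in detail):

```python
# Hedged sketch of the checksum comparison, not the exact implementation.
from __future__ import annotations

import hashlib


def is_checksum_equal(downloaded_file_binary: bytes, md5_checksum: str | None) -> bool:
    if md5_checksum is None:
        return True  # nothing to verify against
    md5_checksum_download = hashlib.md5(downloaded_file_binary).hexdigest()
    return md5_checksum == md5_checksum_download
```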
3 changes: 2 additions & 1 deletion openml/cli.py
@@ -1,4 +1,5 @@
-""""Command Line Interface for `openml` to configure its settings."""
+"""Command Line Interface for `openml` to configure its settings."""
+
 from __future__ import annotations

 import argparse
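The fix in this file drops a stray fourth quote. With `""""`, the module docstring itself begins with a literal `"` character, which is easy to miss:

```python
# Four quotes open a triple-quoted string whose content starts with `"`.
s = """"Command Line Interface for `openml` to configure its settings."""
assert s.startswith('"')  # stray leading quote in the docstring

t = """Command Line Interface for `openml` to configure its settings."""
assert not t.startswith('"')  # fixed
```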
8 changes: 4 additions & 4 deletions openml/config.py
@@ -278,17 +278,17 @@ def _setup(config: _Config | None = None) -> None:
         _root_cache_directory.mkdir(exist_ok=True, parents=True)
     except PermissionError:
         openml_logger.warning(
-            "No permission to create openml cache directory at %s! This can result in "
-            "OpenML-Python not working properly." % _root_cache_directory,
+            f"No permission to create openml cache directory at {_root_cache_directory}!"
+            " This can result in OpenML-Python not working properly.",
         )

     if cache_exists:
         _create_log_handlers()
     else:
         _create_log_handlers(create_file_handler=False)
         openml_logger.warning(
-            "No permission to create OpenML directory at %s! This can result in OpenML-Python "
-            "not working properly." % config_dir,
+            f"No permission to create OpenML directory at {config_dir}! This can result in "
+            " OpenML-Python not working properly.",
         )
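A side note on this conversion: both the old `%`-interpolation and the new f-strings format the message eagerly, which is harmless for these one-off warnings. For completeness, the three styles side by side (the path is illustrative):

```python
import logging

logger = logging.getLogger("openml")
config_dir = "/home/user/.config/openml"  # illustrative path

# Before: %-interpolation applied before logging ever sees the string.
logger.warning("No permission to create OpenML directory at %s!" % config_dir)

# After (this PR): an f-string, still eager but easier to read.
logger.warning(f"No permission to create OpenML directory at {config_dir}!")

# Lazy alternative: formatting is deferred until the record is emitted,
# which only matters on hot paths or for suppressed log levels.
logger.warning("No permission to create OpenML directory at %s!", config_dir)
```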
8 changes: 4 additions & 4 deletions openml/datasets/dataset.py
@@ -156,14 +156,14 @@ def find_invalid_characters(string: str, pattern: str) -> str:
         )

     if dataset_id is None:
-        pattern = "^[\x00-\x7F]*$"
+        pattern = "^[\x00-\x7f]*$"
         if description and not re.match(pattern, description):
             # not basiclatin (XSD complains)
             invalid_characters = find_invalid_characters(description, pattern)
             raise ValueError(
                 f"Invalid symbols {invalid_characters} in description: {description}",
             )
-        pattern = "^[\x00-\x7F]*$"
+        pattern = "^[\x00-\x7f]*$"
         if citation and not re.match(pattern, citation):
             # not basiclatin (XSD complains)
             invalid_characters = find_invalid_characters(citation, pattern)
@@ -574,7 +574,7 @@ def _parse_data_from_file(self, data_file: Path) -> tuple[list[str], list[bool],
     def _parse_data_from_pq(self, data_file: Path) -> tuple[list[str], list[bool], pd.DataFrame]:
         try:
             data = pd.read_parquet(data_file)
-        except Exception as e:  # noqa: BLE001
+        except Exception as e:
             raise Exception(f"File: {data_file}") from e
         categorical = [data[c].dtype.name == "category" for c in data.columns]
         attribute_names = list(data.columns)
@@ -816,7 +816,7 @@ def get_data(  # noqa: C901, PLR0912, PLR0915
             to_exclude.extend(self.ignore_attribute)

         if len(to_exclude) > 0:
-            logger.info("Going to remove the following attributes: %s" % to_exclude)
+            logger.info(f"Going to remove the following attributes: {to_exclude}")
             keep = np.array([column not in to_exclude for column in attribute_names])
             data = data.loc[:, keep] if isinstance(data, pd.DataFrame) else data[:, keep]
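The `\x7F` → `\x7f` edits are pure normalization: ruff lowercases hex escapes, and both patterns match exactly the same characters. A small sketch of the basic-Latin validation this hunk touches (toy string; `BASIC_LATIN` is an illustrative name):

```python
import re

# Same character class as "^[\x00-\x7F]*$"; only the escape casing differs.
BASIC_LATIN = "^[\x00-\x7f]*$"

description = "Café dataset"  # 'é' (U+00E9) falls outside basic Latin
if not re.match(BASIC_LATIN, description):
    invalid_characters = {c for c in description if not c.isascii()}
    raise ValueError(f"Invalid symbols {invalid_characters} in description: {description}")
```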
31 changes: 11 additions & 20 deletions openml/datasets/functions.py
@@ -6,6 +6,7 @@
 import warnings
 from collections import OrderedDict
 from pathlib import Path
+from pyexpat import ExpatError
 from typing import TYPE_CHECKING, Any, overload
 from typing_extensions import Literal

@@ -15,7 +16,6 @@
 import pandas as pd
 import urllib3
 import xmltodict
-from pyexpat import ExpatError
 from scipy.sparse import coo_matrix

 import openml._api_calls
@@ -85,8 +85,7 @@ def list_datasets(
     *,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 @overload
@@ -98,8 +97,7 @@ def list_datasets(
     tag: str | None,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 @overload
@@ -111,8 +109,7 @@ def list_datasets(
     tag: str | None = ...,
     output_format: Literal["dict"] = "dict",
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def list_datasets(
@@ -207,17 +204,15 @@ def _list_datasets(
     data_id: list | None = ...,
     output_format: Literal["dict"] = "dict",
     **kwargs: Any,
-) -> dict:
-    ...
+) -> dict: ...


 @overload
 def _list_datasets(
     data_id: list | None = ...,
     output_format: Literal["dataframe"] = "dataframe",
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def _list_datasets(
@@ -256,18 +251,16 @@ def _list_datasets(
     for operator, value in kwargs.items():
         api_call += f"/{operator}/{value}"
     if data_id is not None:
-        api_call += "/data_id/%s" % ",".join([str(int(i)) for i in data_id])
+        api_call += "/data_id/{}".format(",".join([str(int(i)) for i in data_id]))
     return __list_datasets(api_call=api_call, output_format=output_format)


 @overload
-def __list_datasets(api_call: str, output_format: Literal["dict"] = "dict") -> dict:
-    ...
+def __list_datasets(api_call: str, output_format: Literal["dict"] = "dict") -> dict: ...


 @overload
-def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame:
-    ...
+def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: ...


 def __list_datasets(
@@ -785,10 +778,8 @@ def create_dataset(  # noqa: C901, PLR0912, PLR0915
     if not is_row_id_an_attribute:
         raise ValueError(
             "'row_id_attribute' should be one of the data attribute. "
-            " Got '{}' while candidates are {}.".format(
-                row_id_attribute,
-                [attr[0] for attr in attributes_],
-            ),
+            f" Got '{row_id_attribute}' while candidates are"
+            f" {[attr[0] for attr in attributes_]}.",
         )

     if isinstance(data, pd.DataFrame):
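The recurring `...` moves in this file are ruff-format's convention of keeping a bare `...` body on the signature line of `@overload` stubs. A toy illustration (not openml's actual API):

```python
from typing import Literal, overload


@overload
def load(output_format: Literal["dict"]) -> dict: ...
@overload
def load(output_format: Literal["list"]) -> list: ...


def load(output_format: str) -> dict | list:
    # Single implementation backing both overload signatures.
    return {} if output_format == "dict" else []
```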
22 changes: 10 additions & 12 deletions openml/evaluations/functions.py
@@ -32,8 +32,7 @@ def list_evaluations(
     per_fold: bool | None = ...,
     sort_order: str | None = ...,
     output_format: Literal["dict", "object"] = "dict",
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -51,8 +50,7 @@ def list_evaluations(
     per_fold: bool | None = ...,
     sort_order: str | None = ...,
     output_format: Literal["dataframe"] = ...,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def list_evaluations(
@@ -204,24 +202,24 @@ def _list_evaluations(
     -------
     dict of objects, or dataframe
     """
-    api_call = "evaluation/list/function/%s" % function
+    api_call = f"evaluation/list/function/{function}"
     if kwargs is not None:
         for operator, value in kwargs.items():
             api_call += f"/{operator}/{value}"
     if tasks is not None:
-        api_call += "/task/%s" % ",".join([str(int(i)) for i in tasks])
+        api_call += "/task/{}".format(",".join([str(int(i)) for i in tasks]))
     if setups is not None:
-        api_call += "/setup/%s" % ",".join([str(int(i)) for i in setups])
+        api_call += "/setup/{}".format(",".join([str(int(i)) for i in setups]))
     if flows is not None:
-        api_call += "/flow/%s" % ",".join([str(int(i)) for i in flows])
+        api_call += "/flow/{}".format(",".join([str(int(i)) for i in flows]))
     if runs is not None:
-        api_call += "/run/%s" % ",".join([str(int(i)) for i in runs])
+        api_call += "/run/{}".format(",".join([str(int(i)) for i in runs]))
     if uploaders is not None:
-        api_call += "/uploader/%s" % ",".join([str(int(i)) for i in uploaders])
+        api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploaders]))
     if study is not None:
         api_call += "/study/%d" % study
     if sort_order is not None:
-        api_call += "/sort_order/%s" % sort_order
+        api_call += f"/sort_order/{sort_order}"

     return __list_evaluations(api_call, output_format=output_format)

@@ -236,7 +234,7 @@ def __list_evaluations(
     # Minimalistic check if the XML is useful
     if "oml:evaluations" not in evals_dict:
         raise ValueError(
-            "Error in return XML, does not contain " '"oml:evaluations": %s' % str(evals_dict),
+            "Error in return XML, does not contain " f'"oml:evaluations": {evals_dict!s}',
         )

     assert isinstance(evals_dict["oml:evaluations"]["oml:evaluation"], list), type(
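The `%`-to-`.format` and f-string conversions in this file all feed one pattern: assembling a REST path segment by segment, with id lists joined as comma-separated integers. A self-contained illustration with toy values:

```python
# Toy values; the real ids come from the caller's filters.
function = "predictive_accuracy"
tasks = [1, 2, 3]

api_call = f"evaluation/list/function/{function}"
if tasks is not None:
    api_call += "/task/{}".format(",".join(str(int(i)) for i in tasks))

assert api_call == "evaluation/list/function/predictive_accuracy/task/1,2,3"
```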