Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functionality to list StatsBomb open competitions and available matches #363

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion kloppy/_providers/statsbomb.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import warnings
from typing import Union
from typing import Union, Literal

from kloppy.config import get_config
from kloppy.domain.models.statsbomb.event import StatsBombEventFactory
from kloppy.infra.serializers.event.statsbomb import (
StatsBombDeserializer,
StatsBombInputs,
)
from kloppy.infra.serializers.event.statsbomb.helpers import parse_open_data
from kloppy.domain import EventDataset, Optional, List, EventFactory
from kloppy.io import open_as_file, FileLike, Source

Expand Down Expand Up @@ -77,3 +78,13 @@ def load_open_data(
coordinates=coordinates,
event_factory=event_factory,
)


def list_open_data(
    fmt: Literal["dataframe", "dict"] = "dataframe",
    competition_id: Optional[int] = None,
    season_id: Optional[int] = None,
):
    """List the matches available in the StatsBomb open-data repository.

    Thin public wrapper that forwards its arguments unchanged to
    ``parse_open_data``.

    Args:
        fmt: Output format, either ``"dataframe"`` or ``"dict"``.
        competition_id: Competition to list matches for, or ``None``.
        season_id: Season to list matches for, or ``None``.

    Returns:
        Whatever ``parse_open_data`` returns for the requested ``fmt``.
    """
    return parse_open_data(
        fmt=fmt, competition_id=competition_id, season_id=season_id
    )
74 changes: 74 additions & 0 deletions kloppy/infra/serializers/event/statsbomb/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,18 @@
)
from kloppy.exceptions import DeserializationError

OPEN_COMPETITIONS_PATH = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/competitions.json"
OPEN_MATCHES_PATH = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/matches/{competition_id}/{season_id}.json"

import requests as re


def get_response(path):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please move this to a more generic place like utils.py

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing this out. These lines weren't necessary anymore so I've removed them.

response = re.get(path)
response.raise_for_status()
data = response.json()
return data


def parse_str_ts(timestamp: str) -> float:
"""Parse a HH:mm:ss string timestamp into number of seconds."""
Expand Down Expand Up @@ -141,3 +153,65 @@ def get_player_from_freeze_frame(player_data, team, i):
ball_owning_team=event.ball_owning_team,
other_data={"visible_area": visible_area},
)


def parse_open_data(
    competition_id: int = None, season_id: int = None, fmt="dataframe"
):
    """Fetch the list of StatsBomb open-data matches via ``statsbombpy``.

    When both ``competition_id`` and ``season_id`` are given, only that
    competition-season is fetched. When both are omitted, every entry
    returned by ``sb.competitions`` is fetched in turn.

    Args:
        competition_id: Competition identifier, or ``None`` for all.
        season_id: Season identifier, or ``None`` for all. Must be given
            together with ``competition_id`` or omitted together with it.
        fmt: ``"dataframe"`` to get a single concatenated pandas DataFrame,
            or ``"dict"`` to get a list holding one ``sb.matches`` result
            per fetched competition-season.

    Returns:
        A pandas DataFrame (``fmt="dataframe"``) or a list of per
        competition-season results (``fmt="dict"``).

    Raises:
        ValueError: If ``fmt`` is not supported, or only one of
            ``competition_id`` / ``season_id`` is provided.
        ImportError: If ``statsbombpy`` (or ``pandas`` for the dataframe
            format) is not installed.
        RuntimeError: If fetching or combining the data fails; the
            original exception is attached as ``__cause__``.
    """
    import warnings

    # Validate arguments first so bad input fails fast, before any optional
    # dependency is imported or any network request is made.
    if fmt not in ("dataframe", "dict"):
        raise ValueError("Invalid format. Use 'dataframe' or 'dict'.")
    if (competition_id is None) != (season_id is None):
        raise ValueError(
            "Invalid input: Both competition_id and season_id must either be provided together or omitted together."
        )

    # Missing optional dependencies are reported as exceptions instead of a
    # printed message plus an implicit ``None`` return, which callers could
    # not distinguish from a valid result.
    try:
        from statsbombpy import sb
        from statsbombpy.api_client import NoAuthWarning
    except ImportError as e:
        raise ImportError(
            "Please install the statsbombpy library to use this function."
        ) from e

    pd = None
    if fmt == "dataframe":
        try:
            import pandas as pd
        except ImportError as e:
            raise ImportError(
                "Please install the pandas library to use this function."
            ) from e

    all_matches = []
    try:
        if competition_id is not None:
            all_matches.append(
                sb.matches(
                    competition_id=competition_id,
                    season_id=season_id,
                    fmt=fmt,
                )
            )
        else:
            competitions = sb.competitions(fmt="dict")
            # Silence the no-credentials warning for the repeated per
            # competition calls only; the filter is restored on exit.
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=NoAuthWarning)
                for competition in competitions.values():
                    # Local names so the function parameters are not
                    # overwritten while iterating.
                    comp_id = competition["competition_id"]
                    seas_id = competition["season_id"]
                    matches = sb.matches(
                        competition_id=comp_id,
                        season_id=seas_id,
                        fmt=fmt,
                    )
                    if fmt == "dataframe":
                        # Make sure each frame carries its identifiers so
                        # rows stay attributable after concatenation.
                        if "competition_id" not in matches.columns:
                            matches["competition_id"] = comp_id
                        if "season_id" not in matches.columns:
                            matches["season_id"] = seas_id
                    all_matches.append(matches)

        if fmt == "dict":
            return all_matches
        if not all_matches:
            # pd.concat raises on an empty list; return an empty frame.
            return pd.DataFrame()
        return pd.concat(all_matches, ignore_index=True)
    except Exception as e:
        # Keep the historical RuntimeError for fetch failures, but chain the
        # cause so the underlying traceback is preserved.
        raise RuntimeError(
            f"An error occurred while fetching data: {e}"
        ) from e
2 changes: 1 addition & 1 deletion kloppy/statsbomb.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from ._providers.statsbomb import load, load_open_data
from ._providers.statsbomb import load, load_open_data, list_open_data
Loading