From 80e16fb861c9a1ad6ad64c8acfcac5c3f5f3f222 Mon Sep 17 00:00:00 2001 From: driesdeprest Date: Thu, 21 Nov 2024 18:49:00 +0100 Subject: [PATCH 1/2] wyscout v3 - add position information for players --- .../event/wyscout/deserializer_v3.py | 79 ++++++++++++++----- kloppy/tests/test_wyscout.py | 7 ++ 2 files changed, 68 insertions(+), 18 deletions(-) diff --git a/kloppy/infra/serializers/event/wyscout/deserializer_v3.py b/kloppy/infra/serializers/event/wyscout/deserializer_v3.py index 08088dd1..64e93a1e 100644 --- a/kloppy/infra/serializers/event/wyscout/deserializer_v3.py +++ b/kloppy/infra/serializers/event/wyscout/deserializer_v3.py @@ -45,6 +45,7 @@ Team, FormationType, CarryResult, + PositionType, ) from kloppy.exceptions import DeserializationError from kloppy.utils import performance_logging @@ -81,36 +82,78 @@ "3-2-3-2": FormationType.THREE_TWO_THREE_TWO, } +position_types_mapping: Dict[str, PositionType] = { + "GK": PositionType.Goalkeeper, + "LB": PositionType.LeftBack, + "LWB": PositionType.LeftWing, + "LB5": PositionType.LeftBack, + "LCB": PositionType.LeftCenterBack, + "LCB3": PositionType.LeftCenterBack, + "CB": PositionType.CenterBack, + "RCB": PositionType.RightCenterBack, + "RCB3": PositionType.RightCenterBack, + "RB": PositionType.RightBack, + "RWB": PositionType.RightWing, + "RB5": PositionType.RightBack, + "LW": PositionType.LeftWing, + "LAMF": PositionType.LeftAttackingMidfield, + "LCMF3": PositionType.LeftCentralMidfield, + "LCMF": PositionType.LeftCentralMidfield, + "DMF": PositionType.DefensiveMidfield, + "LDMF": PositionType.LeftDefensiveMidfield, + "RDMF": PositionType.RightDefensiveMidfield, + "RCMF3": PositionType.RightCentralMidfield, + "RCMF": PositionType.RightCentralMidfield, + "RAMF": PositionType.RightAttackingMidfield, + "RW": PositionType.RightWing, + "AMF": PositionType.AttackingMidfield, + "LWF": PositionType.LeftForward, + "CF": PositionType.Striker, + "SS": PositionType.Striker, + "RWF": PositionType.RightForward, +} + def _flip_point(point: Point) -> Point: return Point(x=100 - point.x, y=100 - point.y) def _parse_team(raw_events, wyId: str, ground: Ground) -> Team: + # Get the first formation description + first_period_formation_info = raw_events["formations"][wyId]["1H"] + first_formation_descr = next(iter(first_period_formation_info.values())) + formation_str, formation_info = next(iter(first_formation_descr.items())) + + # Extract the formation and players' positions + starting_formation = formations[formation_str] + starting_players_positions = { + player_id: position_types_mapping[player_info["position"].upper()] + for player_descr in formation_info["players"] + for player_id, player_info in player_descr.items() + } + team = Team( team_id=wyId, name=raw_events["teams"][wyId]["team"]["officialName"], ground=ground, - starting_formation=formations[ - next( - iter( - raw_events["formations"][wyId]["1H"][ - next(iter(raw_events["formations"][wyId]["1H"])) - ] - ) - ) - ], + starting_formation=starting_formation, ) - team.players = [ - Player( - player_id=str(player["player"]["wyId"]), - team=team, - jersey_no=None, - first_name=player["player"]["firstName"], - last_name=player["player"]["lastName"], + + for player in raw_events["players"][wyId]: + player_id = str(player["player"]["wyId"]) + starting_position = starting_players_positions.get(player_id) + team.players.append( + Player( + player_id=player_id, + team=team, + jersey_no=None, + first_name=player["player"]["firstName"], + last_name=player["player"]["lastName"], + starting=starting_position is not None, + starting_position=starting_position, + ) ) - for player in raw_events["players"][wyId] - ] + return team diff --git a/kloppy/tests/test_wyscout.py b/kloppy/tests/test_wyscout.py index 725ae92f..5d28f64e 100644 --- a/kloppy/tests/test_wyscout.py +++ b/kloppy/tests/test_wyscout.py @@ -23,6 +23,7 @@ Time, PassType, PassQualifier, + PositionType, ) from kloppy import wyscout @@ -203,6 +204,12 @@ def test_metadata(self, dataset: EventDataset): == FormationType.FOUR_THREE_ONE_TWO ) + cr7 = dataset.metadata.teams[0].get_player_by_id("3322") + + assert cr7.full_name == "Cristiano Ronaldo dos Santos Aveiro" + assert cr7.starting is True + assert cr7.positions.last() == PositionType.Striker + def test_enriched_metadata(self, dataset: EventDataset): date = dataset.metadata.date if date: From 11b1ed590327fcbd7077f2eb9d33124c85c22b4a Mon Sep 17 00:00:00 2001 From: driesdeprest Date: Tue, 26 Nov 2024 11:06:57 +0100 Subject: [PATCH 2/2] wyscout v3 - add substitutions to the event stream --- .../event/wyscout/deserializer_v3.py | 75 ++++++++++++++++++- kloppy/tests/test_wyscout.py | 26 +++++++ 2 files changed, 100 insertions(+), 1 deletion(-) diff --git a/kloppy/infra/serializers/event/wyscout/deserializer_v3.py b/kloppy/infra/serializers/event/wyscout/deserializer_v3.py index 64e93a1e..f980670e 100644 --- a/kloppy/infra/serializers/event/wyscout/deserializer_v3.py +++ b/kloppy/infra/serializers/event/wyscout/deserializer_v3.py @@ -592,6 +592,75 @@ def _parse_period_id(raw_period: str) -> int: return period_id +def insert_substitution_events( + deserializer, events, raw_events, players, teams, periods, transformer +): + # Step 1: Create substitution events + substitution_events = [] + for team_id, periods_subs in raw_events["substitutions"].items(): + for raw_period, sub_info in periods_subs.items(): + for raw_seconds, players_info in sub_info.items(): + subs_out = players_info["out"] + subs_in = players_info["in"] + for sub_out, sub_in in zip(subs_out, subs_in): + sub_out_player = players[team_id][str(sub_out["playerId"])] + sub_in_player = players[team_id][str(sub_in["playerId"])] + + # Build the substitution event + sub_event = deserializer.event_factory.build_substitution( + event_id=f"substitution-{sub_out['playerId']}-{sub_in['playerId']}", + ball_owning_team=None, + ball_state=None, + coordinates=Point(x=0, y=0), + player=sub_out_player, + replacement_player=sub_in_player, + team=teams[team_id], + period=periods[int(raw_period[0]) - 1], + timestamp=timedelta(seconds=int(raw_seconds)), + result=None, + raw_event=None, + qualifiers=None, + ) + + if sub_event and deserializer.should_include_event( + sub_event + ): + substitution_events.append( + transformer.transform_event(sub_event) + ) + + # Step 2: Sort substitution events globally by period and timestamp + substitution_events.sort(key=lambda e: (e.period.id, e.timestamp)) + + # Step 3: Merge events and substitutions in ascending order + merged_events = [] + sub_index = 0 + total_subs = len(substitution_events) + + for event in events: + # Insert all substitution events that occur before or at the current event's timestamp + while sub_index < total_subs: + sub_event = substitution_events[sub_index] + if sub_event.period.id < event.period.id or ( + sub_event.period.id == event.period.id + and sub_event.timestamp <= event.timestamp + ): + merged_events.append(sub_event) + sub_index += 1 + else: + break + + # Add the current event to the merged list + merged_events.append(event) + + # Step 4: Add any remaining substitution events + while sub_index < total_subs: + merged_events.append(substitution_events[sub_index]) + sub_index += 1 + + return merged_events + + class WyscoutDeserializerV3(EventDataDeserializer[WyscoutInputs]): @property def provider(self) -> Provider: @@ -864,6 +933,10 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset: if event and self.should_include_event(event): events.append(transformer.transform_event(event)) + all_events = insert_substitution_events( + self, events, raw_events, players, teams, periods, transformer + ) + metadata = Metadata( teams=[home_team, away_team], periods=periods, @@ -881,4 +954,4 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset: away_coach=away_coach, ) - return EventDataset(metadata=metadata, records=events) + return EventDataset(metadata=metadata, records=all_events) diff --git a/kloppy/tests/test_wyscout.py b/kloppy/tests/test_wyscout.py index 5d28f64e..3b9d58bc 100644 --- a/kloppy/tests/test_wyscout.py +++ b/kloppy/tests/test_wyscout.py @@ -319,3 +319,29 @@ def test_carry_event(self, dataset: EventDataset): carry_event = dataset.get_event_by_id(1927028490) assert carry_event.event_type == EventType.CARRY assert carry_event.end_coordinates == Point(17.0, 4.0) + + def test_sub_event(self, dataset: EventDataset): + second_period = dataset.metadata.periods[1] + + sub_events = [ + event + for event in dataset.events + if event.event_type == EventType.SUBSTITUTION + ] + assert len(sub_events) == 9 + + first_sub_event = sub_events[0] + assert first_sub_event.time == Time( + period=second_period, timestamp=timedelta(seconds=4) + ) + assert first_sub_event.team.team_id == "3164" + assert first_sub_event.player.player_id == "415809" + assert first_sub_event.replacement_player.player_id == "703" + + last_sub_event = sub_events[-1] + assert last_sub_event.time == Time( + period=second_period, timestamp=timedelta(seconds=2192) + ) + assert last_sub_event.team.team_id == "3159" + assert last_sub_event.player.player_id == "20461" + assert last_sub_event.replacement_player.player_id == "345695"