Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Wyscout V3] Add substitutions to the event stream #368

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 135 additions & 19 deletions kloppy/infra/serializers/event/wyscout/deserializer_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
Team,
FormationType,
CarryResult,
PositionType,
)
from kloppy.exceptions import DeserializationError
from kloppy.utils import performance_logging
Expand Down Expand Up @@ -81,36 +82,78 @@
"3-2-3-2": FormationType.THREE_TWO_THREE_TWO,
}

position_types_mapping: Dict[str, PositionType] = {
"GK": PositionType.Goalkeeper,
"LB": PositionType.LeftBack,
"LWB": PositionType.LeftWing,
"LB5": PositionType.LeftBack,
"LCB": PositionType.LeftCenterBack,
"LCB3": PositionType.LeftCenterBack,
"CB": PositionType.CenterBack,
"RCB": PositionType.RightCenterBack,
"RCB3": PositionType.RightCenterBack,
"RB": PositionType.RightBack,
"RWB": PositionType.RightWing,
"RB5": PositionType.RightBack,
"LW": PositionType.LeftWing,
"LAMF": PositionType.LeftAttackingMidfield,
"LCMF3": PositionType.LeftCentralMidfield,
"LCMF": PositionType.LeftCentralMidfield,
"DMF": PositionType.DefensiveMidfield,
"LDMF": PositionType.LeftDefensiveMidfield,
"RDMF": PositionType.RightDefensiveMidfield,
"RCMF3": PositionType.RightCentralMidfield,
"RCMF": PositionType.RightCentralMidfield,
"RAMF": PositionType.RightAttackingMidfield,
"RW": PositionType.RightWing,
"AMF": PositionType.AttackingMidfield,
"LWF": PositionType.LeftForward,
"CF": PositionType.Striker,
"SS": PositionType.Striker,
"RWF": PositionType.RightForward,
}


def _flip_point(point: Point) -> Point:
return Point(x=100 - point.x, y=100 - point.y)


def _parse_team(raw_events, wyId: str, ground: Ground) -> Team:
# Get the first formation description
first_period_formation_info = raw_events["formations"][wyId]["1H"]
first_formation_descr = next(iter(first_period_formation_info.values()))
formation_str, formation_info = next(iter(first_formation_descr.items()))

# Extract the formation and players' positions
starting_formation = formations[formation_str]
starting_players_positions = {
player_id: position_types_mapping[player_info["position"].upper()]
for player_descr in formation_info["players"]
for player_id, player_info in player_descr.items()
}

team = Team(
team_id=wyId,
name=raw_events["teams"][wyId]["team"]["officialName"],
ground=ground,
starting_formation=formations[
next(
iter(
raw_events["formations"][wyId]["1H"][
next(iter(raw_events["formations"][wyId]["1H"]))
]
)
)
],
starting_formation=starting_formation,
)
team.players = [
Player(
player_id=str(player["player"]["wyId"]),
team=team,
jersey_no=None,
first_name=player["player"]["firstName"],
last_name=player["player"]["lastName"],

for player in raw_events["players"][wyId]:
player_id = str(player["player"]["wyId"])
starting_position = starting_players_positions.get(player_id)
team.players.append(
Player(
player_id=player_id,
team=team,
jersey_no=None,
first_name=player["player"]["firstName"],
last_name=player["player"]["lastName"],
starting=starting_position is not None,
starting_position=starting_position,
)
)
for player in raw_events["players"][wyId]
]

return team


Expand Down Expand Up @@ -549,6 +592,75 @@ def _parse_period_id(raw_period: str) -> int:
return period_id


def insert_substitution_events(
deserializer, events, raw_events, players, teams, periods, transformer
):
# Step 1: Create substitution events
substitution_events = []
for team_id, periods_subs in raw_events["substitutions"].items():
for raw_period, sub_info in periods_subs.items():
for raw_seconds, players_info in sub_info.items():
subs_out = players_info["out"]
subs_in = players_info["in"]
for sub_out, sub_in in zip(subs_out, subs_in):
sub_out_player = players[team_id][str(sub_out["playerId"])]
sub_in_player = players[team_id][str(sub_in["playerId"])]

# Build the substitution event
sub_event = deserializer.event_factory.build_substitution(
event_id=f"substitution-{sub_out['playerId']}-{sub_in['playerId']}",
ball_owning_team=None,
ball_state=None,
coordinates=Point(x=0, y=0),
player=sub_out_player,
replacement_player=sub_in_player,
team=teams[team_id],
period=periods[int(raw_period[0]) - 1],
timestamp=timedelta(seconds=int(raw_seconds)),
result=None,
raw_event=None,
qualifiers=None,
)

if sub_event and deserializer.should_include_event(
sub_event
):
substitution_events.append(
transformer.transform_event(sub_event)
)

# Step 2: Sort substitution events globally by period and timestamp
substitution_events.sort(key=lambda e: (e.period.id, e.timestamp))

# Step 3: Merge events and substitutions in ascending order
merged_events = []
sub_index = 0
total_subs = len(substitution_events)

for event in events:
# Insert all substitution events that occur before or at the current event's timestamp
while sub_index < total_subs:
sub_event = substitution_events[sub_index]
if sub_event.period.id < event.period.id or (
sub_event.period.id == event.period.id
and sub_event.timestamp <= event.timestamp
):
merged_events.append(sub_event)
sub_index += 1
else:
break

# Add the current event to the merged list
merged_events.append(event)

# Step 4: Add any remaining substitution events
while sub_index < total_subs:
merged_events.append(substitution_events[sub_index])
sub_index += 1

return merged_events


class WyscoutDeserializerV3(EventDataDeserializer[WyscoutInputs]):
@property
def provider(self) -> Provider:
Expand Down Expand Up @@ -821,6 +933,10 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
if event and self.should_include_event(event):
events.append(transformer.transform_event(event))

all_events = insert_substitution_events(
self, events, raw_events, players, teams, periods, transformer
)

metadata = Metadata(
teams=[home_team, away_team],
periods=periods,
Expand All @@ -838,4 +954,4 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
away_coach=away_coach,
)

return EventDataset(metadata=metadata, records=events)
return EventDataset(metadata=metadata, records=all_events)
33 changes: 33 additions & 0 deletions kloppy/tests/test_wyscout.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
Time,
PassType,
PassQualifier,
PositionType,
)

from kloppy import wyscout
Expand Down Expand Up @@ -203,6 +204,12 @@ def test_metadata(self, dataset: EventDataset):
== FormationType.FOUR_THREE_ONE_TWO
)

cr7 = dataset.metadata.teams[0].get_player_by_id("3322")

assert cr7.full_name == "Cristiano Ronaldo dos Santos Aveiro"
assert cr7.starting is True
assert cr7.positions.last() == PositionType.Striker

def test_enriched_metadata(self, dataset: EventDataset):
date = dataset.metadata.date
if date:
Expand Down Expand Up @@ -312,3 +319,29 @@ def test_carry_event(self, dataset: EventDataset):
carry_event = dataset.get_event_by_id(1927028490)
assert carry_event.event_type == EventType.CARRY
assert carry_event.end_coordinates == Point(17.0, 4.0)

def test_sub_event(self, dataset: EventDataset):
second_period = dataset.metadata.periods[1]

sub_events = [
event
for event in dataset.events
if event.event_type == EventType.SUBSTITUTION
]
assert len(sub_events) == 9

first_sub_event = sub_events[0]
assert first_sub_event.time == Time(
period=second_period, timestamp=timedelta(seconds=4)
)
assert first_sub_event.team.team_id == "3164"
assert first_sub_event.player.player_id == "415809"
assert first_sub_event.replacement_player.player_id == "703"

last_sub_event = sub_events[-1]
assert last_sub_event.time == Time(
period=second_period, timestamp=timedelta(seconds=2192)
)
assert last_sub_event.team.team_id == "3159"
assert last_sub_event.player.player_id == "20461"
assert last_sub_event.replacement_player.player_id == "345695"
Loading