Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exclude pbp participation for any years where the data file isn't available #107

Merged
merged 2 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions nfl_data_py/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import numpy
import pandas
import appdirs
from urllib.error import HTTPError

# module level doc string
__doc__ = """
Expand Down Expand Up @@ -143,13 +144,20 @@ def import_pbp_data(
raw = pandas.DataFrame(data)
raw['season'] = year

if all([include_participation, year >= 2016, not cache]):

if include_participation and not cache:
path = r'https://github.com/nflverse/nflverse-data/releases/download/pbp_participation/pbp_participation_{}.parquet'.format(year)
partic = pandas.read_parquet(path)
raw = raw.merge(partic,
how='left',
left_on=['play_id','game_id'],
right_on=['play_id','nflverse_game_id'])

try:
partic = pandas.read_parquet(path)
raw = raw.merge(
partic,
how='left',
left_on=['play_id','game_id'],
right_on=['play_id','nflverse_game_id']
)
except HTTPError:
pass

pbp_data.append(raw)
print(str(year) + ' done.')
Expand All @@ -158,8 +166,10 @@ def import_pbp_data(
print(e)
print('Data not available for ' + str(year))

if pbp_data:
plays = pandas.concat(pbp_data).reset_index(drop=True)
if not pbp_data:
return pandas.DataFrame()

plays = pandas.concat(pbp_data, ignore_index=True)

# converts float64 to float32, saves ~30% memory
if downcast:
Expand Down
23 changes: 19 additions & 4 deletions nfl_data_py/tests/nfl_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@


class test_pbp(TestCase):
pbp = nfl.import_pbp_data([2020])

def test_is_df_with_data(self):
    """The default single-season load yields a non-empty DataFrame.

    Uses the class-level ``pbp`` fixture (loaded once for season 2020)
    instead of downloading the same season a second time in this test.
    """
    self.assertIsInstance(self.pbp, pd.DataFrame)
    self.assertTrue(len(self.pbp) > 0)

def test_is_df_with_data_thread_requests(self):
    """Loading two seasons with ``thread_requests=True`` yields a non-empty DataFrame."""
    seasons = [2020, 2021]
    plays = nfl.import_pbp_data(seasons, thread_requests=True)

    self.assertIsInstance(plays, pd.DataFrame)
    self.assertTrue(len(plays) > 0)


def test_uses_cache_when_cache_is_true(self):
cache = Path(__file__).parent/f"tmpcache-{random.randint(0, 10000)}"
self.assertRaises(
Expand All @@ -33,6 +33,21 @@ def test_uses_cache_when_cache_is_true(self):
self.assertIsInstance(data, pd.DataFrame)

shutil.rmtree(cache)

def test_includes_participation_by_default(self):
    """The default load (class-level ``pbp`` fixture) carries participation columns.

    ``offense_players`` is used as the marker column for merged
    participation data.
    """
    columns = self.pbp.columns
    self.assertIn("offense_players", columns)

def test_excludes_participation_when_requested(self):
    """``include_participation=False`` still returns data, minus participation columns."""
    data = nfl.import_pbp_data([2020], include_participation=False)

    # Validate the frame returned by THIS call, not the shared class
    # fixture, so a broken/empty result cannot pass unnoticed.
    self.assertIsInstance(data, pd.DataFrame)
    self.assertTrue(len(data) > 0)
    self.assertNotIn("offense_players", data.columns)

def test_excludes_participation_if_not_available(self):
    """A season without a participation file still loads play-by-play data.

    2024 is used here as a season whose participation file is expected to
    be unavailable (NOTE(review): confirm this still holds upstream); the
    load should succeed and simply omit the participation columns.
    """
    data = nfl.import_pbp_data([2024])

    # Validate the frame returned by THIS call, not the shared class
    # fixture, so an empty fallback result cannot pass unnoticed.
    self.assertIsInstance(data, pd.DataFrame)
    self.assertTrue(len(data) > 0)
    self.assertNotIn("offense_players", data.columns)


class test_weekly(TestCase):
Expand Down
Loading