Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
  • Loading branch information
justinjjlee committed Oct 2, 2024
1 parent 8cae9e5 commit 2b6eed3
Showing 1 changed file with 54 additions and 54 deletions.
108 changes: 54 additions & 54 deletions src/data/apinhle/data_pull_plays.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,62 +29,62 @@
# ---------------------------------------------------

for iter_year in [iter_year]: # or iter_years

# Load the previous game stats, if they exist
try:

# Pull all game lists
try: # Pull all game lists, if they exist
gamecode = pd.read_csv(f"./latest/box/{iter_year}_box.csv")

# Load the previous game stats, if they exist
try:
# If previously pulled data exist
df_playbyplay_exist = pd.read_csv(f"./latest/play/{iter_year}_playbyplay.csv")
df_playbyplay_player_exist = pd.read_csv(f"./latest/play/{iter_year}_playbyplay_player.csv")
# Unique game IDs across all existing game records
gameids_exist = df_playbyplay_exist["gameid"].unique()
# Remove the existing game records
# Pick up games with newest data points
gamecode = gamecode.loc[~gamecode["gameid"].isin(gameids_exist), :]
# NOTE: This process does not account for any record revisions
idx_exist = True
except:
# New data needed, no need to append the old one
idx_exist = False

# If previously pulled data exist
df_playbyplay_exist = pd.read_csv(f"./latest/play/{iter_year}_playbyplay.csv")
df_playbyplay_player_exist = pd.read_csv(f"./latest/play/{iter_year}_playbyplay_player.csv")
# Unique game IDs across all existing game records
gameids_exist = df_playbyplay_exist["gameid"].unique()
# Remove the existing game records
# Pick up games with newest data points
gamecode = gamecode.loc[~gamecode["gameid"].isin(gameids_exist), :]
# NOTE: This process does not account for any record revisions
idx_exist = True
except:
# New data needed, no need to append the old one
idx_exist = False

# ---------------------------------------------------
# Pull team/game lists of the games for the season
'''
Pulling full season game would take a long time. If some game records were already pulled,
I recommend to skip those game records
'''
if len(gamecode["gameid"]) != 0:
# At least one record needs to be pulled
df_playbyplay = []
df_playerinfo = []
for _, row in gamecode.iterrows():
# Pull game's play-by-play stat
r = requests.get(url=f'https://api-web.nhle.com/v1/gamecenter/{row.gameid}/play-by-play')

iter_playbyplay, iter_player = proc_playbyplay_clean(r, row)
# Append to save
df_playbyplay.append(iter_playbyplay)
df_playerinfo.append(iter_player)
# Pause to play safe with the API
time.sleep(1)
# ---------------------------------------------------
# Pull team/game lists of the games for the season
'''
Pulling full season game would take a long time. If some game records were already pulled,
I recommend to skip those game records
'''
if len(gamecode["gameid"]) != 0:
# At least one record needs to be pulled
df_playbyplay = []
df_playerinfo = []
for _, row in gamecode.iterrows():
# Pull game's play-by-play stat
r = requests.get(url=f'https://api-web.nhle.com/v1/gamecenter/{row.gameid}/play-by-play')

iter_playbyplay, iter_player = proc_playbyplay_clean(r, row)
# Append to save
df_playbyplay.append(iter_playbyplay)
df_playerinfo.append(iter_player)
# Pause to play safe with the API
time.sleep(1)

# Save, full data
playbyplay = pd.concat(df_playbyplay)
df_playerinfo = pd.concat(df_playerinfo)
# Save, full data
playbyplay = pd.concat(df_playbyplay)
df_playerinfo = pd.concat(df_playerinfo)

if idx_exist:
# If old records exist, combine them with the newly pulled records
playbyplay = pd.concat([df_playbyplay_exist, df_playbyplay], axis=0)
df_playerinfo = pd.concat([df_playbyplay_player_exist, df_playerinfo], axis=0)

# Save data
playbyplay.to_csv(f"./latest/play/{iter_year}_playbyplay.csv", index=False)
df_playerinfo.to_csv(f"./latest/play/{iter_year}_playbyplay_player.csv", index=False)
else:
# No records need to be pulled
print("All records currently existing, no need to pull records")
if idx_exist:
# If old records exist, combine them with the newly pulled records
playbyplay = pd.concat([df_playbyplay_exist, df_playbyplay], axis=0)
df_playerinfo = pd.concat([df_playbyplay_player_exist, df_playerinfo], axis=0)

# Save data
playbyplay.to_csv(f"./latest/play/{iter_year}_playbyplay.csv", index=False)
df_playerinfo.to_csv(f"./latest/play/{iter_year}_playbyplay_player.csv", index=False)
else:
# No records need to be pulled
print("All records currently existing, no need to pull records")
except: # Season data does not exist yet
print("The season data not yet exist. Exit the process safely.")

print("Good bye.")
print("au revoir.")

0 comments on commit 2b6eed3

Please sign in to comment.