diff --git a/FanGraphs/leaders.py b/FanGraphs/leaders.py
index 05ebd93..a4280e8 100644
--- a/FanGraphs/leaders.py
+++ b/FanGraphs/leaders.py
@@ -993,10 +993,198 @@ def export(self, path=""):
         os.rename(download_path, path)
 
 
-class InternationalLeaderboards:
+class InternationalLeaderboards(ScrapingUtilities):
+    """
+    Parses the FanGraphs KBO Leaderboards page
+
+    .. py:attribute:: address
+
+        The base URL address for the FanGraphs KBO Leaderboards page.
+
+        :type: str
+        :value: https://www.fangraphs.com/leaders/international
+    """
+    __selections = leaders_sel.intl.selections
+    __dropdowns = leaders_sel.intl.dropdowns
+    __checkboxes = leaders_sel.intl.checkboxes
+    __waitfor = leaders_sel.intl.waitfor
+
+    address = "https://www.fangraphs.com/leaders/international"
+
+    def __init__(self, browser="chromium"):
+        """
+        :param browser: The name of the browser to use (Chromium, Firefox, WebKit)
+        """
+        super().__init__(browser, self.address)
+        self.reset(waitfor=self.__waitfor)
+
+    @classmethod
+    def list_queries(cls):
+        """
+        Lists the possible filter queries which can be used to modify search results.
+
+        :return: Filter queries which can be used to modify search results
+        :rtype: list
+        """
+        queries = []
+        queries.extend(cls.__selections)
+        queries.extend(cls.__dropdowns)
+        queries.extend(cls.__checkboxes)
+        return queries
+
+    def list_options(self, query: str):
+        """
+        Lists the possible options which a filter query can be configured to.
+
+        :param query: The filter query whose options are being listed
+        :return: The options which the filter query can be configured to
+        :rtype: list
+        :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query``
+        """
+        query = query.lower()
+        if query in self.__selections:
+            elems = [
+                self.soup.select(s)[0]
+                for s in self.__selections[query]
+            ]
+            options = [e.getText() for e in elems]
+        elif query in self.__dropdowns:
+            elems = self.soup.select(
+                f"{self.__dropdowns[query]} > div > a"
+            )
+            options = [e.getText() for e in elems]
+        elif query in self.__checkboxes:
+            options = ["True", "False"]
+        else:
+            raise FanGraphs.exceptions.InvalidFilterQuery(query)
+        return options
+
+    def current_option(self, query: str):
+        """
+        Retrieves the option which a filter query is currently set to.
+
+        :param query: The filter query being retrieved of its current option
+        :return: The option which the filter query is currently set to (empty if no selection is active)
+        :rtype: str
+        :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query``
+        """
+        query = query.lower()
+        option = ""
+        if query in self.__selections:
+            for sel in self.__selections[query]:
+                elem = self.soup.select(sel)[0]
+                # A missing ``class`` attribute means the element is not active
+                if "active" in elem.get("class", []):
+                    option = elem.getText()
+                    break
+        elif query in self.__dropdowns:
+            elem = self.soup.select(
+                f"{self.__dropdowns[query]} > div > span"
+            )[0]
+            option = elem.getText()
+        elif query in self.__checkboxes:
+            # NOTE(review): presumably the first "stat" link carries ",to" in its
+            # ``href`` only while the checkbox is ticked -- confirm on the live page
+            elem = self.soup.select(
+                self.__selections["stat"][0]
+            )
+            option = "True" if ",to" in elem[0].get("href") else "False"
+        else:
+            raise FanGraphs.exceptions.InvalidFilterQuery(query)
+        return option
+
+    def configure(self, query: str, option: str):
+        """
+        Configures a filter query to a specified option.
 
-    def __init__(self):
-        pass
+        :param query: The filter query to be configured
+        :param option: The option to set the filter query to
+        :raises FanGraphs.exceptions.InvalidFilterQuery: Invalid argument ``query``
+        """
+        query = query.lower()
+        self._close_ad()
+        if query in self.__selections:
+            self.__configure_selection(query, option)
+        elif query in self.__dropdowns:
+            self.__configure_dropdown(query, option)
+        elif query in self.__checkboxes:
+            self.__configure_checkbox(query, option)
+        else:
+            raise FanGraphs.exceptions.InvalidFilterQuery(query)
+        self._refresh_parser(waitfor=self.__waitfor)
+
+    def __configure_selection(self, query: str, option: str):
+        """
+        Configures a selection-class filter query to an option.
+
+        :param query: The selection-class filter query to be configured
+        :param option: The option to set the filter query to
+        :raises FanGraphs.exceptions.InvalidFilterOption: Invalid argument ``option``
+        """
+        options = self.list_options(query)
+        try:
+            index = options.index(option)
+        except ValueError as err:
+            raise FanGraphs.exceptions.InvalidFilterOption from err
+        self.page.click(
+            self.__selections[query][index]
+        )
+
+    def __configure_dropdown(self, query: str, option: str):
+        """
+        Configures a dropdown-class filter query to an option.
+
+        :param query: The dropdown-class filter query to be configured
+        :param option: The option to set the filter query to
+        :raises FanGraphs.exceptions.InvalidFilterOption: Invalid argument ``option``
+        """
+        options = self.list_options(query)
+        try:
+            index = options.index(option)
+        except ValueError as err:
+            raise FanGraphs.exceptions.InvalidFilterOption from err
+        self.page.click(self.__dropdowns[query])
+        elem = self.page.query_selector_all(
+            f"{self.__dropdowns[query]} > div > a"
+        )[index]
+        elem.click()
+
+    def __configure_checkbox(self, query: str, option: str):
+        """
+        Configures a checkbox-class filter query to an option.
+
+        :param query: The checkbox-class filter query to be configured
+        :param option: The option to set the filter query to
+        :raises FanGraphs.exceptions.InvalidFilterOption: Invalid argument ``option``
+        """
+        options = self.list_options(query)
+        if option not in options:
+            raise FanGraphs.exceptions.InvalidFilterOption
+        if option == self.current_option(query):
+            return
+        self.page.click(self.__checkboxes[query])
+
+    def export(self, path=""):
+        """
+        Uses the **Export Data** button on the webpage to export the current leaderboard.
+        The data will be exported as a CSV file and the file will be saved to *out/*.
+        The file will be saved to the filepath ``path``, if specified and ending in *.csv*.
+        Otherwise, the file will be saved to the filepath *./out/%d.%m.%y %H.%M.%S.csv*
+
+        :param path: The path to save the exported data to
+        """
+        self._close_ad()
+        if not path or os.path.splitext(path)[1] != ".csv":
+            path = "out/{}.csv".format(
+                datetime.datetime.now().strftime("%d.%m.%y %H.%M.%S")
+            )
+        # ``os.rename`` fails when the destination directory does not exist
+        os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
+        with self.page.expect_download() as down_info:
+            self.page.click(".data-export")
+        download = down_info.value
+        download_path = download.path()
+        os.rename(download_path, path)
 
 
 class WARLeaderboards(ScrapingUtilities):
diff --git a/FanGraphs/selectors/leaders_sel/__init__.py b/FanGraphs/selectors/leaders_sel/__init__.py
index b29abff..1b573e1 100644
--- a/FanGraphs/selectors/leaders_sel/__init__.py
+++ b/FanGraphs/selectors/leaders_sel/__init__.py
@@ -6,3 +6,4 @@
 from . import season_stat_grid as ssg
 from . import game_span_leaderboards as gsl
 from . import war_leaderboards as war
+from . import international_leaderboards as intl
diff --git a/FanGraphs/selectors/leaders_sel/international_leaderboards.py b/FanGraphs/selectors/leaders_sel/international_leaderboards.py
new file mode 100644
index 0000000..2c0bf83
--- /dev/null
+++ b/FanGraphs/selectors/leaders_sel/international_leaderboards.py
@@ -0,0 +1,26 @@
+#!
python3
+# FanGraphs/selectors/leaders_sel/international_leaderboards.py
+
+selections = {
+    "stat": [
+        ".controls-stats > .fgButton:nth-child(1)",
+        ".controls-stats > .fgButton:nth-child(2)"
+    ],
+    "type": [
+        ".controls-board-view > .fgButton:nth-child(1)",
+        ".controls-board-view > .fgButton:nth-child(2)"
+    ]
+}
+dropdowns = {
+    "position": ".controls-stats:nth-child(1) > div:nth-child(3) > .fg-selection-box__selection",
+    "min": ".controls-stats:nth-child(1) > div:nth-child(4) > .fg-selection-box__selection",
+    "single_season": ".controls-stats:nth-child(2) > div:nth-child(1) > .fg-selection-box__selection",
+    "season1": ".controls-stats:nth-child(2) > div:nth-child(2) > .fg-selection-box__selection",
+    "season2": ".controls-stats:nth-child(2) > div:nth-child(3) > .fg-selection-box__selection",
+    "league": ".controls-stats:nth-child(3) > div:nth-child(1) > .fg-selection-box__selection",
+    "team": ".controls-stats:nth-child(3) > div:nth-child(2) > .fg-selection-box__selection",
+}
+checkboxes = {
+    "split_seasons": ".controls-stats > .fg-checkbox"
+}
+waitfor = ".fg-data-grid.table-type"
diff --git a/FanGraphs/tests/test_leaders.py b/FanGraphs/tests/test_leaders.py
index be8779f..4a3b834 100644
--- a/FanGraphs/tests/test_leaders.py
+++ b/FanGraphs/tests/test_leaders.py
@@ -15,6 +15,27 @@
 from FanGraphs.selectors import leaders_sel
 
 
+def fetch_soup(address, waitfor=""):
+    """
+    Initializes the ``bs4.BeautifulSoup`` object for parsing the FanGraphs page
+
+    :param address: The base URL address of the FanGraphs page
+    :param waitfor: The CSS selector to wait for (no wait if left empty)
+    :return: A ``BeautifulSoup`` object for parsing the page
+    :rtype: bs4.BeautifulSoup
+    """
+    with sync_playwright() as play:
+        browser = play.chromium.launch()
+        page = browser.new_page()
+        page.goto(address, timeout=0)
+        # Callers without a selector rely on the empty default; skip the wait for them
+        if waitfor:
+            page.wait_for_selector(waitfor)
+        soup = bs4.BeautifulSoup(page.content(), features="lxml")
+        browser.close()
+    return soup
+
+
 class TestMajorLeagueLeaderboards:
     """
:py:class:`FanGraphs.leaders.MajorLeagueLeaderboards`
@@ -30,14 +51,7 @@ class TestMajorLeagueLeaderboards:
 
     @classmethod
     def setup_class(cls):
-        with sync_playwright() as play:
-            browser = play.chromium.launch()
-            page = browser.new_page()
-            page.goto(cls.address, timeout=0)
-            cls.soup = bs4.BeautifulSoup(
-                page.content(), features="lxml"
-            )
-            browser.close()
+        cls.soup = fetch_soup(cls.address)
 
     def test_address(self):
         """
@@ -147,18 +161,7 @@ class TestSplitsLeaderboards:
 
     @classmethod
     def setup_class(cls):
-        """
-        Initializes ``bs4.BeautifulSoup4`` object using ``playwright``.
-        """
-        with sync_playwright() as play:
-            browser = play.chromium.launch()
-            page = browser.new_page()
-            page.goto(cls.address, timeout=0)
-            page.wait_for_selector(leaders_sel.splits.waitfor)
-            cls.soup = bs4.BeautifulSoup(
-                page.content(), features="lxml"
-            )
-            browser.close()
+        cls.soup = fetch_soup(cls.address, leaders_sel.splits.waitfor)
 
     def test_address(self):
         """
@@ -336,15 +339,7 @@ class TestSeasonStatGrid:
 
     @classmethod
     def setup_class(cls):
-        with sync_playwright() as play:
-            browser = play.chromium.launch()
-            page = browser.new_page()
-            page.goto(cls.address, timeout=0)
-            page.wait_for_selector(leaders_sel.ssg.waitfor)
-            cls.soup = bs4.BeautifulSoup(
-                page.content(), features="lxml"
-            )
-            browser.close()
+        cls.soup = fetch_soup(cls.address, leaders_sel.ssg.waitfor)
 
     def test_address(self):
         """
@@ -459,15 +454,7 @@ class TestGameSpanLeaderboards:
 
     @classmethod
     def setup_class(cls):
-        with sync_playwright() as play:
-            browser = play.chromium.launch()
-            page = browser.new_page()
-            page.goto(cls.address, timeout=0)
-            page.wait_for_selector(leaders_sel.gsl.waitfor)
-            cls.soup = bs4.BeautifulSoup(
-                page.content(), features="lxml"
-            )
-            browser.close()
+        cls.soup = fetch_soup(cls.address, leaders_sel.gsl.waitfor)
 
     def test_address(self):
         """
@@ -575,6 +562,125 @@ def test_export(self):
         assert len(elems) == 1
 
 
+class TestInternationalLeaderboards:
+    """
+    Checks the selectors used by :py:class:`FanGraphs.leaders.InternationalLeaderboards` against the fetched page
+    """
+    __selections = leaders_sel.intl.selections
+    __dropdowns = leaders_sel.intl.dropdowns
+    __checkboxes = leaders_sel.intl.checkboxes
+    address = "https://www.fangraphs.com/leaders/international"
+
+    @classmethod
+    def setup_class(cls):
+        cls.soup = fetch_soup(cls.address, waitfor=leaders_sel.intl.waitfor)
+
+    def test_address(self):
+        """
+        Class attribute ``InternationalLeaderboards.address`` (expects an HTTP 200 response).
+        """
+        res = requests.get(self.address)
+        assert res.status_code == 200
+
+    def test_list_options_selections(self):
+        """
+        Instance method ``InternationalLeaderboards.list_options`` (selection-class queries).
+
+        Uses the following class attributes:
+
+        - ``InternationalLeaderboards.__selections``
+        """
+        elem_count = {
+            "stat": 2, "type": 2
+        }
+        for query, sel_list in self.__selections.items():
+            elems = [self.soup.select(s)[0] for s in sel_list]
+            assert len(elems) == elem_count[query], query
+            assert all([e.getText() for e in elems]), query
+
+    def test_list_options_dropdowns(self):
+        """
+        Instance method ``InternationalLeaderboards.list_options`` (dropdown-class queries).
+
+        Uses the following class attributes:
+
+        - ``InternationalLeaderboards.__dropdowns``
+        """
+        elem_count = {
+            "position": 11, "min": 42, "single_season": 19, "season1": 19, "season2": 19,
+            "league": 1, "team": 11
+        }
+        for query, sel in self.__dropdowns.items():
+            elems = self.soup.select(f"{sel} > div > a")
+            assert len(elems) == elem_count[query], query
+            assert all([e.getText() for e in elems]), query
+
+    def test_current_option_selections(self):
+        """
+        Instance method ``InternationalLeaderboards.current_option`` (selection-class queries).
+
+        Uses the following class attributes:
+
+        - ``InternationalLeaderboards.__selections``
+        """
+        elem_text = {
+            "stat": "Batters", "type": "Standard"
+        }
+        for query, sel_list in self.__selections.items():
+            elems = []
+            for sel in sel_list:
+                elem = self.soup.select(sel)[0]
+                assert elem.get("class") is not None, query
+                elems.append(elem)
+            active = ["active" in e.get("class") for e in elems]
+            assert active.count(True) == 1, query
+            text = [e.getText() for e in elems]
+            assert elem_text[query] in text, query
+
+    def test_current_option_dropdown(self):
+        """
+        Instance method ``InternationalLeaderboards.current_option`` (dropdown-class queries).
+
+        Uses the following class attributes:
+
+        - ``InternationalLeaderboards.__dropdowns``
+        """
+        elem_text = {
+            "position": "All", "min": "Qualified", "single_season": "2020",
+            "season1": "2020", "season2": "2020", "league": "KBO",
+            "team": "Select"
+        }
+        for query, sel in self.__dropdowns.items():
+            elems = self.soup.select(f"{sel} > div > span")
+            assert len(elems) == 1, query
+            text = elems[0].getText()
+            assert text == elem_text[query], query
+
+    def test_configure_selections(self):
+        """
+        Private instance method ``InternationalLeaderboards.__configure_selection``: each selector must match exactly one element.
+        """
+        for query, sel_list in self.__selections.items():
+            for sel in sel_list:
+                elems = self.soup.select(sel)
+                assert len(elems) == 1, query
+
+    def test_configure_dropdown(self):
+        """
+        Private instance method ``InternationalLeaderboards.__configure_dropdown``: each selector must match exactly one element.
+        """
+        for query, sel in self.__dropdowns.items():
+            elems = self.soup.select(sel)
+            assert len(elems) == 1, query
+
+    def test_export(self):
+        """
+        Instance method ``InternationalLeaderboards.export`` (expects a single ``.data-export`` element).
+        """
+        elems = self.soup.select(".data-export")
+        assert len(elems) == 1
+
+
 class TestWARLeaderboards:
     """
     :py:class:`FanGraphs.leaders.WARLeaderboards`
@@ -586,15 +692,7 @@ class TestWARLeaderboards:
 
     @classmethod
     def setup_class(cls):
-        with sync_playwright() as play:
-            browser = play.chromium.launch()
-            page = browser.new_page()
-            page.goto(cls.address)
-            page.wait_for_selector(leaders_sel.war.waitfor)
-            cls.soup = bs4.BeautifulSoup(
-                page.content(), features="lxml"
-            )
-            browser.close()
+        cls.soup = fetch_soup(cls.address, waitfor=leaders_sel.war.waitfor)
 
     @pytest.mark.parametrize(
         "selectors",