diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 079f8e5..53b7d99 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,23 @@ Change Log ========== +2.3.7 +----- +## Add +- Logic for handling setting up a session when a consent screen is encountered. This is primarily seen in European countries + and should allow for the continued use of this package. +- Keyword argument, `setup_url`, to the base `_YahooFinance` class that allows a user to override the url used in setting up the session. As a default + the Yahoo Finance home page is used (https://finance.yahoo.com). You can also create an environment variable, `YF_SETUP_URL` that will be used if set. + Example usage: + ```python + import yahooquery as yq + + t = yq.Ticker('aapl', setup_url='https://finance.yahoo.com/quote/AAPL') + ``` + +## Remove +- Webdriver manager is no longer used internally. Selenium Manager is now fully included with selenium `4.10.0`, so this package is no longer needed. + 2.3.6 ----- ## Fix @@ -110,7 +127,7 @@ Change Log to adjust the timezone (:code:`adj_timezone`) to the ticker's timezone. It defaults to :code:`True`. - Further documentation of acceptable keyword arguments to the :code:`Ticker` class. -- :code:`Ticker.news` is now a method. It accepts two arguments: :code:`count` - +- :code:`Ticker.news` is now a method. It accepts two arguments: :code:`count` - number of items to return; :code:`start` - start date to begin retrieving news items from - Bug fixes: :code:`Ticker.history` method no longer returns extra rows when retrieving intraday data. @@ -131,12 +148,12 @@ Change Log :code:`p_valuation_measures` and supply either :code:`a`, :code:`q`, or :code:`m` (annual, quarterly, monthly). The data returned with these can be seen in the `Statistics` tab through the Yahoo Finance front-end. - + .. image:: demo/valuation_measures.PNG 2.2.2 ----- -- Fix bug in retrieving cash flow / income statement data. 
Most recent month was +- Fix bug in retrieving cash flow / income statement data. Most recent month was combining with TTM. A new column was created in the dataframe called 'periodType'. Annual data will be shown as '12M', quarterly data will be shown as '3M', and trailing 12 month data will be shown as 'TTM'. diff --git a/poetry.lock b/poetry.lock index 6c72f6f..7f2e1ff 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand. [[package]] name = "appnope" @@ -76,6 +76,24 @@ files = [ {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, ] +[[package]] +name = "beautifulsoup4" +version = "4.12.2" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, + {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, +] + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +html5lib = ["html5lib"] +lxml = ["lxml"] + [[package]] name = "black" version = "23.10.1" @@ -1507,20 +1525,6 @@ files = [ [package.dependencies] six = ">=1.5" -[[package]] -name = "python-dotenv" -version = "1.0.0" -description = "Read key-value pairs from a .env file and set them as environment variables" -optional = true -python-versions = ">=3.8" -files = [ - {file = "python-dotenv-1.0.0.tar.gz", hash = "sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba"}, - {file = "python_dotenv-1.0.0-py3-none-any.whl", hash = "sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a"}, -] - -[package.extras] -cli = ["click (>=5.0)"] - [[package]] name = "pytz" version = 
"2023.3.post1" @@ -1806,6 +1810,17 @@ files = [ {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, ] +[[package]] +name = "soupsieve" +version = "2.5" +description = "A modern CSS selector implementation for Beautiful Soup." +optional = false +python-versions = ">=3.8" +files = [ + {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, + {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, +] + [[package]] name = "stack-data" version = "0.6.3" @@ -2019,22 +2034,6 @@ files = [ {file = "wcwidth-0.2.9.tar.gz", hash = "sha256:a675d1a4a2d24ef67096a04b85b02deeecd8e226f57b5e3a72dbb9ed99d27da8"}, ] -[[package]] -name = "webdriver-manager" -version = "3.9.1" -description = "Library provides the way to automatically manage drivers for different browsers" -optional = true -python-versions = ">=3.7" -files = [ - {file = "webdriver_manager-3.9.1-py2.py3-none-any.whl", hash = "sha256:1dfc29a786abb97ba28076d4766d931064eeeac71a9685a3e8d46f5d363fcbe3"}, - {file = "webdriver_manager-3.9.1.tar.gz", hash = "sha256:cd1f49ebb325a98b4dc3c41056f5b645e82fff3f83e346607844ec0bdf561c0b"}, -] - -[package.dependencies] -packaging = "*" -python-dotenv = "*" -requests = "*" - [[package]] name = "wsproto" version = "1.2.0" @@ -2070,4 +2069,4 @@ premium = ["selenium", "webdriver-manager"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "5bb142dfc188dfb4ea5909389ba272201a2148343ae8d05e3d3e5f3530604663" +content-hash = "79b43190183d3827f506292ea737f1cd95cad8d73703b9cd098e1a0bc4452744" diff --git a/pyproject.toml b/pyproject.toml index 6509077..9d3fb29 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "yahooquery" -version = "2.3.6" +version = "2.3.7" description = "Python wrapper for an unofficial Yahoo Finance API" authors = 
["Doug Guthrie "] documentation = "https://yahooquery.dpguthrie.com" @@ -16,7 +16,7 @@ requests-futures = "^1.0.1" tqdm = "^4.65.0" lxml = "^4.9.3" selenium = {version = "^4.10.0", optional = true} -webdriver-manager = {version = "^3.8.6", optional = true} +beautifulsoup4 = "^4.12.2" [tool.poetry.dev-dependencies] pytest = "^7.4.0" @@ -34,4 +34,4 @@ requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" [tool.poetry.extras] -premium = ["selenium", "webdriver-manager"] +premium = ["selenium"] diff --git a/yahooquery/__init__.py b/yahooquery/__init__.py index e3c178f..3ef299b 100644 --- a/yahooquery/__init__.py +++ b/yahooquery/__init__.py @@ -1,11 +1,8 @@ """Python interface to unofficial Yahoo Finance API endpoints""" name = "yahooquery" -__version__ = "2.3.6" +__version__ = "2.3.7" -from .research import Research # noqa -from .ticker import Ticker # noqa -from .screener import Screener # noqa from .misc import ( # noqa get_currencies, get_exchanges, @@ -13,3 +10,6 @@ get_trending, search, ) +from .research import Research # noqa +from .screener import Screener # noqa +from .ticker import Ticker # noqa diff --git a/yahooquery/base.py b/yahooquery/base.py index 458dd07..0162d29 100644 --- a/yahooquery/base.py +++ b/yahooquery/base.py @@ -1,4 +1,5 @@ # stdlib +import logging import os import time from concurrent.futures import as_completed @@ -26,6 +27,9 @@ import urlparse as parse +logger = logging.getLogger(__name__) + + class _YahooFinance(object): CHUNK = 1500 @@ -938,11 +942,12 @@ def __init__(self, **kwargs): self.progress = kwargs.pop("progress", False) self.username = kwargs.pop("username", os.getenv("YF_USERNAME", None)) self.password = kwargs.pop("password", os.getenv("YF_PASSWORD", None)) + self._setup_url = kwargs.pop("setup_url", os.getenv("YF_SETUP_URL", None)) self.session = initialize_session(kwargs.pop("session", None), **kwargs) if self.username and self.password: self.login() else: - self.session = setup_session(self.session) + 
self.session = setup_session(self.session, self._setup_url) self.crumb = get_crumb(self.session) @property @@ -991,13 +996,27 @@ def default_query_params(self): params["crumb"] = self.crumb return params - def login(self): + def login(self) -> None: if _has_selenium: instance = YahooFinanceHeadless(self.username, self.password) instance.login() - self.session.cookies = instance.cookies + if instance.cookies: + self.session.cookies = instance.cookies + return - return [] + else: + logger.warning( + "Unable to login and/or retrieve the appropriate cookies. This is " + "most likely due to Yahoo Finance instituting recaptcha, which " + "this package does not support." + ) + + else: + logger.warning( + "You do not have the required libraries to use this feature. Install " + "with the following: `pip install yahooquery[premium]`" + ) + self.session = setup_session(self.session, self._setup_url) def _chunk_symbols(self, key, params={}, chunk=None, **kwargs): current_symbols = self.symbols diff --git a/yahooquery/headless.py b/yahooquery/headless.py index 55c723d..4fd2db2 100644 --- a/yahooquery/headless.py +++ b/yahooquery/headless.py @@ -7,13 +7,11 @@ try: # third party from selenium import webdriver - from selenium.common.exceptions import NoSuchElementException, TimeoutException - from selenium.webdriver.chrome.options import Options - from selenium.webdriver.chrome.service import Service as ChromeService + from selenium.common.exceptions import TimeoutException + from selenium.webdriver.chrome.service import Service from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait - from webdriver_manager.chrome import ChromeDriverManager except ImportError: # Selenium was not installed _has_selenium = False @@ -28,16 +26,14 @@ def __init__(self, username: str, password: str): self.username = username self.password = password self.cookies = RequestsCookieJar() - 
chrome_options = Options() + chrome_options = webdriver.ChromeOptions() chrome_options.add_argument("--headless") chrome_options.add_argument("--no-sandbox") chrome_options.add_argument("--log-level=3") chrome_options.add_argument("--ignore-certificate-errors") chrome_options.add_argument("--ignore-ssl-errors") - self.driver = webdriver.Chrome( - service=ChromeService(ChromeDriverManager().install()), - options=chrome_options, - ) + service = Service() + self.driver = webdriver.Chrome(service=service, options=chrome_options) def login(self): try: diff --git a/yahooquery/misc.py b/yahooquery/misc.py index 49e96b3..25de24f 100644 --- a/yahooquery/misc.py +++ b/yahooquery/misc.py @@ -1,3 +1,6 @@ +# stdlib +import os + # third party import pandas as pd @@ -20,8 +23,9 @@ def _make_request( country, ", ".join(sorted(COUNTRIES.keys())) ) ) + setup_url = kwargs.pop("setup_url", os.getenv("YF_SETUP_URL", None)) session = initialize_session(**kwargs) - session = setup_session(session) + session = setup_session(session, setup_url) crumb = get_crumb(session) if crumb is not None: params["crumb"] = crumb diff --git a/yahooquery/utils/__init__.py b/yahooquery/utils/__init__.py index 93c9a43..8fad8ac 100644 --- a/yahooquery/utils/__init__.py +++ b/yahooquery/utils/__init__.py @@ -7,6 +7,7 @@ # third party import pandas as pd import requests +from bs4 import BeautifulSoup from requests.adapters import HTTPAdapter from requests.exceptions import ConnectionError, RetryError, SSLError from requests.packages.urllib3.util.retry import Retry @@ -17,7 +18,7 @@ DEFAULT_TIMEOUT = 5 - +DEFAULT_SESSION_URL = "https://finance.yahoo.com" CRUMB_FAILURE = ( "Failed to obtain crumb. Ability to retrieve data will be significantly limited." 
) @@ -1366,8 +1367,8 @@ def initialize_session(session=None, **kwargs): return session -def setup_session(session: requests.Session): - url = "https://finance.yahoo.com" +def setup_session(session: requests.Session, url: str = None): + url = url or DEFAULT_SESSION_URL try: response = session.get(url, allow_redirects=True) except SSLError: @@ -1380,10 +1381,39 @@ def setup_session(session: requests.Session): except SSLError: counter += 1 - if not isinstance(session, FuturesSession): - return session + if isinstance(session, FuturesSession): + response = response.result() + + # check for and handle consent page + if response.url.find("consent") >= 0: + logger.debug(f'Redirected to consent page: "{response.url}"') + + soup = BeautifulSoup(response.content, "html.parser") + + params = {} + for param in ["csrfToken", "sessionId"]: + try: + params[param] = soup.find("input", attrs={"name": param})["value"] + except Exception as exc: + logger.critical( + f'Failed to find or extract "{param}" from response. Exception={exc}' + ) + return session + + logger.debug(f"params: {params}") + + response = session.post( + "https://consent.yahoo.com/v2/collectConsent", + data={ + "agree": ["agree", "agree"], + "consentUUID": "default", + "sessionId": params["sessionId"], + "csrfToken": params["csrfToken"], + "originalDoneUrl": url, + "namespace": "yahoo", + }, + ) - _ = response.result() return session