Skip to content

Commit

Permalink
Merge pull request #248 from dpguthrie/add-consent-handling
Browse files Browse the repository at this point in the history
Add consent handling
  • Loading branch information
dpguthrie authored Dec 16, 2023
2 parents ff568c8 + 399284b commit 57a73fc
Show file tree
Hide file tree
Showing 8 changed files with 127 additions and 62 deletions.
23 changes: 20 additions & 3 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,23 @@
Change Log
==========

2.3.7
-----
## Add
- Logic for setting up a session when a consent screen is encountered. Consent screens are primarily seen in European countries,
and handling them should allow for the continued use of this package there.
- Keyword argument, `setup_url`, to the base `_YahooFinance` class that allows a user to override the URL used in setting up the session. By default,
the Yahoo Finance home page is used (https://finance.yahoo.com). You can also set an environment variable, `YF_SETUP_URL`, which is used when the keyword argument is not passed.
Example usage:
```python
import yahooquery as yq
t = yq.Ticker('aapl', setup_url='https://finance.yahoo.com/quote/AAPL')
```

## Remove
- Webdriver manager is no longer used internally. Selenium Manager is now fully included with selenium `4.10.0`, so the `webdriver-manager` package is no longer needed.

2.3.6
-----
## Fix
Expand Down Expand Up @@ -110,7 +127,7 @@ Change Log
to adjust the timezone (:code:`adj_timezone`) to the ticker's timezone. It defaults
to :code:`True`.
- Further documentation of acceptable keyword arguments to the :code:`Ticker` class.
- :code:`Ticker.news` is now a method. It accepts two arguments: :code:`count` -
- :code:`Ticker.news` is now a method. It accepts two arguments: :code:`count` -
number of items to return; :code:`start` - start date to begin retrieving news items from
- Bug fixes: :code:`Ticker.history` method no longer returns extra rows when retrieving
intraday data.
Expand All @@ -131,12 +148,12 @@ Change Log
:code:`p_valuation_measures` and supply either :code:`a`, :code:`q`, or
:code:`m` (annual, quarterly, monthly). The data returned with these can
be seen in the `Statistics` tab through the Yahoo Finance front-end.

.. image:: demo/valuation_measures.PNG

2.2.2
-----
- Fix bug in retrieving cash flow / income statement data. Most recent month was
- Fix bug in retrieving cash flow / income statement data. Most recent month was
combining with TTM. A new column was created in the dataframe called 'periodType'.
Annual data will be shown as '12M', quarterly data will be shown as '3M', and
trailing 12 month data will be shown as 'TTM'.
Expand Down
63 changes: 31 additions & 32 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "yahooquery"
version = "2.3.6"
version = "2.3.7"
description = "Python wrapper for an unofficial Yahoo Finance API"
authors = ["Doug Guthrie <douglas.p.guthrie@gmail.com>"]
documentation = "https://yahooquery.dpguthrie.com"
Expand All @@ -16,7 +16,7 @@ requests-futures = "^1.0.1"
tqdm = "^4.65.0"
lxml = "^4.9.3"
selenium = {version = "^4.10.0", optional = true}
webdriver-manager = {version = "^3.8.6", optional = true}
beautifulsoup4 = "^4.12.2"

[tool.poetry.dev-dependencies]
pytest = "^7.4.0"
Expand All @@ -34,4 +34,4 @@ requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.extras]
premium = ["selenium", "webdriver-manager"]
premium = ["selenium"]
8 changes: 4 additions & 4 deletions yahooquery/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
"""Python interface to unofficial Yahoo Finance API endpoints"""

name = "yahooquery"
__version__ = "2.3.6"
__version__ = "2.3.7"

from .research import Research # noqa
from .ticker import Ticker # noqa
from .screener import Screener # noqa
from .misc import ( # noqa
get_currencies,
get_exchanges,
get_market_summary,
get_trending,
search,
)
from .research import Research # noqa
from .screener import Screener # noqa
from .ticker import Ticker # noqa
27 changes: 23 additions & 4 deletions yahooquery/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# stdlib
import logging
import os
import time
from concurrent.futures import as_completed
Expand Down Expand Up @@ -26,6 +27,9 @@
import urlparse as parse


logger = logging.getLogger(__name__)


class _YahooFinance(object):
CHUNK = 1500

Expand Down Expand Up @@ -938,11 +942,12 @@ def __init__(self, **kwargs):
self.progress = kwargs.pop("progress", False)
self.username = kwargs.pop("username", os.getenv("YF_USERNAME", None))
self.password = kwargs.pop("password", os.getenv("YF_PASSWORD", None))
self._setup_url = kwargs.pop("setup_url", os.getenv("YF_SETUP_URL", None))
self.session = initialize_session(kwargs.pop("session", None), **kwargs)
if self.username and self.password:
self.login()
else:
self.session = setup_session(self.session)
self.session = setup_session(self.session, self._setup_url)
self.crumb = get_crumb(self.session)

@property
Expand Down Expand Up @@ -991,13 +996,27 @@ def default_query_params(self):
params["crumb"] = self.crumb
return params

def login(self):
def login(self) -> None:
if _has_selenium:
instance = YahooFinanceHeadless(self.username, self.password)
instance.login()
self.session.cookies = instance.cookies
if instance.cookies:
self.session.cookies = instance.cookies
return

return []
else:
logger.warning(
"Unable to login and/or retrieve the appropriate cookies. This is "
"most likely due to Yahoo Finance instituting recaptcha, which "
"this package does not support."
)

else:
logger.warning(
"You do not have the required libraries to use this feature. Install "
"with the following: `pip install yahooquery[premium]`"
)
self.session = setup_session(self.session, self._setup_url)

def _chunk_symbols(self, key, params={}, chunk=None, **kwargs):
current_symbols = self.symbols
Expand Down
14 changes: 5 additions & 9 deletions yahooquery/headless.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@
try:
# third party
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
except ImportError:
# Selenium was not installed
_has_selenium = False
Expand All @@ -28,16 +26,14 @@ def __init__(self, username: str, password: str):
self.username = username
self.password = password
self.cookies = RequestsCookieJar()
chrome_options = Options()
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--log-level=3")
chrome_options.add_argument("--ignore-certificate-errors")
chrome_options.add_argument("--ignore-ssl-errors")
self.driver = webdriver.Chrome(
service=ChromeService(ChromeDriverManager().install()),
options=chrome_options,
)
service = Service()
self.driver = webdriver.Chrome(service=service, options=chrome_options)

def login(self):
try:
Expand Down
6 changes: 5 additions & 1 deletion yahooquery/misc.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# stdlib
import os

# third party
import pandas as pd

Expand All @@ -20,8 +23,9 @@ def _make_request(
country, ", ".join(sorted(COUNTRIES.keys()))
)
)
setup_url = kwargs.pop("setup_url", os.getenv("YF_SETUP_URL", None))
session = initialize_session(**kwargs)
session = setup_session(session)
session = setup_session(session, setup_url)
crumb = get_crumb(session)
if crumb is not None:
params["crumb"] = crumb
Expand Down
42 changes: 36 additions & 6 deletions yahooquery/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# third party
import pandas as pd
import requests
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from requests.exceptions import ConnectionError, RetryError, SSLError
from requests.packages.urllib3.util.retry import Retry
Expand All @@ -17,7 +18,7 @@


DEFAULT_TIMEOUT = 5

DEFAULT_SESSION_URL = "https://finance.yahoo.com"
CRUMB_FAILURE = (
"Failed to obtain crumb. Ability to retrieve data will be significantly limited."
)
Expand Down Expand Up @@ -1366,8 +1367,8 @@ def initialize_session(session=None, **kwargs):
return session


def setup_session(session: requests.Session):
url = "https://finance.yahoo.com"
def setup_session(session: requests.Session, url: str = None):
url = url or DEFAULT_SESSION_URL
try:
response = session.get(url, allow_redirects=True)
except SSLError:
Expand All @@ -1380,10 +1381,39 @@ def setup_session(session: requests.Session):
except SSLError:
counter += 1

if not isinstance(session, FuturesSession):
return session
if isinstance(session, FuturesSession):
response = response.result()

# check for and handle consent page
if response.url.find("consent") >= 0:
logger.debug(f'Redirected to consent page: "{response.url}"')

soup = BeautifulSoup(response.content, "html.parser")

params = {}
for param in ["csrfToken", "sessionId"]:
try:
params[param] = soup.find("input", attrs={"name": param})["value"]
except Exception as exc:
logger.critical(
f'Failed to find or extract "{param}" from response. Exception={exc}'
)
return session

logger.debug(f"params: {params}")

response = session.post(
"https://consent.yahoo.com/v2/collectConsent",
data={
"agree": ["agree", "agree"],
"consentUUID": "default",
"sessionId": params["sessionId"],
"csrfToken": params["csrfToken"],
"originalDoneUrl": url,
"namespace": "yahoo",
},
)

_ = response.result()
return session


Expand Down

0 comments on commit 57a73fc

Please sign in to comment.