Skip to content

Commit

Permalink
feat: optimized performances on player profiles parsing (#210)
Browse files Browse the repository at this point in the history
  • Loading branch information
TeKrop authored Nov 5, 2024
1 parent 6620eaa commit 9a602d0
Show file tree
Hide file tree
Showing 9 changed files with 185 additions and 97 deletions.
1 change: 1 addition & 0 deletions .env.dist
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ APP_PORT=80
APP_BASE_URL=https://overfast-api.tekrop.fr
LOG_LEVEL=info
STATUS_PAGE_URL=
PROFILING=false

# Rate limiting
BLIZZARD_RATE_LIMIT_RETRY_AFTER=5
Expand Down
3 changes: 3 additions & 0 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ class Settings(BaseSettings):
# Optional, status page URL if you have any to provide
status_page_url: str | None = None

# Profiling with pyinstrument, for debug purposes
profiling: bool = False

############
# RATE LIMITING
############
Expand Down
28 changes: 27 additions & 1 deletion app/main.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Project main file containing FastAPI app and routes definitions"""

from collections.abc import Callable
from contextlib import asynccontextmanager, suppress

from fastapi import FastAPI, Request
from fastapi.exceptions import ResponseValidationError
from fastapi.openapi.docs import get_redoc_html, get_swagger_ui_html
from fastapi.openapi.utils import get_openapi
from fastapi.responses import JSONResponse
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from starlette.exceptions import HTTPException as StarletteHTTPException

Expand All @@ -22,6 +23,10 @@
from .players.commands.update_search_data_cache import update_search_data_cache
from .roles import router as roles

# pyinstrument won't be installed on production, that's why we're checking it here
with suppress(ModuleNotFoundError):
from pyinstrument import Profiler


@asynccontextmanager
async def lifespan(_: FastAPI): # pragma: no cover
Expand Down Expand Up @@ -176,6 +181,27 @@ async def overridden_swagger():
return get_swagger_ui_html(**swagger_settings)


# In case enabled in settings, add the pyinstrument profiler middleware
if settings.profiling is True:
    logger.info("Profiling is enabled")

    @app.middleware("http")
    async def profile_request(request: Request, call_next: Callable):
        """Profile the current request with pyinstrument.

        Profiling only happens when the ``profile=true`` query parameter is
        supplied; every other request passes through untouched. When active,
        the profiling report is returned as an HTML page *instead of* the
        regular endpoint response.
        """
        # Only profile when explicitly requested with `profile=true`.
        # A bare truthiness check on the raw string would also trigger on
        # `profile=false` or `profile=0`, since any non-empty string is truthy.
        if request.query_params.get("profile", "").lower() == "true":
            # We profile the request along with all additional middlewares, by
            # interrupting the program every 1 ms and recording the entire
            # stack at that point
            with Profiler(interval=0.001, async_mode="enabled") as profiler:
                await call_next(request)

            # Return the profiling report as an HTML page in place of the
            # regular response
            return HTMLResponse(profiler.output_html())

        # Proceed without profiling
        return await call_next(request)


# Add application routers
app.include_router(heroes.router, prefix="/heroes")
app.include_router(roles.router, prefix="/roles")
Expand Down
12 changes: 6 additions & 6 deletions app/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import ClassVar

import httpx
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup, SoupStrainer
from fastapi import status

from .cache_manager import CacheManager
Expand Down Expand Up @@ -137,8 +137,7 @@ class HTMLParser(APIParser):
@property
def root_tag_params(self) -> dict:
    """Returns the BeautifulSoup params kwargs, used to find the root Tag
    on the page which will be used for searching data.
    """
    # recursive=False: only match a direct child of the search root, not a
    # nested <main> deeper in the tree
    return {"name": "main", "class_": "main-content", "recursive": False}

Expand All @@ -147,9 +146,10 @@ def store_response_data(self, response: httpx.Response) -> None:
self.create_bs_tag(response.text)

def create_bs_tag(self, html_content: str) -> None:
    """Parse the given HTML and store the root tag on ``self.root_tag``.

    A ``SoupStrainer`` built from ``self.root_tag_params`` restricts lxml
    parsing to the root ``<main>`` tag only, so BeautifulSoup never builds
    the full document tree — noticeably faster on large profile pages.

    :param html_content: raw HTML document to parse
    """
    soup_strainer = SoupStrainer(**self.root_tag_params)
    # With parse_only, the strained document contains (at most) the matching
    # <main> tag; `.main` retrieves it (None if the page has no match —
    # NOTE(review): callers appear to handle/expect that case; confirm).
    self.root_tag = BeautifulSoup(
        html_content, "lxml", parse_only=soup_strainer
    ).main


class JSONParser(APIParser):
Expand Down
Loading

0 comments on commit 9a602d0

Please sign in to comment.