Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Scheduled Updates #44

Open
wants to merge 45 commits into
base: bet-hatikvah
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
ffcc698
refactor: Lint Update (#46)
RickiJay-WMDE Nov 21, 2024
b83b7a8
Merge branch 'main' into software-data-ii
RickiJay-WMDE Nov 21, 2024
27cd9d9
Lint Updates
RickiJay-WMDE Nov 21, 2024
888496e
Merge branch 'software-data-ii' into canadian-nicole-ii
RickiJay-WMDE Nov 21, 2024
e2736f7
Get Out of Date Wikibases
RickiJay-WMDE Nov 19, 2024
711d247
Check OOD
RickiJay-WMDE Nov 19, 2024
b043ece
Organize
RickiJay-WMDE Nov 19, 2024
d97aba5
Optional
RickiJay-WMDE Nov 19, 2024
bd76399
Update
RickiJay-WMDE Nov 19, 2024
8b3642c
Initial Tests
RickiJay-WMDE Nov 19, 2024
0fc0637
Requirement
RickiJay-WMDE Nov 19, 2024
36b25e1
Print
RickiJay-WMDE Nov 19, 2024
39140bd
Fool of a Took
RickiJay-WMDE Nov 19, 2024
066c6bd
Smooth
RickiJay-WMDE Nov 19, 2024
1aabe9d
Trombones
RickiJay-WMDE Nov 19, 2024
d4c2df4
Try Everything
RickiJay-WMDE Nov 19, 2024
3c46c28
Lint
RickiJay-WMDE Nov 19, 2024
0527124
Tweak Timing
RickiJay-WMDE Nov 19, 2024
c9ff63a
Lint
RickiJay-WMDE Nov 19, 2024
75e4f84
Await API Data
RickiJay-WMDE Nov 19, 2024
a922ef5
Await Multiple Users
RickiJay-WMDE Nov 19, 2024
081bdb8
Await Requests
RickiJay-WMDE Nov 19, 2024
3510abf
Await SPARQL
RickiJay-WMDE Nov 19, 2024
538d465
Await Math
RickiJay-WMDE Nov 19, 2024
6db01e7
Split
RickiJay-WMDE Nov 19, 2024
09ec1ac
Split
RickiJay-WMDE Nov 19, 2024
5a39eae
Fix
RickiJay-WMDE Nov 19, 2024
c706ed5
Test OOD
RickiJay-WMDE Nov 19, 2024
3249ac3
Sanity
RickiJay-WMDE Nov 19, 2024
78a17f3
Split Tests
RickiJay-WMDE Nov 19, 2024
dff05e4
Distinctive
RickiJay-WMDE Nov 19, 2024
55bb7e0
Enforce Order
RickiJay-WMDE Nov 19, 2024
e14c1de
Lint
RickiJay-WMDE Nov 19, 2024
c511ffa
Enforce Order
RickiJay-WMDE Nov 19, 2024
8e4dd24
Chain
RickiJay-WMDE Nov 19, 2024
3bb929f
Everything
RickiJay-WMDE Nov 19, 2024
cb15982
Fix Logs
RickiJay-WMDE Nov 19, 2024
0953b42
Fix Tests
RickiJay-WMDE Nov 19, 2024
bb1b8af
Less Frequent
RickiJay-WMDE Nov 19, 2024
dce9906
Timing Tweaks
RickiJay-WMDE Nov 20, 2024
5a2dc0d
Str
RickiJay-WMDE Nov 20, 2024
97999c5
reqs update
RickiJay-WMDE Nov 21, 2024
d46afe1
disable-next
RickiJay-WMDE Nov 22, 2024
bec444b
Schedule
RickiJay-WMDE Nov 22, 2024
668f3fe
disable-next
RickiJay-WMDE Nov 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,8 @@ exclude-too-few-public-methods=
ignored-parents=

# Maximum number of arguments for function / method.
max-args=5
max-args=10
max-positional-arguments=10

# Maximum number of attributes for a class (see R0902).
max-attributes=7
Expand Down
17 changes: 16 additions & 1 deletion app.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
"""Main Application"""

from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from strawberry.fastapi import GraphQLRouter

from model.strawberry import schema
from schedule import scheduler


# Ensure the scheduler shuts down properly on application exit.
@asynccontextmanager
# pylint: disable-next=redefined-outer-name,unused-argument
async def lifespan(app: FastAPI):
    """Start the scheduler at startup, yield, and shut it down at shutdown.

    The `app` argument is required by the lifespan protocol but is not used.
    """

    scheduler.start()
    try:
        yield
    finally:
        # Runs even if an exception or cancellation is thrown in at the
        # yield point, so the scheduler is never left running.
        scheduler.shutdown()


app = FastAPI(lifespan=lifespan)

app = FastAPI()

origins = ["http://localhost", "http://0.0.0.0", "http://127.0.0.1"]

Expand Down
53 changes: 53 additions & 0 deletions check_out_of_date.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""Check Out of Date Observations"""

import asyncio

from fetch_data import (
get_wikibase_list_with_out_of_date_connectivity_observations,
get_wikibase_list_with_out_of_date_log_first_observations,
get_wikibase_list_with_out_of_date_log_last_observations,
get_wikibase_list_with_out_of_date_property_popularity_observations,
get_wikibase_list_with_out_of_date_quantity_observations,
get_wikibase_list_with_out_of_date_software_observations,
get_wikibase_list_with_out_of_date_stats_observations,
get_wikibase_list_with_out_of_date_user_observations,
)


async def check_out_of_date():
    """Print, per observation type, how many wikibases are out of date.

    Each fetcher is awaited in turn and the length of its result is printed
    before the next fetcher is started.
    """

    # (label, fetcher) pairs, listed in the order they are reported.
    checks = (
        ("Connectivity", get_wikibase_list_with_out_of_date_connectivity_observations),
        ("Logs (First)", get_wikibase_list_with_out_of_date_log_first_observations),
        ("Logs (Last)", get_wikibase_list_with_out_of_date_log_last_observations),
        (
            "Property",
            get_wikibase_list_with_out_of_date_property_popularity_observations,
        ),
        ("Quantity", get_wikibase_list_with_out_of_date_quantity_observations),
        ("Software", get_wikibase_list_with_out_of_date_software_observations),
        ("Statistics", get_wikibase_list_with_out_of_date_stats_observations),
        ("User", get_wikibase_list_with_out_of_date_user_observations),
    )

    for label, fetch_out_of_date in checks:
        out_of_date = await fetch_out_of_date()
        print(f"{label}: {len(out_of_date)}")


if __name__ == "__main__":
    # asyncio.run creates a fresh event loop, runs the coroutine to completion,
    # closes the loop, and propagates any exception, so a failed check is not
    # silently swallowed.
    asyncio.run(check_out_of_date())
18 changes: 18 additions & 0 deletions fetch_data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,24 @@
"""Fetch Data"""

from fetch_data.api_data import create_log_observation, create_user_observation
from fetch_data.out_of_date import (
get_wikibase_list_with_out_of_date_connectivity_observations,
get_wikibase_list_with_out_of_date_log_first_observations,
get_wikibase_list_with_out_of_date_log_last_observations,
get_wikibase_list_with_out_of_date_property_popularity_observations,
get_wikibase_list_with_out_of_date_quantity_observations,
get_wikibase_list_with_out_of_date_software_observations,
get_wikibase_list_with_out_of_date_stats_observations,
get_wikibase_list_with_out_of_date_user_observations,
update_out_of_date_connectivity_observations,
update_out_of_date_log_first_observations,
update_out_of_date_log_last_observations,
update_out_of_date_property_observations,
update_out_of_date_quantity_observations,
update_out_of_date_software_observations,
update_out_of_date_stats_observations,
update_out_of_date_user_observations,
)
from fetch_data.sparql_data import (
create_connectivity_observation,
create_property_popularity_observation,
Expand Down
18 changes: 12 additions & 6 deletions fetch_data/api_data/log_data/create_log_observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,11 @@ async def create_log_observation(wikibase_id: int, first_month: bool) -> bool:

try:
print("FETCHING LOGS")
log_list = get_month_log_list(
log_list = await get_month_log_list(
wikibase.action_api_url.url,
comparison_date=get_log_list_comparison_date(wikibase, first_month),
comparison_date=await get_log_list_comparison_date(
wikibase, first_month
),
oldest=first_month,
)
observation = await create_log_month(wikibase, log_list, observation)
Expand All @@ -58,13 +60,17 @@ async def create_log_observation(wikibase_id: int, first_month: bool) -> bool:
return observation.returned_data


def get_log_list_comparison_date(wikibase: WikibaseModel, first: bool) -> datetime:
async def get_log_list_comparison_date(
wikibase: WikibaseModel, first: bool
) -> datetime:
"""Return either date of first log or today"""

if first:
print("FETCHING OLDEST LOG")
oldest_log = get_log_list_from_url(
wikibase.action_api_url.url + get_log_param_string(limit=1, oldest=True)
oldest_log = (
await get_log_list_from_url(
wikibase.action_api_url.url + get_log_param_string(limit=1, oldest=True)
)
)[0]
return oldest_log.log_date

Expand Down Expand Up @@ -96,7 +102,7 @@ async def create_log_month(

if len(users) > 0:
print("FETCHING USER DATA")
user_data = get_multiple_user_data(wikibase, users)
user_data = await get_multiple_user_data(wikibase, users)
for u in user_data:
user_type_dict[u["name"]] = get_user_type_from_user_data(u)

Expand Down
8 changes: 4 additions & 4 deletions fetch_data/api_data/log_data/fetch_log_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,19 @@ def get_log_param_string(
return dict_to_url(parameters)


def get_log_list_from_url(url: str) -> list[WikibaseLogRecord]:
async def get_log_list_from_url(url: str) -> list[WikibaseLogRecord]:
"""Get Log List from URL"""

data: list[WikibaseLogRecord] = []

query_data = fetch_api_data(url)
query_data = await fetch_api_data(url)
for record in query_data["query"]["logevents"]:
data.append(WikibaseLogRecord(record))

return data


def get_month_log_list(
async def get_month_log_list(
api_url: str, comparison_date: datetime, oldest: bool = False
) -> list[WikibaseLogRecord]:
"""Get Log List from api_url, limit to within 30 days of the comparison date"""
Expand All @@ -47,7 +47,7 @@ def get_month_log_list(
should_query = True
next_from: Optional[str] = None
while should_query:
query_data = fetch_api_data(
query_data = await fetch_api_data(
api_url + get_log_param_string(limit=limit, offset=next_from, oldest=oldest)
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ async def create_user_observation(wikibase_id: int) -> bool:

site_user_data: list[dict]
try:
site_user_data = get_all_user_data(wikibase.action_api_url.url)
site_user_data = await get_all_user_data(wikibase.action_api_url.url)
observation.returned_data = True
except (ReadTimeout, SSLError, ValueError):
observation.returned_data = False
Expand Down
6 changes: 4 additions & 2 deletions fetch_data/api_data/user_data/fetch_all_user_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from fetch_data.utils import fetch_api_data


def get_all_user_data(api_url: str) -> list[dict]:
async def get_all_user_data(api_url: str) -> list[dict]:
"""Fetch All User Data"""

data = []
Expand All @@ -13,7 +13,9 @@ def get_all_user_data(api_url: str) -> list[dict]:
next_from: str = "!"

while should_query:
query_data = fetch_api_data(api_url + all_users_url(continue_from=next_from))
query_data = await fetch_api_data(
api_url + all_users_url(continue_from=next_from)
)
data.extend(query_data["query"]["allusers"])
print(f"\tData Length: {len(data)}")
if "continue" in query_data:
Expand Down
14 changes: 6 additions & 8 deletions fetch_data/api_data/user_data/fetch_multiple_user_data.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
"""Fetch Multiple User Data"""

from collections.abc import Iterable
import json
import requests
from fetch_data.api_data.user_data.user_data_url import user_url
from fetch_data.utils.fetch_data_from_api import fetch_api_data
from model.database import WikibaseModel


MULTIPLE_USER_QUERY_LIMIT = 50


def get_multiple_user_data(wikibase: WikibaseModel, users: Iterable[str]) -> list[dict]:
async def get_multiple_user_data(
wikibase: WikibaseModel, users: Iterable[str]
) -> list[dict]:
"""Fetch Multiple User Data"""

if len(users) == 0:
Expand All @@ -21,14 +22,11 @@ def get_multiple_user_data(wikibase: WikibaseModel, users: Iterable[str]) -> lis
list_users = list(users)
for i in range(0, len(users), MULTIPLE_USER_QUERY_LIMIT):
data.extend(
get_multiple_user_data(
await get_multiple_user_data(
wikibase, list_users[i : i + MULTIPLE_USER_QUERY_LIMIT]
)
)
return data

result = requests.get(
wikibase.action_api_url.url + user_url("|".join(users)), timeout=10
)
data = json.loads(result.content)
data = await fetch_api_data(wikibase.action_api_url.url + user_url("|".join(users)))
return data["query"]["users"]
22 changes: 22 additions & 0 deletions fetch_data/out_of_date/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Out of Date"""

from fetch_data.out_of_date.get_out_of_date_wikibases import (
get_wikibase_list_with_out_of_date_connectivity_observations,
get_wikibase_list_with_out_of_date_log_first_observations,
get_wikibase_list_with_out_of_date_log_last_observations,
get_wikibase_list_with_out_of_date_property_popularity_observations,
get_wikibase_list_with_out_of_date_quantity_observations,
get_wikibase_list_with_out_of_date_software_observations,
get_wikibase_list_with_out_of_date_stats_observations,
get_wikibase_list_with_out_of_date_user_observations,
)
from fetch_data.out_of_date.update_out_of_date import (
update_out_of_date_connectivity_observations,
update_out_of_date_log_first_observations,
update_out_of_date_log_last_observations,
update_out_of_date_property_observations,
update_out_of_date_quantity_observations,
update_out_of_date_software_observations,
update_out_of_date_stats_observations,
update_out_of_date_user_observations,
)
Loading
Loading