Skip to content

Commit

Permalink
Merge pull request #205 from atoti/sync-deploy-to-aws-with-main
Browse files Browse the repository at this point in the history
  • Loading branch information
tibdex authored Jun 26, 2023
2 parents 9607eb6 + 85279b9 commit 9bc30a7
Show file tree
Hide file tree
Showing 10 changed files with 28,745 additions and 16,296 deletions.
15 changes: 6 additions & 9 deletions app/load_tables.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from collections.abc import Mapping
from collections.abc import Iterable, Mapping
from datetime import timedelta
from pathlib import Path
from typing import Any, cast
Expand Down Expand Up @@ -38,14 +38,11 @@ def read_station_details(
}
)

# Drop some precision to ensure stability of reverse geocoding results.
station_information_df = station_information_df.round(
{"latitude": 6, "longitude": 6}
)
coordinates_column_names = ["latitude", "longitude"]

coordinates = cast(
list[tuple[float, float]],
station_information_df[["latitude", "longitude"]].itertuples(
Iterable[tuple[float, float]],
station_information_df[coordinates_column_names].itertuples(
index=False, name=None
),
)
Expand All @@ -63,8 +60,8 @@ def read_station_details(
)

return station_information_df.merge(
reverse_geocoded_df, how="left", on=["latitude", "longitude"]
).drop(columns=["latitude", "longitude"])
reverse_geocoded_df, how="left", on=coordinates_column_names
).drop(columns=coordinates_column_names)


def read_station_status(
Expand Down
3 changes: 1 addition & 2 deletions app/util/read_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ def read_json(
url = f"{base_path}/{file_path.as_posix()}"
response = requests.get(url, timeout=timeout.total_seconds())
response.raise_for_status()
body = response.json()
return body
return response.json()

return json.loads((base_path / file_path).read_bytes())
112 changes: 70 additions & 42 deletions app/util/reverse_geocode.py
Original file line number Diff line number Diff line change
@@ -1,80 +1,108 @@
from __future__ import annotations

from collections.abc import Iterable
from collections.abc import Iterable, Mapping, Set
from datetime import timedelta
from functools import lru_cache
from functools import wraps
from io import StringIO
from pathlib import Path
from typing import IO
from typing import Any, Callable, TypeVar, cast

import pandas as pd
import requests
from pydantic import HttpUrl
from typing_extensions import ParamSpec

_Coordinates = tuple[float, float] # (latitude, longitude)

_COORDINATES_COLUMN_NAMES: Iterable[str] = ["latitude", "longitude"]

@lru_cache
def _cached_reverse_geocode(
stable_coordinates: tuple[_Coordinates, ...],
_COLUMN_NAME_MAPPING: Mapping[str, str] = {
"result_context": "department",
"result_city": "city",
"result_postcode": "postcode",
"result_street": "street",
"result_housenumber": "house_number",
}

_P = ParamSpec("_P")
_R = TypeVar("_R")


def _cache(function: Callable[_P, _R], /) -> Callable[_P, _R]:
    """Memoize a batched coordinate lookup per coordinate rather than per call.

    The decorated function must accept a set of coordinates as its first
    positional argument and return a mapping keyed by those coordinates.

    On each call, only the coordinates that are not cached yet are forwarded
    to the decorated function. The returned mapping contains the freshly
    computed entries *and* the previously cached entries for the requested
    coordinates, so callers always get an answer for everything they asked
    about (returning only the fresh entries would silently drop every
    coordinate seen in an earlier call).

    The cache lives for the lifetime of the decorated function and is keyed
    on coordinates only: other arguments are not part of the cache key.
    """
    cache: dict[_Coordinates, dict[str, str]] = {}

    @wraps(function)
    def function_wrapper(
        *args: _P.args,
        **kwargs: _P.kwargs,
    ) -> _R:
        coordinates, *tail = args
        # Narrows the type for the set arithmetic below.
        assert isinstance(coordinates, Set)
        new_coordinates = coordinates - set(cache)

        # Snapshot the cache hits before the cache is updated.
        merged: dict[_Coordinates, dict[str, str]] = {
            known: cache[known] for known in coordinates - new_coordinates
        }

        # The wrapped function is still called when nothing is new; it is
        # expected to handle an empty set cheaply.
        fetched = cast(Any, function)(new_coordinates, *tail, **kwargs)
        cache.update(fetched)
        merged.update(fetched)
        return cast(Any, merged)

    return function_wrapper


@_cache
def _reverse_geocode(
coordinates: Set[_Coordinates],
/,
*,
reverse_geocoding_path: HttpUrl | Path,
timeout: timedelta,
) -> pd.DataFrame:
) -> dict[_Coordinates, dict[str, str]]:
if not coordinates:
return {}

data: StringIO | Path
coordinates_df = pd.DataFrame(coordinates, columns=list(_COORDINATES_COLUMN_NAMES))

if isinstance(reverse_geocoding_path, HttpUrl):
if isinstance(reverse_geocoding_path, Path):
data = reverse_geocoding_path
else:
file = StringIO()
pd.DataFrame(stable_coordinates, columns=["latitude", "longitude"]).to_csv(
file, index=False
)
coordinates_df.to_csv(file, index=False)
file.seek(0)
_file: IO[str] = file # To make Mypy happy.
response = requests.post(
reverse_geocoding_path,
files={"data": _file},
data=[
("result_columns", column_name) for column_name in _COLUMN_NAME_MAPPING
],
files={"data": file},
timeout=timeout.total_seconds(),
)
response.raise_for_status()
data = StringIO(response.text)
else:
# mypy fails to refine the type of `reverse_geocoding_path` to `Path`.
data = reverse_geocoding_path # type: ignore[assignment]

results_df = pd.read_csv(data)
results_df = results_df.rename(
columns={
"result_context": "department",
"result_city": "city",
"result_postcode": "postcode",
"result_name": "street",
"result_housenumber": "house_number",
}
)
results_df = results_df[
[
"latitude",
"longitude",
"department",
"city",
"postcode",
"street",
"house_number",
]
]
return results_df
assert len(results_df) == len(coordinates_df)

# The returned coordinates are not strictly equal to the input ones.
# They may have slightly moved.
# Using input ones to allow the caller to look up the addresses of the coordinates it has.
for column_name in coordinates_df.columns:
results_df[column_name] = coordinates_df[column_name]

results_df = results_df.set_index(list(coordinates_df.columns))
results_df = results_df.rename(columns=_COLUMN_NAME_MAPPING)
return results_df.to_dict("index") # type: ignore[return-value]


def reverse_geocode(
data: Iterable[_Coordinates],
coordinates: Iterable[_Coordinates],
/,
*,
reverse_geocoding_path: HttpUrl | Path,
timeout: timedelta,
) -> pd.DataFrame:
return _cached_reverse_geocode(
tuple(sorted(data)),
reverse_geocoding_path=reverse_geocoding_path,
timeout=timeout,
result = _reverse_geocode(
set(coordinates), reverse_geocoding_path=reverse_geocoding_path, timeout=timeout
)
result_df = pd.DataFrame.from_dict(result, orient="index")
index = result_df.index.set_names(_COORDINATES_COLUMN_NAMES)
result_df.index = index
return result_df.reset_index()
Loading

0 comments on commit 9bc30a7

Please sign in to comment.