Skip to content

Commit

Permalink
Updates 2024-09-09 - Small Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
CHRISCARLON committed Sep 9, 2024
1 parent 7268733 commit 727b082
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 18 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -195,3 +195,4 @@ src/os_open_usrn_functions/explore_and_prep.py

.venv_3.11
.test_venv
settings.json*
37 changes: 19 additions & 18 deletions HerdingCats/herding_cats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pandas as pd
import polars as pl
import duckdb
import json

from io import BytesIO
from typing import Any, Dict, Optional, Union, Literal, List
Expand Down Expand Up @@ -643,21 +644,21 @@ def _extract_condensed_package_data(
Example output:
[{'name': 'police-force-strength',
'notes_markdown': 'Numbers of police officers, police civilian staff, and '
'notes_markdown': 'Numbers of police officers, police civilian staff, and '
'Police Community Support Officers in the Metropolitan '
"Police Force. Figures are reported by MOPAC to the GLA's "
'Police and Crime Committee each month. The figures are '
'full-time equivalent figures (FTE) in order to take '
'account of part-time working, job sharing etc, and do not '
'represent a measure of headcount. '
'For more information, click here and here.',
'num_resources': 1,
'resources': [{'created': '2024-08-28T16:15:59.080Z',
'format': 'csv',
'name': 'Police force strength',
'url': 'https://airdrive-secure.s3-eu-west-1.amazonaws.com/
london/dataset/police-force-strength/2024-08-28T16%3A15%3A56/
Police_Force_Strength.csv'}]}
'num_resources': 1,
'resources': [{'created': '2024-08-28T16:15:59.080Z',
'format': 'csv',
'name': 'Police force strength',
'url': 'https://airdrive-secure.s3-eu-west-1.amazonaws.com/
london/dataset/police-force-strength/2024-08-28T16%3A15%3A56/
Police_Force_Strength.csv'}]}
"""
return [
{
Expand Down Expand Up @@ -729,7 +730,7 @@ def _extract_package_show_data(data: Dict[str, Any]) -> List[Dict[str, Any]]:


# START TO WRANGLE / ANALYSE
# Only support excel files for now
# Only supports excel files for now
# The plan is to support csv and json as well
class CkanCatAnalyser:
def __init__(self):
Expand Down Expand Up @@ -825,13 +826,13 @@ def duckdb_data_loader_persist(

# Example usage...
if __name__ == "__main__":
with CkanCatSession("") as session:
with CkanCatSession(CkanDataCatalogues.HUMANITARIAN) as session:
explore = CkanCatExplorer(session)
all_packages = explore.package_list_dictionary()
print(all_packages)
data = all_packages.get("")
info = explore.package_show_info_json(data)
dl_link = explore.extract_resource_url(info, "")
analyser = CkanCatAnalyser()
df = analyser.polars_data_loader(dl_link)
print(df)
all_packages = explore.package_search_json("water", 5)
print(json.dumps(all_packages, indent=4))
# data = all_packages.get("")
# info = explore.package_show_info_json(data)
# dl_link = explore.extract_resource_url(info, "")
# analyser = CkanCatAnalyser()
# df = analyser.polars_data_loader(dl_link)
# print(df)

0 comments on commit 727b082

Please sign in to comment.