Skip to content

Commit

Permalink
test(tests): added more ckan tests [2024-11-25]
Browse files Browse the repository at this point in the history
  • Loading branch information
CHRISCARLON committed Nov 25, 2024
1 parent ae1fdf3 commit 78652a7
Show file tree
Hide file tree
Showing 11 changed files with 295 additions and 105 deletions.
22 changes: 18 additions & 4 deletions HerdingCats/explorer/cat_explore.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def check_site_health(self) -> None:
"""
Make sure the Ckan endpoints are healthy and reachable
This calls the Ckan site package endpoint to check if site is reacheable.
This calls the CKAN package_list endpoint to check if the site is reachable.
# Example usage...
if __name__ == "__main__":
Expand Down Expand Up @@ -82,7 +82,7 @@ def get_package_count(self) -> int:
# Example usage...
if __name__ == "__main__":
with CatSession("data.london.gov.uk") as session:
explore = CatExplorer(session)
explore = CkanCatExplorer(session)
package_count = explore.get_package_count()
pprint(package_count)
"""
Expand Down Expand Up @@ -307,7 +307,7 @@ def package_list_dataframe_extra(
if __name__ == "__main__":
with CatSession("data.london.gov.uk") as session:
explore = CkanCatExplorer(session)
info_extra = package_list_dataframe_extra()
info_extra = explore.package_list_dataframe_extra('pandas')
pprint(info_extra)
"""
Expand Down Expand Up @@ -374,7 +374,7 @@ def catalogue_freshness(self):
It currently uses metadata_modified at the dataset level - not resource level.
"""
logger.warning(
"This method might not work for all catalogues, and will return 0s. It currently only works for the London Datastore."
"This method might not work for all catalogues, and will return 0s. It currently only works for the London Datastore. We are working on improving this"
)

url = (
Expand Down Expand Up @@ -458,6 +458,7 @@ def package_show_info_json(self, package_name: Union[str, dict, Any]) -> List[Di
url = f"{base_url}?{urlencode(params)}" if params else base_url

try:
print(url)
response = self.cat_session.session.get(url)
response.raise_for_status()
data = response.json()
Expand All @@ -478,6 +479,19 @@ def package_search_json(self, search_query: str, num_rows: int):
Returns all available data for a particular search query
Specify the number of rows if the 'count' is large
# Example usage...
import HerdingCats as hc
def main():
with hc.CatSession(hc.CkanDataCatalogues.LONDON_DATA_STORE) as session:
explore = hc.CkanCatExplorer(session)
packages_search = explore.package_search_json("police", 50)
print(packages_search)
if __name__ == "__main__":
main()
"""

base_url = self.cat_session.base_url + CkanApiPaths.PACKAGE_SEARCH
Expand Down
8 changes: 4 additions & 4 deletions HerdingCats/session/cat_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,10 @@
from ..endpoints.api_endpoints import CkanDataCatalogues, OpenDataSoftDataCatalogues
from ..errors.cats_errors import CatSessionError


class CatalogType(Enum):
    """Kinds of data catalogue distinguished by this module."""

    # String value identifying a CKAN catalogue.
    CKAN = "ckan"
    # String value identifying an OpenDataSoft catalogue.
    OPENDATA_SOFT = "opendatasoft"


class CatSession:
def __init__(
self, domain: Union[str, CkanDataCatalogues, OpenDataSoftDataCatalogues]
Expand All @@ -36,9 +34,11 @@ def _process_domain(
domain: Union[str, CkanDataCatalogues, OpenDataSoftDataCatalogues],
) -> tuple[str, CatalogType]:
"""
Process the domain to ensure it's in the correct format
Process the domain to ensure it's in the correct format.
This iterates through the CkanDataCatalogues and OpenDataSoftDataCatalogues Enums and checks for a match
Otherwise it processes the url as normal
Otherwise it processes the url as normal.
Args:
domain (url or data catalogue item): str
Returns:
Expand Down
52 changes: 41 additions & 11 deletions makefile
Original file line number Diff line number Diff line change
@@ -1,21 +1,51 @@
# Make pushing to github repo quick and easy
# Git section
# All of these are command names rather than files. `update` is listed so that
# a stray file called "update" in the repo root cannot shadow the target.
.PHONY: update git-all git-add git-commit git-push venv-start

DATE := $(shell date +%Y-%m-%d)

git-all: git-add git-commit git-push
define COMMIT_TYPES
feat: A new feature
fix: A bug fix
docs: Documentation only changes
style: Changes that do not affect the meaning of the code
refactor: A code change that neither fixes a bug nor adds a feature
perf: A code change that improves performance
test: Adding missing tests or correcting existing tests
build: Changes that affect the build system or external dependencies
ci: Changes to CI configuration files and scripts
chore: Other changes that don't modify src or test files
revert: Reverts a previous commit
endef
export COMMIT_TYPES

update: git-add git-commit git-push

git-add:
git add .

# Interactively build a Conventional Commits message and commit with it.
#
# Prompts for: type (validated against COMMIT_TYPES), optional scope,
# breaking-change flag, and the message itself. The header is
# "<type>[(scope)][!]: <msg> [DATE]"; per the Conventional Commits spec the
# "!" goes AFTER the optional scope. Breaking changes also get a
# "BREAKING CHANGE:" footer.
#
# Prompts use printf + read -r instead of `read -p`, which is a bashism and
# fails when make runs recipes with a POSIX /bin/sh such as dash.
git-commit:
	@echo "Available commit types:"
	@echo "$$COMMIT_TYPES" | sed 's/^/ /'
	@echo
	@printf "Enter commit type: "; read -r type; \
	if ! echo "$$COMMIT_TYPES" | grep -q "^$$type:"; then \
		echo "Invalid commit type. Please use one of the available types."; \
		exit 1; \
	fi; \
	printf "Enter commit scope (optional, press enter to skip): "; read -r scope; \
	printf "Is this a breaking change? (y/N): "; read -r breaking; \
	printf "Enter commit message: "; read -r msg; \
	prefix="$$type"; \
	if [ -n "$$scope" ]; then \
		prefix="$$prefix($$scope)"; \
	fi; \
	case "$$breaking" in \
		y|Y) git commit -m "$$prefix!: $$msg [$(DATE)]" -m "BREAKING CHANGE: $$msg" ;; \
		*) git commit -m "$$prefix: $$msg [$(DATE)]" ;; \
	esac

git-push:
git push

venv-start:
@echo "To activate the virtual environment, run the following command:"
@echo "source .venv/bin/activate"
2 changes: 1 addition & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[pytest]
pythonpath = HerdingCats/
pythonpath = .
35 changes: 35 additions & 0 deletions tests/ckan/test_ ckan_package_count.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import pytest
from HerdingCats.session.cat_session import CatSession
from HerdingCats.explorer.cat_explore import CkanCatExplorer
from HerdingCats.endpoints.api_endpoints import CkanApiPaths
import requests
from loguru import logger

CATALOGUES = [
"https://data.london.gov.uk"
]

@pytest.mark.parametrize("catalogue_url", CATALOGUES)
def test_get_package_count(catalogue_url):
    """
    Check that get_package_count returns a positive integer for each catalogue.

    Args:
        catalogue_url: Entry from CATALOGUES used to open the CatSession.

    A requests.RequestException raised by the explorer call is reported as a
    connection failure; assertion failures are left to pytest itself.
    """
    with CatSession(catalogue_url) as cat_session:
        explorer = CkanCatExplorer(cat_session)

        # Guard only the call that can hit the network, so connection problems
        # and wrong results are reported as two clearly different failures.
        try:
            package_count = explorer.get_package_count()
        except requests.RequestException as e:
            pytest.fail(f"Failed to connect to CKAN endpoint for {catalogue_url}: {str(e)}")

        # Assert that we got a valid integer
        assert isinstance(package_count, int), f"Expected integer package count, got {type(package_count)}"

        # Assert that the count is positive
        assert package_count > 0, f"Expected positive package count, got {package_count}"

        logger.info(f"Successfully retrieved package count for {catalogue_url}: {package_count} packages")
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
from loguru import logger

CATALOGUES = [
"https://data.london.gov.uk",
"https://data.humdata.org",
"https://data.gov.uk",
"https://open.africa"
"https://data.london.gov.uk"
]

@pytest.mark.parametrize("catalogue_url", CATALOGUES)
Expand All @@ -20,6 +17,7 @@ def test_ckan_health_check(catalogue_url):
url = cat_session.base_url + CkanApiPaths.PACKAGE_LIST
try:
response = cat_session.session.get(url)
print(response)

# Check status code
assert response.status_code == 200, f"Expected status code 200, but got {response.status_code}"
Expand Down
86 changes: 86 additions & 0 deletions tests/ckan/test_ckan_package_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import pytest
from HerdingCats.session.cat_session import CatSession
from HerdingCats.explorer.cat_explore import CkanCatExplorer
import requests
from loguru import logger

CATALOGUES = ["https://data.london.gov.uk"]


@pytest.mark.parametrize("catalogue_url", CATALOGUES)
def test_package_list_dictionary(catalogue_url):
    """
    Check that package_list_dictionary returns more than 100 entries.

    Args:
        catalogue_url: Entry from CATALOGUES used to open the CatSession.

    A requests.RequestException raised by the explorer call is reported as a
    fetch failure; assertion failures are left to pytest itself.
    """
    with CatSession(catalogue_url) as cat_session:
        explorer = CkanCatExplorer(cat_session)

        # Guard only the call that can hit the network; the assertions below
        # keep pytest's own failure reporting.
        try:
            results = explorer.package_list_dictionary()
        except requests.RequestException as e:
            pytest.fail(
                f"Failed to fetch package list for {catalogue_url}: {str(e)}"
            )

        print(results)

        # Assert that we got a result
        assert results is not None, f"No results returned for {catalogue_url}"

        # Check if we got the expected number of rows
        assert len(results) > 100, "There could be a problem - check manually"

        logger.info(f"Package list dictionary test passed for {catalogue_url}")

@pytest.mark.parametrize("catalogue_url", CATALOGUES)
def test_package_list_dataframe(catalogue_url):
    """
    Check that package_list_dataframe("pandas") returns more than 100 rows.

    Args:
        catalogue_url: Entry from CATALOGUES used to open the CatSession.

    A requests.RequestException raised by the explorer call is reported as a
    fetch failure; assertion failures are left to pytest itself.
    """
    with CatSession(catalogue_url) as cat_session:
        explorer = CkanCatExplorer(cat_session)

        # Guard only the call that can hit the network; the assertions below
        # keep pytest's own failure reporting.
        try:
            results_df = explorer.package_list_dataframe("pandas")
        except requests.RequestException as e:
            pytest.fail(
                f"Failed to fetch package list dataframe for {catalogue_url}: {str(e)}"
            )

        print(results_df)

        # Assert that we got a result
        assert results_df is not None, f"No results returned for {catalogue_url}"

        # Check if we got the expected number of rows
        assert len(results_df) > 100, "There could be a problem - check manually"

        logger.info(f"Package list dataframe test passed for {catalogue_url}")

@pytest.mark.parametrize("catalogue_url", CATALOGUES)
def test_package_list_dataframe_extra(catalogue_url):
    """
    Check that package_list_dataframe_extra("polars") returns more than 100 rows.

    Args:
        catalogue_url: Entry from CATALOGUES used to open the CatSession.

    A requests.RequestException raised by the explorer call is reported as a
    fetch failure; assertion failures are left to pytest itself.
    """
    with CatSession(catalogue_url) as cat_session:
        explorer = CkanCatExplorer(cat_session)

        # Guard only the call that can hit the network; the assertions below
        # keep pytest's own failure reporting.
        try:
            # Requested with the "polars" option, hence the neutral name.
            results_df = explorer.package_list_dataframe_extra("polars")
        except requests.RequestException as e:
            pytest.fail(
                f"Failed to fetch extra package list dataframe for {catalogue_url}: {str(e)}"
            )

        print(results_df)

        # Assert that we got a result
        assert results_df is not None, f"No results returned for {catalogue_url}"

        # Check if we got the expected number of rows
        assert len(results_df) > 100, "There could be a problem - check manually"

        logger.info(f"Package list dataframe extra test passed for {catalogue_url}")
54 changes: 54 additions & 0 deletions tests/ckan/test_ckan_package_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import pytest
import requests

from pprint import pprint
from HerdingCats.session.cat_session import CatSession
from HerdingCats.explorer.cat_explore import CkanCatExplorer
from HerdingCats.errors.cats_errors import CatExplorerError
from loguru import logger

CATALOGUES = ["https://data.london.gov.uk"]
TEST_SEARCH_QUERY = "police"
TEST_NUM_ROWS = 50

@pytest.mark.parametrize("catalogue_url,query,rows", [
    (CATALOGUES[0], TEST_SEARCH_QUERY, TEST_NUM_ROWS),
])
def test_package_search_json(catalogue_url, query, rows):
    """
    Check the shape of the response returned by package_search_json.

    Args:
        catalogue_url: Catalogue URL used to open the CatSession.
        query: Search term passed to package_search_json.
        rows: Row count passed to package_search_json.

    The response must be a dict with a 'count' key and a list under either
    'result' or 'results'. If the list is non-empty, its first item must be a
    dict containing 'id', 'name' and 'title'.
    """
    with CatSession(catalogue_url) as cat_session:
        explorer = CkanCatExplorer(cat_session)

        # Guard only the call that can hit the network; the assertions below
        # keep pytest's own failure reporting.
        try:
            results = explorer.package_search_json(query, rows)
        except requests.RequestException as e:
            pytest.fail(f"Failed to search packages with query '{query}': {str(e)}")

        pprint(results)

        # Basic assertions
        assert results is not None, "No results returned"
        assert isinstance(results, dict), "Results should be a dictionary"

        # Check for expected keys in response - allow either 'result' or 'results'
        assert 'count' in results, "Missing count key"
        assert 'result' in results or 'results' in results, "Missing result/results key"

        # Get the results list regardless of key name
        results_list = results.get('results', results.get('result', []))
        assert isinstance(results_list, list), "Results should be a list"

        # Check content of results if any found
        if results_list:
            first_result = results_list[0]
            assert isinstance(first_result, dict), "Result list items should include dictionaries"

            # Check for some common CKAN package fields
            for key in ('id', 'name', 'title'):
                assert key in first_result, f"Missing expected package key: {key}"

        logger.info(f"Package search test passed for query '{query}' with {len(results_list)} results")
File renamed without changes.
Loading

0 comments on commit 78652a7

Please sign in to comment.