test(tests): added more ckan tests [2024-11-25]

CHRISCARLON · Nov 25, 2024 · 78652a7 · 78652a7
1 parent ae1fdf3
commit 78652a7
Show file tree

Hide file tree

Showing 11 changed files with 295 additions and 105 deletions.
diff --git a/HerdingCats/explorer/cat_explore.py b/HerdingCats/explorer/cat_explore.py
@@ -42,7 +42,7 @@ def check_site_health(self) -> None:
         """
         Make sure the Ckan endpoints are healthy and reachable
 
-        This calls the Ckan site package endpoint to check if site is reacheable.
+        This calls the Ckan package_list endpoint to check if the site is reachable.
 
         # Example usage...
         if __name__ == "__main__":
@@ -82,7 +82,7 @@ def get_package_count(self) -> int:
         # Example usage...
         if __name__ == "__main__":
             with CatSession("data.london.gov.uk") as session:
-                explore = CatExplorer(session)
+                explore = CkanCatExplorer(session)
                 package_count = explore.get_package_count()
                 pprint(package_count)
         """
@@ -307,7 +307,7 @@ def package_list_dataframe_extra(
         if __name__ == "__main__":
             with CatSession("data.london.gov.uk") as session:
                 explore = CkanCatExplorer(session)
-                info_extra = package_list_dataframe_extra()
+                info_extra = explore.package_list_dataframe_extra('pandas')
                 pprint(info_extra)
 
         """
@@ -374,7 +374,7 @@ def catalogue_freshness(self):
         It currently uses metadata_modified at the dataset level - not resource level.
         """
         logger.warning(
-            "This method might not work for all catalogues, and will return 0s. It currently only works for the London Datastore."
+            "This method might not work for all catalogues, and will return 0s. It currently only works for the London Datastore. We are working on improving this"
         )
 
         url = (
@@ -458,6 +458,7 @@ def package_show_info_json(self, package_name: Union[str, dict, Any]) -> List[Di
         url = f"{base_url}?{urlencode(params)}" if params else base_url
 
         try:
+            print(url)
             response = self.cat_session.session.get(url)
             response.raise_for_status()
             data = response.json()
@@ -478,6 +479,19 @@ def package_search_json(self, search_query: str, num_rows: int):
         Returns all available data for a particular search query
 
         Specify the number of rows if the 'count' is large
+
+        # Example usage...
+        import HerdingCats as hc
+
+        def main():
+            with hc.CatSession(hc.CkanDataCatalogues.LONDON_DATA_STORE) as session:
+                explore = hc.CkanCatExplorer(session)
+                packages_search = explore.package_search_json("police", 50)
+                print(packages_search)
+
+        if __name__ == "__main__":
+            main()
+
         """
 
         base_url = self.cat_session.base_url + CkanApiPaths.PACKAGE_SEARCH

diff --git a/HerdingCats/session/cat_session.py b/HerdingCats/session/cat_session.py
@@ -7,12 +7,10 @@
 from ..endpoints.api_endpoints import CkanDataCatalogues, OpenDataSoftDataCatalogues
 from ..errors.cats_errors import CatSessionError
 
-
 class CatalogType(Enum):
     CKAN = "ckan"
     OPENDATA_SOFT = "opendatasoft"
 
-
 class CatSession:
     def __init__(
         self, domain: Union[str, CkanDataCatalogues, OpenDataSoftDataCatalogues]
@@ -36,9 +34,11 @@ def _process_domain(
         domain: Union[str, CkanDataCatalogues, OpenDataSoftDataCatalogues],
     ) -> tuple[str, CatalogType]:
         """
-        Process the domain to ensure it's in the correct format
+        Process the domain to ensure it's in the correct format.
+
         This iterates through the CkanDataCatalogues and OpenDataSoftDataCatalogues Enums and checks for a match.
-        Otherwise it processes the url as normal
+        Otherwise it processes the url as normal.
+
         Args:
             domain (url or data catalogue item): str
         Returns:

diff --git a/makefile b/makefile
@@ -1,21 +1,51 @@
-# Make pushing to github repo quick and easy
-# Git section
-.PHONY: git-all git-add git-commit git-push venv-start
-
 DATE := $(shell date +%Y-%m-%d)
 
-git-all: git-add git-commit git-push
+define COMMIT_TYPES
+feat:     A new feature
+fix:      A bug fix
+docs:     Documentation only changes
+style:    Changes that do not affect the meaning of the code
+refactor: A code change that neither fixes a bug nor adds a feature
+perf:     A code change that improves performance
+test:     Adding missing tests or correcting existing tests
+build:    Changes that affect the build system or external dependencies
+ci:       Changes to CI configuration files and scripts
+chore:    Other changes that don't modify src or test files
+revert:   Reverts a previous commit
+endef
+export COMMIT_TYPES
+
+update: git-add git-commit git-push
 
 git-add:
 	git add .
 
 git-commit:
-	@read -p "Please enter an additional commit message: " msg; \
-	git commit -m "Updates $(DATE) - $$msg"
+	@echo "Available commit types:"
+	@echo "$$COMMIT_TYPES" | sed 's/^/  /'
+	@echo
+	@read -p "Enter commit type: " type; \
+	if echo "$$COMMIT_TYPES" | grep -q "^$$type:"; then \
+		read -p "Enter commit scope (optional, press enter to skip): " scope; \
+		read -p "Is this a breaking change? (y/N): " breaking; \
+		read -p "Enter commit message: " msg; \
+		if [ "$$breaking" = "y" ] || [ "$$breaking" = "Y" ]; then \
+			if [ -n "$$scope" ]; then \
+				git commit -m "$$type!($$scope): $$msg [$(DATE)]" -m "BREAKING CHANGE: $$msg"; \
+			else \
+				git commit -m "$$type!: $$msg [$(DATE)]" -m "BREAKING CHANGE: $$msg"; \
+			fi; \
+		else \
+			if [ -n "$$scope" ]; then \
+				git commit -m "$$type($$scope): $$msg [$(DATE)]"; \
+			else \
+				git commit -m "$$type: $$msg [$(DATE)]"; \
+			fi; \
+		fi; \
+	else \
+		echo "Invalid commit type. Please use one of the available types."; \
+		exit 1; \
+	fi
 
 git-push:
 	git push
-
-venv-start:
-	@echo "To activate the virtual environment, run the following command:"
-	@echo "source .venv/bin/activate"
diff --git a/pytest.ini b/pytest.ini
@@ -1,2 +1,2 @@
 [pytest]
-pythonpath = HerdingCats/
+pythonpath = .
diff --git a/tests/ckan/test_ ckan_package_count.py b/tests/ckan/test_ ckan_package_count.py
@@ -0,0 +1,35 @@
+import pytest
+from HerdingCats.session.cat_session import CatSession
+from HerdingCats.explorer.cat_explore import CkanCatExplorer
+from HerdingCats.endpoints.api_endpoints import CkanApiPaths
+import requests
+from loguru import logger
+
+CATALOGUES = [
+    "https://data.london.gov.uk"
+]
+
+@pytest.mark.parametrize("catalogue_url", CATALOGUES)
+def test_get_package_count(catalogue_url):
+    """
+    Test that the get_package_count method returns a valid count of datasets
+    for predefined data catalogues
+    """
+    with CatSession(catalogue_url) as cat_session:
+        explorer = CkanCatExplorer(cat_session)
+        try:
+            # Get the package count
+            package_count = explorer.get_package_count()
+
+            # Assert that we got a valid integer
+            assert isinstance(package_count, int), f"Expected integer package count, got {type(package_count)}"
+
+            # Assert that the count is positive
+            assert package_count > 0, f"Expected positive package count, got {package_count}"
+
+            logger.info(f"Successfully retrieved package count for {catalogue_url}: {package_count} packages")
+
+        except requests.RequestException as e:
+            pytest.fail(f"Failed to connect to CKAN endpoint for {catalogue_url}: {str(e)}")
+        except AssertionError as e:
+            pytest.fail(str(e))
diff --git a/tests/test_endpoint_health.py → tests/ckan/test_ckan_endpoint_health.py b/tests/test_endpoint_health.py → tests/ckan/test_ckan_endpoint_health.py
@@ -5,10 +5,7 @@
 from loguru import logger
 
 CATALOGUES = [
-    "https://data.london.gov.uk",
-    "https://data.humdata.org",
-    "https://data.gov.uk",
-    "https://open.africa"
+    "https://data.london.gov.uk"
 ]
 
 @pytest.mark.parametrize("catalogue_url", CATALOGUES)
@@ -20,6 +17,7 @@ def test_ckan_health_check(catalogue_url):
         url = cat_session.base_url + CkanApiPaths.PACKAGE_LIST
         try:
             response = cat_session.session.get(url)
+            print(response)
 
             # Check status code
             assert response.status_code == 200, f"Expected status code 200, but got {response.status_code}"

diff --git a/tests/ckan/test_ckan_package_list.py b/tests/ckan/test_ckan_package_list.py
@@ -0,0 +1,86 @@
+import pytest
+from HerdingCats.session.cat_session import CatSession
+from HerdingCats.explorer.cat_explore import CkanCatExplorer
+import requests
+from loguru import logger
+
+CATALOGUES = ["https://data.london.gov.uk"]
+
+
+@pytest.mark.parametrize("catalogue_url", CATALOGUES)
+def test_package_list_dictionary(catalogue_url):
+    """
+    Test the package list functionality for predefined data catalogues
+    """
+    with CatSession(catalogue_url) as cat_session:
+        explorer = CkanCatExplorer(cat_session)
+        try:
+            results = explorer.package_list_dictionary()
+
+            print(results)
+
+            # Assert that we got a result
+            assert results is not None, f"No results returned for {catalogue_url}"
+
+            # Check if we got the expected number of rows
+            assert len(results) > 100, "There could be a problem - check manually"
+
+            logger.info(f"Package search test passed for {catalogue_url}")
+        except requests.RequestException as e:
+            pytest.fail(
+                f"Failed to perform package search for {catalogue_url}: {str(e)}"
+            )
+        except AssertionError as e:
+            pytest.fail(str(e))
+
+@pytest.mark.parametrize("catalogue_url", CATALOGUES)
+def test_package_list_dataframe(catalogue_url):
+    """
+    Test the package list dataframe functionality for predefined data catalogues
+    """
+    with CatSession(catalogue_url) as cat_session:
+        explorer = CkanCatExplorer(cat_session)
+        try:
+            results_pandas = explorer.package_list_dataframe("pandas")
+
+            print(results_pandas)
+
+            # Assert that we got a result
+            assert results_pandas is not None, f"No results returned for {catalogue_url}"
+
+            # Check if we got the expected number of rows
+            assert len(results_pandas) > 100, "There could be a problem - check manually"
+
+            logger.info(f"Package search test passed for {catalogue_url}")
+        except requests.RequestException as e:
+            pytest.fail(
+                f"Failed to perform package search for {catalogue_url}: {str(e)}"
+            )
+        except AssertionError as e:
+            pytest.fail(str(e))
+
+@pytest.mark.parametrize("catalogue_url", CATALOGUES)
+def test_package_list_dataframe_extra(catalogue_url):
+    """
+    Test the package list dataframe extra functionality for predefined data catalogues
+    """
+    with CatSession(catalogue_url) as cat_session:
+        explorer = CkanCatExplorer(cat_session)
+        try:
+            results_pandas = explorer.package_list_dataframe_extra("polars")
+
+            print(results_pandas)
+
+            # Assert that we got a result
+            assert results_pandas is not None, f"No results returned for {catalogue_url}"
+
+            # Check if we got the expected number of rows
+            assert len(results_pandas) > 100, "There could be a problem - check manually"
+
+            logger.info(f"Package search test passed for {catalogue_url}")
+        except requests.RequestException as e:
+            pytest.fail(
+                f"Failed to perform package search for {catalogue_url}: {str(e)}"
+            )
+        except AssertionError as e:
+            pytest.fail(str(e))
diff --git a/tests/ckan/test_ckan_package_search.py b/tests/ckan/test_ckan_package_search.py
@@ -0,0 +1,54 @@
+import pytest
+import requests
+
+from pprint import pprint
+from HerdingCats.session.cat_session import CatSession
+from HerdingCats.explorer.cat_explore import CkanCatExplorer
+from HerdingCats.errors.cats_errors import CatExplorerError
+from loguru import logger
+
+CATALOGUES = ["https://data.london.gov.uk"]
+TEST_SEARCH_QUERY = "police"
+TEST_NUM_ROWS = 50
+
+@pytest.mark.parametrize("catalogue_url,query,rows", [
+   (CATALOGUES[0], TEST_SEARCH_QUERY, TEST_NUM_ROWS),
+])
+def test_package_search_json(catalogue_url, query, rows):
+   """
+   Test the package_search_json functionality
+   """
+   with CatSession(catalogue_url) as cat_session:
+       explorer = CkanCatExplorer(cat_session)
+       try:
+           results = explorer.package_search_json(query, rows)
+           pprint(results)
+
+           # Basic assertions
+           assert results is not None, "No results returned"
+           assert isinstance(results, dict), "Results should be a dictionary"
+
+           # Check for expected keys in response - allow either 'result' or 'results'
+           assert 'count' in results, "Missing count key"
+           assert 'result' in results or 'results' in results, "Missing result/results key"
+
+           # Get the results list regardless of key name
+           results_list = results.get('results', results.get('result', []))
+           assert isinstance(results_list, list), "Results should be a list"
+
+           # Check content of results if any found
+           if results_list:
+               first_result = results_list[0]
+               assert isinstance(first_result, dict), "Result list items should include dictionaries"
+
+               # Check for some common CKAN package fields
+               package_keys = ['id', 'name', 'title']
+               for key in package_keys:
+                   assert key in first_result, f"Missing expected package key: {key}"
+
+           logger.info(f"Package search test passed for query '{query}' with {len(results_list)} results")
+
+       except requests.RequestException as e:
+           pytest.fail(f"Failed to search packages with query '{query}': {str(e)}")
+       except AssertionError as e:
+           pytest.fail(str(e))
diff --git a/tests/test_session_creation.py → tests/ckan/test_ckan_session_creation.py b/tests/test_session_creation.py → tests/ckan/test_ckan_session_creation.py