Updates 2024-09-20 - Readme changes and csv support added

CHRISCARLON · Sep 20, 2024 · e421851 · e421851
1 parent 6e47948
commit e421851
Show file tree

Hide file tree

Showing 2 changed files with 73 additions and 29 deletions.
diff --git a/HerdingCats/herding_cats.py b/HerdingCats/herding_cats.py
@@ -806,22 +806,35 @@ def _extract_package_show_data(data: Dict[str, Any]) -> List[Dict[str, Any]]:
 class CkanCatAnalyser:
     """
     Need to do:
-        polars ✅
-        pandas ✅
-        duckdb ✅
-        motherduck ✅
-
-        S3 (duckdb)
-        S3 (direct)
-        S3 (DeltaLake)
-        S3 (Iceberg)
-        Redshift
-
-        Databricks
-        Snowflake
-
-        Google Cloud Storage
-        Google Big Query
+
+        File Formats:
+            Xlsx ✅
+            Csv ✅
+            Xls
+            Parquet
+            JSON
+            Geopackage
+            Shapefile
+            GeoJSON
+
+
+        Tools and Libs:
+            polars ✅
+            pandas ✅
+            duckdb ✅
+            motherduck ✅
+
+            S3 (duckdb)
+            S3 (direct)
+            S3 (DeltaLake)
+            S3 (Iceberg)
+            Redshift
+
+            Databricks
+            Snowflake
+
+            Google Cloud Storage
+            Google Big Query
     """
 
     def __init__(self):
@@ -846,6 +859,9 @@ def polars_data_loader(
             ):
                 df = pl.read_excel(binary_data)
                 return df
+            elif file_format and file_format.lower() == "csv":
+                df = pl.read_csv(binary_data)
+                return df
             else:
                 logger.error("Error")
         else:
@@ -870,6 +886,9 @@ def pandas_data_loader(
             ):
                 df = pd.read_excel(binary_data)
                 return df
+            elif file_format and file_format.lower() == "csv":
+                df = pd.read_csv(binary_data)
+                return df
             else:
                 logger.error("Error")
         else:

diff --git a/README.md b/README.md
@@ -1,20 +1,45 @@
 # Herding-CATs 🐈‍⬛
 
-## Purpose
-
-**The aim of this project is simple, create a basic python library to explore and interact with open data catalogues**.
+Version: 0.1.2
 
-Easily access data from several CKAN data sources with planned integrations for:
-
-- Polars
-- Pandas
-- DuckDB
-- MotherDuck
-- S3
-- Google Big Query
-- Maybe a few others
+## Purpose
 
-This will improve and speed up how people access and use data from open data catalogues.
+**The aim of this project is simple: create a basic Python library to explore and interact with open data catalogues - particularly those with CKAN backends**.
+
+Need to do:
+- File Formats:
+  - Xlsx ✅
+  - Csv ✅
+  - Xls
+  - Parquet
+  - JSON
+  - Geopackage
+  - Shapefile
+  - GeoJSON
+
+
+- Tools and Libs:
+  - polars ✅
+  - pandas ✅
+  - duckdb ✅
+  - motherduck ✅
+
+  - S3 (duckdb)
+  - S3 (direct)
+  - S3 (DeltaLake)
+  - S3 (Iceberg)
+  - Redshift
+
+  - Databricks
+  - Snowflake
+
+  - Google Cloud Storage
+  - Google Big Query
+
+This will improve and speed up how people:
+- Navigate open data catalogues
+- Find the data they need
+- Get that data into a format for further analysis / aggregation
 
 ## Current Default Open Data Catalogues