diff --git a/docs/conf.py b/docs/conf.py index 2152b6f..2f28038 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,27 @@ +import os +import pathlib +import sys + +import django + +import libcoveweb2.settings + project = "LibCoveWeb2" master_doc = "index" html_theme = "odsc_default_sphinx_theme" + +extensions = [ + "sphinx.ext.autodoc", +] + +###### Make Sphinx able to document our python code + +# We need to be able to see our code, so add this directory to path +sys.path.insert(0, str(pathlib.Path("..").resolve())) + +# We need to set up Django enough that it won't complain +libcoveweb2.settings.INSTALLED_APPS += ("libcoveweb2",) +os.environ["DJANGO_SETTINGS_MODULE"] = "libcoveweb2.settings" +django.setup() diff --git a/docs/django-settings.rst b/docs/django-settings.rst new file mode 100644 index 0000000..05e4934 --- /dev/null +++ b/docs/django-settings.rst @@ -0,0 +1,48 @@ +Django Settings +=============== + +To use this app you'll need to define several settings + + + +Process Tasks +------------- + +You need to define a `PROCESS_TASKS` setting. This lists all the tasks that will be processed for each uploaded data, in order of processing. + +It should be a list of tuples and every tuple should be `('Python module', 'Python class name')`. +Each class should extend libcoveweb2.process.base.ProcessDataTask + +Example: + +.. code-block:: python + + PROCESS_TASKS = [ + # Get data if not already on disk + ("libcoveweb2.process.common_tasks.download_data_task", "DownloadDataTask"), + ... + ] + +Celery Message Queue +-------------------- + +Any Celery settings needed must be set up. + +At a minimum this will include `CELERY_BROKER_URL`. + + +Settings to copy from library which have sensible defaults +---------------------------------------------------------- + +This application also needs a bunch of configuration values that already have defaults set. In most cases you can just reuse these variables. + +See TODO for a list of these settings. 
+ +To do so, you can do something like this in your Django project's main settings.py file: + +.. code-block:: python + + from libcoveweb2 import settings + ALLOWED_JSON_CONTENT_TYPES = settings.ALLOWED_JSON_CONTENT_TYPES + ALLOWED_JSON_EXTENSIONS = settings.ALLOWED_JSON_EXTENSIONS + ... diff --git a/docs/hosting/requirements.rst b/docs/hosting/requirements.rst new file mode 100644 index 0000000..b3f9bd3 --- /dev/null +++ b/docs/hosting/requirements.rst @@ -0,0 +1,30 @@ +Hosting Requirements +==================== + +Python server for the Django app +---------------------------------- + +Normal options + +Database +---------- + +This is tested with PostgreSQL. + +Message queue compatible with Celery +-------------------------------------- + +Normal options + +File Storage +------------ + +TODO + +Cron tasks +---------- + +Some Django management commands should be run on a cron task. + +* `expire_files` should be run daily + diff --git a/docs/index.rst b/docs/index.rst index d23eedc..4579007 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,3 +1,30 @@ LibCoveWeb2 =========== + +LibCoveWeb2 is a Django application to use as a library in your own Django apps. + +It lets you create CoVE apps - CoVE exists to help people: + +* Convert data between common formats (e.g. csv to json) +* Validate data against rules +* Explore data, that machines find easy, but humans find harder to read + +The application consists of: + +* Database Models to save details of user submitted data +* File storage space to save the user submitted data and cache results of processing +* A message queue +* Workers to process the data according to tasks you provide (but there is a library of common tasks in this application) +* A view to show users output information from the cache of results + + +.. 
toctree:: + :maxdepth: 2 + + django-settings.rst + python-api/index.rst + migration-from-lib-cove-web.rst + hosting/requirements.rst + used-by.rst + diff --git a/docs/migration-from-lib-cove-web.rst b/docs/migration-from-lib-cove-web.rst new file mode 100644 index 0000000..ce96db7 --- /dev/null +++ b/docs/migration-from-lib-cove-web.rst @@ -0,0 +1,13 @@ +Migration from lib-cove-web +=========================== + + +This library is an updated version of the previous library ( https://github.com/OpenDataServices/lib-cove-web ). + +However, the structure of the library and how it works have massive changes. +For this reason the upgrade path isn't a simple upgrade, but requires rewriting work in the software that uses this. + +This is why this library is a separate git repository and a separate pypi package, +so that development of the previous library can also continue and users are not forced to upgrade before they are ready. + + diff --git a/docs/python-api/index.rst b/docs/python-api/index.rst new file mode 100644 index 0000000..8c77df4 --- /dev/null +++ b/docs/python-api/index.rst @@ -0,0 +1,15 @@ +Python API +========== + +This section documents the Python API that software using this library should consider. + +It does not document Python that is not intended for reuse by others (you can read the source code for that.) + + +.. toctree:: + :maxdepth: 2 + + settings.rst + process/base.rst + process/common_tasks/download_data_task.rst + process/common_tasks/task_with_state.rst diff --git a/docs/python-api/process/base.rst b/docs/python-api/process/base.rst new file mode 100644 index 0000000..949f131 --- /dev/null +++ b/docs/python-api/process/base.rst @@ -0,0 +1,5 @@ +Process Task Base +================= + +.. 
autoclass:: libcoveweb2.process.base.ProcessDataTask + :members: diff --git a/docs/python-api/process/common_tasks/download_data_task.rst b/docs/python-api/process/common_tasks/download_data_task.rst new file mode 100644 index 0000000..f33e39a --- /dev/null +++ b/docs/python-api/process/common_tasks/download_data_task.rst @@ -0,0 +1,5 @@ +Common Process Task: Download Data Task +======================================= + + +.. autoclass:: libcoveweb2.process.common_tasks.download_data_task.DownloadDataTask diff --git a/docs/python-api/process/common_tasks/task_with_state.rst b/docs/python-api/process/common_tasks/task_with_state.rst new file mode 100644 index 0000000..ec27fe6 --- /dev/null +++ b/docs/python-api/process/common_tasks/task_with_state.rst @@ -0,0 +1,7 @@ +Common Process Task: Task With State +==================================== + + +.. autoclass:: libcoveweb2.process.common_tasks.task_with_state.TaskWithState + :members: state_filename, process_get_state + diff --git a/docs/python-api/settings.rst b/docs/python-api/settings.rst new file mode 100644 index 0000000..9134811 --- /dev/null +++ b/docs/python-api/settings.rst @@ -0,0 +1,10 @@ +Settings +======== + +The application includes a settings module with several settings with default values. +If you don't need to change them, you can just take the defaults from this module. +See TODO for more information on setting up the settings you need. + +.. 
automodule:: libcoveweb2.settings + :members: + diff --git a/docs/used-by.rst b/docs/used-by.rst new file mode 100644 index 0000000..9e457ff --- /dev/null +++ b/docs/used-by.rst @@ -0,0 +1,10 @@ +Used by +======= + +This library is used by: + +* https://github.com/openownership/cove-bods +* https://github.com/Open-Telecoms-Data/cove-ofds +* https://github.com/GFDRR/rdls-cove + + diff --git a/libcoveweb2/process/base.py b/libcoveweb2/process/base.py index a7f8374..51e28f9 100644 --- a/libcoveweb2/process/base.py +++ b/libcoveweb2/process/base.py @@ -18,6 +18,7 @@ def is_processing_applicable(self) -> bool: eg. A task to convert a spreadsheet to JSON will never be applicable if JSON is uploaded in the first place. + eg. A task to check the data against JSON Schema will always be applicable. """ return False diff --git a/libcoveweb2/process/common_tasks/download_data_task.py b/libcoveweb2/process/common_tasks/download_data_task.py index 17e05bf..dd65aaa 100644 --- a/libcoveweb2/process/common_tasks/download_data_task.py +++ b/libcoveweb2/process/common_tasks/download_data_task.py @@ -2,7 +2,9 @@ class DownloadDataTask(ProcessDataTask): - """If user gave us a URL, we download it now.""" + """If user gave us a URL, we download it now. + + It is possible for apps to use this class with no further configuration.""" def is_processing_applicable(self) -> bool: for supplied_data_file in self.supplied_data_files: diff --git a/libcoveweb2/process/common_tasks/task_with_state.py b/libcoveweb2/process/common_tasks/task_with_state.py index 9ae068b..5ada3da 100644 --- a/libcoveweb2/process/common_tasks/task_with_state.py +++ b/libcoveweb2/process/common_tasks/task_with_state.py @@ -13,10 +13,10 @@ class TaskWithState(ProcessDataTask): Extend and provide your own state_filename and process_get_state. """ - """Set state_filename to a unique name for each task. 
- If you change this name the task will be rerun, so this is is a good way to - make sure all underlying data changes if a new version of this bit of cove - is released.""" + #: Set state_filename to a unique name for each task. + #: If you change this name the task will be rerun, so this is a good way to + #: make sure all underlying data changes if a new version of this bit of cove + #: is released. state_filename: str = "task_with_state.py" def process_get_state(self, process_data: dict): diff --git a/libcoveweb2/settings.py b/libcoveweb2/settings.py index 8938158..f4a30c9 100644 --- a/libcoveweb2/settings.py +++ b/libcoveweb2/settings.py @@ -29,16 +29,20 @@ DELETE_FILES_AFTER_DAYS=(int, 7), ) +#: Details for Piwik/Matomo tracking, if wanted. Should be a dictionary with the following keys: url, site_id and dimension_map PIWIK = { "url": env("PIWIK_URL"), "site_id": env("PIWIK_SITE_ID"), "dimension_map": env("PIWIK_DIMENSION_MAP"), } +#: Details for Google Analytics tracking, if wanted. Should be a string of the id. GOOGLE_ANALYTICS_ID = env("GOOGLE_ANALYTICS_ID") VALIDATION_ERROR_LOCATIONS_LENGTH = env("VALIDATION_ERROR_LOCATIONS_LENGTH") VALIDATION_ERROR_LOCATIONS_SAMPLE = env("VALIDATION_ERROR_LOCATIONS_SAMPLE") +#: Uploaded data is deleted after a certain number of days to protect privacy, leaving only meta data for usage analysis. +#: Should be an integer of the number of days. DELETE_FILES_AFTER_DAYS = env("DELETE_FILES_AFTER_DAYS") MEDIA_ROOT = os.path.join(BASE_DIR, "media") @@ -173,37 +177,42 @@ }, } -# Sometimes uploads happen with a generic content type. -# In this case, we can't rely on content type to detect type. -# But the type is still allowed, so it's added to -# ALLOWED_*_CONTENT_TYPES when they are defined. +#: Sometimes uploads happen with a generic content type. +#: In this case, we can't rely on content type to detect type. +#: But the type is still allowed. 
Define these generic content-types in a handy variable +#: so we can also add them to all the ALLOWED_*_CONTENT_TYPES settings. ALLOWED_UNKNOWN_CONTENT_TYPES = ["application/octet-stream"] -# JSON details +#: Allowed types for JSON files ALLOWED_JSON_CONTENT_TYPES = ["application/json"] + ALLOWED_UNKNOWN_CONTENT_TYPES +#: Allowed file extensions for JSON files ALLOWED_JSON_EXTENSIONS = [".json"] -# Excel details +#: Allowed types for Excel files ALLOWED_SPREADSHEET_EXCEL_CONTENT_TYPES = [ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ] + ALLOWED_UNKNOWN_CONTENT_TYPES +#: Allowed file extensions for Excel files ALLOWED_SPREADSHEET_EXCEL_EXTENSIONS = [".xlsx"] -# Open Document details +#: Allowed types for Open Document files ALLOWED_SPREADSHEET_OPENDOCUMENT_CONTENT_TYPES = [ "application/vnd.oasis.opendocument.spreadsheet", ] + ALLOWED_UNKNOWN_CONTENT_TYPES +#: Allowed file extensions for Open Document files ALLOWED_SPREADSHEET_OPENDOCUMENT_EXTENSIONS = [".ods"] -# Spreadsheet details (sum of details above) +#: Allowed types for Spreadsheet files (sum of Excel and Open Document) ALLOWED_SPREADSHEET_CONTENT_TYPES = ( ALLOWED_SPREADSHEET_EXCEL_CONTENT_TYPES + ALLOWED_SPREADSHEET_OPENDOCUMENT_CONTENT_TYPES ) +#: Allowed file extensions for Spreadsheet files (sum of Excel and Open Document) ALLOWED_SPREADSHEET_EXTENSIONS = ( ALLOWED_SPREADSHEET_EXCEL_EXTENSIONS + ALLOWED_SPREADSHEET_OPENDOCUMENT_EXTENSIONS ) -# CSV Details +#: Allowed types for CSV files ALLOWED_CSV_CONTENT_TYPES = ["text/csv"] + ALLOWED_UNKNOWN_CONTENT_TYPES +#: Allowed file extensions for CSV files ALLOWED_CSV_EXTENSIONS = [".csv"]