streamline and clarify usage of semantic maps
ausgerechnet committed Aug 27, 2024
1 parent 541ea3b commit f71f18e
Showing 11 changed files with 92 additions and 227 deletions.
27 changes: 27 additions & 0 deletions README.md
@@ -130,3 +130,30 @@
- [ ] rm or rename query / concordance / discourseme ranges
- [ ] example meta data creation
- [ ] example subcorpus creation


## Semantic Maps
Semantic maps are created by default for all
- collocation analyses (`GET /query/<query_id>/collocation/`)
- keyword analyses (`POST /keyword/`)

This behaviour cannot be deactivated right now.
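
For illustration, a minimal sketch of the default behaviour using `requests`; the base URL, auth scheme, the exact payload fields, and the `semantic_map_id` field in the response are assumptions, not prescribed by the API:

```python
import requests

BASE = "http://localhost:5000"                  # assumption: local development server
HEADERS = {"Authorization": "Bearer <token>"}   # assumption: bearer-token auth

# create a keyword analysis; a semantic map is initialised automatically
resp = requests.post(
    f"{BASE}/keyword/",
    json={
        "corpus_id": 1,              # target corpus
        "corpus_id_reference": 2,    # reference corpus
        "p": "lemma",                # target p-attribute
        "p_reference": "lemma",      # reference p-attribute
    },
    headers=HEADERS,
)
keyword = resp.json()

# assuming the output schema exposes semantic_map_id:
# the analysis already carries coordinates for its top items
print(keyword["id"], keyword["semantic_map_id"])
```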

Analyses used to be patchable via
- `PATCH /collocation/<id>/`
- `PATCH /keyword/<id>/`

where `semantic_map_id` was in fact the only property that could be set. This is superfluous, because there is also
- `POST /collocation/<id>/semantic-map/`
- `POST /keyword/<id>/semantic-map/`

which also accepts a `semantic_map_id` and makes sure that all top items of the analysis actually have coordinates (if `semantic_map_id` is None, a new semantic map is created). **I thus removed the `PATCH` endpoints.**
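
A sketch of re-using an existing map for an analysis via this endpoint; the base URL, auth, the concrete IDs, and passing `semantic_map_id` in the JSON body are assumptions:

```python
import requests

BASE = "http://localhost:5000"                  # assumption
HEADERS = {"Authorization": "Bearer <token>"}   # assumption

# attach collocation analysis 42 to semantic map 7;
# the endpoint ensures coordinates exist for all top items of the analysis
requests.post(
    f"{BASE}/collocation/42/semantic-map/",
    json={"semantic_map_id": 7},
    headers=HEADERS,
)

# omitting semantic_map_id (i.e. passing None) creates a fresh map instead
requests.post(f"{BASE}/collocation/42/semantic-map/", json={}, headers=HEADERS)
```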

There is also `PUT /semantic-map/`, which can be ignored right now.

### MMDA
Discoursemes do not have semantic maps any more; that they did was a blunder on my part. Instead, constellation descriptions have default semantic maps that can be used across analyses. These maps can be set when creating the description:
`POST /mmda/constellation/<id>/description/`
accepts a `semantic_map_id`.
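
A minimal sketch of setting the default map at creation time; base URL, auth, and the concrete IDs are assumptions, while `corpus_id` and `semantic_map_id` mirror `ConstellationDescriptionIn`:

```python
import requests

BASE = "http://localhost:5000"                  # assumption
HEADERS = {"Authorization": "Bearer <token>"}   # assumption

# create a description for constellation 3 with an existing map as its default
resp = requests.post(
    f"{BASE}/mmda/constellation/3/description/",
    json={
        "corpus_id": 1,
        "semantic_map_id": 7,   # becomes the description's default map
    },
    headers=HEADERS,
)
description = resp.json()
```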

Additionally, the endpoints for creating collocation or keyword analyses,
- `POST /mmda/constellation/<id>/description/<description_id>/collocation/`
- `POST /mmda/constellation/<id>/description/<description_id>/keyword/`
also accept a `semantic_map_id`. If provided, the endpoint makes sure that there are coordinates for all top items of the analysis (as above). If None is given, the default semantic map of the constellation description is used; if this is also None, a new one is created. Either way, if the constellation description did not have a default semantic map before, it will have one after starting an analysis.
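
A sketch of this fallback chain from the client's perspective; base URL, auth, the concrete IDs, the response field read at the end, and all payload fields besides the required `focus_discourseme_id` are assumptions:

```python
import requests

BASE = "http://localhost:5000"                  # assumption
HEADERS = {"Authorization": "Bearer <token>"}   # assumption

# start a collocation analysis without specifying a semantic map
resp = requests.post(
    f"{BASE}/mmda/constellation/3/description/12/collocation/",
    json={
        "focus_discourseme_id": 5,  # required by ConstellationCollocationIn
        "p": "lemma",               # assumption: inherited from CollocationIn
        "window": 10,               # assumption: inherited from CollocationIn
    },
    headers=HEADERS,
)
collocation = resp.json()

# the analysis now points at the description's default map;
# if the description had none, it received the newly created map
print(collocation["semantic_map_id"])
```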
28 changes: 14 additions & 14 deletions cads/collocation.py
@@ -171,9 +171,9 @@ class CollocationOut(Schema):


# IDENTICAL TO KEYWORDS ↓
class CollocationPatchIn(Schema):
# class CollocationPatchIn(Schema):

semantic_map_id = Integer(required=False, load_default=None)
# semantic_map_id = Integer(required=False, load_default=None)


class CollocationItemsIn(Schema):
@@ -253,22 +253,22 @@ def delete_collocation(id):
return 'Deletion successful.', 200


@bp.patch('/<id>/')
@bp.input(CollocationPatchIn)
@bp.output(CollocationOut)
@bp.auth_required(auth)
def patch_collocation(id, json_data):
"""Patch a collocation analysis. Use for updating semantic map.
# @bp.patch('/<id>/')
# @bp.input(CollocationPatchIn)
# @bp.output(CollocationOut)
# @bp.auth_required(auth)
# def patch_collocation(id, json_data):
# """Patch a collocation analysis. Use for updating semantic map.

"""
# """

collocation = db.get_or_404(Collocation, id)
# collocation = db.get_or_404(Collocation, id)

for attr, value in json_data.items():
setattr(collocation, attr, value)
db.session.commit()
# for attr, value in json_data.items():
# setattr(collocation, attr, value)
# db.session.commit()

return CollocationOut().dump(collocation), 200
# return CollocationOut().dump(collocation), 200


@bp.get("/<id>/items")
2 changes: 1 addition & 1 deletion cads/database.py
@@ -574,7 +574,7 @@ class Keyword(db.Model):
subcorpus_id_reference = db.Column(db.Integer(), db.ForeignKey('sub_corpus.id', ondelete='CASCADE'), nullable=True)
p_reference = db.Column(db.Unicode(255), nullable=False) # TODO

semantic_map_id = db.Column(db.Integer, db.ForeignKey('semantic_map.id'))
semantic_map_id = db.Column(db.Integer, db.ForeignKey('semantic_map.id', ondelete='CASCADE'))

sub_vs_rest = db.Column(db.Boolean)
min_freq = db.Column(db.Integer)
32 changes: 16 additions & 16 deletions cads/keyword.py
@@ -129,9 +129,9 @@ class KeywordOut(Schema):


# IDENTICAL TO COLLOCATION ↓
class KeywordPatchIn(Schema):
# class KeywordPatchIn(Schema):

semantic_map_id = Integer(required=False, load_default=None)
# semantic_map_id = Integer(required=False, load_default=None)


class KeywordItemsIn(Schema):
@@ -212,22 +212,22 @@ def delete_keyword(id):
return 'Deletion successful.', 200


@bp.patch('/<id>/')
@bp.input(KeywordPatchIn)
@bp.output(KeywordOut)
@bp.auth_required(auth)
def patch_keyword(id, json_data):
"""Patch a keyword analysis. Use for updating semantic map.
# @bp.patch('/<id>/')
# @bp.input(KeywordPatchIn)
# @bp.output(KeywordOut)
# @bp.auth_required(auth)
# def patch_keyword(id, json_data):
# """Patch a keyword analysis. Use for updating semantic map.

"""
# """

keyword = db.get_or_404(Keyword, id)
# keyword = db.get_or_404(Keyword, id)

for attr, value in json_data.items():
setattr(keyword, attr, value)
db.session.commit()
# for attr, value in json_data.items():
# setattr(keyword, attr, value)
# db.session.commit()

return KeywordOut().dump(keyword), 200
# return KeywordOut().dump(keyword), 200


@bp.get('/<id>/items')
@@ -300,7 +300,7 @@ def create_keyword(json_data):
"""

# semantic map
semantic_map_id = json_data.get('semantic_map_id')
semantic_map_id = json_data.get('semantic_map_id', None)

# corpus
corpus_id = json_data.get('corpus_id')
@@ -317,7 +317,6 @@
min_freq = json_data.get('min_freq')

keyword = Keyword(
semantic_map_id=semantic_map_id,
corpus_id=corpus_id,
subcorpus_id=subcorpus_id,
p=p,
@@ -331,6 +330,7 @@
db.session.commit()

ccc_keywords(keyword)
# TODO make optional:
ccc_init_semmap(keyword, semantic_map_id)

return KeywordOut().dump(keyword), 200
26 changes: 16 additions & 10 deletions cads/mmda/constellation.py
@@ -413,6 +413,7 @@ class ConstellationDescriptionIn(Schema):
corpus_id = Integer(required=True)
subcorpus_id = Integer(required=False)

semantic_map_id = Integer(required=False, load_default=None)
p = String(required=False)
s = String(required=False)
match_strategy = String(load_default='longest', required=False, validate=OneOf(['longest', 'shortest', 'standard']))
@@ -426,6 +427,7 @@ class ConstellationDiscoursemeDescriptionIn(Schema):

class ConstellationCollocationIn(CollocationIn):

semantic_map_id = Integer(required=False, load_default=None)
focus_discourseme_id = Integer(required=True)
filter_discourseme_ids = List(Integer(), load_default=[], required=False)

@@ -537,7 +539,7 @@ def delete_constellation(id):
@bp.output(ConstellationOut)
@bp.auth_required(auth)
def patch_constellation(id, json_data):
"""Patch constellation.
"""Patch constellation. Use for updating name, comment, or discoursemes.
"""
constellation = db.get_or_404(Constellation, id)
@@ -570,7 +572,7 @@ def patch_constellation(id, json_data):
@bp.output(ConstellationOut)
@bp.auth_required(auth)
def patch_constellation_add(id, json_data):
"""Patch constellation: add discourseme.
"""Patch constellation: add discourseme(s).
"""
constellation = db.get_or_404(Constellation, id)
@@ -588,7 +590,7 @@ def patch_constellation_add(id, json_data):
@bp.output(ConstellationOut)
@bp.auth_required(auth)
def patch_constellation_remove(id, json_data):
"""Patch constellation: remove discourseme.
"""Patch constellation: remove discourseme(s).
"""
constellation = db.get_or_404(Constellation, id)
@@ -631,6 +633,7 @@ def create_description(id, json_data):
corpus = db.get_or_404(Corpus, corpus_id)
subcorpus_id = json_data.get('subcorpus_id')
# subcorpus = db.get_or_404(SubCorpus, subcorpus_id) if subcorpus_id else None
semantic_map_id = json_data.get('semantic_map_id')

p_description = json_data.get('p', corpus.p_default)
s_query = json_data.get('s', corpus.s_default)
@@ -639,6 +642,7 @@

description = ConstellationDescription(
constellation_id=constellation.id,
semantic_map_id=semantic_map_id,
corpus_id=corpus.id,
subcorpus_id=subcorpus_id,
p=p_description,
@@ -694,7 +698,7 @@ def get_description(id, description_id):
"""

# discourseme = db.get_or_404(Discourseme, id) # TODO: needed?
# constellation = db.get_or_404(Constellation, id) # TODO: needed?
description = db.get_or_404(ConstellationDescription, description_id)

return ConstellationDescriptionOut().dump(description)
@@ -707,7 +711,7 @@ def delete_description(id, description_id):
"""

# discourseme = db.get_or_404(Discourseme, id) # TODO: needed?
# constellation = db.get_or_404(Constellation, id) # TODO: needed?
description = db.get_or_404(ConstellationDescription, description_id)
db.session.delete(description)
db.session.commit()
@@ -832,6 +836,7 @@ def create_collocation(id, description_id, json_data):

# semantic map
semantic_map_id = json_data.get('semantic_map_id', None)
semantic_map_id = description.semantic_map_id if not semantic_map_id else semantic_map_id

# filtering
filter_discourseme_ids = json_data.get('filter_discourseme_ids')
@@ -850,7 +855,6 @@

# create collocation object
collocation = Collocation(
# constellation_id=constellation_id,
semantic_map_id=semantic_map_id,
query_id=focus_query.id,
p=p,
@@ -864,8 +868,8 @@
get_or_create_counts(collocation, remove_focus_cpos=False)
set_collocation_discourseme_scores(collocation, description.discourseme_descriptions, overlap=description.overlap)
ccc_init_semmap(collocation, semantic_map_id)

collocation.focus_discourseme_id = json_data['focus_discourseme_id']
if description.semantic_map_id is None:
description.semantic_map_id = collocation.semantic_map_id

return ConstellationCollocationOut().dump(collocation), 200

@@ -1031,14 +1035,14 @@ def create_keyword(id, description_id, json_data):
p_reference = json_data.get('p_reference')

# semantic map
semantic_map_id = json_data.get('semantic_map_id')
semantic_map_id = json_data.get('semantic_map_id', None)
semantic_map_id = description.semantic_map_id if not semantic_map_id else semantic_map_id

# settings
sub_vs_rest = json_data.get('sub_vs_rest')
min_freq = json_data.get('min_freq')

keyword = Keyword(
# constellation_id=constellation_id,
semantic_map_id=semantic_map_id,
corpus_id=corpus_id,
subcorpus_id=subcorpus_id,
@@ -1055,6 +1059,8 @@
ccc_keywords(keyword)
set_keyword_discourseme_scores(keyword, description.discourseme_descriptions)
ccc_init_semmap(keyword, semantic_map_id)
if description.semantic_map_id is None:
description.semantic_map_id = keyword.semantic_map_id

return KeywordOut().dump(keyword), 200

1 change: 1 addition & 0 deletions cads/mmda/database.py
@@ -186,6 +186,7 @@ class ConstellationDescription(db.Model):
overlap = db.Column(db.Unicode, default='partial') # when to count a discourseme to be in context (partial, full, match, matchend)

discourseme_descriptions = db.relationship("DiscoursemeDescription", secondary=constellation_discourseme_description)
semantic_map_id = db.Column(db.Integer, db.ForeignKey('semantic_map.id', ondelete='CASCADE'))

@property
def corpus(self):
2 changes: 1 addition & 1 deletion cads/mmda/discourseme.py
@@ -281,7 +281,7 @@ class DiscoursemeDescriptionOut(Schema):
p = String(required=True)
s = String(required=True)
match_strategy = String(required=True)
semantic_map_id = Integer(required=True, dump_default=None, metadata={'nullable': True})
# semantic_map_id = Integer(required=True, dump_default=None, metadata={'nullable': True})
items = Nested(DiscoursemeDescriptionItem(many=True), required=True, dump_default=[])


13 changes: 2 additions & 11 deletions cads/query.py
@@ -670,10 +670,6 @@ def get_collocation(query_id, query_data):
s_break = query_data.get('s_break')
marginals = query_data.get('marginals', 'global')

# constellation and semantic map
# constellation_id = query_data.get('constellation_id', None)
# constellation = db.get_or_404(Constellation, constellation_id) if constellation_id else None

semantic_map_id = query_data.get('semantic_map_id', None)

# filtering for second-order collocation
@@ -697,6 +693,7 @@

if len(filter_queries) > 0:

# TODO
# note that the database scheme does not allow to have several filter queries
# we thus name the actual query result here to be able to retrieve it
nqr_name = "SOC" + "_" + "_q".join(["q" + str(query.id)] + [str(fq.id) for fq in filter_queries])
@@ -741,7 +738,6 @@
corpus_id=query.corpus.id,
subcorpus_id=query.subcorpus.id if query.subcorpus else None,
soc_sequence=nqr_name,
# discourseme_id=query.discourseme.id,
match_strategy=query.match_strategy,
s=query.s
)
@@ -754,8 +750,6 @@
df_matches.to_sql('matches', con=db.engine, if_exists='append', index=False)

collocation = Collocation(
# constellation_id=constellation_id,
# semantic_map_id=semantic_map_id,
query_id=query.id,
p=p,
s_break=s_break,
Expand All @@ -766,10 +760,7 @@ def get_collocation(query_id, query_data):
db.session.commit()

get_or_create_counts(collocation, remove_focus_cpos=True)
# TODO make optional:
ccc_init_semmap(collocation, semantic_map_id)

# if constellation:
# discoursemes = constellation.highlight_discoursemes + constellation.filter_discoursemes
# ccc_discourseme_counts(collocation, discoursemes)

return CollocationOut().dump(collocation), 200
4 changes: 4 additions & 0 deletions cads/semantic_map.py
@@ -18,7 +18,10 @@


def ccc_semmap(collocation_ids, sort_by, number, blacklist_items=[], method='tsne'):
"""
# TODO make analysis agnostic (keyword, collocation)
"""

semantic_map = None
dfs = list()
@@ -182,6 +185,7 @@ class SemanticMapIn(Schema):

collocation_ids = List(Integer(), required=False, load_default=[])
keyword_ids = List(Integer(), required=False, load_default=[])

method = String(required=False, load_default='tsne', validate=OneOf(['tsne', 'umap']))


2 changes: 1 addition & 1 deletion cads/version.py
@@ -1,4 +1,4 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-

__version__ = "0.3.0.dev5"
__version__ = "0.3.0.dev6"