streamline and clarify usage of semantic maps
ausgerechnet committed Aug 27, 2024
1 parent 541ea3b commit f71f18e
Showing 11 changed files with 92 additions and 227 deletions.
27 changes: 27 additions & 0 deletions README.md
@@ -130,3 +130,30 @@
- [ ] rm or rename query / concordance / discourseme ranges
- [ ] example meta data creation
- [ ] example subcorpus creation


## Semantic Maps
Semantic maps are created by default for all
- collocation analyses (`GET /query/<query_id>/collocation/`)
- keyword analyses (`POST /keyword/`)

This behaviour cannot be deactivated right now.
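
For illustration, a minimal sketch of the default behaviour using `requests`; the base URL, auth scheme, the exact payload fields, and the `semantic_map_id` field in the response are assumptions, not prescribed by the API:

```python
import requests

BASE = "http://localhost:5000"                  # assumption: local development server
HEADERS = {"Authorization": "Bearer <token>"}   # assumption: bearer-token auth

# create a keyword analysis; a semantic map is initialised automatically
resp = requests.post(
    f"{BASE}/keyword/",
    json={
        "corpus_id": 1,              # target corpus
        "corpus_id_reference": 2,    # reference corpus
        "p": "lemma",                # target p-attribute
        "p_reference": "lemma",      # reference p-attribute
    },
    headers=HEADERS,
)
keyword = resp.json()

# assuming the output schema exposes semantic_map_id:
# the analysis already carries coordinates for its top items
print(keyword["id"], keyword["semantic_map_id"])
```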

Analyses used to be patchable via
- `PATCH /collocation/<id>/`
- `PATCH /keyword/<id>/`

where `semantic_map_id` was in fact the only property that could be set. This is superfluous, because there is also
- `POST /collocation/<id>/semantic-map/`
- `POST /keyword/<id>/semantic-map/`

which also accepts a `semantic_map_id` and makes sure that all top items of the analysis actually have coordinates (if `semantic_map_id` is None, a new semantic map is created). **I thus removed the `PATCH` endpoints.**
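
A sketch of re-using an existing map for an analysis via this endpoint; the base URL, auth, the concrete IDs, and passing `semantic_map_id` in the JSON body are assumptions:

```python
import requests

BASE = "http://localhost:5000"                  # assumption
HEADERS = {"Authorization": "Bearer <token>"}   # assumption

# attach collocation analysis 42 to semantic map 7;
# the endpoint ensures coordinates exist for all top items of the analysis
requests.post(
    f"{BASE}/collocation/42/semantic-map/",
    json={"semantic_map_id": 7},
    headers=HEADERS,
)

# omitting semantic_map_id (i.e. passing None) creates a fresh map instead
requests.post(f"{BASE}/collocation/42/semantic-map/", json={}, headers=HEADERS)
```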

There is also `PUT /semantic-map/`, which can be ignored right now.

### MMDA
Discoursemes do not have semantic maps any more; that they did was a blunder on my part. Instead, constellation descriptions have default semantic maps that can be used across analyses. These maps can be set when creating the description:
`POST /mmda/constellation/<id>/description/`
accepts a `semantic_map_id`.
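
A minimal sketch of setting the default map at creation time; base URL, auth, and the concrete IDs are assumptions, while `corpus_id` and `semantic_map_id` mirror `ConstellationDescriptionIn`:

```python
import requests

BASE = "http://localhost:5000"                  # assumption
HEADERS = {"Authorization": "Bearer <token>"}   # assumption

# create a description for constellation 3 with an existing map as its default
resp = requests.post(
    f"{BASE}/mmda/constellation/3/description/",
    json={
        "corpus_id": 1,
        "semantic_map_id": 7,   # becomes the description's default map
    },
    headers=HEADERS,
)
description = resp.json()
```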

Additionally, the endpoints for creating collocation or keyword analyses,
- `POST /mmda/constellation/<id>/description/<description_id>/collocation/`
- `POST /mmda/constellation/<id>/description/<description_id>/keyword/`
also accept a `semantic_map_id`. If provided, the endpoint makes sure that there are coordinates for all top items of the analysis (as above). If None is given, the default semantic map of the constellation description is used; if this is also None, a new one is created. Either way, if the constellation description did not have a default semantic map before, it will have one after starting an analysis.
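
A sketch of this fallback chain from the client's perspective; base URL, auth, the concrete IDs, the response field read at the end, and all payload fields besides the required `focus_discourseme_id` are assumptions:

```python
import requests

BASE = "http://localhost:5000"                  # assumption
HEADERS = {"Authorization": "Bearer <token>"}   # assumption

# start a collocation analysis without specifying a semantic map
resp = requests.post(
    f"{BASE}/mmda/constellation/3/description/12/collocation/",
    json={
        "focus_discourseme_id": 5,  # required by ConstellationCollocationIn
        "p": "lemma",               # assumption: inherited from CollocationIn
        "window": 10,               # assumption: inherited from CollocationIn
    },
    headers=HEADERS,
)
collocation = resp.json()

# the analysis now points at the description's default map;
# if the description had none, it received the newly created map
print(collocation["semantic_map_id"])
```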
28 changes: 14 additions & 14 deletions cads/collocation.py
@@ -171,9 +171,9 @@ class CollocationOut(Schema):


# IDENTICAL TO KEYWORDS ↓
class CollocationPatchIn(Schema):
# class CollocationPatchIn(Schema):

semantic_map_id = Integer(required=False, load_default=None)
# semantic_map_id = Integer(required=False, load_default=None)


class CollocationItemsIn(Schema):
@@ -253,22 +253,22 @@ def delete_collocation(id):
return 'Deletion successful.', 200


@bp.patch('/<id>/')
@bp.input(CollocationPatchIn)
@bp.output(CollocationOut)
@bp.auth_required(auth)
def patch_collocation(id, json_data):
"""Patch a collocation analysis. Use for updating semantic map.
# @bp.patch('/<id>/')
# @bp.input(CollocationPatchIn)
# @bp.output(CollocationOut)
# @bp.auth_required(auth)
# def patch_collocation(id, json_data):
# """Patch a collocation analysis. Use for updating semantic map.

"""
# """

collocation = db.get_or_404(Collocation, id)
# collocation = db.get_or_404(Collocation, id)

for attr, value in json_data.items():
setattr(collocation, attr, value)
db.session.commit()
# for attr, value in json_data.items():
# setattr(collocation, attr, value)
# db.session.commit()

return CollocationOut().dump(collocation), 200
# return CollocationOut().dump(collocation), 200


@bp.get("/<id>/items")
2 changes: 1 addition & 1 deletion cads/database.py
@@ -574,7 +574,7 @@ class Keyword(db.Model):
subcorpus_id_reference = db.Column(db.Integer(), db.ForeignKey('sub_corpus.id', ondelete='CASCADE'), nullable=True)
p_reference = db.Column(db.Unicode(255), nullable=False) # TODO

semantic_map_id = db.Column(db.Integer, db.ForeignKey('semantic_map.id'))
semantic_map_id = db.Column(db.Integer, db.ForeignKey('semantic_map.id', ondelete='CASCADE'))

sub_vs_rest = db.Column(db.Boolean)
min_freq = db.Column(db.Integer)
32 changes: 16 additions & 16 deletions cads/keyword.py
@@ -129,9 +129,9 @@ class KeywordOut(Schema):


# IDENTICAL TO COLLOCATION ↓
class KeywordPatchIn(Schema):
# class KeywordPatchIn(Schema):

semantic_map_id = Integer(required=False, load_default=None)
# semantic_map_id = Integer(required=False, load_default=None)


class KeywordItemsIn(Schema):
@@ -212,22 +212,22 @@ def delete_keyword(id):
return 'Deletion successful.', 200


@bp.patch('/<id>/')
@bp.input(KeywordPatchIn)
@bp.output(KeywordOut)
@bp.auth_required(auth)
def patch_keyword(id, json_data):
"""Patch a keyword analysis. Use for updating semantic map.
# @bp.patch('/<id>/')
# @bp.input(KeywordPatchIn)
# @bp.output(KeywordOut)
# @bp.auth_required(auth)
# def patch_keyword(id, json_data):
# """Patch a keyword analysis. Use for updating semantic map.

"""
# """

keyword = db.get_or_404(Keyword, id)
# keyword = db.get_or_404(Keyword, id)

for attr, value in json_data.items():
setattr(keyword, attr, value)
db.session.commit()
# for attr, value in json_data.items():
# setattr(keyword, attr, value)
# db.session.commit()

return KeywordOut().dump(keyword), 200
# return KeywordOut().dump(keyword), 200


@bp.get('/<id>/items')
@@ -300,7 +300,7 @@ def create_keyword(json_data):
"""

# semantic map
semantic_map_id = json_data.get('semantic_map_id')
semantic_map_id = json_data.get('semantic_map_id', None)

# corpus
corpus_id = json_data.get('corpus_id')
@@ -317,7 +317,6 @@
min_freq = json_data.get('min_freq')

keyword = Keyword(
semantic_map_id=semantic_map_id,
corpus_id=corpus_id,
subcorpus_id=subcorpus_id,
p=p,
@@ -331,6 +330,7 @@
db.session.commit()

ccc_keywords(keyword)
# TODO make optional:
ccc_init_semmap(keyword, semantic_map_id)

return KeywordOut().dump(keyword), 200
26 changes: 16 additions & 10 deletions cads/mmda/constellation.py
@@ -413,6 +413,7 @@ class ConstellationDescriptionIn(Schema):
corpus_id = Integer(required=True)
subcorpus_id = Integer(required=False)

semantic_map_id = Integer(required=False, load_default=None)
p = String(required=False)
s = String(required=False)
match_strategy = String(load_default='longest', required=False, validate=OneOf(['longest', 'shortest', 'standard']))
@@ -426,6 +427,7 @@ class ConstellationDiscoursemeDescriptionIn(Schema):

class ConstellationCollocationIn(CollocationIn):

semantic_map_id = Integer(required=False, load_default=None)
focus_discourseme_id = Integer(required=True)
filter_discourseme_ids = List(Integer(), load_default=[], required=False)

@@ -537,7 +539,7 @@ def delete_constellation(id):
@bp.output(ConstellationOut)
@bp.auth_required(auth)
def patch_constellation(id, json_data):
"""Patch constellation.
"""Patch constellation. Use for updating name, comment, or discoursemes.
"""
constellation = db.get_or_404(Constellation, id)
@@ -570,7 +572,7 @@ def patch_constellation(id, json_data):
@bp.output(ConstellationOut)
@bp.auth_required(auth)
def patch_constellation_add(id, json_data):
"""Patch constellation: add discourseme.
"""Patch constellation: add discourseme(s).
"""
constellation = db.get_or_404(Constellation, id)
@@ -588,7 +590,7 @@ def patch_constellation_add(id, json_data):
@bp.output(ConstellationOut)
@bp.auth_required(auth)
def patch_constellation_remove(id, json_data):
"""Patch constellation: remove discourseme.
"""Patch constellation: remove discourseme(s).
"""
constellation = db.get_or_404(Constellation, id)
@@ -631,6 +633,7 @@ def create_description(id, json_data):
corpus = db.get_or_404(Corpus, corpus_id)
subcorpus_id = json_data.get('subcorpus_id')
# subcorpus = db.get_or_404(SubCorpus, subcorpus_id) if subcorpus_id else None
semantic_map_id = json_data.get('semantic_map_id')

p_description = json_data.get('p', corpus.p_default)
s_query = json_data.get('s', corpus.s_default)
@@ -639,6 +642,7 @@

description = ConstellationDescription(
constellation_id=constellation.id,
semantic_map_id=semantic_map_id,
corpus_id=corpus.id,
subcorpus_id=subcorpus_id,
p=p_description,
@@ -694,7 +698,7 @@ def get_description(id, description_id):
"""

# discourseme = db.get_or_404(Discourseme, id) # TODO: needed?
# constellation = db.get_or_404(Constellation, id) # TODO: needed?
description = db.get_or_404(ConstellationDescription, description_id)

return ConstellationDescriptionOut().dump(description)
@@ -707,7 +711,7 @@ def delete_description(id, description_id):
"""

# discourseme = db.get_or_404(Discourseme, id) # TODO: needed?
# constellation = db.get_or_404(Constellation, id) # TODO: needed?
description = db.get_or_404(ConstellationDescription, description_id)
db.session.delete(description)
db.session.commit()
@@ -832,6 +836,7 @@ def create_collocation(id, description_id, json_data):

# semantic map
semantic_map_id = json_data.get('semantic_map_id', None)
semantic_map_id = description.semantic_map_id if not semantic_map_id else semantic_map_id

# filtering
filter_discourseme_ids = json_data.get('filter_discourseme_ids')
@@ -850,7 +855,6 @@

# create collocation object
collocation = Collocation(
# constellation_id=constellation_id,
semantic_map_id=semantic_map_id,
query_id=focus_query.id,
p=p,
@@ -864,8 +868,8 @@
get_or_create_counts(collocation, remove_focus_cpos=False)
set_collocation_discourseme_scores(collocation, description.discourseme_descriptions, overlap=description.overlap)
ccc_init_semmap(collocation, semantic_map_id)

collocation.focus_discourseme_id = json_data['focus_discourseme_id']
if description.semantic_map_id is None:
description.semantic_map_id = collocation.semantic_map_id

return ConstellationCollocationOut().dump(collocation), 200

@@ -1031,14 +1035,14 @@ def create_keyword(id, description_id, json_data):
p_reference = json_data.get('p_reference')

# semantic map
semantic_map_id = json_data.get('semantic_map_id')
semantic_map_id = json_data.get('semantic_map_id', None)
semantic_map_id = description.semantic_map_id if not semantic_map_id else semantic_map_id

# settings
sub_vs_rest = json_data.get('sub_vs_rest')
min_freq = json_data.get('min_freq')

keyword = Keyword(
# constellation_id=constellation_id,
semantic_map_id=semantic_map_id,
corpus_id=corpus_id,
subcorpus_id=subcorpus_id,
@@ -1055,6 +1059,8 @@
ccc_keywords(keyword)
set_keyword_discourseme_scores(keyword, description.discourseme_descriptions)
ccc_init_semmap(keyword, semantic_map_id)
if description.semantic_map_id is None:
description.semantic_map_id = keyword.semantic_map_id

return KeywordOut().dump(keyword), 200

1 change: 1 addition & 0 deletions cads/mmda/database.py
@@ -186,6 +186,7 @@ class ConstellationDescription(db.Model):
overlap = db.Column(db.Unicode, default='partial') # when to count a discourseme to be in context (partial, full, match, matchend)

discourseme_descriptions = db.relationship("DiscoursemeDescription", secondary=constellation_discourseme_description)
semantic_map_id = db.Column(db.Integer, db.ForeignKey('semantic_map.id', ondelete='CASCADE'))

@property
def corpus(self):
2 changes: 1 addition & 1 deletion cads/mmda/discourseme.py
@@ -281,7 +281,7 @@ class DiscoursemeDescriptionOut(Schema):
p = String(required=True)
s = String(required=True)
match_strategy = String(required=True)
semantic_map_id = Integer(required=True, dump_default=None, metadata={'nullable': True})
# semantic_map_id = Integer(required=True, dump_default=None, metadata={'nullable': True})
items = Nested(DiscoursemeDescriptionItem(many=True), required=True, dump_default=[])


13 changes: 2 additions & 11 deletions cads/query.py
@@ -670,10 +670,6 @@ def get_collocation(query_id, query_data):
s_break = query_data.get('s_break')
marginals = query_data.get('marginals', 'global')

# constellation and semantic map
# constellation_id = query_data.get('constellation_id', None)
# constellation = db.get_or_404(Constellation, constellation_id) if constellation_id else None

semantic_map_id = query_data.get('semantic_map_id', None)

# filtering for second-order collocation
@@ -697,6 +693,7 @@

if len(filter_queries) > 0:

# TODO
# note that the database scheme does not allow to have several filter queries
# we thus name the actual query result here to be able to retrieve it
nqr_name = "SOC" + "_" + "_q".join(["q" + str(query.id)] + [str(fq.id) for fq in filter_queries])
@@ -741,7 +738,6 @@
corpus_id=query.corpus.id,
subcorpus_id=query.subcorpus.id if query.subcorpus else None,
soc_sequence=nqr_name,
# discourseme_id=query.discourseme.id,
match_strategy=query.match_strategy,
s=query.s
)
@@ -754,8 +750,6 @@
df_matches.to_sql('matches', con=db.engine, if_exists='append', index=False)

collocation = Collocation(
# constellation_id=constellation_id,
# semantic_map_id=semantic_map_id,
query_id=query.id,
p=p,
s_break=s_break,
Expand All @@ -766,10 +760,7 @@ def get_collocation(query_id, query_data):
db.session.commit()

get_or_create_counts(collocation, remove_focus_cpos=True)
# TODO make optional:
ccc_init_semmap(collocation, semantic_map_id)

# if constellation:
# discoursemes = constellation.highlight_discoursemes + constellation.filter_discoursemes
# ccc_discourseme_counts(collocation, discoursemes)

return CollocationOut().dump(collocation), 200
4 changes: 4 additions & 0 deletions cads/semantic_map.py
@@ -18,7 +18,10 @@


def ccc_semmap(collocation_ids, sort_by, number, blacklist_items=[], method='tsne'):
"""
# TODO make analysis agnostic (keyword, collocation)
"""

semantic_map = None
dfs = list()
@@ -182,6 +185,7 @@ class SemanticMapIn(Schema):

collocation_ids = List(Integer(), required=False, load_default=[])
keyword_ids = List(Integer(), required=False, load_default=[])

method = String(required=False, load_default='tsne', validate=OneOf(['tsne', 'umap']))


2 changes: 1 addition & 1 deletion cads/version.py
@@ -1,4 +1,4 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-

__version__ = "0.3.0.dev5"
__version__ = "0.3.0.dev6"