Skip to content

Commit

Permalink
various improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
xrotwang committed Nov 23, 2024
1 parent d0b95ce commit 62b39b8
Show file tree
Hide file tree
Showing 30 changed files with 715 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.8, 3.9, "3.10", "3.11"]
python-version: [3.9, "3.10", 3.11, 3.12]

steps:
- uses: actions/checkout@v4
Expand Down
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changes

## unreleased

- Dropped support for python 3.8, added support for python 3.13.
- Added command to compute distances between areas and Glottolog point coordinates.
- Added functions to write more compact GeoJSON by limiting float precision to 5 decimal places.


## [1.0.0] - 2024-06-19

Added function to translate GeoJSON objects to be "pacific centered".
Expand Down
7 changes: 4 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ classifiers =
Natural Language :: English
Operating System :: OS Independent
Programming Language :: Python :: 3
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Programming Language :: Python :: 3.12
Programming Language :: Python :: 3.13
Programming Language :: Python :: Implementation :: CPython
Programming Language :: Python :: Implementation :: PyPy
License :: OSI Approved :: Apache Software License
Expand All @@ -34,11 +34,12 @@ zip_safe = False
packages = find:
package_dir =
= src
python_requires = >=3.8
python_requires = >=3.9
install_requires =
cldfbench
clldutils
pycldf>=1.30.0
pyglottolog
shapely
rasterio
mako
Expand Down Expand Up @@ -94,7 +95,7 @@ show_missing = true
skip_covered = true

[tox:tox]
envlist = py38, py39, py310, py311, py312
envlist = py39, py310, py311, py312, py313
isolated_build = true
skip_missing_interpreter = true

Expand Down
63 changes: 63 additions & 0 deletions src/cldfgeojson/commands/glottolog_distance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""
Compute the distance between speaker areas in a CLDF dataset and the corresponding Glottolog point
coordinates.
To get an overview of Glottolog distances for a dataset you may pipe the output to the csvstat
tool:
cldfbench geojson.glottolog_distance path/to/cldf --glottolog path/to/glottolog --format tsv | csvstat -t
You can also print the distances to the terminal using a tool like termgraph:
cldfbench geojson.glottolog_distance path/to/cldf --glottolog path/to/glottolog --format tsv | \
sed '/^$/d' | csvcut -t -c ID,Distance | csvsort -c Distance | csvformat -E | termgraph
""" # noqa: E501
from clldutils.clilib import Table, add_format
from shapely.geometry import Point, shape, MultiPolygon
from pycldf.cli_util import add_dataset, get_dataset, add_catalog_spec
from pycldf.media import MediaTable
from tqdm import tqdm


def register(parser):
    """
    Configure the argument parser of the command.

    Delegates to the imported helpers `add_dataset`, `add_catalog_spec` (for 'glottolog') and
    `add_format` (with 'simple' - presumably the default table format; confirm against
    clldutils.clilib).

    :param parser: The parser object handed on to the three helper functions.
    """
    add_dataset(parser)
    add_catalog_spec(parser, 'glottolog')
    add_format(parser, 'simple')


def run(args):
    """
    Write a table relating the speaker area of each language to its Glottolog point coordinate.

    Languages whose Glottocode has no point coordinate in Glottolog are skipped. For every other
    row of the dataset's LanguageTable one table row is added, with the columns

    - ID: the language ID,
    - Distance: 0 if the point lies within the area or within the area's convex hull, otherwise
      the distance as computed by shapely (in coordinate units, presumably degrees),
    - Contained: whether the area itself contains the point,
    - NPolys: the number of polygons of a MultiPolygon area, 1 for any other geometry.

    :param args: Parsed command line arguments, providing the dataset locator, the Glottolog \
    catalog (`args.glottolog.api`) and the table format options.
    """
    from cldfgeojson import MEDIA_TYPE

    ds = get_dataset(args)

    # Index the geometries of all GeoJSON media items: first by media ID, then by the language
    # which is referenced from the feature's properties.
    geojsons = {}
    for media in MediaTable(ds):
        if media.mimetype == MEDIA_TYPE:
            geojsons[media.id] = {
                f['properties']['cldf:languageReference']: shape(f['geometry'])
                for f in media.read_json()['features']
                if f['properties'].get('cldf:languageReference')}

    # Test against None explicitly: a truthiness check would drop languoids located at
    # longitude 0.0, and would not catch a missing latitude.
    gl_coords = {
        lg.id: Point(float(lg.longitude), float(lg.latitude))
        for lg in args.glottolog.api.languoids()
        if lg.longitude is not None and lg.latitude is not None}

    with Table(args, 'ID', 'Distance', 'Contained', 'NPolys') as t:
        # tqdm wraps the iterable itself (not an enumerate object), so it can pick up the
        # length of the iterable - if available - to display proper progress.
        for lg in tqdm(ds.objects('LanguageTable')):
            if lg.cldf.glottocode not in gl_coords:
                continue

            if lg.cldf.speakerArea in geojsons:
                shp = geojsons[lg.cldf.speakerArea][lg.cldf.id]
            else:  # pragma: no cover
                shp = shape(lg.speaker_area_as_geojson_feature['geometry'])

            npolys = len(shp.geoms) if isinstance(shp, MultiPolygon) else 1
            gl_coord = gl_coords[lg.cldf.glottocode]
            if shp.contains(gl_coord):
                t.append((lg.id, 0, True, npolys))
            elif shp.convex_hull.contains(gl_coord):
                # Not inside the area itself, but surrounded by it: reported as distance 0.
                t.append((lg.id, 0, False, npolys))  # pragma: no cover
            else:
                dist = shp.distance(gl_coord)
                if dist > 180:
                    # Presumably accounts for areas and points on opposite sides of the
                    # antimeridian - the shorter way round is reported.
                    dist = abs(dist - 360)  # pragma: no cover
                t.append((lg.id, dist, False, npolys))
7 changes: 7 additions & 0 deletions src/cldfgeojson/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,18 @@ def merged_geometry(features: typing.Iterable[typing.Union[geojson.Feature, geoj
Specify `None` to add no buffer.
:return: The resulting Geometry object representing the merged shapes.
"""
features = list(features)

if len(features) == 1:
f = features[0]
return f.get('geometry', f)

def get_shape(f):
s = shape(f.get('geometry', f))
if buffer:
s = s.buffer(buffer)
return s

res = union_all([get_shape(f) for f in features])
if buffer:
res = res.buffer(-buffer)
Expand Down
56 changes: 55 additions & 1 deletion src/cldfgeojson/geojson.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import json
import typing
import collections

__all__ = ['MEDIA_TYPE', 'Geometry', 'Feature', 'pacific_centered']
from clldutils import jsonlib

__all__ = ['MEDIA_TYPE', 'Geometry', 'Feature', 'pacific_centered', 'dump', 'dumps']

# See https://datatracker.ietf.org/doc/html/rfc7946#section-12
MEDIA_TYPE = 'application/geo+json'
Expand Down Expand Up @@ -80,3 +84,53 @@ def fix_position(pos):
fix_position(pos) for pos in line] for line in poly] for poly in geom['coordinates']]

return obj


#
# To make GeoJSON as small as possible, we provide functionality to write GeoJSON with coordinates
# limited to 5 decimal places, corresponding to a precision of ~1m.
#
class FloatWrapper:
    """
    Opaque container for a single float.

    In order to use the extension mechanism of Python's json.JSONEncoder - the `default` method,
    which is only consulted for objects the encoder cannot serialize itself - we have to wrap
    floats in an "unknown" type.
    """
    # One wrapper is created per wrapped coordinate value, so we avoid the overhead of a
    # per-instance __dict__.
    __slots__ = ('value',)

    def __init__(self, value):
        """
        :param value: The float to be wrapped.
        """
        self.value = value


def wrap_floats(obj, in_coordinates=False):
    """
    Recursively copy a GeoJSON object, wrapping all floats found below a "coordinates" member in
    FloatWrapper instances.

    :param obj: A (nested) structure of dicts, lists, tuples and scalar values.
    :param in_coordinates: Flag signaling whether `obj` is located inside a "coordinates" member.
    :return: The copy, with dicts turned into `OrderedDict`s and lists as well as tuples turned \
    into lists. Anything else - except for floats inside coordinates - is passed through as is.
    """
    if isinstance(obj, dict):
        wrapped = collections.OrderedDict()
        for key, value in obj.items():
            # Once we have entered a "coordinates" member, the flag stays on for everything
            # nested below it.
            wrapped[key] = wrap_floats(
                value, in_coordinates=key == 'coordinates' or in_coordinates)
        return wrapped

    if isinstance(obj, (list, tuple)):
        return [wrap_floats(member, in_coordinates=in_coordinates) for member in obj]

    if in_coordinates and isinstance(obj, float):
        return FloatWrapper(obj)

    return obj


class MeterPrecisionFloatEncoder(json.JSONEncoder):
    """
    JSON encoder which serializes FloatWrapper instances as floats rounded to 5 decimal places.
    """
    def default(self, obj):
        """
        :param obj: An object the base encoder does not know how to serialize.
        :return: The rounded float for a FloatWrapper; anything else is delegated to the base \
        class implementation.
        """
        if not isinstance(obj, FloatWrapper):
            return super().default(obj)
        return round(obj.value, 5)


def dump(obj, *args, **kw):
    """
    Write a GeoJSON object via `clldutils.jsonlib.dump`, with coordinates limited to 5 decimal
    places.

    :param obj: The GeoJSON object to serialize.
    :param args: Positional arguments handed on to `jsonlib.dump`.
    :param kw: Keyword arguments handed on to `jsonlib.dump`; a `cls` argument is always \
    replaced with `MeterPrecisionFloatEncoder`.
    :return: Whatever `jsonlib.dump` returns.
    """
    wrapped = wrap_floats(obj)
    kw.update(cls=MeterPrecisionFloatEncoder)
    return jsonlib.dump(wrapped, *args, **kw)


def dumps(obj, **kw):
    """
    Serialize a GeoJSON object to a JSON string, with coordinates limited to 5 decimal places.

    :param obj: The GeoJSON object to serialize.
    :param kw: Keyword arguments handed on to `json.dumps`; a `cls` argument is always replaced \
    with `MeterPrecisionFloatEncoder`.
    :return: The JSON string.
    """
    options = dict(kw, cls=MeterPrecisionFloatEncoder)
    return json.dumps(wrap_floats(obj), **options)
Loading

0 comments on commit 62b39b8

Please sign in to comment.